Coverage for onnxcustom/utils/nvprof2json.py: 99%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Converts traces from :epkg:`nvprof`.
4The source comes from `nvprof2json <https://github.com/ezyang/nvprof2json>`_.
5"""
7import sqlite3
8import enum
9import json
10import copy
11import io
12import os
13import zipfile
14import cxxfilt
15import pandas
def convert_trace_to_json(filename, output=None, temporary_file=None,
                          verbose=0, fLOG=None):
    """
    Converts traces produced by :epkg:`nvprof` and saved with
    format *sqlite3* (extension `.sql`). The output format
    follows `Trace Event Format
    <https://docs.google.com/document/d/
    1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview>`_.

    :param filename: filename, a *sqlite3* database or a zip file
        (extension `.zip`) storing exactly one such database
    :param output: output file or None
    :param temporary_file: if the file needs to be unzipped,
        this file will be created to be the unzipped file,
        it is not cleaned after the unzipping, if it is None or empty,
        ``filename + ".unzipped"`` is used, if it already exists,
        it is reused without unzipping again
    :param verbose: verbosity
    :param fLOG: logging function
    :return: json (if output is None, the list of events otherwise)
    :raises RuntimeError: if the zip file does not store exactly one file

    This file, if not too big, can be viewed with `chrome-tracing`.
    The traces are usually generated by using a command line similar to:

    ::

        nvprof -o gpu_profile.sql python plot_gpu_training.py
    """
    def _log(msg):
        # Single place for the verbosity test, fLOG is optional.
        if verbose > 0 and fLOG is not None:
            fLOG(msg)

    ext = os.path.splitext(filename)[-1]
    if ext == ".zip":
        if temporary_file in (None, ''):
            temporary_file = filename + ".unzipped"
        if os.path.exists(temporary_file):
            _log("[convert_trace_to_json] %r already unzipped into %r"
                 "." % (filename, temporary_file))
        else:
            _log("[convert_trace_to_json] unzipping to file %r"
                 "." % temporary_file)
            # Context managers close the archive and the streams
            # even if the extraction fails.
            with zipfile.ZipFile(filename) as zipf:
                names = zipf.namelist()
                if len(names) != 1:
                    raise RuntimeError(  # pragma: no cover
                        "More than one file is stored in zip file %r."
                        "" % filename)
                with zipf.open(names[0], "r") as stream:
                    with open(temporary_file, "wb") as f:
                        # Copies by chunks to avoid loading
                        # the whole database in memory.
                        for data in iter(lambda: stream.read(65536), b""):
                            f.write(data)
        filename = temporary_file

    traceEvents = []
    conn = sqlite3.connect(filename)
    try:
        conn.row_factory = sqlite3.Row

        # StringTable maps an integer id to a (mangled) name.
        strings = {}
        for r in conn.execute("SELECT _id_ as id, value FROM StringTable"):
            strings[r["id"]] = _demangle(r["value"])

        # Example of a row:
        # _id_: 11625
        # cbid: 17
        # start: 1496933427584362152
        # end: 1496933427584362435
        # processId: 1317533
        # threadId: 1142654784
        # correlationId: 13119
        # returnValue: 0
        _log("[convert_trace_to_json] step 1 begin.")
        for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_RUNTIME"):
            try:
                cbid = Cbids(row["cbid"]).name
            except ValueError:  # pragma: no cover
                # Unknown id, the raw number is used as the event name.
                cbid = str(row["cbid"])
                _log("[convert_trace_to_json] unrecognized cbid %r." % cbid)
            traceEvents.append({
                "name": cbid,
                "ph": "X",  # Complete Event (Begin + End event)
                "cat": "cuda",
                "ts": _munge_time(row["start"]),
                "dur": _munge_time(row["end"] - row["start"]),
                "tid": "Thread {}: Runtime API".format(row["threadId"]),
                "pid": "[{}] Process".format(row["processId"]),
                "args": {
                    # ...
                },
            })

        # DRIVER?

        # Example of a row:
        # _id_: 1
        # flags: 2
        # timestamp: 1496844806028263989
        # id: 1
        # objectKind: 2
        # objectId: b'\xe5\xc0\x16\x00@\xe7\x10J\x00\x00\x00\x00'
        # name: 3
        # domain: 0
        _log("[convert_trace_to_json] step 2 begin.")
        # A marker with a name (name != 0) is joined with the marker
        # sharing the same id and no name (name = 0), the second one
        # gives the end time when it exists.
        marker_query = " ".join([
            "SELECT",
            ",".join([
                "start.name AS name",
                "start.timestamp AS start_time",
                "end.timestamp AS end_time"
            ]),
            "FROM",
            "(SELECT * FROM CUPTI_ACTIVITY_KIND_MARKER WHERE name != 0) "
            "AS start",
            "LEFT JOIN",
            "(SELECT * FROM CUPTI_ACTIVITY_KIND_MARKER WHERE name = 0) "
            "AS end",
            "ON start.id = end.id"])
        for row in conn.execute(marker_query):
            event = {
                "name": strings[row["name"]],
                "cat": "cuda",
                "ts": _munge_time(row["start_time"]),
                # Weirdly, these don't seem to be associated with a
                # CPU/GPU. I guess there's no CUDA Context available
                # when you run these, so it makes sense. But nvvp
                # associates these with a GPU strangely enough
                "tid": "Markers and Ranges",
                "pid": "Markers and Ranges",
                # parse objectId?
                "args": {
                    # ...
                },
            }
            if row["end_time"] is None:
                # No matching end marker: instant event.
                event["ph"] = "I"
            else:
                event["ph"] = "X"
                event["dur"] = _munge_time(
                    row["end_time"] - row["start_time"])
            traceEvents.append(event)

        # Example of a row:
        # _id_: 1
        # copyKind: 1
        # srcKind: 1
        # dstKind: 3
        # flags: 0
        # bytes: 7436640
        # start: 1496933426915778221
        # end: 1496933426916558424
        # deviceId: 0
        # contextId: 1
        # streamId: 7
        # correlationId: 809
        # runtimeCorrelationId: 0
        _log("[convert_trace_to_json] step 3 begin.")
        # copyKind:
        #   1 - Memcpy HtoD
        #   2 - Memcpy DtoH
        #   8 - Memcpy DtoD
        # flags: ???
        #   0 - Sync
        #   1 - Async
        # srcKind/dstKind
        #   1 - Pageable
        #   2 - Page-locked ???
        #   3 - Device
        copy_kinds = {1: "HtoD", 2: "DtoH", 8: "DtoD"}
        copy_flags = {0: "sync", 1: "async"}
        for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_MEMCPY"):
            # Unknown codes are kept as their number.
            copyKind = copy_kinds.get(row["copyKind"], str(row["copyKind"]))
            flags = copy_flags.get(row["flags"], str(row["flags"]))
            traceEvents.append({
                "name": "Memcpy {} [{}]".format(copyKind, flags),
                "ph": "X",  # Complete Event (Begin + End event)
                "cat": "cuda",
                "ts": _munge_time(row["start"]),
                "dur": _munge_time(row["end"] - row["start"]),
                "tid": "MemCpy ({})".format(copyKind),
                # lookup GPU name. This is stored in
                # CUPTI_ACTIVITY_KIND_DEVICE
                "pid": "[{}:{}] Overview".format(
                    row["deviceId"], row["contextId"]),
                "args": {
                    "Size": _sizeof_fmt(row["bytes"]),
                },
            })

        # name: index into StringTable
        # What is the difference between end and completed?
        # Example of a row:
        # _id_: 1
        # cacheConfig: b'\x00'
        # sharedMemoryConfig: 1
        # registersPerThread: 32
        # partitionedGlobalCacheRequested: 2
        # partitionedGlobalCacheExecuted: 2
        # start: 1496844806032514222
        # end: 1496844806032531694
        # completed: 1496844806032531694
        # deviceId: 0
        # contextId: 1
        # streamId: 7
        # gridX: 57
        # gridY: 1
        # gridZ: 1
        # blockX: 128
        # blockY: 1
        # blockZ: 1
        # staticSharedMemory: 0
        # dynamicSharedMemory: 0
        # localMemoryPerThread: 0
        # localMemoryTotal: 78643200
        # correlationId: 487
        # gridId: 669
        # name: 5
        _log("[convert_trace_to_json] step 4 begin.")
        for row in conn.execute(
                "SELECT * FROM CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL"):
            event = {
                "name": strings[row["name"]],
                "ph": "X",  # Complete Event (Begin + End event)
                "cat": "cuda",
                "ts": _munge_time(row["start"]),
                "dur": _munge_time(row["end"] - row["start"]),
                "tid": "Compute",
                # lookup GPU name?
                "pid": "[{}:{}] Overview".format(
                    row["deviceId"], row["contextId"]),
                "args": {
                    "Grid size": "[ {}, {}, {} ]".format(
                        row["gridX"], row["gridY"], row["gridZ"]),
                    "Block size": "[ {}, {}, {} ]".format(
                        row["blockX"], row["blockY"], row["blockZ"]),
                    # ...
                },
            }
            # Every kernel is stored twice: once in the overview
            # and once in a track named after the kernel itself.
            alt_event = copy.deepcopy(event)
            alt_event["tid"] = alt_event["name"]
            alt_event["pid"] = "[{}:{}] Compute".format(
                row["deviceId"], row["contextId"])
            traceEvents.append(event)
            traceEvents.append(alt_event)
    finally:
        # The database is only read, the connection is released
        # whatever happens.
        conn.close()

    if output not in (None, ''):
        _log("[convert_trace_to_json] converting into json in %r"
             "." % output)
        with open(output, "w", encoding="utf-8") as f:
            json.dump(traceEvents, f, separators=(',\n', ':'))
            f.write('\n')
        _log("[convert_trace_to_json] done.")
        return traceEvents

    _log("[convert_trace_to_json] converting into json.")
    st = io.StringIO()
    json.dump(traceEvents, st, separators=(',\n', ':'))
    st.write('\n')
    content = st.getvalue()
    _log("[convert_trace_to_json] done.")
    _log(content)
    return content
def _munge_time(t):
    """
    Converts a time coming from nvprof into a chrome://tracing time.
    The value is returned unchanged.
    """
    # Dividing by 1000 would be strictly correct but it loses accuracy,
    # the conversion is therefore left disabled: munged = t / 1000.
    munged = t
    return munged
def _demangle(name):
    """
    Returns the demangled version of a C++ identifier
    computed by ``cxxfilt.demangle``, or the identifier itself
    if :epkg:`cxxfilt` raises ``LibraryNotFound``.
    """
    try:
        demangled = cxxfilt.demangle(name)
    except cxxfilt.LibraryNotFound:  # pragma: no cover
        # A library is missing, the mangled name is kept.
        return name
    return demangled
class Cbids(enum.IntEnum):
    """
    Names of the CUDA runtime API callback ids (*cbid*).
    Function *convert_trace_to_json* uses it to replace the integer
    stored in column *cbid* of table ``CUPTI_ACTIVITY_KIND_RUNTIME``
    by a readable event name (``Cbids(value).name``).
    A value which is not listed here raises ``ValueError``.
    """
    INVALID = 0
    cudaDriverGetVersion = 1
    cudaRuntimeGetVersion = 2
    cudaGetDeviceCount = 3
    cudaGetDeviceProperties = 4
    cudaChooseDevice = 5
    cudaGetChannelDesc = 6
    cudaCreateChannelDesc = 7
    cudaConfigureCall = 8
    cudaSetupArgument = 9
    cudaGetLastError = 10
    cudaPeekAtLastError = 11
    cudaGetErrorString = 12
    cudaLaunch = 13
    cudaFuncSetCacheConfig = 14
    cudaFuncGetAttributes = 15
    cudaSetDevice = 16
    cudaGetDevice = 17
    cudaSetValidDevices = 18
    cudaSetDeviceFlags = 19
    cudaMalloc = 20
    cudaMallocPitch = 21
    cudaFree = 22
    cudaMallocArray = 23
    cudaFreeArray = 24
    cudaMallocHost = 25
    cudaFreeHost = 26
    cudaHostAlloc = 27
    cudaHostGetDevicePointer = 28
    cudaHostGetFlags = 29
    cudaMemGetInfo = 30
    cudaMemcpy = 31
    cudaMemcpy2D = 32
    cudaMemcpyToArray = 33
    cudaMemcpy2DToArray = 34
    cudaMemcpyFromArray = 35
    cudaMemcpy2DFromArray = 36
    cudaMemcpyArrayToArray = 37
    cudaMemcpy2DArrayToArray = 38
    cudaMemcpyToSymbol = 39
    cudaMemcpyFromSymbol = 40
    cudaMemcpyAsync = 41
    cudaMemcpyToArrayAsync = 42
    cudaMemcpyFromArrayAsync = 43
    cudaMemcpy2DAsync = 44
    cudaMemcpy2DToArrayAsync = 45
    cudaMemcpy2DFromArrayAsync = 46
    cudaMemcpyToSymbolAsync = 47
    cudaMemcpyFromSymbolAsync = 48
    cudaMemset = 49
    cudaMemset2D = 50
    cudaMemsetAsync = 51
    cudaMemset2DAsync = 52
    cudaGetSymbolAddress = 53
    cudaGetSymbolSize = 54
    cudaBindTexture = 55
    cudaBindTexture2D = 56
    cudaBindTextureToArray = 57
    cudaUnbindTexture = 58
    cudaGetTextureAlignmentOffset = 59
    cudaGetTextureReference = 60
    cudaBindSurfaceToArray = 61
    cudaGetSurfaceReference = 62
    cudaGLSetGLDevice = 63
    cudaGLRegisterBufferObject = 64
    cudaGLMapBufferObject = 65
    cudaGLUnmapBufferObject = 66
    cudaGLUnregisterBufferObject = 67
    cudaGLSetBufferObjectMapFlags = 68
    cudaGLMapBufferObjectAsync = 69
    cudaGLUnmapBufferObjectAsync = 70
    cudaWGLGetDevice = 71
    cudaGraphicsGLRegisterImage = 72
    cudaGraphicsGLRegisterBuffer = 73
    cudaGraphicsUnregisterResource = 74
    cudaGraphicsResourceSetMapFlags = 75
    cudaGraphicsMapResources = 76
    cudaGraphicsUnmapResources = 77
    cudaGraphicsResourceGetMappedPointer = 78
    cudaGraphicsSubResourceGetMappedArray = 79
    cudaVDPAUGetDevice = 80
    cudaVDPAUSetVDPAUDevice = 81
    cudaGraphicsVDPAURegisterVideoSurface = 82
    cudaGraphicsVDPAURegisterOutputSurface = 83
    cudaD3D11GetDevice = 84
    cudaD3D11GetDevices = 85
    cudaD3D11SetDirect3DDevice = 86
    cudaGraphicsD3D11RegisterResource = 87
    cudaD3D10GetDevice = 88
    cudaD3D10GetDevices = 89
    cudaD3D10SetDirect3DDevice = 90
    cudaGraphicsD3D10RegisterResource = 91
    cudaD3D10RegisterResource = 92
    cudaD3D10UnregisterResource = 93
    cudaD3D10MapResources = 94
    cudaD3D10UnmapResources = 95
    cudaD3D10ResourceSetMapFlags = 96
    cudaD3D10ResourceGetSurfaceDimensions = 97
    cudaD3D10ResourceGetMappedArray = 98
    cudaD3D10ResourceGetMappedPointer = 99
    cudaD3D10ResourceGetMappedSize = 100
    cudaD3D10ResourceGetMappedPitch = 101
    cudaD3D9GetDevice = 102
    cudaD3D9GetDevices = 103
    cudaD3D9SetDirect3DDevice = 104
    cudaD3D9GetDirect3DDevice = 105
    cudaGraphicsD3D9RegisterResource = 106
    cudaD3D9RegisterResource = 107
    cudaD3D9UnregisterResource = 108
    cudaD3D9MapResources = 109
    cudaD3D9UnmapResources = 110
    cudaD3D9ResourceSetMapFlags = 111
    cudaD3D9ResourceGetSurfaceDimensions = 112
    cudaD3D9ResourceGetMappedArray = 113
    cudaD3D9ResourceGetMappedPointer = 114
    cudaD3D9ResourceGetMappedSize = 115
    cudaD3D9ResourceGetMappedPitch = 116
    cudaD3D9Begin = 117
    cudaD3D9End = 118
    cudaD3D9RegisterVertexBuffer = 119
    cudaD3D9UnregisterVertexBuffer = 120
    cudaD3D9MapVertexBuffer = 121
    cudaD3D9UnmapVertexBuffer = 122
    cudaThreadExit = 123
    cudaSetDoubleForDevice = 124
    cudaSetDoubleForHost = 125
    cudaThreadSynchronize = 126
    cudaThreadGetLimit = 127
    cudaThreadSetLimit = 128
    cudaStreamCreate = 129
    cudaStreamDestroy = 130
    cudaStreamSynchronize = 131
    cudaStreamQuery = 132
    cudaEventCreate = 133
    cudaEventCreateWithFlags = 134
    cudaEventRecord = 135
    cudaEventDestroy = 136
    cudaEventSynchronize = 137
    cudaEventQuery = 138
    cudaEventElapsedTime = 139
    cudaMalloc3D = 140
    cudaMalloc3DArray = 141
    cudaMemset3D = 142
    cudaMemset3DAsync = 143
    cudaMemcpy3D = 144
    cudaMemcpy3DAsync = 145
    cudaThreadSetCacheConfig = 146
    cudaStreamWaitEvent = 147
    cudaD3D11GetDirect3DDevice = 148
    cudaD3D10GetDirect3DDevice = 149
    cudaThreadGetCacheConfig = 150
    cudaPointerGetAttributes = 151
    cudaHostRegister = 152
    cudaHostUnregister = 153
    cudaDeviceCanAccessPeer = 154
    cudaDeviceEnablePeerAccess = 155
    cudaDeviceDisablePeerAccess = 156
    cudaPeerRegister = 157
    cudaPeerUnregister = 158
    cudaPeerGetDevicePointer = 159
    cudaMemcpyPeer = 160
    cudaMemcpyPeerAsync = 161
    cudaMemcpy3DPeer = 162
    cudaMemcpy3DPeerAsync = 163
    cudaDeviceReset = 164
    cudaDeviceSynchronize = 165
    cudaDeviceGetLimit = 166
    cudaDeviceSetLimit = 167
    cudaDeviceGetCacheConfig = 168
    cudaDeviceSetCacheConfig = 169
    cudaProfilerInitialize = 170
    cudaProfilerStart = 171
    cudaProfilerStop = 172
    cudaDeviceGetByPCIBusId = 173
    cudaDeviceGetPCIBusId = 174
    cudaGLGetDevices = 175
    cudaIpcGetEventHandle = 176
    cudaIpcOpenEventHandle = 177
    cudaIpcGetMemHandle = 178
    cudaIpcOpenMemHandle = 179
    cudaIpcCloseMemHandle = 180
    cudaArrayGetInfo = 181
    cudaFuncSetSharedMemConfig = 182
    cudaDeviceGetSharedMemConfig = 183
    cudaDeviceSetSharedMemConfig = 184
    cudaCreateTextureObject = 185
    cudaDestroyTextureObject = 186
    cudaGetTextureObjectResourceDesc = 187
    cudaGetTextureObjectTextureDesc = 188
    cudaCreateSurfaceObject = 189
    cudaDestroySurfaceObject = 190
    cudaGetSurfaceObjectResourceDesc = 191
    cudaMallocMipmappedArray = 192
    cudaGetMipmappedArrayLevel = 193
    cudaFreeMipmappedArray = 194
    cudaBindTextureToMipmappedArray = 195
    cudaGraphicsResourceGetMappedMipmappedArray = 196
    cudaStreamAddCallback = 197
    cudaStreamCreateWithFlags = 198
    cudaGetTextureObjectResourceViewDesc = 199
    cudaDeviceGetAttribute = 200
    cudaStreamDestroy_v5050 = 201
    cudaStreamCreateWithPriority = 202
    cudaStreamGetPriority = 203
    cudaStreamGetFlags = 204
    cudaDeviceGetStreamPriorityRange = 205
    cudaMallocManaged = 206
    cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000 = 207
    cudaStreamAttachMemAsync = 208
    cudaGetErrorName = 209
    cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050 = 210
    cudaLaunchKernel = 211
    cudaGetDeviceFlags = 212
    cudaLaunch_ptsz = 213
    cudaLaunchKernel_ptsz = 214
    cudaMemcpy_ptds = 215
    cudaMemcpy2D_ptds = 216
    cudaMemcpyToArray_ptds = 217
    cudaMemcpy2DToArray_ptds = 218
    cudaMemcpyFromArray_ptds = 219
    cudaMemcpy2DFromArray_ptds = 220
    cudaMemcpyArrayToArray_ptds = 221
    cudaMemcpy2DArrayToArray_ptds = 222
    cudaMemcpyToSymbol_ptds = 223
    cudaMemcpyFromSymbol_ptds = 224
    cudaMemcpyAsync_ptsz = 225
    cudaMemcpyToArrayAsync_ptsz = 226
    cudaMemcpyFromArrayAsync_ptsz = 227
    cudaMemcpy2DAsync_ptsz = 228
    cudaMemcpy2DToArrayAsync_ptsz = 229
    cudaMemcpy2DFromArrayAsync_ptsz = 230
    cudaMemcpyToSymbolAsync_ptsz = 231
    cudaMemcpyFromSymbolAsync_ptsz = 232
    cudaMemset_ptds = 233
    cudaMemset2D_ptds = 234
    cudaMemsetAsync_ptsz = 235
    cudaMemset2DAsync_ptsz = 236
    cudaStreamGetPriority_ptsz = 237
    cudaStreamGetFlags_ptsz = 238
    cudaStreamSynchronize_ptsz = 239
    cudaStreamQuery_ptsz = 240
    cudaStreamAttachMemAsync_ptsz = 241
    cudaEventRecord_ptsz = 242
    cudaMemset3D_ptds = 243
    cudaMemset3DAsync_ptsz = 244
    cudaMemcpy3D_ptds = 245
    cudaMemcpy3DAsync_ptsz = 246
    cudaStreamWaitEvent_ptsz = 247
    cudaStreamAddCallback_ptsz = 248
    cudaMemcpy3DPeer_ptds = 249
    cudaMemcpy3DPeerAsync_ptsz = 250
    cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 251
    cudaMemPrefetchAsync = 252
    cudaMemPrefetchAsync_ptsz = 253
    cudaMemAdvise = 254
    cudaDeviceGetP2PAttribute = 255
    cudaGraphicsEGLRegisterImage = 256
    cudaEGLStreamConsumerConnect = 257
    cudaEGLStreamConsumerDisconnect = 258
    cudaEGLStreamConsumerAcquireFrame = 259
    cudaEGLStreamConsumerReleaseFrame = 260
    cudaEGLStreamProducerConnect = 261
    cudaEGLStreamProducerDisconnect = 262
    cudaEGLStreamProducerPresentFrame = 263
    cudaEGLStreamProducerReturnFrame = 264
    cudaGraphicsResourceGetMappedEglFrame = 265
    cudaMemRangeGetAttribute = 266
    cudaMemRangeGetAttributes = 267
    cudaEGLStreamConsumerConnectWithFlags = 268
    cudaLaunchCooperativeKernel = 269
    cudaLaunchCooperativeKernel_ptsz = 270
    cudaEventCreateFromEGLSync = 271
    cudaLaunchCooperativeKernelMultiDevice = 272
    cudaFuncSetAttribute = 273
    cudaImportExternalMemory = 274
    cudaExternalMemoryGetMappedBuffer = 275
    cudaExternalMemoryGetMappedMipmappedArray = 276
    cudaDestroyExternalMemory = 277
    cudaImportExternalSemaphore = 278
    cudaSignalExternalSemaphoresAsync = 279
    cudaSignalExternalSemaphoresAsync_ptsz = 280
    cudaWaitExternalSemaphoresAsync = 281
    cudaWaitExternalSemaphoresAsync_ptsz = 282
    cudaDestroyExternalSemaphore = 283
    cudaLaunchHostFunc = 284
    cudaLaunchHostFunc_ptsz = 285
    cudaGraphCreate = 286
    cudaGraphKernelNodeGetParams = 287
    cudaGraphKernelNodeSetParams = 288
    cudaGraphAddKernelNode = 289
    cudaGraphAddMemcpyNode = 290
    cudaGraphMemcpyNodeGetParams = 291
    cudaGraphMemcpyNodeSetParams = 292
    cudaGraphAddMemsetNode = 293
    cudaGraphMemsetNodeGetParams = 294
    cudaGraphMemsetNodeSetParams = 295
    cudaGraphAddHostNode = 296
    cudaGraphHostNodeGetParams = 297
    cudaGraphAddChildGraphNode = 298
    cudaGraphChildGraphNodeGetGraph = 299
    cudaGraphAddEmptyNode = 300
    cudaGraphClone = 301
    cudaGraphNodeFindInClone = 302
    cudaGraphNodeGetType = 303
    cudaGraphGetRootNodes = 304
    cudaGraphNodeGetDependencies = 305
    cudaGraphNodeGetDependentNodes = 306
    cudaGraphAddDependencies = 307
    cudaGraphRemoveDependencies = 308
    cudaGraphDestroyNode = 309
    cudaGraphInstantiate = 310
    cudaGraphLaunch = 311
    cudaGraphLaunch_ptsz = 312
    cudaGraphExecDestroy = 313
    cudaGraphDestroy = 314
    cudaStreamBeginCapture = 315
    cudaStreamBeginCapture_ptsz = 316
    cudaStreamIsCapturing = 317
    cudaStreamIsCapturing_ptsz = 318
    cudaStreamEndCapture = 319
    cudaStreamEndCapture_ptsz = 320
    cudaGraphHostNodeSetParams = 321
    cudaGraphGetNodes = 322
    cudaGraphGetEdges = 323
    cudaStreamGetCaptureInfo = 324
    cudaStreamGetCaptureInfo_ptsz = 325
    cudaGraphExecKernelNodeSetParams = 326
    cudaThreadExchangeStreamCaptureMode = 327
    cudaDeviceGetNvSciSyncAttributes = 328
    cudaOccupancyAvailableDynamicSMemPerBlock = 329
    cudaStreamSetFlags = 330
    cudaStreamSetFlags_ptsz = 331
    cudaGraphExecMemcpyNodeSetParams = 332
    cudaGraphExecMemsetNodeSetParams = 333
    cudaGraphExecHostNodeSetParams = 334
    cudaGraphExecUpdate = 335
    # NOTE(review): SIZE and FORCE_INT do not look like API calls,
    # presumably the trailing sentinel entries of the C enumeration
    # this list was copied from - confirm against the CUPTI header.
    SIZE = 336
    FORCE_INT = 0x7FFFFFFF
def _sizeof_fmt(num, suffix='B'):
    """
    Formats a size with metric units (like nvvp).

    :param num: size to format
    :param suffix: text appended after the unit prefix
    :return: a string such as ``'7.4MB'``
    """
    prefixes = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')
    value = num
    for prefix in prefixes:
        if abs(value) >= 1000.0:
            # Too big for this prefix, moves on to the next one.
            value /= 1000.0  # pragma: no cover
            continue
        return "%3.1f%s%s" % (value, prefix, suffix)
    # Bigger than every prefix of the list.
    return "%.1f%s%s" % (value, 'Y', suffix)  # pragma: no cover
def json_to_dataframe(js):
    """
    Converts a json dump obtained with function
    @see fn convert_trace_to_json
    to a dataframe.

    :param js: a filename, a json string, a stream containing json
    :return: a dataframe, with an additional column *ts_sec*
        equal to column *ts* divided by ``1e9``
    """
    if isinstance(js, str):
        # Only a short string is tested as a filename, this avoids
        # querying the file system with a whole json dump.
        if len(js) < 5000 and os.path.exists(js):
            df = pandas.read_json(js)
        else:
            # A json string is wrapped into a stream: recent versions
            # of pandas do not accept literal json anymore.
            df = pandas.read_json(io.StringIO(js))
    else:
        df = pandas.read_json(js)

    df['ts_sec'] = df['ts'] / 1e9
    return df
def json_to_dataframe_streaming(js, chunksize=100000, flatten=False, **kwargs):
    """
    Converts a big json dump (from @see fn convert_trace_to_json)
    to a dataframe. The function processes the data by streaming to avoid
    loading huge data in memory.
    Returns an iterator on dataframes.
    The function relies on :epkg:`pandas_streaming`.

    :param js: a filename, a json string, a stream containing json
    :param chunksize:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :param flatten:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :param kwargs:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :return: a *StreamingDataFrame*, with an additional column *ts_sec*
        equal to column *ts* divided by ``1e9``
    :raises RuntimeError: if *js* is a string which is not
        the name of an existing file
    """
    from pandas_streaming.df import StreamingDataFrame  # pylint: disable=C0415
    if isinstance(js, str):
        # A string is only accepted as a filename, a json string
        # must be wrapped into a stream by the caller.
        if not (len(js) < 5000 and os.path.exists(js)):
            raise RuntimeError(
                "Use a stream or function json_to_dataframe instead of "
                "the streaming version.")

    # The streaming options are forwarded, they used to be ignored.
    sdf = StreamingDataFrame.read_json(
        js, chunksize=chunksize, flatten=flatten, **kwargs)

    sdf['ts_sec'] = sdf['ts'].apply(lambda t: t / 1e9)
    return sdf