Coverage for onnxcustom/utils/nvprof2json.py: 99%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

455 statements  

1""" 

2@file 

3@brief Converts traces from :epkg:`nvprof`. 

4The source comes from `nvprof2json <https://github.com/ezyang/nvprof2json>`_. 

5""" 

6 

7import sqlite3 

8import enum 

9import json 

10import copy 

11import io 

12import os 

13import zipfile 

14import cxxfilt 

15import pandas 

16 

17 

def convert_trace_to_json(filename, output=None, temporary_file=None,
                          verbose=0, fLOG=None):
    """
    Converts traces produced by :epkg:`nvprof` and saved with
    format *sqlite3* (extension `.sql`). The output format
    follows `Trace Event Format
    <https://docs.google.com/document/d/
    1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview>`_.

    :param filename: filename of the sqlite3 trace or of a zip file
        (extension `.zip`) storing exactly one such trace
    :param output: output file or None
    :param temporary_file: if the file needs to be unzipped,
        this file will be created to be the unzipped file,
        it is not cleaned after the unzipping, if None or empty,
        ``filename + ".unzipped"`` is used, an already existing
        file is reused as is
    :param verbose: verbosity, messages are logged if it is positive
    :param fLOG: logging function
    :return: the list of events if *output* is specified
        (the json is written in that file), the json string otherwise
    :raises RuntimeError: if the zip file does not store exactly one file

    This file, if not too big, can be viewed with `chrome-tracing`.
    The traces are usually generated by using a command line similar to:

    ::

        nvprof -o gpu_profile.sql python plot_gpu_training.py
    """
    def log(fmt, *args):
        # Messages are formatted only when logging is really enabled.
        if verbose > 0 and fLOG is not None:
            fLOG(fmt % args if args else fmt)

    if os.path.splitext(filename)[-1] == ".zip":
        filename = _unzip_trace(filename, temporary_file, log)

    conn = sqlite3.connect(filename)
    try:
        conn.row_factory = sqlite3.Row

        # Columns called *name* in the other tables index this table.
        strings = {
            r["id"]: _demangle(r["value"])
            for r in conn.execute("SELECT _id_ as id, value FROM StringTable")}

        traceEvents = []
        log("[convert_trace_to_json] step 1 begin.")
        traceEvents.extend(_runtime_events(conn, log))

        # DRIVER?

        log("[convert_trace_to_json] step 2 begin.")
        traceEvents.extend(_marker_events(conn, strings))
        log("[convert_trace_to_json] step 3 begin.")
        traceEvents.extend(_memcpy_events(conn))
        log("[convert_trace_to_json] step 4 begin.")
        traceEvents.extend(_kernel_events(conn, strings))
    finally:
        # Every event is in memory at this point (or the conversion
        # failed), the database is released in both cases.
        conn.close()

    if output not in (None, ''):
        log("[convert_trace_to_json] converting into json in %r.", output)
        with open(output, "w", encoding="utf-8") as f:
            json.dump(traceEvents, f, separators=(',\n', ':'))
            f.write('\n')
        log("[convert_trace_to_json] done.")
        return traceEvents

    log("[convert_trace_to_json] converting into json.")
    st = io.StringIO()
    json.dump(traceEvents, st, separators=(',\n', ':'))
    st.write('\n')
    content = st.getvalue()
    log("[convert_trace_to_json] done.")
    log(content)
    return content


def _unzip_trace(filename, temporary_file, log):
    """Unzips the only file stored in *filename*, returns the unzipped name."""
    if temporary_file in (None, ''):
        temporary_file = filename + ".unzipped"
    if os.path.exists(temporary_file):
        log("[convert_trace_to_json] %r already unzipped into %r.",
            filename, temporary_file)
        return temporary_file

    log("[convert_trace_to_json] unzipping to file %r.", temporary_file)
    # Context managers close the archive and the streams even if
    # the extraction fails.
    with zipfile.ZipFile(filename) as zipf:
        names = zipf.namelist()
        if len(names) != 1:
            raise RuntimeError(  # pragma: no cover
                "Expected exactly one file in zip file %r, found %d." % (
                    filename, len(names)))
        with zipf.open(names[0], "r") as stream, \
                open(temporary_file, "wb") as f:
            # Copy by chunks to avoid loading a big trace in memory.
            for data in iter(lambda: stream.read(65536), b""):
                f.write(data)
    return temporary_file


def _runtime_events(conn, log):
    """Yields one complete event per row of CUPTI_ACTIVITY_KIND_RUNTIME."""
    # Example of a row:
    #   _id_: 11625
    #   cbid: 17
    #   start: 1496933427584362152
    #   end: 1496933427584362435
    #   processId: 1317533
    #   threadId: 1142654784
    #   correlationId: 13119
    #   returnValue: 0
    for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_RUNTIME"):
        try:
            cbid = Cbids(row["cbid"]).name
        except ValueError:  # pragma: no cover
            # Unknown callback id: the raw number is used as a name.
            cbid = str(row["cbid"])
            log("[convert_trace_to_json] unrecognized cbid %r.", cbid)
        yield {
            "name": cbid,
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": "Thread {}: Runtime API".format(row["threadId"]),
            "pid": "[{}] Process".format(row["processId"]),
            "args": {
                # ...
            },
        }


def _marker_events(conn, strings):
    """Yields one event per named marker of CUPTI_ACTIVITY_KIND_MARKER."""
    # Example of a row:
    #   _id_: 1
    #   flags: 2
    #   timestamp: 1496844806028263989
    #   id: 1
    #   objectKind: 2
    #   objectId: b'\xe5\xc0\x16\x00@\xe7\x10J\x00\x00\x00\x00'
    #   name: 3
    #   domain: 0
    # A row with a name (name != 0) is joined with the unnamed row
    # (name = 0) sharing the same id, which gives the end of the range.
    query = (
        "SELECT start.name AS name,"
        "start.timestamp AS start_time,"
        "end.timestamp AS end_time "
        "FROM "
        "(SELECT * FROM CUPTI_ACTIVITY_KIND_MARKER WHERE name != 0) "
        "AS start "
        "LEFT JOIN "
        "(SELECT * FROM CUPTI_ACTIVITY_KIND_MARKER WHERE name = 0) "
        "AS end "
        "ON start.id = end.id")
    for row in conn.execute(query):
        event = {
            "name": strings[row["name"]],
            "cat": "cuda",
            "ts": _munge_time(row["start_time"]),
            # Weirdly, these don't seem to be associated with a
            # CPU/GPU. I guess there's no CUDA Context available
            # when you run these, so it makes sense. But nvvp
            # associates these with a GPU strangely enough
            "tid": "Markers and Ranges",
            "pid": "Markers and Ranges",
            # parse objectId?
            "args": {
                # ...
            },
        }
        if row["end_time"] is None:
            # No matching end row: instant event.
            event["ph"] = "I"
        else:
            event["ph"] = "X"
            event["dur"] = _munge_time(row["end_time"] - row["start_time"])
        yield event


def _memcpy_events(conn):
    """Yields one complete event per row of CUPTI_ACTIVITY_KIND_MEMCPY."""
    # Example of a row:
    #   _id_: 1
    #   copyKind: 1
    #   srcKind: 1
    #   dstKind: 3
    #   flags: 0
    #   bytes: 7436640
    #   start: 1496933426915778221
    #   end: 1496933426916558424
    #   deviceId: 0
    #   contextId: 1
    #   streamId: 7
    #   correlationId: 809
    #   runtimeCorrelationId: 0
    #
    # srcKind/dstKind
    #   1 - Pageable
    #   2 - Page-locked ???
    #   3 - Device
    copy_kinds = {1: "HtoD", 2: "DtoH", 8: "DtoD"}
    # flags: ???
    sync_flags = {0: "sync", 1: "async"}
    for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_MEMCPY"):
        # Unknown codes are displayed as they are.
        copyKind = copy_kinds.get(row["copyKind"], str(row["copyKind"]))
        flags = sync_flags.get(row["flags"], str(row["flags"]))
        yield {
            "name": "Memcpy {} [{}]".format(copyKind, flags),
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": "MemCpy ({})".format(copyKind),
            # lookup GPU name. This is stored in CUPTI_ACTIVITY_KIND_DEVICE
            "pid": "[{}:{}] Overview".format(
                row["deviceId"], row["contextId"]),
            "args": {
                "Size": _sizeof_fmt(row["bytes"]),
            },
        }


def _kernel_events(conn, strings):
    """Yields two events per row of CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL."""
    # name: index into StringTable
    # What is the difference between end and completed?
    # Example of a row:
    #   _id_: 1
    #   cacheConfig: b'\x00'
    #   sharedMemoryConfig: 1
    #   registersPerThread: 32
    #   partitionedGlobalCacheRequested: 2
    #   partitionedGlobalCacheExecuted: 2
    #   start: 1496844806032514222
    #   end: 1496844806032531694
    #   completed: 1496844806032531694
    #   deviceId: 0
    #   contextId: 1
    #   streamId: 7
    #   gridX: 57
    #   gridY: 1
    #   gridZ: 1
    #   blockX: 128
    #   blockY: 1
    #   blockZ: 1
    #   staticSharedMemory: 0
    #   dynamicSharedMemory: 0
    #   localMemoryPerThread: 0
    #   localMemoryTotal: 78643200
    #   correlationId: 487
    #   gridId: 669
    #   name: 5
    for row in conn.execute(
            "SELECT * FROM CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL"):
        event = {
            "name": strings[row["name"]],
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": "Compute",
            # lookup GPU name?
            "pid": "[{}:{}] Overview".format(
                row["deviceId"], row["contextId"]),
            "args": {
                "Grid size": "[ {}, {}, {} ]".format(
                    row["gridX"], row["gridY"], row["gridZ"]),
                "Block size": "[ {}, {}, {} ]".format(
                    row["blockX"], row["blockY"], row["blockZ"]),
                # ...
            },
        }
        # The same kernel is also reported in a "Compute" process
        # with one line (tid) per kernel name.
        alt_event = copy.deepcopy(event)
        alt_event["tid"] = alt_event["name"]
        alt_event["pid"] = "[{}:{}] Compute".format(
            row["deviceId"], row["contextId"])
        yield event
        yield alt_event

305 

306 

307def _munge_time(t): 

308 """Take a time from nvprof and convert it into a chrome://tracing time.""" 

309 # For strict correctness, divide by 1000, but this reduces accuracy. 

310 return t # / 1000. 

311 

312 

def _demangle(name):
    """
    Demangles a C++ identifier using c++filt (package *cxxfilt*).

    :param name: name read from the trace
    :return: the demangled name, or *name* itself if it cannot
        be demangled
    """
    try:
        return cxxfilt.demangle(name)
    except cxxfilt.LibraryNotFound:  # pragma: no cover
        # One library is missing.
        return name
    except cxxfilt.InvalidName:  # pragma: no cover
        # The string looks like a mangled name but is not a valid one.
        # A single unexpected string should not stop the whole
        # conversion, the raw name is kept.
        return name

320 

321 

class Cbids(enum.IntEnum):
    """
    Maps the integer stored in column *cbid* of table
    ``CUPTI_ACTIVITY_KIND_RUNTIME`` to a readable name.
    Function @see fn convert_trace_to_json calls ``Cbids(value).name``
    to name runtime events and falls back to the raw number when
    the value is unknown (the lookup raises ``ValueError``).

    NOTE(review): names look like CUDA runtime API functions and values
    presumably mirror the CUPTI runtime callback ids of the CUDA version
    the trace was produced with -- confirm before adding new members.
    """
    INVALID = 0
    cudaDriverGetVersion = 1
    cudaRuntimeGetVersion = 2
    cudaGetDeviceCount = 3
    cudaGetDeviceProperties = 4
    cudaChooseDevice = 5
    cudaGetChannelDesc = 6
    cudaCreateChannelDesc = 7
    cudaConfigureCall = 8
    cudaSetupArgument = 9
    cudaGetLastError = 10
    cudaPeekAtLastError = 11
    cudaGetErrorString = 12
    cudaLaunch = 13
    cudaFuncSetCacheConfig = 14
    cudaFuncGetAttributes = 15
    cudaSetDevice = 16
    cudaGetDevice = 17
    cudaSetValidDevices = 18
    cudaSetDeviceFlags = 19
    cudaMalloc = 20
    cudaMallocPitch = 21
    cudaFree = 22
    cudaMallocArray = 23
    cudaFreeArray = 24
    cudaMallocHost = 25
    cudaFreeHost = 26
    cudaHostAlloc = 27
    cudaHostGetDevicePointer = 28
    cudaHostGetFlags = 29
    cudaMemGetInfo = 30
    cudaMemcpy = 31
    cudaMemcpy2D = 32
    cudaMemcpyToArray = 33
    cudaMemcpy2DToArray = 34
    cudaMemcpyFromArray = 35
    cudaMemcpy2DFromArray = 36
    cudaMemcpyArrayToArray = 37
    cudaMemcpy2DArrayToArray = 38
    cudaMemcpyToSymbol = 39
    cudaMemcpyFromSymbol = 40
    cudaMemcpyAsync = 41
    cudaMemcpyToArrayAsync = 42
    cudaMemcpyFromArrayAsync = 43
    cudaMemcpy2DAsync = 44
    cudaMemcpy2DToArrayAsync = 45
    cudaMemcpy2DFromArrayAsync = 46
    cudaMemcpyToSymbolAsync = 47
    cudaMemcpyFromSymbolAsync = 48
    cudaMemset = 49
    cudaMemset2D = 50
    cudaMemsetAsync = 51
    cudaMemset2DAsync = 52
    cudaGetSymbolAddress = 53
    cudaGetSymbolSize = 54
    cudaBindTexture = 55
    cudaBindTexture2D = 56
    cudaBindTextureToArray = 57
    cudaUnbindTexture = 58
    cudaGetTextureAlignmentOffset = 59
    cudaGetTextureReference = 60
    cudaBindSurfaceToArray = 61
    cudaGetSurfaceReference = 62
    cudaGLSetGLDevice = 63
    cudaGLRegisterBufferObject = 64
    cudaGLMapBufferObject = 65
    cudaGLUnmapBufferObject = 66
    cudaGLUnregisterBufferObject = 67
    cudaGLSetBufferObjectMapFlags = 68
    cudaGLMapBufferObjectAsync = 69
    cudaGLUnmapBufferObjectAsync = 70
    cudaWGLGetDevice = 71
    cudaGraphicsGLRegisterImage = 72
    cudaGraphicsGLRegisterBuffer = 73
    cudaGraphicsUnregisterResource = 74
    cudaGraphicsResourceSetMapFlags = 75
    cudaGraphicsMapResources = 76
    cudaGraphicsUnmapResources = 77
    cudaGraphicsResourceGetMappedPointer = 78
    cudaGraphicsSubResourceGetMappedArray = 79
    cudaVDPAUGetDevice = 80
    cudaVDPAUSetVDPAUDevice = 81
    cudaGraphicsVDPAURegisterVideoSurface = 82
    cudaGraphicsVDPAURegisterOutputSurface = 83
    cudaD3D11GetDevice = 84
    cudaD3D11GetDevices = 85
    cudaD3D11SetDirect3DDevice = 86
    cudaGraphicsD3D11RegisterResource = 87
    cudaD3D10GetDevice = 88
    cudaD3D10GetDevices = 89
    cudaD3D10SetDirect3DDevice = 90
    cudaGraphicsD3D10RegisterResource = 91
    cudaD3D10RegisterResource = 92
    cudaD3D10UnregisterResource = 93
    cudaD3D10MapResources = 94
    cudaD3D10UnmapResources = 95
    cudaD3D10ResourceSetMapFlags = 96
    cudaD3D10ResourceGetSurfaceDimensions = 97
    cudaD3D10ResourceGetMappedArray = 98
    cudaD3D10ResourceGetMappedPointer = 99
    cudaD3D10ResourceGetMappedSize = 100
    cudaD3D10ResourceGetMappedPitch = 101
    cudaD3D9GetDevice = 102
    cudaD3D9GetDevices = 103
    cudaD3D9SetDirect3DDevice = 104
    cudaD3D9GetDirect3DDevice = 105
    cudaGraphicsD3D9RegisterResource = 106
    cudaD3D9RegisterResource = 107
    cudaD3D9UnregisterResource = 108
    cudaD3D9MapResources = 109
    cudaD3D9UnmapResources = 110
    cudaD3D9ResourceSetMapFlags = 111
    cudaD3D9ResourceGetSurfaceDimensions = 112
    cudaD3D9ResourceGetMappedArray = 113
    cudaD3D9ResourceGetMappedPointer = 114
    cudaD3D9ResourceGetMappedSize = 115
    cudaD3D9ResourceGetMappedPitch = 116
    cudaD3D9Begin = 117
    cudaD3D9End = 118
    cudaD3D9RegisterVertexBuffer = 119
    cudaD3D9UnregisterVertexBuffer = 120
    cudaD3D9MapVertexBuffer = 121
    cudaD3D9UnmapVertexBuffer = 122
    cudaThreadExit = 123
    cudaSetDoubleForDevice = 124
    cudaSetDoubleForHost = 125
    cudaThreadSynchronize = 126
    cudaThreadGetLimit = 127
    cudaThreadSetLimit = 128
    cudaStreamCreate = 129
    cudaStreamDestroy = 130
    cudaStreamSynchronize = 131
    cudaStreamQuery = 132
    cudaEventCreate = 133
    cudaEventCreateWithFlags = 134
    cudaEventRecord = 135
    cudaEventDestroy = 136
    cudaEventSynchronize = 137
    cudaEventQuery = 138
    cudaEventElapsedTime = 139
    cudaMalloc3D = 140
    cudaMalloc3DArray = 141
    cudaMemset3D = 142
    cudaMemset3DAsync = 143
    cudaMemcpy3D = 144
    cudaMemcpy3DAsync = 145
    cudaThreadSetCacheConfig = 146
    cudaStreamWaitEvent = 147
    cudaD3D11GetDirect3DDevice = 148
    cudaD3D10GetDirect3DDevice = 149
    cudaThreadGetCacheConfig = 150
    cudaPointerGetAttributes = 151
    cudaHostRegister = 152
    cudaHostUnregister = 153
    cudaDeviceCanAccessPeer = 154
    cudaDeviceEnablePeerAccess = 155
    cudaDeviceDisablePeerAccess = 156
    cudaPeerRegister = 157
    cudaPeerUnregister = 158
    cudaPeerGetDevicePointer = 159
    cudaMemcpyPeer = 160
    cudaMemcpyPeerAsync = 161
    cudaMemcpy3DPeer = 162
    cudaMemcpy3DPeerAsync = 163
    cudaDeviceReset = 164
    cudaDeviceSynchronize = 165
    cudaDeviceGetLimit = 166
    cudaDeviceSetLimit = 167
    cudaDeviceGetCacheConfig = 168
    cudaDeviceSetCacheConfig = 169
    cudaProfilerInitialize = 170
    cudaProfilerStart = 171
    cudaProfilerStop = 172
    cudaDeviceGetByPCIBusId = 173
    cudaDeviceGetPCIBusId = 174
    cudaGLGetDevices = 175
    cudaIpcGetEventHandle = 176
    cudaIpcOpenEventHandle = 177
    cudaIpcGetMemHandle = 178
    cudaIpcOpenMemHandle = 179
    cudaIpcCloseMemHandle = 180
    cudaArrayGetInfo = 181
    cudaFuncSetSharedMemConfig = 182
    cudaDeviceGetSharedMemConfig = 183
    cudaDeviceSetSharedMemConfig = 184
    cudaCreateTextureObject = 185
    cudaDestroyTextureObject = 186
    cudaGetTextureObjectResourceDesc = 187
    cudaGetTextureObjectTextureDesc = 188
    cudaCreateSurfaceObject = 189
    cudaDestroySurfaceObject = 190
    cudaGetSurfaceObjectResourceDesc = 191
    cudaMallocMipmappedArray = 192
    cudaGetMipmappedArrayLevel = 193
    cudaFreeMipmappedArray = 194
    cudaBindTextureToMipmappedArray = 195
    cudaGraphicsResourceGetMappedMipmappedArray = 196
    cudaStreamAddCallback = 197
    cudaStreamCreateWithFlags = 198
    cudaGetTextureObjectResourceViewDesc = 199
    cudaDeviceGetAttribute = 200
    cudaStreamDestroy_v5050 = 201
    cudaStreamCreateWithPriority = 202
    cudaStreamGetPriority = 203
    cudaStreamGetFlags = 204
    cudaDeviceGetStreamPriorityRange = 205
    cudaMallocManaged = 206
    cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000 = 207
    cudaStreamAttachMemAsync = 208
    cudaGetErrorName = 209
    cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050 = 210
    cudaLaunchKernel = 211
    cudaGetDeviceFlags = 212
    cudaLaunch_ptsz = 213
    cudaLaunchKernel_ptsz = 214
    cudaMemcpy_ptds = 215
    cudaMemcpy2D_ptds = 216
    cudaMemcpyToArray_ptds = 217
    cudaMemcpy2DToArray_ptds = 218
    cudaMemcpyFromArray_ptds = 219
    cudaMemcpy2DFromArray_ptds = 220
    cudaMemcpyArrayToArray_ptds = 221
    cudaMemcpy2DArrayToArray_ptds = 222
    cudaMemcpyToSymbol_ptds = 223
    cudaMemcpyFromSymbol_ptds = 224
    cudaMemcpyAsync_ptsz = 225
    cudaMemcpyToArrayAsync_ptsz = 226
    cudaMemcpyFromArrayAsync_ptsz = 227
    cudaMemcpy2DAsync_ptsz = 228
    cudaMemcpy2DToArrayAsync_ptsz = 229
    cudaMemcpy2DFromArrayAsync_ptsz = 230
    cudaMemcpyToSymbolAsync_ptsz = 231
    cudaMemcpyFromSymbolAsync_ptsz = 232
    cudaMemset_ptds = 233
    cudaMemset2D_ptds = 234
    cudaMemsetAsync_ptsz = 235
    cudaMemset2DAsync_ptsz = 236
    cudaStreamGetPriority_ptsz = 237
    cudaStreamGetFlags_ptsz = 238
    cudaStreamSynchronize_ptsz = 239
    cudaStreamQuery_ptsz = 240
    cudaStreamAttachMemAsync_ptsz = 241
    cudaEventRecord_ptsz = 242
    cudaMemset3D_ptds = 243
    cudaMemset3DAsync_ptsz = 244
    cudaMemcpy3D_ptds = 245
    cudaMemcpy3DAsync_ptsz = 246
    cudaStreamWaitEvent_ptsz = 247
    cudaStreamAddCallback_ptsz = 248
    cudaMemcpy3DPeer_ptds = 249
    cudaMemcpy3DPeerAsync_ptsz = 250
    cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 251
    cudaMemPrefetchAsync = 252
    cudaMemPrefetchAsync_ptsz = 253
    cudaMemAdvise = 254
    cudaDeviceGetP2PAttribute = 255
    cudaGraphicsEGLRegisterImage = 256
    cudaEGLStreamConsumerConnect = 257
    cudaEGLStreamConsumerDisconnect = 258
    cudaEGLStreamConsumerAcquireFrame = 259
    cudaEGLStreamConsumerReleaseFrame = 260
    cudaEGLStreamProducerConnect = 261
    cudaEGLStreamProducerDisconnect = 262
    cudaEGLStreamProducerPresentFrame = 263
    cudaEGLStreamProducerReturnFrame = 264
    cudaGraphicsResourceGetMappedEglFrame = 265
    cudaMemRangeGetAttribute = 266
    cudaMemRangeGetAttributes = 267
    cudaEGLStreamConsumerConnectWithFlags = 268
    cudaLaunchCooperativeKernel = 269
    cudaLaunchCooperativeKernel_ptsz = 270
    cudaEventCreateFromEGLSync = 271
    cudaLaunchCooperativeKernelMultiDevice = 272
    cudaFuncSetAttribute = 273
    cudaImportExternalMemory = 274
    cudaExternalMemoryGetMappedBuffer = 275
    cudaExternalMemoryGetMappedMipmappedArray = 276
    cudaDestroyExternalMemory = 277
    cudaImportExternalSemaphore = 278
    cudaSignalExternalSemaphoresAsync = 279
    cudaSignalExternalSemaphoresAsync_ptsz = 280
    cudaWaitExternalSemaphoresAsync = 281
    cudaWaitExternalSemaphoresAsync_ptsz = 282
    cudaDestroyExternalSemaphore = 283
    cudaLaunchHostFunc = 284
    cudaLaunchHostFunc_ptsz = 285
    cudaGraphCreate = 286
    cudaGraphKernelNodeGetParams = 287
    cudaGraphKernelNodeSetParams = 288
    cudaGraphAddKernelNode = 289
    cudaGraphAddMemcpyNode = 290
    cudaGraphMemcpyNodeGetParams = 291
    cudaGraphMemcpyNodeSetParams = 292
    cudaGraphAddMemsetNode = 293
    cudaGraphMemsetNodeGetParams = 294
    cudaGraphMemsetNodeSetParams = 295
    cudaGraphAddHostNode = 296
    cudaGraphHostNodeGetParams = 297
    cudaGraphAddChildGraphNode = 298
    cudaGraphChildGraphNodeGetGraph = 299
    cudaGraphAddEmptyNode = 300
    cudaGraphClone = 301
    cudaGraphNodeFindInClone = 302
    cudaGraphNodeGetType = 303
    cudaGraphGetRootNodes = 304
    cudaGraphNodeGetDependencies = 305
    cudaGraphNodeGetDependentNodes = 306
    cudaGraphAddDependencies = 307
    cudaGraphRemoveDependencies = 308
    cudaGraphDestroyNode = 309
    cudaGraphInstantiate = 310
    cudaGraphLaunch = 311
    cudaGraphLaunch_ptsz = 312
    cudaGraphExecDestroy = 313
    cudaGraphDestroy = 314
    cudaStreamBeginCapture = 315
    cudaStreamBeginCapture_ptsz = 316
    cudaStreamIsCapturing = 317
    cudaStreamIsCapturing_ptsz = 318
    cudaStreamEndCapture = 319
    cudaStreamEndCapture_ptsz = 320
    cudaGraphHostNodeSetParams = 321
    cudaGraphGetNodes = 322
    cudaGraphGetEdges = 323
    cudaStreamGetCaptureInfo = 324
    cudaStreamGetCaptureInfo_ptsz = 325
    cudaGraphExecKernelNodeSetParams = 326
    cudaThreadExchangeStreamCaptureMode = 327
    cudaDeviceGetNvSciSyncAttributes = 328
    cudaOccupancyAvailableDynamicSMemPerBlock = 329
    cudaStreamSetFlags = 330
    cudaStreamSetFlags_ptsz = 331
    cudaGraphExecMemcpyNodeSetParams = 332
    cudaGraphExecMemsetNodeSetParams = 333
    cudaGraphExecHostNodeSetParams = 334
    cudaGraphExecUpdate = 335
    # NOTE(review): the two last members do not look like API functions,
    # SIZE is one past the last id listed above and FORCE_INT is
    # presumably the usual C trick forcing a 32-bit enum -- confirm.
    SIZE = 336
    FORCE_INT = 0x7FFFFFFF

662 

663 

664def _sizeof_fmt(num, suffix='B'): 

665 """Format size with metric units (like nvvp)""" 

666 for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: 

667 if abs(num) < 1000.0: 

668 return "%3.1f%s%s" % (num, unit, suffix) 

669 num /= 1000.0 # pragma: no cover 

670 return "%.1f%s%s" % (num, 'Y', suffix) # pragma: no cover 

671 

672 

def json_to_dataframe(js):
    """
    Converts a json dump obtained with function
    @see fn convert_trace_to_json
    to a dataframe.

    :param js: a filename, a json string, a stream containing json
    :return: a dataframe, column ``ts_sec`` is added and contains
        column ``ts`` divided by 1e9
    """
    source = js
    if isinstance(js, str):
        # The length is checked first: a long json string should not
        # be sent to the file system.
        is_file = len(js) < 5000 and os.path.exists(js)
        if not is_file and js.lstrip()[:1] in ("[", "{"):
            # Recent versions of pandas deprecate literal json given to
            # read_json, the string is wrapped into a stream. Any other
            # string is left to pandas and handled as a path.
            source = io.StringIO(js)
    df = pandas.read_json(source)

    df['ts_sec'] = df['ts'] / 1e9
    return df

693 

694 

def json_to_dataframe_streaming(js, chunksize=100000, flatten=False, **kwargs):
    """
    Converts a big json dump (from @see fn convert_trace_to_json)
    to a dataframe. The function processes the data by streaming to avoid
    loading huge data in memory.
    Returns an iterator on dataframes.
    The function relies on :epkg:`pandas_streaming`.

    :param js: a filename or a stream containing json,
        a json string is not supported by the streaming version
    :param chunksize:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :param flatten:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :param kwargs:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :return: a streaming dataframe (an iterator on dataframes),
        column ``ts_sec`` is added and contains column ``ts``
        divided by 1e9
    :raises RuntimeError: if *js* is a string which is not
        the name of an existing file
    """
    # The input is checked before importing the optional dependency
    # so that a wrong input is always reported the same way.
    if isinstance(js, str) and not (len(js) < 5000 and os.path.exists(js)):
        raise RuntimeError(
            "Use a stream or function json_to_dataframe instead of "
            "the streaming version.")

    from pandas_streaming.df import StreamingDataFrame  # pylint: disable=C0415
    # The streaming options were documented but never given
    # to read_json, they are now forwarded.
    sdf = StreamingDataFrame.read_json(
        js, chunksize=chunksize, flatten=flatten, **kwargs)

    sdf['ts_sec'] = sdf['ts'].apply(lambda t: t / 1e9)
    return sdf