Generating SQLite file /root/arle-decode-hidden-scratch/docs/trace-artifacts/2026-05-15-dsv4-deepep/nsys-single-decode-token-expert-grouped/trace.unpacked.sqlite from /root/arle-decode-hidden-scratch/docs/trace-artifacts/2026-05-15-dsv4-deepep/nsys-single-decode-token-expert-grouped/trace.unpacked.nsys-rep

Exporting 545680 events: [1%                                                   ]
Exporting 545680 events: [2%                                                   ]
Exporting 545680 events: [3%                                                   ]
Exporting 545680 events: [4%                                                   ]
Exporting 545680 events: [5%                                                   ]
Exporting 545680 events: [=6%                                                  ]
Exporting 545680 events: [=7%                                                  ]
Exporting 545680 events: [==8%                                                 ]
Exporting 545680 events: [==9%                                                 ]
Exporting 545680 events: [==10%                                                ]
Exporting 545680 events: [==11%                                                ]
Exporting 545680 events: [===12%                                               ]
Exporting 545680 events: [===13%                                               ]
Exporting 545680 events: [====14%                                              ]
Exporting 545680 events: [====15%                                              ]
Exporting 545680 events: [=====16%                                             ]
Exporting 545680 events: [======17%                                            ]
Exporting 545680 events: [======18%                                            ]
Exporting 545680 events: [=======19%                                           ]
Exporting 545680 events: [=======20%                                           ]
Exporting 545680 events: [========21%                                          ]
Exporting 545680 events: [========22%                                          ]
Exporting 545680 events: [=========23%                                         ]
Exporting 545680 events: [=========24%                                         ]
Exporting 545680 events: [==========25%                                        ]
Exporting 545680 events: [==========26%                                        ]
Exporting 545680 events: [===========27%                                       ]
Exporting 545680 events: [===========28%                                       ]
Exporting 545680 events: [============29%                                      ]
Exporting 545680 events: [============30%                                      ]
Exporting 545680 events: [=============31%                                     ]
Exporting 545680 events: [=============32%                                     ]
Exporting 545680 events: [==============33%                                    ]
Exporting 545680 events: [===============34%                                   ]
Exporting 545680 events: [===============35%                                   ]
Exporting 545680 events: [================36%                                  ]
Exporting 545680 events: [================37%                                  ]
Exporting 545680 events: [=================38%                                 ]
Exporting 545680 events: [=================39%                                 ]
Exporting 545680 events: [==================40%                                ]
Exporting 545680 events: [==================41%                                ]
Exporting 545680 events: [===================42%                               ]
Exporting 545680 events: [===================43%                               ]
Exporting 545680 events: [====================44%                              ]
Exporting 545680 events: [====================45%                              ]
Exporting 545680 events: [=====================46%                             ]
Exporting 545680 events: [=====================47%                             ]
Exporting 545680 events: [======================48%                            ]
Exporting 545680 events: [======================49%                            ]
Exporting 545680 events: [=======================50%                           ]
Exporting 545680 events: [========================51%                          ]
Exporting 545680 events: [========================52%                          ]
Exporting 545680 events: [=========================53%                         ]
Exporting 545680 events: [=========================54%                         ]
Exporting 545680 events: [==========================55%                        ]
Exporting 545680 events: [==========================56%                        ]
Exporting 545680 events: [===========================57%                       ]
Exporting 545680 events: [===========================58%                       ]
Exporting 545680 events: [============================59%                      ]
Exporting 545680 events: [============================60%                      ]
Exporting 545680 events: [=============================61%                     ]
Exporting 545680 events: [=============================62%                     ]
Exporting 545680 events: [==============================63%                    ]
Exporting 545680 events: [==============================64%                    ]
Exporting 545680 events: [===============================65%                   ]
Exporting 545680 events: [===============================66%                   ]
Exporting 545680 events: [================================67%                  ]
Exporting 545680 events: [=================================68%                 ]
Exporting 545680 events: [=================================69%                 ]
Exporting 545680 events: [==================================70%                ]
Exporting 545680 events: [==================================71%                ]
Exporting 545680 events: [===================================72%               ]
Exporting 545680 events: [===================================73%               ]
Exporting 545680 events: [====================================74%              ]
Exporting 545680 events: [====================================75%              ]
Exporting 545680 events: [=====================================76%             ]
Exporting 545680 events: [=====================================77%             ]
Exporting 545680 events: [======================================78%            ]
Exporting 545680 events: [======================================79%            ]
Exporting 545680 events: [=======================================80%           ]
Exporting 545680 events: [=======================================81%           ]
Exporting 545680 events: [========================================82%          ]
Exporting 545680 events: [========================================83%          ]
Exporting 545680 events: [=========================================84%         ]
Exporting 545680 events: [==========================================85%        ]
Exporting 545680 events: [==========================================86%        ]
Exporting 545680 events: [===========================================87%       ]
Exporting 545680 events: [===========================================88%       ]
Exporting 545680 events: [============================================89%      ]
Exporting 545680 events: [============================================90%      ]
Exporting 545680 events: [=============================================91%     ]
Exporting 545680 events: [=============================================92%     ]
Exporting 545680 events: [==============================================93%    ]
Exporting 545680 events: [==============================================94%    ]
Exporting 545680 events: [===============================================95%   ]
Exporting 545680 events: [===============================================96%   ]
Exporting 545680 events: [================================================97%  ]
Exporting 545680 events: [================================================98%  ]
Exporting 545680 events: [=================================================99% ]
Exporting 545680 events: [=================================================100%]
Processing [/root/arle-decode-hidden-scratch/docs/trace-artifacts/2026-05-15-dsv4-deepep/nsys-single-decode-token-expert-grouped/trace.unpacked.sqlite] with [/opt/nvidia/nsight-systems/2023.2.3/host-linux-x64/reports/cuda_api_sum.py]... 

 ** CUDA API Summary (cuda_api_sum):

 Time (%)  Total Time (ns)  Num Calls   Avg (ns)     Med (ns)    Min (ns)    Max (ns)   StdDev (ns)                 Name               
 --------  ---------------  ---------  -----------  -----------  ---------  ----------  -----------  ----------------------------------
     55.2    4,239,369,268      1,044  4,060,698.5    864,169.0     12,161  16,451,890  5,380,234.3  cuMemcpyDtoHAsync_v2              
     17.7    1,356,913,058     39,624     34,244.7      2,416.0        146   1,053,060     74,029.2  cuMemFreeAsync                    
     13.3    1,025,553,763     39,624     25,882.1      7,458.0        118     487,746     37,649.0  cuMemAllocAsync                   
      5.2      397,682,162     33,240     11,964.0      8,678.0      2,820     128,761      8,494.8  cudaLaunchKernel                  
      3.3      252,977,038     26,234      9,643.1      6,312.5        124     304,298      8,352.8  cuMemsetD8Async                   
      1.9      146,590,796         16  9,161,924.8  9,246,335.5  1,446,126  16,992,235  7,800,582.6  cuStreamSynchronize               
      0.9       71,332,929      5,754     12,397.1      7,225.5      1,806      74,951     11,043.2  cuMemcpyHtoDAsync_v2              
      0.6       46,315,482      6,208      7,460.6      3,976.5        490      65,704      8,240.5  cudaEventRecord                   
      0.5       40,021,120      5,504      7,271.3      3,682.5        555      91,970      8,212.8  cudaStreamWaitEvent               
      0.4       33,077,664      2,752     12,019.5      8,763.5      3,505     100,247      8,420.0  cuLaunchKernelEx                  
      0.4       30,541,251      1,740     17,552.4     13,841.0      4,756      74,322     11,507.1  cuMemcpyDtoDAsync_v2              
      0.3       26,844,285      6,272      4,280.0      1,797.0        242      98,772      5,932.4  cudaStreamGetCaptureInfo_v2_v11030
      0.1        8,967,672      2,752      3,258.6        846.5        141      74,170      5,372.3  cudaGetFuncBySymbol_v11000        
      0.1        4,630,980        344     13,462.2     10,036.5      4,550      48,224      7,980.3  cudaMemsetAsync                   
      0.0        1,469,064          8    183,633.0    176,977.0     77,342     269,061     69,035.5  cuMemGetInfo_v2                   
      0.0           30,617          1     30,617.0     30,617.0     30,617      30,617          0.0  cuCtxSynchronize                  
      0.0            4,777          1      4,777.0      4,777.0      4,777       4,777          0.0  cuProfilerStart                   
      0.0            2,741          1      2,741.0      2,741.0      2,741       2,741          0.0  cuCtxSetCurrent                   
      0.0            1,191          1      1,191.0      1,191.0      1,191       1,191          0.0  cuProfilerStop                    

