
    3KiT                        S SK r S SKrS SKrS SKJr  S SKJr  S SKJrJ	r	J
r
  S SKJr  S SKJr  \
(       a  S SKJr  S S	4S
\4S jjr\ R&                  " \S SS9r\ R&                  " \S S	S9r\ " S S5      5       r\ " S S5      5       r " S S5      r " S S5      rS"S jrS S S4S jrS rS#S jr\ " S S5      5       r\ " S S 5      5       rS! r g)$    N)deque)	dataclass)AnyLiteralTYPE_CHECKINGprofile)
DeviceType)_KinetoEventc                     U R                   $ N)childrenxs    W/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/torch/profiler/_utils.py<lambda>r      s    1::    Freversec              #      #    U(       a  [         OS n[        U" U 5      5      nU(       a<  U" U5      nUv   U" U" U5      5       H  nUR                  U5        M     U(       a  M;  g g 7f)Nc                     U $ r    r   s    r   r   _traverse.<locals>.<lambda>   s    qr   )reversedr   append)treenext_fnchildren_fnr   order	remaining
curr_eventchild_events           r   	_traverser"      sX     H[EeDk"I
Y'
 Z!89K[) : )s   A"A(&A(c                 "    U R                  5       $ r   )popr   s    r   r   r      s
    aeegr   T)r   r   c                 "    U R                  5       $ r   )popleftr   s    r   r   r      s
    r   c                   ^    \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S'   \
S 5       rSrg	)
EventMetrics!   r   duration_time_nsself_time_nsidle_time_nsqueue_depthc                 V    U R                   S:X  a  gU R                  U R                   -  $ )Nr   g        )r*   r,   selfs    r   fraction_idle_timeEventMetrics.fraction_idle_time(   s*      A%  4#8#888r   r   N)__name__
__module____qualname____firstlineno__r*   int__annotations__r+   r,   r-   propertyr1   __static_attributes__r   r   r   r(   r(   !   s=    cL#L#K9 9r   r(   c                   8    \ rS rSr% \\S'   \\S'   Sr\\S'   Srg)Interval/   startendr   r-   r   N)r3   r4   r5   r6   r7   r8   r-   r:   r   r   r   r<   r<   /   s    J	HKr   r<   c                   L    \ rS rSrSS jrS rS rS\4S jrS\	\
   4S	 jrS
rg)EventKey6   returnNc                     Xl         g r   event)r0   rF   s     r   __init__EventKey.__init__7   s    
r   c                 @    [        U R                  R                  5      $ r   )hashrF   idr/   s    r   __hash__EventKey.__hash__:   s    DJJMM""r   c                 \    U R                   R                  UR                   R                  :H  $ r   )rF   rK   )r0   others     r   __eq__EventKey.__eq__=   s    zz}}..r   c                 0    U R                   R                   $ r   )rF   namer/   s    r   __repr__EventKey.__repr__@   s    **//"#r   	intervalsc                    Sn[        US S9nU(       af  [        U R                  R                  US   R                  5      n[        U R                  R                  US   R                  5      nX4:  a  X$U-
  -  nSu  pVU[        U5      :  a  X   nX   nUS-  nUR                  UR                  :  a4  UR                  UR                  :  a  US-  nMW  UR                  Ul        Un[        U R                  R                  UR                  5      n[        U R                  R                  UR                  5      nX4:  a  X$U-
  -  nU[        U5      :  a  M  U$ )Nr   c                     U R                   $ r   r>   r   s    r   r   ,EventKey.intervals_overlap.<locals>.<lambda>E   s    AGGr   key)r      r]   )	sortedmaxrF   start_time_nsr>   minend_time_nsr?   len)	r0   rV   overlap_timeoverlap_startoverlap_endijprev_intervalcurr_intervals	            r   intervals_overlapEventKey.intervals_overlapC   s<   9*;<	

 8 8)A,:L:LMMdjj44il6F6FGK*m ;;#i. %LM%LMFA  =#6#66 $$}'8'88FA*7*;*;M'A

 8 8-:M:MNMdjj44m6G6GHK*m ;;! #i. $ r   rE   rC   N)r3   r4   r5   r6   rG   rL   rP   strrT   listr<   rk   r:   r   r   r   rA   rA   6   s-    #/$# $4> r   rA   c                   \    \ rS rSrS\SS4S jrSS jrS rSS jrS	 r	SS
\
S\4S jjrSrg)BasicEvaluationd   profrC   Nc                 F   Xl         0 U l        U R                  5         [        U R                  R	                  5       S S9U l        U R
                   Vs/ s H  o"R                  PM     snU l        / U l        U R                  5       U l
        U R                  5         g s  snf )Nc                 .    U R                   R                  $ r   )rF   r`   r   s    r   r   *BasicEvaluation.__init__.<locals>.<lambda>j   s    qww/D/Dr   r[   )r	   metricscompute_self_timer^   keys
event_keysrF   eventscuda_eventscompute_queue_depthqueue_depth_listcompute_idle_time)r0   rs   es      r   rG   BasicEvaluation.__init__e   s    57  LL%D
 )-81ww8/1 $ 8 8 :  9s   Bc                 N   U R                   R                  c   e[        U R                   R                  R                  5       5      nU(       a  UR	                  5       nUR
                  nUR                   H"  nX4R
                  -  nUR                  U5        M$     [        U5      U R                  ;  d!   SUR                   SUR                   35       e[        US9U R                  [        U5      '   UR
                  U R                  [        U5         l        U(       a  M  gg)z=
Computes event's self time(total time - time in child ops).
NzDuplicate id: z, )r+   )r	   kineto_resultsr   experimental_event_treer$   r*   r   r   rA   rw   rK   rS   r(   )r0   stackr    	self_timer!   s        r   rx   !BasicEvaluation.compute_self_timeq   s     ||**666dll11IIKL J"33I)22999	[)  3 J't||;  r*//1BC; 2>91UDLL*-. ",!<!< LL$ er   c                 
  ^^^ U R                   R                  c   eU R                   R                  R                  5       nS mS m[        U4S jU 5       S S9n[        U4S jU 5       S S9n[        X#-   S S9U l        0 nS	nU H  m[        UU4S
 jUS9nXdT'   Ub  UOUnM     S	nSnX#-   U R                  -   n	S n
/ nU	R                  U
S9  U	 GH  n[        US5      (       aE  UR                  5       S-  nUR                  5       UR                  5       -   S-  nX;   a	  XL   b  XL   n[        US5      (       a@  UR                  5       nUR                  5       UR                  5       -   nX;   a	  XL   b  XL   nO)[        US5      (       a  UR                  nUR                  nU[        U5      :  aB  X7   R                  5       W::  a,  US-  nU[        U5      :  a  X7   R                  5       U::  a  M,  X-
  S-   n[        US	5      n[        US5      (       d  [        US5      (       a  UR!                  [#        WWU5      5        GMy  [        US5      (       d  GM  XR$                  ['        U5         l        GM     U$ )z
Computes queue_depth at each event. This will calculate the queue depth data for
All the events in the tree.
This will return a list of Interval of queue depth data of cuda launch and kernels.
c                 b   ^ 1 Skn[        [        U SU 5      5      m[        U4S jU 5       5      $ )z+Check if the event is a CUDA launch kernel.>   cudaLaunchKernel__cudaLaunchKernelcudaLaunchKernelExCcudaLaunchCooperativeKernel&cudaLaunchCooperativeKernelMultiDevicerS   c              3   F   >#    U  H  nTR                  U5      v   M     g 7fr   )
startswith.0patternrS   s     r   	<genexpr>UBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel.<locals>.<genexpr>   s     OGtw//s   !)rn   getattrany)r   launch_patternsrS   s     @r   is_cuda_launch_kernelBBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel   s0    O wq&!,-DOOOOr   c                    ^ U R                  5       [        R                  :w  a  g[        [	        U SU 5      5      R                  5       m1 Skn[        U4S jU 5       5      (       + $ )z,Check if the event is a CUDA runtime kernel.FrS   >   cpymemfreeallocc              3   ,   >#    U  H	  oT;   v   M     g 7fr   r   r   s     r   r   NBasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel.<locals>.<genexpr>   s     K:Jwd?:Js   )device_typer
   CUDArn   r   lowerr   )r   exclude_patternsrS   s     @r   is_cuda_kernel;BasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel   sS     }}*//1wq&!,-335D  ?K:JKKKKr   c              3   F   >#    U  H  nT" U5      (       d  M  Uv   M     g 7fr   r   )r   r   r   s     r   r   6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s     D1+@+CQQ   !	!c                 "    U R                  5       $ r   start_nsr   s    r   r   5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   
    !**,r   r[   c              3   F   >#    U  H  nT" U5      (       d  M  Uv   M     g 7fr   r   )r   r   r   s     r   r   r      s     =1>!+<QQr   c                 "    U R                  5       $ r   r   r   s    r   r   r      r   r   c                 "    U R                  5       $ r   r   r   s    r   r   r      s
    1::<r   r   c                 F   > U R                  5       TR                  5       :H  $ r   )linked_correlation_id)r   cuda_launch_events    r   r   r      s    !113$::<=r   rY   c                     [        U S5      (       a  U R                  5       S-  $ [        U S5      (       a  U R                  5       $ [        U S5      (       a  U R                  $ [	        S5      e)Nstart_us  r   r`   zUnknown Event Type)hasattrr   r   r`   	ExceptionrE   s    r   new_old_event_comparatorEBasicEvaluation.compute_queue_depth.<locals>.new_old_event_comparator   s`    uj))~~'$..uj))~~''uo..***011r   r   r   r   r`   r]   )r	   r   r{   r^   r|   index_of_first_matchsortr   r   duration_usr   duration_nsr`   rb   rc   r_   r   r<   rw   rA   r-   )r0   cuda_event_listcuda_launch_eventscuda_kernel_eventskernel_mappinglast_mapped_kernelindexcurrent_kernel_indexspawned_kernel_index
all_eventsr   r~   rF   
start_timeend_timecurrent_queue_depthr   r   r   s                   @@@r   r}   #BasicEvaluation.compute_queue_depth   s    ||**666,,55<<>
	P	L $DD&
 $==&

 "39O
 35!3("=(	E 16,-*/*;AS "4  !!'<t{{J
	2 ,.45Euj))"^^-4
!NN,u/@/@/BBdJ*~/D/P+9+@(uj))"^^-
 >>+e.?.?.AA*~/D/P+9+@(00"00
 ,, %s+='>>'=FFHZW$)$ %s+='>>'=FFHZW #7"MPQ"Q"%&91"=uj))WUJ-G-G ''Z3FG 00<OXe_-9A  D  r   c                    SnSn/ nU R                   (       a  U R                  (       aw  U[        U R                  S   R                  U R                   S   R                  5      [        U R                   S   R
                  U R                  S   R                  5      /-  nU R                    Hi  nUR                  S:X  a  U(       d  UR
                  nSnUR                  S:  d  M:  U(       d  MC  UR                  [        X$R                  5      5        SnMk     U R                   Vs/ s H  oUR                  PM     nnU H8  n[        U5      R                  U5      U R                  [        U5         l        M:     gs  snf )z$
Computes idle time of the profile.
Fr   r   TN)r~   r{   r<   r`   r>   r?   rb   r-   r   rw   rF   rA   rk   r,   )r0   idle
idle_startidle_intervals
data_pointr   
event_listrF   s           r   r   !BasicEvaluation.compute_idle_time   s9   
 
)+  T[[Q55t7L7LQ7O7U7UV..r266B8S8ST N
 //J%%*4'^^
%%)dd%%hz;K;K&LM 0 (,||4|!gg|
4E9A:/ LL%)6   5s   'E=c                   ^ SSK n[        [        U R                  5      5      nU Vs/ s H  oDR                  PM     nnSmSn/ nSnU[        U5      :  a  XX   T:  a  US-  nM  [        US-   [        U5      5       He  n	[        UU4S jU	S9n
[        XYU
S9nUc  M!  X[   U:  d  M+  UR                  [        X;   R                  X8   R                  5      5        U
b  U
OUn  O   US-  nU[        U5      :  a  M  U R                   Vs/ s H  nUR                  U5      (       d  M  UPM     nnU(       Ga  UR                  U Vs/ s H  oR                  U   R                  PM     snUR                   S9nUR                  U Vs/ s H  oR                  U   R"                  PM     snUR                   S9nXR%                  U5      -
  UR'                  U5      -  nXR%                  U5      -
  UR'                  U5      -  nUS	U-  -   n[)        [+        UUS
S9[,        R.                  " S5      S
S9 VVs/ s H  u  nnUPM
     nnnUSU nU$ s  snf s  snf s  snf s  snf s  snnf )z
Filter and Rank the events based on some heuristics:
1) Events that are in the falling phase of the queue depth.
2) Events that have a high idle_time, self_time difference.

Parameters:
    length: The number of events to return.
r   N   r]   c                    > U T:*  $ r   r   )r   bottom_threasholds    r   r   -BasicEvaluation.rank_events.<locals>.<lambda>.  s    .?)?r   rY   )r>   r?   )dtypeg333333?T)strict)r\   r   )torchro   r   r~   r-   rc   ranger   argmaxr   r<   r>   rw   rk   tensorr+   float32r1   meanstdr^   zipoperator
itemgetter)r0   lengthr   r~   r   	qd_valuestop_threasholddecrease_intervalrg   rh   next_minimum_idxpeak_idxrF   r   r   	idle_timenormalized_gainnormalized_selfheuristic_score_list_r   s                       @r   rank_eventsBasicEvaluation.rank_events  s    	)>)> ?@,<=,<q]],<	=#i. |//Q1q5#i.1 $8?q$  "):JK 'I,?>,Q%,, ,6<<>N>Q>W>W
 -=,H(aA! 2" FA+ #i. 2 
%&&'89 % 	 

 ?IJzee$11zJmm % I EOPZEe$77ZPmm % I  )::i+@@EIIiDXXO(::i+@@EIIiDXXO#2S?5J#J 
 !',jF ++A. !!HAu !   $GV,Js >:
 K Qs#   I
I(I"I"I7I$r   print_enablec                 @   U R                  U5      nU(       d  U$ U(       a  SOSnUSR                  U Vs/ s HA  nS SU S[        UR                  5       SU R                  U   R
                  S-  S	 S
S 3	PMC     sn5      -  nU(       a  [        U5        U$ s  snf )NzOptimizable events:
zNo events to optimize

zP--------------------------------------------------------------------------------z
Event:                z
Source code location: z
Percentage idle time: rr   z.2fz%
)r   joinsource_code_locationrF   rw   r1   print)r0   r   r   r   outputrF   s         r   get_optimizable_events&BasicEvaluation.get_optimizable_events[  s    %%f-
,6(<U$)) ( (E J g +EKK89 :||E*==CCH I	
	
 (	
 		
 &Ms   AB
)r|   rz   r{   rw   r	   r~   rm   )r]   T)r3   r4   r5   r6   r	   rG   rx   r}   r   r   r7   boolr   r:   r   r   r   rq   rq   d   sE    
!W 
! 
!=,m ^08GRS D  r   rq   c                     Ub  U[        U 5      :  a  [        U 5      n[        X#5       H  nU" X   5      (       d  M  Us  $    g r   )rc   r   )seq	predicater>   r?   rg   s        r   r   r   p  s@    
{cSXo#h5SVH  r   c                     U $ r   r   r   s    r   r   r   y  s    ar   c                 `    XU n [        U 5      S:X  a  g U R                  [        XS95      U-   $ )Nr   r[   )rc   r   r_   )r   r\   r>   r?   s       r   r   r   y  s2    
C.C
3x1}99S&'%//r   c                     U b>  [         R                  " SU R                  5      nUc  U R                  n M5  U R                  $ g)Nz
\.py\(.*\)zNo source code location found)researchrS   parent)rF   matchs     r   r   r     s:    

		-4=LLEzz*r   c                  T    SSK Jn   U " 5           S S S 5        g ! , (       d  f       g = f)Nr   r   )torch.autograd.profilerr	   r   s    r   _init_for_cuda_graphsr
    s    /	 
s   
'c                   t    \ rS rSr% Sr\\S'   \S   \S'   \S   S-  \S'   \\-  S-  \S	'   \	\\
4   \S
'   Srg)TimelineEventi  z-Represents an event in the profiler timeline.	timestamp)r>   r?   regular
event_typefilenamenodeNmarker_type
identifierrF   r   )r3   r4   r5   r6   __doc__r7   r8   r   rn   dictr   r:   r   r   r   r  r    sD    7N122+,t33c	D  S>r   r  c                   ^    \ rS rSr% Sr\S   \S'   \\-  \S'   \	S-  \S'   Sr
\S-  \S'   S	rg)
ContextStackEntryi  z5Represents a context (filename or node) in the stack.r  context_typer  Nmetadatatidr   )r3   r4   r5   r6   r  r   r8   rn   r7   r  r  r:   r   r   r   r  r    s3    ?,--c	TkCtr   r  c           
        ^ SSK Jn  U R                  S/ 5      n/ mS nU4S jnU H  nSU;  d  SU;  a  M  U" U5      (       aA  US   S	S
 nUR                  S5      (       a  U" SXe5        MG   [	        U5      nU" SWU5        M_  US   nTR                  [        USSSU5      5        M     TR                  S S9  / n	T GH  n
U
R                  =S:X  Gay    U
R                  c   eU
R                  S:X  a  [        U
R                  [        5      (       d   eUR                  U
R                  5      nU
R                  R                  S5      nU	R                  [        SU
R                  X5      5        M  U
R                  S:X  a  SnU
R                  R                  S5      n[!        U	5       H3  nUR"                  S:X  d  M  UR$                  U:X  d  M'  UR&                  n  O   U(       a^  UR                  S0 5      nU
R                  U;   a9  UU
R                     nU	R                  [        SU
R                  UU5      5        GM  GM  GM  GM  =S:X  at    [)        [+        U	5      S-
  SS5       HS  nU	U   nU
R                  UR"                  :X  d  M$  U
R                  UR                  :X  d  M@  U	R-                  U5          GM     GM	  S:X  d  GM  SnSnU
R                  R                  S5      n[!        U	5       Hr  nUR$                  U:X  d  M  UR"                  S:X  d  M'  UR&                  (       d  M:  UR&                  R                  SS5      nUR&                  R                  SS5      n  O   U(       d  U(       a:  U
R                  R/                  S0 5      nU(       a  UUS'   U(       a  UUS'   GM  GM  GM     g! [
         a     GNSf = f)aF  
Maps recorded profiler events to their corresponding fx nodes and adds stack traces.

Builds a timeline of all events (regular ops and FX markers for filenames/nodes),
sorts by timestamp, then processes chronologically while maintaining a context stack of active
filename/node scopes. Regular events are augmented with stack traces and node names from the
innermost active context. Runtime is O(n log n) for n events.

Args:
    traced_data: Json of profiler events from Chrome trace

Returns:
    Dict mapping recorded event names to their aten operations with added stack traces
r   )_FX_METADATA_REGISTRYtraceEventsc                     U R                  S5      S:H  =(       aI    U R                  SS5      R                  S5      =(       a!    U R                  SS5      R                  S5      $ )Ncatcpu_oprS    z## z ##)getr   endswithrE   s    r   is_fx_marker_eventLmap_recorded_events_to_aten_ops_with_stack_trace.<locals>.is_fx_marker_event  sT    IIe( 6		&"%0076		&"%..u5	
r   c           	         > US   nX2S   -   nTR                  [        USXU5      5        TR                  [        USXU5      5        g )Ntsdurr>   r?   )r   r  )r  r  rF   start_tsend_tsevent_timelines        r   append_fx_marker_eventPmap_recorded_events_to_aten_ops_with_stack_trace.<locals>.append_fx_marker_event  sR    ;%L((GZUK	
 	&%G	
r   r(  r)  rS      z.pyr  r  r  Nc                     U R                   $ r   )r  r   s    r   r   Bmap_recorded_events_to_aten_ops_with_stack_trace.<locals>.<lambda>  s    akkr   r[   r>   r  node_metadatar?   r]   r   stack_tracezNo model stack trace availabler"  args	node_name)torch.fx.tracebackr  r#  r$  r7   
ValueErrorr   r  r   r  r  r  
isinstancern   rF   r  r   r  r  r  r   rc   r$   
setdefault)traced_datar  trace_eventsr%  r-  rF   content
node_indexr*  context_stacktimeline_eventr  r  current_file_metadata	ctx_entryr3  	node_metarg   current_stack_tracecurrent_node_name	event_tidr5  r,  s                         @r   0map_recorded_events_to_aten_ops_with_stack_tracerG    s    9??="5L +-N

 uU 2e$$FmAb)G&&&z7B!$WJ 'vz5A T{H!!-)T4QV"WX' , 12 .0M )''%00<<<!--;%n&?&?EEEE4889R9RSH(..2259C!(()&(A(A8
 $//69,0)(..2259C%-m%<	%22j@ ) 44=4F4F1! &= -(=(A(A/SU(V)44E5B . 9 96I *00 1$*N,E,EyRU!"	 F - :0 s=1A5r2>A -a 0I&22i6L6LL*559M9MM%))!, ?  '+#$(!*0044U;	!)-!8I }}	1$11V;	@R@R@R2;2D2D2H2H -/O3/ 1:0B0B0F0Fvr0R- " "9 '*;)//::62FD*.A]+(,=[) )	 +<O )! " s   /O  
OO)r   Nrm   )!	functoolsr   r  collectionsr   dataclassesr   typingr   r   r   r	  r	   torch.profilerr
   torch.autogradr   r   r"   partialtraverse_dfstraverse_bfsr(   r<   rA   rq   r   r   r   r
  r  r  rG  r   r   r   <module>rQ     s	     	  ! . . + % + *>u * *   4EtT  ,e
 
9 
9 
9   + +\I IX  qd 0+      O>r   