
    9i_9                     f   S SK r S SKrS SKrS SKJr  S SKJr  S SKJr  S SK	J
r
  S SKJr  \(       a  S SKJr  S S	4S
\4S jjr\ R"                  " \S SS9r\ R"                  " \S S	S9r\ " S S5      5       r\ " S S5      5       r " S S5      r " S S5      rSS jrS S S4S jrS rSS jrg)    N)deque)	dataclass)TYPE_CHECKINGprofile)
DeviceType)_KinetoEventc                     U R                   $ N)childrenxs    U/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/torch/profiler/_utils.py<lambda>r      s    1::    Freversec              #      #    U(       a  [         OS n[        U" U 5      5      nU(       a<  U" U5      nUv   U" U" U5      5       H  nUR                  U5        M     U(       a  M;  g g 7f)Nc                     U $ r    r   s    r   r   _traverse.<locals>.<lambda>   s    qr   )reversedr   append)treenext_fnchildren_fnr   order	remaining
curr_eventchild_events           r   	_traverser       sX     H[EeDk"I
Y'
 Z!89K[) : )s   A"A(&A(c                 "    U R                  5       $ r   )popr   s    r   r   r      s
    aeegr   T)r   r   c                 "    U R                  5       $ r   )popleftr   s    r   r   r      s
    r   c                   ^    \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S'   \
S 5       rSrg	)
EventMetrics!   r   duration_time_nsself_time_nsidle_time_nsqueue_depthc                 V    U R                   S:X  a  gU R                  U R                   -  $ )Nr   g        )r(   r*   selfs    r   fraction_idle_timeEventMetrics.fraction_idle_time(   s*      A%  4#8#888r   r   N)__name__
__module____qualname____firstlineno__r(   int__annotations__r)   r*   r+   propertyr/   __static_attributes__r   r   r   r&   r&   !   s=    cL#L#K9 9r   r&   c                   8    \ rS rSr% \\S'   \\S'   Sr\\S'   Srg)Interval/   startendr   r+   r   N)r1   r2   r3   r4   r5   r6   r+   r8   r   r   r   r:   r:   /   s    J	HKr   r:   c                   L    \ rS rSrSS jrS rS rS\4S jrS\	\
   4S	 jrS
rg)EventKey6   returnNc                     Xl         g r   event)r.   rD   s     r   __init__EventKey.__init__7   s    
r   c                 @    [        U R                  R                  5      $ r   )hashrD   idr-   s    r   __hash__EventKey.__hash__:   s    DJJMM""r   c                 \    U R                   R                  UR                   R                  :H  $ r   )rD   rI   )r.   others     r   __eq__EventKey.__eq__=   s    zz}}..r   c                 0    U R                   R                   $ r   )rD   namer-   s    r   __repr__EventKey.__repr__@   s    **//"#r   	intervalsc                    Sn[        US S9nU(       af  [        U R                  R                  US   R                  5      n[        U R                  R                  US   R                  5      nX4:  a  X$U-
  -  nSu  pVU[        U5      :  a  X   nX   nUS-  nUR                  UR                  :  a4  UR                  UR                  :  a  US-  nMW  UR                  Ul        Un[        U R                  R                  UR                  5      n[        U R                  R                  UR                  5      nX4:  a  X$U-
  -  nU[        U5      :  a  M  U$ )Nr   c                     U R                   $ r   r<   r   s    r   r   ,EventKey.intervals_overlap.<locals>.<lambda>E   s    AGGr   key)r      r[   )	sortedmaxrD   start_time_nsr<   minend_time_nsr=   len)	r.   rT   overlap_timeoverlap_startoverlap_endijprev_intervalcurr_intervals	            r   intervals_overlapEventKey.intervals_overlapC   s<   9*;<	

 8 8)A,:L:LMMdjj44il6F6FGK*m ;;#i. %LM%LMFA  =#6#66 $$}'8'88FA*7*;*;M'A

 8 8-:M:MNMdjj44m6G6GHK*m ;;! #i. $ r   rC   rA   N)r1   r2   r3   r4   rE   rJ   rN   strrR   listr:   ri   r8   r   r   r   r?   r?   6   s-    #/$# $4> r   r?   c                   \    \ rS rSrS\SS4S jrSS jrS rSS jrS	 r	SS
\
S\4S jjrSrg)BasicEvaluationd   profrA   Nc                 T   Xl         0 U l        U R                  5         [        S U R                  R	                  5        5       S S9U l        U R
                   Vs/ s H  o"R                  PM     snU l        / U l        U R                  5       U l
        U R                  5         g s  snf )Nc              3   $   #    U  H  ov   M     g 7fr   r   ).0es     r   	<genexpr>+BasicEvaluation.__init__.<locals>.<genexpr>j   s     ,+1Q+s   c                 .    U R                   R                  $ r   )rD   r^   r   s    r   r   *BasicEvaluation.__init__.<locals>.<lambda>j   s    AGG<Q<Qr   rY   )r   metricscompute_self_timer\   keys
event_keysrD   eventscuda_eventscompute_queue_depthqueue_depth_listcompute_idle_time)r.   rq   ru   s      r   rE   BasicEvaluation.__init__e   s    57  ,))+,2Q
 )-81ww8/1 $ 8 8 :  9s   B%c                 N   U R                   R                  c   e[        U R                   R                  R                  5       5      nU(       a  UR	                  5       nUR
                  nUR                   H"  nX4R
                  -  nUR                  U5        M$     [        U5      U R                  ;  d!   SUR                   SUR                   35       e[        US9U R                  [        U5      '   UR
                  U R                  [        U5         l        U(       a  M  gg)z=
Computes event's self time(total time - time in child ops).
NzDuplicate id: z, )r)   )r   kineto_resultsr   experimental_event_treer"   r(   r   r   r?   rz   rI   rQ   r&   )r.   stackr   	self_timer   s        r   r{   !BasicEvaluation.compute_self_timeq   s     ||**666dll11IIKL J"33I)22999	[)  3 J't||;  r*//1BC; 2>91UDLL*-. ",!<!< LL$ er   c                 
  ^^^ U R                   R                  c   eU R                   R                  R                  5       nS mS m[        U4S jU 5       S S9n[        U4S jU 5       S S9n[        X#-   S S9U l        0 nS	nU H  m[        UU4S
 jUS9nXdT'   Ub  UOUnM     S	nSnX#-   U R                  -   n	S n
/ nU	R                  U
S9  U	 GH  n[        US5      (       aE  UR                  5       S-  nUR                  5       UR                  5       -   S-  nX;   a	  XL   b  XL   n[        US5      (       a@  UR                  5       nUR                  5       UR                  5       -   nX;   a	  XL   b  XL   nO)[        US5      (       a  UR                  nUR                  nU[        U5      :  aB  X7   R                  5       W::  a,  US-  nU[        U5      :  a  X7   R                  5       U::  a  M,  X-
  S-   n[        US	5      n[        US5      (       d  [        US5      (       a  UR!                  [#        WWU5      5        GMy  [        US5      (       d  GM  XR$                  ['        U5         l        GM     U$ )z
Computes queue_depth at each event. This will calculate the queue depth data for
All the events in the tree.
This will return a list of Interval of queue depth data of cuda launch and kernels.
c                 b   ^ 1 Skn[        [        U SU 5      5      m[        U4S jU 5       5      $ )z+Check if the event is a CUDA launch kernel.>   cudaLaunchKernel__cudaLaunchKernelcudaLaunchKernelExCcudaLaunchCooperativeKernel&cudaLaunchCooperativeKernelMultiDevicerQ   c              3   F   >#    U  H  nTR                  U5      v   M     g 7fr   )
startswithrt   patternrQ   s     r   rv   UBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel.<locals>.<genexpr>   s     OGtw//s   !)rl   getattrany)ru   launch_patternsrQ   s     @r   is_cuda_launch_kernelBBasicEvaluation.compute_queue_depth.<locals>.is_cuda_launch_kernel   s0    O wq&!,-DOOOOr   c                    ^ U R                  5       [        R                  :w  a  g[        [	        U SU 5      5      R                  5       m1 Skn[        U4S jU 5       5      (       + $ )z,Check if the event is a CUDA runtime kernel.FrQ   >   cpymemfreeallocc              3   ,   >#    U  H	  oT;   v   M     g 7fr   r   r   s     r   rv   NBasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel.<locals>.<genexpr>   s     K:Jwd?:Js   )device_typer   CUDArl   r   lowerr   )ru   exclude_patternsrQ   s     @r   is_cuda_kernel;BasicEvaluation.compute_queue_depth.<locals>.is_cuda_kernel   sS     }}*//1wq&!,-335D  ?K:JKKKKr   c              3   F   >#    U  H  nT" U5      (       d  M  Uv   M     g 7fr   r   )rt   ru   r   s     r   rv   6BasicEvaluation.compute_queue_depth.<locals>.<genexpr>   s     D1+@+CQQ   !	!c                 "    U R                  5       $ r   start_nsr   s    r   r   5BasicEvaluation.compute_queue_depth.<locals>.<lambda>   
    !**,r   rY   c              3   F   >#    U  H  nT" U5      (       d  M  Uv   M     g 7fr   r   )rt   ru   r   s     r   rv   r      s     =1>!+<QQr   c                 "    U R                  5       $ r   r   r   s    r   r   r      r   r   c                 "    U R                  5       $ r   r   r   s    r   r   r      s
    1::<r   r   c                 F   > U R                  5       TR                  5       :H  $ r   )linked_correlation_id)r   cuda_launch_events    r   r   r      s    !113$::<=r   rW   c                     [        U S5      (       a  U R                  5       S-  $ [        U S5      (       a  U R                  5       $ [        U S5      (       a  U R                  $ [	        S5      e)Nstart_us  r   r^   zUnknown Event Type)hasattrr   r   r^   	ExceptionrC   s    r   new_old_event_comparatorEBasicEvaluation.compute_queue_depth.<locals>.new_old_event_comparator   s`    uj))~~'$..uj))~~''uo..***011r   r   r   r   r^   r[   )r   r   r~   r\   r   index_of_first_matchsortr   r   duration_usr   duration_nsr^   r`   ra   r]   r   r:   rz   r?   r+   )r.   cuda_event_listcuda_launch_eventscuda_kernel_eventskernel_mappinglast_mapped_kernelindexcurrent_kernel_indexspawned_kernel_index
all_eventsr   r   rD   
start_timeend_timecurrent_queue_depthr   r   r   s                   @@@r   r   #BasicEvaluation.compute_queue_depth   s    ||**666,,55<<>
	P	L $DD&
 $==&

 "39O
 35!3("=(	E 16,-*/*;AS "4  !!'<t{{J
	2 ,.45Euj))"^^-4
!NN,u/@/@/BBdJ*~/D/P+9+@(uj))"^^-
 >>+e.?.?.AA*~/D/P+9+@(00"00
 ,, %s+='>>'=FFHZW$)$ %s+='>>'=FFHZW #7"MPQ"Q"%&91"=uj))WUJ-G-G ''Z3FG 00<OXe_-9?  B  r   c                     SnSn/ nU R                   (       a  U R                  (       aw  U[        U R                  S   R                  U R                   S   R                  5      [        U R                   S   R
                  U R                  S   R                  5      /-  nU R                    Hi  nUR                  S:X  a  U(       d  UR
                  nSnUR                  S:  d  M:  U(       d  MC  UR                  [        X$R                  5      5        SnMk     U R                  R                  5        Vs/ s H  oUR                  PM     nnU H8  n[        U5      R                  U5      U R                  [        U5         l        M:     gs  snf )z$
Computes idle time of the profile.
Fr   r   TN)r   r~   r:   r^   r<   r=   r`   r+   r   rz   r|   rD   r?   ri   r*   )r.   idle
idle_startidle_intervals
data_pointru   
event_listrD   s           r   r   !BasicEvaluation.compute_idle_time   sD   
 
)+  T[[Q55t7L7LQ7O7U7UV..r266B8S8ST N
 //J%%*4'^^
%%)dd%%hz;K;K&LM 0 (,||'8'8':;':!gg':
;E9A:/ LL%)6   <s   5Fc                   ^ SSK n[        [        U R                  5      5      nU Vs/ s H  oDR                  PM     nnSmSn/ nSnU[        U5      :  a  XX   T:  a  US-  nM  [        US-   [        U5      5       He  n	[        UU4S jU	S9n
[        XYU
S9nUc  M!  X[   U:  d  M+  UR                  [        X;   R                  X8   R                  5      5        U
b  U
OUn  O   US-  nU[        U5      :  a  M  U R                  R                  5        Vs/ s H  nUR                  U5      (       d  M  UPM     nnU(       Ga  UR                  U Vs/ s H  oR                  U   R                   PM     snUR"                  S9nUR                  U Vs/ s H  oR                  U   R$                  PM     snUR"                  S9nXR'                  U5      -
  UR)                  U5      -  nXR'                  U5      -
  UR)                  U5      -  nUS	U-  -   n[+        [-        UU5      [.        R0                  " S5      S
S9 VVs/ s H  u  nnUPM
     nnnUSU nU$ s  snf s  snf s  snf s  snf s  snnf )z
Filter and Rank the events based on some heuristics:
1) Events that are in the falling phase of the queue depth.
2) Events that have a high idle_time, self_time difference.

Parameters:
    length: The number of events to return.
r   N   r[   c                    > U T:*  $ r   r   )r   bottom_threasholds    r   r   -BasicEvaluation.rank_events.<locals>.<lambda>-  s    .?)?r   rW   )r<   r=   )dtypeg333333?T)rZ   r   )torchrm   r   r   r+   ra   ranger   argmaxr   r:   r<   rz   r|   ri   tensorr)   float32r/   meanstdr\   zipoperator
itemgetter)r.   lengthr   r   ru   	qd_valuestop_threasholddecrease_intervalre   rf   next_minimum_idxpeak_idxrD   r   r   	idle_timenormalized_gainnormalized_selfheuristic_score_list_r   s                       @r   rank_eventsBasicEvaluation.rank_events  s    	)>)> ?@,<=,<q]],<	=#i. |//Q1q5#i.1 $8?q$  "):JK 'I,?>,Q%,, ,6<<>N>Q>W>W
 -=,H(aA! 2" FA+ #i. 2 **,
,&&'89 , 	 

 ?IJzee$11zJmm % I EOPZEe$77ZPmm % I  )::i+@@EIIiDXXO(::i+@@EIIiDXXO#2S?5J#J 
 !',j9 ++A. !!HAu !   $GV,Js >:
 K Qs#   II$6I$"I)"I.I3r   print_enablec                 @   U R                  U5      nU(       d  U$ U(       a  SOSnUSR                  U Vs/ s HA  nS SU S[        UR                  5       SU R                  U   R
                  S-  S	 S
S 3	PMC     sn5      -  nU(       a  [        U5        U$ s  snf )NzOptimizable events:
zNo events to optimize

zP--------------------------------------------------------------------------------z
Event:                z
Source code location: z
Percentage idle time: rp   z.2fz%
)r   joinsource_code_locationrD   rz   r/   print)r.   r   r   r   outputrD   s         r   get_optimizable_events&BasicEvaluation.get_optimizable_eventsZ  s    %%f-
,6(<U$)) ( (E J g +EKK89 :||E*==CCH I	
	
 (	
 		
 &Ms   AB
)r   r}   r~   rz   r   r   rk   )r[   T)r1   r2   r3   r4   r   rE   r{   r   r   r   r5   boolr   r8   r   r   r   ro   ro   d   sE    
!W 
! 
!=,l \08GRS D  r   ro   c                     Ub  U[        U 5      :  a  [        U 5      n[        X#5       H  nU" X   5      (       d  M  Us  $    g r   )ra   r   )seq	predicater<   r=   re   s        r   r   r   o  s@    
{cSXo#h5SVH  r   c                     U $ r   r   r   s    r   r   r   x  s    ar   c                 `    XU n [        U 5      S:X  a  g U R                  [        XS95      U-   $ )Nr   rY   )ra   r   r]   )r   rZ   r<   r=   s       r   r   r   x  s2    
C.C
3x1}99S&'%//r   c                     U b>  [         R                  " SU R                  5      nUc  U R                  n M5  U R                  $ g)Nz
\.py\(.*\)zNo source code location found)researchrQ   parent)rD   matchs     r   r   r     s:    

		-4=LLEzz*r   c                  T    SSK Jn   U " 5           S S S 5        g ! , (       d  f       g = f)Nr   r   )torch.autograd.profilerr   r   s    r   _init_for_cuda_graphsr	    s    /	 
s   
')r   Nrk   )	functoolsr   r  collectionsr   dataclassesr   typingr   r  r   torch.profilerr   torch.autogradr	   r   r    partialtraverse_dfstraverse_bfsr&   r:   r?   ro   r   r   r   r	  r   r   r   <module>r     s      	  !   + % + *>u * *   4EtT  ,e
 
9 
9 
9   + +\H HV  qd 0+r   