
    9i;                     d   S SK r S SKJrJrJrJr  S SKrS SKJr	  S SKJ
r  S SKJr  S SKJr  S SKJr  S SKJ
r  S SKJr  S S	KJr  S S
KJrJr  S SKJr  SSKJr  SSKJ r J!r!  SSK"J#r#  Sr$ " S S5      r% " S S5      r&S\\'\#RP                  4   S\#RP                  4S jr)S\'4S jr* " S S\5      r+g)    N)DictOptionalUnionAny)ir)proton)amd)nvidia)passes)LazyDict)JITFunction)set_profile_allocatorNullAllocator)backends   )Hook   )set_instrumentation_onset_instrumentation_off)modec                   6    \ rS rSrS rS\S\S\\   4S jrSrg)	CudaAllocator   c                     Xl         g Ninstrumentation_hook)selfr   s     e/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/triton/profiler/hooks/instrumentation.py__init__CudaAllocator.__init__   s    $8!    size	alignmentstreamc                 :   X R                   R                  :w  a%  [        SU SU R                   R                   35      eX-   S-
  U-  U-  n[        X@R                   R                  5      nSS KnUR                  U4UR                  SS9nX`R                   l        U$ )NzAlignment mismatch: z != r   r   cudadtypedevice)	r   profile_buffer_alignmentRuntimeErrormaxprofile_buffer_sizetorchemptyuint8buffer)r   r#   r$   r%   aligned_sizer/   r2   s          r   __call__CudaAllocator.__call__   s    11JJJ&ykd6O6O6h6h5ijl l(1,:YF
 <)B)B)V)VW 	l-U[[P+1!!(r"   r   N)	__name__
__module____qualname____firstlineno__r    intr   r4   __static_attributes__ r"   r   r   r      s%    9S S (3- r"   r   c                   N    \ rS rSrS\\\4   4S jrS\4S jrS\4S jr	S r
Srg	)
Instrumentation/   ir_mapc                     Xl         g r   manager)r   r@   s     r   r    Instrumentation.__init__1   s    r"   r   c                 Z    XR                   ;   a  [        SU 35      eX R                   U'   g )NzIR already registered: )rC   r,   )r   r   funcs      r   registerInstrumentation.register4   s+    !8=>>Rr"   c                 p    U R                  U5        XR                  ;   a  U R                  U   " U5        g g r   )load_dialectsrC   )r   r   pmcontexts       r   patchInstrumentation.patch9   s0    7#LLR  r"   c                 0    [         R                  " U5        g r   )triton_protonrJ   )r   ctxs     r   rJ   Instrumentation.load_dialects>   s    ##C(r"   rB   N)r6   r7   r8   r9   r   strr   r    rG   rM   rJ   r;   r<   r"   r   r>   r>   /   s3    tCH~  3  
! !
)r"   r>   mode_objreturnc                   ^
 [        U [        R                  5      (       a  U $ U (       d  Sn U R                  S5      nUS   n0 nUSS   H0  nSU;   a  UR                  SS5      u  pVXcU'   M#  [	        SU S35      e   UR                  SS	5      UR                  S
S5      UR                  SS5      [        UR                  SS5      5      UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      S.m
U
4S jnU" S[        R                  5      T
S'   U" S
[        R                  5      T
S
'   U" S[        R                  5      T
S'   U" S[        R                  5      T
S'   U" S[        R                  5      T
S'   [        T
S   5      S:  a3  T
S   R                  S5       Vs/ s H  nUR                  5       PM     snO/ n	U	 H%  nU[        R                  ;  d  M  [	        SU 35      e   U	 Vs/ s H  n[        R                  U   PM     snT
S'   US:X  a  [        R                  " S0 T
D6$ US:X  a  [        R                   " S0 T
D6$ [	        SU  35      es  snf s  snf )Ndefault:r   r   =z#Malformed instrumentation option: ''metric_typecyclebuffer_typesharedbuffer_strategycircularbuffer_size0granularitywarpsampling_strategynonesampling_options optimizations)r[   r]   r_   ra   rc   re   rg   ri   c                 b   > TU    nU(       a  X!;  a  [        SU  SU 35      eU(       a  X   $ U$ )NzUnknown z: )
ValueError)opt_namemappingvalueoptionss      r   get_option_value)_interpret_mode.<locals>.get_option_value[   s<    !U)xzE7;<<!&w~1E1r"   ,zUnknown optimization: mmazUnknown mode: r<   )
isinstancer   InstrumentationModesplitrk   getr:   metric_typesbuffer_typesbuffer_strategiesgranularitiessampling_strategieslenstripri   DefaultMMA)rT   parts	mode_nameoptsoptkeyvalrp   rn   valuesro   s             @r   _interpret_moder   B   s   (D4455NN3EaIDQRy#:yya(HCIB3%qIJJ  xxw7Q^`hHi88$5zBSVW[W_W_`morWsStxxv6TXXVikqMr HH%7<txxXgikOl	G2 .mT=N=NOGM-mT=N=NOGM!12CTE[E[!\G-mT=O=OPGM#34GIaIa#bG  EHP_H`DadeDeGO4::3?A?5 {{}?Akm ***5eW=>>  HNNve 2 25 9vNGO I||&g&&	e	xx"'"">(455A
  Os   I8I=c                      [         R                  R                  R                  R	                  5       R
                  n U S:X  a  gU S:X  a  g[        SU  35      e)Nr'   r
   hipr	   zUnsupported backend: )tritonruntimedriveractiveget_current_targetbackendr,   )r   s    r   _get_backend_namer   x   sO    nn##**==?GGG&	E	27)<==r"   c                      \ rS rSr% Sr\\S'   Sr\\S'   Sr\	\S'   Sr
\\   \S'   S	r\\S
'   Sr\\S'   S\S\\R$                  4   4S jrS rS rS\S\S\S\\\4   S\SS4S jrS\4S jrS\SS4S jrS\SS4S jrS\SS4S jrSrg)InstrumentationHook   r   priorityactive_countFenable_host_bufferNhost_bufferr   r.      r+   rT   c                 `    [        U5      U l        [        U 5      U l        S U l        0 U l        g r   )r   r   r   	allocatorr2   metadata_path)r   rT   s     r   r    InstrumentationHook.__init__   s)    .=h.G	&t,79r"   c                   ^ ^^^^^^^ [         R                  S:  a  [        S5      e[         =R                  S-  sl        [        5         [        R
                  R                  R                  R                  5       m[        R
                  R                  R                  R                  R                  T5      S   m[        5       mUUU 4S jmUU4S jm[        U4S jU4S jS	.5      [        T   R                  l        [!        T R"                  5        [$        R&                  mT R(                  m[*        R,                  " T5      UU4S
 j5       nU[$        l        g )Nr   zFOnly one instance of the instrumentation hook can be active at a time.r   max_shared_memc                   > [         R                  R                  TR                   R                  ;   a  SOSn[        R
                  " U TR                   R                  TR                   R                  TR                   R                  TR                   R                  TR                   R                  TR                   R                  TR                   R                  TTR                  TR                  U5        [        R                   R#                  U 5        [         R                  R$                  TR                   R                  ;   a  [        R&                  " U 5        [        R(                  " U 5        [         R                  R*                  TR                   R                  ;   a  TS:X  a  [        R,                  " U 5        g g g )NFTr	   )r   OptimizeCLOCK32ri   rP   add_convert_proton_to_protongpur[   re   rg   rc   r_   r]   ra   r.   r+   triton_passescommonadd_cseSCHED_STORESadd_schedule_buffer_store!add_allocate_proton_shared_memorySCHED_BARRIERSadd_sched_barriers)rK   is_long_clkbackend_namer   r   s     r   to_llvmir_passes6InstrumentationHook.activate.<locals>.to_llvmir_passes   s9   #'==#8#8DII<S<S#S%Y]K99"dii>S>SUYU^U^UpUp:>)):T:TVZV_V_VkVk:>)):S:SUYU^U^UjUj:>)):O:OQ_:>:R:RTXTqTq:EG   ((,}}))TYY-D-DD77;;;B?}}++tyy/F/FF<[`K`004 LaFr"   c                 J  > [         R                  " U 5        TS:X  a  [         R                  " U 5        g TS:X  ai  [        R                  R
                  R                  R                  R                  T5      S   R                  S5      S   n[         R                  " X5        g g )Nr
   r	   archrX   r   )rP   )add_allocate_proton_global_scratch_buffer%add_convert_proton_nvidia_gpu_to_llvmr   r   r   r   utilsget_device_propertiesrv   "add_convert_proton_amd_gpu_to_llvm)rK   r   r   r*   s     r   to_llvm_passes4InstrumentationHook.activate.<locals>.to_llvm_passes   s    CCBGx'CCBG&~~,,3399OOPVWX^_eefijklm@@J 'r"   c                    > T" U 5      $ r   r<   )rK   r   s    r   <lambda>.InstrumentationHook.activate.<locals>.<lambda>   s    '+r"   c                    > T" U 5      $ r   r<   )rK   r   s    r   r   r      s
    ~b)r"   )ttgpuir_to_llvmirllvmir_to_llvmc                 8   > [        T5      US'   T" U /UQ70 UD6$ )Ninstrumentation_mode)rS   )r   argskwargsoriginal_modeoriginal_runs      r   instrumented_run6InstrumentationHook.activate.<locals>.instrumented_run   s(    -0-?F)*6t6v66r"   )r   r   r,   r   r   r   r   r   get_current_devicer   r   r   r>   r   compilerinstrumentationr   r   r   runr   	functoolswraps)	r   r   r   r*   r   r   r   r   r   s	   ` @@@@@@@r   activateInstrumentationHook.activate   s    ++a/ghh((A-( &&--@@B..55;;QQRXYZjk(*	5$	K ;J+)	K
 ;''7 	dnn-"				&	7 
'	7 +r"   c                    [         R                  S:X  a  g [         =R                  S-  sl        [        5       n0 [        U   R                  l        [        5         [        [        R                  S5      (       a#  [        R                  R                  [        l	        [        [        5       5        [         R                  (       a  S [         l        S U l        g )Nr   r   __wrapped__)r   r   r   r   r   r   r   hasattrr   r   r   r   r   r   r   r2   )r   r   s     r   
deactivateInstrumentationHook.deactivate   s    ++q0((A-((* ;=''7 	 ! ;??M22)oo99KO 	mo. 11.2+ r"   modulefunctionnamemetadata_grouphashrU   c                    U(       d  g [        S UR                  5        5       S 5      n[        S UR                  5        5       S 5      nXpR                  U'   U(       a  [        R                  " 5       n[        R
                  " U5        [        5       n	U	S:X  a  [        R
                  " U5        OU	S:X  a  [        R
                  " U5        [        R
                  " U5        [        R                  " Xh5      nXl        [        R                  " U5      n
[        R                  " U5      n[        R                  " X#XU5        g [        SU 35      e)Nc              3   Z   #    U  H!  u  pUR                  S 5      (       d  M  Uv   M#     g7f)ttgirNendswith.0r   paths      r   	<genexpr>2InstrumentationHook.init_handle.<locals>.<genexpr>   s"     `.DV]H_.D   +	+c              3   Z   #    U  H!  u  pUR                  S 5      (       d  M  Uv   M#     g7f)jsonNr   r   s      r   r   r      s"     e4Jyscll\bNddd4Jr   r
   r	   z+IR path not found in metadata for function )nextitemsr   	triton_irrL   rJ   r   triton_nvidia
triton_amdrP   parse_mlir_moduleget_scope_id_namesget_scope_id_parents	libprotoninit_function_metadatar,   )r   r   r   r   r   r   ir_pathr   rL   r   scope_id_namesscope_id_parentss               r   init_handleInstrumentationHook.init_handle   s    `n.B.B.D`bfgeN4H4H4Jegkl'48$'')G##G,,.Lx'++G4&((1''000BF$N*==fEN,AA&I,,X^_lm!LXJWXXr"   c                 T    U R                   c  S$ U R                   R                  5       $ )Nr   )r2   data_ptr)r   s    r   	_data_ptrInstrumentationHook._data_ptr  s#    KK'qCT[[-A-A-CCr"   metadatac                    UR                   R                  S5      nUR                   R                  S5      nU R                  c  SO4U R                  R                  5       U R                  R	                  5       -  n[
        R                  " X2U R                  5       U5        [        R                  (       a  S [        l
        g g Nr   r%   r   )datarw   r2   element_sizenumelr   enter_instrumented_opr   r   r   r   r   r   rF   r%   
alloc_sizes        r   enterInstrumentationHook.enter	  s    }}  ,""8,++-Q4;;3K3K3MPTP[P[PaPaPc3c
''dnn6F
S11.2+ 2r"   c                    UR                   R                  S5      nUR                   R                  S5      nU R                  c  SO4U R                  R                  5       U R                  R	                  5       -  n[
        R                  " X2U R                  5       U5        [        R                  (       a  U R                  U5        g g r   )r   rw   r2   r   r   r   exit_instrumented_opr   r   r   _populate_host_bufferr   s        r   exitInstrumentationHook.exit  s    }}  ,""8,++-Q4;;3K3K3MPTP[P[PaPaPc3c
&&vT^^5EzR11&&t, 2r"   c           
         U(       Ga  U R                   U   (       Ga  SS KnSS KnSS KnS[        [
        [        4   S[        4S jnU R                  c  SO4U R                  R                  5       U R                  R                  5       -  nU R                  R                  R                  5       R                  S5      n0 n[        U R                   U   S5       n	UR!                  U	5      nS S S 5        U" US   5      n
US   nUS   nU R                  R"                  [$        R&                  R(                  :X  a  UO
[+        U5      n[        Xk-  5      nU R                  R                  S	:H  =(       a1    U R                  R,                  [$        R.                  R0                  :H  nU(       a  [3        U5       Vs/ s H  nUPM     nnOU Vs/ s H  n[        U5      PM     nnS
US-  -   nSnUnUn[4        UUUUXUX/
UQnUR6                  " S[+        U5      -  /UQ76 nUR9                  UU-   UR:                  SS9[<        l        [<        R>                  S U nURA                  URC                  [E        U5      UR:                  S95        [<        R>                  US  RG                  U R                  5      nURA                  U R                  RI                  5       5        g g g ! , (       d  f       GN = fs  snf s  snf )Nr   targetrU   c                 ,    U S   S:X  a  gU S   S:X  a  gg)Nr   r'   r   r   r   r   r<   )r
  s    r   encode_target@InstrumentationHook._populate_host_buffer.<locals>.encode_target   s%    )$.I&%/r"   rr   rprofile_scratch_size	num_warpsrh   (      Icpur(   )r)   )%r   r/   structr   r   rS   r   r:   r2   r   r   r   rg   r~   rv   openloadre   rP   SAMPLING_STRATEGYNONEr}   rc   GRANULARITYWARPrangeVERSIONpackr0   r1   r   r   copy_tensorlistview_asr  )r   r   r/   r  r   r  r  sampled_warpsr   filedevice_typescratch_mem_size
total_unituid_num	block_numis_all_warpsiuid_vecheader_sizeheader_offsetpayload_offsetpayload_sizeheader_valuesheader_bytesconfig_portiondata_portions                             r   r  )InstrumentationHook._populate_host_buffer  s   **844d38n   #kk1t{{7O7O7QTXT_T_TeTeTg7gJ II66<<>DDSIMDd((2C8Dyy 9 (X7K#$:;k*J$(II$?$?=CbCbCgCg$gjmpnGJ9:IB  9955;w		@U@UYfYrYrYwYw@wL&+J&78&71&78+89=a3q6=9w{*KM(N%L^\S^ku ,3M ";;sS-?'?P-PL.3kk+
:RZ_ZeZensk.t+0<<\kJN  d<.@!TU.::;<HPPQUQ\Q\]Lt{{01[ 58  98V 99s   K%:K7K<%
K4)r   r2   r   r   )r6   r7   r8   r9   r   r:   __annotations__r   r   boolr   r   r   r.   r+   r   rS   r   ru   r    r   r   r   r   r   r   r  r  r  r;   r<   r"   r   r   r      s    HcL#$$!%K#%  $'c':tS$2J2J'J!K :9+v8Y# Y YC YQUVY[^V^Q_ Ygj Yos Y6D3 D3h 34 3-X -$ -N2c N2d N2r"   r   ),r   typingr   r   r   r   r   triton._C.libtritonr   r   r   rP   r	   r   r
   r   r   r   triton._C.libprotonr   triton.compilerr   triton.runtime.jitr   triton.runtime._allocationr   r   triton.backendsr   hookr   flagsr   r   rh   r   r  r   r>   rS   ru   r   r   r   r<   r"   r   <module>rA     s     - -  / 7 1 7 7 3 $ * K $  C   .) )&36eC)A)A$AB 36tG_G_ 36l>3 >f2$ f2r"   