
    9i^              	         S SK JrJr  S SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJr  S SKJr  S SKJr  S	S
K J!r!  SSK"J"r"  SSK J#r#  S	SK$J%r%J&r&J'r'J(r(  SSK)J*r*  S SK+J,r,  Sr-Sr.\" S5      r/ " S S\R`                  5      r1S6S jr2 " S S5      r30 r4/ r5S r6S7S jr7 " S S\\/   5      r8S r9S r:S r; " S  S!5      r<\ " S" S#5      5       r=S$ r> " S% S&\<\8\/   5      r?\S8S' j5       r@\SSSSSSSS(.             S9S) jj5       r@ S:SSSSSSSS(.               S;S* jjjr@ " S+ S,5      rA " S- S.5      rBS/ rCS0 rD " S1 S2\<5      rE " S3 S4\<5      rFS5 rGg)<    )annotationsdivisionN)defaultdict)	dataclass)cached_property)
CallableGenericIterableOptionalTypeVarUnionoverloadDictAnyTupleTensorDescriptor)
ModuleType   )knobs   )driver)_async_compile)find_paths_ifget_iterable_pathtype_canonicalisation_dictcanonicalize_dtype)get_cache_key)get_cache_invalidating_env_varsztriton.languagez"triton.experimental.gluon.languageTc                     ^  \ rS rSrSrSU 4S jjr\S 5       rS rS r	SS jr
S rS	 rS
 rS rS rS rS rS rS rSrU =r$ )DependenciesFinder"   a  
This AST visitor is used to find dependencies of a JITFunction. This can
be used to invalidate a JITFunction's hash when its source code -- or
that of its dependencies -- changes.

This visitor also keeps track of the global variables touched by the
JITFunction.  When we launch the kernel, we check that these have the same
values as they did when we ran this visitor.  If not, we raise an error (or
otherwise we could recompile).
c                   > [         TU ]  5         Xl        [        R                  " UR                  S5      5      U l        X l        X0l        1 SkU l	        [        [        SS1U l        0 U l        SU l        g )Nutf-8>
   intlenmaxminlistfloatprintrangegetattr
isinstancecopymathF)super__init__namehashlibsha256encodehasherglobals	nonlocalssupported_python_builtinsGLUON_MODULETRITON_MODULEsupported_modulesused_global_valsvisiting_arg_default_value)selfr4   r9   r:   src	__class__s        R/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/triton/runtime/jit.pyr3   DependenciesFinder.__init__.   sn    	nnSZZ%89 "*
& 	"
" TV*/'    c                6    U R                   R                  5       $ N)r8   	hexdigestrA   s    rD   retDependenciesFinder.retY   s    {{$$&&rF   c                    [         R                  " UR                  5      (       a  g[        USS5      nUR	                  [
        5      $ )NT
__module__ )inspect	isbuiltinfuncr.   
startswithr=   )rA   noderR   modules       rD   _is_triton_builtin%DependenciesFinder._is_triton_builtin]   s9    TYY''|R0  //rF   c                F   [        U[        5      (       d   eU R                  R                  5       UR                  R                  5       -   H]  nUu  p4U R                  U   u  pTUR                  U   u  pdXV:w  d  M0  [	        SU SU SU R
                   SUR                   SU S35      e   U R                  R                  UR                  5        UR                  nU[        [        USS5      5      -  nU R                  R                  UR                  S	5      5        g )
NGlobal variable z has value z when compiling z, but inner kernel z has conflicting value z7 from when it was first compiled.  This is not allowed.noinlineFr%   )r/   JITCallabler?   keysRuntimeErrorr4   __name__update	cache_keystrr.   r8   r7   )rA   rR   kvar_name_v1v2func_keys           rD   _update_hashDependenciesFinder._update_hashc   s&   $,,,, &&++-0E0E0J0J0LLAKH))!,EB))!,EBx"&xjB4?OPTPYPY{Zmnrn{n{m|  }T  UW  TX  XO  P  M 	$$T%:%:;>>Cj%8998??734rF   c                   SSK Jn  Ub  [        U5      [        L a  g [	        USS5      (       a  g [	        USS5      S:X  a  g [        U[        5      (       a  U R                  U5        g [        U5      (       a3  [        U[        5      (       d  [        X5      (       d  [        SU 35      eU R                  (       a  g Ub0  [        R                  " U5      U4U R                  U[        U5      4'   g )	Nr   	constexpr__triton_builtin__FrN   rO   ztriton.language.extra.libdevicez!Unsupported function referenced: )language.corerl   typer   r.   r/   r[   rh   callabler]   r@   r0   deepcopyr?   id)rA   valvar_dictr4   rl   s        rD   record_reference#DependenciesFinder.record_referenceu   s    - ;$s)z13,e44 3b)-NNc;''c"C==C!6!6z#?Y?Y!B3%HII **;?==;Mx:XD!!4H"67rF   c                F  ^  [        UR                  5      [        R                  L a  UR                  $ UR                  T R
                  ;   a  g U 4S jnU" UR                  5      u  p4UR                  T R                  ;   a  U$ T R                  X4UR                  5        U$ )Nc                   > TR                   R                  U S 5      nUb  UTR                   4$ TR                  R                  U S 5      nUb  UTR                  4$ g)NNN)r9   getr:   )r4   rs   rA   s     rD   name_lookup2DependenciesFinder.visit_Name.<locals>.name_lookup   sZ    ,,""4.CDLL((..$$T40CDNN**rF   )ro   ctxastStorerr   local_namesr;   ru   )rA   rT   r{   rs   rt   s   `    rD   
visit_NameDependenciesFinder.visit_Name   s|    >SYY&77N77d&&&	 $DGG,77d444JcTWW5
rF   c                b    UR                    Vs/ s H  o R                  U5      PM     sn$ s  snf rH   )eltsvisit)rA   rT   elts      rD   visit_TupleDependenciesFinder.visit_Tuple   s&     ,09959C

39555s   ,c                ~   U R                  UR                  5      n[        U[        R                  5      (       a<  U R                  UR                  5      n[        U[        R                  5      (       a  M<  [        USS5      nUb  X0R                  ;   a  g [        X!R                  5      nU R                  U5        U$ )Nr^   rO   )	r   valuer/   r~   	Attributer.   r>   attrru   )rA   rT   lhslhs_namerK   s        rD   visit_Attribute"DependenciesFinder.visit_Attribute   s    jj$cmm,,**SYY'C cmm,,3
B/;(&<&<<c99%c"
rF   c                    UR                   R                    Vs1 s H  o"R                  iM     snU l        U R                  U5        g s  snf rH   )argsargr   generic_visit)rA   rT   r   s      rD   visit_FunctionDef$DependenciesFinder.visit_FunctionDef   s6    /3yy~~>~GG~>4  ?s   Ac                  ^  U 4S jn[         R                  " UR                  UR                  UR                  (       a  UR                  /O/ UR
                  5       H  nT R                  U5        M     U" UR                  5        UR                  b  T R                  UR                  5        U" UR                  5        g )Nc                   >  TR                   (       a   eSTl         U  H  nUc  M  TR                  U5        M     STl         g ! STl         f = f)NTF)r@   r   )defaultsexprrA   s     rD   visit_defaults:DependenciesFinder.visit_arguments.<locals>.visit_defaults   sN    8::::26/$D'

4( % 38/%/s    A A 	A)
	itertoolschainposonlyargsr   vararg
kwonlyargsr   kw_defaultskwargr   )rA   rT   r   r   s   `   rD   visit_arguments"DependenciesFinder.visit_arguments   s    	8 ??4#3#3TYYQUQ\Q\bdfjfufuvCJJsO w 	t''(::!JJtzz"t}}%rF   c                    U R                  U5      n[        U[        5      (       a  U =R                  [	        U5      -  sl        g U R                  R                  U5        g rH   )r   r/   r*   r   setadd)rA   rT   targets      rD   visitAssnTarget"DependenciesFinder.visitAssnTarget   sH     D!fd##F+  (rF   c                    [        UR                  5      S:w  a  [        S5      eU R                  UR                  S   5        U R	                  U5        g )Nr   z2Simultaneous multiple assignment is not supported.r   )r'   targets	TypeErrorr   r   rA   rT   s     rD   visit_AssignDependenciesFinder.visit_Assign   sG    t||!
 PQQT\\!_- 	4 rF   c                \    U R                  UR                  5        U R                  U5        g rH   r   r   r   r   s     rD   visit_AnnAssign"DependenciesFinder.visit_AnnAssign   $    T[[) 	4 rF   c                \    U R                  UR                  5        U R                  U5        g rH   r   r   s     rD   	visit_ForDependenciesFinder.visit_For   r   rF   )	r9   r8   r   r4   r:   r>   r;   r?   r@   )returnNonery   )r^   rN   __qualname____firstlineno____doc__r3   propertyrK   rV   rh   ru   r   r   r   r   r   r   r   r   r   __static_attributes____classcell__rC   s   @rD   r"   r"   "   se    	)0V ' '05$ D06
	!
&@)!!! !rF   r"   c                Z   SS K Js  Jn  [        U [        5      (       a  U R                  5       n U R                  S5      (       a<  U R                  S5      n [        U 5      n U R                  S5      (       d   eSU SS  -   $ U R                  S5      (       a  S[        U S S 5      -   $ U R                  S5      (       a  S[        U SS  5      -   $ U R                  S5      (       a  [        U R                  S5      5      $ O[        XR                  5      (       a  S[        U R                  5       3$ [        XR                  5      (       a  U R                  n O-[        U [        5      (       a  U R                  n O[	        U 5      n [         R"                  " U R%                  S	S
5      U 5      $ )Nr   zconst const**kr   ztl._trO   )triton.language.corelanguagecorer/   ra   striprS   removeprefix_normalize_tyendswithpointer_type
element_tydtyper4   ro   r^   r   rz   replace)tyr   s     rD   r   r     s[   ''"cXXZ=="")Br"B==%%%%"QR&= ;;sr#2w///==r!"v...== !788  	B))	*	*=/011	B

	#	#WW	B		[[W%))"**T2*>CCrF   c                      \ rS rSrSr  SS jr\S 5       r\SS j5       r\SS j5       r	\S 5       r
\S 5       r\S	 5       r\S
 5       rSrg)KernelParami&  zBRepresents a parameter (name plus metadata) to a @jit'ed function.c                4    Xl         X l        X0l        X@l        g rH   )num_paramdo_not_specializedo_not_specialize_on_alignment)rA   r   paramr   r   s        rD   r3   KernelParam.__init__)  s    !2.L+rF   c                .    U R                   R                  $ rH   )r   r4   rJ   s    rD   r4   KernelParam.name0  s    {{rF   c                    U R                   R                  (       a2  U R                   R                  [        R                  R                  :X  a  g[        U R                   R                  5      $ )NrO   )r   
annotationrP   	Parameteremptyr   rJ   s    rD   r   KernelParam.annotation4  sD    {{%%)?)?7CTCTCZCZ)ZT[[3344rF   c                    U R                   nUR                  S5      (       a  USS  nOUR                  S5      (       a  USS  nU[        [        R                  " 5       5      ;   a  U R                   $ g)Nr   r   r   r   rO   )r   rS   r   r   values)rA   as     rD   annotation_typeKernelParam.annotation_type:  sc    OO<<!"A\\#!"A.55788??"rF   c                     SU R                   ;   $ Nrl   )r   rJ   s    rD   is_constexprKernelParam.is_constexprE  s    doo--rF   c                    U R                   (       a  gSU R                  ;   =(       d    U R                  R                  S5      $ )NFr   r   )r   r   rS   rJ   s    rD   is_constKernelParam.is_constI  s1    $//)MT__-G-G-MMrF   c                .    U R                   R                  $ rH   )r   defaultrJ   s    rD   r   KernelParam.defaultO  s    {{"""rF   c                d    U R                   R                  [        R                  R                  :g  $ rH   )r   r   rP   r   r   rJ   s    rD   has_defaultKernelParam.has_defaultS  s#    {{""g&7&7&=&===rF   )r   r   r   r   N)r   r&   r   zinspect.Parameterr   boolr   r   r   ra   )r^   rN   r   r   r   r3   r   r4   r   r   r   r   r   r   r   r    rF   rD   r   r   &  s    LM15M     5 5
   . . N N
 # # > >rF   r   c                <   ^ ^^^ SSK Jm  SSKJm  SUUU U4S jjmT$ )Nr   rk   r   r   c                Z  >^  T c  g[        T [        5      (       a  g[        T [        5      (       aC  U(       a  T" T SUS9OS nT S:X  a  U(       a  gST ::  a
  T S::  a  S	U4$ S
T ::  a
  T S::  a  SU4$ SU4$ [        T [        5      (       a  g[	        T S5      (       aa  T R
                  U4n[        R                  US 5      nUc&  US   (       a  SOS[        US   5      -   nU[        U'   U(       a  T" T SUS9OS nXd4$ [        T [        5      (       a  ST R                  4$ [        T T5      (       a  ST 4$ [        T [        5      (       aW  T  Vs/ s H  nT" U5      PM     nnU 4S jn	U	" U Vs/ s H  owS   PM	     sn5      n
U	" U Vs/ s H  owS   PM	     sn5      nX4$ [        T [        5      (       aY  [	        T R                  S5      (       d   e[        T R                  R
                  5      nSU [        T R                  5       S3S 4$ [        T T5      (       ag  [	        T R                  S5      (       d   e[        T R                  R
                  5      nSU [        T R                  5       ST R                   < S3S 4$ [#        S[%        T 5      -  5      es  snf s  snf s  snf )N)rl   N)u1Nr&   )alignr   )rl   r   i   ii32l            l    u64i64)fp32Ndata_ptrr   r   r   tensorrl   c                X   > [        TS5      (       a  [        T5      " U 6 $ [        U 5      $ )N_fields)hasattrro   tuple)valsr   s    rD   <lambda>Acreate_specialize_impl.<locals>.specialize_impl.<locals>.<lambda>  s)    '#y:Q:Qd3i&6&bW\]aWb&brF   ztensordesc<>,zUnsupported type: %s)r/   r   r&   r+   r  r   	dtype2strrz   r   r[   r`   r  r   baser*   block_shapelayoutr   ro   )r   r   specialize_valuer   keydskresxspec
make_tupletysr\   innerGluonTensorDescriptorrl   specialize_extraspecialize_impls   `            rD   r  /create_specialize_impl.<locals>.specialize_impla  s~   ;&T""S!!?O"3U;UYCax,'SSI%5s|###"2s|#s|#U##!S*%%99h'C--T*C{"1vt32DSV2LL!$	#BR"3>X\C:[))//Y''%%U##0341OA&D4bJD1DqdD12CT2TtT23D;-..388Z0000&sxx~~6E!%coo)>(?qA4HH233388Z0000&sxx~~6E!%coo)>(?qaPRVWW2T#Y>?? 512s   J.J#	J()FTT)r   rl   'triton.experimental.gluon.nvidia.hopperr   )r  r  rl   r  s   `@@@rD   create_specialize_implr  \  s    $a-@ -@^ rF   c                    [        [        5      S:X  a  [        R                  [        S 5      5        [        S   nU" XS9S   $ )Nr   c                    g rH   r   )rd   kwargss     rD   r	  mangle_type.<locals>.<lambda>  s    PTrF   )r  )r'   specialize_impl_cacheappendr  )r   
specializer  s      rD   mangle_typer'    s?    
 !Q&$$%;<T%UV+A.O3<Q??rF   c                  *    \ rS rSr% S\S'   SS jrSrg)KernelInterfacei  r    runc                   ^ ^ UU 4S j$ )z
A JIT function is launched with: fn[grid](*args, **kwargs).
Hence JITFunction.__getitem__ returns a callable proxy that
memorizes the grid.
c                 .   > TR                   " U TSS.UD6$ )NFgridwarmup)r*  )r   r"  r.  rA   s     rD   r	  -KernelInterface.__getitem__.<locals>.<lambda>  s    txx$T%'YRX'YrF   r   )rA   r.  s   ``rD   __getitem__KernelInterface.__getitem__  s     ZYrF   r   N)r   r    )r^   rN   r   r   __annotations__r1  r   r   rF   rD   r)  r)    s    	
FZrF   r)  c           
        UR                  5        VVs0 s H,  u  pVXVR                  R                  S:X  a  [        U5      OU_M.     nnnSS KnXUR                  5        Vs/ s H  n[        U5      PM     sn[        UR                  5       5      UR                  5        Vs/ s H  n[        U5      PM     sn[        UR                  5       5      UR                  WS.n	UR                  U	5      n
U
$ s  snnf s  snf s  snf )Nr   r   )r4   	signatureconstant_keysconstant_vals
attrs_keys
attrs_valsoptionsr  )
itemsrC   r^   ra   jsonr\   r*   r   __dict__dumps)r4   r5  	constantsattrsr:  r  r   r<  r  objserialized_objs              rD   serialize_specialization_datarC    s    enetetevwevWaWZOO$<$<$Gc%jURevIwQZQ_Q_Qa?bQaAQQa?bY %**,0O,Qa,0O_cdidpdpdr_s##CC
 ZZ_N x @c0Os   3C3!C9"C>c                t   [        U R                  5      [        U5      :X  d   e/ n[        U R                  R                  5       U5       GH0  u  pEUR                  (       a  UR                  SU S35        M.  UR                  (       a  SOSnUR                  (       a  SOSnUR                  (       a  SOSnSU SU SU SU S3	n	UR                  (       a  [        UR                  [        5      (       a%  UR                  S:X  d  UR                  SS	 S
;   a  SnU(       a$  UR                  SUR                   SU	 S35        M  UR                  SUR                   S35        GM  UR                  U	 5        GM3     S n
SSR                  [        [        XR                  R                  5       5      5      S/-   5       SSR                  U R                  R                  5        Vs/ s H  nSU SU 3PM     sn5       SSR                  U5       S3nU R                  R                  5        VVs0 s H>  u  pLUR                   ["        R$                  R&                  Ld  M.  SU 3UR                   _M@     nnn[(        US'   [+        UR,                  5      US'   [/        X5        US   $ s  snf s  snnf )a  
Equivalent to sig.bind followed by apply_defaults. This generates a
native Python function (using exec) which can be memoized on a per-kernel
basis to avoid having to run these expensive functions -- which constitute
much of the kernel launch overhead -- every time we run the kernel.
z("constexpr", )TrueFalsezspecialize_impl(, r   Nr   )fpbfFz("z",) + z[1:]z", None)c                z    U S   R                   [        R                  R                  L a  U S   $ U S    SU S    3$ )Nr   r   z	=default_)r   rP   r   r   )r  s    rD   r	  0create_function_from_signature.<locals>.<lambda>  sC    AaDLLG,=,=,C,CCAaDaAaD6QZ[\]^[_Z`IaarF   z
def dynamic_func(z	**optionsz):
    params = {'z': z}
    specialization = [r  z-]
    return params, specialization, options
default_r[   r  dynamic_func)r'   
parameterszipr\   r   r%  r   r   r   r   r/   ra   joinr*   mapr;  r   rP   r   r   r[   r  get_arg_specializationexec)sigkparamsbackendspecializationr4   kpr   r&  r   rK   r   	func_bodyr   func_namespaces                 rD   create_function_from_signaturer]    s    s~~#g,...N++-w7??!!N4&":;!#v'H$&$8$8fJ!@@GfE$TF"XJbBugQOC!!b00#66))T1R5G5G5K|5[%*
"))Br/A/A.B&T*RS #))Br/A/A.B(*KL%%/' 8, bC))DS..*>*>*@!ABk]RST U		3>>;N;N;PQ;P4QtfCv.;PQRS Txx/0 1I >>//11KD== 1 1 7 77 	)(4&5==(1   %0N=!(>w?]?](^N$% 	# .))% R
s   +J/6-J4'J4c                8    U R                    SU R                   3$ )N.)rN   r   fns    rD   get_full_namerb    s    mm_Aboo.//rF   c                  f    \ rS rSrS rS r\S 5       rS r\S 5       r	S r
S rS	 r\" \\S
9rSrg)r[   i  c                   Xl         [        R                  " U5      U l         [        R                  " U5      u  U l        U l        [        U5      U l	        [        R                  " 5       U l        [        R                  " SR                  U R                  5      5      nU[         R"                  " SU[         R$                  5      R'                  5       S  nX0l        S U l        0 U l        UR.                  U l        UR0                  U l        UR2                  U l        UR4                  U l        UR6                  U l        g ! [         a  n[        S5      UeS nAff = f)Nz1@jit functions should be defined in a Python filerO   z^def\s+\w+\s*\()ra  rP   r5  getsourcelinesraw_srcstarting_line_numberOSError
ValueErrorrb  _fn_name	threadingRLock
_hash_locktextwrapdedentrR  research	MULTILINEstart_srchashr?   r   r^   r   __globals__rN   )rA   ra  erB   s       rD   r3   JITCallable.__init__  s    **2.	Y6=6L6LR6P3DL$3 &b)#//+ oobggdll34")).R\\BHHJKL		 TV zzOO>>--7  	YPQWXX	Ys   #E   
E
EEc                p    U R                   [        R                  " U R                  5      R                  -  $ rH   )rv  rP   getclosurevarsra  r:   rJ   s    rD   get_capture_scopeJITCallable.get_capture_scope  s(    '"8"8"A"K"KKKrF   c                   U R                      U R                  b  U R                  sS S S 5        $ SU R                   3U l        [        R                  " U R
                  5      R                  n[        U R                  U R                  UU R                  S9nUR                  U R                  5       5        UR                  [        U R                  5      -   U l        [        [!        UR"                  R%                  5       5      5      U l        SSKJn  U =R                  [        U R"                  R%                  5        VVVs/ s H  u  u  pEu  pe[+        Xc5      (       d  M  XF4PM!     snnn5      -  sl        [,        R.                  " U R                  R1                  S5      5      R3                  5       U l        S S S 5        U R                  $ s  snnnf ! , (       d  f       U R                  $ = f)Nz
recursion:)r4   r9   r:   rB   r   rk   r%   )rm  ru  rj  rP   rz  ra  r:   r"   rv  rB   r   parserK   ra   rg  dictsortedr?   r;  r   rl   r/   r5   r6   r7   rI   )rA   r:   dependencies_finderrl   r4   rd   rs   s          rD   r`   JITCallable.cache_key  st    __yy$yy _
 %T]]O4DI..tww7AAI"4$--QUQaQamv9=#C%%djjl3+//#d6O6O2PPDI$(0C0T0T0Z0Z0\)]$^D!6II9=9N9N9T9T9V=9V"5)4Xc!+C!; *tk9V= > >I  tyy'7'7'@AKKMDI# $ yy	= _$ yys*   GDGG#G*AGG
G0c                   [         R                  " U R                  5      n[        U[         R                  5      (       d   e[        UR                  5      S:X  d   e[        UR                  S   [         R                  5      (       d   eU$ )Nr   r   )r~   r~  rt  r/   Moduler'   bodyFunctionDef)rA   trees     rD   r~  JITCallable.parse2  se    yy#$

++++499~"""$))A,8888rF   c                    SSK Jn  U" U 5      $ )Nr   )constexpr_type)r   r  )rA   r  s     rD   ro   JITCallable.type9  s    7d##rF   c                    SU l         Xl        g)a  
The only method allowed to modify src.
Bypasses the __setattr__ restriction by calling super().__setattr__ directly.

Note that it is the callers responsibility to make sure any triton functions that call this function have the `.hash` value reset to None.
N)ru  rt  )rA   new_srcs     rD   _unsafe_update_srcJITCallable._unsafe_update_src>  s     		rF   c                    [        S5      e)NzqCannot set attribute 'src' directly. Use '_unsafe_update_src()' and manually clear `.hash` of all callersinstead.)AttributeErrorrJ   s    rD   _set_srcJITCallable._set_srcH  s     ( ) 	)rF   c                    U R                   $ rH   )rt  rJ   s    rD   _get_srcJITCallable._get_srcM  s    yyrF   )fgetfset)r   rv  rN   r^   r   rj  rm  rt  ra  ru  rf  r5  rg  r?   N)r^   rN   r   r   r3   r{  r   r`   r~  ro   r  r  r  rB   r   r   rF   rD   r[   r[     sX     (DL  2 $ $)
 x
0CrF   r[   c                  4    \ rS rSr% S\S'   S\S'   S\S'   Srg	)
JitFunctionInfoiS  r   rU   ra   r4   JITFunctionjit_functionr   N)r^   rN   r   r   r3  r   r   rF   rD   r  r  S  s    
IrF   r  c                    [        U5      [        U5      4nU R                  US 5      nUb  U$ [        U5      [        U5      -   nX@U'   U$ rH   )r  ra   rz   )kernel_key_cacherY  r:  r  r`   s        rD   compute_cache_keyr  Z  sT     #g,
/C $$S$/IN#c'l2I%SrF   c                     ^  \ rS rSrS r  SS jrS rS rS rS r	S r
  SU 4S	 jjrS
 rS rS rS rS rSrU =r$ )r  ie  c                    g)NFr   rJ   s    rD   is_gluonJITFunction.is_gluong  s    rF   c	                   U(       d  g U R                   R                  n	U R                   R                  n
SR                  [	        U R
                  US   5       VVs/ s H  u  pUR                   SU 3PM     snn5      nU	 SUR                   SUR                   SUR                   SUR                   SUR                   S	U S
3n[        U R                   5      n[        XXWS   Xb5      nUUUUR                  UR                  UR                  UR                  UR                  UR                  UUUS.nU" UU[        XU 5      SU0UEUSS9$ s  snnf )NrH  r   z: z[num_warps=z, num_ctas=z, num_stages=z, enable_fp_fusion=z, launch_cooperative_grid=](rE  r   )r5  devicer?  	num_warpsnum_ctas
num_stagesenable_fp_fusionlaunch_cooperative_gridextern_libsconfigsspecialization_data	is_warmupr  F)r  reprra  compileis_manual_warmupalready_compiled)ra  r   rN   rR  rQ  paramsr4   r  r  r  r  r  rb  rC  r  r  )rA   hookr  r5  r  r?  r:  r  r  r4   rU   r   r   	arg_reprsr  	full_namer  r"  s                     rD   
_call_hookJITFunction._call_hookj  s    ww####IIc$++WZ[\W]F^_F^%**Rt4F^_`	{7#4#4"5[AQAQ@RR_`g`r`r_s  tG  HO  H`  H`  Ga  a{  |C  |[  |[  {\  \^  _h  ^i  ij  k!$''*	;IR[ef]gipv #" **((!,, ' 8 8'.'F'F"..#6"
 vT2C*6*&"
 	
+ `s   E
c                ^    [        U5      (       d   eU R                  R                  U5        g)zu
Add a hook that will be executed prior to the execution of run
function with args and kwargs passed into the kernel
N)rp   pre_run_hooksr%  )rA   r  s     rD   add_pre_run_hookJITFunction.add_pre_run_hook  s&    
 ~~~!!$'rF   c                    SSK JnJnJnJn  [
        R                  R                  5       nU" U5      nXl        X l        X0l        [        U R                  U R                  U5      n0 0 XVU4$ )z!
Precompute as much as possible.
r   )CompiledKernelr  	ASTSourcemake_backend)compilerr  r  r  r  r   activeget_current_targetr]  r5  r  )rA   r  r  r  r  r   rX  binders           rD   create_binderJITFunction.create_binder  s\     	PO113v&,"/WU2v..rF   c           
        UR                  U5      nU R                   Vs/ s H  ofR                  PM     nnU Vs/ s H  ofS   PM	     nn[        Xx5       V	V
s0 s H  u  pX_M	     nn	n
SU;  d   S5       eSU;  d   S5       eSU;  d   S5       eU H'  n	XR                  ;  d  M  X;  d  M  [        SU	-  5      e   [        US	 5      nU Vs0 s H&  o[        [        UR                  5       5      U5      _M(     nnU Vs/ s H  ofS
   PM	     nn[        US 5      nU V	s0 s H  oUR                  [        X5      5      _M     nn	X[X4$ s  snf s  snf s  sn
n	f s  snf s  snf s  sn	f )Nr   device_typez=device_type option is deprecated; current target will be usedr  z8device option is deprecated; current device will be usedstreamz8stream option is deprecated; current stream will be usedz2Keyword argument %s was specified but unrecognisedc                    US:H  $ r   r   )rd   rs   s     rD   r	  (JITFunction._pack_args.<locals>.<lambda>  s	    3+;MrF   r   c                "    [        U[        5      $ rH   )r/   ra   )rd   r  s     rD   r	  r    s    Z35GrF   )parse_optionsr  r4   rQ  r=  KeyErrorr   r   r*   r   
parse_attr)rA   rX  r"  
bound_argsrY  r:  r  sigkeyssigvalsrb   vr5  
constexprspathattrvalsr@  s                   rD   
_pack_argsJITFunction._pack_args  sx   ''/#';;/;a66;/!/0AQ40(+G(=>(=fqQT(=	>F*k,kk*v%a'aa%v%a'aa%A(((Q-=SVWWXX  #7,MN
[ef[eSW-d:3D3D3F.GNN[e
f"01.QaD.1h(GHPUVPU1G&&'8'EFFPUV:44% 00> g1Vs"   EEE-EE"#$E'c                  UR                  SU R                  5      =(       d    [        R                  R                  US'   [        R
                  R                  5       n[        R
                  R                  U5      nU R                   H  nU" U0 UD6  M     U R                  U   u  ppnU" U0 UD6u  pn[        XU5      nUR                  US 5      nUc3  U R                  XXU5      u  nnnnU R                  UUUUUUU5      nUc  g [        5       nU R                  R                  5        H8  u  u  nnu  nnUR                  UU5      =nU:w  d  M&  [!        SU SU SU 35      e   U(       d  Uc   e[#        U5      (       a  U" U5      n[%        U5      nUS   nUS:  a  US   OSnUS:  a  US   OSn['        US5      (       a  UR)                  5       nUR*                  " X/UR-                  5       Q76 nUR.                  " UUUUUR0                  UR2                  U[        R                  R4                  [        R                  R6                  /	UR-                  5       Q76   U$ )	NdebugrY   z1 has changed since we compiled this kernel, from z to r   r   r   result)rz   r  r   runtimer   r  get_current_deviceget_current_streamr  device_cachesr  r  _do_compileobjectr?   r;  r]   rp   r'   r  r  launch_metadatar   r*  functionpacked_metadatalaunch_enter_hooklaunch_exit_hook) rA   r.  r/  r   r"  r  r  r  kernel_cacher  r   rX  r  r  rY  r:  r  kernelr5  r  r@  not_presentr4   rd   rs   globals_dictnewVal	grid_sizegrid_0grid_1grid_2r  s                                    rD   r*  JITFunction.run  sj    **Wdjj9PU]]=P=Pw 11311&9 &&D$!&! ' CGBTBTU[B\? /5d.Ef.E+
G 0'J!!#t, >48OOGU_DK5M1GY
E %%c9fj'SXZ`aF~ h.2.C.C.I.I.K*IT1*\&**4==#E"&tf,]^a]bbfgmfnoq q /L
 ###~~J'D	I!WF )AT!W1F )AT!W1Fvx(($44TXJDUDUDWXOJJvvvvvH^H^`o}}668V8VnYcYjYjYlnrF   c                V    U R                   c  U R                  $ U R                  U5      $ rH   )_reprrj  )rA   rd   s     rD   r  JITFunction.repr  s"     $

 2t}}E

1ErF   c	           	     (  > U(       a  UO/ nU(       a  UO/ n[         TU ]  U5        UR                  U l        X l        X0l        X@l        Xpl        Xl        / U l	        [        U R                  R                  R                  5       5       H^  u  pX;   =(       d    U
R                  U;   nX;   =(       d    U
R                  U;   nU R                  R                  [!        XX5      5        M`     [#        U R$                  5      U l        S U l        XPl        X`l        U R                   Vs/ s H  oR                  PM     snU l        U R                   Vs/ s H!  oR0                  (       d  M  UR2                  PM#     snU l        / U l        g s  snf s  snf rH   )r2   r3   rN   rU   versionr   r   r  r  r  	enumerater5  rP  r   r4   r%  r   r   r  r  r  r  rZ   	arg_namesr   r   r  r  )rA   ra  r  r   r   r  rZ   r  r  ir   dnsdns_oaprC   s                 rD   r3   JITFunction.__init__  s=   1B-Ki)Goq&mm!2.L+
.!$..";";"B"B"DEHA(KEJJ:K,KC8hEJJJh<hFKK{1SAB F )););< 
  +/++6+Q&&+6*.++H+Q5155+H  	 7Hs   'F
F*Fc               \    U R                   " [        [        R                  U5      USS.UD6$ )NTr-  )r*  rS  
MockTensor
wrap_dtype)rA   r.  r   r"  s       rD   r/  JITFunction.warmup   s(    xxZ5J5JD1QT$\U[\\rF   c           
     ^   SS K nSS KJn  [        R                  R                  5       nUR                  U5      nUS   U R                  :w  a  [        SUS    SU R                   35      e[        [        US   5      nUS   n[        Xg5       VV	s0 s H8  u  pXR                  R                  U	5      (       a  UR                  U	5      OU	_M:     n
nn	[        [        US   5      nUS   n[        [        X5      5      n[        US	   R                  5       5      nUS
   R                  5        VV	s0 s H(  u  pU[!        U	["        5      (       a  [        U	5      OU	_M*     nnn	US   nU R$                  U   u      nnnUR'                  U5      nU R)                  UUUU
UUSS9$ s  sn	nf s  sn	nf )Nr   r4   zSpecialization data is for z but trying to preload for r6  r7  r8  r9  r5  r:  r  T)r/  )r<  triton.languager   r   r  r  loadsrj  r]   rS  r  rQ  r   is_dtyper  r;  r/   r*   r  r  r  )rA   r  r<  tlr  deserialized_objr6  r7  r  r   r  r8  r9  r@  r5  r:  rd   rX  s                     rD   preloadJITFunction.preload#  s   $113::&9:F#t}}4-.>v.F-GGbcgcpcpbqrt tE#3O#DE(9 "-?
?
 HH$5$5e$<$<%%G? 	 
  0 >?
%l3
S01)+6<<>?	 /y9??A
A
 E4!8!8ueCA 	 
 u%"0081a!''0   
 	


s   ?F#1/F)c           
     H  ^ ^^^^^^^^^^^ T R                   T   u  mnmpT R                  [        R                  R                  TTTTTT/T5      (       a  g T R                  T TTT5      m[        R                  R                  5       n
U
bD  [        5       m[        TU	TT5      nUUU UU4S jnUUUUUUU UU4	S jnU
R                  XU5      nU$ T R                  TTTR                  S9nUTT'   T R                  [        R                  R                  TTTTTT/T5        U$ )Nc                 <   > TR                  TTTR                  T S9$ )N)r   r:  	_env_vars)r  r=  )env_varsr:  rA   rB   r   s   rD   async_compile.JITFunction._do_compile.<locals>.async_compileS  s!    ||C@P@P\d|eerF   c           
     r   >	 U TT'   TR                  [        R                  R                  TTTTTT/T	5        g rH   )r  r   r  jit_post_compile_hook)
r  r@  r  r  r  r  r:  rA   r5  r/  s
    rD   finalize_compile1JITFunction._do_compile.<locals>.finalize_compileV  s9    $*S! C CS)U[]gip!&1rF   )r   r:  )r  r  r   r  jit_cache_hookr  r   active_moderz   r   r   submitr  r=  r  )rA   r  r5  r  r  r:  r@  r/  rd   rX  
async_moder`   r  r  r  r  r  rB   r   s   ````````       @@@@rD   r  JITFunction._do_compileF  s   .2.@.@.H+a??5==77iQ[]dglfmouvvnnT9j%@#//335
!68H%c7GXFIf f1 1
  &&yAQRF 	 \\#fg>N>N\OF &LOOEMM??iQWYcelotnu"$rF   c                    [        S5      e)Nz:Cannot call @triton.jit'd outside of the scope of a kernel)r]   rA   r   r"  s      rD   __call__JITFunction.__call__c  s    WXXrF   c                P    SU R                    SU R                  R                   S3$ )NzJITFunction(:rE  )rU   ra  r   rJ   s    rD   __repr__JITFunction.__repr__f  s&    dkk]!DGG,@,@+ACCrF   )r  r  r  r  r  r  r  r  r   r   r  r  rU   rZ   r  r  r  )r   zbool | None)NNNNNNN)r^   rN   r   r   r  r  r  r  r  r*  r  r3   r/  r  r  r  r  r   r   r   s   @rD   r  r  e  sf    ,
 
,
\(/502hF mq;?" H]!
F:YD DrF   r  c                    g rH   r   r`  s    rD   jitr"  o  s    rF   r  r  r  r   r   r  rZ   c                    g rH   r   r#  s          rD   r"  r"  t  s     rF   c               F   ^^^^^^^ SUUUUUUU4S jjnU b  U" U 5      $ U$ )a  
Decorator for JIT-compiling a function using the Triton compiler.

:note: When a jit'd function is called, arguments are
    implicitly converted to pointers if they have a :code:`.data_ptr()` method
    and a `.dtype` attribute.

:note: This function will be compiled and run on the GPU. It will only have access to:

       * python primitives,
       * builtins within the triton package,
       * arguments to this function,
       * other jit'd functions

:param fn: the function to be jit-compiled
:type fn: Callable
c                   > [        U 5      (       d   e[        R                  R                  (       a  SSKJn  U" U TTTTTTTS9$ [        U TTTTTTTS9$ )Nr   )InterpretedFunction)r  r   r   r  rZ   r  r  )rp   r   r  	interpretinterpreterr'  r  )	ra  r'  r  r   r   r  rZ   r  r  s	     rD   	decoratorjit.<locals>.decorator  sl    |||==""8&r7N_Fdlq08tUdf f "3/M! /	 	rF   ra  r    r   zJITFunction[T]r   )	ra  r  r  r  r   r   r  rZ   r*  s	    ``````` rD   r"  r"    s&    : & 
~} rF   c                  X    \ rS rSrSr\S 5       rS
S jrS r\S 5       r	\S 5       r
S	rg)r  i  zf
Can be used in place of real tensors when calling:
    kernel.warmup(MockTensor(torch.float32), ...)
c                p    U R                   R                  S:X  a  U R                  S:X  a  [        U 5      $ U $ )Nr   torch)rC   r^   rN   r  )r   s    rD   r   MockTensor.wrap_dtype  s.    ==!!W,71Jc?"
rF   Nc                (    Uc  S/nXl         X l        g )Nr   r   shape)rA   r   r3  s      rD   r3   MockTensor.__init__  s    =CE

rF   c                    S/nU R                   SS   H  nUR                  US   U-  5        M     [        [        U5      5      $ )Nr   r   )r3  r%  r  reversed)rA   stridessizes      rD   strideMockTensor.stride  sB    #JJqrNDNN72;-. #Xg&''rF   c                     gNr   r   r   rF   rD   r  MockTensor.data_ptr      rF   c                     gr<  r   r   rF   rD   	ptr_rangeMockTensor.ptr_range  r>  rF   r2  rH   )r^   rN   r   r   r   staticmethodr   r3   r9  r  r@  r   r   rF   rD   r  r    sM    
  
(    rF   r  c                  T    \ rS rSrS rS rS rSS jrS rS r	S r
S	 rS
 rS rSrg)TensorWrapperi  c                    X l         Xl        UR                  U l        UR                  U l        U R                  R                  U l        g rH   )r   r  datar  r3  )rA   r  r   s      rD   r3   TensorWrapper.__init__  s1    
	II	kkYY__
rF   c                6    U R                   R                  5       $ rH   )r  r  rJ   s    rD   r  TensorWrapper.data_ptr  s    yy!!##rF   c                4    U R                   R                  " U6 $ rH   )r  r9  )rA   r   s     rD   r9  TensorWrapper.stride  s    yy&&rF   c                <    SU R                    SU R                   S3$ )NzTensorWrapper[r  rE  )r   r  rJ   s    rD   __str__TensorWrapper.__str__  s    

|2dii[::rF   c                6    U R                   R                  5       $ rH   )r  element_sizerJ   s    rD   rP  TensorWrapper.element_size  s    yy%%''rF   c                ^    [        U R                  R                  5       U R                  5      $ rH   )rD  r  cpur   rJ   s    rD   rS  TensorWrapper.cpu  s    TYY]]_djj99rF   c                N    U R                   R                  UR                   5        g rH   )r  copy_)rA   others     rD   rV  TensorWrapper.copy_  s    		

#rF   c                ^    [        U R                  R                  5       U R                  5      $ rH   )rD  r  cloner   rJ   s    rD   rZ  TensorWrapper.clone  s    TYY__.

;;rF   c                `    [        U R                  R                  U5      U R                  5      $ rH   )rD  r  tor   )rA   r  s     rD   r]  TensorWrapper.to  s     TYY\\&14::>>rF   c                `    [        U R                  R                  U5      U R                  5      $ rH   )rD  r  	new_emptyr   )rA   sizess     rD   r`  TensorWrapper.new_empty   s"    TYY007DDrF   )r  rF  r  r   r3  Nr   )r^   rN   r   r   r3   r  r9  rM  rP  rS  rV  rZ  r]  r`  r   r   rF   rD   rD  rD    s5    %$';(:$<?ErF   rD  c                
   [        U [        5      (       a;  XR                  R                  :X  a  U R                  $ [        U R                  U5      $ [	        U S5      (       a  [        X5      $ [        S[        U 5       S35      e)Nr  zCannot reinterpret a r_  )r/   rD  r  r   r  r   ro   )r  r   s     rD   reinterpretrd    sm    &-((KK%%%;; !e44		$	$V++/V~Q?@@rF   c                h   U n[        U[        5      (       d#  UR                  n[        U[        5      (       d  M#  UR                  R                  R                  nUR
                  n[        UR                  5       H1  u  pEUR                  5       R                  S5      (       d  M+  X4-  n  X#4$    X#4$ )Nzdef )
r/   r[   ra  __code__co_filenamerg  r  rf  r   rS   )ra  base_fn	file_name
begin_lineidxlines         rD   get_jit_fn_file_linerm    s    G+..** +..

##//I--J w/	::<""6**J  	 0   rF   c                       \ rS rSrS rS rSrg)BoundConstexprFunctioni%  c                    Xl         X l        g rH   )__self____func__)rA   instancera  s      rD   r3   BoundConstexprFunction.__init__'  s     rF   c                B    U R                   " U R                  /UQ70 UD6$ rH   rr  rq  r  s      rD   r  BoundConstexprFunction.__call__+  s    }}T]]<T<V<<rF   rv  N)r^   rN   r   r   r3   r  r   r   rF   rD   ro  ro  %  s    =rF   ro  c                  <   ^  \ rS rSrU 4S jrS rSS.S jrSrU =r$ )ConstexprFunctioni/  c                $   > [         TU ]  U5        g rH   )r2   r3   )rA   ra  rC   s     rD   r3   ConstexprFunction.__init__1  s    rF   c                "    Ub  [        X5      $ U $ rH   )ro  )rA   rA  objclasss      rD   __get__ConstexprFunction.__get__4  s    ?)#44rF   N)	_semanticc               *   SSK JnJn  U Vs/ s H
  od" U5      PM     nnUR                  5        VVs0 s H  u  pxXt" U5      _M     nnnU R                  " U0 UD6n	Uc  U	$ [
        R                  R                  (       a  U	$ U" U	5      $ s  snf s  snnf )Nr   )_unwrap_if_constexprrl   )r   r  rl   r;  ra  r   r  r(  )
rA   r  r   r"  r  rl   r  rb   r  r  s
             rD   r  ConstexprFunction.__call__:  s    H156A$Q'6;A<<>J>!!)!,,>J ggt&v&J ==""J~ 7Js
   B
Br   )	r^   rN   r   r   r3   r~  r  r   r   r   s   @rD   ry  ry  /  s     )-  rF   ry  c                    [        U 5      $ )z
Wraps an arbitrary Python function so that it can be called at
compile-time on constexpr arguments in a Triton function and
returns a constexpr result.
)ry  r`  s    rD   constexpr_functionr  M  s     R  rF   r   )Fr,  )r  Optional[Callable]r  r  r   Optional[Iterable[int | str]]r   r  r  Optional[bool]rZ   r  r   zCallable[[T], JITFunction[T]]rH   )ra  zOptional[T]r  r  r  r  r   r  r   r  r  r  rZ   r  r   z4Union[JITFunction[T], Callable[[T], JITFunction[T]]])H
__future__r   r   r~   r0   r5   rP   r   rk  rp  rn  collectionsr   dataclassesr   	functoolsr   typingr   r	   r
   r   r   r   r   r   r   r   triton.tools.tensor_descriptorr   typesr   rO   r   r   r   _utilsr   r   r   r   cacher   triton._C.libtritonr   r=   r<   r    NodeVisitorr"   r   r   r  r$  r  r'  r)  rC  r]  rb  r[   r  r  r  r"  r  rD  rd  rm  ro  ry  r  r   rF   rD   <module>r     s>   , 
      	  # ! % d d d ;     e e   ?!3CLb! b!TD4/> />d 	 4n@	Zgaj 	Z	7*t0_1 _1D   BD+q1 BDT 
 
 
 #*.7;DH #
 
 (	

 5
 %B
 
 
 #
 

 4 #*.7;DH #44 	4
 (4 54 %B4 4 4 :4x B"E "EJA!$=[ = <!rF   