
    9i>U                        S SK JrJrJr  S SKJrJrJrJr  S SK	J
r
  S SKJr  S SKJr  S SKrS SKJrJrJrJr  S SKJr  S SKrS SKrS SKrS SKrS SKrS SKrS S	KJr  S
\4S jrS\
R@                  4S jr!\RD                  " 5       S 5       r#\RD                  " 5       S\$4S j5       r%S\$4S jr&\RD                  " 5       S\$4S j5       r'\RD                  " S5      S 5       r(S\$4S jr)\" SS9 " S S5      5       r* " S S\5      r+g)    )BaseBackend	GPUTargetLanguage)irpassesllvmnvidia)knobs)
PTXASError)	dataclassN)AnyDictTupleOptional)
ModuleType)Pathtargetc                 @    S[         [        [        [        4   4S jnU$ )Nreturnc                     U R                   R                  nUR                   R                  nX#:X  d   S5       eUS:X  a  gg)Nz%lhs and rhs bitwidth must be the same   )   r       )r   r      )scalarprimitive_bitwidth)lhs_typerhs_typelhs_bitwidthrhs_bitwidths       _/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/triton/backends/nvidia/compiler.pycheck_dot_compatibility-min_dot_size.<locals>.check_dot_compatibility   s@    9999+T-TT+1    )r   int)r   r"   s     r!   min_dot_sizer&      s!    uS#s]7K  #"r$   r   c                  6    [         R                  R                  $ N)r
   r	   ptxas r$   r!   	get_ptxasr+   "   s    <<r$   c                      [         R                  R                  n U b  U $ [        R                  " [        5       R                  S/5      R                  S5      nU$ )Nz	--versionutf-8)r
   r	   mock_ptx_version
subprocesscheck_outputr+   pathdecode)mock_verversions     r!   get_ptxas_versionr5   &   sI    ||,,H%%y{'7'7&EFMMgVGNr$   c                    [        U [        5      (       d   e[        [        U R	                  S5      5      u  pUS:X  a  US:  a  SU-   $ SU-   S-
  $ US:X  a  SU-   $ US:X  a  S	U-   $ US
:  a  SnX1S
-
  S-  -   U-   $ [        SU -   5      e)zC
Get the highest PTX version supported by the current CUDA driver.
.      P   r      F   
   ?      Z   z?Triton only support CUDA 10.0 or higher, but got CUDA version: )
isinstancestrmapr%   splitRuntimeError)cuda_versionmajorminorbase_ptxs       r!   ptx_get_versionrJ   /   s    
 lC((((sL..s34LE{19::>!{Ez{Ez{2:++e33
X[gg
hhr$   archc                 b    U R                   nUc  [        5       R                  n[        U5      nU$ r(   )ptx_versionr+   r4   rJ   )optionsrK   rM   rF   s       r!   get_ptx_version_from_optionsrO   G   s0    %%K {**%l3r$   c                 >    [        X5      n[        SU5      nSU 3nU$ )NV   z+ptx)rO   min)rN   rK   rM   llvm_ptx_versionfeaturess        r!   get_featuresrU   O   s.    .w=K 2{+&'(HOr$   c                     [        U S5       n[        R                  " UR                  5       5      R	                  5       sS S S 5        $ ! , (       d  f       g = f)Nrb)openhashlibsha256read	hexdigest)r1   fs     r!   	file_hashr^   ]   s5    	dD	Q~~affh'113 
		s   2A		
A
capabilityc                 $    U S:  a  SOSnSU  U 3$ )Nr@   a sm_r*   )r_   suffixs     r!   sm_arch_from_capabilityre   c   s!    "$S"FVH%%r$   T)frozenc                      \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S	'   S
r
\\   \S'   Sr\\S'   S
r\\S'   S
r\\S'   S
r\\   \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\   \S'   Sr\\   \S'   Sr\\S'   Sr\\   \S'   S
r\\S'   S
r\\S'   Sr\\S '   S!r\\S"'   Sr \\S#'   S
r!\\S$'   S%r"\\S&'   S' r#S( r$Sr%g
))CUDAOptionsi      	num_warpsr   num_ctas   
num_stagesr   	warp_sizeNmaxnreg)r   r   r   cluster_dimsrM   ptx_optionsir_overrideTenable_fp_fusionFlaunch_cooperative_grid
launch_pdl)fp8e5fp8e4b15supported_fp8_dtypesr*   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)r{   tf32x3ieeeallowed_dot_input_precisionsmax_num_imprecise_acc_defaultextern_libsdebugcudabackend_namesanitize_overflowrK   rb   instrumentation_modec                    [        [        5      R                  S-  nU R                  c  0 O[	        U R                  5      nUR                  SS 5      (       d2  [        R                  R                  =(       d    [        US-  5      US'   [        R                  U S[        UR                  5       5      5        U R                  S:  a   U R                  U R                  S-
  -  S:X  d   S5       eg )Nlib	libdevicezlibdevice.10.bcr   r   r   znum_warps must be a power of 2)r   __file__parentr   dictgetr
   r	   libdevice_pathrB   object__setattr__tupleitemsrk   )selfdefault_libdirr   s      r!   __post_init__CUDAOptions.__post_init__   s    h..6 ,,4b$t?O?O:P{D11',||'B'B'mc.[lJlFmK$4k6G6G6I0JK~~!t~~!9K'LQR&R 	0/	0R&Rr$   c           	      d   [        U R                  5      n[        S [        US   5       5       5      US'   SR	                  [        UR                  5       5       VVs/ s H  u  p#U SU 3PM     snn5      n[        R                  " UR                  S5      5      R                  5       $ s  snnf )Nc              3   @   #    U  H  u  pU[        U5      4v   M     g 7fr(   )r^   ).0kvs      r!   	<genexpr>#CUDAOptions.hash.<locals>.<genexpr>   s     (hGgtq!Yq\):Ggs   r   _-r-   )
r   __dict__r   sortedjoinr   rY   rZ   encoder\   )r   	hash_dictnamevalkeys        r!   hashCUDAOptions.hash   s    '	#((hviXeNfGg(h#h	- hh	@Q9RS9RID4&#9RST~~cjj12<<>> Ts   B,
)&__name__
__module____qualname____firstlineno__rk   r%   __annotations__rl   rn   ro   rp   r   rq   r   rM   rr   rB   rs   rt   boolru   rv   ry   r   rz   r|   r   r   r   r   r   r   r   rK   r   r   r   __static_attributes__r*   r$   r!   rh   rh   i   s   IsHcJIs "GXc]!#L%#KK!%K#%!d!$)T)J'<%*<46%uSz6'--/I %*I*.!4.KE4L#"t"D# "#"0?r$   rh   c                     ^  \ rS rSrSr\S\4S j5       rS rS\	4S jr
S\SS4U 4S jjrS\4S	 jrS
 rS rS\\	\4   4S jrS r\S 5       r\S 5       rS rS rS rS rS r\R8                  " 5       S 5       rSrU =r$ )CUDABackend   Nr   c                      U R                   S:H  $ )Nr   )backend)r   s    r!   supports_targetCUDABackend.supports_target   s    ~~''r$   c                     Sn[         R                  " X!5      nU(       d  [        SU 35      e[        UR	                  S5      5      $ )Nz	^sm(\d+)$z(TRITON_OVERRIDE_ARCH must have the form r   )re	fullmatch
ValueErrorr%   group)r   rK   patternmatchs       r!   _parse_archCUDABackend._parse_arch   s>    W+GyQRR5;;q>""r$   r   c                 B    U R                  UR                  5      nSU 3$ )Ncuda:)r   rK   )r   rN   r_   s      r!   get_target_nameCUDABackend.get_target_name   s#    %%gll3
zl##r$   c                 2   > [         TU ]  U5        SU l        g )Ncubin)super__init__
binary_ext)r   r   	__class__s     r!   r   CUDABackend.__init__   s     !r$   c                    S[         R                  R                  =(       d    SU R                  R                   30nUR                  [        R                  R                  5        Vs0 s H  o3U;   d  M
  X   c  M  X1U   _M     sn5        [        U R                  US   5      5      nUR                  SS5      S:  a  US:  a  [        SU S35      eSU;  aG  [        [        R                  5      nUS	:  a  UR                  S
5        [!        [#        U5      5      US'   SU;  a  US:  a  SUS'   SU;  a  [         R$                  R&                  US'   US:X  a  SOSUS'   [        S0 UD6$ s  snf )NrK   smrl   r   r@   zBnum_ctas > 1 requires NVIDIA SM90+ (Hopper). Current target is sm_zM. This configuration will fail. Please set num_ctas=1 or target an SM90+ GPU.ry   Y   fp8e4nvrz   )rx   rt   i   @r   r   r*   )r
   runtimeoverride_archr   rK   updaterh   __dataclass_fields__keysr%   r   r   r   setry   addr   r   languagedefault_fp_fusion)r   optsargsr   r_   ry   s         r!   parse_optionsCUDABackend.parse_options   sn   33NDKK<L<L;M7NO)I)I)N)N)Pu)PAY]T]ZaeahZQQZ)Puv))$v,78
88J"Q&:? !66@\ BNO Q Q "-#&{'G'G#H R$((3+08L1M+ND'(.d:R<J89T)',~~'G'GD#$9Cr9Iq,-"T""/ vs   (	E-5E-<	E-c                     UR                   UR                  UR                  UR                  S   UR                  S   UR                  S   4$ )Nr   r      )rk   rl   sharedrq   )r   metadatas     r!   pack_metadataCUDABackend.pack_metadata   sO    OO!!!$!!!$!!!$
 	
r$   c                     SS K Js  Js  Jn  [	        U R                  UR                  5      5      nUS:  a  UR                  OUR                  [        U R                  5      S.nU$ )Nr   r:   )convert_custom_typesr&   )triton.language.extra.cudar   extrar   r%   r   rK   convert_custom_float8_sm80convert_custom_float8_sm70r&   r   )r   rN   r   r_   codegen_fnss        r!   get_codegen_implementation&CUDABackend.get_codegen_implementation   sV    11))',,78
 0:R/?D++TEdEd%

 r$   c                     SSK Jn  SU0$ )Nr   )r   ztriton.language.extra.libdevice)r   r   )r   r   s     r!   get_module_mapCUDABackend.get_module_map   s    819==r$   c                     [         R                  " U5        [        R                  (       a   [        R                  R                  U5        g g r(   )r	   load_dialectsr   instrumentation)r   ctxs     r!   r   CUDABackend.load_dialects   s2    S!&&''55c: 'r$   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  U5        [        R                  R                  U5        US-  S:  a  [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        UR!                  U 5        U $ )Nr=   	   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_symbol_dceadd_loop_unrollrun)modr   optr_   pms        r!   	make_ttirCUDABackend.make_ttir   s    __S[[)
!!"%..r2aKK@@D''+#))"-b!$$R(##B'
s
r$   c                    UR                   bI  U R                  S[        R                  " U R                  5      R                  UR                   5      5        [        R                  " 5       nUR                  b<  UR                  S   Ul	        UR                  S   Ul
        UR                  S   Ul        [        R                  " U R                  5      nUR                  5       n[        R                  R!                  USU 3UR"                  SUR$                  5        [        R&                  R)                  U5        US-  S:  a  [        R&                  R+                  U5        [        R                  R,                  R/                  XT5        [        R&                  R1                  U5        [        R&                  R3                  U5        [        R&                  R5                  U5        [        R&                  R1                  U5        [        R&                  R7                  XSS	:  5        [        R                  R,                  R9                  U5        [        R                  R;                  U5        US-  S
;   GaC  [        R&                  R=                  U5        [        R>                  RA                  U5        [        R                  RC                  U5        [        R>                  RA                  U5        [        R&                  RE                  U5        [        R                  RF                  RI                  XRRJ                  U5        [        R&                  RM                  XRRJ                  5        [        R&                  RO                  U5        [        R&                  RQ                  XRRJ                  U5        GOUS-  S:  Ga  [        R&                  R=                  U5        [        R>                  RA                  U5        [        R                  RC                  U5        [        R&                  RS                  U5        [        R&                  RU                  US5        [        R                  R,                  RW                  U5        [        R&                  RM                  XRRJ                  5        [        R&                  RO                  U5        [        R&                  RY                  XRRJ                  5        [        R&                  RQ                  XRRJ                  U5        [        R&                  RE                  U5        [        R&                  RU                  US5        [        R                  R,                  R[                  U5        O[        R                  RC                  U5        [        R>                  RA                  U5        [        R                  R;                  U5        [        R&                  R]                  U5        [        R&                  R7                  XSS	:  5        [        R&                  R_                  U5        [        R                  R,                  Ra                  U5        [        R&                  R1                  U5        [        R                  R,                  Rc                  U5        [        R&                  Re                  U5        [        R&                  Rg                  U5        [        R                  R;                  U5        [        R>                  Ri                  U5        US-  S:  a)  [        R                  R,                  Rk                  U5        [        R                  R,                  Rm                  XS5        [        R                  R,                  Ro                  U5        [        R>                  Rq                  U5        [        R>                  Rs                  U5        [        R>                  RA                  U5        URu                  U 5        UR                  UR                  UR                  4US'   U Rw                  5       nXqS'   U $ )Nzttg.maxnregr   r   r   r   r   r=   r   r:   )r   r   FTr   rq   tensordesc_meta)<rp   set_attrr   builderr   get_int32_attrr	   ClusterInforq   clusterDimXclusterDimYclusterDimZr   r   r   r   add_convert_to_ttgpuirrk   rl   ttgpuiradd_coalesceadd_f32_dot_tc	ttnvgpuiradd_plan_ctaadd_remove_layout_conversionsadd_optimize_thread_localityadd_accelerate_matmuladd_optimize_dot_operands add_optimize_descriptor_encodingadd_loop_aware_cseadd_fuse_nested_loopsr   r   add_triton_licm add_combine_tensor_select_and_ifhopperadd_hopper_warpspecrn   add_assign_latenciesadd_schedule_loopsadd_pipelineadd_optimize_accumulator_initadd_hoist_tmem_allocadd_promote_lhs_to_tmemadd_warp_specializeadd_remove_tmem_tokensadd_prefetchadd_coalesce_async_copyadd_optimize_tmem_layoutsadd_interleave_tmemadd_reduce_data_duplicationadd_reorder_instructionsr  add_tma_loweringadd_fence_insertionadd_lower_mmaadd_sccpr  r  get_tensordesc_metadata)r  r   r  r_   cluster_infor  dump_enabledr  s           r!   
make_ttgirCUDABackend.make_ttgir   sS    ;;"LL

3;;(?(N(Ns{{([\))+''*'7'7':L$'*'7'7':L$'*'7'7':L$__S[[)(**2zl/CS]]TVX[XdXde##B'q NN))"-,,R>44R833B7,,R044R80025EF@@D&&r*v%NN004MM++B/KK''+MM++B/NN;;B?MM  44RVNN//NNCNN--b1NN''NNLI2#NN004MM++B/KK''+NN88<NN//E:MM##;;B?NN//NNCNN--b1NN..r>>BNN''NNLINN;;B?NN//D9MM##::2>KK''+''+&&r*##B'0025EF..r299"=44R833B72226//3&&r*$$R(q MM##44R833BC--b1r"b!''+
s$0$<$<l>V>VXdXpXp#q 557&5"#
r$   c                 &   Un[         R                  " UR                  5      nUR                  5         [        R
                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        UR                  U5        UR!                  5       US'   U$ )Nr  )r   r   r   r   r   gluonr   add_resolve_auto_encodingsr   r5  r   r  r   r  r!  r  r6  )r   srcr   rN   r_   r  r  s          r!   gluon_to_ttgirCUDABackend.gluon_to_ttgirE  s    __S[[)
  $//3r"&&r*&&r*77;
s&)&A&A&C"#
r$   c                 D   [        X0R                  R                  5      nUn[        R                  " UR
                  5      nUR                  5         [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  R                  XtU5        [        R                  R                  R!                  U5        ["        R$                  R&                  (       a  [        R                  R)                  U5        [        R                  R+                  U5        [        R                  R                  R-                  Xt5        [.        R0                  (       a*  [.        R0                  R3                  SXvR
                  5        [        R                  R                  R5                  XtU5        [        R6                  R9                  U5        [        R6                  R;                  U5        [        R                  R                  R=                  U5        [        R                  R                  R?                  U5        [        R6                  R9                  U5        [        R6                  R;                  U5        [        R6                  RA                  U5        [        R                  RC                  U5        ["        R$                  RD                  (       d  [        RF                  RI                  U5        [.        R0                  (       a*  [.        R0                  R3                  SXvR
                  5        URK                  U5        [L        RN                  " 5         [L        R
                  " 5       n["        R$                  RP                  (       a  [S        S5      e[L        RT                  " Xh5      n	[W        U5      n
[Y        X0R                  R                  5      nSn[        RZ                  " 5         [L        R\                  " XX5        [        R^                  " U	5        UR`                  (       aQ  [        Rb                  " U	5      (       a6  UR`                   VVs/ s H  u  pUPM	     nnn[L        Rd                  " X5        [L        Rf                  " U	[L        Rh                  5        URk                  S5      nUb  UUS'   URk                  S5      US'   URk                  S	5      US
'   URk                  S5      US'   URk                  S5      US'   URk                  S5      =(       d    SUS'   URk                  S5      =(       d    SUS'   [m        U	5      nA	AU$ s  snnf )Nttgpuir_to_llvmirllvmir_to_llvmzYAddress Sanitizer Error: Address sanitizer is currently only supported on the AMD backendnvptx64-nvidia-cudazttg.total-num-warpsrk   z
ttg.sharedr   zttg.tensor_memory_size	tmem_sizezttg.global_scratch_memory_sizeglobal_scratch_sizez#ttg.global_scratch_memory_alignmentglobal_scratch_alignzttg.profile_scratch_memory_sizer   profile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)7rO   r   rK   r   r   r   r   r   r  r!  add_allocate_warp_groupsconvertadd_scf_to_cfr	   add_allocate_shared_memory_nvr  add_allocate_tensor_memoryr
   compilationenable_experimental_consanadd_concurrency_sanitizer"add_allocate_global_scratch_memoryadd_proxy_fence_insertionr   r   patchadd_to_llvmirr   r   r  add_nvgpu_to_llvmadd_warp_specialize_to_llvmr  add_nvvm_to_llvmdisable_line_infollvmiradd_di_scoper  r   init_targetsenable_asanrE   	to_modulere   rU   set_short_ptrattach_datalayoutset_nvvm_reflect_ftzr   has_extern_depslink_extern_libsoptimize_moduleOPTIMIZE_O3get_int_attrrB   )r   r>  r   rN   r_   rM   r  r  r   llvm_modprocrT   tripler   r1   pathstotal_num_warpsrets                     r!   	make_llirCUDABackend.make_llirU  s   27KK<L<LM__S[[)
77;//3$$R(;;BKX::2>77NN44R899"=99"I&&''--.A2{{S++BKH''+b!11"5;;B?''+b!$$R(''+  22MM&&r*&&''--.>KKP
s,,.((km m>>#/&z2)9)9:&x@##H-6#9#9(#C#C.5.A.AB.AltT.AEB!!(2Xt'7'78 **+@A&$3H[! --l; # 0 01I J*-*:*:;[*\&'+.+;+;<a+b'(+.+;+;<]+^+cbc'(,/,<,<=c,d,ihi()(m
' Cs   Vc           	         [        X0R                  R                  5      nSn[        U5      n[	        X0R                  R                  5      n[
        R                  " XXx/ UR                  S5      n	[        R                  " SU	5      n
[        U
5      S:X  d   eU
S   US'   US-   SUS-   3n[        R                  " S	S
U 3U	[        R                  S9n	[        R                  " SSU 3U	[        R                  S9n	[        R                  " SSU	5      n	[        R                  R                  (       a  [!        S5        [!        U	5        U	$ )NrD  Fz(.visible .entry ([a-zA-Z_][a-zA-Z0-9_]*)r   r   r   r=   r7   z\.version \d+\.\d+z	.version )flagsz\.target sm_\d+z.target sm_z,\s*debug|debug,\s*rb   z // -----// NVPTX Dump //----- //)rO   r   rK   re   rU   r   translate_to_asmrt   r   findalllensub	MULTILINEr
   r	   
dump_nvptxprint)r   r>  r   r  r_   rM   ri  rh  rT   rl  namess              r!   make_ptxCUDABackend.make_ptx  s   238H8HI&&z2[[%5%56##CSEYEY[`a

FL5zQ 8$b);r>*:;ff*i},EsRTR^R^_ff';zl)CSPRP\P\]ff+R5<<""45#J
r$   c           
         [        5       R                  n[        R                  " SSSS9 n[        R                  " SSSS9 nUR	                  U5        UR                  5         UR                  S-   n/ n	[        R                  R                  (       a  U	SS	/-  n	O,[        R                  R                  (       a  U	S
/-  n	OU	S/-  n	UR                  (       a  / OS/n
[        U5      n[        R                  R                  (       a  SS/O/ nUR                  (       a  UR                  R                  S5      O/ nU/U	QU
QSPUQUQSU 3PUR                  PSPUPn [         R"                  " USSUS9  [        R                  R$                  (       a7  ['        UR                  5       n[)        UR+                  5       5        S S S 5        [,        R                  R/                  UR                  5      (       a   [,        R0                  " UR                  5        [,        R                  R/                  UR                  5      (       a   [,        R0                  " UR                  5        ['        US5       nUR+                  5       nS S S 5        [,        R                  R/                  U5      (       a  [,        R0                  " U5        S S S 5        S S S 5        W$ ! , (       d  f       GN= f! [         R2                   Ga  n['        UR                  5       nUR+                  5       nS S S 5        O! , (       d  f       O= f[,        R                  R/                  UR                  5      (       a   [,        R0                  " UR                  5        UR4                  S:X  a  SnO3UR4                  S[6        R8                  -   :X  a  SnOSUR4                   3nU SW SSR;                  U5       S3n[)        SU SU S35        [=        U5      eS nAff = f! , (       d  f       GN= f! , (       d  f       GNe= f! , (       d  f       W$ = f) NFwz.ptx)deletemoderd   rz.logz.oz	-lineinfoz-suppress-debug-infoz-gz--fmad=falsez--opt-level0 z-vz--gpu-name=z-oT)check	close_fdsstderr   z!Internal Triton PTX codegen error   z`ptxas` raised SIGSEGVz`ptxas` failed with error code z
`ptxas` stderr:
z
Repro command: 
zC

================================================================
z

zy
================================================================
please share the reproducer above with Triton project.
rW   )r+   r1   tempfileNamedTemporaryFilewriteflushr   r
   rO  rY  r	   disable_ptxas_optrt   re   rr   rD   r/   r  dump_ptxas_logrX   rw  r[   osexistsremoveCalledProcessError
returncodesignalSIGSEGVr   r   )r   r>  r   r  r_   r)   fsrcflogfbin
debug_infofmadrK   disable_optptx_extra_options	ptxas_cmdlog_fileelogerrorr]   r   s                        r!   
make_cubinCUDABackend.make_cubin  st     ((COSW''u3vNRVJJsOJJL99t#DJ  22{,BCC
//tf$
 {m+
--2N3CD*:6D 38,,2P2P=#.VXK ?Boo 5 5c :SU "%)+/2=@QU`ae`fSgimirirI$(ydS<<..diiHhmmo. ) 77>>$)),,IIdii(77>>$)),,IIdii(: dD!Q "ww~~d##		$M O PP O ) 00 ($))_"--/C %__77>>$)),,IIdii(<<3&?E\\S6>>%994E=all^LE!7 #--0E 2++.88I+>*?rC       !''5(8 "!G ON POP s   P7DP%AK+*KB$K+(P%4PAP%P7
K(#K++P PL/	&	P/
L=9CPPP%
P"P%%
P4	/P77
Qc                   ^ ^^ T R                  TR                  5      mU[        R                  :X  a  UUU 4S jUS'   UUU 4S jUS'   OU[        R                  :X  a  UUU 4S jUS'   UUU 4S jUS'   UU 4S jUS	'   UU 4S
 jUS'   g )Nc                 *   > TR                  XTT5      $ r(   )r  r>  r   r_   rN   r   s     r!   <lambda>(CUDABackend.add_stages.<locals>.<lambda>  s    4>>#QXZd3er$   r   c                 *   > TR                  XTT5      $ r(   )r9  r  s     r!   r  r    s    DOOCSZ\f4gr$   ttgirc                 *   > TR                  XTT5      $ r(   )r?  r  s     r!   r  r    s    D4G4GW^`j4kr$   c                 *   > TR                  XTT5      $ r(   )rm  r  s     r!   r  r    s    t~~cWV`/ar$   llirc                 R   > TR                  XTTR                  R                  5      $ r(   )ry  r   rK   r>  r   rN   r   s     r!   r  r    s    dmmC7TXT_T_TdTd.er$   ptxc                 R   > TR                  XTTR                  R                  5      $ r(   )r  r   rK   r  s     r!   r  r    s    wX\XcXcXhXh0ir$   r   )r   rK   r   TRITONGLUON)r   stagesrN   r   r_   s   ` ` @r!   
add_stagesCUDABackend.add_stages  se    %%gll3
x&eF6NgF7O'kF7Oaveuiwr$   c                 L    [        5       nU SU R                  R                   3$ )Nr   )r5   r   rK   )r   r4   s     r!   r   CUDABackend.hash
  s&    #%!DKK,,-..r$   )r   ) r   r   r   r   r   staticmethodr   r   r   rB   r   r   r   r   r   r   r   r   r   r   r  r9  r?  rm  ry  r  r  	functools	lru_cacher   r   __classcell__)r   s   @r!   r   r      s    O(	 ( (#$# $"y "T "#S #6
>S*_ 5 >;
    M M^ FP,JX	j / /r$   r   ),triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   triton.runtime.errorsr   dataclassesr   r  typingr   r   r   r   typesr   rY   r   r  r  r  r/   pathlibr   r&   
NvidiaToolr+   r  r5   r%   rJ   rO   rU   r^   re   rh   r   r*   r$   r!   <module>r     s%   E E 8 8  , !  - -   	   	  # #5##    iS i i.  
 
 
 T4 4
& & $)? )? )?Xw/+ w/r$   