
    Ji%\                        S SK JrJrJr  S SKJrJrJrJr  S SK	J
r
  S SKJr  S SKJr  S SKrS SKJrJrJrJr  S SKJr  S SKrS SKrS SKrS SKrS SKrS SKrS S	KJr  S
\4S jrS\ S\
RB                  4S jr"\RF                  " 5       SS\ 4S jj5       r$\RF                  " 5       S\ 4S j5       r%S\ 4S jr&\RF                  " 5       S\ 4S j5       r'\RF                  " S5      S 5       r(S\ 4S jr)\" SS9 " S S5      5       r* " S S\5      r+g)    )BaseBackend	GPUTargetLanguage)irpassesllvmnvidia)knobs)
PTXASError)	dataclassN)AnyDictTupleOptional)
ModuleType)Pathtargetc                 @    S[         [        [        [        4   4S jnU$ )Nreturnc                     U R                   R                  nUR                   R                  nX#:X  d   S5       eUS:X  a  gg)Nz%lhs and rhs bitwidth must be the same   )   r       )r   r      )scalarprimitive_bitwidth)lhs_typerhs_typelhs_bitwidthrhs_bitwidths       a/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/triton/backends/nvidia/compiler.pycheck_dot_compatibility-min_dot_size.<locals>.check_dot_compatibility   s@    9999+T-TT+1    )r   int)r   r"   s     r!   min_dot_sizer&      s!    uS#s]7K  #"r$   archr   c                 v    U S:  a  [         R                  R                  $ [         R                  R                  $ )Nd   )r
   r	   ptxas_blackwellptxas)r'   s    r!   	get_ptxasr,   "   s'    +/3;5<<''NELL<N<NNr$   c                     [         R                  R                  nUb  U$ [        R                  " [        U 5      R                  S/5      R                  S5      nU$ )Nz	--versionutf-8)r
   r	   mock_ptx_version
subprocesscheck_outputr,   pathdecode)r'   mock_verversions      r!   get_ptxas_versionr6   &   sL    ||,,H%%y';';[&IJQQRYZGNr$   c                    [        U [        5      (       d   e[        [        U R	                  S5      5      u  pUS:X  a  US:  a  SU-   $ SU-   S-
  $ US:X  a  SU-   $ US:X  a  S	U-   $ US
:  a  SnX1S
-
  S-  -   U-   $ [        SU -   5      e)zC
Get the highest PTX version supported by the current CUDA driver.
.      P   r      F   
   ?      Z   z?Triton only support CUDA 10.0 or higher, but got CUDA version: )
isinstancestrmapr%   splitRuntimeError)cuda_versionmajorminorbase_ptxs       r!   ptx_get_versionrK   /   s    
 lC((((sL..s34LE{19::>!{Ez{Ez{2:++e33
X[gg
hhr$   c                 d    U R                   nUc   [        U5      R                  n[        U5      nU$ N)ptx_versionr,   r5   rK   )optionsr'   rN   rG   s       r!   get_ptx_version_from_optionsrP   G   s2    %%K ..%l3r$   c                 >    [        X5      n[        SU5      nSU 3nU$ )NV   z+ptx)rP   min)rO   r'   rN   llvm_ptx_versionfeaturess        r!   get_featuresrV   O   s.    .w=K 2{+&'(HOr$   c                     [        U S5       n[        R                  " UR                  5       5      R	                  5       sS S S 5        $ ! , (       d  f       g = f)Nrb)openhashlibsha256read	hexdigest)r2   fs     r!   	file_hashr_   ]   s5    	dD	Q~~affh'113 
		s   2A		
A
capabilityc                 $    U S:  a  SOSnSU  U 3$ )NrA   a sm_ )r`   suffixs     r!   sm_arch_from_capabilityrg   c   s!    "$S"FVH%%r$   T)frozenc                      \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S	'   S
r
\\   \S'   S
r\\S'   \R                  R                  r\\   \S'   S
r\\   \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\   \S'   Sr\\   \S'   Sr\\S'   Sr\\   \S'   S
r\\S'   S
r\\S'   Sr \\S'   S r!\\S!'   Sr"\\S"'   S
r#\\S#'   S$r$\\S%'   S& r%S' r&Sr'g
)(CUDAOptionsi      	num_warpsr   num_ctas   
num_stagesr   	warp_sizeNmaxnregrN   ptx_optionsir_overrideTenable_fp_fusionenable_reflect_ftzFlaunch_cooperative_grid
launch_pdl)fp8e5fp8e4b15supported_fp8_dtypesre   !deprecated_fp8_dot_operand_dtypestf32default_dot_input_precision)r}   tf32x3ieeebf16x3bf16x6allowed_dot_input_precisionsmax_num_imprecise_acc_defaultextern_libsdebugcudabackend_namesanitize_overflowr'   rc   instrumentation_modec                    [        [        5      R                  S-  nU R                  c  0 O[	        U R                  5      nUR                  SS 5      (       d2  [        R                  R                  =(       d    [        US-  5      US'   [        R                  U S[        UR                  5       5      5        U R                  S:  a   U R                  U R                  S-
  -  S:X  d   S5       eg )Nlib	libdevicezlibdevice.10.bcr   r   r   znum_warps must be a power of 2)r   __file__parentr   dictgetr
   r	   libdevice_pathrC   object__setattr__tupleitemsrm   )selfdefault_libdirr   s      r!   __post_init__CUDAOptions.__post_init__   s    h..6 ,,4b$t?O?O:P{D11',||'B'B'mc.[lJlFmK$4k6G6G6I0JK~~!t~~!9K'LQR&R 	0/	0R&Rr$   c           	      d   [        U R                  5      n[        S [        US   5       5       5      US'   SR	                  [        UR                  5       5       VVs/ s H  u  p#U SU 3PM     snn5      n[        R                  " UR                  S5      5      R                  5       $ s  snnf )Nc              3   @   #    U  H  u  pU[        U5      4v   M     g 7frM   )r_   ).0kvs      r!   	<genexpr>#CUDAOptions.hash.<locals>.<genexpr>   s     (hGgtq!Yq\):Ggs   r   _-r.   )
r   __dict__r   sortedjoinr   rZ   r[   encoder]   )r   	hash_dictnamevalkeys        r!   hashCUDAOptions.hash   s    '	#((hviXeNfGg(h#h	- hh	@Q9RS9RID4&#9RST~~cjj12<<>> Ts   B,
)(__name__
__module____qualname____firstlineno__rm   r%   __annotations__rn   rp   rq   rr   r   rN   r
   r	   ptxas_optionsrs   rC   rt   ru   boolrv   rw   rx   r{   r   r|   r~   r   r   r   r   r   r   r   r'   r   r   r   __static_attributes__re   r$   r!   rj   rj   i   s    IsHcJIs "GXc]!K!&!;!;K#;!%K#%!d!##$)T)J'<%*<46%uSz6'--/] %*]*.!4.KE4L#"t"D# "#"0?r$   rj   c                     ^  \ rS rSrSr\S\4S j5       rS rS\	4S jr
S\SS4U 4S jjrS\4S	 jrS
 rS rS\\	\4   4S jrS r\S 5       r\S 5       rS rS rS rS rS r\R8                  " 5       S 5       rSrU =r$ )CUDABackend   Nr   c                      U R                   S:H  $ )Nr   )backend)r   s    r!   supports_targetCUDABackend.supports_target   s    ~~''r$   c                     Sn[         R                  " X!5      nU(       d  [        SU 35      e[        UR	                  S5      5      $ )Nz	^sm(\d+)$z(TRITON_OVERRIDE_ARCH must have the form r   )re	fullmatch
ValueErrorr%   group)r   r'   patternmatchs       r!   _parse_archCUDABackend._parse_arch   s>    W+GyQRR5;;q>""r$   r   c                 B    U R                  UR                  5      nSU 3$ )Ncuda:)r   r'   )r   rO   r`   s      r!   get_target_nameCUDABackend.get_target_name   s#    %%gll3
zl##r$   c                 2   > [         TU ]  U5        SU l        g )Ncubin)super__init__
binary_ext)r   r   	__class__s     r!   r   CUDABackend.__init__   s     !r$   c                    SU;   a  US   S:X  a  SUS'   S[         R                  R                  =(       d    SU R                  R                   30nUR                  [        R                  R                  5        Vs0 s H  o3U;   d  M
  X   c  M  X1U   _M     sn5        [        U R                  US   5      5      nUR                  SS5      S:  a  US	:  a  [        S
U S35      eSU;  aG  [        [        R                  5      nUS:  a  UR                  S5        [!        [#        U5      5      US'   SU;  a  US	:  a  SUS'   SU;  a  [         R$                  R&                  US'   US	:X  a  SOSUS'   [        S0 UD6$ s  snf )Nr   consanTr   r'   smrn   r   rA   zBnum_ctas > 1 requires NVIDIA SM90+ (Hopper). Current target is sm_zM. This configuration will fail. Please set num_ctas=1 or target an SM90+ GPU.r{   Y   fp8e4nvr|   )rz   ru   i   @r   r   re   )r
   runtimeoverride_archr   r'   updaterj   __dataclass_fields__keysr%   r   r   r   setr{   addr   r   languagedefault_fp_fusion)r   optsargsr   r`   r{   s         r!   parse_optionsCUDABackend.parse_options   s   !T)d3I.Jh.V DM33NDKK<L<L;M7NO)I)I)N)N)Pu)PAY]T]ZaeahZQQZ)Puv))$v,78
88J"Q&:? !66@\ BNO Q Q "-#&{'G'G#H R$((3+08L1M+ND'(.d:R<J89T)',~~'G'GD#$9Cr9Iq,-"T""/ vs   <	F	F	Fc                 H    UR                   UR                  UR                  4$ rM   )rm   rn   shared)r   metadatas     r!   pack_metadataCUDABackend.pack_metadata   s%    OO
 	
r$   c                     SS K Js  Js  Jn  [	        U R                  UR                  5      5      nUS:  a  UR                  OUR                  [        U R                  5      S.nU$ )Nr   r;   )convert_custom_typesr&   )triton.language.extra.cudar   extrar   r%   r   r'   convert_custom_float8_sm80convert_custom_float8_sm70r&   r   )r   rO   r   r`   codegen_fnss        r!   get_codegen_implementation&CUDABackend.get_codegen_implementation   sV    11))',,78
 0:R/?D++TEdEd%

 r$   c                     SSK Jn  SU0$ )Nr   )r   ztriton.language.extra.libdevice)r   r   )r   r   s     r!   get_module_mapCUDABackend.get_module_map   s    819==r$   c                     [         R                  " U5        [        R                  (       a   [        R                  R                  U5        g g rM   )r	   load_dialectsr   instrumentation)r   ctxs     r!   r   CUDABackend.load_dialects   s2    S!&&''55c: 'r$   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  U5        [        R                  R                  U5        US-  S:  a  [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        UR!                  U S5        U $ )Nr>   	   	make_ttir)r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_symbol_dceadd_loop_unrollrun)modr   optr`   pms        r!   r   CUDABackend.make_ttir   s    __S[[)
!!"%..r2aKK@@D''+#))"-b!$$R(##B'
sK 
r$   c                    UR                   bI  U R                  S[        R                  " U R                  5      R                  UR                   5      5        [        R                  " U R                  5      nUR                  5       nUS-  S:  n[        R                  R                  USU 3UR                  SUR                  5        [        R                  R                  U5        [        R                  R                  XF5        [         R                  R"                  R%                  U5        [        R                  R'                  U5        [        R                  R)                  U5        [        R                  R+                  U5        [        R                  R'                  U5        [        R                  R-                  XCS:  5        [         R                  R"                  R/                  U5        [        R                  R1                  U5        US-  S;   GaC  [        R                  R3                  U5        [        R4                  R7                  U5        [        R                  R9                  U5        [        R4                  R7                  U5        [        R                  R;                  U5        [         R                  R<                  R?                  XBR@                  U5        [        R                  RC                  XBR@                  5        [        R                  RE                  U5        [        R                  RG                  XBR@                  U5        GOUS-  S:  Ga  [        R                  R3                  U5        [        R4                  R7                  U5        [        R                  R9                  U5        [        R                  RI                  U5        [        R                  RK                  US5        [         R                  R"                  RM                  U5        [        R                  RC                  XBR@                  5        [        R                  RE                  U5        [        R                  RO                  XBR@                  5        [        R                  RG                  XBR@                  U5        [        R                  RQ                  U5        [        R                  R;                  U5        [        R                  RK                  US	5        [         R                  R"                  RS                  U5        O[        R                  R9                  U5        [        R4                  R7                  U5        [        R                  R1                  U5        [        R                  RU                  U5        [        R                  R-                  XCS:  5        [        R                  RW                  U5        [         R                  R"                  RY                  U5        US-  S
:  a)  [         R                  R"                  R[                  U5        [        R                  R'                  U5        [         R                  R"                  R]                  U5        [        R                  R_                  U5        [        R                  Ra                  U5        [        R                  R1                  U5        [        R4                  Rc                  U5        [         R                  R"                  Re                  XC5        [         R                  R"                  Rg                  U5        [        R4                  Ri                  U5        [        R4                  Rk                  U5        [        R4                  R7                  U5        URm                  U S5        U Ro                  5       US'   U $ )Nzttg.maxnregr>   r   r   r   r;   )r   r   FTr   
make_ttgirtensordesc_meta)8rr   set_attrr   builderr   get_int32_attrr   r   r   r  add_convert_to_ttgpuirrm   rn   ttgpuiradd_coalesceadd_f32_dot_tcr	   	ttnvgpuiradd_plan_ctaadd_remove_layout_conversionsadd_optimize_thread_localityadd_accelerate_matmuladd_optimize_dot_operands add_optimize_descriptor_encodingadd_loop_aware_cseadd_fuse_nested_loopsr   r  add_triton_licm add_combine_tensor_select_and_ifhopperadd_hopper_warpspecrp   add_assign_latenciesadd_schedule_loopsadd_pipelineadd_optimize_accumulator_initadd_hoist_tmem_allocadd_promote_lhs_to_tmemadd_warp_specializeadd_optimize_partition_warpsadd_remove_tmem_tokensadd_prefetchadd_coalesce_async_copyadd_optimize_tmem_layoutsadd_tma_loweringadd_interleave_tmemadd_reduce_data_duplicationadd_reorder_instructionsr  add_fence_insertionadd_lower_mmaadd_sccpr  r
  get_tensordesc_metadata)r  r   r  r`   r  dump_enabledemuTF32s          r!   r  CUDABackend.make_ttgir   s    ;;"LL

3;;(?(N(Ns{{([\__S[[)(#q(**2zl/CS]]TVX[XdXde##B'%%b2,,R044R833B7,,R044R80025EF@@D&&r*v%NN004MM++B/KK''+MM++B/NN;;B?MM  44RVNN//NNCNN--b1NN''NNLI2#NN004MM++B/KK''+NN88<NN//E:MM##;;B?NN//NNCNN--b1NN..r>>BNN''NNLINN77;NN;;B?NN//D9MM##::2>KK''+''+&&r*##B'0025EF..r299"=q MM##44R844R833B72226//3&&r*$$R(33BC--b1r"b!''+
sL!&)&A&A&C"#
r$   c                    Un[         R                  " UR                  5      nUR                  5         [        R
                  R                  U5        [        R
                  R                  U5        [        R
                  R                  U5        [        R                  R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R!                  U5        [        R
                  R                  U5        [        R"                  R%                  U5        UR'                  US5        UR)                  5       US'   U$ )Ngluon_to_ttgirr  )r   r   r   r   r   gluonr   add_infer_coalesced_encodingsadd_resolve_auto_encodingsr	   r  r2  r  r   r8  r  r   r  r#  r
  r9  )r   srcr   rO   r`   r  r  s          r!   r>  CUDABackend.gluon_to_ttgir@  s    __S[[)
  $2226//3004&&r*r"&&r*&&r*77;
s$%&)&A&A&C"#
r$   c                 4   [        X0R                  R                  5      nUn[        R                  " UR
                  5      nUR                  5         [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  R!                  XtU5        [        R                  R"                  R%                  U5        [        R                  R"                  R'                  U5        [(        R*                  R,                  S:X  a  [        R                  R/                  U5        [        R                  R1                  U5        [        R                  R"                  R3                  Xt5        [4        R6                  (       a*  [4        R6                  R9                  SXvR
                  5        [        R                  R                  R;                  XtU5        [        R<                  R?                  U5        [        R<                  RA                  U5        [        R                  R"                  RC                  U5        [        R                  R"                  RE                  U5        [        R<                  R?                  U5        [        R<                  RA                  U5        [        R<                  RG                  U5        [        R                  RI                  U5        [(        R*                  RJ                  (       d>  [(        R*                  RL                  (       d  [        RN                  RQ                  U5        [4        R6                  (       a*  [4        R6                  R9                  SXvR
                  5        URS                  US5        [(        R*                  RL                  (       a  [(        R*                  RJ                  (       da  [        R                  " UR
                  5      nUR                  5         [        RN                  RQ                  U5        URS                  US5        [        R                  " UR
                  5      nUR                  5         [        RN                  RU                  U5        URS                  US5        [V        RX                  " 5         [V        R
                  " 5       n[(        R*                  RZ                  (       a  []        S5      e[V        R^                  " Xh5      n	[a        U5      n
[c        X0R                  R                  5      nSn[        Rd                  " 5         [V        Rf                  " XX5        URh                  (       a  [        Rj                  " U	5        URl                  (       aQ  [        Rn                  " U	5      (       a6  URl                   VVs/ s H  u  pUPM	     nnn[V        Rp                  " X5        [V        Rr                  " U	[V        Rt                  5        URw                  S	5      nUb  UUS
'   URw                  S5      US'   URw                  S5      US'   URw                  S5      US'   URw                  S5      US'   URw                  S5      =(       d    SUS'   URw                  S5      =(       d    SUS'   [y        U	5      nA	AU$ s  snnf )Nr   ttgpuir_to_llvmirllvmir_to_llvm	make_llirzmake_llir.disable_line_infoz,make_llir.dump_ir_extract_di_local_variableszYAddress Sanitizer Error: Address sanitizer is currently only supported on the AMD backendnvptx64-nvidia-cudazttg.total-num-warpsrm   z
ttg.sharedr   zttg.tensor_memory_size	tmem_sizezttg.global_scratch_memory_sizeglobal_scratch_sizez#ttg.global_scratch_memory_alignmentglobal_scratch_alignzttg.profile_scratch_memory_sizer   profile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)=rP   r   r'   r   r   r   r   r   r  r#  add_allocate_warp_groupsconvertadd_scf_to_cfr?  r   r	   add_allocate_shared_memory_nvr  add_allocate_tensor_memoryadd_check_matmul_two_ctar
   compilationr   add_concurrency_sanitizer"add_allocate_global_scratch_memoryadd_proxy_fence_insertionr   r   patchadd_to_llvmirr   r  r  add_nvgpu_to_llvmadd_warp_specialize_to_llvmr  add_nvvm_to_llvmdisable_line_info"dump_ir_extract_di_local_variablesllvmiradd_di_scoper
  add_di_local_variabler   init_targetsenable_asanrF   	to_modulerg   rV   set_short_ptrattach_datalayoutrv   set_nvvm_reflect_ftzr   has_extern_depslink_extern_libsoptimize_moduleOPTIMIZE_O3get_int_attrrC   )r   rB  r   rO   r`   rN   r  r  r   llvm_modprocrU   tripler   r2   pathstotal_num_warpsrets                     r!   rG  CUDABackend.make_llirS  s   27KK<L<LM__S[[)
77;//3$$R(  $;;BKX::2>88<11X=NN44R899"=99"I&&''--.A2{{S++BKH''+b!11"5;;B?''+b!$$R(''+  225;L;L;o;oMM&&r*&&''--.>KKP
sK ??$$66__S[[1!**2.s9: -BOOMM//3FF3FG 	,,.((km m>>#/&z2)9)9:&x@%%''16#9#9(#C#C.5.A.AB.AltT.AEB!!(2Xt'7'78 **+@A&$3H[! --l; # 0 01I J*-*:*:;[*\&'+.+;+;<a+b'(+.+;+;<]+^+cbc'(,/,<,<=c,d,ihi()(m
' Cs   \c           	         [        X0R                  R                  5      nSn[        U5      n[	        X0R                  R                  5      nS/n	[
        R                  " XXxXR                  S5      n
[        R                  " SU
5      n[        U5      S:X  d   eUS   US'   US-   S	US-   3n[        R                  " S
SU 3U
[        R                  S9n
[        R                  " SSU 3U
[        R                  S9n
[        R                  R                  (       d  [        R                  " SSU
5      n
[        R                   R"                  (       a  [%        S5        [%        U
5        U
$ )NrH  znvptx-mad-wide-optFz(.visible .entry ([a-zA-Z_][a-zA-Z0-9_]*)r   r   r   r>   r8   z\.version \d+\.\d+z	.version )flagsz\.target sm_\d+z.target sm_z,\s*debug|debug,\s*rc   z // -----// NVPTX Dump //----- //)rP   r   r'   rg   rV   r   translate_to_asmru   r   findalllensub	MULTILINEr
   rT  r^  r	   
dump_nvptxprint)r   rB  r   r  r`   rN   ro  rn  rU   ru  rr  namess               r!   make_ptxCUDABackend.make_ptx  s3   238H8HI&&z2[[%5%56%&##CH\H\^cd

FL5zQ 8$b);r>*:;ff*i},EsRTR^R^_ff';zl)CSPRP\P\]  CC &&/S9C<<""45#J
r$   c           
         [        U R                  R                  5      R                  n[        R
                  " SSSS9 n[        R
                  " SSSS9 nUR                  U5        UR                  5         UR                  S-   n/ n	[        R                  R                  (       a  U	SS	/-  n	O,[        R                  R                  (       a  U	S
/-  n	OU	S/-  n	UR                  (       a  / OS/n
[        U5      n[        R                  R                  (       a  SS/O/ nUR                   (       a  UR                   R#                  S5      O/ nU/U	QU
QSPUQUQSU 3PUR                  PSPUPn [$        R&                  " USSUS9  [        R                  R(                  (       a7  [+        UR                  5       n[-        UR/                  5       5        S S S 5        [0        R                  R3                  UR                  5      (       a   [0        R4                  " UR                  5        [0        R                  R3                  UR                  5      (       a   [0        R4                  " UR                  5        [+        US5       nUR/                  5       nS S S 5        [0        R                  R3                  U5      (       a  [0        R4                  " U5        S S S 5        S S S 5        W$ ! , (       d  f       GN= f! [$        R6                   Ga  n[+        UR                  5       nUR/                  5       nS S S 5        O! , (       d  f       O= f[0        R                  R3                  UR                  5      (       a   [0        R4                  " UR                  5        UR8                  S:X  a  SnO3UR8                  S[:        R<                  -   :X  a  SnOSUR8                   3nU SW SSR?                  U5       S3n[-        SU SU S35        [A        U5      eS nAff = f! , (       d  f       GN= f! , (       d  f       GNe= f! , (       d  f       W$ = f) NFwz.ptx)deletemoderf   rz.logz.oz	-lineinfoz-suppress-debug-infoz-gz--fmad=falsez--opt-level0 z-vz--gpu-name=z-oT)check	close_fdsstderr   z!Internal Triton PTX codegen error   z`ptxas` raised SIGSEGVz`ptxas` failed with error code z
`ptxas` stderr:
z
Repro command: 
zC

================================================================
z

zy
================================================================
please share the reproducer above with Triton project.
rX   )!r,   r   r'   r2   tempfileNamedTemporaryFilewriteflushr   r
   rT  r]  r	   disable_ptxas_optru   rg   rs   rE   r0   r
  dump_ptxas_logrY   r|  r\   osexistsremoveCalledProcessError
returncodesignalSIGSEGVr   r   )r   rB  r   r  r`   r+   fsrcflogfbin
debug_infofmadr'   disable_optptx_extra_options	ptxas_cmdlog_fileelogerrorr^   r   s                        r!   
make_cubinCUDABackend.make_cubin  s   $++**+00((COSW''u3vNRVJJsOJJL99t#DJ  22{,BCC
//tf$
 {m+
--2N3CD*:6D 38,,2P2P=#.VXK ?Boo 5 5c :SU "%)+/2=@QU`ae`fSgimirirI$(ydS<<..diiHhmmo. ) 77>>$)),,IIdii(77>>$)),,IIdii(: dD!Q "ww~~d##		$M O PP O ) 00 ($))_"--/C %__77>>$)),,IIdii(<<3&?E\\S6>>%994E=all^LE!7 #--0E 2++.88I+>*?rC       !''5(8 "!G ON POP s    QDP:4AL ?K.B$L =P:	P(AP:Q.
K=8L  P%P *M	;	P 
MCP  P%%P:(
P72P::
Q		Q
Qc                   ^ ^^ T R                  TR                  5      mU[        R                  :X  a  UUU 4S jUS'   UUU 4S jUS'   OU[        R                  :X  a  UUU 4S jUS'   UUU 4S jUS'   UU 4S jUS	'   UU 4S
 jUS'   [
        R                  R                  b$  [
        R                  R                  T UTUT5        g g )Nc                 *   > TR                  XTT5      $ rM   )r   rB  r   r`   rO   r   s     r!   <lambda>(CUDABackend.add_stages.<locals>.<lambda>  s    4>>#QXZd3er$   r  c                 *   > TR                  XTT5      $ rM   )r  r  s     r!   r  r    s    DOOCSZ\f4gr$   ttgirc                 *   > TR                  XTT5      $ rM   )r>  r  s     r!   r  r    s    D4G4GW^`j4kr$   c                 *   > TR                  XTT5      $ rM   )rG  r  s     r!   r  r     s    t~~cWV`/ar$   llirc                 R   > TR                  XTTR                  R                  5      $ rM   )r~  r   r'   rB  r   rO   r   s     r!   r  r  !  s    dmmC7TXT_T_TdTd.er$   ptxc                 R   > TR                  XTTR                  R                  5      $ rM   )r  r   r'   r  s     r!   r  r  "  s    wX\XcXcXhXh0ir$   r   )r   r'   r   TRITONGLUONr
   r   add_stages_inspection_hook)r   stagesrO   r   r`   s   ` ` @r!   
add_stagesCUDABackend.add_stages  s    %%gll3
x&eF6NgF7O'kF7Oaveuiw==33?MM44T67HV`a @r$   c                 v    [        U R                  R                  5      nU SU R                  R                   3$ )Nr   )r6   r   r'   )r   r5   s     r!   r   CUDABackend.hash&  s2    #DKK$4$45!DKK,,-..r$   )r   ) r   r   r   r   r   staticmethodr   r   r   rC   r   r   r   r   r   r   r   r   r   r   r   r  r>  rG  r~  r  r  	functools	lru_cacher   r   __classcell__)r   s   @r!   r   r      s    O(	 ( (#$# $"y "T "#S #>
>S*_ 5 >;
    G GR&^@4JXb / /r$   r   )r;   ),triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   triton.runtime.errorsr   dataclassesr   r  typingr   r   r   r   typesr   rZ   r   r  r  r  r0   pathlibr   r&   r%   
NvidiaToolr,   r  r6   rK   rP   rV   r_   rg   rj   r   re   r$   r!   <module>r     s6   E E 8 8  , !  - -   	   	  # #OC OE,, O C   iS i i.  
 
 
 T4 4
& & $)? )? )?XS/+ S/r$   