
    JiY                         S SK JrJrJr  S SKJrJrJrJr  S SK	J
r
  S SKJr  S SKJrJrJr  S SKJr  S SKrS SKrS SKrS SKrS SKrS SKJr  S	\4S
 jrS rS r\" SS9 " S S5      5       r " S S\5      rg)    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     S $ )Nc                     g)N)   r   r    )lhs_typerhs_types     ^/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/triton/backends/amd/compiler.py<lambda>"get_min_dot_size.<locals>.<lambda>   s    i    r   r   s    r   get_min_dot_sizer      s
     0/r   c                     [         R                  R                  c  U S:H  =(       d    U S:H  =(       a    USL $ [         R                  R                  $ )Ngfx942gfx950T)r
   r	   use_block_pingpong)archuse_async_copys     r   is_pingpong_schedule_enabledr$      sI    --5 HM!1!Ln6L X;@99;W;WXr   c                 v    [         R                  R                  c  U S:H  $ [         R                  R                  $ )Nr   )r
   r	   use_in_thread_transposer"   s    r   is_in_thread_transpose_enabledr(      s.    !&!B!B!JDHqPUPYPYPqPqqr   T)frozenc                   Z   \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S	'   S
r
\\S'   Sr\\S'   Sr\\S'   S
r\\S'   Sr\\   \S'   Sr\\   \S'   Sr\\S'   Sr\\   \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S '   S!r\\S"'   S#r\\S$'   S% rS& r Sr!g
)'
HIPOptions      	num_warpsr   waves_per_eu   
num_stagesr   num_ctasNextern_libsFdebugTsanitize_overflowr"   )fp8e4nvfp8e5fp8e5b16fp8e4b8supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r<   bf16x3bf16x6allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridmatrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_name instrumentation_modenoneschedule_hintc                    [        U R                  SS 5      nUS:  a  SOSn[        R                  U SU5        U R                  S:  a   U R                  U R                  S-
  -  S:X  d   S	5       eU R                  S
:X  aK  U R
                  S:w  a;  [        R                  " SU R
                   S35        [        R                  U SS5        [        [        5      R                  S-  nU R                  c  0 O[        U R                  5      nS H  n[        X5 S3-  5      XE'   M     [        R                  U S[        UR                  5       5      5        g )N   
       @   	warp_sizer   r   znum_warps must be a power of 2r    zckpack is deprecated starting from gfx950 and will be removed in later releases. So for now kpack = z7 will be overwritten to 1 to make transitioning easier.rD   lib)ocmlocklz.bcr3   )intr"   object__setattr__r.   rD   warningswarnr   __file__parentr3   dictstrtupleitems)self	gfx_majorrS   default_libdirr3   rT   s         r   __post_init__HIPOptions.__post_init__N   s<   		!B(	#r/Br	4i8~~!t~~!9K'LQR&R 	-,	-R II!

aMMuvz  wA  wA  vB  By  z tWa0h..6 ,,4b$t?O?O:P#C">e3K#?@K $4k6G6G6I0JKr   c           	          SR                  U R                  R                  5        VVs/ s H  u  pU SU 3PM     snn5      n[        R                  " UR                  S5      5      R                  5       $ s  snnf )N_-utf-8)join__dict__ra   hashlibsha256encode	hexdigest)rb   namevalkeys       r   hashHIPOptions.hasha   sa    hh9L9L9NO9NID4&#9NOP~~cjj12<<>> Ps   A7
)"__name__
__module____qualname____firstlineno__r.   rW   __annotations__r/   r1   r2   r3   r^   r4   boolr5   r"   r_   r:   r   r;   r=   r@   rA   rB   rC   rD   rE   rF   rH   rJ   rL   re   rt   __static_attributes__r   r   r   r+   r+      s    IsL#JHcKE4"t"D#
 (S%*R46%uSz6'--/K %*K!d!$)T) !#!E3N$$)*!3*L# "#"*  M3L&?r   r+   c                   ^  ^  \ rS rSrSrSr\S\4S j5       rS\SS4U 4S jjr	S\
4S jrS\4S	 jrS
 rS rS\\
\4   4S jrS r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       rS r\R>                  " 5       S 5       r Sr!U =r"$ )
HIPBackendf   NFr   c                      U R                   S:H  $ )NrG   )backendr   s    r   supports_targetHIPBackend.supports_targetj   s    ~~&&r   returnc                 t   > [         TU ]  U5        [        UR                  [        5      (       d   eSU l        g )Nhsaco)super__init__
isinstancer"   r_   
binary_ext)rb   r   	__class__s     r   r   HIPBackend.__init__n   s.     &++s++++!r   c                      SUR                    3$ )Nhip:r'   rb   optionss     r   get_target_nameHIPBackend.get_target_names   s    gll^$$r   c                    S[         R                  R                  =(       d    U R                  R                  0nUR                  SS5      S:  aQ  [        R                  " U R                  R                  5      (       d"  [        SU R                  R                   35      eU R                  R                  S:X  aB  [        [        R                  5      nUR                  S15        [        [        U5      5      US'   SU;  a%  [        [        [        R                  5      5      US'   U R                  R                  S	:X  aC  [        [        R                   5      nUR                  S
S15        [        [        U5      5      US'   SU;  a  [         R"                  R$                  US'   UR                  [        R&                  R)                  5        Vs0 s H  oUU;   d  M
  X   c  M  XQU   _M     sn5        [        S0 UD6$ s  snf )Nr"   r2   r   znum_ctas > 1 not supported on r   tf32r@   r:   r    r8   r9   r;   rA   r   )r
   runtimeoverride_archr   r"   getr	   supports_multi_cta_launch
ValueErrorsetr+   r@   updater`   sortedr:   r;   languagedefault_fp_fusion__dataclass_fields__keys)rb   optsargsr@   r;   ks         r   parse_optionsHIPBackend.parse_optionsv   s   33Gt{{7G7GH88J"Q&s/L/LT[[M]M]/^/^=dkk>N>N=OPQQ ;;x'+.z/V/V+W((//938@\9]3^D/0!-+0
8W8W1X+YD'(;;x'03J4`4`0a--44j)5LM8=fEf>g8hD45T)',~~'G'GD#$)H)H)M)M)Ou)OAX\S\ZaeahZQQZ)Ouv!D!! vs   	H H'	Hc                 H    UR                   UR                  UR                  4$ N)r.   r2   shared)rb   metadatas     r   pack_metadataHIPBackend.pack_metadata   s%    OO
 	
r   c                 0    S[        U R                  5      0$ )Nmin_dot_size)r   r   r   s     r   get_codegen_implementation%HIPBackend.get_codegen_implementation   s     0 =>>r   c                     SSK Jn  SU0$ )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )rb   r   s     r   get_module_mapHIPBackend.get_module_map   s    719==r   c                     [         R                  " U5        [        R                  (       a   [        R                  R                  U5        g g r   )r	   load_dialectsr~   instrumentation)rb   ctxs     r   r   HIPBackend.load_dialects   s2    #%%&&44S9 &r   c                     SS K nSn[        U S5      (       a  U R                  5       U:*  $ [        XR                  5      (       a2  [        U S5      (       a!  U R                  5       R                  5       U:*  $ g)Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbHIPBackend.is_within_2gb   sd    
3$$==?j00c<<((WS:K-L-L&&(--/:==r   c                 N    [         R                  " U 5      nSU ;   a  USS//-  nU$ )NSztt.pointer_rangerQ   )r   
parse_attr)descrets     r   r   HIPBackend.parse_attr   s1    $$T*$;',--C
r   c                     [         R                  " U 40 UD6n[        R                  R                  (       a  [
        R                  U 5      (       a  US-  nU$ )Nr   )r   get_tensor_specializationr
   r	   use_buffer_opsr~   r   )r   kwargsr   s      r   r   $HIPBackend.get_tensor_specialization   sB    33CB6B99##
(@(@(E(E3JC
r   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R!                  U5        UR#                  U S5        U $ )N	make_ttir)r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   r   HIPBackend.make_ttir   s    __S[[)
!!"%..r2<<R@''+#))"-b!##B'$$R(##B'
sK 
r   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  USUR                   3UR                  UR                  UR                  5        UR                  U S5        [         R                  " U R                  5      nUR                  5         Sn[        R                  R                  U5        [        R                  R                  X45        [        R                  R                  U5        [        R                  R!                  U5        ["        R                  R                  R%                  X2R                  UR&                  UR(                  5        [        R                  R                  U5        ["        R                  R                  R+                  U5        ["        R                  R                  R-                  X2R                  5        ["        R                  R                  R/                  U5        [        R                  R1                  U5        [        R2                  R5                  U5        [        R
                  R7                  U5        [        R2                  R5                  U5        [8        R"                  R:                  n[=        UR                  U5      n["        R                  R                  R?                  X2R@                  5        ["        R                  R                  RC                  X5U5        U(       a3  ["        R                  R                  RE                  X2R                  5        [        R2                  R5                  U5        URF                  RI                  5       S:w  aK  URF                  RK                  S5       H,  n["        R                  R                  RM                  X75        M.     [        R                  R                  U5        [        R                  RO                  U5        [Q        UR                  5      (       aH  ["        R                  R                  RS                  U5        [        R                  R                  U5        ["        R                  R                  RU                  U5        U(       aC  UR@                  S:  a3  ["        R                  R                  RW                  X2R@                  5        [8        R"                  RX                  (       a  ["        R                  R                  R[                  U5        [        R2                  R5                  U5        ["        R                  R                  R]                  UUR                  [8        R"                  R^                  [8        R"                  R`                  5        ["        R                  R                  Rc                  U5        [        R2                  R5                  U5        [        R2                  Re                  U5        [        R2                  Rg                  U5        UR                  U S5        U Ri                  5       US'   U $ )	Nr   make_ttgir_earlyFrK   ,r   
make_ttgirtensordesc_meta)5r   r   r   r   r   r   add_convert_to_ttgpuirr"   r.   rS   r2   r   ttgpuiradd_coalesceadd_f32_dot_tcadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulrC   rD   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r
   r#   r$   add_schedule_loopsr1   add_pipelineadd_coalesce_async_copyrL   lowersplitinsert_instruction_sched_hintsadd_reduce_data_duplicationr(   add_in_thread_transposeadd_reorder_instructionsadd_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsuse_buffer_atomics%buffer_ops_analyze_small_tensor_rangeadd_fold_true_cmpir   r   get_tensordesc_metadata)r   r   r   r   emuTF32r#   r!   hints           r   r   HIPBackend.make_ttgir   s   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s&'__S[[)
##B'%%b244R833B7

00\\7C_C_ahanano44R8

004

44RF

77;,,R0''+##B'''+119',,W

--b2D2DE

''<NOJJ66r<<H''+  &&(F2--33C8

""AA"K 944R82226)',,77JJ66r:NN88<

33B7'"4"4q"8JJ11"6H6HI99##JJ88<MM++B/JJ88		,,		??	 	

--b1''+b!$$R(
sL!&)&A&A&C"#
r   c                 (   U n[         R                  " UR                  5      nUR                  5         [        R
                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        UR                  US5        UR!                  5       US'   U$ )Ngluon_to_ttgirr   )r   r   r   r   r   gluonr   add_resolve_auto_encodingsr   add_sccpr   add_loop_aware_cser   r    add_combine_tensor_select_and_ifr   r  )srcr   r   r   r   s        r   r	  HIPBackend.gluon_to_ttgir  s    __S[[)
  $//3r"&&r*&&r*77;
s$%&)&A&A&C"#
r   c                    U n[         R                  " UR                  5      nUR                  5         [        R
                  R                  R                  XBR                  5        Sn[        R
                  R                  R                  XBR                  U5        [
        R                  R                  U5        [
        R                  R                  U5        [
        R                  R                  U5        [        R
                  R                  R                  U5        [         R"                  (       a*  [         R"                  R%                  SXCR                  5        Sn[        R
                  R                  R'                  XBR                  U5        [
        R(                  R+                  U5        [
        R(                  R-                  U5        [
        R                  R/                  U5        [
        R                  R1                  U5        [
        R(                  R+                  U5        [
        R(                  R-                  U5        [
        R(                  R3                  U5        UR4                  R7                  5       S:w  a>  [        R
                  R                  R9                  XBR                  UR:                  5        [         R"                  (       a*  [         R"                  R%                  SXCR                  5        [<        R>                  R@                  (       d>  [<        R>                  RB                  (       d  [
        RD                  RG                  U5        [        R
                  R                  RI                  XF5        URK                  US5        [<        R>                  RB                  (       a  [<        R>                  R@                  (       da  [         R                  " UR                  5      nUR                  5         [
        RD                  RG                  U5        URK                  US5        [         R                  " UR                  5      nUR                  5         [
        RD                  RM                  U5        URK                  US5        [N        RP                  " 5         [N        R                  " 5       n[N        RR                  " X75      n[        RT                  " U5        S	n	[<        R>                  RV                  (       a  S
n	[N        RX                  " U[        RZ                  UR                  U	5        [        R\                  " XR                  5        [        R^                  " US5        [        R`                  " USS5        [        R`                  " USS5        [        R`                  " USS5        [        R`                  " USURb                  S:H  5        URe                  5        V
s/ s H  oRg                  5       (       a  M  U
PM     nn
US   Ri                  [        Rj                  5        US   Rm                  SSURn                  URb                  -   35        SUR4                  Rq                  S5      ;   a  US   Rm                  SS5        US   Rm                  SS5        US   Rm                  SURr                   SURr                   35        URt                  (       a  SOSnUS   Rm                  SU5        [<        R>                  RV                  (       a'  US   Rw                  S
5        US   Ry                  5         [        Rz                  " US   5        [<        R>                  RV                  (       a\  [}        [~        5      R                  S-  n[        US -  5      [        US!-  5      [        US"-  5      /n[N        R                  " X5        OtUR                  (       ac  UR                   VVs/ s H%  u  nn[        R                  " X5      (       d  M#  UPM'     nnn[        U5      S:  a  [N        R                  " X5        [N        R                  " U[N        R                  UR                  S	/ UR                  5        [        R                  " UR                  5      (       a<  US   R                  S#5        US   R                  S$5        US   R                  S%5        [<        R                  R                  (       a  [        R                  " US   5        U R                  S&5      US''   U R                  S(5      =(       d    SUS)'   U R                  S*5      =(       d    S+US,'   [        R                  " U5        [        R                  " U5        [        U5      $ s  sn
f s  snnf )-Nr   ttgpuir_to_llvmirTrK   llvmir_to_llvm	make_llirzmake_llir.disable_line_infoz,make_llir.dump_ir_extract_di_local_variablesrI   +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rR   zamdgpu-flat-work-group-sizez1,zmemory-bound-attentionr   zamdgpu-sched-strategyziterative-ilpzuniform-work-group-sizetruezamdgpu-waves-per-euz, zpreserve-signr<   zdenormal-fp-math-f32rT   z
asanrtl.bczocml.bczockl.bczamdgpu-no-workgroup-id-xzamdgpu-no-workgroup-id-yzamdgpu-no-workgroup-id-zz
ttg.sharedr   zttg.profile_scratch_memory_sizeprofile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)Pr   r   r   r   r	   r   r   add_update_async_wait_countr"   add_optimize_lds_usageconvertadd_scf_to_cfr
  r   add_index_to_llvmiradd_allocate_shared_memoryr~   r   patchadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rL   r   lower_instruction_sched_hintsr1   r
   compilationdisable_line_info"dump_ir_extract_di_local_variablesllvmiradd_di_scopeadd_builtin_func_to_llvmirr   add_di_local_variabler   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrS   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr.   r   r/   rE   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   r\   r]   r_   link_extern_libsr3   need_extern_liblenoptimize_moduleOPTIMIZE_O3rA   has_architected_sgprsremove_fn_attrscalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r  r   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modtarget_featuresfnfnsdenormal_moderd   pathsrq   paths                    r   r  HIPBackend.make_llir  s   __S[[)


66r<<H 

11"llOT$$R(  $**2.

55b9%%&&,,-@"kkR 	

((\\9E''+b!''+**2.''+b!$$R(  &&(F2JJ<<RwOaOab %%&&,,-=r;;O  225;L;L;o;oMM&&r*

55bD
sK ??$$66__S[[1!**2.s9: -BOOMM//3FF3FG 	,,.>>#/  *((&Ox):):GLL/Z 	Hll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224P4b<M<M<Or4PA > >?A8Bw?P?PQXQbQb?b>c:de#w'<'<'B'B3'GGF6HA4f= 	A0W5I5I4J"WMaMaLb2cd+2+E+E6A1=A((F((2F##%
 	  Q(((!(^22U:NN\12NY./NY./E
 !!(2  .5.A.Ai.AltTSEXEXYaEhT.AEi5zA~%%h6Xt'7'7r2wOgOgh $$W\\22F!!"<=F!!"<=F!!"<=99**33CF; !--l;+.+;+;<]+^+cbc'(,/,<,<=c,d,ihi()$$X. 	  *8}A QJ js   gg"g9gc           	         [         R                  " SU 5      n[        U5      S:X  d   eUS   US'   / nSUR                  ;   a  SOSn[        R
                  " U R                  S5      5      R                  5       nUS   S	-   U-   n[        R                  " U [        R                  UR                  XTUR                  U5      n[        R                  " U [        R                  UR                  XTUR                  U5        [        R                  " U [        R                  UR                  XTUR                  S
5      n	[        R                  R                   (       a  [#        S5        [#        U	5        U	$ )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rq   gfx11z-real-true16rI   rj   rh   Fz!// -----// AMDGCN Dump //----- //)refindallrB  r"   rm   rn   ro   rp   r   translate_to_mirr	   r4  rA   dump_sched_dagtranslate_to_asmr
   dump_amdgcnprint)
r  r   r   namesflagsfeaturesir_hashdump_file_idrh   amdgcns
             r   make_amdgcnHIPBackend.make_amdgcn  s'   
 

QSVW5zQ 8%,%<>"..G!45??AQx#~/!!#s'8'8',,Y`YqYq".0C!2!2GLL(SZSkSk(	*&&sC,=,=w||X^e^v^v',.99  56&Mr   c                    Sn[         R                  R                  (       a  Sn[        R                  " XR
                  U5      n[        R                  " 5        n[        R                  " 5        n[        UR                  S5       nUR                  U5        S S S 5        [        R                  " UR                  UR                  5        S S S 5        [        UR                  S5       nUR                  5       n	S S S 5        S S S 5        W	$ ! , (       d  f       Nz= f! , (       d  f       NX= f! , (       d  f       N:= f! , (       d  f       W	$ = f)NrI   r  wbrb)r
   r(  r2  r	   assemble_amdgcnr"   tempfileNamedTemporaryFileopenrq   write
link_hsacoread)
r  r   r   rO  r   tmp_outtmp_infd_infd_outr   s
             r   
make_hsacoHIPBackend.make_hsaco  s    ((&O##CG((*g,,.&&++t,KK& -v{{GLL9 / gllD)Vkkm * + 
 -, /. *) +* 
sT   E 0DD3DE *D/;E 
DD
D,	(E /
D=	9E  
Ec                 P  ^ ^ U[         R                  :X  a  UU 4S jUS'   UU 4S jUS'   OU[         R                  :X  a
  UU 4S jUS'   UU 4S jUS'   UU 4S jUS	'   UU 4S
 jUS'   [        R                  R
                  b$  [        R                  R                  T UTUS 5        g g )Nc                 (   > TR                  XT5      $ r   )r   r  r   r   rb   s     r   r   'HIPBackend.add_stages.<locals>.<lambda>  s    4>>#QX3Yr   r   c                 (   > TR                  XT5      $ r   )r   ry  s     r   r   rz    s    DOOCSZ4[r   ttgirc                 (   > TR                  XT5      $ r   )r	  ry  s     r   r   rz    s    D4G4GW^4_r   c                 (   > TR                  XT5      $ r   )r  ry  s     r   r   rz    s    t~~cW/Ur   llirc                 (   > TR                  XT5      $ r   )re  ry  s     r   r   rz    s    1A1A#QX1Yr   rd  c                 (   > TR                  XT5      $ r   )ru  ry  s     r   r   rz    s    w0Wr   r   )r   TRITONGLUONr
   r   add_stages_inspection_hook)rb   stagesr   r   s   ` ` r   
add_stagesHIPBackend.add_stages  s    x&YF6N[F7O'_F7OUvYxWw==33?MM44T67HVZ[ @r   c                     U R                    $ r   r   )rb   s    r   rt   HIPBackend.hash  s    ++r   )r   )#rv   rw   rx   ry   r   %supports_native_tensor_specializationstaticmethodr   r   r   r_   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	  r  re  ru  r  	functools	lru_cachert   r|   __classcell__)r   s   @r   r~   r~   f   sQ   O,1)'	 ' '"y "T "
%# %"S "2
?>S*_ 5 >
:
          < <|    Z Zx  .  
\    r   r~   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   dataclassesr   typingr   r   r   typesr   rm   rk  rX  r  rZ   pathlibr   r   r$   r(   r+   r~   r   r   r   <module>r     sz    E E 5 5  ! # #    	   0Y 0X
r $D? D? D?NI  I r   