
    9iQ                         S SK JrJrJr  S SKJrJrJrJr  S SK	J
r
  S SKJr  S SKJrJrJr  S SKJr  S SKrS SKrS SKrS SKrS SKrS SKJr  S	\4S
 jrS rS r\" SS9 " S S5      5       r " S S\5      rg)    )BaseBackend	GPUTargetLanguage)irpassesllvmamd)knobs)	dataclass)AnyDictTuple)
ModuleTypeN)Pathtargetc                     S $ )Nc                     g)N   r   r    )lhs_typerhs_types     \/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/triton/backends/amd/compiler.py<lambda>"get_min_dot_size.<locals>.<lambda>   s    i    r   r   s    r   get_min_dot_sizer      s
     0/r   c                     [         R                  R                  c  U S:H  =(       d    U S:H  =(       a    USL $ [         R                  R                  $ )Ngfx942gfx950T)r
   r	   use_block_pingpong)archuse_async_copys     r   is_pingpong_schedule_enabledr%      sI    --5 HM!1!Ln6L X;@99;W;WXr   c                 v    [         R                  R                  c  U S:H  $ [         R                  R                  $ )Nr    )r
   r	   use_in_thread_transposer#   s    r   is_in_thread_transpose_enabledr)      s.    !&!B!B!JDHqPUPYPYPqPqqr   T)frozenc                   h   \ rS rSr% Sr\\S'   Sr\\S'   Sr\\S'   Sr	\\S'   S	r
\\S
'   Sr\\S'   Sr\\S'   Sr\\S'   S	r\\S'   Sr\\   \S'   Sr\\   \S'   Sr\\S'   Sr\\   \S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S'   Sr\\S '   S!r\\S"'   S#r\\S$'   S%r \\S&'   S' r!S( r"Sr#g	))
HIPOptions      	num_warpsr   waves_per_eu   
num_stagesnum_ctasNextern_libsr   cluster_dimsFdebugTsanitize_overflowr#   )fp8e4nvfp8e5fp8e5b16fp8e4b8supported_fp8_dtypesr   !deprecated_fp8_dot_operand_dtypesieeedefault_dot_input_precision)r>   allowed_dot_input_precisionsenable_fp_fusionlaunch_cooperative_gridr   matrix_instr_nonkdimkpackallow_flush_denormmax_num_imprecise_acc_defaulthipbackend_name instrumentation_modenoneschedule_hintc                    [        U R                  SS 5      nUS:  a  SOSn[        R                  U SU5        U R                  S:  a   U R                  U R                  S-
  -  S:X  d   S	5       eU R                  S
:X  aK  U R
                  S:w  a;  [        R                  " SU R
                   S35        [        R                  U SS5        [        [        5      R                  S-  nU R                  c  0 O[        U R                  5      nS H  n[        X5 S3-  5      XE'   M     [        R                  U S[        UR                  5       5      5        g )N   
       @   	warp_sizer   r   znum_warps must be a power of 2r!   zckpack is deprecated starting from gfx950 and will be removed in later releases. So for now kpack = z7 will be overwritten to 1 to make transitioning easier.rD   lib)ocmlocklz.bcr4   )intr#   object__setattr__r/   rD   warningswarnr   __file__parentr4   dictstrtupleitems)self	gfx_majorrS   default_libdirr4   rT   s         r   __post_init__HIPOptions.__post_init__G   s<   		!B(	#r/Br	4i8~~!t~~!9K'LQR&R 	0/	0R II!

aMMuvz  wA  wA  vB  By  z tWa0h..6 ,,4b$t?O?O:P#C">e3K#?@K $4k6G6G6I0JKr   c           	          SR                  U R                  R                  5        VVs/ s H  u  pU SU 3PM     snn5      n[        R                  " UR                  S5      5      R                  5       $ s  snnf )N_-zutf-8)join__dict__ra   hashlibsha256encode	hexdigest)rb   namevalkeys       r   hashHIPOptions.hashZ   sa    hh9L9L9NO9NID4&#9NOP~~cjj12<<>> Ps   A7
)$__name__
__module____qualname____firstlineno__r/   rW   __annotations__r0   r2   r3   r4   r^   r5   r`   r6   boolr7   r#   r_   r<   r   r=   r?   r@   rA   rB   rC   rD   rE   rF   rH   rJ   rL   re   rs   __static_attributes__r   r   r   r,   r,      s   IsL#JHcK#L%#E4"t"D#
 (S%*R46%uSz6'--/9 %*9!d!$)T) !#!E3N$$)*!3*L# "#"  M3L&?r   r,   c                   Z  ^  \ rS rSrSr\S\4S j5       rS\SS4U 4S jjrS\	4S jr
S\4S jrS	 rS
 rS\\	\4   4S jrS r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       r\S 5       rS r\R<                  " 5       S 5       rSr U =r!$ )
HIPBackend_   Nr   c                      U R                   S:H  $ )NrG   )backendr   s    r   supports_targetHIPBackend.supports_targetb   s    ~~&&r   returnc                 t   > [         TU ]  U5        [        UR                  [        5      (       d   eSU l        g )Nhsaco)super__init__
isinstancer#   r_   
binary_ext)rb   r   	__class__s     r   r   HIPBackend.__init__f   s.     &++s++++!r   c                      SUR                    3$ )Nhip:r(   rb   optionss     r   get_target_nameHIPBackend.get_target_namek   s    gll^$$r   c                    S[         R                  R                  =(       d    U R                  R                  0nUR                  SS5      S:  a  [        S5      eU R                  R                  S:X  aB  [        [        R                  5      nUR                  S15        [        [        U5      5      US'   SU;  a%  [        [        [        R                  5      5      US'   U R                  R                  S	:X  aC  [        [        R                  5      nUR                  S
S15        [        [        U5      5      US'   SU;  a  [         R                  R                   US'   UR                  [        R"                  R%                  5        Vs0 s H  nXQ;   d  M
  X   c  M  XQU   _M     sn5        [        S0 UD6$ s  snf )Nr#   r3   r   z'num_ctas > 1 not supported for AMD GPUsr    tf32r@   r<   r!   r:   r;   r=   rA   r   )r
   runtimeoverride_archr   r#   get
ValueErrorsetr,   r@   updater`   sortedr<   r=   languagedefault_fp_fusion__dataclass_fields__keys)rb   optsargsr@   r=   ks         r   parse_optionsHIPBackend.parse_optionsn   sz   33Gt{{7G7GH88J"Q&FGG ;;x'+.z/V/V+W((//938@\9]3^D/0!-+0
8W8W1X+YD'(;;x'03J4`4`0a--44j)5LM8=fEf>g8hD45T)',~~'G'GD#$)H)H)M)M)O ;)OA	  &*g  QQZ)O ; 	<!D!!;s   	F<F<!	F<c                     UR                   UR                  UR                  UR                  S   UR                  S   UR                  S   4$ )Nr   r   r1   )r/   r3   sharedr5   )rb   metadatas     r   pack_metadataHIPBackend.pack_metadata   sO    OO!!!$!!!$!!!$
 	
r   c                 0    S[        U R                  5      0$ )Nmin_dot_size)r   r   r   s     r   get_codegen_implementation%HIPBackend.get_codegen_implementation   s     0 =>>r   c                     SSK Jn  SU0$ )Nr   )	libdeviceztriton.language.extra.libdevice)triton.language.extra.hipr   )rb   r   s     r   get_module_mapHIPBackend.get_module_map   s    719==r   c                     [         R                  " U5        [        R                  (       a   [        R                  R                  U5        g g N)r	   load_dialectsr}   instrumentation)rb   ctxs     r   r   HIPBackend.load_dialects   s2    #%%&&44S9 &r   c                     SS K nSn[        U S5      (       a  U R                  5       U:*  $ [        XR                  5      (       a2  [        U S5      (       a!  U R                  5       R                  5       U:*  $ g)Nr   i	ptr_rangeuntyped_storageF)torchhasattrr   r   Tensorr   size)argr   
MAX_INT_32s      r   is_within_2gbHIPBackend.is_within_2gb   sd    
3$$==?j00c<<((WS:K-L-L&&(--/:==r   c                 N    [         R                  " U 5      nSU ;   a  USS//-  nU$ )NSztt.pointer_rangerQ   )r   
parse_attr)descrets     r   r   HIPBackend.parse_attr   s1    $$T*$;',--C
r   c                     [         R                  " X40 UD6n[        R                  R                  (       a%  US:X  a  [
        R                  U 5      (       a  US-  nU$ )Ntensorr   )r   get_arg_specializationr
   r	   use_buffer_opsr}   r   )r   tykwargsr   s       r   r   !HIPBackend.get_arg_specialization   sK    00CFC 99##h:;S;STW;X;X3JC
r   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R!                  U5        UR#                  U 5        U $ r   )r   pass_managercontextenable_debugr   commonadd_inlinerttiradd_rewrite_tensor_pointer(add_rewrite_tensor_descriptor_to_pointeradd_canonicalizeradd_combineadd_reorder_broadcastadd_cseadd_triton_licmadd_symbol_dceadd_loop_unrollrun)modr   r   pms       r   	make_ttirHIPBackend.make_ttir   s    __S[[)
!!"%..r2<<R@''+#))"-b!##B'$$R(##B'
s
r   c                    [         R                  " U R                  5      nUR                  5         [        R
                  R                  USUR                   3UR                  UR                  UR                  5        UR                  U 5        [         R                  " U R                  5      nUR                  5         [        R                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [         R                  R                  R#                  X2R                  UR$                  UR&                  5        [        R                  R                  U5        [         R                  R                  R)                  U5        [        R                  R+                  US5        [         R                  R                  R-                  U5        [        R                  R/                  U5        [        R0                  R3                  U5        [        R
                  R5                  U5        [        R0                  R3                  U5        [6        R                   R8                  n[6        R                   R:                  n[6        R                   R<                  n[?        UR                  U5      n[         R                  R                  RA                  X2RB                  XEUU5        U(       a3  [         R                  R                  RE                  X2R                  5        [        R0                  R3                  U5        URF                  RI                  5       S:w  a3  [         R                  R                  RK                  X2RF                  5        [        R                  R+                  US5        [        R                  R                  U5        [        R                  RM                  U5        [O        UR                  5      (       aH  [         R                  R                  RQ                  U5        [        R                  R                  U5        [         R                  R                  RS                  U5        U(       aC  URB                  S:  a3  [         R                  R                  RU                  X2RB                  5        [6        R                   RV                  (       a  [         R                  R                  RY                  U5        [        R0                  R3                  U5        [         R                  R                  R[                  X2R                  [6        R                   R\                  5        [         R                  R                  R_                  U5        [        R0                  R3                  U5        [        R0                  Ra                  U5        [        R0                  Rc                  U5        U(       a3  [         R                  R                  Re                  X2R                  5        UR                  U 5        U $ )Nr   TrK   r   )3r   r   r   r   r   r   add_convert_to_ttgpuirr#   r/   rS   r3   r   ttgpuiradd_coalesceadd_remove_layout_conversionsadd_optimize_thread_localityr	   add_accelerate_matmulrC   rD   add_optimize_epilogueadd_optimize_dot_operandsadd_hoist_layout_conversionsadd_fuse_nested_loopsr   r   r   r
   global_prefetchlocal_prefetchr$   r%   add_stream_pipeliner2   add_coalesce_async_copyrL   lowerinsert_instruction_sched_hintsadd_reduce_data_duplicationr)   add_in_thread_transposeadd_reorder_instructionsadd_block_pingpongr   add_canonicalize_pointersadd_convert_to_buffer_opsuse_buffer_atomicsadd_fold_true_cmpir   r   add_update_async_wait_count)r   r   r   r   r   r   r$   r"   s           r   
make_ttgirHIPBackend.make_ttgir   s   __S[[)
**2gll^/DgFWFWY`YjYj+2+;+;	=
s__S[[)
##B'44R833B7

00\\7C_C_ahanano44R8

00400T:

77;,,R0''+##B'''+))3311119',,W

..r3E3Ehv/A	CJJ66r<<H''+  &&(F2JJ==bBWBWX00T:44R82226)',,77JJ66r:NN88<

33B7'"4"4q"8JJ11"6H6HI99##JJ88<MM++B/JJ88\\599KgKgh

--b1''+b!$$R(JJ::2||L
s
r   c                     U n[         R                  " UR                  5      nUR                  5         [        R
                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        [        R                  R                  U5        [        R
                  R                  U5        [        R                  R                  U5        UR                  U5        U$ r   )r   r   r   r   r   gluonr   add_resolve_auto_encodingsr   add_sccpr   add_loop_aware_cser   r    add_combine_tensor_select_and_ifr   )srcr   r   r   r   s        r   gluon_to_ttgirHIPBackend.gluon_to_ttgir  s    __S[[)
  $//3r"&&r*&&r*77;
s
r   c                    U n[         R                  " UR                  5      nUR                  5         Sn[        R
                  R                  R                  XBR                  U5        [
        R                  R                  U5        [
        R                  R                  U5        [        R
                  R                  R                  U5        [        R                  (       a*  [        R                  R                  SXCR                  5        Sn[        R
                  R                  R!                  XBR                  U5        [
        R"                  R%                  U5        [
        R"                  R'                  U5        [
        R                  R)                  U5        [
        R                  R+                  U5        [
        R"                  R%                  U5        [
        R"                  R'                  U5        [
        R"                  R-                  U5        UR.                  R1                  5       S:w  a>  [        R
                  R                  R3                  XBR                  UR4                  5        [        R                  (       a*  [        R                  R                  SXCR                  5        [6        R8                  R:                  (       d  [
        R<                  R?                  U5        [        R
                  R                  RA                  XF5        URC                  U5        [D        RF                  " 5         [D        R                  " 5       n[D        RH                  " X75      n[        RJ                  " U5        Sn	[6        R8                  RL                  (       a  Sn	[D        RN                  " U[        RP                  UR                  U	5        [        RR                  " XR                  5        [        RT                  " US5        [        RV                  " US	S
5        [        RV                  " USS5        [        RV                  " USS
5        [        RV                  " USURX                  S:H  5        UR[                  5        V
s/ s H  oR]                  5       (       a  M  U
PM     nn
US   R_                  [        R`                  5        US   Rc                  SSURd                  URX                  -   35        US   Rc                  SURf                   5        URh                  (       a  SOSnUS   Rc                  SU5        [6        R8                  RL                  (       a'  US   Rk                  S5        US   Rm                  5         [        Rn                  " US   5        [6        R8                  RL                  (       a\  [q        [r        5      Rt                  S-  n[w        US-  5      [w        US-  5      [w        US-  5      /n[D        Rx                  " X5        OtURz                  (       ac  URz                   VVs/ s H%  u  nn[        R|                  " X5      (       d  M#  UPM'     nnn[        U5      S:  a  [D        Rx                  " X5        [D        R                  " U[D        R                  UR                  S/ UR                  5        [        R                  " UR                  5      (       a<  US   R                  S5        US   R                  S5        US   R                  S5        [6        R                  R                  (       a  [        R                  " US   5        U R                  S5      US'   U R                  S5      =(       d    SUS'   U R                  S 5      =(       d    S!US"'   [        R                  " U5        [        R                  " U5        [w        U5      $ s  sn
f s  snnf )#Nr   ttgpuir_to_llvmirTrK   llvmir_to_llvmrI   +xnacki  __oclc_finite_only_optF__oclc_correctly_rounded_sqrt32__oclc_unsafe_math_opt__oclc_wavefrontsize64rR   zamdgpu-flat-work-group-sizez1,zamdgpu-waves-per-euzpreserve-signr>   zdenormal-fp-math-f32rT   z
asanrtl.bczocml.bczockl.bczamdgpu-no-workgroup-id-xzamdgpu-no-workgroup-id-yzamdgpu-no-workgroup-id-zz
ttg.sharedr   zttg.profile_scratch_memory_sizeprofile_scratch_sizez$ttg.profile_scratch_memory_alignmentr   profile_scratch_align)Jr   r   r   r   r	   r   r   add_optimize_lds_usager#   convertadd_scf_to_cfadd_index_to_llvmiradd_allocate_shared_memoryr}   r   patchadd_to_llvmirr   r   r   add_cf_to_llvmiradd_arith_to_llvmirr   rL   r   lower_instruction_sched_hintsr2   r
   compilationdisable_line_infollvmiradd_di_scopeadd_builtin_func_to_llvmirr   r   init_targets	to_moduleattach_target_tripleenable_asanattach_datalayoutTARGET_TRIPLEset_isa_versionset_abi_versionset_bool_control_constantrS   get_functionsis_declarationset_calling_convCALLING_CONV_AMDGPU_KERNELadd_fn_attrr/   r0   rE   add_fn_target_featureadd_fn_asan_attrset_all_fn_arg_inregr   r\   r]   r_   link_extern_libsr4   need_extern_liblenoptimize_moduleOPTIMIZE_O3rA   has_architected_sgprsremove_fn_attrscalarize_packed_fops#add_scalarize_packed_fops_llvm_passget_int_attrcleanup_bitcode_metadatadisable_print_inline)r  r   r   r   r   custom_lds_size_HIPBackend__HIP_FTZr   llvm_modtarget_featuresfnfnsdenormal_moderd   pathsrp   paths                    r   	make_llirHIPBackend.make_llir  s   __S[[)
 

11"llOT$$R(**2.

55b9%%&&,,-@"kkR 	

((\\9E''+b!''+**2.''+b!$$R(  &&(F2JJ<<RwOaOab %%&&,,-=r;;O  22MM&&r*

55bD
s 	,,.>>#/  *((&Ox):):GLL/Z 	Hll3Hc*%%h0H%P%%h0QSWX%%h0H%P%%h0H'J[J[_aJab %224P4b<M<M<Or4PA > >?A8Bw?P?PQXQbQb?b>c:de 	A0W5I5I4JL+2+E+E6A1=A((F((2F##%
 	  Q(((!(^22U:NN\12NY./NY./E
 !!(2  .5.A.Ai.AltTSEXEXYaEhT.AEi5zA~%%h6Xt'7'7r2wOgOgh $$W\\22F!!"<=F!!"<=F!!"<=99**33CF; !--l;+.+;+;<]+^+cbc'(,/,<,<=c,d,ihi()$$X. 	  *8}w Q@ js   _9-_9
"_>0_>c           	         [         R                  " SU 5      n[        U5      S:X  d   eUS   US'   / nUR                  S:X  a  UR	                  S5        SUR
                  ;   a  SOS	n[        R                  " U [        R                  UR
                  XTUR                  S
5      n[        R                  R                  (       a  [        S5        [        U5        U$ )Nz3define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)r   r   rp   	attentionzsink-insts-to-avoid-spillsgfx11z-real-true16rI   Fz!// -----// AMDGCN Dump //----- //)refindallr6  rL   appendr#   r   translate_to_asmr	   r(  rA   r
   dump_amdgcnprint)r  r   r   namesflagsfeaturesamdgcns          r   make_amdgcnHIPBackend.make_amdgcn  s    
 

QSVW5zQ 8
   K/LL56%,%<>"&&sC,=,=w||X^e^v^v',.99  56&Mr   c                    Sn[         R                  R                  (       a  Sn[        R                  " XR
                  U5      n[        R                  " 5        n[        R                  " 5        n[        UR                  S5       nUR                  U5        S S S 5        [        R                  " UR                  UR                  5        S S S 5        [        UR                  S5       nUR                  5       n	S S S 5        S S S 5        W	$ ! , (       d  f       Nz= f! , (       d  f       NX= f! , (       d  f       N:= f! , (       d  f       W	$ = f)NrI   r  wbrb)r
   r  r&  r	   assemble_amdgcnr#   tempfileNamedTemporaryFileopenrp   write
link_hsacoread)
r  r   r   rC  r   tmp_outtmp_infd_infd_outr   s
             r   
make_hsacoHIPBackend.make_hsaco  s    ((&O##CG((*g,,.&&++t,KK& -v{{GLL9 / gllD)Vkkm * + 
 -, /. *) +* 
sT   E 0DD3DE *D/;E 
DD
D,	(E /
D=	9E  
Ec                    ^ ^ U[         R                  :X  a  UU 4S jUS'   UU 4S jUS'   OU[         R                  :X  a
  UU 4S jUS'   UU 4S jUS'   UU 4S jUS	'   UU 4S
 jUS'   g )Nc                 (   > TR                  XT5      $ r   )r   r  r   r   rb   s     r   r   'HIPBackend.add_stages.<locals>.<lambda>  s    4>>#QX3Yr   r   c                 (   > TR                  XT5      $ r   )r   rl  s     r   r   rm    s    DOOCSZ4[r   ttgirc                 (   > TR                  XT5      $ r   )r  rl  s     r   r   rm    s    D4G4GW^4_r   c                 (   > TR                  XT5      $ r   )rI  rl  s     r   r   rm    s    t~~cW/Ur   llirc                 (   > TR                  XT5      $ r   )rX  rl  s     r   r   rm    s    1A1A#QX1Yr   rW  c                 (   > TR                  XT5      $ r   )rh  rl  s     r   r   rm    s    w0Wr   r   )r   TRITONGLUON)rb   stagesr   r   s   ` ` r   
add_stagesHIPBackend.add_stages  sR    x&YF6N[F7O'_F7OUvYxWwr   c                     U R                    $ r   r   )rb   s    r   rs   HIPBackend.hash  s    ++r   )r   )"ru   rv   rw   rx   r   staticmethodr   r   r   r_   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  rI  rX  rh  rx  	functools	lru_cachers   r{   __classcell__)r   s   @r   r}   r}   _   sK   O'	 ' '"y "T "
%# %"S "4
?>S*_ 5 >
:
          8 8t   A AF  .  X    r   r}   )triton.backends.compilerr   r   r   triton._C.libtritonr   r   r   r	   tritonr
   dataclassesr   typingr   r   r   typesr   rl   r^  rN  r}  rZ   pathlibr   r   r%   r)   r,   r}   r   r   r   <module>r     sw    E E 5 5  ! # #    	   0Y 0X
r $=? =? =?@n  n r   