
    9iW                       S SK Jr  S SKrS SKJrJrJrJrJrJ	r	J
r
  S SKrS SKJr  SSKJr  SSKJr  \" S	5      r\" S
5      r " S S\5      r " S S\	\   5      rg)    )annotationsN)ListOptionalSequenceTupleTypeVarGenericType)driver   )ir   )coreTTensorTyc                  (   ^  \ rS rSrU 4S jrSrU =r$ )IncompatibleTypeErrorImpl   c                   > Xl         X l        SU R                   R                  5       -   S-   U R                  R                  5       -   U l        [        [
        U ]  U R                  5        g )Nzinvalid operands of type  and )type_atype_b__repr__messagesuperr   __init__)selfr   r   	__class__s      X/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/triton/language/semantic.pyr   "IncompatibleTypeErrorImpl.__init__   sT    2T[[5I5I5KKgUX\XcXcXlXlXnn'7E    )r   r   r   )__name__
__module____qualname____firstlineno__r   __static_attributes____classcell__)r   s   @r   r   r      s    F Fr!   r   c                  >   \ rS rSr% \R
                  rS\S'   \rS\S'   S rS~S jr	S~S jr
SS	 jr    SS
 jrSSS jjrSS jr  S SS jjrSS jr    SS jr    SS jr    SS jrSS jrSS jrSS jrSS jrSS jrSS jrSS jrSS jrSS jrSS jrSS jrSS jrSS jr SS jr!SS  jr"SS! jr#SS" jr$SS# jr%SS$ jr&SS% jr'SS& jr(SS' jr)SS( jr*SS) jr+SS* jr,SS+ jr-SS, jr.S-S..SS/ jjr/SS0 jr0SS1 jr1SS2 jr2SS3 jr3SS4 jr4SS5 jr5SS6 jr6SS7 jr7SS8 jr8SS9 jr9SS: jr:SS; jr;SS< jr<SS= jr=SS> jr>SSS? jjr?S@ r@SA rASB rBSC rCSD rDSE rESF rFSG rGSH rH          SSI jrI    SSJ jrJSSK jrKSSL jrLSSM jrMSN rNSO rOSSP jrPSSQ jrQSSR jrRSSS jrSSST jrTSSU jrUSSV jrVSW rWSX rX    SSY jrYSSZ jrZ    SS[ jr[SS\ jr\SS] jr]SS^ jr^SS_ jr_SS` jr`SSa jraSSb jrbSSc jrcSd rd      SSe jreSSf jrfSSg jrg                SSh jrhSSi jriSj rjSSk jrk    SSl jrlSSm jrmSn rn  SSo jroSSp jrpSSq jrqSSr jrrSSs jrsSSt jrtSSu jruSSv jrvSSw jrwSx rxSSy jrySSz jrzSS{ jr{ S     SS| jjr|S}r}g-)TritonSemantic   zType[TensorTy]tensorz
ir.builderbuilderc                    Xl         g Nr,   )r   r,   s     r   r   TritonSemantic.__init__   s    r!   c                    US;  a  [        SU 35      eU R                  U R                  R                  U5      [        R
                  5      $ )Nr   r   r   z+program_id axis must be 0, 1, or 2 but got )
ValueErrorr+   r,   create_get_program_idtlint32r   axiss     r   
program_idTritonSemantic.program_id&   sA    y J4&QRR{{4<<==dCRXXNNr!   c                    US;  a  [        SU 35      eU R                  U R                  R                  U5      [        R
                  5      $ )Nr2   z-num_programs axis must be 0, 1, or 2 but got )r3   r+   r,   create_get_num_programsr5   r6   r7   s     r   num_programsTritonSemantic.num_programs+   sA    y LTFSTT{{4<<??ErxxPPr!   c                d   UR                   nUR                   nUR                  nUR                  nXV:X  a	  X4:  a  U$ U$ U[        R                  R                  R
                  :X  a	  X4:  a  U$ U$ U[        R                  R                  R
                  :X  a	  XC:  a  U$ U$ [        SU SU 35      e)Nzunexpected signedness r   )int_bitwidthint_signednessr5   dtype
SIGNEDNESSUNSIGNED	TypeError)r   a_tyb_tya_rankb_ranka_snb_sns          r   integer_promote_impl#TritonSemantic.integer_promote_impl4   s    """""""" <!?444RXX((111!+455RXX((111!+4550eD6BCCr!   c                r   X$:w  a  U(       a  X4OX14u  pgUR                  5       R                  UR                  5       R                  ::  a=  U(       a4  U[        R                  [        R                  4;   a  [        R
                  $ U$ UR                  5       (       d  UR                  5       (       a  [        R                  $ UR                  5       (       d  UR                  5       (       a  [        R
                  $ UR                  5       (       d  UR                  5       (       a'  U(       a  [        R
                  $ [        R                  $ UR                  5       (       a<  UR                  5       (       a'  U(       a  [        R
                  $ [        R                  $ UR                  5       (       d  UR                  5       (       a  [        R
                  $ UR                  5       (       a,  UR                  5       (       a  X:X  a  U$ [        R                  $ UR                  5       (       a  UR                  5       (       d  [        SU SU 35      eU(       aM  UR                  UR                  :w  a3  [        SUR                  5       -   S-   UR                  5       -   S-   5      eU R!                  X5      $ )Nunexpected type r   zCannot use /, #, or % with x because they have different signedness;this is unlikely to result in a useful answer. Cast them to the same signedness.)kindvaluer5   float16bfloat16float32is_fp64float64is_fp32is_fp16is_bf16is_fp8is_intrE   rA   r   rL   )r   rF   a_is_scalarrG   b_is_scalar
div_or_mod	scalar_ty	tensor_tys           r   computation_type_impl$TritonSemantic.computation_type_implC   s   
 %3>D<TL I~~%%)9)?)??9R[[0I#I::%   <<>>T\\^^:: <<>>T\\^^:: <<>>T\\^^zz!zz!<<>>dllnnzz!{{"<<>>T\\^^::;;==T[[]]<47RZZ7{{}}DKKMM.tfE$@AA $--1D1DD9DMMOKgUX\XeXeXggoo p p ((44r!   c                   [        U[        5      (       a9  U R                  U R                  R	                  U5      [
        R                  5      $ [        U[        5      (       a  SUs=::  a  S:  a  O  O[
        R                  nOrSUs=::  a  S:  a  O  O[
        R                  nOQSUs=::  a  S:  a  O  O[
        R                  nO0SUs=::  a  S:  a  O  O[
        R                  nO[        SU S35      eU R                  XS	9$ [        U[        5      (       ar  S
nSSS-  -  n[        S   " U5      nU[        S5      :X  d  US:X  d  X:w  d  XFs=::  a  U::  a  O  O[
        R                   nO[
        R"                  nU R                  XS	9$ [        U[
        R$                  5      (       a  U R'                  UR(                  5      $ [        XR                  5      (       a  U$ U(       a  [+        SU S[-        U5       S35      eU$ )N           l                             l            zNonrepresentable integer .rB   g      8g   ?r      absinfg        zcannot convert z	 of type z
 to tensor)
isinstanceboolr+   r,   get_int1r5   int1intr6   uint32int64uint64r3   scalar_constantfloat__builtins__rU   rW   	constexpr	to_tensorrR   rE   type)r   x
check_typerB   min_float32max_float32abs_xs          r   rz   TritonSemantic.to_tensoru   s   a;;t||44Q7AA3"U"!#e#		1$u$!#e#		 #<QCq!ABB'''775!!!K%C/K '*Ee$|v2{2



'''772<<((>>!''**;;''HoaS	$q'*MNNr!   c                    UR                  5       (       aX  U(       d  [        X5      eUR                  5       (       a  X:w  a  [        X5      eUR                  5       (       a  [        X5      eg g r.   )is_ptrr   is_floating)r   r   r   allow_ptr_as       r   check_ptr_type_impl"TritonSemantic.check_ptr_type_impl   sZ    ==??/??}}F$4/??!!##/?? $ r!   c                   [        U[        R                  5      n[        U[        R                  5      nU(       a  Un	U R                  U5      nU(       a  Un
U R                  U5      nUR                  R
                  nUR                  R
                  nU R                  XU5        U R                  XU5        U(       Gay  UR                  5       (       Gdc  UR                  5       (       GdM  U R                  XXU5      nU(       a  W	S:  a  UR                  5       (       d"  U(       a&  W
S:  a   UR                  5       (       a  [        S5      eUR                  5       (       a  U(       a<  UR                  5       W	s=::  a  UR                  5       ::  d  O  [        SU	 SU 35      eU(       a<  UR                  5       W
s=::  a  UR                  5       ::  d  O  [        SU
 SU 35      eU(       a  U R                  W	US9OU R                  X5      nU(       a  U R                  W
US9OU R                  X-5      nU R!                  X5      u  pX4$ )Nr   z{Cannot perform a binary operation between an unsigned tensor and a negative scalar. Perform a explicit cast on one of them.zScalar z is out of range for type rj   )rn   numbersNumberrz   r{   scalarr   r   rb   is_int_unsignedr3   r\   get_int_min_valueget_int_max_valuerv   castbroadcast_impl_value)r   lhsrhsallow_lhs_ptrallow_rhs_ptrarithmetic_checkr_   lhs_is_scalarrhs_is_scalar
lhs_scalar
rhs_scalar
lhs_sca_ty
rhs_sca_ty
ret_sca_tys                 r   binary_op_type_checking_impl+TritonSemantic.binary_op_type_checking_impl   s    #37"37J..%CJ..%C XX__
XX__
  G  GJ$5$5$7$7
@Q@Q@S@S33JzjtuJ*q.Z5O5O5Q5Q$aJ<V<V<X<X  "K L L  "" **F*F*HJ +I*4*F*F*H+I$wzl:TU_T`%abb **F*F*HJ +I*4*F*F*H+I$wzl:TU_T`%abbHU$&&z&D[_[d[deh[uCHU$&&z&D[_[d[deh[uC ,,S6xr!   c                :   UR                   R                  R                  S:  d%  U R                  R                  R
                  (       d  g UR                   R                  nUR                   R                  nXE:X  d   eUR                  5       (       d   eU R                  U[        R                  5      nU R                  U[        R                  5      nU" XS5      nUR                  5       nU R                  U[        R                  5      nUR                  5       nU R                  U[        R                  5      nU R                  U R                  Xg5      U R                  Xh5      5      n	SUR                   SUR                    3n
U R#                  XS 5        g )N@   Frr   z! overflow detected for operation )r{   r   r@   r,   optionssanitize_overflowr\   r   r5   rt   r   rv   r   and_
less_equalgreater_equalr"   device_assert)r   r   r   	binary_opr   r   ret	max_value	min_valuecondmsgs              r    binary_op_sanitize_overflow_impl/TritonSemantic.binary_op_sanitize_overflow_impl   s9   88??''2-T\\5I5I5[5[XX__
XX__
'''  """"iiRXX&iiRXX&%(002	((BHH=	002	((BHH=	yy8$:L:LS:\]J++,,MiN`N`Mab4d+r!   c                   U R                  XSS5      u  pUR                  R                  nUR                  R                  nUR                  5       (       a   UR                  5       (       a  [	        S5      eUR                  5       (       aC  UR                  5       (       d.  X!p!UR                  R                  nUR                  R                  nUR                  5       (       a  UR
                  nUR                  R                  5       (       a  UR                  R                  S:  ai  UR                  R                  [        R                  5      R                  U R                  5      nU R                  R                  UR
                  US5      nU R                  U R                  R!                  UR
                  U5      UR                  5      $ UR#                  5       (       aJ  U R                  U R                  R%                  UR
                  UR
                  5      UR                  5      $ UR'                  5       (       am  U(       a  U R)                  XU R*                  5        U R                  U R                  R-                  UR
                  UR
                  5      UR                  5      $ [	        SU 35      e)NTzcannot add pointers togetherr   FrO   )r   r{   r   r   rE   handlerB   r   r@   with_element_tyr5   rt   to_irr,   create_int_castr+   create_addptrr   create_faddr\   r   add
create_add)r   inputotherr   input_scalar_tyother_scalar_tyother_handlei64_tys           r   r   TritonSemantic.add   s   88tTR**++**++!!##(>(>(@(@:;; !!##O,B,B,D,D 5#jj//O#jj//O!!## <<L{{**,,1I1IB1N33BHH=CCDLLQ#||;;ELL&RWX;;t||99%,,UW\WaWabb((**;;t||77ellSUZU_U_``##%% 55eDHHM;;t||66u||U\\RTYT^T^__*?*;<==r!   c                   U R                  XSS5      u  pUR                  R                  nUR                  5       (       a  U R	                  XR                  U5      SS9$ UR                  5       (       aJ  U R                  U R                  R                  UR                  UR                  5      UR                  5      $ UR                  5       (       am  U(       a  U R                  XU R                  5        U R                  U R                  R                  UR                  UR                  5      UR                  5      $ [        SU 35      e)NTF)r   rO   )r   r{   r   r   r   minusr   r+   r,   create_fsubr   r\   r   sub
create_subrE   r   r   r   r   r`   s        r   r   TritonSemantic.sub   s    88tUSJJ%%	88E::e#48NN  "";;t||77ellSUZU_U_`` 55eDHHM;;t||66u||U\\RTYT^T^__*9+677r!   c                2   U R                  X5      u  pUR                  R                  nUR                  5       (       aJ  U R	                  U R
                  R                  UR                  UR                  5      UR                  5      $ UR                  5       (       am  U(       a  U R                  XU R                  5        U R	                  U R
                  R                  UR                  UR                  5      UR                  5      $ [        SU 35      eNrO   )r   r{   r   r   r+   r,   create_fmulr   r\   r   mul
create_mulrE   r   s        r   r   TritonSemantic.mul  s    88FJJ%%	  "";;t||77ellSUZU_U_`` 55eDHHM;;t||66u||U\\RTYT^T^__*9+677r!   c                   U R                  XSSSS5      u  pUR                  R                  nUR                  R                  nUR                  5       (       a(  UR	                  5       (       a  U R                  X#5      nGOUR	                  5       (       a'  UR                  5       (       a  U R                  X5      nOUR	                  5       (       aV  UR	                  5       (       aA  U R                  U[        R                  5      nU R                  U[        R                  5      nOvUR                  5       (       aS  UR                  5       (       a>  UR                  UR                  :  a  U R                  X#5      nO U R                  X5      nO[        SU 35      eU R                  U R                  R                  UR                  UR                  5      UR                  5      $ NFTrO   )r   r{   r   r   r\   r   r5   rU   fp_mantissa_widthrE   r+   r,   create_fdivr   )r   r   r   r   r   s        r   truedivTritonSemantic.truediv  sk   88ueUY[_`**++**++&&((_-C-C-E-EIIe5E##%%/*E*E*G*GIIe5E##%%/*@*@*B*BIIeRZZ0EIIeRZZ0E((**/J/J/L/L00?3T3TT		%9		%9 ..?@AA{{4<<33ELL%,,OQVQ[Q[\\r!   c                   U R                  XSSSS5      u  pUR                  R                  nUR                  R                  nUR                  5       (       a  UR                  5       (       a  U R	                  X45      nU R                  X5      nU R                  X%5      nUR                  5       (       aJ  U R                  U R                  R                  UR                  UR                  5      UR                  5      $ U R                  U R                  R                  UR                  UR                  5      UR                  5      $ [        SU 35      er   )r   r{   r   r\   rL   r   is_int_signedr+   r,   create_sdivr   create_udivrE   )r   r   r   r   r   ret_tys         r   floordivTritonSemantic.floordiv7  s
   88ueUY[_`**++**++!!##(>(>(@(@..PFIIe,EIIe,E##%%{{4<<#;#;ELL%,,#WY^YcYcdd{{4<<#;#;ELL%,,#WY^YcYcdd*?*;<==r!   c                   UR                   R                  nUR                   R                  nUR                  5       (       a  UR                  5       (       d  [        S5      eU R	                  XSSSS5      u  pU R
                  R                  UR                  UR                  5      nU R                  XaR                   5      $ )Nz4both operands of fdiv must have floating scalar typeFT)	r{   r   r   rE   r   r,   r   r   r+   )r   r   r   ieee_roundingr   r   r   s          r   fdivTritonSemantic.fdivE  s    **++**++**,,O4O4O4Q4QRSS88ueUZ\`all&&u||U\\B{{3

++r!   c                x   U R                  XSSSS5      u  pUR                  R                  nUR                  R                  nUR                  5       (       aJ  U R	                  U R
                  R                  UR                  UR                  5      UR                  5      $ UR                  5       (       a  UR                  UR                  :w  a3  [        SUR                  5       -   S-   UR                  5       -   S-   5      eUR                  5       (       aJ  U R	                  U R
                  R                  UR                  UR                  5      UR                  5      $ U R	                  U R
                  R                  UR                  UR                  5      UR                  5      $ [        SU 35      e)NFTzCannot mod z by rP   rO   )r   r{   r   r   r+   r,   create_fremr   r\   rA   rE   r   r   create_sremcreate_urem)r   r   r   r`   r   s        r   modTritonSemantic.modN  se   88ueUY[_`JJ%%	**++  "";;t||77ellSUZU_U_``''?+I+II	0B0B0D Dv MP_PhPhPj j ns !s t t &&(({{4<<#;#;ELL%,,#WY^YcYcdd{{4<<#;#;ELL%,,#WY^YcYcdd*9+677r!   c                   U R                  X5      u  pUR                  nUR                  5       (       a  U[        R                  R
                  :X  aJ  U R                  U R                  R                  UR                  UR                  5      UR                  5      $ U[        R                  R                  :X  aJ  U R                  U R                  R                  UR                  UR                  5      UR                  5      $ [        SU 35      eUR                  5       (       aJ  U R                  U R                  R                  UR                  UR                  5      UR                  5      $ UR!                  5       (       aJ  U R                  U R                  R#                  UR                  UR                  5      UR                  5      $ [%        SU 35      eNzUnexpected propagate_nan Unexpected dtype )r   rB   r   r5   PropagateNanALLr+   r,   create_minimumfr   r{   NONEcreate_minnumfr3   r   create_minsir   create_minuirE   r   r|   ypropagate_nanrB   s        r   minimumTritonSemantic.minimume  R   006 3 33{{4<<#?#?!((#SUVU[U[\\"//"6"66{{4<<#>#>qxx#RTUTZTZ[[ #<]O!LMM  "";;t||88188LaffUU""$$;;t||88188LaffUU/w788r!   c                   U R                  X5      u  pUR                  nUR                  5       (       a  U[        R                  R
                  :X  aJ  U R                  U R                  R                  UR                  UR                  5      UR                  5      $ U[        R                  R                  :X  aJ  U R                  U R                  R                  UR                  UR                  5      UR                  5      $ [        SU 35      eUR                  5       (       aJ  U R                  U R                  R                  UR                  UR                  5      UR                  5      $ UR!                  5       (       aJ  U R                  U R                  R#                  UR                  UR                  5      UR                  5      $ [%        SU 35      er   )r   rB   r   r5   r   r   r+   r,   create_maximumfr   r{   r   create_maxnumfr3   r   create_maxsir   create_maxuirE   r   s        r   maximumTritonSemantic.maximumv  r   r!   c                   U R                  X#5      u  p#U R                  X5      u  pU R                  X5      u  pUR                  nUR                  5       (       aV  U R                  U R                  R                  UR                  UR                  UR                  U5      UR                  5      $ [        SU S35      e)Nr   z(. Only floating point clamp is supported)	r   rB   r   r+   r,   create_clampfr   r{   rE   )r   r|   minmaxr   rB   s         r   clampTritonSemantic.clamp  s    44S>221:221:;;t||99!((CJJPSPZPZ\ijlmlrlrss/w6^_``r!   c                j   U R                  X5      u  pUR                  R                  nUR                  R                  nUR                  5       (       a  UR                  5       (       d  [	        X45      eU R                  X45      nXS:w  a  U R                  X5      nXT:w  a  U R                  X%5      nX4$ r.   )r   r{   r   r\   r   rL   r   )r   r   r   input_sca_tyother_sca_tyr   s         r   bitwise_op_type_checking_impl,TritonSemantic.bitwise_op_type_checking_impl  s    88Fzz((zz((""$$L,?,?,A,A+LGG..|J
%IIe0E%IIe0E|r!   c                    U R                  X5      u  pU R                  U R                  R                  UR                  UR                  5      UR
                  5      $ r.   )r  r+   r,   
create_andr   r{   r   r   r   s      r   r   TritonSemantic.and_  E    99%G{{4<<225<<NPUPZPZ[[r!   c                    U R                  X5      u  pU R                  U R                  R                  UR                  UR                  5      UR
                  5      $ r.   )r  r+   r,   	create_orr   r{   r  s      r   or_TritonSemantic.or_  sB    99%G{{4<<11%,,MuzzZZr!   c                    U R                  X5      u  pU R                  U R                  R                  UR                  UR                  5      UR
                  5      $ r.   )r  r+   r,   
create_xorr   r{   r  s      r   xor_TritonSemantic.xor_  r	  r!   c                    UR                   R                  5       (       d   U R                  U[        R                  5      nUR                   R                  5       (       d   U R                  U[        R                  5      nU R                  X5      $ r.   )r{   is_int1bitcastr5   rq   r   r  s      r   logical_andTritonSemantic.logical_and  s_    zz!!##LL0Ezz!!##LL0Eyy&&r!   c                    UR                   R                  5       (       d   U R                  U[        R                  5      nUR                   R                  5       (       d   U R                  U[        R                  5      nU R                  X5      $ r.   )r{   r  r  r5   rq   r  r  s      r   
logical_orTritonSemantic.logical_or  s_    zz!!##LL0Ezz!!##LL0Exx%%r!   c                    UR                   R                  5       (       d   U R                  U[        R                  5      nU R                  U5      $ r.   )r{   r  r  r5   rq   invertr   r   s     r   not_TritonSemantic.not_  s8    zz!!##LL0E{{5!!r!   c                    U R                  X5      u  pU R                  U R                  R                  UR                  UR                  5      UR
                  5      $ r.   )r  r+   r,   create_lshrr   r{   r  s      r   lshrTritonSemantic.lshr  E    99%G{{4<<33ELL%,,OQVQ[Q[\\r!   c                    U R                  X5      u  pU R                  U R                  R                  UR                  UR                  5      UR
                  5      $ r.   )r  r+   r,   create_ashrr   r{   r  s      r   ashrTritonSemantic.ashr  r#  r!   c                    U R                  X5      u  pU R                  U R                  R                  UR                  UR                  5      UR
                  5      $ r.   )r  r+   r,   
create_shlr   r{   r  s      r   shlTritonSemantic.shl  r	  r!   c                    U$ r.    r  s     r   plusTritonSemantic.plus  s    r!   c                B   UR                   R                  nUR                  5       (       a  [        SUR	                  5       -   S-   5      eU R                  U R                  R                  UR                  U R                  5      5      U5      nU R                  X1S5      $ )Nz$wrong type argument to unary minus ()T)
r{   r   r   r3   r   r+   r,   get_null_valuer   r   )r   r   r  _0s       r   r   TritonSemantic.minus  s    zz((  ClF[F[F]]`ccdd[[44\5G5G5UVXdexx4((r!   c                j   UR                   R                  nUR                  5       (       d  UR                  5       (       a  [	        SUR                  5       -   S-   5      eU R                  U R                  R                  UR                  U R                  5      5      U5      nU R                  X5      $ )Nz%wrong type argument to unary invert (r1  )r{   r   r   r   r3   r   r+   r,   get_all_ones_valuer   r  )r   r   r  _1s       r   r  TritonSemantic.invert  s    zz((  L$<$<$>$>D|G\G\G^^addee[[889K9KDLL9YZ\hiyy##r!   c                T    UR                   R                  [        R                  5      $ r.   )r{   r   r5   rq   )r   vs     r   
_bool_likeTritonSemantic._bool_like  s    vv%%bgg..r!   c                   U R                  X5      u  pUR                  R                  nUR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ UR                  5       (       a  UR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ [        SU 35      er   )r   r{   r   r   r+   r,   create_fcmpOGTr   r;  r\   r   create_icmpSGTcreate_icmpUGTrE   r   r   r   r`   s       r   greater_thanTritonSemantic.greater_than     88FJJ%%	  "";;t||::5<<VX\XgXghmXnoo&&(({{4<<#>#>u||U\\#Z\`\k\klq\rss{{4<<#>#>u||U\\#Z\`\k\klq\rss*9+677r!   c                   U R                  X5      u  pUR                  R                  nUR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ UR                  5       (       a  UR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ [        SU 35      er   )r   r{   r   r   r+   r,   create_fcmpOGEr   r;  r\   r   create_icmpSGEcreate_icmpUGErE   rA  s       r   r   TritonSemantic.greater_equal  rD  r!   c                   U R                  X5      u  pUR                  R                  nUR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ UR                  5       (       a  UR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ [        SU 35      er   )r   r{   r   r   r+   r,   create_fcmpOLTr   r;  r\   r   create_icmpSLTcreate_icmpULTrE   rA  s       r   	less_thanTritonSemantic.less_than  rD  r!   c                   U R                  X5      u  pUR                  R                  nUR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ UR                  5       (       a  UR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ [        SU 35      er   )r   r{   r   r   r+   r,   create_fcmpOLEr   r;  r\   r   create_icmpSLEcreate_icmpULErE   rA  s       r   r   TritonSemantic.less_equal  rD  r!   c                    U R                  X5      u  pUR                  R                  nUR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ UR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ [        SU 35      er   )r   r{   r   r   r+   r,   create_fcmpOEQr   r;  r\   create_icmpEQrE   rA  s       r   equalTritonSemantic.equal"      88FJJ%%	  "";;t||::5<<VX\XgXghmXnoo;;t||99%,,UW[WfWfglWmnn*9+677r!   c                    U R                  X5      u  pUR                  R                  nUR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ UR                  5       (       aO  U R	                  U R
                  R                  UR                  UR                  5      U R                  U5      5      $ [        SU 35      er   )r   r{   r   r   r+   r,   create_fcmpUNEr   r;  r\   create_icmpNErE   rA  s       r   	not_equalTritonSemantic.not_equal-  rZ  r!   N)r   c                  [        U[        5      (       a  [        U[        5      (       d  [        S5      e[        US-	  5      n[        US-	  5      nU(       d  U(       a  [        S5      eX!::  a  [        S5      eX!-
  nXfS-
  -  S:w  a  [        S5      eU/nUc%  [        R
                  " [        R                  U5      nUR                  U R                  5      nU R                  U R                  R                  XU5      U5      $ )Nz/arange's arguments must be of type tl.constexpr    zarange must fit in int32z=arange's end argument must be greater than the start argumentr   r   z#arange's range must be a power of 2)rn   rr   r3   ro   r5   
block_typer6   r   r,   r+   create_make_range)	r   startendr   is_start_int64is_end_int64rangeshape	ret_ty_irs	            r   arangeTritonSemantic.arange<  s    %%%ZS-A-ANOOerk*C2I\788<\]]QYA%BCC>]]288U3FLL.	{{4<<99)CPRXYYr!   c                   Uc  [        S5      eUS:X  a5  U R                  R                  UR                  U R                  5      5      nO+[	        U R                  SUR
                   35      nU" U5      nU R                  X5      $ )Nz2dtype must be specified when value is not a tensorr   get_)r3   r,   r2  r   getattrnamer+   )r   rR   rB   get_value_fns       r   rv   TritonSemantic.scalar_constantN  so    =QRRA:LL//DLL0IJE"4<<4

|1DEL 'E{{5((r!   c                    [        U[        R                  5      (       a2  UR                  R                  S:X  d   S5       eU R                  X5      $ U R                  X5      $ )Nr   zonly accepts size-1 tensor)rn   r5   r+   numelrR   r   rv   )r   rR   rB   s      r   make_scalarTritonSemantic.make_scalarY  sQ    eRYY'';;$$)G+GG)99U**##E11r!   c                D    U R                  U R                  X#5      U5      $ r.   )splatru  )r   ri  rR   rB   s       r   fullTritonSemantic.full`  s    zz$**58%@@r!   c                P   UR                   R                  5       (       a   S5       e[        U5      S:X  a  U$ [        R                  " UR
                  U5      nU R                  U R                  R                  UR                  U R                  5      UR                  5      U5      $ )NzCannot splat a block tensorr   )r{   is_blocklenr5   rb  rB   r+   r,   create_splatr   r   )r   rR   ri  r   s       r   rx  TritonSemantic.splatg  s|    ::&&((G*GG(u:?Lu{{E2{{4<<44V\\$,,5OQVQ]Q]^`fggr!   c                    U R                  U R                  R                  UR                  5      UR                  5      $ r.   )r+   r,   create_unsplatr   rB   )r   rR   s     r   unsplatTritonSemantic.unsplatn  s*    {{4<<66u||DekkRRr!   c                ,   SnU H  nXE-  nM	     UR                   R                  U:w  a  [        S5      e[        R                  " UR                   R
                  U5      nU R                  U R                  R                  UR                  X#5      U5      $ )Nr   z:reshape() cannot change total number of elements in tensor)
r{   rt  r3   r5   rb  r   r+   r,   create_reshaper   )r   r   	dst_shapecan_reorderrt  sr   s          r   reshapeTritonSemantic.reshapeq  sw    AJE ::u$YZZuzz00)<{{4<<66u||Y\^deer!   c                   UR                    Vs/ s H  n[        R                  " U5      PM     nnUR                  US5        UR                  R                  5       (       d  U R                  XS9$ [        R                  " UR                  R                  U5      nU R                  U R                  R                  UR                  U5      U5      $ s  snf )Nr   )ri  )ri  r5   _unwrap_if_constexprinsertr{   r|  rx  rb  r   r+   r,   create_expand_dimsr   )r   r   r8   r|   r  r   s         r   expand_dimsTritonSemantic.expand_dimsz  s    9>EAR,,Q/	Eq!zz""$$::e:55uzz00)<{{4<<::5<<NPVWW Fs    Cc                f   U(       d   S5       e[        UR                  5      S:X  d   e[        R                  " UR                  R
                  UR                  S   UR                  S   -   /5      nU R                  U R                  R                  UR                  UR                  5      U5      $ )Nz;current implementation of `cat` always may reorder elementsr   r   )
r}  ri  r5   rb  r{   r   r+   r,   
create_catr   )r   r   r   r  ret_types        r   catTritonSemantic.cat  s    YYY{399~"""==399Q<#))A,3N2OP{{4<<223::szzJHUUr!   c                R   U R                  X5      u  pUR                  / :H  nU(       a$  U R                  US5      nU R                  US5      n[        UR                  S   [        R
                  5      (       a  [        R
                  " S5      nOSnUR                  U/-   n[        R                  " UR                  R                  U5      nU R                  U R                  R                  UR                  UR                  5      U5      nU(       a  U R                  US/SS9nU$ )Nr   r   Fr  )r   ri  r  rn   r5   ry   rb  r{   r   r+   r,   create_joinr   r  )r   ab
was_rank_1two	new_shaper  r   s           r   joinTritonSemantic.join  s    ((. WW]
  A&A  A&Aaggbk2<<00,,q/CCGGseO	==	:kk$,,22188QXXFQ,,sQCU,;C
r!   c                   [        UR                  5      S:  d   e[        R                  " UR                  S   5      S:X  d   eUR                  S S n[        R                  " UR
                  R                  U5      nU R                  R                  UR                  5      u  pEU R                  XC5      U R                  XS5      4$ )Nr   r  r   )r}  ri  r5   r  rb  r{   r   r,   create_splitr   r+   )r   r  r  r  outLHSoutRHSs         r   splitTritonSemantic.split  s    AGGq ! ''49:9GGCRL	==	:22188<KK)KK)
 	
r!   c                   [        UR                  5      [        U5      :w  a  [        S5      e[        S U 5       5      [	        [        [        U5      5      5      :w  a  [        SU 35      e[        R                  " UR                  R                  U Vs/ s H  o1R                  U   PM     sn5      nU R                  U R                  R                  UR                  U5      U5      $ s  snf )Nz5permute dims must have the same length as input shapec              3  N   #    U  H  n[         R                  " U5      v   M     g 7fr.   )r5   r  ).0ds     r   	<genexpr>)TritonSemantic.permute.<locals>.<genexpr>  s     ;d"))!,,ds   #%z?permute dims must be a permutation of 0, 1, ..., n-1, but were )r}  ri  r3   sortedlistrh  r5   rb  r{   r   r+   r,   create_transr   )r   r   dimsr  r  s        r   permuteTritonSemantic.permute  s    u{{s4y(TUU;d;;tE#d)DT?UU^_c^deff==!2!2T4RT[[^T4RS{{4<<44U\\4H(SS 5Ss   C+
c                8   UR                   R                  5       (       d  U R                  X5      $ UR                   R                  5       n[	        U5      [	        U5      :w  a  [        SU SU 35      eX#:X  a  U$ [        U5       H1  u  pEX$   U:w  d  M  US:w  d  M  [        SX$    SU SU SU SU 3
5      e   [        R                  " UR                   R                  U5      nU R                  U R                  R                  UR                  U5      U5      $ )Nz!Cannot broadcast, rank mismatch: z, r   z3Cannot broadcast, the expanded size of the tensor (z ) must match the existing size (z) at non-singleton dimension : )r{   r|  rx  get_block_shapesr}  r3   	enumerater5   rb  r   r+   r,   create_broadcastr   )r   r   ri  	src_shapeiitemr   s          r   broadcast_impl_shape#TritonSemantic.broadcast_impl_shape  s   zz""$$::e++JJ//1	y>SZ'@2eWUVVL +GAx4DAI #VW\W_V` aCCG& I%%&Cr)Bug"? @ @ ,
 uzz00%8{{4<<88uMvVVr!   c           	        UR                   nUR                   nUR                  5       (       a  UR                  5       (       dm  UR                  UR                  5      nU R	                  U R
                  R                  UR                  U R
                  5      UR                  5      U5      nX4$ UR                  5       (       d  UR                  5       (       am  UR                  UR                  5      nU R	                  U R
                  R                  UR                  U R
                  5      UR                  5      U5      nX4$ UR                  5       (       Ga  UR                  5       (       Ga  UR                  5       nUR                  5       n[        U5      [        U5      :  a  [        [        U5      [        U5      5       H  nU R	                  U R
                  R                  UR                  S5      [        R                  " UR                  S/UR                  -   5      5      nUR                   nUR                  5       nM     O[        U5      [        U5      :  a  [        [        U5      [        U5      5       H  nU R	                  U R
                  R                  UR                  S5      [        R                  " UR                  S/UR                  -   5      5      nUR                   nUR                  5       nM     [        U5      [        U5      :X  d   e/ n[!        U5       Hs  u  pXi   nU
S:X  a  UR#                  U5        M"  US:X  d  X:X  a  UR#                  U
5        M@  [%        S['        U	5      -   S-   ['        U
5      -   S-   ['        U5      -   5      e   XX:w  aW  [        R                  " UR                  U5      nU R	                  U R
                  R)                  UR                  U5      U5      nXh:w  aW  [        R                  " UR                  U5      nU R	                  U R
                  R)                  UR                  U5      U5      nX4$ )Nr   r   z?Cannot make_shape_compatible: incompatible dimensions at index r  r   )r{   r|  r   r   r+   r,   r~  r   r   r  r}  rh  r  r5   rb  valuesr  appendr3   strr  )r   r   r   lhs_tyrhs_ty	lhs_shape	rhs_shape_	ret_shaper  leftrightr   s                r   r   #TritonSemantic.broadcast_impl_value  ss    ??V__%6%6++FMM:F++dll77T\\8RTWT^T^_aghCV xS ""v'8'8++FMM:F++dll77T\\8RTWT^T^_aghCN xK __6??#4#4//1I//1I9~I.s9~s9~>A++dll&E&EcjjRS&T&(mmFMMA3IYIYCY&Z\C XXF & 7 7 9I	 ?
 Y#i.0s9~s9~>A++dll&E&EcjjRS&T&(mmFMMA3IYIYCY&Z\C XXF & 7 7 9I	 ?
 y>S^333I$Y/!19$$U+qjem$$T*$ &136q6&:<@&ACFt9&MOV&WY\]bYc&d e e 0 %v}}i@kk$,,"?"?

I"VX^_%v}}i@kk$,,"?"?

I"VX^_xr!   c                    Uc  g US:X  a  [         R                  R                  $ US:X  a  [         R                  R                  $ [	        SU S35      e)NrtnertzzInvalid rounding mode: z0. Supported rounding modes are 'rtne' and 'rtz'.)r   ROUNDING_MODERTNERTZr3   )r   rounding_modes     r   _str_to_rounding_mode$TritonSemantic._str_to_rounding_mode  sU     F"##(((E!##'''2=/Aqrssr!   c                R   UR                   nUR                  5       (       a  UR                  UR                  5      nX2:X  a  U$ UR                  nUR                  nUR	                  5       (       d  UR	                  5       (       a  U R                  X5      $ UR                  nUR                  nXg:w  a&  [        S[        U5      -   S-   [        U5      -   5      eU R                  U R                  R                  UR                  UR                  U R                  5      5      U5      $ )Nz!Cannot bitcast data-type of size z to data-type of size )r{   r|  r   r   r   r   primitive_bitwidthr3   r  r+   r,   create_bitcastr   r   )r   r   dst_tysrc_ty
src_sca_ty
dst_sca_tysrc_bitsdst_bitss           r   r  TritonSemantic.bitcast  s    ??++FMM:FL]]
]]
*"3"3"5"599U++0000@3x=P T2 247MB C C{{4<<66u||V\\RVR^R^E_`bhiir!   c                (   UR                   nUR                  nUR                  nXV:X  a  U$ UR                  5       (       a  UR                  U5      nU R	                  U5      nSnUR                  5       (       an  UR                  5       (       aY  UR                  UR                  :  a?  Uc  [        R                  R                  nOJU[        R                  R                  :w  a  SnO)Ub&  [        S[        U5      -   S-   [        U5      -   5      eUR                  5       (       d  UR                  5       (       aL  U R                  R                  R                  S5       c   S5       eU R                  R                  S   " XX0S9$ UR!                  5       (       a  UR                  5       (       d1  UR                  5       (       a  UR!                  5       (       d  U(       aP  U R#                  U R                  R%                  UR&                  UR)                  U R                  5      U5      U5      $ UR+                  5       (       a  UR-                  5       (       a*  UR/                  5       (       aE  UR-                  5       (       d0  U R1                  U R1                  U[2        R4                  5      U5      $ UR                  5       =(       a0    UR                  5       =(       a    UR                  UR                  :  nU(       aO  U R#                  U R                  R7                  UR&                  UR)                  U R                  5      5      U5      $ UR                  5       =(       a0    UR                  5       =(       a    UR                  UR                  :  n	U	(       aO  U R#                  U R                  R9                  UR&                  UR)                  U R                  5      5      U5      $ UR;                  5       (       GaF  UR;                  5       (       Ga0  UR<                  UR<                  :w  d  UR>                  UR>                  :w  a  URA                  5       =(       a    URC                  5       (       + n
URC                  5       (       ak  URD                  R)                  U R                  5      nU R#                  U R                  RG                  U5      URD                  5      nU RI                  X5      $ U R#                  U R                  RK                  UR&                  UR)                  U R                  5      U
5      U5      $ URM                  5       (       GaI  UR;                  5       (       Ga3  URC                  5       (       ak  URD                  R)                  U R                  5      nU R#                  U R                  RG                  U5      URD                  5      nU RI                  X5      $ URA                  5       (       aO  U R#                  U R                  RO                  UR&                  UR)                  U R                  5      5      U5      $ U R#                  U R                  RQ                  UR&                  UR)                  U R                  5      5      U5      $ UR;                  5       (       a  URM                  5       (       a  URC                  5       (       d  URA                  5       (       dO  U R#                  U R                  RS                  UR&                  UR)                  U R                  5      5      U5      $ U R#                  U R                  RU                  UR&                  UR)                  U R                  5      5      U5      $ URW                  5       (       a  UR;                  5       (       a  UR<                  nUS:X  aO  U R#                  U R                  RY                  UR&                  UR)                  U R                  5      5      U5      $ US	:X  ag  U RI                  U R1                  U[2        RZ                  5      U R#                  U R                  R]                  S
5      [2        RZ                  5      5      $ UR;                  5       (       ad  URW                  5       (       aO  U R#                  U R                  R_                  UR&                  UR)                  U R                  5      5      U5      $ URW                  5       (       ad  URW                  5       (       aO  U R#                  U R                  Ra                  UR&                  UR)                  U R                  5      5      U5      $  SU SU 35       e)NFTz]fp_downcast_rounding should be set only for truncating fp conversions. Source scalar type is z and destination type is convert_custom_typesz0target doesn't provide conversion for this type.)	_semanticr   r   r   zcannot cast z to )1r{   r   r|  r   r  r   r  r   r  r  r3   r  is_fp8e4b15r,   codegen_fnsgetr[   r+   create_fp_to_fpr   r   rY   rX   rZ   r   r5   rU   create_fp_trunccreate_fp_extr\   r@   rA   r   is_boolrB   r2  r^  r   is_standard_floatingcreate_fp_to_sicreate_fp_to_uicreate_ui_to_fpcreate_si_to_fpr   create_ptr_to_intrt   	get_int64create_int_to_ptrr  )r   r   r  fp_downcast_roundingr  r  r  use_custom_roundingtruncate_fpext_fpsign_extendtyr3  bitwidths                 r   r   TritonSemantic.cast  sV   ]]
]]
#L??++J7F  $99:NO#!!##
(>(> )
 )
++j.K.KK#+BDTDTDYDY-A%)9)9)>)>>VZ@S#/  ":<?
O"LNi"j!$Z"1 2 2 ""$$
(>(>(@(@<<++//&(/34 h5gh 4<<++,BCESgxx J$:$:$<$<""$$):):)<)<;;,,U\\6<<;UWklntv v   ););)=)=  ););)=)=99TYYubjj9:FF
 !,,. J""$J))J,I,II 	 ;;t||;;ELL&,,W[WcWcJdegmnn '') J""$J))J,I,II 	 ;;t||99%,,UYUaUaHbcekll :#4#4#6#6##z'>'>>*B[B[_i_x_xBx$224QZ=O=O=Q9QK!!##[[&&t||4[[!<!<R!@%++N~~e00{{4<<#?#?fll[_[g[gNhju#v#)+ + **,,1B1B1D1D!!##[[&&t||4[[!<!<R!@%++N~~e00))++{{4<<#?#?fll[_[g[gNh#ikqrr{{4<<#?#?fll[_[g[gNh#ikqrr :#B#B#D#D!!##:+C+C+E+E{{4<<#?#?fll[_[g[gNh#ikqrr{{4<<#?#?fll[_[g[gNh#ikqrr :#4#4#6#6!..H2~{{4<<#A#A%,,PVP\P\]a]i]iPj#kmstt1}~~diirxx&@$++dllNdNdefNgikiqiqBrss :#4#4#6#6;;t||==ellFLLY]YeYeLfgiopp :#4#4#6#6;;t||::5<<VZVbVbIcdflmm8UG4x88ur!   c                2   [         R                  R                  nU(       au  US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R
                  nU$ [        SU S35      eU$ )Nz.ca.cgz.cvCache modifier  not supported)r   CACHE_MODIFIERr   CACGCVr3   r   cache_modifiercaches      r   _str_to_load_cache_modifier*TritonSemantic._str_to_load_cache_modifier  s    !!&&&)),,   5()),,
 	  5()),,  !?>2B.!QRRr!   c                v   [         R                  R                  nU(       a  US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R
                  nU$ US:X  a  [         R                  R                  nU$ [        SU S35      eU$ )Nz.wbr  z.csz.wtr  r  )r   r  r   WBr  CSWTr3   r  s      r   _str_to_store_cache_modifier+TritonSemantic._str_to_store_cache_modifier  s    !!&&&)),,   5()),,   5()),,
 	  5()),,  !?>2B.!QRRr!   c                    [         R                  R                  nU(       aS  US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R                  nU$ [        SU S35      eU$ )N
evict_lastevict_firstzEviction policy r  )r   EVICTION_POLICYNORMAL
EVICT_LASTEVICT_FIRSTr3   )r   eviction_policyevictions      r   _str_to_eviction_policy&TritonSemantic._str_to_eviction_policy  su    %%,,,.--88
 	 !M1--99  !#3O3DN!STTr!   c                    S nU(       aS  US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R                  nU$ [	        SU S35      eU$ )NzeronanzPadding option r  )r   PADDING_OPTIONPAD_ZEROPAD_NANr3   )r   padding_optionpaddings      r   _str_to_padding_option%TritonSemantic._str_to_padding_option  sh    '++44
 	  5(++33  !?>2B.!QRRr!   c                v   [         R                  R                  nU(       a  US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R
                  nU$ [        SU S35      eU$ )Nacquirereleaseacq_relrelaxedMemory semantic r  )r   MEM_SEMANTICACQUIRE_RELEASEACQUIRERELEASERELAXEDr3   )r   
sem_optionsems      r   _str_to_semTritonSemantic._str_to_sem  s    oo--Y&oo-- 
 y(oo-- 
 y(oo55
 
	 y(oo-- 
 !#3J<~!NOO
r!   c                2   [         R                  R                  nU(       au  US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R                  nU$ US:X  a  [         R                  R                  nU$ [        SU S35      eU$ )Ngpuctasysr$  r  )r   MEM_SYNC_SCOPEGPUCTASYSTEMr3   )r   scope_optionscopes      r   _str_to_scopeTritonSemantic._str_to_scope  s    !!%%u$))--  &))--
 	 &))00  !#3L>!PQQr!   c                   U(       a  [        US5      (       d  U/nU Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     nnU H3  n[        U[
        5      (       a  SUs=::  a  [        U5      :  a  M0   e   e   [        U5      S:  d   e[        U5      [        [        U5      5      :X  d   S5       e[        U5      $ gs  snf )N__iter__r   z'Duplicate dimension in `boundary_check`r-  )	hasattrrn   r5   ry   rR   rr   r}  setr  )r   boundary_checkblock_shapeelemdims        r   _canonicalize_boundary_check+TritonSemantic._canonicalize_boundary_check  s    >:66"0!1aopaoY]JtR\\,J,JdjjPTTaoNp%!#s++S0K3{;K0KKK0KKK &~&***~&#c..A*BBmDmmB.)) qs   7Cc	           
        Uc  Ub  [        S5      eUR                  R                  R                  n	U	[        R                  :w  d   S5       eU	R                  5       (       a)  U[        R                  R                  :X  a  [        S5      eUR                  R                  n
U R                  XJR                  5       5      nU R                  U R                  R                  UR                  XEXgU5      U
5      $ )NK`mask` and `other` arguments cannot be specified for loading block pointers4`tl.int1` should be rewritten in `tl.make_block_ptr`z@Padding option `nan` is not supported for integer block pointers)r3   r{   
element_tyr5   rq   r\   r   r  r  rB  r  r+   r,   create_tensor_pointer_loadr   )r   ptrmaskr   r>  r  r  r  is_volatileelt_tyr  s              r   _load_block_pointer"TritonSemantic._load_block_pointer  s     u0jkk$$// X"XX ==??w"*;*;*C*CC_`` $$ ::>KbKbKde {{LL33CJJY^juv 	r!   c	           
        UR                   R                  R                  5       (       d'  [        SUR                   R	                  5        S35      eUc  Ub  [        S5      eU(       d  U(       a  [        S5      eUR                   R                  5       (       db  U(       a*  UR                   R                  5       (       a  [        S5      eU(       a*  UR                   R                  5       (       a  [        S5      eUR                   R                  5       (       a,  Ub  U R                  X5      u  pUb  U R                  X5      u  pUR                   R                  n	U	R                  n
U
[        R                  :H  nU(       aA  [        R                  n
[        R                  " XR                  5      n	U R                  X5      nUb  U R                  X:5      nUR                   R                  5       (       a  UR                   R                  U
5      nOU
nUc8  U R                  U R                   R#                  UR$                  XgU5      U5      nOVU R                  U R                   R'                  UR$                  UR$                  U(       a  UR$                  OS UXx5      U5      nU(       a   U R                  U[        R                  5      nU$ )NUnsupported ptr type z in `tl.load`z)`other` cannot be provided without `mask`z`padding_option` or `boundary_check` argument is not supported for loading a tensor ofpointers or loading a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadEMask argument cannot be block type if pointer argument is not a blockzFOther argument cannot be block type if pointer argument is not a block)r{   r   r   r3   r   r|  r   rG  r5   rq   int8pointer_typeaddress_spacer   r   r+   r,   create_loadr   create_masked_load)r   rI  rJ  r   r>  r  r  r  rK  ptr_tyrL  r  r  r   s                 r   _load_legacyTritonSemantic._load_legacy  s9   xx%%''4SXX5F5F5H4IWXX <E-HIIn X Y Y
 xx  ""		**,, !hii,,.. !ijj 88 55c@	 !66sB
 "" BGG#WWF__V-A-ABF))C(C IIe,E 88XX--f5F F <++dll66szz5T_`bhiC++//

DKKY^dhjo08GHNPC ))C)C
r!   c	           
     H   U R                  U5      n	U R                  U5      n
U R                  U5      nUR                  R	                  5       (       a=  UR                  R
                  R                  5       (       a  U R                  XX4XX5      $ U R                  XX4XX5      $ r.   )	r  r  r  r{   r   rG  r|  rM  rX  )r   rI  rJ  r   r>  r  r   r  rK  r  r  r  s               r   loadTritonSemantic.load5  s     00@//@--n=88??!4!4!=!=!?!?++Cug^ftt $$SwW_mmr!   c                   [        U[        R                  5      (       d   e[        UR                  5      n[        U5      U:X  d   SU S[        U5       35       eU R                  USS9nU R                  R                  UR                  X R                  U5      U R                  U5      5      nU R                  XaR                  5      $ )N	expected  offsets, but got Frequire_i64)rn   r5   tensor_descriptor_baser}  r?  _convert_to_ir_valuesr,   create_descriptor_loadr   r  r  r+   rb  )r   descoffsetsr   r  ndimr|   s          r   descriptor_loadTritonSemantic.descriptor_loadC  s    $ 9 9::::4##$7|t#Wy6HW%WW#,,W%,HLL//WFfFfguFv040L0L_0]_{{1oo..r!   c                    [        U[        R                  5      (       d   e[        UR                  5      n[        U5      U:X  d   SU S[        U5       35       eUR
                  UR                  :X  d   eg )Nr^  r_  )rn   r5   rb  r}  r?  ri  )r   re  rR   rf  rg  s        r   validate_store_like"TritonSemantic.validate_store_likeN  sl    $ 9 9::::4##$7|t#Wy6HW%WW#{{d.....r!   c                   U R                  XU5        U R                  X!R                  5      nU R                  USS9nU R	                  U R
                  R                  UR                  UR                  U5      [        R                  5      $ NFr`  )
rk  r   rB   rc  r+   r,   create_descriptor_storer   r5   void)r   re  rR   rf  s       r   descriptor_storeTritonSemantic.descriptor_storeT  sm      g6		%,,,W%,H{{4<<??U\\[bcegelelmmr!   c                   U R                  XU5        UR                  [        R                  [        R                  [        R
                  [        R                  [        R                  [        R                  1;   d   S5       eU R                  USS9n[        R                  R                  nU R                  U R                  R                  XAR                   UR                   U5      [        R"                  5      $ NUnsupported dtypeFr`  )rk  rB   r5   rs   r6   ru   rU   rS   rT   rc  r   DESCRIPTOR_REDUCE_KINDADDr+   r,   create_descriptor_reducer   rp  r   re  rR   rf  rQ   s        r   descriptor_atomic_add$TritonSemantic.descriptor_atomic_add[  s      g6zzbii299bjj"**VXVaVabbwdwwb,,W%,H((,,{{4<<@@{{TYT`T`bijlnlslsttr!   c                    [         R                  R                  5       nUR                  S:H  =(       a    UR                  S:  $ )NcudaZ   )r   activeget_current_targetbackendarch)r   targets     r   _has_native_tmaTritonSemantic._has_native_tmab  s1    113&(>V[[B->?r!   c                T   U[         R                  [         R                  [         R                  [         R                  [         R
                  [         R                  1;   d   S5       eU[         R
                  [         R                  1;   a  U R                  5       (       d   S5       eg g )Nru  z-16-bit float types require native tma support)r5   rs   r6   ru   rt   rS   rT   r  )r   rB   s     r   $_descriptor_atomic_min_max_supported3TritonSemantic._descriptor_atomic_min_max_supportedf  sp    BHHbii2::r{{[[p]pp[RZZ--''))Z+ZZ) .r!   c                N   U R                  XU5        U R                  UR                  5        U R                  USS9n[        R
                  R                  nU R                  U R                  R                  XAR                  UR                  U5      [        R                  5      $ rn  )rk  r  rB   rc  r   rv  MINr+   r,   rx  r   r5   rp  ry  s        r   descriptor_atomic_min$TritonSemantic.descriptor_atomic_mink        g611$**=,,W%,H((,,{{4<<@@{{TYT`T`bijlnlslsttr!   c                N   U R                  XU5        U R                  UR                  5        U R                  USS9n[        R
                  R                  nU R                  U R                  R                  XAR                  UR                  U5      [        R                  5      $ rn  )rk  r  rB   rc  r   rv  MAXr+   r,   rx  r   r5   rp  ry  s        r   descriptor_atomic_max$TritonSemantic.descriptor_atomic_maxr  r  r!   c                   U R                  XU5        UR                  [        R                  [        R                  [        R
                  [        R                  1;   d   S5       eU R                  USS9n[        R                  R                  nU R                  U R                  R                  XAR                  UR                  U5      [        R                  5      $ rt  )rk  rB   r5   rs   r6   ru   rt   rc  r   rv  ANDr+   r,   rx  r   rp  ry  s        r   descriptor_atomic_and$TritonSemantic.descriptor_atomic_andy        g6zzbii299bhhGG\I\\G,,W%,H((,,{{4<<@@{{TYT`T`bijlnlslsttr!   c                   U R                  XU5        UR                  [        R                  [        R                  [        R
                  [        R                  1;   d   S5       eU R                  USS9n[        R                  R                  nU R                  U R                  R                  XAR                  UR                  U5      [        R                  5      $ rt  )rk  rB   r5   rs   r6   ru   rt   rc  r   rv  ORr+   r,   rx  r   rp  ry  s        r   descriptor_atomic_or#TritonSemantic.descriptor_atomic_or  s      g6zzbii299bhhGG\I\\G,,W%,H((++{{4<<@@{{TYT`T`bijlnlslsttr!   c                   U R                  XU5        UR                  [        R                  [        R                  [        R
                  [        R                  1;   d   S5       eU R                  USS9n[        R                  R                  nU R                  U R                  R                  XAR                  UR                  U5      [        R                  5      $ rt  )rk  rB   r5   rs   r6   ru   rt   rc  r   rv  XORr+   r,   rx  r   rp  ry  s        r   descriptor_atomic_xor$TritonSemantic.descriptor_atomic_xor  r  r!   c                   [        U[        R                  5      (       d   eUS:X  d   S5       eUS:X  d   S5       e[        UR                  5      S:X  d   SUR                   35       eUR                  S   S:X  d   SUR                   35       e[        UR
                  5      S:X  d   S	UR
                   35       eUR
                  S   S
:  d   SUR
                   35       eUR                  nSUR                  -  S
-  nUR                  S   U:  d   SU SU SUR                  S    35       e[        R                  " UR                  UR
                  S   UR                  S   /5      nU R                  U4SS9S   nU R                  R                  UR                  UR                  X8R                  U R                  5      5      n	U R                  X5      $ )N z#cache modifier is not supported yetz$eviction policy is not supported yetr   descriptor must be 2D, but got r   r   *descriptor block must have 1 row, but got x offsets must be 1D, but got    z5descriptor gather must have at least 8 rows, but got ra  zdescriptor gather of  must have at least  columns, but got Fr`  )rn   r5   rb  r}  r?  ri  rB   r  rb  rc  r,   create_descriptor_gatherr   r   r+   )
r   re  	x_offsetsy_offsetr   r  rB   min_colsr{   r|   s
             r   descriptor_gather TritonSemantic.descriptor_gather  s   $ 9 9::::#J%JJ#"$L&LL$ 4##$)_-LTM]M]L^+__)"a'h+UVZVfVfUg)hh' 9??#q(\,J9??J[*\\( q!Q&q*_`i`o`o_p(qq&

111A5 	A3E7:NxjXjkok{k{|}k~j  A	A  }}TZZ)//!*<d>N>Nq>Q)RS--xl-NqQLL11$++y?O?OQY[e[efjfrfr[st{{1##r!   c                   [        U[        R                  5      (       d   e[        UR                  5      S:X  d   SUR                   35       eUR                  S   S:X  d   SUR                   35       e[        UR
                  5      S:X  d   SUR                   35       eUR
                  S   S:  d   SUR
                   35       eUR                  nS	UR                  -  S-  nUR                  S   U:  d   S
U SU SUR                  S    35       eU R                  U4SS9S   nU R                  R                  UR                  UR                  UR                  U5        U R                  S [        R                  5      $ )Nr   r  r   r   r  r  r  z6descriptor scatter must have at least 8 rows, but got ra  zdescriptor scatter of r  r  Fr`  )rn   r5   rb  r}  r?  ri  shapaerB   r  rc  r,   create_descriptor_scatterr   r+   rp  )r   re  rR   r  r  rB   r  s          r   descriptor_scatter!TritonSemantic.descriptor_scatter  s   $ 9 9:::: 4##$)_-LTM]M]L^+__)"a'h+UVZVfVfUg)hh' 9??#q(],J9K[K[J\*]]( q!Q&r*`ajapap`q(rr&

111A5 	B4UG;OPXzYklpl|l|}~l  lA  B	B  --xl-NqQ..t{{ELL)JZJZ\de{{4))r!   c           	        Ub  [        S5      eUR                  R                  R                  5       nUR                  R	                  5       (       d  U R                  X'5      nUR                  R	                  5       (       d   S5       eXrR                  R                  5       :X  d&   SU SUR                  R                  5        S35       eUR                  R                  R                  UR                  R                  :X  d@   SUR                  R                  R                   SUR                  R                   S35       eUR                  R                  R                  nU[        R                  :w  d   S5       eU R                  XG5      nU R                  X(5      nU R                  U R                  R                  UR                  UR                  XEU5      [        R                  5      $ )	NrE  z-Value argument must be block type or a scalarzBlock shape(z) and value shape(z
) mismatchzBlock element type(z) and value element type(rF  )r3   r{   rG  r  r|  r  r5   rq   rB  r   r+   r,   create_tensor_pointer_storer   rp  )	r   rI  valrJ  r>  r  r  r?  rL  s	            r   _store_block_pointer#TritonSemantic._store_block_pointer  s    jkk hh))::<xx  ""++C=Cxx  ""S$SS"hh77 
 
 	a+&89R9R9T8UU_`	a 
xx""--1D1DD  	uH[\_\d\d\o\o\z\z[{  |U  VY  V^  V^  Vi  Vi  Uj  jt  Gu  	uD$$// X"XX  ::>W ii$ {{LL44SZZ^dlmoqovovx 	xr!   c           	     r   UR                   R                  R                  5       (       d'  [        SUR                   R	                  5        S35      eU(       a  [        S5      eUR                   R                  5       (       d[  UR                   R                  5       (       a  [        S5      eU(       a*  UR                   R                  5       (       a  [        S5      eUR                   R                  5       (       aU  U R                  X!R                   R                  5       5      nUb)  U R                  X1R                   R                  5       5      nUR                   R                  nUR                  nU[        R                  :X  aA  [        R                  n[        R                  " XR                  5      nU R                  X5      nU R                  X(5      nUcO  U R                  U R                   R#                  UR$                  UR$                  XV5      [        R&                  5      $ UR                   R                  R)                  5       (       d  [        S5      eU R                  U R                   R+                  UR$                  UR$                  UR$                  XV5      [        R&                  5      $ )NrP  z in `tl.store`z`boundary_check` argument is not supported for storing a tensor of pointers or storing a scalar. Because the compiler does not know the boundary; please use block pointers (defined by `make_block_ptr`) insteadzFValue argument cannot be block type if pointer argument is not a blockrQ  "Mask must have boolean scalar type)r{   r   r   r3   r   r|  r  r  rG  r5   rq   rR  rS  rT  r   r+   r,   create_storer   rp  r  create_masked_store)	r   rI  r  rJ  r>  r  r  rW  rL  s	            r   _store_legacyTritonSemantic._store_legacy  s    xx%%''4SXX5F5F5H4IXYY  E F F
 xx  ""xx  "" !ijj		**,, !hii 88++C1J1J1LMC00xx7P7P7RS"" RWWWWF__V-A-ABF))C(C ii$ <;;t||88SZZQVacecjcjkkyy''))ABB{{4<<;;CJJ

TXT_T_afq77$ 	$r!   c                   U R                  U5      nU R                  U5      nUR                  R                  5       (       d)  UR                  R                  R                  5       (       a  [        S5      eUR                  R                  5       (       a<  UR                  R                  R                  5       (       a  U R                  XX4Xx5      $ U R                  XX4Xx5      $ )N"Cannot store to a constant pointer)r  r  r{   is_constr   r3   r   rG  r|  r  r  )	r   rI  r  rJ  r>  r   r  r  r  s	            r   storeTritonSemantic.store  s     11.A//@88#((//":":"<"<ABB88??!4!4!=!=!?!?,,StU]] %%ceVVr!   c           	     h   U R                  U5      nU R                  U5      nUR                  R                  R                  nUR
                  S;  a  [        S5      eU R                  U R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ )N)r   ra  r   z9atomic_cas only supports elements with width {16, 32, 64})r,  r8  r{   r   rG  r  r3   r+   r,   create_atomic_casr   )r   rI  cmpr  r+  r7  rG  s          r   
atomic_casTritonSemantic.atomic_cas  s    s#""5)XX__//
((<XYY{{4<<99#**cjjRUR\R\^aiknksksttr!   c                   UR                   R                  R                  5       (       d&  [        SUR                   R	                  5       -   5      eUR                   R                  5       (       d)  UR                   R                  R                  5       (       a  [        S5      eUR                   R                  R                  nU[        R                  L a  US:w  a  [        SU-   S-   5      eU[        R                  L a  US:w  a  [        SU-   S-   5      eU[        R                  [        R                  4;   d  UR                  S:  a  [        SU-   S-   [        U5      -   5      eUR                   R                  5       (       aX  Ub)  U R                  X1R                   R!                  5       5      nUb)  U R                  X!R                   R!                  5       5      nU R#                  X!R                   R                  R                  5      nUc  U R$                  R'                  S	5      n[        R(                  nUR                   R                  5       (       a^  UR                   R+                  [        R(                  5      nU R$                  R-                  UR/                  U R$                  5      U5      nU R1                  Xg5      nXU4$ )
Nz)Pointer argument of store instruction is r  r   atomic_z does not support fp16z does not support bf16r   z does not support T)r{   r   r   r3   r   r  rG  r5   rS   rT   int16uint16r  r  r|  r  r  r   r,   rp   rq   r   r~  r   r+   )r   rI  r  rJ  oprG  mask_irmask_tys           r   atom_red_typechecking_impl)TritonSemantic.atom_red_typechecking_impl  s   xx%%''H388K\K\K^^__88#(("5"5">">"@"@ABBXX__//
#eY^.FFGG$uY^.FFGG"((BII..*2O2ORT2TY^.BBS_TUU8800xx7P7P7RS//XX5N5N5PQiiXX__778<ll++D1GggGxx  ""((22277;,,33GMM$,,4OQXY;;w0D~r!   c                    UR                   R                  n[        R                  " USS9nU R	                  X5      nU R                  XBS-
  5      nU R                  U[        R                  5      $ )NF)r  signedr   )rB   r  r5   get_int_dtyper  r!  r   rq   )r   r|   r  idtypeixsignbits         r   _signbitTritonSemantic._signbit;  sX    77--!!8EB\\!$))B1-yy"''**r!   c                F   U R                  XUS5      u  pnU R                  U5      nU R                  U5      nUR                  R                  nUR                  5       (       a  UR                  5       (       ao  U R                  U R                  R                  [        R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ U R                  U R                  R                  [        R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ U[        R                   [        R"                  1;  a  [%        SU 35      eU[        R                   :X  a  [        R&                  O[        R(                  nU R+                  X'5      nU R+                  U[        R,                  " US5      5      n	U[        R                   :X  a  [        R.                  O[        R0                  n
U R+                  X*5      nU R+                  U[        R,                  " U
S5      5      nU R3                  U5      nU R5                  U5      nU R                  U R                  R                  [        R                  R                  U	R                  UR                  U R7                  X>5      R                  XE5      UR                  5      nU R                  U R                  R                  [        R                  R8                  UR                  UR                  U R7                  X=5      R                  XE5      UR                  5      nU R;                  XU5      nU R+                  UU5      $ )Nr   z#atomic_max not supported for dtype r   )r  r,  r8  r{   r   r\   r   r+   r,   create_atomic_rmwr   	ATOMIC_OPr  r   UMAXr5   rU   rW   rE   r6   rt   r  rS  rs   ru   r  r  r   UMINwherer   rI  r  rJ  r+  r7  sca_tyi_typei_vali_ptrui_typeui_valui_ptrnegpospos_retneg_retr   s                     r   
atomic_maxTritonSemantic.atomic_maxB     884O$s#""5)==??##%%{{LL222<<3C3CSZZQTQ[Q[]a]h]hjmuHH  {{LL222<<3D3DcjjRUR\R\^b^i^iknvHH  "**bjj11A&JKK#rzz1rxxS)S"//&!"<=%3"))c+c2??7A#>?mmC iin++LL**2<<+;+;U\\5<<+/99T+?+F+FTUZU_U_a ++LL**2<<+<+<fmmV]]+/99T+?+F+FTU[U`U`b jjw/||C((r!   c                F   U R                  XUS5      u  pnU R                  U5      nU R                  U5      nUR                  R                  nUR                  5       (       a  UR                  5       (       ao  U R                  U R                  R                  [        R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ U R                  U R                  R                  [        R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ U[        R                   [        R"                  1;  a  [%        SU 35      eU[        R                   :X  a  [        R&                  O[        R(                  nU R+                  X'5      nU R+                  U[        R,                  " US5      5      n	U[        R                   :X  a  [        R.                  O[        R0                  n
U R+                  X*5      nU R+                  U[        R,                  " U
S5      5      nU R3                  U5      nU R5                  U5      nU R                  U R                  R                  [        R                  R                  U	R                  UR                  U R7                  X>5      R                  XE5      UR                  5      nU R                  U R                  R                  [        R                  R8                  UR                  UR                  U R7                  X=5      R                  XE5      UR                  5      nU R;                  XU5      nU R+                  UU5      $ )Nr   z#atomic_min not supported for dtype r   )r  r,  r8  r{   r   r\   r   r+   r,   r  r   r  r  r   r  r5   rU   rW   rE   r6   rt   r  rS  rs   ru   r  r  r   r  r  r  s                     r   
atomic_minTritonSemantic.atomic_minh  r  r!   c           
        U R                  XUS5      u  pnU R                  U5      nU R                  U5      nUR                  R                  nUR                  5       (       a  [        R                  R                  O[        R                  R                  nU R                  U R                  R                  XqR                  UR                  UR                  XE5      UR                  5      $ )Nr   )r  r,  r8  r{   r   r   r   r  FADDrw  r+   r,   r  r   )r   rI  r  rJ  r+  r7  r  r  s           r   
atomic_addTritonSemantic.atomic_add  s    884O$s#""5)"("4"4"6"6R\\BLL<L<L{{4<<99"jj#**VZVaVacfn88% 	%r!   c           
     P   U R                  XUS5      u  pnU R                  U5      nU R                  U5      nU R                  U R                  R                  [        R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ )Nand)r  r,  r8  r+   r,   r  r   r  r  r   r{   r   rI  r  rJ  r+  r7  s         r   
atomic_andTritonSemantic.atomic_and      884O$s#""5){{LL**2<<+;+;SZZUYU`U`bemorowowy 	yr!   c           
     P   U R                  XUS5      u  pnU R                  U5      nU R                  U5      nU R                  U R                  R                  [        R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ )Nor)r  r,  r8  r+   r,   r  r   r  r  r   r{   r  s         r   	atomic_orTritonSemantic.atomic_or  s    884N$s#""5){{LL**2<<??CJJ

TXT_T_adlnqnvnvx 	xr!   c           
     P   U R                  XUS5      u  pnU R                  U5      nU R                  U5      nU R                  U R                  R                  [        R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ )Nxor)r  r,  r8  r+   r,   r  r   r  r  r   r{   r  s         r   
atomic_xorTritonSemantic.atomic_xor  r  r!   c           
     P   U R                  XUS5      u  pnU R                  U5      nU R                  U5      nU R                  U R                  R                  [        R                  R                  UR                  UR                  UR                  XE5      UR                  5      $ )Nxchg)r  r,  r8  r+   r,   r  r   r  XCHGr   r{   r  s         r   atomic_xchgTritonSemantic.atomic_xchg  s    884P$s#""5){{LL**2<<+<+<cjj#**VZVaVacfnHH 	r!   c                    UR                  5       U R                  R                  R                  ;   d+   SU R                  R                  R                   SU 35       eUR	                  5       nUS:X  a  Sn[        [        R                  U5      $ )Nzinput_precision must be one of . Got TF32X3TF32x3)lowerr,   r   allowed_dot_input_precisionsupperro  r   INPUT_PRECISION)r   input_precisions     r   _str_to_dot_input_precision*TritonSemantic._str_to_dot_input_precision  s    $$&$,,*>*>*[*[[ 	y-dll.B.B._._-``fgvfwx	y[)//1h&&Or))?;;r!   c           
        UR                   R                  5       (       a  UR                   R                  5       (       d   eUR                  R                  5       (       a!  UR                  R                  5       (       a  GO7UR                  [        R
                  [        R                  [        R                  [        R                  [        R                  [        R                  4;   d   SUR                   35       eUR                  [        R
                  [        R                  [        R                  [        R                  [        R                  [        R                  4;   d   SUR                   35       eUR                  UR                  :X  d!   SUR                   SUR                   35       eUR                  R                  5       (       d  UR                  R                  5       (       az  SU R                  R                  R                  ;   a  [        R                   " S5        U R#                  U[        R                  5      nU R#                  U[        R                  5      nUR                  R%                  5       =(       d    UR                  R%                  5       nUR                  R'                  5       =(       d    UR                  R'                  5       nU(       d  U(       a  U(       a  SOSn	XR                  R                  R                  ;   a  U R                  R                  R(                  n
[        R                   " U	 S	U
 S
U
 S35        U R#                  U[        R                  5      nU R#                  U[        R                  5      nUc   U R                  R                  R*                  nU R-                  U5      n[/        UR0                  5      n[/        UR0                  5      nXs=:X  a  S:X  d2  O  Xs=:X  a  S:X  d$  O   SUR0                   SUR0                   S35       eUR0                  S   R2                  UR0                  S   R2                  :X  dV   SUR0                   SUR0                   SUR0                  S   R2                   SUR0                  S   R2                   S3	5       eU R                  R4                  R7                  S5       c   S5       eU R                  R4                  S   " UR                   UR                   5      nUR0                  S   R2                  US   :  a@  UR0                  S   R2                  US   :  a   UR0                  S   R2                  US   :  d   SUS    SUS    SUS    35       eUR                   R8                  R;                  5       (       a\  UR                   R8                  [        R
                  :X  d   S5       eU R                  R=                  S5      n[        R>                  nGO@URA                  5       (       a  [C        S5      eUR                   R8                  RE                  5       (       d)  UR                   R8                  RA                  5       (       a,  U R                  RG                  S5      n[        R                  nOUR                   R8                  RI                  5       (       a,  U R                  RK                  S5      n[        R                  nOMURM                  5       (       a  U R                  RO                  S5      OU R                  RG                  S5      nUnUR                   R0                  S   nUR                   R0                  S   nUR                   R0                  S   nUS:X  a  UR                   R0                  S   OS n[        RP                  " UU(       a  UUU/OUU/5      nUc6  U R                  RS                  URU                  U R                  5      U5      nOLURV                  nUR                   R0                  UR0                  :X  a  UR                   RX                  U:X  d   eUcb  UR                  R                  5       (       a@  UR                  R                  5       (       a!  U R                  R                  RZ                  nOYSnOVUR                  R                  5       (       a7  UR                  R                  5       (       a  UU:  a  [C        S U S!U S35      eU R]                  U R                  R_                  URV                  URV                  UXE5      U5      $ )"NzUnsupported lhs dtype zUnsupported rhs dtype z&Both operands must be same dtype. Got r   fp8e4b15zthe use of fp8e4b15 is deprecated on Hopper and later architectures and can cause significant slow down. It will be removed in a future triton releasefp8e4b8fp8e5b16z- is AMD gfx942 specific and not supported on z^ so it's upcasted to fp16 and can cause significant slow down. Please use OCP fp8 variants on z for performancer      +Both inputs must be either 2D or 3D; (lhs: 	 vs rhs: r1  r  zFirst input shape (z) and second input shape z= are not compatible for matmul (second index of first shape (z0) must be equal to first index of second shape (min_dot_sizez2target doesn't provide lower shape bounds for dot.r   r   zInput shapes should have M >= z, N >= z
 and K >= zonly int8 supported!zhout_dtype=bfloat16 is unsupported. Please use out_dtype=float32/float16 and cast with `.to(tl.bfloat16)`zmax_num_imprecise_acc (z) must be <= K ()0r{   r|  rB   r[   r5   rR  uint8rS   rT   rU   rW   r  r,   r   !deprecated_fp8_dot_operand_dtypeswarningswarnr   
is_fp8e4b8is_fp8e5b16r  default_dot_input_precisionr  r}  ri  rR   r  r  r   r\   	get_int32r6   rZ   r3   rX   get_fp32rV   get_fp64rY   get_fp16rb  r~  r   r   rG  max_num_imprecise_acc_defaultr+   
create_dot)r   r   r   accr  max_num_imprecise_acc	out_dtypeuses_fp8e4b8uses_fp8e5b16	type_namer  lhs_rankrhs_rankr  r3  ret_scalar_tyMNKBr   
acc_handles                         r   dotTritonSemantic.dot  s   xx  ""sxx'8'8':':::99#))"2"2"4"499"((BJJRZZ!#!- - S0Fsyyk.RS -99"((BJJRZZ!#!- - S0Fsyyk.RS -99		)o-STWT]T]S^^cdgdmdmcn+oo)99  ""cii&;&;&=&=T\\11SSS m ))C,C))C,Cyy++-G1E1E1G		--/J3993H3H3J=%1	zILL00RRR||++00 k!Ntf U66:V;KMN iiRZZ0iiRZZ0""ll22NNO::?Ksyy>syy>(q(H,EA,E  	VItuxu~u~t  @I  JM  JS  JS  IT  TU  HV  	VEyy}""cii' 	u,SYYK7PQTQZQZP[  \Y  Z]  Zc  Zc  df  Zg  Zm  Zm  Yn  n^  _b  _h  _h  ik  _l  _r  _r  ^s  st  u	u ||''++#'( 	^)]	^ (||//?#((Syy}""l1o5#))B-:M:MQ]^_Q`:`		"##|A6	v0a0AVWHYYcdpqrdsctu	v 7 88??!!##88??bgg-E/EE-''*BHHM  z  XX__$$&&#((//*A*A*C*C&&q)BJJMXX__$$&&&&q)BJJM-6->->-@-@&&q)dllF[F[\]F^B%MHHNN2HHNN2HHNN2!)QCHHNN1D}1q!Qi1a&I;226<<3MrRJJ88>>V\\1chh6I6IY6VVV !(yy!!cii&6&6&8&8(,(<(<(Z(Z%()%yy!!cii&6&6&8&8=RUV=V #:;P:QQabcadde!fgg{{LL##CJJ

Joqwy 	yr!   c                |    [        [        R                  UR                  5       S 5      nUc  [	        SU S35      eU$ )NzInvalid float format: ri   )ro  r   ScaleDotElemTypeTYr  r3   )r   float_formatty_enums      r   _str_to_fp_typeTritonSemantic._str_to_fp_type  s>    "//1C1C1EtL?5l^1EFFr!   c                D   [         R                  [         R                  [         R                  [         R                  S.R                  U5      nUcD  US:X  d
   SU 35       eUR                  [         R                  :X  d   SUR                   35       eU$ UR                  U:X  a  U$ [         R                  [         R                  [         R                  [         R                  S.U   nUR                  U:X  d   SU SUR                   35       eU R                  X5      $ )z
If float_format is subbyte, make sure it's packed as uint8 and return it.
Otherwise, return a tensor (perhaps bitcasting) of the specified float format.
)e5m2e4m3bf16fp16e2m1z)Internal Error: Unexpected float format: z)e2m1 format must be packed as uint8. Got zUnexpected dtype for r  )
r5   float8e5
float8e4nvrT   rS   r  rB   r  r  r  )r   r  r:  	triton_tyunsigned_tys        r   _bitcast_to_fp_type"TritonSemantic._bitcast_to_fp_type#  s    
  [["--ZZ!!$\!2 	6)e-VWcVd+ee)99(a,UVYV_V_U`*aa(J99	!J#%88RXXryyZ\ZcZcdeqrK99+d/D\NRXY\YbYbXc-dd+<<//r!   c                   UR                   R                  5       (       a  UR                   R                  5       (       d   e[        UR                  5      n[        UR                  5      nXs=:X  a  S:X  d2  O  Xs=:X  a  S:X  d$  O   SUR                   SUR                   S35       eUR                  nUR                  nU R                  U5      nU R                  U5      n1 SknUU;   d
   SU 35       eUU;   d
   SU 35       eUS L =(       d/    [        U[        R                  5      =(       a    UR                  S L nUS L =(       d/    [        U[        R                  5      =(       a    UR                  S L nU R                  X5      nU R                  XF5      nU	(       d  US	:X  d   S
5       eU
(       d  US	:X  d   S
5       eUR                   R                  SS  u  nnUR                   R                  SS  u  nnUS	:X  a  SOSnUS	:X  a  SOSnU	(       a  UU-  OUnU
(       a  UU-  OUnUU:X  d"   SUR                   SUR                   S35       eUS:X  a  UR                   R                  S   OS nU	(       d  UU-  nU
(       d  UU-  n[        R                  " UU(       a  UUU/OUU/5      nU R                  R                  S5      nUc6  U R                  R                  UR                  U R                  5      U5      nOLUR                  nUR                   R                  UR                  :X  a  UR                   R                   U:X  d   eU(       a  S OUR                  nU(       a  S OUR                  n U R#                  U R                  R%                  UR                  U XR                  UXXU5
      U5      $ )Nr   r  r  r  r1  >   rA  rC  r@  r?  rB  zNYI: lhs_format zNYI: rhs_format rC  zBonly mxfp4 inputs can be packed along a dimension different than Kr  r   zCReduction dimension should pack the same number of elements; (lhs: r   )r{   r|  r}  ri  rR   r<  rn   r5   ry   rH  rb  r,   r#  r~  r   r   rG  r+   create_dot_scaled)!r   r   	lhs_scale
lhs_formatr   	rhs_scale
rhs_formatr(  	fast_math
lhs_k_pack
rhs_k_packr*  r.  r/  lhs_format_enumrhs_format_enumallowed_formatsrhs_scale_is_nonelhs_scale_is_noner1  K_LHSK_RHSr2  PACKED_APACKED_BPACKED_A_DIMPACKED_B_DIMr4  r   r3  r5  rhs_scale_handlelhs_scale_handles!                                    r   
dot_scaledTritonSemantic.dot_scaled5  s    xx  ""sxx'8'8':':::syy>syy>(q(H,EA,E  	VItuxu~u~t  @I  JM  JS  JS  IT  TU  HV  	VE$**
$**
..z:..z:B_,M0@.MM,_,M0@.MM,%-r*Y2U2qZcZiZimqZq%-r*Y2U2qZcZiZimqZq&&s7&&s7Z61w3ww1Z61w3ww188>>"#&588>>"#&q"f,1!"f,1!+5x%'5+5x%'5|+  	T/rsvs|s|r}  ~G  HK  HQ  HQ  GR  RS  .T  	T+!)QCHHNN1DHAHAyq1a)q!fE\\""1%;226<<3MrRJJ88>>V\\1chh6I6IY6VVV#44):J:J#44):J:J{{LL**3::7GZdZdfv+:z_iklrt 	tr!   c                N   UR                   [        R                  :w  a#  [        R                  " SUR                    35        U R                  U[        R                  5      nU R                  X#SS5      u  p#UR                  R                  5       (       a'  U R                  X5      u  pU R                  X#5      u  p#OU R                  X5      u  pUR                  nU R                  U R                  R                  UR                  UR                  UR                  5      U5      $ )Nzgtl.where with a non-boolean condition is deprecated and will error out in a future triton release. Got T)rB   r5   rq   r  r  r   r   r{   r|  r   r+   r,   create_selectr   )r   	conditionr|   r   r  r   s         r   r  TritonSemantic.wherei  s    ??bgg%MMy  {D  {J  {J  zK  L IIi1	00tTB>>""$$44YBLI,,Q2DAq44YBLI{{4<<55i6F6FRSRZRZ[]cddr!   c                d    U(       a  [         R                  " X#5      nOUnU R                  X5      $ r.   )r5   rb  r+   )r   r|   r`   r  res_tys        r   wrap_tensorTritonSemantic.wrap_tensor}  s)    ]]98F F{{1%%r!   c                L  ^ ^^^	^
 Uc  [        U 4S jT 5       5      mSnTS   R                  R                  m
[        T
5      nX$:  d   SU S35       e[	        T
5       VVs/ s H  u  pVXR:w  d  M  UPM     snnm	[        U
4S jT 5       5      (       d   S5       eT R                  R                  T Vs/ s H  owR                  PM     snU5      mU" T5        TR                  5       (       d   e[        UUU	U 4S j[        [        T5      5       5       5      $ s  snnf s  snf )Nc              3  n   >#    U  H*  nTR                  XR                  R                  /S S9v   M,     g7f)Tr  N)r  rt  rR   )r  tr   s     r   r  +TritonSemantic.reduction.<locals>.<genexpr>  s+     ^W]RS4<<GGMM?<MW]s   25r   z&reduction axis must be < inputs rank (r1  c              3  T   >#    U  H  oR                   R                  T:H  v   M     g 7fr.   )r{   ri  )r  rl  ri  s     r   r  rm    s     9&Q66<<5(&s   %(z-all reduction inputs must have the same shapec              3     >#    U  H>  nTR                  TR                  U5      TU   R                  R                  T5      v   M@     g 7fr.   rh  
get_resultr{   r   )r  r  inputs	reduce_opr  r   s     r   r  rm    sB      uas\]DY11!4fQinn6K6KYWWas   AA	)tupler{   ri  r}  r  allr,   create_reducer   verifyrh  )r   rr  r8   region_builder_fnrankr  r  rl  rs  r  ri  s   ``      @@@r   	reductionTritonSemantic.reduction  s   <^W]^^FDq	$$5z{LDTF!LL{#,U#3A#341qyQ#3A	9&999j;jj9LL..&/I&Q&/I4P	)$!!!! uafgjkqgrasu u 	u B 0Js   "D1D5D!c                  ^ ^^^ TS   R                   R                  m[        T5      nU* Us=::  a  U:  d  O   SU SU S35       eUS:  a  X%-  nT H$  nUR                   R                  T:X  a  M   S5       e   T R                  R	                  T Vs/ s H  ofR
                  PM     snX$5      mU" T5        TR                  5       (       d   e[        UUU U4S j[        [        T5      5       5       5      $ s  snf )Nr   z
scan axis z must be < inputs rank (r1  z(all scan inputs must have the same shapec              3     >#    U  H>  nTR                  TR                  U5      TU   R                  R                  T5      v   M@     g 7fr.   rp  )r  r  rr  scan_opr   ri  s     r   r  2TritonSemantic.associative_scan.<locals>.<genexpr>  s@     wdv_`T%%g&8&8&;VAY^^=R=RTYZZdvrt  )	r{   ri  r}  r,   create_scanr   rx  ru  rh  )	r   rr  r8   ry  reverserz  rl  r  ri  s	   ``     @@r   associative_scanTritonSemantic.associative_scan  s    q	$$5zu#t#Wz$7OPTvUV%WW#!8LDA66<<5(T*TT(  ,,**f+EfHHf+EtU'"~~wdijmntjudvwww	 ,Fs   C:c                   UR                   R                  5       (       d   S5       e[        UR                  R                  5      n[        UR                  R                  5      U:X  d   S5       eU* Us=::  a  U:  d  O   SU SU S35       eUS:  a  X4-  n[        U5       HI  nXS:X  a  M
  UR                  R                  U   UR                  R                  U   :X  a  M@   SU S35       e   U R                  R                  UR                  UR                  U5      nU R                  XaR                  R                  UR                  R                  5      $ )	Nzindex must be an integer tensorz0source and index tensors must have the same rankzgather axis z must be < source rank (r1  r   z
index dim z( must match the corresponding source dim)rB   r\   r}  r{   ri  rh  r,   create_gatherr   rh  r   )r   srcindexr8   rz  r  gathers          r   r  TritonSemantic.gather  s'   {{!!##F%FF#388>>"5::##$,`.``,u#t#Y|D69QRVQWWX%YY#!8LDtAy::##A&#((..*;;xz$Ow=xx; 
 ++CJJdK9I9IJJr!   c                    U(       d  gUtp#[        [        U5      5       H  nU R                  X#U   5      u  o#U'   M     [        [        U5      5       H  nU R                  X#U   5      u  o#U'   M     U/UQ7$ )Nr-  )rh  r}  r   )r   rr  headtailr  s        r   broadcast_tensors TritonSemantic.broadcast_tensors  su    s4y!A 55dGDMDq' "s4y!A 55dGDMDq' "}t}r!   c           	       ^ ^ T R                   " U6 n[        U5      S:  d   S5       eU Vs/ s H*  oQS   R                  R                  UR                  5      PM,     nnT R
                  R                  U Vs/ s H  ofR                  PM     snU Vs/ s H  oUR                  T R
                  5      PM     snU5      mU" T5        [        UU 4S j[        U5       5       5      $ s  snf s  snf s  snf )Nr   z1map_elementwise must have at least 1 input tensorc              3  j   >#    U  H(  u  pTR                  TR                  U5      U5      v   M*     g 7fr.   )r+   rq  )r  r  r  elementwise_opr   s      r   r  1TritonSemantic.map_elementwise.<locals>.<genexpr>  s.     hPguqT[[!:!:1!=rBBPgs   03)r  r}  r{   r   r   r,   create_map_elementwiser   r   ru  r  )r   rr  result_typespackry  r  rl  r  s   `      @r   map_elementwiseTritonSemantic.map_elementwise  s    ''06{QS SSLXYLbq	66ryyALY<<%&v!XXv&.:;lXXdll#l;

 	.) hPYZfPghhh Z&;s   1C&8C+$C0
c                   [        UR                  5      S:X  d   S5       eUR                  R                  5       (       d   S5       eUb[  U R	                  X1R                  5      nUR
                  R                  R                  5       (       d  [        S5      eUR                  nU R                  U R                  R                  UR                  X#5      [        R                  " [        R                  U/5      5      $ )Nr   z histogram only supports 1D inputz%histogram only supports integer inputr  )r}  ri  rB   r\   r  r{   r   r  r3   r   r+   r,   create_histogramr5   rb  r6   )r   r   num_binsrJ  s       r   	histogramTritonSemantic.histogram  s    5;;1$H&HH${{!!##L%LL#,,T;;?D99##++-- !EFF;;D{{4<<88xV==H:>@ 	@r!   c                   [        S[        UR                  5      5      [        U5      :w  a  [        S5      eUR                  R                  S[        R                  " X!R                  R                  5       5      5        U$ )Nr   zAShape of input to multiple_of does not match the length of valuesztt.divisibility)	r   r}  ri  r3   r   set_attrr   	make_attrget_contextr   r|   r  s      r   multiple_ofTritonSemantic.multiple_of  sY    q#agg,3v;.`aa	+R\\&((BVBVBX-YZr!   c                    [        UR                  5      [        U5      :w  a  [        S5      eUR                  R	                  S[
        R                  " X!R                  R                  5       5      5        U$ )NzDShape of input to max_contiguous does not match the length of valuesztt.contiguityr}  ri  r3   r   r  r   r  r  r  s      r   max_contiguousTritonSemantic.max_contiguous  sQ    qww<3v;&cdd	/2<<@T@T@V+WXr!   c                    [        UR                  5      [        U5      :w  a  [        S5      eUR                  R	                  S[
        R                  " X!R                  R                  5       5      5        U$ )NzCShape of input to max_constancy does not match the length of valuesztt.constancyr  r  s      r   max_constancyTritonSemantic.max_constancy  sQ    qww<3v;&bcc	.",,vxx?S?S?U*VWr!   c                r    U R                  U R                  R                  5       [        R                  5      $ r.   )r+   r,   create_barrierr5   rp  )r   s    r   debug_barrierTritonSemantic.debug_barrier  s$    {{4<<668"''BBr!   c                   UR                  S5      (       d  U(       a  US-  nUR                  S5      (       d  U(       a  US S S-   n[        U5      S:  a  UR                  S5      (       d  SU-   nU Vs/ s H  oDR                  PM     nnU Vs/ s H  oDR                  R                  5       PM     nnU R                  U R                  R                  XXV5      [        R                  5      $ s  snf s  snf )N r  r  r   )endswithr}  
startswithr   rB   r   r+   r,   create_printr5   rp  )r   prefixargshexargnew_args	is_signeds          r   device_printTritonSemantic.device_print  s     s##cMFt$$CR[4'Fv;?6#4#4S#9#96\F*./$3JJ$/:>?$3YY,,.$	?{{4<<44V(VXZX_X_`` 0?s   6C0#C5c                   U R                   R                  R                  (       d  g Ub   U R                  XR	                  U5      5      nU R                  U R                   R                  UR                  U5      [        R                  5      $ r.   )
r,   r   debugr  r  r+   create_assertr   r5   rp  )r   r   r   rJ  s       r   r   TritonSemantic.device_assert  s^    ||##))88D))D/2D{{4<<55dkk3GQQr!   c                    U R                  U R                  R                  UR                  5      [        R
                  5      $ r.   )r+   r,   create_assumer   r5   rp  )r   r   s     r   assumeTritonSemantic.assume  s*    {{4<<55dkkBBGGLLr!   c                   [        U[        5      (       a  [        R                  " U5      n[        U[        R                  5      (       a  [        UR                  [
        5      (       a%  U R                  R                  UR                  5      $ U(       aS  SUR                  s=::  a  S:  d  O   SUR                   S35       eU R                  R                  UR                  5      $ SUR                  s=::  a  S:  d  O   SUR                   S35       eU R                  R                  UR                  5      $ [        U[        R                  5      (       a  UR                  R                  S:X  d   S	5       eUR                  R                  5       (       d   S
5       eUR                  [        R                  :w  a^  U(       aW  U R                  R                  UR                   U R                  R#                  5       UR                  R%                  5       5      $ UR                  [        R                  :X  a  U(       d   S5       eUR                   $  S['        U5       35       e)Nrg   rh   z@Block pointers only support 64 bit `shape/strides`, got a value z which is out of the rangere   rf   zFBlock pointers only support 32 bit `offsets/block_shape`, got a value r   z*Expected a scalar in shape/strides/offsetsz8Expected an integer scalar type in shape/strides/offsetszzBlock pointers only support 32 bit `offsets/block_shape`, add a `.to(tl.int32)` or use regular indexing for 64 bit supportz3Unsupported element type in shape/strides/offsets: )rn   rr   r5   ry   rR   ro   r,   rp   r  r"  r+   rt  rB   r\   rt   r   r   get_int64_tyr   r{   )r   r@  ra  s      r   _convert_elem_to_ir_value(TritonSemantic._convert_elem_to_ir_value  s   dC  <<%DdBLL))$**d++||,,TZZ883e3 J 8##'::,.H6J J3||--djj993e3 J 8##'::,.H6J J3||--djj99bii((::##q(V*VV(::$$&&b(bb&zzRXX%+||33DKKAZAZA\48JJ4L4L4NP Prxx'W W Wu;;XKDQUJ<XXur!   c                    [        US5      (       a!  U Vs/ s H  o0R                  X25      PM     sn$ U R                  X5      /$ s  snf )Nr;  )r<  r  )r   	list_likera  r@  s       r   rc  $TritonSemantic._convert_to_ir_values9  sI    9j))R[\R[$224ER[\\..yFGG ]s   Ac           	       ^ U R                  U5      nU R                  U5      nU R                  USS9nUR                  R                  5       (       a)  UR                  R                  R	                  5       (       a  [        S5      eUR                  R                  [        R                  :X  aI  U R                  U[        R                  " [        R                  UR                  R                  5      5      n[        TS5      (       d  T/mT Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     snm[!        S T 5       5      (       d   S5       e[        US5      (       d  U/nU Vs/ s H0  n[        U[        R                  5      (       a  UR                  OUPM2     nn[#        U5      [%        ['        [)        U5      5      5      :X  d   S5       e[!        U4S jX#XF4 5       5      (       d   S	5       eU R*                  R-                  UR.                  X#UTU5      nU R1                  U[        R                  " [        R2                  " UR                  R                  T5      5      5      $ s  snf s  snf )
NFr`  zMExpected `base` to be a pointer type (but not a block pointer type or others)r;  c              3  z   #    U  H1  n[        U[        5      =(       a    S Us=:*  =(       a    S:  Os  v   M3     g7f)re   rf   N)rn   rr   )r  r@  s     r   r  0TritonSemantic.make_block_ptr.<locals>.<genexpr>Q  s.     \P[:dC(CVt-C-Ce-CCP[s   9;zGExpected a list of constant integers (`int32_t` range) in `block_shape`z<Expected a permutation of (0, 1, ..., len(order)-1) in orderc              3  R   >#    U  H  n[        T5      [        U5      :H  v   M     g 7fr.   )r}  )r  r  r?  s     r   r  r  [  s!     hGg)3{#s9~5Ggs   $'zBExpected shape/strides/offsets/block_shape to have the same length)rc  r{   r   rG  r|  r3   r5   rq   r   rS  rR  rT  r<  rn   ry   rR   rv  r  r  rh  r}  r,   create_make_block_ptrr   r+   rb  )	r   baseri  stridesrf  r?  orderr@  r   s	        `   r   make_block_ptrTritonSemantic.make_block_ptr>  s    **51,,W5,,W%,H yy!!TYY%9%9%B%B%D%Dlmm 99277*99T2??277DII<S<S#TUD {J//&-KZefZeRVZbll%C%CtzzMZef\P[\\\ 	VU	V\ uj))GETYZTYDz$==4GTYZe}U3u:%6 77w9ww7 hX_Gghhh 	QP	Qh 33DKKQXZeglm{{62??2==AUAUWb3c#dee% g [s   7J 7J
c                    U R                  USS9nU R                  U R                  R                  UR                  U5      UR
                  5      $ rn  )rc  r+   r,   create_advancer   r{   )r   r  rf  s      r   advanceTritonSemantic.advanced  sC    ,,W%,H {{4<<66t{{GLdiiXXr!   c           	     <   [        U5      nSUs=::  a  S::  d  O  [        SU S35      e[        U5      U:w  a  [        SU S[        U5       35      e[        U5      U:w  a  [        SU S[        U5       35      e[        UR                  [        R
                  5      (       d   eUR                  R                  R                  S	-  n[        R                  " US
   5      nX-  S:  a  [        SU SU SX-   S35      e[        R                  " US
   5      n	U	S:w  a  [        SU	 35      eU V
s/ s H"  oR                  U
[        R                  5      PM$     nn
U V
s/ s H6  oR                  [        R                  " U
5      [        R                  5      PM8     nn
[        R                  " U5      n[        UR                  [        R
                  5      (       d   e[        R                  " UR                  R                  U5      nUR                  nUR                  R                  R!                  5       nU R#                  U5      nUR                  R                  R%                  5       (       a)  U[&        R(                  R*                  :X  a  [        S5      eU R,                  R/                  X Vs/ s H  oR                  PM     snU Vs/ s H  oR                  PM     snXMU5      n[        R0                  " UX#U5      $ s  sn
f s  sn
f s  snf s  snf )Nr      z Expected 1 <= ndim <= 5 but got z dimensionsz	Expected z strides but got zExpected block_shape to have z dimensions but got r  r  r   zRDescriptor block shape must have at least 16 bytes in the last dimension, but got z * z = z bytesz-Tensor descriptor last dim must be 1 but got z8Padding option `nan` is not supported for integer blocks)r}  r3   rn   rB   r5   rS  rG  r  r  ru  r6   rt   _unwrap_shaper{   rb  r   r   r  r\   r   r  r  r,   create_make_tensor_descriptortensor_descriptor)r   r  ri  r  r?  r  rg  	elem_sizecontig_dim_sizelast_strider|   r{   base_handleis_signed_intr  r  r   s                    r   make_tensor_descriptor%TritonSemantic.make_tensor_descriptork  s   5zTQ?v[QRRw<4y.?G~NOO{t#<TFBVWZ[bWcVdeff$**boo6666JJ))<<A	11+b/B&+detduux  zC  yD  DG  HW  Hc  Gd  dj  k  --gbk:!L[MZ[[8=>1!!!RXX.>SZ[SZa##B$;$;A$>ISZ[ &&{3$))R__5555}}TYY11;?kk		,,::<--n=99&&((W8I8I8Q8Q-QWXX;;K\aIb\aWX((\aIbOV<Ww!XXw<WYd<CE ##FEDAA' ?[ Jc<Ws   3)L
"=L9L
Lr/   )r8   rr   returnr   )rF   tl.dtyperG   r  r  r  )rF   r  r]   ro   rG   r  r^   ro   r_   ro   r  r  )T)r}   ro   )r   r  r   r  r   ro   r  None)FFTF)r   TensorTy | numbers.Numberr   r  r  Tuple[TensorTy, TensorTy])r   r   r   r   r   callable)r   r  r   r  r   ro   r  r   )r   r  r   r  r  r   )r   r  r   r  r   ro   r  r   )r|   r   r   r   r   tl.PropagateNan)r|   r   r   r   r   r   r   r  )r   r   r   r   r  r  )r   r   r   r   r  r   )r   r   )r   r   r  r   )r:  r   r  tl.block_type)rd  rr   re  rr   r   r  r  r   )rB   r  r  r   )ri  	List[int]rB   r  r  r   )rR   r   ri  r  r  r   )rR   r   r  r   )r   r   r  r  r  ro   r  r   )r   r   r8   rr   r  r   )r   r   r   r   r  ro   r  r   )r  r   r  r   r  r   )r  r   r  r  )r   r   r  
Tuple[int]r  r   )r   r   ri  r  r  r   )r   r   r   r   r  r   )r  Optional[str])r   r   r  r  r  r   r.   )r   r   r  r  r  r  r  r   )rI  r   rJ  Optional[TensorTy]r   r  r>  r   r  r  r   r  r  r  rK  ro   r  r   )re  tl.tensor_descriptor_baser   r  r  r  r  r   )re  r  rR   r   r  r  )re  r  rR   r   r  r   )r   r  r  r  r  r   )rI  r   r  r   rJ  r  r   r  r  r  r  r   )rI  r   r  r   r  r   r+  r  r7  r  r  r   )
rI  r   r  r   rJ  r   r  r  r  z#Tuple[TensorTy, TensorTy, TensorTy])r|   r   r  r   )rI  r   r  r   rJ  r   r+  r  r7  r  r  r   )r   r   r   r   r(  r   r  r  r)  rr   r*  r  r  r   )r:  r  )r  r   r:  r  )r   r   rL  r   rM  r  r   r   rN  r  rO  r  r(  zTensorTy | NonerP  ro   rQ  ro   rR  ro   r*  r  r  r   )rd  r   r|   r   r   r   r  r   )rr  Sequence[TensorTy]r8   rr   r  Tuple[TensorTy, ...])rr  r  r8   rr   r  ro   r  r  )r  r   r  r   r8   rr   r  r   )rr  zSequence[tl.tensor]r  zSequence[tl.dtype]r  rr   r  zTuple[tl.tensor, ...])r   r   r  rr   rJ  r  r  r   )r|   r   r  r  r  r   )r  r   )r  r  r  List[TensorTy]r  ro   r  r   )r   r   r   r  rJ  r  r  r   )r  r   r  r   )r  )r  r   ri  r  r  r  r?  zList[tl.constexpr]r  r  r  ztl.tensor_descriptor)~r"   r#   r$   r%   r5   r+   __annotations__langr   r9   r=   rL   rb   rz   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r  r  r  r  r  r!  r&  r*  r.  r   r  r;  rB  r   rN  r   rX  r^  rk  rv   ru  ry  rx  r  r  r  r  r  r  r  r  r   r  r  r   r  r  r  r  r,  r8  rB  rM  rX  r[  rh  rk  rq  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r6  r<  rH  r`  r  rh  r{  r  r  r  r  r  r  r  r  r  r  r   r  r  rc  r  r  r  r&   r-  r!   r   r)   r)      s   YYFN&DO
QD05*.053;05d#R	@ ae05#:S#J,$>#>(0>>8#8(08"8#8(08]2>,8.9"9"	a\[\'&"
]]\)$/8888	8	8 GK Z$	)2AhSfXV0

TW 2ptj$i9^		
,:xn n25nHKnZ^nckn	/),	/19	//nu@[
uuuuu$0**x8*$XW"W'/W(u'*/R8+$)L$)L%yxy<[y#&[y3;[y@H[yz0$.t0.t>A.tHW.tdh.t#.t15.tBJ.tOW.the(&u,x"&x+?x.K,i.Ci*	@CaRMY4H
$fLY W](B,>(BPS(Bau(B (Br!   r)   )
__future__r   r  typingr   r   r   r   r   r	   r
   r   triton.runtimer   _C.libtritonr   r  r   r5   r   r   	Exceptionr   r)   r-  r!   r   <module>r     sV    "  J J J  !  CL:F	 FzBWX& zBr!   