
    9iX                        S SK Jr  S SKJrJr  S SKJrJrJr  S SK	J
r
  S r " S S5      r\" SS	9 " S
 S\5      5       r\" SS	9 " S S\5      5       r\" SS	9 " S S\5      5       r\" SS	9 " S S\5      5       r\" SS	9 " S S\5      5       r\" SSS9 " S S\5      5       r " S S5      r\
S 5       r\" SS	9 " S S\5      5       r\" SSS9 " S S\5      5       r\" SSS9 " S S\5      5       rS#S  jrS! rg")$    )	dataclass)ListOptional)_unwrap_if_constexpr_unwrap_shapeconstexpr_type)constexpr_functionc                 R   U R                   =(       d    S/U-  nU R                  =(       d    S/U-  nU R                  =(       d    [        [	        [        U5      5      5      n[        R                  U SU5        [        R                  U SU5        [        R                  U SU5        g )N   ctas_per_cgacta_split_num	cta_order)r   r   r   listreversedrangeobject__setattr__)layoutrankr   r   r   s        k/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/triton/experimental/gluon/language/_layouts.py_realize_cta_layoutr      s    &&41#*L((6QC$JM  ?D%+)>$?I
v~|<
v>
v{I6    c                   (    \ rS rSrSr\S 5       rSrg)DistributedLayout   z8
Base class for distributed memory layouts in Gluon IR.
c                     [        U 5      $ Nr   selfs    r   typeDistributedLayout.type       d##r    N__name__
__module____qualname____firstlineno____doc__propertyr!   __static_attributes__r$   r   r   r   r           $ $r   r   T)frozenc                        \ rS rSrS rS rSrg)
AutoLayout   c                 "    UR                  5       $ r   )get_auto_layoutr    builders     r   _to_irAutoLayout._to_ir   s    &&((r   c                     g)NALr$   r   s    r   mangleAutoLayout.mangle    s    r   r$   N)r&   r'   r(   r)   r6   r:   r,   r$   r   r   r0   r0      s    )r   r0   c                      ^  \ rS rSr% Sr\\   \S'   \\   \S'   \\   \S'   \\   \S'   Sr\	\\      \S'   Sr
\	\\      \S	'   Sr\	\\      \S
'   U 4S jrS rS\4S jrS rSrU =r$ )BlockedLayout$   a8  
Represents a blocked layout, partitioning a tensor across threads, warps, and CTAs.

Args:
    size_per_thread (List[int]): Number of elements per thread per dimension.
    threads_per_warp (List[int]): Number of threads per warp per dimension.
    warps_per_cta (List[int]): Number of warps per CTA per dimension.
    order (List[int]): The ordering of dimensions for partitioning.
    ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
    cta_split_num (Optional[List[int]]): Split factors for CTAs.
    cta_order (Optional[List[int]]): Ordering for CTAs.
size_per_threadthreads_per_warpwarps_per_ctaorderNr   r   r   c                 t  > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R
                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [        U R                  5      n[        X5        [        U R                  5      U:X  d   e[        U R
                  5      U:X  d   e[        U R                  5      U:X  d   e[        U R                  5      U:X  d   e[        U R                  5      U:X  d   e[        U R                  5      U:X  d   eg )Nr?   r@   rA   rB   r   r   r   )superr   r   r?   r@   rA   rB   r   r   r   lenr   r    r   	__class__s     r   __post_init__BlockedLayout.__post_init__:   sg   -/CDDXDX/YZ.0DTEZEZ0[\O-A$BTBT-UVG%9$**%EFN,@ARAR,STO-A$BTBT-UVK)=dnn)MN4''(D'4(()T1114%%&$...4::$&&&4$$%---4%%&$...4>>"d***r   c           	          UR                  U R                  U R                  U R                  U R                  U R
                  U R                  U R                  5      $ r   )get_blocked_layoutr?   r@   rA   rB   r   r   r   r4   s     r   r6   BlockedLayout._to_irL   sP    ))  !!JJNN
 	
r   returnc                 4   S nU" U R                   5      nU" U R                  5      nU" U R                  5      nU" U R                  5      nU" U R                  5      nU" U R
                  5      nU" U R                  5      nSU SU SU SU SU SU SU S3$ )Nc                 H    U c  gSR                  [        [        U 5      5      $ N _joinmapstrxs    r   	stringify'BlockedLayout.mangle.<locals>.stringifyY       y88CQK((r   B)r?   r@   rA   rB   r   r   r   )	r    rY   r?   r@   rA   rB   r   r   r   s	            r   r:   BlockedLayout.mangleW   s    	)
 $D$8$89$T%:%:;!$"4"45$**% !2!23!$"4"45dnn-	?#1%5$6aawaP\~]^_l^mmnoxnyyz{{r   c                    [        [        U R                  5      [        U R                  5      [        U R                  5      [        U R
                  5      U R                  (       a  [        U R                  5      OS U R                  (       a  [        U R                  5      OS U R                  (       a  [        U R                  5      45      $ S 45      $ r   )	hashtupler?   r@   rA   rB   r   r   r   r   s    r   __hash__BlockedLayout.__hash__g   s    $&&'$''($$$%$**(,(9(9E$##$t)-););E$$$%%)^^E$..!
  	 :>
  	r   r$   r&   r'   r(   r)   r*   r   int__annotations__r   r   r   r   rH   r6   rV   r:   ra   r,   __classcell__rG   s   @r   r=   r=   $   s     #Y3i99(,L(49%,)-M8DI&-%)IxS	")+$	
| | 	 	r   r=   c                   \   ^  \ rS rSr% Sr\\S'   \\S'   U 4S jrS r	S\
4S jrS	 rS
rU =r$ )SliceLayouts   z
Represents a layout corresponding to slicing a distributed tensor along one dimension.

Args:
    dim (int): The dimension index to slice.
    parent (DistributedLayout): The parent layout before slicing.
dimparentc                    > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        g )Nrk   rl   )rD   r   r   rk   rl   r    rG   s    r   rH   SliceLayout.__post_init__   s5    E#7#ABH&:4;;&GHr   c                 l    UR                  U R                  U R                  R                  U5      5      $ r   )get_slice_layoutrk   rl   r6   r4   s     r   r6   SliceLayout._to_ir   s.    ''HHKKw'
 	
r   rM   c                 X    SU R                    SU R                  R                  5        S3$ )NSLrR   )rk   rl   r:   r   s    r   r:   SliceLayout.mangle   s)    DHH:Qt{{1134B77r   c                 D    [        U R                  U R                  45      $ r   )r_   rk   rl   r   s    r   ra   SliceLayout.__hash__   s    TXXt{{+,,r   r$   r&   r'   r(   r)   r*   rd   re   r   rH   r6   rV   r:   ra   r,   rf   rg   s   @r   ri   ri   s   s6     
HI
8 8- -r   ri   c                      ^  \ rS rSr% Sr\\\      \S'   \\\      \S'   \\\      \S'   \\\      \S'   \\   \S'   U 4S jrS	 r	S
 r
S rSrU =r$ )DistributedLinearLayout   a  
Represents a linear distributed layout with explicit bases at register, lane, warp, and block levels.
See: https://arxiv.org/abs/2505.23819 for reference.

Args:
    reg_bases (List[List[int]]): Bases for register-level distribution.
    lane_bases (List[List[int]]): Bases for lane-level distribution.
    warp_bases (List[List[int]]): Bases for warp-level distribution.
    block_bases (List[List[int]]): Bases for block-level distribution.
    shape (List[int]): The tensor global shape.
	reg_bases
lane_bases
warp_basesblock_basesshapec                   > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R
                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [        U R                  5      nU R                   H  n[        U5      U:X  a  M   e   U R                   H  n[        U5      U:X  a  M   e   U R
                   H  n[        U5      U:X  a  M   e   U R                   H  n[        U5      U:X  a  M   e   g )Nr|   r}   r~   r   r   )	rD   r   r   r|   r}   r~   r   r   rE   )r    r   basisrG   s      r   rH   %DistributedLinearLayout.__post_init__   s   Kt~~)FGL-*HIL-*HIM=9I9I+JKG]4::%>?4::^^Eu:%%% $__Eu:%%% %__Eu:%%% %%%Eu:%%% &r   c                     UR                  U R                  U R                  U R                  U R                  U R
                  5      $ r   )get_distributed_linear_layoutr|   r}   r~   r   r   r4   s     r   r6   DistributedLinearLayout._to_ir   s<    44T^^T__VZVeVegkgwgw59ZZA 	Ar   c                     SU R                    SU R                   SU R                   SU R                   SU R                   S3$ )NDLLrR   )r|   r}   r~   r   r   r   s    r   r:   DistributedLinearLayout.mangle   sI    T^^$Adoo%6a7H$JZJZI[[\]a]g]g\hhkllr   c                 P   [        [        [        [        U R                  5      5      [        [        [        U R                  5      5      [        [        [        U R
                  5      5      [        [        [        U R                  5      5      [        U R                  5      45      $ r   )r_   r`   rU   r|   r}   r~   r   r   r   s    r   ra    DistributedLinearLayout.__hash__   sn    #eT^^,-#eT__-.#eT__-.#eT--./$**
  	r   r$   )r&   r'   r(   r)   r*   r   rd   re   rH   r6   r:   ra   r,   rf   rg   s   @r   rz   rz      sg    
 DIT#YT#Yd3i 9&$Am r   rz   c                   f   ^  \ rS rSr% Sr\\S'   \\S'   \\S'   U 4S jrS r	S\
4S	 jrS
 rSrU =r$ )DotOperandLayout   z
Represents a layout for a dot operand.

Args:
    operand_index (int): 0 for LHS and 1 for RHS of the dot operation.
    parent (DistributedLayout): The parent layout, representing the MMA.
    k_width (int): Number of elements per 32-bits.
operand_indexrl   k_widthc                    > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R
                  5      5        g )Nr   rl   r   )rD   r   r   r   rl   r   rn   s    r   rH   DotOperandLayout.__post_init__   sP    O-A$BTBT-UVH&:4;;&GHI';DLL'IJr   c                     UR                  U R                  U R                  R                  U5      U R                  5      $ r   )get_dot_operand_layoutr   rl   r6   r   r4   s     r   r6   DotOperandLayout._to_ir   s4    --d.@.@$++BTBTU\B]_c_k_kllr   rM   c                 r    SU R                    SU R                  R                  5        SU R                   S3$ )NDOrR   )r   rl   r:   r   r   s    r   r:   DotOperandLayout.mangle   s6    D&&'q););)=(>a~RPPr   c                 Z    [        U R                  U R                  U R                  45      $ r   )r_   r   rl   r   r   s    r   ra   DotOperandLayout.__hash__   s"    T''dllCDDr   r$   rx   rg   s   @r   r   r      sA     LK
mQ QE Er   r   )r.   eqc                      ^  \ rS rSr% Sr\\   \S'   \\   \S'   \\   \S'   Sr\	\\      \S'   Sr
\	\\      \S'   Sr\	\\      \S	'   U 4S
 jrS rS\4S jrS rSrU =r$ )NVMMADistributedLayout   a  
Represents a layout for NVIDIA MMA (tensor core) operations.

Args:
    version (List[int]): Version identifier for the MMA instruction.
    warps_per_cta (List[int]): Number of warps per CTA.
    instr_shape (List[int]): Instruction shape for MMA.
    ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
    cta_split_num (Optional[List[int]]): Split factors for CTAs.
    cta_order (Optional[List[int]]): CTA ordering.
versionrA   instr_shapeNr   r   r   c                   > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R
                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [        U R                  5      n[        X5        [        U R                  5      U:X  d   e[        U R                  5      U:X  d   e[        U R                  5      U:X  d   eg )Nr   rA   r   r   r   r   )rD   r   r   r   rA   r   r   r   r   rE   r   rF   s     r   rH   $NVMMADistributedLayout.__post_init__   s   I';DLL'IJO-A$BTBT-UVM+?@P@P+QRN,@ARAR,STO-A$BTBT-UVK)=dnn)MN4%%&D'4$$%---4%%&$...4>>"d***r   c                     UR                  U R                  U R                  U R                  U R                  U R
                  U R                  5      $ r   )get_mma_layoutr   rA   r   r   r   r   r4   s     r   r6   NVMMADistributedLayout._to_ir  sE    %%dllD4F4FHYHY[_[m[m&*nnd6F6FH 	Hr   rM   c                     SU R                    SU R                   SU R                   SU R                   SU R                   SU R
                   S3$ )NMMA_rR   _MMA)r   rA   r   r   r   r   r   s    r   r:   NVMMADistributedLayout.mangle	  sq    dll^1T%7%7$8$:J:J9K1TM^M^L__`aeasas`ttuvz  wE  wE  vF  FJ  K  	Kr   c           
         [        [        U R                  5      [        U R                  5      [        U R                  5      U R
                  (       a  [        U R
                  5      OS U R                  (       a  [        U R                  5      OS U R                  (       a  [        U R                  5      45      $ S 45      $ r   )r_   r`   r   rA   r   r   r   r   r   s    r   ra   NVMMADistributedLayout.__hash__  s    U4<<(%0B0B*C4++,$J[J[eD4E4E.Fae262D2DU4--.$.2nnU4>>*H I 	I CGH I 	Ir   r$   rc   rg   s   @r   r   r      s    
 #Y9c(,L(49%,)-M8DI&-%)IxS	")+HK KI Ir   r   c                   (    \ rS rSrSr\S 5       rSrg)SharedLayouti  z3
Base class for shared memory layouts in Gluon IR.
c                     [        U 5      $ r   r   r   s    r   r!   SharedLayout.type  r#   r   r$   Nr%   r$   r   r   r   r     r-   r   r   c                     U nUbC  [        U5      [        U 5      :X  d   e[        [        U5      5       H  nX#==   X   -  ss'   M     U$ r   )rE   r   )r   r   shape_per_ctark   s       r   _get_shape_per_ctar     sO    M =!SZ///]+,C-"44 -r   c                      ^  \ rS rSr% Sr\\S'   \\S'   \\S'   Sr\\S'   Sr	\\S'   S	r
\\\      \S
'   S	r\\\      \S'   S	r\\\      \S'   U 4S jrS r\\  SS j5       5       rS\4S jrS rSrU =r$ )NVMMASharedLayouti'  a  
Represents a layout for shared memory suitable for NVIDIA MMA operations.

Args:
    swizzle_byte_width (int): Width in bytes for swizzling.
    element_bitwidth (int): Bitwidth of element type.
    rank (int): Rank of the tensor.
    transposed (bool): Whether the layout is transposed.
    fp4_padded (bool): Whether FP4 padding is used.
    ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
    cta_split_num (Optional[List[int]]): Split factors for CTAs.
    cta_order (Optional[List[int]]): CTA ordering.
swizzle_byte_widthelement_bitwidthr   F
transposed
fp4_paddedNr   r   r   c                 N  > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R
                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        U R                  S	;   d   eU R                  S
;   d   eU R
                  n[        X5        [        U R                  5      U:X  d   e[        U R                  5      U:X  d   e[        U R                  5      U:X  d   eg )Nr   r   r   r   r   r   r   r   )   r       @   )r   r   r      )rD   r   r   r   r   r   r   r   r   r   r   r   rE   rF   s     r   rH   NVMMASharedLayout.__post_init__?  sX   02FtG^G^2_`.0DTEZEZ0[\F$8$CDL*>t*OPL*>t*OPN,@ARAR,STO-A$BTBT-UVK)=dnn)MN$$777&&*::::yyD'4$$%---4%%&$...4>>"d***r   c           	          UR                  U R                  U R                  U R                  U R                  U R
                  U R                  U R                  5      $ r   )get_nvmma_shared_layoutr   r   r   r   r   r   r   r4   s     r   r6   NVMMASharedLayout._to_irQ  sN    ..##!!OOOONN
 	
r   c                    U(       a  SOSn[        X5      n[        U 5      n	U(       a  USS USS -   nUS   U-  n
XR                  -  S-  nUS:  a  US-  S:X  a  SnO&US:  a  US-  S:X  a  SnOUS	:  a  US	-  S:X  a  S	nOSnSnUSS  H  nX-  nM	     [        U 5      S:  d  US:  a  Sn[        UUR                  U	UUUUUS
9$ )zReturns an NVMMASharedLayout with default swizzling for a given shape.

This picks the largest swizzle pattern compatible with the shape, which
allows emitting the fewest TMA or MMA messages.
   r   Nr   r   r   r   r   )r   r   r   r   r   r   r   r   )r   rE   primitive_bitwidthr   )block_shapedtyper   r   r   r   r   packing_factorr   r   contig_dim_sizecontig_dim_bytesr   flatten_outer_dimsizes                  r   get_default_for!NVMMASharedLayout.get_default_for\  s    )a*;F;)!"-bq0AAM'+n<*-E-EEJs"'7#'='B!$#(82(=(B!##(82(=(B!#!"!#2&D% '{a#4q#8!" 1"55!!%'	
 		
r   rM   c           	      p    SU R                    SU R                   SU R                   SU R                   S3	$ )NNVMMA_rR   _NVMMA)r   r   r   r   r   s    r   r:   NVMMASharedLayout.mangle  s@    //0$2G2G1H$//IZZ[\`\k\k[llrssr   c                 z   [        U R                  U R                  U R                  U R                  U R
                  U R                  (       a  [        U R                  5      OS U R                  (       a  [        U R                  5      OS U R                  (       a  [        U R                  5      45      $ S 45      $ r   )
r_   r   r   r   r   r   r   r`   r   r   r   s    r   ra   NVMMASharedLayout.__hash__  s    T,,d.C.CTYYPTP_P_aeapap151B1BU4,,-262D2DU4--.$.2nnU4>>*H I 	I CGH I 	Ir   r$   )FFNNN)r&   r'   r(   r)   r*   rd   re   r   boolr   r   r   r   r   r   rH   r6   staticmethodr	   r   rV   r:   ra   r,   rf   rg   s   @r   r   r   '  s     
IJJ(,L(49%,)-M8DI&-%)IxS	")+$	
 qu"&&
  &
Pt tI Ir   r   c                      ^  \ rS rSr% Sr\\S'   \\S'   \\S'   \\   \S'   Sr\	\\      \S'   Sr
\	\\      \S	'   Sr\	\\      \S
'   U 4S jrS rS\4S jrS rSrU =r$ )SwizzledSharedLayouti  a  
Represents a generic swizzled shared memory layout.

Args:
    vec (int): Vector width for swizzling.
    per_phase (int): Elements per swizzle phase.
    max_phase (int): Maximum number of swizzle phases.
    order (List[int]): Dimension ordering for swizzling.
    ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
    cta_split_num (Optional[List[int]]): Split factors for CTAs.
    cta_order (Optional[List[int]]): CTA ordering.
vec	per_phase	max_phaserB   Nr   r   r   c                   > [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R
                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [         TU ]  S[        U R                  5      5        [        U R                  5      n[        X5        [        U R                  5      U:X  d   e[        U R                  5      U:X  d   e[        U R                  5      U:X  d   eg )Nr   r   r   rB   r   r   r   )rD   r   r   r   r   r   rB   r   r   r   rE   r   rF   s     r   rH   "SwizzledSharedLayout.__post_init__  s   E#7#ABK)=dnn)MNK)=dnn)MNG%9$**%EFN,@ARAR,STO-A$BTBT-UVK)=dnn)MN4::D'4$$%---4%%&$...4>>"d***r   c           	          UR                  U R                  U R                  U R                  U R                  U R
                  U R                  U R                  5      $ r   )get_swizzled_shared_layoutr   r   r   rB   r   r   r   r4   s     r   r6   SwizzledSharedLayout._to_ir  sJ    11HHNNNNJJNN
 	
r   rM   c                     S nSU R                    SU R                   SU R                   SU" U R                  5       SU" U R                  5       SU" U R
                  5       SU" U R                  5       S3$ )Nc                 H    U c  gSR                  [        [        U 5      5      $ rP   rS   rW   s    r   rY   .SwizzledSharedLayout.mangle.<locals>.stringify  r[   r   SSS_rR   _SSS)r   r   r   rB   r   r   r   r    rY   s     r   r:   SwizzledSharedLayout.mangle  s    	)
 dhhZq 0$..1A9TZZCXBYYZ[deievev[wZxxy  {D  EI  EW  EW  {X  zY  YZ  [d  ei  es  es  [t  Zu  uy  z  	zr   c                 v   [        U R                  U R                  U R                  [	        U R
                  5      U R                  (       a  [	        U R                  5      OS U R                  (       a  [	        U R                  5      OS U R                  (       a  [	        U R                  5      45      $ S 45      $ r   )	r_   r   r   r   r`   rB   r   r   r   r   s    r   ra   SwizzledSharedLayout.__hash__  s    TXXt~~t~~4::&DDUDUd.?.?(@[_262D2DU4--.$.2nnU4>>*H I 	I CGH I 	Ir   r$   )r&   r'   r(   r)   r*   rd   re   r   r   r   r   r   rH   r6   rV   r:   ra   r,   rf   rg   s   @r   r   r     s     
HNN9(,L(49%,)-M8DI&-%)IxS	")+	
z zI Ir   r   c                      ^  \ rS rSr% Sr\\\      \S'   \\   \S'   Sr\	\\      \S'   Sr
\	\\      \S'   Sr\	\\      \S'   U 4S	 jrS
 rS\4S jrS rS rSrU =r$ )PaddedSharedLayouti  a6  
Represents a layout for the access to shared memory. Compared to SwizzledSharedLayout,
it uses padding to avoid shared memory bank conflicts. After every interval tensor elements,
the corresponding number of padding elements are inserted.
If a position corresponds to multiple intervals, the padding amounts are summed.

In the following example of a tensor,
`eM` represents original elements in the and `pN` represents padded element.

Before padding, the shared memory looks like:
[e0, e1,
 e2, e3,
 e4, e5,
 e6, e7,
 ...]

After padding with interval-padding list [[2, 1], [4, 2]],
the shared memory will be
[e0, e1, p0,
 e2, e3, p1, p2, p3,
 e4, e5, p4,
 e6, e7, p5, p6, p7,
 ...]

Args:
    interval_padding_pairs (List[int]): List of [interval, padding] pair and both interval and padding must be powers of 2.
    order (List[int]): Order of logical tensor dimensions; fastest-varying first.
    ctas_per_cga (Optional[List[int]]): CTAs per CGA grouping.
    cta_split_num (Optional[List[int]]): Split factors for CTAs.
    cta_order (Optional[List[int]]): CTA ordering.
interval_padding_pairsrB   Nr   r   r   c                   > [         TU ]  S[        U R                  5      5        [         TU ]  S[	        U R
                  5      5        [         TU ]  S[	        U R                  5      5        [         TU ]  S[	        U R                  5      5        [         TU ]  S[	        U R                  5      5        U R                  5         g )Nr   rB   r   r   r   )
rD   r   r   r   r   rB   r   r   r   verifyrn   s    r   rH    PaddedSharedLayout.__post_init__  s    4mDD_D_6`aG%9$**%EFN,@ARAR,STO-A$BTBT-UVK)=dnn)MNr   c                     [        U R                  6 u  p#UR                  X#U R                  U R                  U R
                  U R                  5      $ r   )zipr   get_padded_shared_layoutrB   r   r   r   )r    r5   	intervalspaddingss       r   r6   PaddedSharedLayout._to_ir   sK    !4#>#>?	//	TZZQUQbQbdhdvdv04@ 	@r   rM   c                     S nSU" U R                   5       SU" U R                  5       SU" U R                  5       SU" U R                  5       SU" U R                  5       S3$ )Nc                 H    U c  gSR                  [        [        U 5      5      $ rP   rS   rW   s    r   rY   ,PaddedSharedLayout.mangle.<locals>.stringify  r[   r   PaddedShared_rR   _PaddedShared)r   rB   r   r   r   r   s     r   r:   PaddedSharedLayout.mangle  s    	)
 y)D)DEFa	RVR\R\H]G^^_`ijnj{j{`|_}}~  @I  JN  J\  J\  @]  ^  ^_  `i  jn  jx  jx  `y  _z  zG  H  	Hr   c                 r  ^ U R                   n[        U5      S:  d   S5       e[        S U 5       5      (       d   e[        U6 u  p#[	        [        U5      5      n[        U5      [        U5      :X  d   eS m[        U4S jU 5       5      (       d   S5       e[        U4S jU 5       5      (       d   S5       e[        U R                  5      nUS:  d   S	5       e[        X5        [        U R                  5      U:X  d   e[        U R                  5      U:X  d   e[        U R                  5      U:X  d   eg )
Nr   zVPaddedSharedLayout interval_padding_pairs must have at least one interval-padding pairc              3   >   #    U  H  n[        U5      S :H  v   M     g7f)r   N)rE   ).0pairs     r   	<genexpr>,PaddedSharedLayout.verify.<locals>.<genexpr>  s     4ed3t9>es   c                 .    U S:  =(       a
    X S-
  -  S:H  $ )Nr   r   r$   )ns    r   <lambda>+PaddedSharedLayout.verify.<locals>.<lambda>  s    !a%"<AQK1,<"<r   c              3   4   >#    U  H  nT" U5      v   M     g 7fr   r$   r   r  is_power_of_2s     r   r   r    s     7Y=##Y   z;PaddedSharedLayout interval values must all be power of twoc              3   4   >#    U  H  nT" U5      v   M     g 7fr   r$   r  s     r   r   r    s     6X=##Xr	  z:PaddedSharedLayout padding values must all be power of twoz*PaddedSharedLayout order must not be empty)r   rE   allr   r   setrB   r   r   r   r   )r    pairsr   r   unique_intervalsr   r  s         @r   r   PaddedSharedLayout.verify  s   ++5zA~www~4e44444!5k	I/#$I666<7Y777v9vv76X666t8tt64::axEEExD'4$$%---4%%&$...4>>"d***r   c           	      x   [        [        [        [        U R                  5      5      [        U R                  5      U R
                  (       a  [        U R
                  5      OS U R                  (       a  [        U R                  5      OS U R                  (       a  [        U R                  5      45      $ S 45      $ r   )r_   r`   rU   r   rB   r   r   r   r   s    r   ra   PaddedSharedLayout.__hash__#  s    U3ud&A&ABC4::&DDUDUd.?.?(@[_262D2DU4--.$.2nnU4>>*H I 	I CGH I 	Ir   r$   )r&   r'   r(   r)   r*   r   rd   re   r   r   r   r   rH   r6   rV   r:   r   ra   r,   rf   rg   s   @r   r   r     s    > !cO+9(,L(49%,)-M8DI&-%)IxS	")@
H H+*I Ir   r   c                     S/U-  nU (       d  U$ S nU  HL  n[        S [        U5       5       S 5      nUb  UnX6==   S-  ss'   M2  U(       a  M;  Uc   eX4==   S-  ss'   MN     U$ )Nr   c              3   :   #    U  H  u  pUS :w  d  M  Uv   M     g7f)r   Nr$   )r   ivs      r   r    bases_per_dim.<locals>.<genexpr>5  s     ="2$!a1fAA"2s   	r   )next	enumerate)basesr   skip_broadcastresultnon_zero_idxr   idxs          r   bases_per_dimr  +  sw    S4ZFL=)E"2=tD?LK1K+++ A%   Mr   c                     [        U [        5      (       a  [        U R                  [	        U5      5      $ [        U [
        [        45      (       a  [        U R                  U5      $ U R                  $ r   )	
isinstancerz   r  r~   rE   ri   r   rA   rl   )r   r   s     r   rA   rA   A  sW    &122V..E
;;	F[*:;	<	<V]]E22###r   N)T)dataclassesr   typingr   r   triton.language.corer   r   r   triton.runtime.jitr	   r   r   r0   r=   ri   rz   r   r   r   r   r   r   r   r  rA   r$   r   r   <module>r%     s   ! ! T T 17$ $ $"   $K% K K\ $-# - -8 $2/ 2 2j $E( E E8 $4 ,I. ,I !,I^$ $   $eI eI eIP $4 <I< <I !<I~ $4 VI VI !VIt,$r   