
    9i;%                   V   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKJrJrJrJrJr  S SKJrJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJ r J!r!J"r"  S SK#J$r$J%r%J&r&J'r'J(r(J)r)J*r*  S S	K+J,r,  S SK-r-S S
K-J.r.J/r/J0r0  S SK1J2s  J3s  J4r5  S SK6J7s  J8r9  S SK:r;S SK<r;S SK=J8s  J>r?  S SK@JArA  S SKBJCrC  S SKDJErE  S SKFJGrG  S SKHJIrI  S SKJJKrKJLrLJMrMJNrNJOrO  S SKPJQrQ  S SKRJSrSJTrTJUrUJVrVJWrWJXrXJYrYJZrZJ[r[  S SK\J]r]  S SK^J_r_  S SK`JaraJbrbJcrc  S SKdJere  SSKfJgrgJhrh  SSKiJjrjJkrkJlrlJmrmJnrn  SSKhJoroJprpJqrqJrrrJsrs  SSKtJuru  SSKvJwrwJxrxJyryJzrz  SSK{J|r|  SSK}J~r~Jr  SSK8JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SS KJrJrJr  \ (       a+  S S!KJr  S S"KRJr  S S#K\Jr  SS$KJr  SS%KJr  SS&KJr  SS'K8Jr  O\rS(\S)'    S SKr\GRP                  rS*r\'" S,5      r\!" S-5      r\!" S.5      r\!" S/5      r\"\\.4   rS(\S0'   \"\\\.4   rS(\S1'   \"\;GRh                  GRj                  \;GRh                  GRl                  4   rS(\S2'   \GRp                  " \5      r\R*                  " \	GRv                  S3S49r\;GR.                  GRx                  r\" \GRz                  " S5S65      5      r\" \GRz                  " S7S85      5      r \"\S9\\S94   S:S;\\\"\\\S94   S9S:S;4         4   rS(\S<'   GS*S= jr\GR                  " S*S>9 " S? S@5      5       rGS+SA jrGS,SB jrGS-SC jrGS-SD jr      GS.SE jr/ SFQr/ SGQr GS/     GS0SH jjrGS1SI jr GS/     GS0SJ jjr\GS2GS3SK jj5       r\GS2GS4SL jj5       r GS2     GS5SM jjr    GS6SN jr    GS7SO jrGS8SP jrGS8SQ jr      GS9SR jr        GS:SS jr      GS;ST jrGS<SU jrGS=SV jr " SW S;5      r\" S+S>9 " SX SY5      5       r\ " SZ S[\5      5       rGS>S\ jr\ " S] S^\5      5       r\ " S_ S`\5      5       r\" Sa5      \" Sb5      \" Sc5      \" Sd5      \" Se5      \" Sf5      Sg.rSh\Si'    GS2       GS?Sj jjr\ " Sk Sl\5      5       rS\/" S 5      4       GS@Sm jjr\\\.   \\.   /\4   rS(\Sn'    " So Sp\5      r " Sq Sr\5      r " Ss St\5      r\ " Su Sv\5      5       r\ " Sw Sx\5      5       r\ " Sy Sz\5      5       rGSAS{ jrGSAS| jr     GSB             GSCS} jjr      GSDS~ jrGSES jr\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r " S S\5      r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r      GSFS jrGSGS jr " S S5      r\ " S S\5      5       r " S S\5      Gr  " S S\5      Gr " S S\5      Gr " S S\5      Gr " S SG\ 5      Gr\ " S S\5      5       Gr " S S\5      Gr\" S+S>9 " S S\\k5      5       Gr\" S+S>9 " S SG\\5      5       Gr " S SG\5      Gr	 " S SG\	5      Gr
 " S SG\	5      Gr\ " S S\5      5       Gr\ " S S\5      5       Gr\" S+S>9 " S SG\5      5       Gr " S SG\5      Gr " S SG\5      Gr\"\\G\\G\\"\\\G\4      4   Gr " S S5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr    GSHS jGr\" S+S>9 " S SG\5      5       Gr " S SG\5      Gr " S SG\5      Gr\" S+S>9 " S SG\5      5       Gr\" S+S>9 " S SG\5      5       Gr " S SG\5      Gr  " S SG\5      Gr! " S SG\5      Gr" " S SG\5      Gr# " S SG\#5      Gr$ " S SG\#5      Gr% " S SG\5      Gr& " S SG\5      Gr' " S SG\5      Gr( " S SG\5      Gr) " S SG\5      Gr* " S SG\*5      Gr+ " S SG\!5      Gr, " S SG\5      Gr- " S SG\5      Gr. " S SG\5      Gr/ " S SG\5      Gr0 " S SG\5      Gr1 " S SG\5      Gr2\" S+S>9 " S S5      5       Gr3 " S GS G\!5      Gr4\" S+S>9 " GS GSG\45      5       Gr5 " GS GSG\45      Gr6\ " GS GS\5      5       Gr7 " GS GSG\5      Gr8\GR                   " GS	 GS
\5      5       Gr9 " GS S9G\95      Gr: " GS GSG\95      Gr;\" S+S>9 " GS GS\5      5       Gr<GSIGS jGr=\" S+S>9 " GS GSG\5      5       Gr>\" S+S>9 " GS GSG\5      5       Gr?    GSJGS jGr@\" S+S>9 " GS GSG\5      5       GrA " GS GSG\45      GrB " GS GS\5      GrC\ " GS GSG\C5      5       GrD\ " GS GSG\C5      5       GrE " GS  GS!G\45      GrF " GS" GS#G\F5      GrG " GS$ GS%G\F5      GrH " GS& GS'G\F5      GrIGSKGS( jGrJGSKGS) jGrKg! \ a    SrS+r GNf = f(L      )annotationsN)	Container	GeneratorIterableIteratorSequence)AbstractContextManagernullcontext)Enum)partial)AnyCallablecastClassVarLiteralOptionaloverloadSupportsFloatSupportsIntTYPE_CHECKINGTypeVarUnion)assert_neverNeveroverride	ParamSpecSelf	TypeAliasTypeIs)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metricsget_free_symbols)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)get_schema_info)	&_remove_effect_token_unbacked_bindingscompute_unbacked_bindingsfree_symbolsfree_unbacked_symbolsIterateExprsrebind_unbackedresolve_unbacked_bindingsShapeEnvSymTypes)Node
OrderedSet)CleanDivFloorDivModularIndexing)SymT   )configdependencies)BackendFeatureCodegenSymbolget_scheduling_for_deviceindex_prevent_reorderingKernel)Depextract_free_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfcache_on_self_and_argsceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningdo_bench_using_profilingdtype_from_sizeget_dtype_sizeget_kernel_metadataGPU_ALIGN_BYTESir_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_substensor_is_aligned)opsOpsValueV)FakeScriptObject)SympyBoolean)Argument)CUDATemplate)PythonWrapperCodegen)GraphLowering)IndentedBufferr   rq   TF_P_T_U_V_IntLike_NumLike_OpOverloadsz  prefixTORCH_AUTOTUNE_WARMUP   TORCH_AUTOTUNE_REPd   	TensorBoxr#   IRNode_NodeOrNodesc                .    [        U [        [        45      $ N)
isinstanceintr"   xs    R/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/torch/_inductor/ir.py
_is_staticr      s    a#w((    )frozenc                  R    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S
\S'   S\S'   Srg)GraphPartitionSignature   OrderedSet[sympy.Symbol]symbol_inputsz5dict[str, Union[IRNode, sympy.Expr, TorchBindObject]]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationboolskip_cudagraph	list[str]constant_names N__name__
__module____qualname____firstlineno____annotations____static_attributes__r   r   r   r   r      s/     ,+ GF (' r   r   c                &   ^ SU4S jjmT" U 5        g )Nc                  > U c  g [        U [        [        45      (       a  U  H  nT" U5        M     g [        U [        5      (       a   U R	                  5        H  nT" U5        M     g [        U [
        [        [        [        [        R                  R                  R                  [        [        [        [         4	5      (       d   S[#        U 5       S35       eg )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])r   listtupledictvalues
ExpandViewDynamicScalarAssertScalarr   sympylogicboolalgBooleanr!   r   EffectfulKernelShapeAsConstantBuffertype)nodesnode_check_tensorboxs     r   r   %validate_ir.<locals>._check_tensorbox   s     =e}-- & t$$ & ' ! KK''//#)
   e%jk r   )r   Optional[_NodeOrNodes]returnNoner   )node_or_nodesr   s    @r   validate_irr      s    < ]#r   c                b   ^  [        T [        5      (       d   [        T 5      5       eSU 4S jjnU$ )Nc                 0   > [        [        T5      " U 0 UD6$ r   )getattrrk   )argskwargsnames     r   fnops_wrapper.<locals>.fn  s    sD!42622r   )r   objectr   r   r   rl   )r   strr   )r   r   s   ` r   ops_wrapperr     s+    dC  ,$t*, 3 Ir   c           
     f   ^ [        [        U [        [        U 5      5      5      5      mSU4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf r   lenrange)indexi	inv_orders     r   reindex inverse_reorder.<locals>.reindex   sC    5zS^+++-23u:->?->il#->???   Ar   Sequence[_T]r   r   )r   zipr   r   )orderr   r   s     @r   inverse_reorderr     s*    Sc%j 123I@ Nr   c                   ^  SU 4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf r   r   )r   r   r   s     r   r   same_reorder.<locals>.reindex(  sB    5zSZ''').s5z):;):AeAh):;;;r   r   r   )r   r   s   ` r   same_reorderr   '  s    < Nr   c                   ^ ^ SU U4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   )r   reindex1reindex2s    r   r    fuse_reindexing.<locals>.reindex3  s    ((r   )r   r   r   zSequence[_V]r   )r   r   r   s   `` r   fuse_reindexingr   /  s    ) ) Nr   )   r      r@   )   r   r   r   r@   c                j    Ub  [        S U  5       5      (       a  [        U 5      nU$ [        X5      nU$ )z)
Convert strides to fill order (argsort)
c              3  b   #    U  H%  n[        U[        [        R                  45      v   M'     g 7fr   )r   r   r   r"   .0ss     r   	<genexpr>!get_fill_order.<locals>.<genexpr>C  s#     QS
1sEMM.B C CSs   -/)allrU   rV   )seq	shape_env
sorted_idxs      r   get_fill_orderr   =  s=     CQSQQQ$+CL
  !0
r   c                    [        U 5       VVs0 s H  u  pX!_M	     nnn[        [        U 5      5       Vs/ s H  oCU   PM	     nnU$ s  snnf s  snf )zx
Convert stride order to fill order
For channel last format,

stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
)	enumerater   r   )r   idxposlookupr   
fill_orders         r   stride_order2fill_orderr   K  sR     (1'78'783ch'7F8%*3u:%67%6)%6J7 97s
   AAc                    [        X5      n[        [        U 5      5       Vs/ s H  nSPM     nn[        U5       H	  u  pVXTU'   M     U$ s  snf )z!
Convert strides to stride order
r   )r   r   r   r   )r   r   r   _outr   elems          r   get_stride_orderr   W  sL     !/s >JCHo
&o1oC
&Z(D	 )J 's   A
c                    g r   r   r   guard_shapes     r   ir_node_to_tensorr   d  s    KNr   c                    g r   r   r   s     r   r   r   h  s    LOr   c                   U c  g U(       d%  [         R                  R                  R                  nO[        nU R                  5        Vs/ s H
  o2" U5      PM     nn[        U 5      (       a0  U R                  5       R                   Vs/ s H
  o2" U5      PM     nnO[        R                  U5      nU R                  5       nU R                  5       n[        U5      n[        U5      n[         R                  R                  R                  R                  5          [         R"                  " XEXgS9R%                  5       nS S S 5        U$ s  snf s  snf ! , (       d  f       W$ = f)N)sizestridedtypedevice)rm   graphsizevars	size_hintr$   get_sizeis_storage_and_layout
get_layoutr  FlexibleLayoutcontiguous_strides	get_dtype
get_devicer[   r   suppress_guardstorchempty_stridedzero_)	r   r   shape_fnr   r  r  r  r  ts	            r   r   r   l  s    	y 77##--!".AHQKD.Q'(||~'<'<='<!(1+'<=2248KKME\\^F"4(D$V,F	
			#	#	3	3	5E

%' 	
 
6 H / > 
6	5 Hs   	EE$E
E c                D    [        U [        5      (       a
  U (       d  S /$ U $ r   )r   r   values    r   may_convert_to_optionalr    s!     %u vLr   c                @   [        U [        5      (       d  U c  U $ [        U [        R                  5      (       a  U R                  $ [        U [
        [        45      (       a  [        U R                  5       5      $ [        SU  S[	        U 5      R                   S35        g )Nzget_device_type(: ))r   r   r  r  r   r   
OutputSpecget_device_typer  r   r   r   s    r   r  r    sz     !SQY	Au||	$	$vv	A
+	,	,q||~..#A3ba)9)9(:!<=r   c                    [        U 5      nUS;   a  [        [        U S35      S:X  a  ggUb  [        U5      =nc  gSSKJn  [        U[        5      (       d   [        U5      5       e[        X#5      $ )N)cpucuda_backendtritonTFr@   )TritonScheduling)	r  r   rA   rE   codegen.tritonr%  r   r   
issubclass)r   r  device_schedulingr%  s       r   	is_tritonr)    sy    QF  6fXX./8;!:6!BBK0'..G5F0GG.'::r   c                    [        U 5      S:H  $ )Nr!  )r  r   s    r   is_cpur+    s    1&&r   c           	     V  ^ ^ [        T [        5      (       aM  T R                  5       b<  [        T R	                  5       5      (       d  [        T R                  5       5      (       a  g[        UU 4S j[        [        T R	                  5       5      S-
  5       5       5      n[        R                  R                  R                  T R	                  5       S   5      S:H  =(       d=    [        R                  R                  R                  T R                  5       S   5      S:*  nU=(       a    U$ )NFc              3     >#    U  HE  n[         R                  R                  R                  TR	                  5       U   5      T-  S :H  v   MG     g7f)r   N)rm   r  r  size_hint_or_throw
get_stride)r   r   	alignmentr   s     r   r   2is_aligned_realized_tensor_hint.<locals>.<genexpr>  sC      /A 
			,	,Q\\^A->	?)	KPQQ/s   AAr@   )r   r   maybe_get_strider3   r/  r
  r   r   r   rm   r  r  r.  )r   r0  aligned_stridesaligned_last_dims   ``  r   is_aligned_realized_tensor_hintr6    s    
 q&!!' 00 .. s1<<>*Q./ O 	
++ALLN2,>?1D 	F77..qzz|B/?@AE  //r   c                   [        U5      [        U 5      :X  a  [        U 5      [        U5      :X  d   e[        X U5       H  u  p4n[        R                  R                  R                  US5      (       a  M7  [        R                  R                  R                  XE5      (       a  Mg  [        R                  R                  R                  U5      [        R                  R                  R                  U5      :X  a  M    g   g)zH
Returns true if the strides are equal, ignoring dimensions of size 1 .
r@   FT)r   r   rm   r  r  statically_known_leqstatically_known_equalssymbolic_hint)strides1strides2shapedims1s2s         r   significant_strides_equalrA    s     u:X&3x=CM+III5H57700a88ww77
 
''""0048H8H8V8V9
 
  6 r   c                t   [        U 5      (       d  U $ [        S [        XR                  5       5       5       5      (       a  U $ [	        XR                  5       U R                  5       5      (       d  U $ [        U 5      u  p#/ UR                  Qn[        U R                  5       5       H<  u  pV[        R                  R                  R                  US5      (       d  M6  X   XE'   M>     [        UR                  UR                  UR                   UUR"                  UR$                  5      n['        [)        X'S95      $ )a  
Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
dimensions - size 0 or 1 - will be updated.

If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
c              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7fr   rm   r  r  r9  r   r?  r@  s      r   r   2try_match_insignificant_strides.<locals>.<genexpr>  s1      7FB 	
00887   8:r@   datalayout)r  r   r   r/  rA  r
  as_storage_and_layoutr  r   rm   r  r  r8  FixedLayoutr  r  r  offset	is_pinnedr   ReinterpretView)tensorstridesstorage
old_layout
new_strider   r   
new_layouts           r   try_match_insignificant_stridesrV    s    !((
 '#4#4#67   $W.?.?.A6??CTUU/7G%:$$%J&//+,7700A66#JJM - J _'EFFr   c                    U R                   R                  SS9S   n[        UR                  5       VVs/ s H  u  p#UPM	     snnUR                  S'   SSKJn  U" U 5        g s  snnf )Noutput)opr   user_visible_output_idxs)record_original_output_strides)r  
find_nodesr   r   metatorch._inductor.compile_fxr[  )gmoutput_noder   r   r[  s        r   gm_original_output_stridesra    sd    ((%%%215K#K$4$454554K/0 J"2&4s   A#c                    [        5       nU  H9  nU[        UR                  5       SS9-  nU[        UR                  5       SS9-  nM;     [	        U5      $ )NFunbacked_only)r;   r)   r
  r/  r   )inputssym_varsinps      r   get_symbolic_inputsrh    sP    !+H$S\\^5II$S^^%5UKK  >r   c                     \ rS rSr% Sr\" 5       rS\S'   \R                  " SS9r
S\S'   \R                  " SS9rS	\S
'   \R                  " SS9rS\S'   \\R                  SIS j5       5       r\SJS j5       rSKS jrSLS jrSMS jrSNS jrSOS jrSPS jrSMS jrSQSRS jjr SS       STS jjrSUS jrSVS jrSWS jrSXS jrSYS jrSZS jr S[S jr!S\S jr"S]S  jr#\$S^S! j5       r%S_S" jr&S[S# jr'S`S$ jr(SaSbS& jjr)ScS' jr*SdS( jr+S[S) jr,SeS* jr-SfS+ jr.SgS, jr/S]S- jr0ShS. jr1S`S/ jr2S[S0 jr3SaSiS1 jjr4SjS2 jr5SLS3 jr6SkS4 jr7SLS5 jr8 Sl     SmS6 jjr9SnS7 jr:SoS8 jr; Sl     SpS9 jjr<SqS: jr=SrS; jr>SsS< jr?StS= jr@ Sl   SuS> jjrAS`S? jrBS\S@ jrCS[SA jrDS[SB jrESvSC jrFSwSD jrGShSE jrHSwSF jrI\J(       a  \$SUSG j5       rKSHrLg%SHrLg%)xr   i  zBase class for all intermediate representation (IR) nodes in TorchInductor.

Note:
    This is an abstract base class. Most methods raise NotImplementedError
    and must be overridden by concrete subclasses.
zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[list[str]]	tracebackOptional[torch.fx.Node]origin_nodec              #     #    [         R                  nX-  [         l         S v   U[         l        g ! U[         l        f = f7fr   )r   rj  )rm  olds     r   current_originsIRNode.current_origins/  s4      %%"%-	*&)F#cF#s   A1 A>Ac                L    [        U [        [        [        [        [
        45      $ r   )r   ComputedBufferInputsKernelInputBufferrO  TemplateBuffer)r   s    r   is_realized_nodeIRNode.is_realized_node9  s&    	
 		
r   c                0    [         R                  XU5        g r   )r   __setattr__)selfattrr  s      r   _post_init_setattrIRNode._post_init_setattrF  s     	4u-r   c                    [        U R                  5      nU R                  SU5        U R                  S[        R                  (       a  [
        R                  " 5       OS 5        U R                  SS 5        g )Nrm  ro  rq  )r;   rj  r  rA   debug_ir_tracebackro  format_stack)r  rm  s     r   __post_init__IRNode.__post_init__L  s[    T223	73V5N5N//1TX	
 	t4r   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7fr   r   r   deps     r   r   (IRNode.get_read_names.<locals>.<genexpr>U       ?.>s((.>   r;   	get_readsr  s    r   get_read_namesIRNode.get_read_namesT      ?dnn.>???r   c                    U R                   $ r   )ro  r  s    r   get_tracebackIRNode.get_tracebackW  s    ~~r   c                    U R                   $ r   rq  r  s    r   get_origin_nodeIRNode.get_origin_nodeZ      r   c                    g r   r   r  s    r   get_defining_opIRNode.get_defining_op]      r   c                   [        5       nU R                  n[        U [        5      (       a-  U R	                  5       nU R
                  (       a  [        U/5      nU H  n[        US5      (       a.  UR                  (       a  UR                  UR                  5        MB  [        R                  R                  R                  R                  S0 5      R                  UR                  / 5      n[        U[        5      (       d  M  U HQ  n[        R                  R                  R                   R                  US 5      nU(       d  M@  UR                  U5        MS     GM     U$ )Nstack_trace	postToPre)r;   rm  r   ExternKernelr  rq  hasattrr  addr  	_inductordebug _inductor_post_to_pre_grad_nodesgetr   r   #_inductor_pre_grad_node_stack_trace)r  stack_tracesrm  rq  r   pre_grad_nodes	node_namer  s           r   get_stack_tracesIRNode.get_stack_traces`  s    )3,,dL))..0K$k]3Dt]++0@0@  !1!12 OO))JJNN#Rc$))R( 
 ".$77!/I--QQUU%t  
 #{$((5 "0 ( r   c                6   S[        U SS5       3nU(       a  [        U5      S:  a  US S  S3nU R                  5       (       d  U/$ / nU R                  5        H8  nUR                  S5        X4R	                  S5      -  nUR                  S	5        M:     U/U-   $ )
Nzorigins=rm   @   =   z...zstack_traces = {
})r   r   r  appendsplit)r  shortenrm  stack_trace_strr  s        r   common_reprIRNode.common_repr  s    WT9b9:;s7|b( "c*G$$&&9002K""#560066O""3' 3 y?**r   c                .   [        U5      [        U R                  U5      5      -   n[        [        [        U5      5      nU(       a5  [	        SR                  U5      5      n[        U 5      R                   SU S3$ [        U 5      R                   SU S3$ )Nz,
z(
z
)(r  )r   r  mapr   indentjoinr   r   )r  linesr  	multiline	new_liness        r   
str_helperIRNode.str_helper  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44r   c                    U R                   $ r   r  r  s    r   r  IRNode.get_dtype      zzr   c                D     U R                  5       $ ! [         a     g f = fr   )r  NotImplementedErrorr  s    r   maybe_get_dtypeIRNode.maybe_get_dtype  s&    	>>##" 		    
c                2    [        S[        U 5       S35      e)Nz#get_layout() is not implemented by !r  r   r  s    r   r  IRNode.get_layout  s    !$GT
|ST"UVVr   c                D     U R                  5       $ ! [         a     g f = fr   )r  r  r  s    r   maybe_get_layoutIRNode.maybe_get_layout  &    	??$$" 		r  c                "    U R                  5       $ r   )r  r  s    r   get_output_specIRNode.get_output_spec  s      r   c                D     U R                  5       $ ! [         a     g f = fr   )r  r  r  s    r   maybe_get_output_specIRNode.maybe_get_output_spec  s(    	''))" 		r  c                >    [        U R                  5       [        5      $ )z4True for single tensor output (excludes MultiOutput))r   r  Layoutr  s    r   has_tensor_outputIRNode.has_tensor_output  s    $446??r   c                2    [        S[        U 5       S35      e)Nz!get_size() is not implemented by r  r  r  s    r   r
  IRNode.get_size  s    !$Ed4j\QR"STTr   c                D     U R                  5       $ ! [         a     g f = fr   )r
  r  r  s    r   maybe_get_sizeIRNode.maybe_get_size  %    	==?"" 		r  c                "    U R                  5       $ r   r
  r  s    r   r=  IRNode.shape  s    }}r   c                4    [        U R                  5       5      $ r   )rh   r
  r  s    r   	get_numelIRNode.get_numel  s    T]]_--r   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ Nr   rm   r  r  statically_known_truer   Eqr  r  s    r   is_zero_elementsIRNode.is_zero_elements  0    ww55ehht~~?OQR6STTr   c                0    [        S[        U 5       35      e)a  
If the IRNode refers to data which has not been materialized (e.g.,
it is a Pointwise/Reduction that could potentially have more
compute fused into it), realize the IRNode into physical memory,
ending the possibility of fusing into it, but allowing, e.g., multiple
users to access the data without having to recompute.

Check StorageBox.realize for a particularly notable implementation.

TODO(ezyang): I think, in principle, every IRNode should have an
implementation of this, and most of the time no-op is OK, but you
really do have to audit each IRNode for this, so for now, raise
an error if it's not implemented.  Note that some code in graph.py
will catch this thrown error and suppress it with a warning.
zrealize NYI on r  r  s    r   realizeIRNode.realize  s      "ODJ<"@AAr   Nc                0    [        S[        U 5       35      e)Nzcodegen_reference NYI on r  r  writers     r   codegen_referenceIRNode.codegen_reference  s    !$=d4j\"JKKr   c                    g r   r   r  s    r   r  IRNode.get_device  r  r   c                0    U R                  5       nUc   eU$ r   )r  r  r  s     r   get_device_or_errorIRNode.get_device_or_error  s    "!!!r   c                    gNFr   r  s    r   has_exceeded_max_readsIRNode.has_exceeded_max_reads      r   c                >    [        [        U 5      R                  5      er   r  r   r   r  s    r   make_loaderIRNode.make_loader      !$t*"5"566r   c                >    [        [        U 5      R                  5      er   r  r  s    r   make_indexerIRNode.make_indexer  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   r/  IRNode.get_stride  r  r   c                D     U R                  5       $ ! [         a     g f = fr   )r/  r  r  s    r   r3  IRNode.maybe_get_stride  r  r  c                >    [        [        U 5      R                  5      er   r  r  s    r   get_nameIRNode.get_name  r  r   c                D     U R                  5       $ ! [         a     g f = fr   )r  r  r  s    r   maybe_get_nameIRNode.maybe_get_name  r  r  c                z     U R                  5       [        R                  R                  ;   $ ! [         a     gf = fr  )r  rm   r  graph_inputsr  r  s    r   is_input_bufferIRNode.is_input_buffer  s4    	==?agg&:&:::" 		s   *- 
::c                    gr  r   r  	thresholds     r   has_large_inner_fnIRNode.has_large_inner_fn	  r	  r   c                    g r   r   r  userss     r   
mark_reuseIRNode.mark_reuse      r   c                    g r   r   r  s    r   realize_hintIRNode.realize_hint  r*  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   unwrap_viewIRNode.unwrap_view  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   freeze_layoutIRNode.freeze_layout  r  r   c                >    [        [        U 5      R                  5      er   r  r  r   allow_paddings      r   freeze_layout_with_stride_order&IRNode.freeze_layout_with_stride_order       "$t*"5"566r   c                >    [        [        U 5      R                  5      er   r  r  r   s     r   freeze_layout_with_fill_order$IRNode.freeze_layout_with_fill_order  r  r   c                >    [        [        U 5      R                  5      er   r  r  r  s     r   freeze_layout_with_same_order$IRNode.freeze_layout_with_same_order   r  r   c                >    [        [        U 5      R                  5      er   r  r  exact_stridesr6  s      r    freeze_layout_with_exact_strides'IRNode.freeze_layout_with_exact_strides#  r9  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_read_writesIRNode.get_read_writes(  r  r   c                6    U R                  5       R                  $ r   rH  readsr  s    r   r  IRNode.get_reads+      ##%+++r   c                4    [        U R                  5       5      $ r   )r   r  r  s    r   	num_readsIRNode.num_reads.  s    4>>#$$r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_storage_numelIRNode.get_storage_numel1  r  r   c                >    [        [        U 5      R                  5      er   r  r  rd  s     r   get_free_symbol_usesIRNode.get_free_symbol_uses4  r9  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_reduction_typeIRNode.get_reduction_type9  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_reduction_sizeIRNode.get_reduction_size<  r  r   c                    gr  r   r  s    r   	is_externIRNode.is_extern?  r	  r   c                    gr  r   r  s    r   is_no_opIRNode.is_no_opB  r	  r   c                >    [        [        U 5      R                  5      er   r  r  s     r   constant_to_deviceIRNode.constant_to_deviceE  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_mutation_namesIRNode.get_mutation_namesH  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_operation_nameIRNode.get_operation_nameK  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_inputs_that_alias_output#IRNode.get_inputs_that_alias_outputN  r  r   c                    g r   r   r  s    r   r  IRNode.dtypeS  s    (+r   r   )rm  zOrderedSet[Node]r   zGenerator[None, None, None]r   r   r   r   )r  r   r  r   r   r   r   r   r   OrderedSet[str])r   rn  r   rp  r   zOptional[Operation]T)r  r   r   Sequence[str])TT)r  zSequence[object]r  r   r  r   r   r   r   torch.dtype)r   zOptional[torch.dtype]r   r  )r   zOptional[Layout]r   r  )r   zOptional[OutputSpec]r   r   r   Sequence[Expr])r   Optional[Sequence[_IntLike]])r   z.Union[_IntLike, sympy.Rel, Sequence[_IntLike]]r   r!   r   Optional[str]r   r  zOptional[IndentedBuffer]r   r   r   Optional[torch.device]r   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   r"  Optional[int]r   r   r'  r   r   r   r   r   Fr   Sequence[int]r6  r   r   r   r   r  r   r   r  r  r   r   rD  r  r6  r   r   r   r   dependencies.ReadWritesr   zOrderedSet[Dep]r   r   r   ry   rd  r   r   r   r  r  r   r   r   rz  )Mr   r   r   r   __doc__r;   rj  r   dataclassesfieldrm  ro  rq  staticmethod
contextlibcontextmanagerrt  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  propertyr=  r  r  r  r  r  r  r  r  r  r/  r3  r  r  r  r#  r(  r,  r/  r2  r7  r<  r@  rE  rH  r  rP  rS  rW  rZ  r]  r`  rc  rf  ri  rl  ro  r   r  r   r   r   r   r   r     sH    3=,/>  +00e<G_<%0%6%6E%BI"B+6+<+<%+HK(H*  * 

 

.5@ >+  PT	5%	504	5HL	5		5W!@U  .UB$L
777777 ;@7"7377	7
77 HM7/7@D7	7
7,%7 %*7!7	!7
777777 	+ 
+ r   c                      \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSS jrSS jr S   S S jjrS!S jrSrg)"	OperationiW  c                    S U l         g r   operation_namer  s    r   r  Operation.__post_init__Y  s
    -1r   c                    [         er   r  r  s    r   r  Operation.get_device\      !!r   c                @    [        U S5      (       d   eU R                  $ Nrq  )r  rq  r  s    r   r  Operation.get_origin_node_  s!    t]++++r   c                @    [        U S5      (       d   eU R                  $ )Nrm  )r  rm  r  s    r   get_originsOperation.get_originsc  s    tY''''||r   c                8    U R                   c   eU R                   $ r   r  r  s    r   rl  Operation.get_operation_nameg  s     ""..."""r   c                    gr  r   r  s    r   r`  Operation.is_externk  r	  r   c                    gr  r   r  s    r   rc  Operation.is_no_opn  r	  r   c                    [         er   r  r  s    r   rH  Operation.get_read_writesq  r  r   c                &    XR                  5       ;   $ r   )r  )r  r   s     r   
is_user_ofOperation.is_user_oft  s    **,,,r   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7fr   r  r  s     r   r   +Operation.get_read_names.<locals>.<genexpr>x  r  r  r  r  s    r   r  Operation.get_read_namesw  r  r   c                6    U R                  5       R                  $ r   rK  r  s    r   r  Operation.get_readsz  rN  r   c                    [         er   r  r  s    r   get_outputsOperation.get_outputs}  r  r   c                    [        5       $ r   r:   r  s    r   get_unbacked_symbol_defs"Operation.get_unbacked_symbol_defs  
    |r   c                    [        5       $ )a  
When unbacked_only=True:
Returns the unbacked symbols which are required to be in scope in
order to successfully perform codegen for this buffer.  For example,
a buffer that corresponds to an extern kernel call that takes i0 as
an argument would return {i0} here.  This is used to generate necessary
dependencies that ensure we actually bind i0 in codegen before you
try to use it.

Note that this is NOT transitive; in particular, if this buffer takes
in as input another buffer with dynamic shape (e.g., (i0,)), we will
not report it here, because you will already have a dependency
on that buffer, which will eventually have a dependency on i0 if
necessary.

When unbacked_only=False:
Similar to `unbacked_only=True` but including all free symbols
instead of only free unbacked symbols.
r:   rV  s     r   rW  Operation.get_free_symbol_uses  s    , |r   c                    g)z
Gets extra global memory size needed by this buffer.
Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
r   r   r  s    r   get_workspace_sizeOperation.get_workspace_size  s    
 r   r  Nrt  r  rw  )r   rl  r  r  r  )r   r   r   r   ru  r  r   list[Buffer]r   r   r  r  r  )r   r   r   r   r  r  r  r  rl  r`  rc  rH  r  r  r  r  r  rW  r  r   r   r   r   r  r  W  sc    2" #"-@," %*!	!0r   r  c                    ^  \ rS rSr% S\S'   S\S'   S\S'   S\S	'   \" S 5       S    S!S
 jj5       rS"S jrS#U 4S jjrS$S jr	\	r
S%S jrS&S jrS'S jrS'S jr\      S(S j5       r\\R&                  4S)S jj5       r\S*S j5       rS+S jr\S$S j5       rS,S-S jjrS S.S jjrS/S jrS0S jrS1S jrS'S jrS2S jrS3S jr Sr!U =r"$ )4Loopsi  r  r  r|  r  Callable[..., Any]inner_fnr  rangesc                   ^ [        5       R                  " / U4S jU R                   5       QU R                  T5      P76 $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r(   r   erd  s     r   r   -Loops.get_free_symbol_uses.<locals>.<genexpr>  s     F+Qq-00+   )r;   unionr  inner_fn_free_symbolsrV  s    `r   rW  Loops.get_free_symbol_uses  s>     |!! 
F$++F
&&}5
 	
r   c                   U R                  SU R                  R                   S3[        U R                  5      U R                  5       /U Vs/ s H  o" S[        X5       3PM     sn-   SU R                  < 3/-   5      $ s  snf )N'=origin_node=)r  r  r   r   r  inner_fn_strr   rq  )r  namesr   s      r   _to_strLoops._to_str  s    DKK$$%Q'DJJ!!#
 <AA54q,-.5AB d..1234
 	
 Bs   B
c                "   > [         TU ]  5         g r   )superr  r  	__class__s    r   r  Loops.__post_init__  s    r   c                $    U R                  S5      $ )Nr  r  r  s    r   __str__Loops.__str__  s    ||K((r   c                    U R                   $ r   r  r  s    r   r  Loops.get_device      {{r   c                    U R                   $ r   r  r  s    r   r  Loops.get_origin_node  r  r   c                    U R                   $ r   r  r  s    r   r
  Loops.get_size  r  r   c                    U R                   $ r   r  r  s    r   get_pointwise_sizeLoops.get_pointwise_size  r  r   c                    UR                  SS 5      nUR                  SS 5      nU " U0 UD6nUR                  SU5        UR                  SU=(       d    UR                  5        [        R	                  U5      $ )Nrq  ro  )popr  ro  r   create)clsr   r   rq  tbrs         r   r  Loops.create  so     jj5ZZT*   	
]K8	["*;<""r   c                    [        U 5       VVs/ s H0  u  p#US:X  a  [        R                  R                  O
[	        X5      PM2     snn$ s  snnf Nr@   )r   r   SZerorg   )r  r}   nr   s       r   _indexLoops._index  sI     "&)
) FEGGLL(Fv(QQ)
 	
 
s   7A
c                |   [        [        R                  " 5       5      n[        R                  " U5         [        R
                  " [        SS5         U R                  " U R                  5       6   UR                  5       sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        g ! , (       d  f       g = fNallow_indexingT)
rN   rm   MockHandlerset_ops_handlerr    r   r  r  inner_fn_argsgetvalue)r  	opcounters     r   inner_fn_opcountLoops.inner_fn_opcount  sy     1	i(LL)94@MM4--/0%%' A@ )(@@@ )((s#   B--B?	B-
B 	B--
B;c                :    U R                  U R                  5      4$ r   )r  r  r  s    r   r  Loops.inner_fn_args  s    DKK(**r   c                t    [         R                  R                  " U R                  /U R	                  5       Q76 $ r   )rm   KernelFormatterHandlerir_to_stringr  r  r  s    r   r  Loops.inner_fn_str  s3    ''44MM
 ..0
 	
r   c                z    Uc  Sn[        U[        R                  5      nU R                  5       R                  U:  $ r  )maxrA   realize_opcount_thresholdr  num_opsr!  s     r   r#  Loops.has_large_inner_fn  s9    I	6#C#CD	$$&..::r   c                `    U R                  U R                  5      n[        U R                  X!S9$ Nrc  )r  r  rI   r  )r  rd  r   s      r   r  Loops.inner_fn_free_symbols   s%    DKK(#DMM5VVr   c                   [         R                  " [        SS5         U R                  5       (       aJ  [	        U R                  5       U R                  5       U R                  5       5      R                  sS S S 5        $ [	        U R                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr  )	r    r   r  rZ  rK   r  r
  r]  rL  r  s    r   r  Loops.get_reads  s    \\.*:DA&&((*$$&MMO++- % BA +$$&MMO % BAAs   AB8=1B88
Cc                H    [        U R                  5       R                  5      $ r   )r;   r  read_buffersr  s    r   r  Loops.get_read_names  s    $//1>>??r   c                H    [        U R                  5       R                  5      $ r   )r   r  r/  r  s    r   rP  Loops.num_reads  s    4((*7788r   c                2    [        S[        U 5       S35      e)Nz+get_reduction_size() is not implemented by r  r  r  s    r   r]  Loops.get_reduction_size      !9$t*QG
 	
r   c                2    [        S[        U 5       S35      e)Nz+get_reduction_type() is not implemented by r  r  r  s    r   rZ  Loops.get_reduction_type  r5  r   c                2    [        S[        U 5       S35      e)Nz+constant_to_device() is not implemented by r  r  r  s     r   rf  Loops.constant_to_device"  r5  r   r   r  r  )r  rz  r   r   rt  r  r  rw  r  )r   r   r   r   r   'Union[TensorBox, ShapeAsConstantBuffer])r  r  r}   r?   r   r  )r   rO   r   zSequence[Sequence[_IntLike]]r   r  rd  r   r   OrderedSet[Symbol]r  ru  r  r  r  )#r   r   r   r   r   rX   rW  r  r  r  __repr__r  r  r
  r  classmethodr  r  r?   INDEXr  rW   r  r  r  r#  r  r  r  rP  r]  rZ  rf  r   __classcell__r  s   @r   r  r    s     G$$)
!
	!
 %
	
 ) H  ###&#	0# # :>** 
 
 ( (+ 
 

;W@9




 
r   r  c                   UR                   (       a   [        R                  " [        S5      U5      $ [        R                  " SU5      $ )Nnanr   )is_floating_pointrk   constantfloat)r   r  s     r   nop_loader_fnrH  (  s1    ||E%L%00||Au%%r   c                  V    \ rS rSrS	S jrS
S jrSS jr        SS jrSS jrSr	g)	Pointwisei/  c                t    U R                  5       (       a  [        [        U R                  S9$ U R                  $ Nr  )r  r   rH  r  r  r  s    r   r  Pointwise.make_loader1  s,      ""=

;;}}r   c                    / $ r   r   r  s    r   r]  Pointwise.get_reduction_size8  s    	r   c                    g r   r   r  s    r   rZ  Pointwise.get_reduction_type;  r  r   c                |    U R                  5       n[        R                  " U=(       d    SU" U5      U" U5      5      $ Nunnamed)r  rk   storer  output_nameindexervarsloaders        r   store_outputPointwise.store_output>  s2     !!#yy1	74=&,OOr   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  S9$ FMove this to a given device. Requires that all reads are to constants.override_devicer  r  r  r  )r  r    r   ConstantBufferrJ  r  r  r  r  rZ  s      r   rf  Pointwise.constant_to_deviceG  sI    !!#n.?HP**;;	
 	
r   r   Nr  )r   zSequence[sympy.Expr]r  )rW  r  rX  !Callable[[Sequence[Expr]], Never]rY  r  r   r   r  )
r   r   r   r   r  r]  rZ  r[  rf  r   r   r   r   rJ  rJ  /  sF    P"P 3P 	P
 
P	
r   rJ  c                  R    \ rS rSr% S\S'   SrS\S'   S
S jr        SS jrS	rg)ScatteriS  r  output_indexerNrQ   scatter_modec           	         U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  S9$ )r_  r`  )r  r  r  r  rh  ri  )	r  r    r   rb  rg  r  r  rh  ri  rc  s      r   rf  Scatter.constant_to_deviceX  s[    !!#n.?HP**;;..**
 	
r   c                    U R                  5       nUc  Sn[        R                  " UU" U R                  U5      5      U" U5      U R                  S9$ )NrT  )mode)r  rk   rU  rh  ri  rV  s        r   r[  Scatter.store_outpute  sT     !!##KyyD''-.4L""	
 	
r   r   r  )rW  r  rX  re  rY  r  r   r   )	r   r   r   r   r   ri  rf  r[  r   r   r   r   rg  rg  S  sB    44"L)"

"
 3
 	

 

r   rg  
logical_ormaximumminimummulr  bitwise_xor)anyr%  minprodsumxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                   ^ ^^ T [         ;   a	  [         T    $ T S;   a        SUUU 4S jjnU$ T S:X  a        SS jnU$ [        ST  35      e)Nargmaxargminc                  > U u  p#Uu  pETS:X  a  [         R                  " X$5      nO[         R                  " X$5      n[         R                  " X$5      n[	        T5      (       a  [         R
                  " X"5      n[         R
                  " XD5      n	[         R                  " U[         R                  " X5      5      n[         R                  " U[         R                  " X5      5      nT(       a  [         R                  " X55      O[         R                  " X55      n
[         R                  " U[         R                  " Xz5      5      n[         R                  " XbU5      [         R                  " XcU5      4$ )Nr}  )	rk   ltgteqr,   nero  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr  reduction_types              r   argmax_combine_fn3get_reduction_combine_fn.<locals>.argmax_combine_fn  s     !G G)vvg/vvg/FF7,Ee$$&&2&&2~~dCFF7,DEucoog.OP ' w(VVG- 
 >>$(CDD		$1		$1 r   welford_combinec                \    U u  p#nUu  pVnXR-
  nXG-   n	Xy-  n
X(U
-  -   X6-   X-  U-  U
-  -   U	4$ r   r   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              r   welford_combine_fn4get_reduction_combine_fn.<locals>.welford_combine_fn  s]     &'"F(%&"F(OE!,J -I**emh6BB r   zunknown reduction_type=)r  tuple[object, object]r  r  r   tuple[OpsValue, OpsValue])r  #tuple[OpsValue, OpsValue, OpsValue]r  r  r   r  )ry  r  )r  r  r  r  r  s   ```  r   get_reduction_combine_fnr    s     --#N33	/	/	$	)>	&	 	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMr   c                  J  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S!S
 jr\r\" S 5      S"S#U 4S jjj5       rS$S jr	S%S jr
          S&S jrS'S jrS(S jrS"S#S jjrS)S jr\ S*                   S+S jj5       r\          S,S j5       r\\R*                  S4                   S-S jj5       r\      S.S j5       r\      S.S j5       r\        S/S j5       r\      S0S j5       r\ S*               S1S jj5       r\            S2S j5       r\                        S3S j5       r\ S*                     S4S jj5       r\                      S5S j5       rS r U =r!$ )6	Reductioni  r  reduction_rangesrP   r  r|  	src_dtyperT   reduction_hintc                $    U R                  S5      $ )N)r  r  r  r  r  s    r   r  Reduction.__str__  s    ||LMMr   c                |   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r(   r  s     r   r   1Reduction.get_free_symbol_uses.<locals>.<genexpr>  s     P:OQq-00:Or  )r  rW  r;   r  r  r  rd  r  s    `r   rW  Reduction.get_free_symbol_uses  s7    w+M:Z\=O=OP$:O:OP>
 
 	
r   c                    U R                   $ r   )r  r  s    r   r]  Reduction.get_reduction_size  s    $$$r   c                    U R                   $ r   )r  r  s    r   rZ  Reduction.get_reduction_type      """r   c           	         [         R                  " U R                  U R                  U R                  U R                  X45      5      n[         R                  " U=(       d    SU" U5      U5        g rS  )rk   	reductionr  r  r  r  store_reduction)r  rW  rX  rY  reduction_varsr  s         r   r  Reduction.store_reduction  sR     JJNNMM$/	
 	K49gdmUKr   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  r  r  s    r   index_lengthReduction.index_length  s!    4;;#d&;&;"<<<r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nX4$ r   )r  r  r  r?   R0_INDEX)r  r   rindexs      r   r  Reduction.inner_fn_args  s6    DKK(T22DMMBr   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      n[        U R                  X#US9$ r*  )r  r  r  r?   r  rI   r  )r  rd  r   r  s       r   r  Reduction.inner_fn_free_symbols  sF    DKK(T22DMMB#MM5
 	
r   c                   U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  U R                  [        R                  S9$ )r_  r`  r  r  r  r  r  r  r  r  )r  r    r   rb  r  r  r  r  r  r  rT   DEFAULTrc  s      r   rf  Reduction.constant_to_device  sk    !!#n.?HP**;;!22..nn(00	
 		
r   Nc	                >   [         R                  R                  R                  U5      n	[         R                  R                  R                  [	        U5      5      n
US:H  =(       dV    [         R                  R                  U [        R                  5      (       + =(       a    US;  =(       a    [        R                  n[        U	5      (       a  [        U
5      (       d  [        R                  S4$ [        R                  " U 5      nUR                  nSnU(       a]  [         R"                  " [         R$                  R&                  U SS9n[         R"                  " [         R$                  R&                  U SS9nO      SS jnUnU
S:X  a  U" X5      nUS:X  a  [        R(                  U4$ Ub  [+        U[,        5      (       a  [.        R0                  " [2        S	S5         [5        U5      u  nnS S S 5        Wbj  Wbg  [         R                  R                  R                  [	        UU-   5      5      nU	U:X  a,  [6        R9                  S
UUUUU5        [        R(                  S4$ [        R(                  U4$ X::  d  XS-  S-  :  a  [        R                  S4$ [;        U UUUUUS:w  a  UOSU[        R                  S9nSS jnU" U5      u  nnU(       a  U" U5      u  nn[=        U5      S:X  a  [        R                  S4$ [>        R@                  " URC                  5       URE                  5       5      u  u  nnnSnSnU H  n[         R                  R                  RG                  UU5      n[         R                  R                  RI                  UU[K        URM                  5       5      5      n [O        S U  5       5      n!U!(       a  US-  nM  US-  nM     UU:  a  [        R(                  U" X5      4$ [        RP                  U" X5      4$ ! , (       d  f       GN&= f)Nscanr{  r@       T)inner_reductionFc                    gr  r   )reduction_numel_hint
numel_hints     r   inner_reduction_splits4Reduction.num_splits.<locals>.inner_reduction_splits-  s     r   r  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr2  r   rw  r  c           	     `  ^	 U R                  5       nUc   e[        S [        UU R                  5       U R	                  5       S9U S9nUR                  5       nUR                  c   eUR                   V s/ s H=  n [        U [        5      (       d  M  [        U [        R                  5      (       a  M;  U PM?     nn / nSn[        UR                  S S9 H  m	[        U	4S jU 5       5      (       d  M  UR                  T	R                  5        T	R                   ["        R$                  R&                  ;   d  Md  ["        R$                  R&                  T	R                      n[)        UR*                  SS 5      nUR-                  5         [)        UR*                  SS 5      U:w  d  M  SnM     XV4$ s  sn f )	Nr  r  r  r   rJ  rI  Fc                    U R                   $ r   r  r   s    r   <lambda>@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>}  s    affr   keyc              3  T   >#    U  H  oTR                   R                  ;   v   M     g 7fr   )r   r2   )r   r  mds     r   r   AReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>~  s     F:aBHH111:   %(r  T)r  rw  r  r  r
  rH  
range_varsr   r!   r   NumbersortedrL  r   r  r   r   rm   r  name_to_bufferr   rJ  decide_layout)
r  r  cbread_writesr  indiceschangedbuforiginal_strider  s
            @r   get_read_indices.Reduction.num_splits.<locals>.get_read_indicese  sn   \\^F%%%%!++-
 B ,,.K ))555 %///Aa& /9!U\\/J /  
 GG[..4DEF:FFFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G F ##!s   4F+F+2F+r   c              3  *   #    U  H	  oS :  v   M     g7fr@   Nr   r   s     r   r   'Reduction.num_splits.<locals>.<genexpr>  s     /w!Aw   )r  r   r  r   r   r   )r  r  r   ztuple[Sequence[Expr], bool]))rm   r  r  r:  rh   has_featurerC   REDUCE_TO_SINGLE_ELEMENTrA   split_reductionsr   rT   r  rS   r  multi_processor_count	functoolsr   choicesreduction_split_factorINNERr   r   r    r   r  rJ   logr  r  r   rB   index_vars_squeezer
  r]  simplify_with_rangesstride_hintsr   keysr   OUTER)"r  	dst_dtyper  r  r  r  r  reduction_numel
input_noder  r  should_splitpropsnum_smmin_elements_per_threadr  outer_reduction_splitsr  
new_rangesnew_reduction_rangesextracted_numel_hintr  r  r  r  r   r  ranges1	num_outer	num_innerr   jrQ  outers"                                     r   
num_splitsReduction.num_splits  s     !ww//==oNWW%%33M&4IJ
%/ 
##FN,S,STT (( '' 	 /00Z
5K5K ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K*K\\.2BDI <JG", J
 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- ;aZ"_, ((!++--;v-E>5(00	
!	$F ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		A  55aAAgg&&33>4#7G /w//EQ	Q	  y  &&(>$)   !&&(>$)  Q JIs   P
Pc                  ^ ^^^^^ [         R                  R                  R                  T5      m[	        X#5      mSUUU4S jjmUS;   a4  [        T[        R                  T5      5      m      SUU 4S jjmU4S j$ T mT$ )z1Convert inner_fn from a reduction to an pointwisec                   >^  [         R                  " TU U4S j[        R                  " T Vs/ s H  n[	        U5      PM     sn6  5       5      $ s  snf )Nc              3  6   >#    U  H  nT" TU5      v   M     g 7fr   r   )r   r  r   value_fns     r   r   =Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>  s&      # UF++#s   )r  reduce	itertoolsproductr   )r   r   
combine_fnr  r  s   ` r   r   *Reduction._unroll_reduction_fn.<locals>.fn  sN    ##"+"3"3,<=,<q%(,<=# 
 >s   Ar}  r|  c                   > U Vs/ s H  n[         R                  " U5      PM     nnT" X5      [        R                  " T" U5      [        R
                  5      4$ s  snf r   )r   expandrk   
index_exprr  int64)r   r  r   flatten_indexr  s      r   r  0Reduction._unroll_reduction_fn.<locals>.value_fn  sO     4::6a%,,q/6:U+NN=#8%++F  ;s    Ac                   > T" U 5      S   $ r  r   )r   r   s    r   r  0Reduction._unroll_reduction_fn.<locals>.<lambda>  s    E1r   )r   r  r   r   )r   r  r  r  r   r  )rm   r  r  guard_int_seqr  _fixed_indexerr  r  )r  r  r  r  r  r  r   r  s   ``  @@@@r   _unroll_reduction_fnReduction._unroll_reduction_fn  s     77++99:JK-nH
		 		 11* 112BCM
)3E*  .-HIr   c
                  ^^^^^^ [         R                  R                  R                  [	        T5      5      mTS:X  an  SU4S jjn
U
" S5      U
" S5      U
" S5      U
" S5      S.mTTR                  5       ;   d
   T S35       eSUUU4S jjn[        R                  UUU[        U5      S9$ TS:X  a-  TS;   a	  SU4S	 jjnO	SUU4S
 jjn[        R                  UTXS9$ [        T[        5      (       a  [         R                  R                  R                  T5      [        R                  :  aQ  [	        U5      S:w  d  [        UR                  5      (       a(  [        R                  UTU R!                  TTTU5      US9$ U R#                  UTUTUTTTU	5	      u  pSU4S jjnU" U5      nU[$        R&                  :X  a  UnUS:X  a\  U	c   e[(        R*                  " [,        SS5         [/        U	5      u  nnS S S 5        Wc   eWc   eU R1                  UTUTUTUUTU5
      $ US:  a  U R3                  UTUTUTTUUU	5
      $ [4        R                  [7        UTTUTTUUS95      $ ! , (       d  f       Nu= f)Nr   c                $  > T[         R                  :X  a  [        U 5      $ TR                  (       a0  [        U [        5      (       d   [        U 5      5       e[        U 5      $ [        U [        5      (       d   [        U 5      5       e[        U 5      $ r   )	r  r   rE  r   r   r   rG  r   r   )valr  s    r   py_cnst!Reduction.create.<locals>.py_cnst  sm    

*9$00%c=99D49D9 :%%c;77BcB7s8Or   r@   )rw  rx  rv  rt  z* not supported for zero-dimension tensors!c                8   > [         R                  " TT   T5      $ r   rk   rF  )r   r  r  rtypes_to_initss    r   const_fn"Reduction.create.<locals>.const_fn  s    ||ON$CYOOr   ra  r  c                2   > [         R                  " ST5      $ r  r+  )r   r  s    r   r   Reduction.create.<locals>.fn  s    <<955r   c                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf r   r   r  r  )r   r   reduction_indexr  r  s      r   r   r0    s2    =M&N=Muww||=MO&N#E;; 'O   $4c                l   > [        T5      (       a  U $ U S:  a  [        U [        R                  5      $ U $ r  )r   r%  rA   min_num_split)r  r  s    r   _maybe_increase_split/Reduction.create.<locals>._maybe_increase_split6  s2    /**qy5&"6"677r   r2  r  Tr  )r'  r   r   zUnion[bool, float, int])r   r   r   rl   )r  r   r   r   )rm   r  r  simplifyrh   r  rJ  r  r   r   r"   r.  rA   unroll_reductions_thresholdrd   r   r#  r  rT   r  r    r   r  rJ   !create_multilayer_existing_rangescreate_multilayerr   r  )r	  r  r  r  r  r  r  r  r  r  r(  r-  r   hintr  r7  r  r  r  r,  s     ` ` ``          @@r   r  Reduction.create  s    ''**33MBR4STa$ qz"1:
qz	O "_%9%9%;; !""LM;P P ##!F|	 $   a!556 6
< < ##Y $  
 00  33OD001v&!+vfkk/B/B ##11.	  $   nn

	 &e,
 ]222!NB;)))n.>E3V40
0 F )))'33388 $  QY((   !!1-#-	
 	
C FEs   )I''
I5c           	        U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R
                  $ U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R                  $ [        U5      (       a  SOSn[        U5      (       a  SOSnUUUUX"U4X"U4[        S5      U4S	.U    $ )
N)r%  r|  z-infF)ru  r}  infTr   r@   )rw  rv  rx  rt  welford_reducer  online_softmax_reduce)r,   rG  r+   r  iinforu  r%  )r  r  zeroones       r   default_accumulatorReduction.default_accumulatorv  s     ..e$$V}$!%(({{5)---..e$$U|#!%(({{5)---(//uQ&u--d1#40 $D1&+FmT%:
  	r   c                :    U S:X  a  g[         R                  X5      $ )NrA  r   )r  rF  r  r  s     r   default_valueReduction.default_value  s!     --,,^CCr   c                    U S:X  a  U$ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U$ )Nr2     i      )rT   r  
OUTER_TINY)r  r  r  s      r   _multilayer_second_step_hint&Reduction._multilayer_second_step_hint  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++r   c                   Uc  g[         R                  R                  R                  UR	                  5       U5      (       d  gUR                  5          [        U5        UR                  5       n[        USS 5       H8  u  pE[         R                  R                  R                  US5      (       d  M6  Us  $    g! [         a     gf = f)z
If we are reducing over the full tensor, and it is non-dense in the last dimension,
reindex so we reduce over the dense dimension. initially just handle complete
reduction case
Nr2  r@   )
rm   r  r  r9  r  r  rK  r  r/  r   )r	  r  r  rQ  r   r   s         r   $check_for_split_dense_dim_reindexing.Reduction.check_for_split_dense_dim_reindexing  s     ww77  "O
 
 	!*- '')gcrl+DAww771== ,  # 		s   B: :
CCc                  ^^^^^
^ U R                  TU5      n[        R                  UT/U5      m[        R                  R
                  R                  [        R                  " TU-  S5      5      (       + m
      SUUUU
UU4S jjn	U	$ )Nr   c                "  >^^ Uu  nU Gt mnTU-  U-   mSUU
UU4S jjnT(       ac  [        T5      n[        R                  " [        R                  " TU5      [        R                  " TU5      5      n[        R                  " XST	5      $ U" 5       $ )Nc                 $   > T" TT" T /5      5      $ r   r   )r  rZ  	new_indexr   s   r   bodyCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body  s    i');<<r   )r   rl   )r^   rk   r  r  masked)r   r3  reduction_blockrY  index_dtyper  r  rX  
block_sizedefaultrZ  	need_maskr  r   s         @@r   
wrapper_fn5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn  s     "1_*/'Y ?2_DG= = -o>vvNN7K8NN?K@ zz$g66vr   )r   Sequence[Symbol]r3  rc  r   rl   )	rS  Viewdynamic_reshape_indexerrm   r  r  r  r   r  )r	  rZ  r  r  r  r^  r_  r  dense_indexra  r`  r   s    ` ` ``   @@r   _multilayer_wrap_loader!Reduction._multilayer_wrap_loader  s     >>Z
 ../
 ((>>HH_u,a0
 
		#	6F		 	( r   c                   ^^^ [        S T 5       5      (       d   ST< 35       e[        R                  U[        U5      [        U5      -   5      m      SUUU4S jjnU$ )Nc              3  *   #    U  H	  oS :H  v   M     g7fr  r   r   r  s     r   r   DReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>  s     3?a6?r  z8Only enabled for numel_hint == 1, found original_ranges=c           	        > U S [        T5       nU [        T5      S  nT" UT" [        U5      [        U5      -   5      5      $ r   )r   r   )merged_indexnew_reduction_indexoriginal_idxrX  rZ  original_rangesr   s       r   ra  EReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fn	  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF r   )rn  r  ro  r  r   rl   )r   rd  re  r   )r	  rZ  rq  original_reduction_rangesr  r  ra  r   s    ``    @r   '_multilayer_wrap_loader_existing_ranges1Reduction._multilayer_wrap_loader_existing_ranges  s     3?333 	
G6HI	
3 ..%uZ'85AU;V'V
		(		!/		 		 		 r   c                  ^ U[         R                  [         R                  4;  a  UO[         R                  n[        R                  UUUUUUU	U5      nUR                  5         UR                  5       m      SU4S jjn[        R                  R                  R                  [        U5      5      nU R                  XU5      nXWS[        U5       :X  d   e[        R                  [	        UUUUU[        U5      S U	UUS95      $ )I
Break a large reduction up into multiple smaller reductions
recursively
c                   > T" / U QUQ5      $ r   r   )r   r3  intermediate_loaders     r   intermediate_fn;Reduction.create_multilayer_helper.<locals>.intermediate_fn>  s     ''A'A'ABBr   Nr  )r   r  r3  r  r   rl   )r  float16bfloat16rG  r  r  r  r  rm   r  r  r	  rh   rP  r   r   )r	  r  r  r  ra  rq  rs  r  r  r  r  r  intermediate_dtypeintermediaterz  r  ry  s                   @r   create_multilayer_helper"Reduction.create_multilayer_helper  s$   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%//o0NO
99~
 -Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
r   c                    [        U5      n[        XS-
  -   U5      nU R                  Xr5      nU R                  UUUUUUU
5      nU R	                  UUUUUU/ UQUPU/UUU	5      $ )rw  r@   )rh   r=   rJ  rg  r  )r	  r  r  r  r  r  r  r  r  r  r  r  r^  r_  ra  s                  r   r<  Reduction.create_multilayerV  s    & ((89o;UC
##N>00

 ++feL
 	
r   c                j    U R                  UUUUU5      nU R                  UUUUUU/ UQUQUU	SU
5      $ )rw  r2  )rt  r  )r	  r  r  r  r  rq  rs  r  r  r  r  ra  s               r   r;  +Reduction.create_multilayer_existing_ranges  sc    $ @@% 

 ++%+o+
+ 
 	
r   r   r  r  r<  r  r  )
rW  r  rX  re  rY  r  r  rc  r   r   r  r   zSequence[Sequence[Expr]]r  r   )r  r  r  r|  r  r|  r  zCallable[_P, OpsValue]r  r  r  r  r  z%Union[ReductionType, Literal['scan']]r  r!   r  Optional[IRNode]r   tuple[ReductionHint, _IntLike])
r  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]r  r  r  r   r  r|  r   z(Callable[[Sequence[_IntLike]], OpsValue])r  r  r  r|  r  r|  r  r  r  r  r  r  r  rP   r  rT   r  r  r   r:  r  r   r  r|  r   #Union[_NumLike, Sequence[_NumLike]])r  ry   r  r   r  rT   r   rT   )r  ry   r  r  r   r  )rZ  Callable[..., OpsValue]r  r  r  ry   r  ry   r^  ry   r_  r  r  r  r   Callable[..., object])rZ  4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]rq  r  rs  r  r  Sequence[Integer]r  r  r   z@Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue])r  r  r  r|  r  r|  ra  r  rq  r  rs  r  r  
list[Expr]r  list[Integer]r  rP   r  ry   r  rT   r   r:  )r  r  r  r|  r  r|  r  r  r  r  r  r  r  rP   r  ry   r  rT   r  r  r   r:  )r  r  r  r|  r  r|  r  r  rq  r  rs  r  r  r  r  r  r  rP   r  rT   r   r:  )"r   r   r   r   r   r  r>  rX   rW  r]  rZ  r  r  r  r  rf  r  r  r#  r?  rT   r  r  rF  rJ  rP  rS  rg  rt  r  r<  r;  r   rA  rB  s   @r   r  r    s   ((!!!!N HK(
 
 )

%#L"L 3L 	L
 )L 
L=


  (,``` ` )	`
 #` -` >` ` %` 
(` `D )N),) ) 	)
 
2) )V  )6(=(='+_
_
 _
 	_

 %_
 _
 )_
 &_
 &_
 %_
 
1_
 _
B $/	, < DD$/D	,D D %(:G	   &4D	 >  (,('( -( "	(
 ( ( 5( %( 
( (T D ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 &=
 =
 &=
 
1=
 =
~  (,+
+
 +
 	+

 %+
 +
 )+
 &+
 +
 &+
 %+
 
1+
 +
Z $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 &$
 &$
 
1$
 $
r   r  c                    ^ ^^ SUU U4S jjnU$ )1A closure containing math to read a given elementc                   > Tb  [        U 5      [        T5      :X  d   e[        U 5      [        T5      :X  d   eTn[        U TT5       H  u  p#nUS:w  d  M  XU-  -   nM     U$ r  )r   r   )r   resultr   stszrM  r  r  s        r   rX  _fixed_indexer.<locals>.indexer  sj    !c%jCK&???5zSY&&&ufd3KCRQw(* 4 r   )r   r  r   r   r   )r  r  rM  rX  s   ``` r   r"  r"    s      Nr   INNER_FN_TYc                  z   ^  \ rS rSr% S\S'                     SU 4S jjr          SS jrSrU =r$ )	MultiOutputReductioni  r   output_indexc
                   >^ [        T5      (       a  T4m[        T5      S:X  a  TS   n
O      SU4S jjn
[        TU ]  UUU
UUUUUS9  Xl        g )Nr@   r   c                4   >^ ^ [        U U4S jT 5       5      $ )Nc              3  4   >#    U  H  o" TT5      v   M     g 7fr   r   )r   r   r   reduction_idxs     r   r   @MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>  s     HiR]33i   )r   )r   r  	inner_fnss   ``r   rZ  -MultiOutputReduction.__init__.<locals>.loader  s     HiHHHr   r  )r   r  r  r  r   ztuple[OpsValue, ...])callabler   r  __init__r  )r  r  r  r  r  r  r  r  r  r  rZ  r  s      `       r   r  MultiOutputReduction.__init__  s     I"I y>Qq\FI#I4BI%I
 	-)) 	 		
 )r   c           	     N   [         R                  " U R                  U R                  U R                  U R                  X45      5      n[        U[        [        45      (       d   [        U5      5       eXPR                     n[         R                  " U=(       d    SU" U5      U5      $ rS  )rk   r  r  r  r  r  r   r   r   r   r  r  )r  rW  rX  rY  r  r   r  s          r   r  $MultiOutputReduction.store_reduction  s     JJNNMM$/	
 &5$-00>$v,>0(()"";#;)WT]ERRr   )r  )r  r  r  r|  r  z)Union[INNER_FN_TY, Sequence[INNER_FN_TY]]r  r  r  r  r  rP   r  r|  r  rT   r  r   )
rW  r  rX  re  rY  r  r  rc  r   r   )	r   r   r   r   r   r  r  r   rA  rB  s   @r   r  r    s    #)#) #) =	#)
 "#) ,#) &#) #) &#) #)JS"S 3S 	S
 )S 
S Sr   r  c                  j    \ rS rSr\\R                  S4                   SS jj5       rSrg)OnlineSoftmaxReductioni  Nc
           	        ^^^^^^^ [        UUUUUUU4S j[        U5       5       5      n
U
 H  nUR                  5         M     U
$ )z.
Create the reduction disregarding splitting.
c              3  p   >#    U  H+  n[         R                  [        TTTTTS TTU5	      5      v   M-     g7f)rB  N)r   r  r  )	r   
output_idxr  r  r  r  r  r  r  s	     r   r   0OnlineSoftmaxReduction.create.<locals>.<genexpr>  sO      
 0
 $$+"
  0s   36)r   r   r  )r	  r  r  r  r  r  r  
num_outputr  r  resultsr  s    `````` `   r   r  OnlineSoftmaxReduction.create  sB       
 
 $J/
 
  AIIK r   r   )r  r  r  r|  r  r|  r  r  r  r  r  r  r  r   r  rT   r  r  r   1Sequence[Union[TensorBox, ShapeAsConstantBuffer]])	r   r   r   r   r?  rT   r  r  r   r   r   r   r  r    s     )6(=(='+!! ! 	!
 %! ! )! ! &! %! 
;! !r   r  c                      \ rS rSr\\R                  4               SS jj5       r\      SS j5       r	\                  S	S j5       r
Srg)
WelfordReductioni"  c                  ^^^^^ US;   d   e[         R                  R                  R                  [	        T5      5      nS
UUU4S jjn	US:X  a  U	" S5      n
U	" S5      nU	" S5      nXU4$ US:X  aD      SUUUU4S jjmUS:X  a  T" US   5      U	" S5      U	" S5      4$ [        U4S jU 5       5      $ [        R                  TTTUS   TTUUS9u  pU[        R                  :X  a  UnUS:  a  U R                  TTUTTUUU5      $ [        S	5       Vs/ s H)  n[        R                  [        TTUTTUTUU5	      5      PM+     nnU H  nUR                  5         M     U$ s  snf )N)rA  r  c                V   >^  SUU 4S jjn[         R                  TTU[        T5      S9$ )Nc                2   > [         R                  " TT5      $ r   r+  )r   r  r'  s    r   r  8WelfordReduction.create.<locals>.const.<locals>.inner_fn3  s    || r   ra  r   r  r   rl   rJ  r  r   )r'  r  r  r  r  s   ` r   const&WelfordReduction.create.<locals>.const2  s7      ##!F|	 $  r   r   r@   c                V   >^  SU U4S jjn[         R                  TTU[        T5      S9$ )Nc                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf r   r2  )r   r   r3  rZ  r  s      r   r  7WelfordReduction.create.<locals>.copy.<locals>.inner_fnK  s2    =M&N=Muww||=MO&N!#77 'Or4  ra  r  r  )rZ  r  r  r  r  r  s   ` r   copy%WelfordReduction.create.<locals>.copyH  s7    8 8 !''!%<	 (  r   rA  c              3  4   >#    U  H  nT" U5      v   M     g 7fr   r   )r   r   r  s     r   r   *WelfordReduction.create.<locals>.<genexpr>Y  s     :	"T"XX	r  )r  r  r   )r'  r   r   r:  )rZ  r  r   r:  )rm   r  r  r9  rh   r   r  r  rT   r  r<  r   r   r  r  r  )r	  r  r  r  r  r  r  r  r  r  meanm2weightr=  r  r  r  r  r  s    `` ``            @r   r  WelfordReduction.create#  s    !FFFF''**33MBR4ST	 	 a8DqB1XFV##aL8  !11IaL)58U1X==:	:::&  **aL)+ + 	
 ]222!N19(( 	 	2 $Ah
 '
  $""
 ' 	 
  AIIK %
s   0E#c                    g)N)r   r   r   r   rI  s     r   rJ  WelfordReduction.default_value  s     r   c	                  ^ ^^^^^^ [        T5      m[        R                  R                  R	                  [
        R                  " TT-  S5      5      (       + n	U	(       aB  US:w  a<          S
U4S jjn
T R                  UTUS   [        U
SS9[        U
SS94UTSTUS9$ [        TTS-
  -   T5      m[        R                  UT[        UU UUU4S jU 5       5      / UQTPT/UU5      nU H  nUR                  5         M             SS jm[        R                  R                  R                  [        U5      5      nT R                  TX5      n[        R                  UT[        U4S	 jU 5       5      UT/SU5      $ )rw  r   r  c                2   > [         R                  " UT5      $ r   r+  )r   r  r  r  s      r   rF  4WelfordReduction.create_multilayer.<locals>.constant  s     ||E511r   r  r@   )r  r  r  r  r  r  r  r  c           
   3  L   >#    U  H  nTR                  UTTTTS S9v   M     g7f)r   )r_  N)rg  )r   rZ  r^  r	  r  r  r  s     r   r   5WelfordReduction.create_multilayer.<locals>.<genexpr>  s>      
 (F ++$# ,  (s   !$c                    U" / U QUQ5      $ r   r   )r   r3  rZ  s      r   intermediate_loader_fnBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn  s    
 4E4O455r   c              3  T   >#    U  H  n[        TUR                  5       S 9v   M     g7f))rZ  N)r   r  )r   r   r  s     r   r   r    s&      &A .q}}G&r  )r   r  r  r  r  r   r   rl   )r   r  r3  r  rZ  r  r   rl   )rh   rm   r  r  r  r   r  r<  r   r=   r  r  r   r  r	  rP  )r	  r  r  r  r  r  r  r  r  r`  rF  intermediatesr   r  r^  r  r  s   ` `  ` `      @@@r   r<  "WelfordReduction.create_multilayer  s     ((89((>>HH_u,a0
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 
 (
 
 feL#
& AIIK 	6!	6+	6 9	6 		6 WW%%//f0EF
99:
  && &  G
 	
r   r   N)r  r  r  r|  r  Sequence[Callable[..., Any]]r  r  r  r  r  rP   r  rT   r   r  r  )r  r  r  r|  r  r  r  r  r  r  r  rP   r  ry   r  rT   r   r  )r   r   r   r   r?  rT   r  r  r  rJ  r<  r   r   r   r   r  r  "  s    )6(=(=vv v 0	v
 v (v &v &v 
;v vp $/	, 
 Z
Z
 Z
 0	Z

 Z
 (Z
 &Z
 Z
 &Z
 
;Z
 Z
r   r  c                    ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   \" S 5      S S!U 4S jjj5       rS"U 4S jjr          S#S jrS$S jr	S%S jr
S%S jrS%S jrS&S jrS'S jrS S!S jjr\\R$                  4SS.                   S(S jjj5       r\                  S)S j5       rSrU =r$ )*Scani 	  r  scan_rangesr  =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]r  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rT   r  r   r  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]r  c                   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  [        5       R                  " U4S jU R
                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r(   r  s     r   r   ,Scan.get_free_symbol_uses.<locals>.<genexpr>	       O>N"1m44>Nr  c              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r(   r  s     r   r   r  	       Hi"1m44ir  )r  rW  r;   r  r  r  r  s    `r   rW  Scan.get_free_symbol_uses	  s]     G(7l  Od>N>NO l  HdiiH		
r   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g r   )r   r  r  r  r  r  r  s    r   r  Scan.__post_init__	  =    4;;#d&6&6"773tyy>IIIr   c                "  ^ U R                  X45      m[        U4S jU R                   5       5      n[        R                  " U R
                  U R                  U5      n[        R                  " U=(       d    SU" T5      X`R                     5      $ )Nc              3  2   >#    U  H  o" T5      v   M     g 7fr   r   r   r  r   s     r   r   'Scan.store_reduction.<locals>.<genexpr>)	       D^x}}^   rT  )	r   r   r  rk   r  r  r  rU  r  )r  rW  rX  rY  	scan_varsr   r  r   s          @r   r  Scan.store_reduction!	  sk     ll4+DT^^DD$++t?yy$9gclF;L;L4M
 	
r   c                    g)Ncustomr   r  s    r   rZ  Scan.get_reduction_type/	  s    r   c                    U R                   $ r   )r  r  s    r   r]  Scan.get_reduction_size3	  r  r   c                    U R                   $ r   r  r  s    r   r
  Scan.get_size6	      yyr   c                    U R                   $ r   r  r  s    r   r  Scan.get_pointwise_size9	  r  r   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  r  r  s    r   r  Scan.index_length<	  !    4;;#d&6&6"777r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ r   )r  r  r  r?   r  r   r  r   r  r   s       r   r  Scan.inner_fn_args?	  C    DKK(T--t}}=ll5)vr   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X#5      n[        U R                  XAS9$ r*  )r  r  r  r?   r  r   rI   r  r  rd  r   r  r   s        r   r  Scan.inner_fn_free_symbolsE	  M    DKK(T--t}}=ll5)#DMM3TTr   T)can_fallback_to_atenc                 ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [        U5      S:  aB  [         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      n[        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[        R                  UX,   X<   US9PM     sn$ U R!                  UUS   US   TTTUUS9u  p}["        nUS:  at  [$        R&                  R(                  S L =(       d    [*        =(       a	    [,        S:  =(       a    [        U5      S:H  nU(       d  U(       a  S /[        U5      -  $ SnO[.        nSUUU4S jjn[        [        U5      5       Vs/ s H/  n[0        R                  U" S	UX,   UX<   UUTTUUUUS.U	D65      PM1     nnU H  nUR3                  5         M     U$ s  snf s  snf )
Nr@   ra  r   )r  r  r  axispointwise_rangesr  r  
scan_numelz3.3.0c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ r   r   )r   
scan_indexr  r  r  s     r   r   Scan.create.<locals>.reindex	  S    z?c+&6666u:%5!6666>U5D\>J>tu>>r   )r  r  r  r  r  r  r  r  r  r   r  r  )r   r  r  r  r   r  r   )rm   r  r  rC   SCANr   TUPLE_REDUCTIONr  r9  rh   r  r   Ler   rJ  r  r  r  r  versionhip
has_tritontriton_version	SplitScanr   r  )r	  r  r  r  r  r  r  r  r  r   r  r	  r  r  	scan_typesupports_splitr   r  r  r  r  s        `             @@r   r  Scan.createK	  s    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''v;?177#6#6N22$
 $
 6CK''77##&&}['AB
6{c)n,,, ))%((:q*ABB %*#f+$6 %7L   ! .&4	 !  %7  &)^^)q\-#!! &4 	&
" 	>!!T)Wj.V^w=V%v;!#  "' 6CK//!"J%		? 	?. !&c&k 2%
$ !3#  ! .!&4'+ +)##1!- " !3% 	 
* FNN  N
s   ;$I3 6I8c	                L   ^^ SUU4S jjn	[         R                  UUUU	UUSUS9$ )Nc                ,   > T" / U S T QUQU TS  Q5      $ r   r   )r   r  r  r  s     r   ra  #Scan.num_splits.<locals>.wrapper_fn	  s*    Fc%4jF=F3tu:FGGr   r  )r  r  r  r  r  r  r  r  )r   r  r  r  r   rl   )r  r  )
r	  r  r  r  r  r  r  r  r	  ra  s
      ``     r   r  Scan.num_splits	  sA    	H 	H ###(!& $ 	
 		
r   r   r  r<  rt  )
rW  r  rX  z%Callable[[Sequence[_IntLike]], Never]rY  r  r  rc  r   r   r  r  r  r;  )r  r  r  r  r  z+tuple[Callable[[Sequence[Expr]], Any], ...]r  r  r  r   r  r  r  rT   r  r   r   r   r   ;Sequence[Optional[Union[TensorBox, ShapeAsConstantBuffer]]])r  r  r  r|  r  r  r  r   r  r  r  r  r  r  r	  r!   r   r  )r   r   r   r   r   rX   rW  r  r  rZ  r]  r
  r  r  r  r  r?  rT   r  r  r  r   rA  rB  s   @r   r  r   	  s   
MMSS!!##-- F#
 
 $
 
"
 7
 	

 $
 

 8U  )6(=(=_ &*__ (_ ?	_
 _ _ R_ &_ #_ _ 
E_ _B 

 
 7	

 
 (
 #
 R
 
 
(
 
r   r  c                      \ rS rSrSrg)r  i	  r   N)r   r   r   r   r   r   r   r   r  r  	  s    r   r  c                  n  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S\S'   \" S 5      SSU 4S jjj5       rS U 4S jjr          S!S jrS"S jr	S#S jr
S#S jrS#S jrS$S jrS%S jrSSS jjr\\R$                  4                   S&S jj5       rSrU =r$ )'Sorti	  r  sort_rangesr  z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rT   r  r   r  r  r  r  r  r   stable
descendingc                   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  [        5       R                  " U4S jU R
                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r(   r  s     r   r   ,Sort.get_free_symbol_uses.<locals>.<genexpr>	  r  r  c              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r(   r  s     r   r   r'  	  r  r  )r  rW  r;   r  r"  r  r  s    `r   rW  Sort.get_free_symbol_uses	  s]     G(7l  Od>N>NO l  HdiiH		
r   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g r   )r   r  r"  r  r  r  r  s    r   r  Sort.__post_init__	  r  r   c                6  ^ U R                  X45      m[        U4S jU R                   5       5      n[        R                  " U R
                  XPR                  U R                  5      n[        R                  " U=(       d    SU" T5      X`R                     5      $ )Nc              3  2   >#    U  H  o" T5      v   M     g 7fr   r   r  s     r   r   'Sort.store_reduction.<locals>.<genexpr>	  r  r  rT  )
r   r   r  rk   sortr  r#  r$  rU  r  )r  rW  rX  rY  r  r   r  r   s          @r   r  Sort.store_reduction	  so     ll40DT^^DD$++v{{DOOLyy$9gclF;L;L4M
 	
r   c                    g)Nr/  r   r  s    r   rZ  Sort.get_reduction_type	  s    r   c                    U R                   $ r   )r"  r  s    r   r]  Sort.get_reduction_size
  r  r   c                    U R                   $ r   r  r  s    r   r
  Sort.get_size
  r  r   c                    U R                   $ r   r  r  s    r   r  Sort.get_pointwise_size
  r  r   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  r"  r  s    r   r  Sort.index_length

  r  r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ r   )r  r  r"  r?   r  r   r  s       r   r  Sort.inner_fn_args
  r   r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X#5      n[        U R                  XAS9$ r*  )r  r  r"  r?   r  r   rI   r  r  s        r   r  Sort.inner_fn_free_symbols
  r  r   c	                  ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      nSn[        R                  R                  =(       a%    U
R                  [        R                  " X5      5      nU(       d  S /[        U5      -  $ [        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[         R#                  UX.   X>   US9PM     sn$ SUUU4S jjn[        [        U5      5       Vs/ s H3  n[$        R#                  ['        SUX.   UX>   UUTTUUUUUS.U	D65      PM5     nnU H  nUR)                  5         M     U$ s  snf s  snf )Nr@   rM  ra  c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ r   r  )r   
sort_indexr  r  r"  s     r   r   Sort.create.<locals>.reindexH
  r  r   )r  r  r  r  r  r  r  r"  r   r  r  r#  r$  )r   r  rA  r  r   r  r   )rm   r  r  rC   SORTr   r  r9  rh   rA   r$  persistent_reductionsr  r   r  r   rJ  r  r   r!  r  )r	  r  r  r  r  r  r#  r$  r  r   r  
sort_numel
max_rblockis_persistent_kernelr  r   r  r  r  r"  s        `            @@r   r  Sort.create
  s    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''77##&&}['AB
 
MM// Q..uxx
/OP 	 $6CK''6{c)n,,, ))%((:q*ABB %*#f+$6 %7L   ! .&4	 !  %7 	? 	?0 !&c&k 2'
& !3%  ! .!&4'+ +##1!-!) $ !3' 	 
, FNN  Q
s   $G&:G+r   r  r<  rt  )
rW  r  rX  r  rY  r  r  r  r   r   r  r  r  r  )r  r  r  r  r  z'tuple[Callable[[list[Expr]], Any], ...]r  r  r  r   r#  r   r$  r   r  rT   r   r   r   r  )r   r   r   r   r   rX   rW  r  r  rZ  r]  r
  r  r  r  r  r?  rT   r  r  r   rA  rB  s   @r   r!  r!  	  sD    
GG!!##--L F#	
 	
 $	
 
"
 2
 	

 '
 

 8U  )6(=(=LL (L ;	L
 L L L L &L L 
EL Lr   r!  c                :     [        U SS9  g! [         a     gf = f)NFfreezeT)rK  r  r   s    r   r  r  i
  s&    a. s   
 
c                     [        U SS9u  pUR                  5       (       a  UR                  5         UR                  5       $ ! [         a     gf = fNFrJ  )rK  should_pad_stridespad_stridesis_contiguousr  )r   _bufferrJ  s      r    is_contiguous_storage_and_layoutrR  q
  sS    /%@ $$&& ##%% s   A A 
AAc           	        [        U [        5      (       a  [        U R                  UUUUUS9$ [        U [        5      (       a5  [        U R                  UUUUUS9u  pgX R                  R                  5       4$ [        U [        5      (       a  U(       as  U(       a6  U R                  5         U R                  5       R                  5       (       d   eO6Ub  U R                  X4S9  O#Ub  U R                  XTS9  OU R                  5         [	        U 5      U R                  5       4$ [        U [        5      (       a#  [        U R                  US9u  pXR                  4$ [        e)z
Try to simplify x into a StorageBox and a Layout.

allow_padding only affect how we apply stride_order. When allow_padding
is True, we have the freedom to add padding when applying the stride_order.
rK  want_contiguousstride_orderr6  rD  r6  rJ  )r   r   rK  rI  
StorageBoxr  Bufferr2  rP  r7  rE  r  rO  rJ  r  )	r   rK  rU  rV  r6  rD  r   rJ  buffers	            r   rK  rK  }
  sJ    !Y$FF+%''
 	
 !Z  )FF+%''
	 &&##%%%!V!||~335555)11  2  *22! 3  !!}alln,,!_%% *FF
	 xx
r   c                ^     [        U SS9u  p#UR                  U5      $ ! [         a     gf = frM  )rK  is_stride_orderedr  )r   rV  rQ  rJ  s       r   "is_stride_order_storage_and_layoutr]  
  s8    /%@''55 s    
,,c                   [        U [        [        45      (       a  [        U R                  5      $ [        U [
        5      (       a}  U R                  n[        R                  R                  R                  UR                  [        UR                  5      -  [        5      (       + n[        U R                  5      =(       d    U$ [        U [        5      (       a+  U R!                  5       [        R                  R"                  ;   $ gr  )r   r   rX  is_unalignedrI  rO  rJ  rm   r  r  statically_known_multiple_ofrM  r_   r  ra   rY  r  unaligned_buffers)r   rJ  has_unaligned_layouts      r   r_  r_  
  s    $J/00DII&&$((#$77#3#3#P#PMMN6<<88/$
  
 DII&>*>>$}}!''";";;; r   c                     \ rS rSr% S\S'   \" S 5      SSS jj5       rSS jrSS jrSS jr	\
S S j5       rS!S	 jrS"S
 jrS#S jrS$S jrS%S jrS&S jrS'S jrS(S jrS)S jrS*S jrS'S jrS'S jrS+S jrS,S jrS-S jrS.S jrSrg)/BaseViewi
  r   rI  c                8    U R                   R                  U5      $ r   rI  rW  rV  s     r   rW  BaseView.get_free_symbol_uses
  s    yy--m<<r   c                    [        SU  35      e)Nzmake_reindexer NYI on r  r  s    r   make_reindexerBaseView.make_reindexer
  s    !$:4&"ABBr   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   r   innerr   s    r   rX  &BaseView.make_indexer.<locals>.indexer
      &&r   )r   r  r   r!   )rI  r  ri  )r  rX  rn  r   s     @@r   r  BaseView.make_indexer
  s4    		&&(%%'	' 	' r   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   rm  s    r   rZ  $BaseView.make_loader.<locals>.loader
  rp  r   r  )rI  r  ri  )r  rZ  rn  r   s     @@r   r  BaseView.make_loader
  s4    		%%'%%'	' 	' r   c                6    U R                   R                  5       $ r   )rI  r  r  s    r   r  BaseView.dtype
  s    yy""$$r   c                6    U R                   R                  5       $ r   rI  r  r  s    r   r  BaseView.get_layout
      yy##%%r   c                6    U R                   R                  5       $ r   rI  r  r  s    r   r  BaseView.get_device
  r{  r   c                    g r   r   r  s    r   r  BaseView.get_origin_node
  r  r   c                6    U R                   R                  5       $ r   rI  r  r  s    r   r  BaseView.get_name
      yy!!##r   c                "    U R                  5       $ r   r  r  s    r   r  BaseView.get_pointwise_size      }}r   c                8    U R                   R                  U5      $ r   rI  r(  r&  s     r   r(  BaseView.mark_reuse      yy##E**r   c                6    U R                   R                  5       $ r   rI  r  r  s    r   r  BaseView.has_exceeded_max_reads      yy//11r   c                6    U R                   R                  5       $ r   rI  r  r  s    r   r  BaseView.realize      yy  ""r   c                8    U R                   R                  5         g r   rI  r,  r  s    r   r,  BaseView.realize_hint  s    		 r   c                6    U R                   R                  5       $ r   rI  rS  r  s    r   rS  BaseView.get_storage_numel      yy**,,r   c                6    U R                   R                  5       $ r   rI  r`  r  s    r   r`  BaseView.is_extern      yy""$$r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  5       $ r   )r   rI  rd  r   is_module_bufferr  s    r   r  BaseView.is_module_buffer  s9    $))X..?TYY?.yy))++r   c                6    U R                   R                  5       $ r   rI  r  r  s    r   r  BaseView.get_read_names      yy''))r   c                    [         R                  " [        SS5         [        U R	                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr  )r    r   r  rK   r  r
  rL  r  s    r   r  BaseView.get_reads  sD    \\.*:DA&  " e	 BAAs   2A
A'c                z    U n[        U[        5      (       a#  UR                  n[        U[        5      (       a  M#  U$ r   )r   rd  rI  )r  r   s     r   r/  BaseView.unwrap_view%  s1    H%%A H%%r   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R                  5       UU R                  5       S9$ r^  )r  r    r   rb  rJ  r  r
  rc  s      r   rf  BaseView.constant_to_device+  sN    !!#n.?HP.."==?	
 	
r   r   Nr  r<  r   *Callable[[Sequence[Expr]], Sequence[Expr]]r  r  r{  r}  r  rw  r  r  r  r  r  rt  r  ru  r  r  r  )r   r   r   r   r   rX   rW  ri  r  r  r  r  r  r  r  r  r  r(  r  r  r,  rS  r`  r  r  r  r/  rf  r   r   r   r   rd  rd  
  s    
LJ'= (=C % %&&$+2#!-%,*	
r   rd  c                  `    \ rS rSr% S\S'   \S
S j5       r\SS j5       rSS jr	  SS jr
Srg	)r   i7  r  r  c                H   [         R                  R                  nU Vs/ s H  n[        R                  " U5      PM     nnU R                  5       nS/[        U5      [        U5      -
  -  [        U5      -   n[        U5      [        U5      :X  d   e[        [        U5      5       Hs  nX   S:X  a  XE   c   eXE   X'   M  XE   b0  [         R                  R                  R                  XE   5      (       a  MQ  UR                  X   XE   -
  SS9S:X  a  Mn   S5       e   U$ s  snf )zReplace `-1` with correct sizesNr2  r   fallbackzKBroadcast failed in ExpandView({x.get_size()}, {new_size}) on dimension {i})rm   r  r  r   r  r
  r   r   r   is_size_one_or_falser	  )r   new_sizer  r   old_sizer   s         r   _normalize_sizeExpandView._normalize_size;  s    77##-56XELLOX6::<6S]S]:;d8nL8}H---s8}%A{b {...&k$(8(8(M(M) )   ))(+*Ca)PTUU aU &" + 7s    Dc                   U R                  X5      n[        U5      (       Ga   [        U5      u  p4[        U5      [        UR                  5      -
  nUS:  d   e[
        R                  R                  /U-  n[        UR                  UR                  5       H^  u  pxUR                  [        R                  R                  R                  U5      (       d  UO[
        R                  R                  5        M`     [        UR                   UR"                  [%        U5      UUR&                  UR(                  5      n	[+        X9S9$ [-        XS9$ )Nr   rH  )rI  r  )r  r  rK  r   r  r   r  r  r   r  r  rm   r  r  r  rL  r  r  r   rM  rN  rO  r   )
r	  r   r  rR  rS  skiprT  r  r  rU  s
             r   r  ExpandView.createV  s
   &&q3 ##"7":Gx=3z#77D199'',,$.J #J$5$5z G!!77++@@FF  !H %!!  X!!$$J #CCq00r   c                    U R                   $ r   r  r  s    r   r
  ExpandView.get_sizeq  r  r   c                   ^^ U R                  5       nU R                  R                  5       m[        U5      [        T5      -
  m    SUU4S jjnU$ )Nc                   > [        U TS  5      n [        U 5      [        T5      :X  d   e[        [        T5      5       H*  nTU   S:X  d  M  [        R                  R
                  X'   M,     U $ r  )r   r   r   r   r  r  )r   r   actualr  s     r   r   *ExpandView.make_reindexer.<locals>.reindex{  s_     tu&Eu:V,,,3v;'!9>$ww||EH ( Lr   r   r  r   r  )r
  rI  r   )r  targetr   r  r  s      @@r   ri  ExpandView.make_reindexert  sU     ##%6{S[(		!				 		 r   r   N)r   r   r  r  r   r  )r   r   r  r  r   rd  r  r  )r   r   r   r   r   r  r  r?  r  r
  ri  r   r   r   r   r   r   7  s@    
 4 1 14	3r   r   c                  `    \ rS rSr% S\S'   \S
S j5       r\SS j5       rSS jr  SS jr	Sr
g	)PermuteViewi  r  dimsc           
        U R                  U5      n[        U5      [        [        [        U5      5      5      :X  d   e[	        U5      (       a  [        U5      u  p4[        UR                  UR                  U Vs/ s H  oTR                  U   PM     snU Vs/ s H  oTR                  U   PM     snUR                  UR                  5      n[        X6S9$ [        XS9$ s  snf s  snf )NrH  )rI  r  )_map_neg_dimsr;   r   r   r  rK  rL  r  r  r  r  rM  rN  rO  r  )r	  r   r  rR  rS  r   rU  s          r   r  PermuteView.create  s      &$:eCI.>#???? ##"7":G$!!  -12T#T2/34t!""1%t4!!$$J #CC-- 34s   :CC$c                X    U Vs/ s H  o"S:  a  UO[        U5      U-   PM     sn$ s  snf r  r  )r	  r  r>  s      r   r  PermuteView._map_neg_dims  s+    @DEaxSY_4EEEs   'c                   [        U R                  U R                  5      5      [        [        [	        U R                  5      5      5      :X  d   eU R
                  R                  5       nU R                   Vs/ s H  o!U   PM	     sn$ s  snf r   )r;   r  r  r   r   rI  r
  )r  r  r   s      r   r
  PermuteView.get_size  sq    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   8B	c                Z  ^ [        U R                  5       VVs0 s H  u  pX!_M	     snnm[        [        U R                  5      5       Vs/ s H  nTU   PM
     snm[	        T5      [	        [        [        U R                  5      5      5      :X  d   e    SU4S jjnU$ s  snnf s  snf )Nc                8   > T Vs/ s H  oU   PM	     sn$ s  snf r   r   )r   r   invs     r   r   +PermuteView.make_reindexer.<locals>.reindex  s     '**c!Hc***s   r  )r   r  r   r   r;   )r  r   r
  r   r  s       @r   ri  PermuteView.make_reindexer  s     !*$)) 45 4qt 45$S^454!s1v45#*U3tyy>-B"CCCC	+!	+	+
  65s   B"B(r   N)r   r   r  r  r   rd  )r  r  r   	list[int]r  r  )r   r   r   r   r   r?  r  r  r
  ri  r   r   r   r   r  r    sB    
. .$ F F,	3r   r  c                  V    \ rS rSr\SS.SS jj5       r\    S	S j5       rS
S jrSr	g)SqueezeViewi  N)r>  c          	        [        U5      (       Ga5  [        U5      u  p4/ n/ nUbF  [        U[        5      (       d   [	        U5      5       eSU::  a  U[        UR                  5      :  d   e[        [        UR                  UR                  5      5       Hm  u  nu  pUc,  US:w  a$  UR                  U5        UR                  U	5        M5  M7  Xr:w  a$  UR                  U5        UR                  U	5        M`  US:X  a  Mh   S5       e   [        UR                  UR                  UUUR                  UR                  5      n
[!        X:S9$ Uc;  ["        R%                  XR'                  5        Vs/ s H  oS:w  d  M
  UPM     sn5      $ UR'                  5       U   S:X  d   e["        R%                  U[        UR'                  5       5       VVs/ s H  u  p{Xr:w  d  M  UPM     snn5      $ s  snf s  snnf )Nr   r@   zexpected squeezed size to be 1rH  )r  rK  r   r   r   r   r  r   r   r  r  rL  r  r  rM  rN  rO  rd  r  r
  )r	  r   r>  rR  rS  r  rT  r   r  r  rU  r   s               r   r  SqueezeView.create  s    ##"7":GHJ!#s++6T#Y6+CxC#joo*>$>>>%.s:??JDUDU/V%W!>D;qy -"))&1 ! x -"))&1#qyJ*JJy &X %!!  !!$$J #CC;;;qjjl"El1f1l"EFF::<$)));;q1::<1H"U1HAH11H"UVV #F #Vs   +	G'
8G'

G,G,c                   ^^ U  Vs/ s H  oS:w  d  M
  UPM     nn[        U 5       VVs/ s H  u  p1US:w  d  M  UPM     snnm[        U 5      mSUU4S jjnX$4$ s  snf s  snnf )Nr@   c                   > [        U 5      [        T5      :X  d   U  ST 35       e[        R                  R                  /T-  n[	        TU 5       H	  u  p#X1U'   M     [        U5      $ )N )r   r   r  r  r   r   )r   rX  r   r   lengthnot_ones       r   r   %SqueezeView.squeezer.<locals>.reindex  sb    u:W-C%'/CC-/Igu-!"# .##r   )r   r  r   ztuple[Expr])r   r   )r  r   r  r   r   r  r  s        @@r   squeezerSqueezeView.squeezer  sc      $.t!AvAt.!*4;AF1;T	$ 	$    /;s   	AAA A c                    [        S5      e)Nzuse SqueezeView.create())AssertionError)r  rI  s     r   r  SqueezeView.__init__  s    788r   r   )r   r   r>  r  r   r   )r  r  r   z9tuple[list[int], Callable[[Sequence[Expr]], tuple[Expr]]])rI  r   r   r   )
r   r   r   r   r?  r  r  r  r  r   r   r   r   r  r    sC    7; $W $WL !!	B! ! 9r   r  c                  ~    \ rS rSr% S\S'   S\S'     SS jrSS jrSS jr\r\	        SS	 j5       r
SS
 jrSrg)GenericViewi  r  r  r  r   c                    U R                   $ r   )r   r  s    r   ri  GenericView.make_reindexer  s     ||r   c                   [        [        U R                  5      5       Vs/ s H  n[        [        R
                  U5      PM     nn[        U R                  U5      5      nSSR                  [        [        U5      5       SU 3$ s  snf )Nzlambda , r  )r   r   r  rg   r?   r@  r   r   r  r  r   )r  r  	index_old	index_news       r   reindex_strGenericView.reindex_str  sv    CHTYYCX
CXa*4::q9CX 	 
 i01	3sI#6789+FF	
s   $Bc                z    U R                  U R                  SU R                   3SU R                  5        3/5      $ )Nsize=zreindex=)r  rI  r  r  r  s    r   r  GenericView.__str__	  s=    YY%		{+x8H8H8J7K-LM
 	
r   c                $    U " U[        U5      US9$ )NrI  r  r   )r   )r	  r   r  r   s       r   r  GenericView.create  s     X@@r   c                    U R                   $ r   r  r  s    r   r
  GenericView.get_size  r  r   r   Nr  r  )r   r   r  r  r   r  r   rd  r  )r   r   r   r   r   ri  r  r  r>  r?  r  r
  r   r   r   r   r  r    sp    
77	3
G

 HAA !A <	A
 
A Ar   r  c                      \ rS rSr\S	S j5       r\S
S j5       r\      SS j5       r\ S       SS jj5       r	\ S       SS jj5       r
Srg)rd  i  c                   [         R                  " U 5      n [         R                  " U5      n[        R                  R                  R
                  R                  nU" [         R                  " U S5      5      (       a  X-   n U $ r  )r   r  rm   r  r  r   evaluate_exprLt)r   r  r  s      r   handle_negative_indexView.handle_negative_index  s[    ll3||D!((22@@#q)***C
r   c                (  ^	 [        U[        5      (       d   [        U5      5       eU R                  UR	                  5       U5      u  m	n[
        R                  R                  R                  T	U5      (       a  U$ Sn[        [        T	5      5      S:  d  [        [        U5      5      S:  a  SnSU;   a  SU	4S jjnU " U[        U5      US9$ [        U5      (       d  U(       a  U(       a%  [        U5      (       d  [        R                  U5      n[        USS9u  pV[!        UR"                  UR$                  U[&        R)                  U5      UR*                  UR,                  5      n[/        XWS9$ U R1                  T	U5      nU " U[        U5      US9$ )	NFr   Tc                4   > [        S/[        T5      -  5      $ r  )r   r   )r   r  s    r   fake_reindex!View.create.<locals>.fake_reindex:  s    aS3x=011r   r  )rU  rH  )r   r   r   ztuple[int, ...])r   r   r   resolve_negative_sizer
  rm   r  r  statically_known_list_equalsr   r3   r   rR  r  require_contiguousrK  rL  r  r  r  r  rM  rN  rO  re  )
r	  r   r  unbacked_symbols_in_sizesr  rR  rS  rU  r   r  s
            @r   r  View.create(  sb   (H--=tH~=- 66qzz|XN( 77888LLH$)!%h/014(23a7(,%=2 ADNLII-a004M(2RST2U2U !33A6"74"PG$!!  11(;!!$$J #CC--hAX@@r   c                P   U Vs/ s H,  n[         R                  R                  R                  U5      PM.     nnU  Vs/ s H,  n[         R                  R                  R                  U5      PM.     n n[	        U5      n[        [        U5      5       HI  nX   S:X  d  M  [        R                  R                  X'   [        [        U 5      [        U5      5      X'     O   [         R                  R                  R                  [        U 5      [        U5      5        X4$ s  snf s  snf )Nr2  )rm   r  r  r9  r   r   r   r   r  Oner<   rh   check_equals)r  r  r   r   s       r   r  View.resolve_negative_sizeV  s     ;CC(QAGG$$--a0(C:BC(QAGG$$--a0(C>s8}%A{b #ggkk&}X'>h@WX	 & 	
%%mH&=}X?VW!! DCs
   3D3D#Nc                     U R                  XU5      nU$ ! [        [        4 a=    [        U5      /nU R                  X5      nU R                  XR5      n[	        Xg5      n U$ f = fr   )_dynamic_reshape_indexerr  
IndexErrorrh   r   )r	  r  r  	dense_dimr   flatr   r   s           r   re  View.dynamic_reshape_indexerg  sr    	:228yQG  
+ 	:!(+,D33HCH33DCH%h9G	:s    A	A#"A#c                  ^^ [         R                  R                  R                  n[	        [        U5      5       Vs/ s H  n[        [        R                  U5      PM     snm[        [        TU5      5      n[        U 5      nUSL=(       a&    U[        U5      S-
  :g  =(       a    [        U5      S:H  nU(       a'  Uc   eUR                  U5      nUR                  U5        / mU(       GaK  U(       GaC  UR                  5       n	UR                  5       u  pU	S:X  a=  TR                  [        R                  R                  5        UR                  X45        GOUS:X  a  UR                  U	5        GOU" U5      U" U	5      :X  a<  TR                  U
5        [         R                  R                  R!                  X5        GOfU" U5      U" U	5      :  a~  U" U5      U" U	5      :  a1  UR                  5       u  pX-  U
-   n
X-  nU" U5      U" U	5      :  a  M1  TR                  U
5        [         R                  R                  R!                  X5        OU" U5      U" U	5      :  a  [        R                  R"                  nU	nTR                  [%        XU5      5        X-  nU" U5      U" U	5      :  aG  UR                  5       nTR                  [%        XU5      5        X-  nX-  n	U" U5      U" U	5      :  a  MG  [         R                  R                  R!                  X5        O[&        eU(       a
  U(       a  GMC  U(       al  UR                  5       n	[         R                  R                  R!                  U	S5        TR                  [        R                  R                  5        U(       a  Ml  U(       aE  UR                  5       u  p[         R                  R                  R!                  US5        U(       a  ME  UbB  [        U5      S:X  a3  TR)                  5         TR                  5       nTR+                  UU5        OTR)                  5         [        T5      [        U 5      :X  d   e    SUU4S jjnU$ s  snf )z7
Perform a reshape entirely by modifying indexing math
Nr@   c                   >^ [        U 5      [        T5      :X  d   [        U 5      [        T5      45       e[        [        TU 5      5      m[        U4S jT 5       5      $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   )ri   )r   r   replacementss     r   r   AView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>  s     HiA|44ir  )r   r   r   r   )r   r  rY  	view_exprs    @r   r   .View._dynamic_reshape_indexer.<locals>.reindex  sO     u:T*CSZT,CC*D% 01LHiHHHr   r  )rm   r  r  r	  r   r   rg   r?   VIEWr   r   r  r  r   r  r  r  r  r>   r  reverseinsert)r  r  r
  r	  r   	stack_new	stack_oldreordering_dense_dimold_dimsize_oldvarsize_newvar2	size_new2divisormodulus
dense_exprr   rY  r  s                     @@r   r  View._dynamic_reshape_indexerx  s    GG$$..	 CHHBV
BVQ*499a8BV
 T8,-	N	 T! #S^a//#H" 	
  (((mmI.GW%	I }}H%MMOMC1}  .  #1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD/C/C'3H  )Ih,??   %  --hA8$y'::''++"  w!GH!+)Ih,??'mmoG$$_S7%KL%/G'1H	  )Ih,??
   --hA$$= II@  }}HGG))(A6UWW\\* i
 %MMOMCGG))(A6 i  S]a%7"JY
39~X...	I!	I	I 	I [
s   $Q5r   )r   r!   r  r!   r   r!   )r   r   r  r  r   r   )r  r  r  r  r   ztuple[list[Expr], list[Expr]]r   )r  r  r  r  r
  r  r   &Callable[[Sequence[_T]], Sequence[_V]])r  r  r  r  r
  r  r   r  )r   r   r   r   r  r  r?  r  r  re  r  r   r   r   r   rd  rd    s      +A +AZ " ",:"	&" "  
 $(	$ % !	
 
0    $(X X X !X 
4	X Xr   rd  c                     ^  \ rS rSr% SrS\S'   SU 4S jjrSS jr\rSS jr	SS jr
SS	 jr\SS
 j5       rSS jrSS jrSS jrSS jrSS jrSS jr\" S 5       S   SS jj5       rS S!S jjrS"S jrSrU =r$ )#rO  i  z*Pretend our storage has a different layoutr  rJ  c                   > [         TU ]  5         [        U R                  [        5      (       a0  [
        R                  U SU R                  R                  5       5        g g )NrI  )r  r  r   rI  rd  r   r~  r/  r  s    r   r  ReinterpretView.__post_init__  sC    dii**tVTYY-B-B-DE +r   c                P    U R                  U R                  U R                  /5      $ r   )r  rI  rJ  r  s    r   r  ReinterpretView.__str__  s&    		
 	
r   c                6    U R                   R                  5       $ r   r  r  s    r   r  ReinterpretView.get_name  r  r   c                .    U R                   R                  $ r   )rJ  r  r  s    r   r  ReinterpretView.get_device  s    {{!!!r   c                    g r   r   r  s    r   r  ReinterpretView.get_origin_node  r  r   c                .    U R                   R                  $ r   )rJ  r  r  s    r   r  ReinterpretView.dtype  s    {{   r   c                @    [        U R                  R                  5      $ r   )r   rJ  r  r  s    r   r
  ReinterpretView.get_size  s    DKK$$%%r   c                @    [        U R                  R                  5      $ r   )r   rJ  r  r  s    r   r/  ReinterpretView.get_stride  s    DKK&&''r   c                   ^  SU 4S jjnU$ )Nc                Z  > TR                   R                  5       n[        R                  " TR	                  5       U" U 5      5      nTR                   R
                  TR                  R
                  :w  a6  [        R                  " UTR
                  TR                  R
                  5      $ U$ r   )rJ  r  rk   loadr  r  rI  to_dtype_bitcast)r   rX  
tmp_loaderr  s      r   rZ  +ReinterpretView.make_loader.<locals>.loader  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!r   r   r  r   rl   r   r  rZ  s   ` r   r  ReinterpretView.make_loader  s    	" r   c                6    U R                   R                  5       $ r   )rJ  r  r  s    r   r  ReinterpretView.make_indexer      {{''))r   c                    U R                   $ r   rJ  r  s    r   r  ReinterpretView.get_layout
  r  r   c                    g r   r   r  s    r   r2  ReinterpretView.freeze_layout  r*  r   c                    [        U R                  R                  U5      [        U R                  R                  U5      -  [        U R                  R                  U5      -  $ r   )r)   rJ  r  r  rM  rV  s     r   rW  $ReinterpretView.get_free_symbol_uses  sQ    
 T[[--}=t{{11=ABt{{11=AB	
r   c           	     p   [         R                  R                  R                  U R                  U R
                  R                  U R
                  R                  U R
                  R                  Ub  UR                  O#[         R                  R                  R                  U R
                  R                  S9$ rL  )rm   r  wrapper_codecodegen_reinterpret_viewrI  rJ  r  r  rM  	writeliner  r  s     r   r  !ReinterpretView.codegen_reference  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
r   c                    gr  r   r  s    r   rP  ReinterpretView.num_reads'      r   r   rt  r  r  rw  r{  r  r  r  r}  r  r  r   r  r  )r   r   r   r   r  r   r  r  r>  r  r  r  r  r  r
  r/  r  r  r  r2  rX   rW  r  rP  r   rA  rB  s   @r   rO  rO    s    4NF

 H$" ! !&(	* -.$)
!
	!
 /

 r   rO  c                  n    \ rS rSr% SrS\S'   \SS j5       rSS jr\r	\
SS j5       rSS jrSS	 jrS
rg)	DtypeViewi+  z(Pretend our storage has a different typer|  target_dtypec                    [        U5      (       aX  [        U5      u  p4[        UR                  UUR                  UR
                  UR                  UR                  5      n[        X5S9$ [        XS9$ )NrH  )rI  rS  )
r  rK  rL  r  r  r  rM  rN  rO  rR  )r	  r   	new_dtyperR  rS  rU  s         r   r  DtypeView.create1  sj     ##"7":G$!!!!!!$$J #CCa88r   c                P    U R                  U R                  U R                  /5      $ r   )r  rI  rS  r  s    r   r  DtypeView.__str__@  s     		4+<+<=>>r   c                    U R                   $ r   )rS  r  s    r   r  DtypeView.dtypeE  s       r   c                6    U R                   R                  5       $ r   rI  r
  r  s    r   r
  DtypeView.get_sizeI  r  r   c                P   ^ ^ T R                   R                  5       mSUU 4S jjnU$ )Nc                |   > [         R                  " T" U 5      TR                  TR                  R                  5      $ r   )rk   r9  rS  rI  r  )r   rn  r  s    r   rZ  %DtypeView.make_loader.<locals>.loaderO  s*    ''c
D4E4EtyyWWr   r  rI  r  )r  rZ  rn  s   ` @r   r  DtypeView.make_loaderL  s(    		%%'	X 	X r   r   N)r   r   rU  r|  r   rd  r  r{  r  r  )r   r   r   r   r  r   r?  r  r  r>  r  r  r
  r  r   r   r   r   rR  rR  +  sE    29 9? H! !$r   rR  c                  r    \ rS rSr\          SS j5       r\  S             SS jj5       rSrg)		SliceViewiU  c                x  ^ ^^^^	^
 [         R                  R                  m
UR                  5       U   m[	        S X4T4 5       5      (       a!  [
        R                  m	[
        R                  mOT
R                  m	T
R                  mSUU	U
4S jjm          SUU U4S jjnU" USTS5      nU" XCTT5      nX44$ )zb
Normalize start and end such that both are in the range
[0, x.get_size()[dim]] and start <= end.
c              3  8   #    U  H  n[        U5      v   M     g 7fr   )r3   r   r   s     r   r   0SliceView.normalize_start_end.<locals>.<genexpr>a  s     H1GA$Q''1Gr  c                   > TR                  X5      (       a  U OT" X5      nTR                  X25      (       a  UnU$ T" X25      nU$ r   )statically_known_geqr8  )r   lowerupperclamped_lowerclamped_fullmax_funcmin_funcr  s        r   clamp,SliceView.normalize_start_end.<locals>.clamph  s^    221<<(1BT 
 00FF  
   m3 
  r   c                D   > U c  U$ TR                  U T5      n T" XU5      $ r   )r  )r'  rk  rl  r_  rq  r	  dim_sizes       r   
clamp_wrap1SliceView.normalize_start_end.<locals>.clamp_wraps  s.     {++C:CU++r   r   )r   r!   rk  r   rl  r   r   r!   )
r'  zUnion[int, None]rk  r   rl  r   r_  Union[Expr, int]r   rw  )
rm   r  r  r
  rt  r   MinMaxevaluate_minevaluate_max)r	  r   r>  startendru  rq  rt  ro  rp  r  s   `     @@@@@r   normalize_start_endSliceView.normalize_start_endV  s     77##::<$H%h1GHHHyyHyyH,,H,,H		  		 	,!	,*-	,69	,DT	,	, 	, 5!Xq1Xx8zr   c           	       ^^^^ [         R                  " T5      m[        T[        5      (       d  TS:  d   T5       e TS:X  a  US:  a  TS:X  a  U$ [        UR                  5       5      mU(       a  U R                  UTTU5      u  mn[        UT-
  TS-
  -   T5      TT'   [        U5      (       a  [        U5      u  px[        UR                  5      n	U	T   T-  U	T'   [        UR                  UR                  TU	UR                  UR                  T   T-  -   UR                   5      n
[#        XzS9$     SUUUU4S jjn[%        UTUS9$ ! [         a     GNf = f)Nr   l    r@   rH  c                   > [        U 5      [        T5      :X  d   SU  ST 35       e[        U 5      n U T   T-  T-   U T'   U $ )Nzwrong ndim r  )r   r   )r   r>  r  r|  steps    r   r   !SliceView.create.<locals>.reindex  sR     u:X.P+eWAhZ0PP.KEsd*U2E#JLr   r  r  )r   r  r   r!   	TypeErrorr   r
  r~  r=   r  rK  r  rL  r  r  rM  rN  rO  rd  )r	  r   r>  r|  r}  r  rq  rR  rS  rT  rU  r   r  s     `` `      @r   r  SliceView.create  sm    ||D!$%%7471	zcY.419 

%
 00CDJE3 uq!94@ ##"7":Gj//0J(o4JsO$!!  !!J$5$5c$:U$BB$$J #CC	!		 	 ah@@K  		s   E 
EEr   N)
r   r   r>  r   r|  r   r}  r   r   ztuple[int, int])r@   T)r   r   r>  r   r|  r   r}  r   r  r   rq  r   r   r   )r   r   r   r   r?  r~  r  r   r   r   r   rd  rd  U  s    '' '),'36'	' 'R  3A3A 3A 	3A
 3A 3A 3A 
3A 3Ar   rd  c                  R    \ rS rSr% S\S'   S\S'   SS jrSS jrSS jrSS	 jrS
r	g)BaseConstanti  r|  r  r  r  c                    gNr   r   r  s    r   r
  BaseConstant.get_size  s    r   c                    U R                   $ r   r  r  s    r   r  BaseConstant.get_device  r  r   c                    g r   r   r  s    r   r  BaseConstant.get_origin_node  r  r   c                    [        5       $ r   r:   r  s    r   r  BaseConstant.get_reads  r  r   r   Nr  r  rw  r  )
r   r   r   r   r   r
  r  r  r  r   r   r   r   r  r    s"    r   r  c                  R    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrSS
 jrSrg)Constanti  r   r  r|  r  r  r  c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ r   )rk   rF  r  r  r   r  s    r   rZ  $Constant.make_loader.<locals>.loader  s    <<

DJJ77r   r<  r   r=  s   ` r   r  Constant.make_loader  s    	8 r   c                    g r   r   r  s    r   r  Constant.realize  r*  r   c                @    [        U R                  U R                  US9$ )N)r  r  r  )r  r  r  r  s     r   rf  Constant.constant_to_device  s    djj

6JJr   r   Nr  r  r  )	r   r   r   r   r   r  r  rf  r   r   r   r   r  r    s#    JKr   r  c                  H    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrS
rg)IndexingConstanti  r   r   r|  r  r  r  c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ r   )rk   r  r   r  r  s    r   rZ  ,IndexingConstant.make_loader.<locals>.loader  s    >>$**djj99r   r<  r   r=  s   ` r   r  IndexingConstant.make_loader  s    	: r   c                @    [        U R                  U R                  US9$ )N)r   r  r  )r  r   r  r  s     r   rf  #IndexingConstant.constant_to_device  s    djj

6RRr   r   Nr  r  )r   r   r   r   r   r  rf  r   r   r   r   r  r    s    JSr   r  c                `   SnSn[        [        [        X5      5      5       H  u  pEUS:X  a  M  [        R                  R
                  R                  XR5      (       d0  [        R                  R
                  R                  XS5      (       d    gU[        R                  " SU5      -  nX$-  nM     gNr@   FT)	reversedr   r   rm   r  r  r9  r   ry  )r  r=  expected_strideexpected_stride_maxr   ys         r   is_contiguous_strides_for_shaper    s     Os51236ww77
 
''""::1RRuyyA. 4 r   c                <    [         R                  U R                  -  $ r   )rA   padding_alignment_bytesitemsizer  s    r   get_align_for_dtyper     s    ))U^^;;r   c                  B    \ rS rSrSrSS jrS	S jr S
   SS jjrSrg)r  i  ztAbstract base for Layout, MultiOutputLayout, NoneLayout.
Represents the memory layout of the output of an Operation.c                >    [        [        U 5      R                  5      er   r  r  s    r   r  OutputSpec.get_device  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   storage_sizeOutputSpec.storage_size  r  r   c                >    [        [        U 5      R                  5      er   r  rV  s     r   rW  OutputSpec.get_free_symbol_uses  r9  r   r   Nr  r  r  r  )	r   r   r   r   r  r  r  rW  r   r   r   r   r  r    s1    C77 %*7!7	!7 7r   r  c                     \ rS rSrSrS\" S5      S4             SS jjr\SS j5       r\R                  S S j5       r\SS	 j5       r
\
R                  S S
 j5       r
\S!S j5       r\R                  S"S j5       rS#S jr\rS$S jrS%S jrS&S jr\      S'S j5       rS&S jrS(S jrS&S jr\        S)S j5       rS*S jrS&S jrS+S jrS,S jrS-S jrS!S jr\" S 5       S.   S/S jj5       rSrg)0r  i  z_
Layout base class

Carries tensor meta-information including offset and
whether it is pinned.
Nr   Fc                T   Uc  [         R                  U5      nXl        X l        [	        U5      [	        U5      :X  d   SU SU 35       e[        S U 5       5      (       d   eX0l        X@l        XPl        X`l	        U R                  (       a  U R                  R                  S:X  d   eg g )Nr  	, stride=c              3  N   #    U  H  n[        U[        [        45      v   M     g 7fr   )r   r!   r   r   s     r   r   "Layout.__init__.<locals>.<genexpr>+  s     <t!:a$--t   #%r!  )r  r  r  r  r   r   _size_stride_offsetrN  r   )r  r  r  r  r  rM  rN  s          r   r  Layout.__init__  s     >#66t<F
4yCK'H5ix)HH'<t<<<<<
"NN(8(8E(ABB(ANr   c                    U R                   $ r   r  r  s    r   r  Layout.size3      zzr   c                    Xl         g r   r  r  r  s     r   r  r  7  s    
r   c                    U R                   $ r   r  r  s    r   r  Layout.stride;      ||r   c                    Xl         g r   r  r  s     r   r  r  ?      r   c                    U R                   $ r   r  r  s    r   rM  Layout.offsetC  r  r   c                    Xl         g r   r  r  s     r   rM  r  G  r  r   c                   SnU R                   S:w  a  SU R                    3nU R                  R                  c  SOSU R                  R                   3nSnU R                  (       a  SU R                   3n[	        U 5      R
                   SU R                  R                   U SU R                   SU R                   S	U R                   U U S
3$ )Nr  r   z	, offset=:z, is_pinned=z('z', z, size=r  r  )	rM  r  r   rN  r   r   r  r  r  )r  rM  device_index_stris_pinned_strs       r   r  Layout.__str__K  s    ;;! .F!%!2!2!:2!DKKDUDUCV@W>>*4>>*:;MDz""#2dkk&6&6%78H7ITZZL YII;i}VH]O1N	
r   c                    U R                   $ r   r  r  s    r   r  Layout.get_device[  r  r   c           	        [         R                     [        R                  " [	        U R
                  5      [	        U R                  5      U R                  U R                  U R                  S9sS S S 5        $ ! , (       d  f       g = f)N)r  r  
pin_memory)
rm   	fake_moder  r  r[   r  r  r  r  rN  r  s    r   get_exampleLayout.get_example^  sN    [[&&'		2'4jj{{>> [[s   AA77
Bc                B    [        U R                  U R                  5      $ r   )r  r  r  r  s    r   rP  Layout.is_contiguoush  s    .t{{DIIFFr   c                    [        U 5      nUS;  d	  U S   S:X  a  g[        U[        U 5      U 5       H  u  p4nUS:w  d  M  X4:w  d  M    g   g)N)r      r@   FT)r   r   r-   )r=  rQ  ndimleftrightr  s         r   is_channels_last_contiguous"Layout.is_channels_last_contiguousk  sX     5zvqQ!$3E:E"
D qyT]	"

 r   c                    [        U R                  [        [        R	                  [        [        U R                  5      5      5      5      U R                  5       H  u  pnUS:w  d  M  X:w  d  M    g   gr  )r   r  r  r  r  r   r  )r  r  r  r  s       r   is_transposedLayout.is_transposedy  sZ    !$KK^66tHTYY<O7PQRII"
D
 qyT]"
 r   c           	        [        U R                  5      [        U5      :X  d   e[        U R                  5       VVs/ s H5  u  p#[        R
                  R                  R                  USS9S:w  d  M3  UPM7     nnnU Vs/ s H  o R                  U   PM     nnU Vs/ s H  o!U   PM	     nnS	S jnU" U5      nS/[        U5      -  n[        [        U5      5       H  nXR   XqU   '   M     [        [        U5      S-
  5       H_  nXr   XrS-      :  n[        U[        5      (       d2  [        R
                  R                  R                  Xr   XrS-      :  SS9nU(       d  M_    g   gs  snnf s  snf s  snf )
Nr   r  r@   c                d    [        U 5      nU  Vs/ s H  o!R                  U5      PM     sn$ s  snf r   )r  r   )arr
sorted_arrelements      r   sorted_indices0Layout.is_stride_ordered.<locals>.sorted_indices  s,    J=@AS'$$W-SAAAs   -r2  T)size_obliviousF)r  r  r   r  )r   r  r   r  rm   r  r  r	  r   r   r   
_shape_envr  )	r  r   r   r>  non_1_indicesr  r  stride_orderedexprs	            r   r\  Layout.is_stride_ordered  sl   4;;3u:---
 $DII.
.ww))#):a? . 	 
 +88-Q++a.-82?@-Qa-@	B
 u% E
*s5z"A'-yN8$ # s5zA~&A!$~!e'<<DdD))ww))77"%1u(==d 8  t ' ;
 9@s   2E)3E) E/E4c                    S/[        [        [        S[        U R                  5      S-
  5      5      5      -   n[        U5      /U-   nU R                  U5      $ Nr   r@   )r   r  r   r   r  r\  r;  s     r   is_channels_last_stride_ordered&Layout.is_channels_last_stride_ordered  sN    d8E!S-=-A$BCDDUu$%%e,,r   c                b  ^^ [        U5      n[        U 5      S:X  a  U $ [        R                  (       d  [        R                  X5      (       a  U $ [        R                  " 5       n[        US5      (       a#  UR                  R                  SS5      (       a  U $ [        [        R                  S5      (       a  [        R                  R                  OSmSU4S jjmT(       a  [        U4S jU  5       5      (       a  U $ [        U T5      n[        U5      n[!        [        U 5      5       Vs/ s H  nSPM     nnS	XS   '   Sn	[#        US	S S	S
9 H  u  pXjS	-
     nX   X   -  n[%        U[&        [(        R*                  45      =(       a!    U[        R,                  :  =(       a    X-  S:g  =(       d1    [%        U[(        R.                  5      =(       a    [        R0                  nXU'   U(       d  M  [3        X5      U-  X'   Sn	M     U	(       d  U $ [4        =R6                  S	-  sl        U$ s  snf )zv
The padding does not change stride order but makes sure all strides larger
than the threshold are multiple of align.
r   r]  dislike_paddingFr  Nc                   > Tc  g[        U [        R                  5      (       d  g[        U4S jU R                   5       5      $ )NFc              3  F   >#    U  H  nTR                  U5      v   M     g 7fr   )is_unbacked_symint)r   r   r   s     r   r   ILayout._pad_strides.<locals>.contains_unbacked_symints.<locals>.<genexpr>  s!     R@Q1y33A66@Qs   !)r   r   r!   rt  r2   )r  r   s    r   contains_unbacked_symints6Layout._pad_strides.<locals>.contains_unbacked_symints  s7     dEJJ//R@Q@QRRRr   c              3  4   >#    U  H  nT" U5      v   M     g 7fr   r   )r   r   r   s     r   r   &Layout._pad_strides.<locals>.<genexpr>  s     N:a6q99:r  r@   )r|  T)r  zsympy.Expr | intr   r   )r  r   rA   pad_channels_lastr  r  rm   get_current_noder  r]  r  r  r  rt  r   r   r   r   r   r   r   r"   padding_stride_thresholdr!   pad_dynamic_shapesrY   r'   num_comprehensive_padding)
in_stridesr  r  aligncurrent_fx_noderV  r   r   new_stridespaddedrankr   prev_idxr  require_paddingr   r   s                  @@r   _pad_stridesLayout._pad_strides  s    $E*z?a''F,N,N-
 -
 ,,.?F++0D0D0H0Hu1
 1
 *1!''<*H*HAGG&&d		S N:NNN'
I>,\:
"'J"89"8Qq"89 &'qM"":ab>;ID!(+H *T^;F 6C#78 (V<<<(Na'P VUZZ0NV5N5N	 
  &#*6#9E#A  <  ))Q.)9 :s   ,H,c                    [        U [        5      (       d   [        U 5      5       eU R                  c   eU R	                  U R                  U R
                  U R                  5      U l        g r   )r   r  r   r  r  r  r  r  s    r   rO  Layout.pad_strides  sP    $//;d;/{{&&&''TYY

Kr   c                P    [         R                  =(       a    [        U [        5      $ r   )rA   comprehensive_paddingr   r  r  s    r   rN  Layout.should_pad_strides  s    ++P
40PPr   c                   [        U [        5      (       a  U $ U R                  5       (       a  U R                  5         [        U R                  U R
                  U R                  U R                  U R                  U R                  5      $ r   )
r   rL  rN  rO  r  r  r  r  rM  rN  r  s    r   as_fixedLayout.as_fixed  sf    dK((K""$$KKJJIIKKKKNN
 	
r   c                    [         R                  (       d   S[        U 5      R                   S35       eU R	                  5       R                  5       $ )Nzconvert z to FixedLayout first)r  r  r   r   r  r  r  s    r   r  Layout.make_indexer  sG    ,, 	
tDz**++@A	
, }}++--r   c                   [        U[        5      =(       a    U R                  UR                  :H  =(       a    U R                  UR                  :H  =(       ay    U R                  UR                  :H  =(       aY    U R
                  UR
                  :H  =(       a9    U R                  UR                  :H  =(       a    U R                  UR                  :H  $ r   )r   r  r  r  r  r  rM  rN  )r  others     r   __eq__Layout.__eq__  s    uf% 2u||+2

ekk)2 		UZZ'2 u||+	2
 u||+2 %//1	
r   c                X    [        U R                  U R                  U R                  5      $ r   )r*   r  r  rM  r  s    r   r  Layout.storage_size  s    .tyy$++t{{SSr   c                    [        U R                  U5      [        U R                  U5      -  [        U R                  U5      -  $ r   )r)   r  r  rM  rV  s     r   rW  Layout.get_free_symbol_uses  s=    
 TYY6t{{M:;t{{M:;	
r   )r  r  r  r  r  rN  r  )r  r  r  r|  r  r  r  zOptional[Sequence[Expr]]rM  r!   rN  r   r   r   r  r  r  r   r   r  r  r!   r   r   r  r  )r   torch.Tensorr  )r=  r  rQ  r  r   r   )r   r  r   r   )r	  r  r  r  r  r|  r   r  rt  )r   rL  r  )r  r   r   r   r  r  ) r   r   r   r   r  r"   r  r  r  setterr  rM  r  r>  r  r  rP  r  r  r  r\  r  r  rO  rN  r  r  r  r  rX   rW  r   r   r   r   r  r    s    ,0qzCC C 	C
 )C C C 
C,   
[[    ]]    ]] 
 HG !,>	 !F- B!B)7B@KB	B BHL
Q
.	
T H%$)
!
	!
 &
r   r  c                  "    \ rS rSrSrSS jrSrg)rL  i'  z A Tensor layout we cannot changec                X    [        U R                  U R                  U R                  5      $ )r  )r"  r  r  rM  r  s    r   r  FixedLayout.make_indexer*  s    diidkkBBr   r   Nr  )r   r   r   r   r  r  r   r   r   r   rL  rL  '  s    *Cr   rL  c                    ^  \ rS rSrSrSr\SS j5       r\SS j5       r\SS j5       r	\      SS j5       r
\      SS j5       r\SS	 j5       r\R                  SS
 j5       r\SS j5       r\R                  SS j5       r\SS j5       r\R                  SS j5       r S      S!S jjr S      S"S jjrS#S jrS$S jrS%S jrS&S jr  S'           S(U 4S jjjrSrU =r$ ))r  i/  zp
A Tensor layout that we are allowed to change

Assumption: layout change should NOT add or remove free symbols
Fc                    [        U 5      S:X  a  / $ [        R                  R                  /n[	        U SS  5       H  nUR                  X!S   -  5        M     [        [	        U5      5      $ )Nr   r@   r2  )r   r   r  r  r  r  r   )sizesreversed_stridesr  s      r   r  !FlexibleLayout.contiguous_strides9  s^    u:?I!GGKK=U12Y'D##DB+?$?@ (H-.//r   c                    [        [        [        U 5      5      5      [        U5      :X  d   X45       e[        R                  R
                  nS/[        U5      -  nU H  nX#U'   X U   -  nM     U$ )z
Create a stride based on the order the dimensions should be filled in.

In this format, channels last would be:
    [1, 3, 2, 0]
N)r;   r   r   r   r  r  )r.  r   next_striderQ  r   s        r   fill_orderedFlexibleLayout.fill_orderedB  sm     %E
+,
50AAQE>QAggkk&3u:%A$AJ%a0K  r   c                    [        [        [        U 5      5      5      [        U5      :X  d   e[        U5      n[        R                  X5      $ )zz
Create a stride based on the sorted order of a permuted range.

In this format, channels last would be:
    [3, 0, 2, 1]
)r;   r   r   r   r  r3  )r.  r   r   s      r   r  FlexibleLayout.stride_orderedS  s@     %E
+,
50AAAA,U3
**5==r   c                D   U[         R                  :X  a  [        R                  U [        5      $ U[         R
                  :X  a  [        R                  U [        5      $ U[         R                  :X  a  [        R                  U 5      $ [        R                  SU5        [        e)a9  
Create a stride based on a memory format.

Memory format is translasted into a stride order,
so channels_last is the same as:
    FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

This interface does not support memory_format `torch.preserve_format`
which should be used to deduce a format from another source
z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r  channels_lastr  r  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr  r  r  r  )r.  memory_formats     r    stride_ordered_for_memory_format/FlexibleLayout.stride_ordered_for_memory_format_  s     E///!008IJJe444!008JKKe555!44U;;IIP &%r   c                (   [        U 5      [        U5      :X  d   eU Vs/ s H,  n[        R                  R                  R	                  U5      PM.     nn[        [        [        U5      5      UR                  S9n[        R                  X5      $ s  snf )z
Create a stride that has the same stride order as given stride

For example, if given stride is [1000, 1, 100, 10],
the fill order should be [1, 3, 2, 0]
r  )
r   rm   r  r  r.  r  r   __getitem__r  r3  )r.  r  r   r   s       r   same_orderedFlexibleLayout.same_orderedz  su     5zS[(((BHI&Q!''""55a8&IE#f+.F4F4FG
**5== Js   3Bc                    U R                   $ r   r  r  s    r   r  FlexibleLayout.size  r  r   c                4    U R                  SU5        Xl        g )Nr  )!assert_free_symbol_uses_unchangedr  r  s     r   r  rE    s    ..vu=
r   c                    U R                   $ r   r  r  s    r   r  FlexibleLayout.stride  r  r   c                4    U R                  SU5        Xl        g )Nr  )rG  r  r  s     r   r  rI        ..x?r   c                    U R                   $ r   r  r  s    r   rM  FlexibleLayout.offset  r  r   c                4    U R                  SU5        Xl        g )NrM  )rG  r  r  s     r   rM  rM    rK  r   c                B   U R                  U R                  U5      nU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	r  r  rN  r  r  rL  r  rM  rN  )r  r   r6  rT  s       r   as_stride_orderFlexibleLayout.as_stride_order  sx     ((E:
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                   UnU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[	        U R
                  U R                  U R                  UU R                  U R                  5      $ r   )rN  r  r  r  rL  r  rM  rN  )r  rD  r6  rT  s       r   as_exact_stridesFlexibleLayout.as_exact_strides  sg     #
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                4   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	r3  r  rN  r  r  rL  r  rM  rN  )r  r   rT  s      r   as_fill_orderFlexibleLayout.as_fill_order  st    $($5$5dii$G
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                4   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	rB  r  rN  r  r  rL  r  rM  rN  )r  r  rT  s      r   as_same_orderFlexibleLayout.as_same_order  st    &&tyy&9
""$$**:yy$**MJKKJJIIKKNN
 	
r   c           
     t    0 nS H/  nS H&  nX#4n[        [        [        X5      U5      5      X'   M(     M1     U$ )N)r  r  rM  TF)r;   r)   r   )r  initial_free_symbolsr   rd  r  s        r   get_initial_free_symbol_uses+FlexibleLayout.get_initial_free_symbol_uses  sI    !0D!.+,6$WT%8-H-$) "/ 1 $#r   c                    S H9  nU R                   X4   n[        [        X#5      5      nXT:X  a  M.   SU SU 35       e   g )Nr\  z)Expected free symbols unchanged, but got z vs )r]  r;   r)   )r  r   r  rd  old_free_symbolsnew_free_symbolss         r   rG  0FlexibleLayout.assert_free_symbol_uses_unchanged  sX    *M#88$9NO)*:5*PQ#7 ;<L;MTRbQcd7 +r   c                   > U(       a  [         R                  X45      nO[         R                  U5      n[        TU ]  XX6US9  U R                  5       U l        g )NrN  )r  r3  r  r  r  r^  r]  )r  r  r  r  rV  rN  rQ  r  s          r   r  FlexibleLayout.__init__  sO     $11$EG$77=GK %)$E$E$G!r   )r  r  r  r]  )r.  r  r   r  )r.  r  r   r  r   r  )r.  r  r   r  r   r  )r.  r  r=  ztorch.memory_formatr   r  )r.  r  r  r  r   r  r  r%  r  r&  r  )r   r  r6  r   r   rL  )rD  r  r6  r   r   rL  )r   r  r   rL  )r  r  r   rL  )r   z$dict[tuple[str, bool], sympy.Symbol])r   r   r  r4   r   r   r  )r  r  r  r|  r  r  rV  'Optional[Sequence[Union[int, Integer]]]rN  r   r   r   )r   r   r   r   r  r  r  r  r3  r  r>  rB  r  r  r(  r  rM  rP  rS  rV  rY  r^  rG  r  r   rA  rB  s   @r   r  r  /  s    N 0 0    	> 	> &&-@&	& &4 >>&8>	> >   
[[    ]]    ]] 
 ;@
"
37
	
" HM
/
@D
	
 

	$ AEHH H 	H
 >H H 
H Hr   r  c                  p   ^  \ rS rSrSrSU 4S jjrS	S jrS
S jr\" S 5       S   SS jj5       r	Sr
U =r$ )NonOwningLayouti  z,Is a view into the storage of another tensorc                   > UR                  5       n[        TU ]	  UR                  UR                  UR
                  UR                  5        Xl        g r   )r  r  r  r  r  r  r  view)r  rk  rJ  r  s      r   r  NonOwningLayout.__init__  s?    "MMLLKKMM		
 	r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  NonOwningLayout.make_indexer  s    }}++--r   c                    U R                   R                  5       R                  nUS:X  a  gSSKJn  [
        R                  R                  R                  X5      $ )Nr   Tr@   )	ALIGNMENT)	rk  r  rM  utilsrp  rm   r  r  r`  )r  rM  rp  s      r   maybe_guard_aligned#NonOwningLayout.maybe_guard_aligned  sB    %%'..Q;$ww<<VOOr   c                R   [        U R                  [        5      (       d   eU R                  R                  n[        U[        5      (       d   [        U5      5       eUR                  n[        U[        5      (       d   [        U5      5       eUR                  R                  U5      $ r   )	r   rk  rO  rI  rX  r   rY  rJ  rW  )r  rd  boxinput_buffers       r   rW  $NonOwningLayout.get_free_symbol_uses  s     $))_5555iinn#z**5DI5*xx,//:c:/""77FFr   )rk  )rk  zUnion[BaseView, TensorBox]r   r   r  r  r  r  )r   r   r   r   r  r  r  rr  rX   rW  r   rA  rB  s   @r   ri  ri    sG    6.P -.$)G!G	!G /Gr   ri  c                      \ rS rSrSrSrg)CommBufferTypei(  symm_memr   N)r   r   r   r   SYMM_MEMr   r   r   r   ry  ry  (  s    Hr   ry  c                  R   ^  \ rS rSr% SrS\S'   S\S'         S	U 4S jjrSrU =r$ )
CommBufferLayouti,  a\  
A layout that signifies the buffer is a comm buffer.
In terms of striding, the layout is identical to `FixedLayout`.

Buffers with this layout do not participate in in-place reuse - it can be
neither the source nor the target for in-place reuse.

For detailed motivation and usage of this layout, see
NOTE [lowering-time collective optimization].
ry  comm_buffer_typer   
group_namec           	     "  > [        U[        5      (       d  [        SU S35      eUR                  5       n[        TU ]  UR                  UR                  UR                  UR                  UR                  UR                  S9  X l        X0l        g )NzJA `CommBufferLayout` can only be initialized with a `FlexibleLayout` (got z).r  r  r  r  rM  rN  )r   r  r  r  r  r  r  r  r  r  rM  rN  r~  r  )r  rJ  r~  r  fixedr  s        r   r  CommBufferLayout.__init__;  s     &.11 ++1("6 
 !<<++<<<<oo 	 	
 !1$r   )r~  r  )rJ  r  r~  ry  r  r   )	r   r   r   r   r  r   r  r   rA  rB  s   @r   r}  r}  ,  s;    	 %$O%% )% 	% %r   r}  c                      \ rS rSr% S\S'   \R                  " S S9rS\S'   \R                  " S S9rS\S	'   SS
 jr	SS jr
SS jrSrg)
NoneLayoutiT  r  r  c                     S/$ r  r   r   r   r   r  NoneLayout.<lambda>_  s    r   default_factoryr  r  c                     S/$ r  r   r   r   r   r  r  `  s    1#r   r  c                    gr  r   r  s    r   r  NoneLayout.storage_sizeb  rP  r   c                    U $ r   r   r  s    r   r  NoneLayout.as_fixede      r   c                    U R                   $ r   r  r  s    r   r  NoneLayout.get_deviceh  r  r   r   Nr  r~  r  )r   r   r   r   r   r  r  r  r  r  r  r  r   r   r   r   r  r  T  sC     #"!''DD)D#))+FFIFr   r  c                     ^  \ rS rSrSU 4S jjr\SS j5       r\R                  SS j5       rSS jrSS jr	SS jr
\ S       SS jj5       rSS	 jrSS
 jrSrU =r$ )MutationLayoutSHOULDREMOVEil  c                  > [         TU ]  UR                  5       UR                  5       UR	                  5       S 5        Xl        U R                  5       R                  5       n[        R                  R                  U5        g r   )r  r  r  r  r
  r  
get_bufferr  rm   r  mark_buffer_mutated)r  r  r   r  s      r   r  #MutationLayoutSHOULDREMOVE.__init__m  sc    &&(OO		
  ))+	##D)r   c                6    U R                  5       R                  $ r   )real_layoutr  r  s    r   r  !MutationLayoutSHOULDREMOVE.stridex  s    !(((r   c                    g r   r   r  s     r   r  r  |  s    r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  'MutationLayoutSHOULDREMOVE.storage_size  s    !..00r   c                   ^ SU4S jjmT" U R                   5      n[        U[        5      (       d   [        U5      5       eU$ )Nc                   > [        U [        5      (       a  T" U R                  5      $ [        U [        5      (       a  T" U R	                  5       5      $ [        U [
        5      (       a  T" U R                  5      $ U $ r   )r   r  r  rd  r/  
MutableBoxrI  )r  unwrap_viewss    r   r  ;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views  sb    &"<==#FMM22&(++#F$6$6$899&*--#FKK00Mr   )r  r   r   r   )r  r   rY  r   )r  r  r  s     @r   r  %MutationLayoutSHOULDREMOVE.get_buffer  s9    	 dkk*&&))74<7)r   c                h    U R                  5       R                  n[        U[        5      (       d   eU$ r   )r  rJ  r   r  )r  rJ  s     r   r  &MutationLayoutSHOULDREMOVE.real_layout  s,    "))&&))))r   c                   UR                  5         [        R                  R                  UR	                  5       5        [        U[        5      (       a  UR                  nUR                  5         U(       d  [        R                  UR                  5       UR                  5       UR                  5       [        UR                  5       UR                  5       5       VVs/ s H.  u  pE[        R                  R                   R#                  XE5      PM0     snnS9n[        U[$        [&        45      (       d   eUR                  nUR                  5         [)        US5      (       d   U5       e[        UR                  R*                  [,        5      (       d$   [/        UR                  R*                  5      5       e[1        U5      UR                  l        UR                  $ s  snnf )Nra  rI  )r  rm   r  r  r  r   r   rI  r,  rJ  r  r  r  r  r   r
  r  check_equals_and_simplifyrd  r  r  rJ  r  r   r  )r	  srcdstunsafe_aliasr  r  r   s          r   realize_into'MutationLayoutSHOULDREMOVE.realize_into  sV    	 	
##CLLN3c9%%((C 	##~~'mmo* !$CLLNCLLN C C GG$$>>qD C	 $ D dXz$:;;;;))CsF##(S(##((//>::QD<QQ:4S9xxs   5Gc                    U $ r   r   r  s    r   r  #MutationLayoutSHOULDREMOVE.as_fixed  r  r   c                6    U R                   R                  5       $ r   )r  r  r  s    r   r  'MutationLayoutSHOULDREMOVE.make_indexer  rA  r   )r  )r  r   r   r   r  )r  r   r   r   r  )r   rY  r}  r  )r  r   r  r   r  r   r   r   )r   r   r  )r   r   r   r   r  r  r  r(  r  r  r  r?  r  r  r  r   rA  rB  s   @r   r  r  l  s    	* ) ) ]] 1
 <A%%%%59%	% %N* *r   r  c                    ^  \ rS rSr% S\S'   S\S'   S%U 4S jjrS&S jrS'S jrS(S	 jrS)S
 jr	S*S jr
\S+S j5       rS,S jrS-S jrS.S jrS/S jrS0S jrS1S jrS2S jrS%S jr S3     S4S jjrS5S jrS6S jr S3     S7S jjrS2S jrS8S jrS9S:S jjrS%S jrS;S jrS;S jrS<S jr\ " S 5       S3   S=S  jj5       r!S>S! jr"S?S" jr#S2S# jr$S$r%U =r&$ )@rY  i  r  r   r  rJ  c                F   > [         TU ]  5         U R                  SS 5        g r  )r  r  r  r  s    r   r  Buffer.__post_init__  s    t4r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  Buffer.make_indexer  s     --//r   c                J    U R                   (       d   U 5       eU R                   $ r   r  r  s    r   r  Buffer.get_name  s    yy$yyyr   c                    [        U R                  [        5      (       a  U R                  R                  5       $ [	        [        U R                  5      R                  5      er   )r   rJ  r  r  r  r   r   r  s    r   r  Buffer.get_example  s@    dkk6**;;**,,!$t{{"3"<"<==r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  Buffer.get_device  s    ##%0022r   c                    g r   r   r  s    r   r  Buffer.get_defining_op  r  r   c                6    U R                  5       R                  $ r   )r  r  r  s    r   r  Buffer.dtype  s     &&&r   c                :    / U R                  5       R                  Q$ r   )r  r  r  s    r   r
  Buffer.get_size  s    ("''((r   c                :    / U R                  5       R                  Q$ r   )r  r  r  s    r   r/  Buffer.get_stride  s    *"))**r   c                6    U R                  5       R                  $ r   )r  rM  r  s    r   
get_offsetBuffer.get_offset  s     '''r   c                    [        U R                  [        5      (       a  U R                  $ [        [	        U R                  5      R
                  5      er   )r   rJ  r  r  r   r   r  s    r   r  Buffer.get_layout  s7    dkk6**;;!$t{{"3"<"<==r   c                    U R                   $ r   rC  r  s    r   r  Buffer.get_output_spec  r  r   c                "    U R                  5       $ r   )r  r  s    r   rS  Buffer.get_storage_numel  s    ~~r   c                6    U R                  5       R                  $ r   )r  rN  r  s    r   get_is_pinnedBuffer.get_is_pinned  s     ***r   c                    [        U R                  [        5      (       a@  [        U R                  [        5      (       d   U R                  R	                  5       U l        g g g r   )r   rJ  r  ri  r  r  s    r   r2  Buffer.freeze_layout   sF    dkk6**:KK4
 4
 ++..0DK4
*r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  XS9U l        g NrW  )r   rJ  r  r   rP  r5  s      r   r7  &Buffer.freeze_layout_with_stride_order  sB     $++~66IT[[8II6kk11%1Ur   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  U5      U l        g r   )r   rJ  r  r   rV  r;  s     r   r<  $Buffer.freeze_layout_with_fill_order  s=    $++~66IT[[8II6kk//6r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  U5      U l        g r   )r   rJ  r  r   rY  r?  s     r   r@  $Buffer.freeze_layout_with_same_order  s=    $++~66IT[[8II6kk//7r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  XS9U l        g r  )r   rJ  r  r   rS  rC  s      r   rE  'Buffer.freeze_layout_with_exact_strides  sG     $++~66IT[[8II6kk22 3 
r   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ r  r  r  s    r   r  Buffer.is_zero_elements  r  r   c                z   ^  T R                  5       (       a  [        [        T R                  5       S9$ SU 4S jjnU$ )Nr  c                   > TR                  5       n[        R                  " TR                  =(       d    SU" U 5      5      $ rS  )r  rk   r8  r   r   rX  r  s     r   rZ  "Buffer.make_loader.<locals>.loader$  s/    '')G88DII2GENCCr   r<  )r  r   rH  r  r=  s   ` r   r  Buffer.make_loader  s3      ""=0@AA	D r   c                "    U R                  5       $ r   r  r  s     r   r  Buffer.codegen_reference*  r  r   c                    g r   r   r  s    r   r  Buffer.decide_layout-  r*  r   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ gr  )r   rJ  ri  rk  r  r  s    r   ro  #Buffer.get_inputs_that_alias_output0  s2    dkk?33KK$$--/00r   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ gr  )r   rJ  r  r  r  r  s    r   ri  Buffer.get_mutation_names5  s3    dkk#=>>KK&&//122r   c                6    [        U R                  5       /5      $ r   )r;   r  r  s    r   r  Buffer.get_read_names:  s    4==?+,,r   c                    [        5       $ r   r:   rV  s     r   rW  Buffer.get_free_symbol_uses=       |r   c                    [        5       $ r   r:   r  s    r   r  Buffer.get_unbacked_symbol_defsC  r  r   c                    g r   r   r  s    r   r  Buffer.realizeF  r*  r   c                    gr  r   r  s    r   should_allocateBuffer.should_allocateI  s    r   rC  rt  r  r  )r   z!Union[torch.Tensor, sympy.Symbol]r  rx  r{  r  )r   r  r  r}  r~  r  r  r  r  r  )r  r  r   r   )rD  r  r6  r   r   r   r  r   r  r  ru  r  r  r  )'r   r   r   r   r   r  r  r  r  r  r  r  r  r
  r/  r  r  r  rS  r  r2  r7  r<  r@  rE  r  r  r  r  ro  ri  r  rX   rW  r  r  r  r   rA  rB  s   @r   rY  rY    s$    
50>
3 ' ')+(>
 +1 ;@V"V37V	V78
 CH
*
;?
	
U	

- H%$)!	! &
 r   rY  c                  J    \ rS rSrSS jrSS jr\R                  rS	S jrSr	g)
OperationBufferiN  c                    U /$ r   r   r  s    r   r  OperationBuffer.get_outputsQ  s	    vr   c                    U $ r   r   r  s    r   r  OperationBuffer.get_defining_opT  r  r   c                X    [         R                  U 5        [        R                  U 5        g r   )rY  r  r  r  s    r   r  OperationBuffer.__post_init__Z  s    T"%r   r   Nr  r   r  rt  )
r   r   r   r   r  r  r  rl  r  r   r   r   r   r  r  N  s     #55&r   r  c                      \ rS rSrSS jrSrg)ry  i_  c                    gr  r   r  s    r   rP  InputBuffer.num_reads`  rP  r   r   Nr  )r   r   r   r   rP  r   r   r   r   ry  ry  _  s    r   ry  c                      \ rS rSrSrSrg)DonatedBufferid  aA  
Represents a donated buffer which is a saved tensor that is not alias to any
fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
reuse the input tensor memory during backward since it might be used in another
function. However, donated buffer can be inplace reused during backward
to save memory.
r   N)r   r   r   r   r  r   r   r   r   r  r  d  s    r   r  c                  8    \ rS rSr% SrS\S'   SS jrS	S jrSrg)
rb  in  Nr  r`  c                   ^  SU 4S jjnU$ )Nc                   > TR                  5       R                  5       n[        R                  " [        R
                  R                  TR                  5       TR                  5      U" U 5      5      $ r   )	r  r  rk   r8  rm   r  constant_namer  r`  r  s     r   rZ  *ConstantBuffer.make_loader.<locals>.loaderr  sP    oo'446G88%%dmmot7K7KL r   r<  r   r=  s   ` r   r  ConstantBuffer.make_loaderq  s    	 r   c                    [        [        R                  R                  U R	                  5       U5      U R
                  S9$ N)r   rJ  )rb  rm   r  r  r  rJ  r  s     r   rf  !ConstantBuffer.constant_to_device{  s/    &&t}}?
 	
r   r   r  r  )	r   r   r   r   r`  r   r  rf  r   r   r   r   rb  rb  n  s    .2O+2
r   rb  c                  l    \ rS rSrS	S jr\" S 5       S
   SS jj5       rSSS jjrSS jrSS jr	Sr
g)NoneAsConstantBufferi  c                    [        5       $ r   r:   r  s    r   r  NoneAsConstantBuffer.get_reads  r  r   c                    [        5       $ r   r:   rV  s     r   rW  )NoneAsConstantBuffer.get_free_symbol_uses  r  r   Nc                J    [         R                  R                  R                  $ r   )rm   r  rJ  none_strr  s     r   r  &NoneAsConstantBuffer.codegen_reference  s    ww##,,,r   c                    [        S S9$ Nr  )r  r  s    r   r  $NoneAsConstantBuffer.get_output_spec  s    &&r   c                    gr  r   r  s    r   r  &NoneAsConstantBuffer.has_tensor_output  r	  r   r   r  r  r  r   r  r~  r  )r   r   r   r   r  rX   rW  r  r  r  r   r   r   r   r  r    sC     23$)!	! 4
-'r   r  c                  d    \ rS rSr% S\S'   \" S 5       S	   S
S jj5       rSSS jjrSS jrSr	g)r   i  r!   r  c                .    [        U R                  U5      $ r   )r)   r  rV  s     r   rW  *ShapeAsConstantBuffer.get_free_symbol_uses  s      		=99r   Nc                h    [         R                  R                  R                  U R                  5      $ r   )rm   r  rJ  codegen_sizevarr  r  s     r   r  'ShapeAsConstantBuffer.codegen_reference  s!    ww##33DII>>r   c                    gr  r   r  s    r   r  'ShapeAsConstantBuffer.has_tensor_output  r	  r   r   r  r  r   r  r  )
r   r   r   r   r   rX   rW  r  r  r   r   r   r   r   r     s<    
J34$):!:	!: 5:
?r   r   c                    ^  \ rS rSr% SrS\S'   SrS\S'   \\R                  SS j5       5       r
SS	 jrSS
 jrS S jrS!S jrS"S jr\" S 5       S#   S$S jj5       rS%U 4S jjrS&S jrS'S jrS(S jrS)S jr\  S*S j5       r  S+     S,S jjr\ S-           S.S jj5       rS/S jrSS jrS&S jrS&S jrS0S jrSr U =r!$ )1rw  i  zZ
Represents a buffer that is computed during kernel execution rather than being an input.
r  rI  FzClassVar[bool]_force_realizec               #     #    [         R                  n  S[         l        S v   U [         l        g ! U [         l        f = f7fNT)rw  r%  )	old_values    r   force_realizeComputedBuffer.force_realize  s2      #11		6,0N),5N)IN)s   ?/ ?<?c                    U R                   b  U R                   $ [        U R                  S5      (       a  U R                  R                   $ g)z}
Returns self.name if it exists, otherwise returns the name of the data node if that exists.
If neither exist, returns None.
Nr   )r   r  rI  r  s    r   get_computed_buffer_name'ComputedBuffer.get_computed_buffer_name  s:    
 99 99499f%%99>>!r   c                6    U R                   R                  5       $ r   rI  rP  r  s    r   rP  ComputedBuffer.num_reads  r  r   c                6    U R                   R                  5       $ r   rI  r  r  s    r   r  ComputedBuffer.get_reads  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  ComputedBuffer.get_read_names  r  r   c                t   [        U R                  [        [        [        [
        45      (       d.  [        R                  " [        5       [        5       [        5       S9$ [        R                  " [        SS5         U R                  R                  5       (       aT  [        U R                  5       U R                  R                  5       U R                  R!                  5       5      sS S S 5        $ [        U R                  5       U R                  R#                  5       5      sS S S 5        $ ! , (       d  f       g = f)NrL  writesindex_exprsr  T)r   rI  r  r  r!  rJ  rB   
ReadWritesr;   r    r   r  rZ  rK   get_store_functionr  r]  r
  r  s    r   rH  ComputedBuffer.get_read_writes  s    $))itY%GHH** l!|&L  \\.*:DAyy++--*++-II002II002 BA +++-II&&( BAAs   :A*D).1D))
D7c                &   U R                   R                  U5      U R                  R                  U5      -  nU R                  5       (       aD  [	        U R                  5       [        5      (       a!  X R                  5       R                  U5      -  nU$ r   )rJ  rW  rI  has_store_functionr   r;  rM   rH  )r  rd  r  s      r   rW  #ComputedBuffer.get_free_symbol_uses  s    $ 11
II**=9: ""$$##%x*
 *
 **,AA-PPFr   c                  > U R                  5       (       dg  U R                  [        R                  R                  ;  a?  U R                  5       S:X  a+  U R                  (       d  U R                  R                  5       $ [        TU ]!  5       $ r  )
rZ  r   rm   r  mutated_buffersrP  r%  rI  r  r  r  s    r   r  ComputedBuffer.make_loader  sc    ''))		!8!88 A%'' 99((**w"$$r   c                V    [        U R                  [        [        [        [
        45      $ r   )r   rI  r  r  r!  rJ  r  s    r   r>  !ComputedBuffer.has_store_function
  s    $))itY%GHHr   c                   U R                  5       R                  5       R                  5       n[        U R                  [
        [        [        45      (       a+  [        U R                  R                  U R                  U5      $ [        U R                  [        5      (       d   [        U R                  5      5       e[        U R                  R                  U R                  U5      $ r   )r  r  r  r   rI  r  r  r!  r   r  r   rJ  r   r[  )r  rX  s     r   r;  !ComputedBuffer.get_store_function  s    //#,,.;;=dii)T4!89949944diiIIdii33DT$))_D349911499gFFr   c                   [        U R                  [        5      (       Ga  [        R                  " U R
                  R                  5       U R
                  R                  5       5      u  u  pnU R                  5       R                  n[        S U 5       5      (       d   eU VVs/ s Hk  n[        U[        R                  5      (       d  M$  [        UR                  U Vs0 s H%  ofS:w  d  M
  U[        R                  R                   _M'     sn5      PMm     nnnU(       a  [        U R
                  ["        [$        45      (       a  U R
                  R'                  X5      nOUnU Vs/ s H,  n[(        R*                  R,                  R/                  X5      PM.     n	nSSKJn
  U
" XR5                  5       5      $ gs  snf s  snnf s  snf )aD  
If our layout is still flexible, try to determine the stride order based on stride orders of reads.

TODO(jansel): A better algorithm here would look at downstream consumers of this
              value and try to do global graph-level layout optimization.
              This is also something just begging to be autotuned.
c              3  v   #    U  H/  n[        U[        R                  [        R                  45      v   M1     g 7fr   )r   rB   StarDep	MemoryDeprk  s     r   r   0ComputedBuffer.get_fill_order.<locals>.<genexpr>$  s2      A 1|33\5K5KLMMs   79r   r@   pick_loop_orderN)r   rJ  r  rB   r  rI  r  r]  rH  rL  r   rJ  ri   r   r   r  r  r  r!  r   rm   r  r  r  	schedulerrM  r
  )r  
index_varsr  r   rL  r  vr  r  stride_lengthsrM  s              r   r   ComputedBuffer.get_fill_order  sx    dkk>22.:.M.M		,,.		0L0L0N/+(Z! ((*00E       Aa!7!78 Y
177n$WnUVPV_Q_n$WX   dii$66"ii//
KG(GMR"MRTAGG$$11$@U  " 7&~}}GG# %X"s*   %#F?F? 	F:-F:F?(3G:F?c                    [        U R                  [        5      (       a:  U R                  5       nU(       a  U R	                  U5        g U R                  5         g g r   )r   rJ  r  r   r<  r2  r;  s     r   r  ComputedBuffer.decide_layout<  sC    dkk>22'')E2259""$ 3r   c                   [         R                  " U R                  R                  5       U R                  R	                  5       SS9u  p[
        R                  " [        SU R                  5       5         [        U R                  5       U R                  5       (       a  UOUS S U/UQ76 nS S S 5        / n/ n/ n/ nUR                  5        Hf  u  pXS   ;   a-  U(       a   eUR                  U5        UR                  U	5        M:  XS   ;   d   eUR                  U5        UR                  U	5        Mh     Xg4WXE44$ ! , (       d  f       N= f)Nqr|   r`  r@   r   )rB   r  rI  r  r]  r    r   rb  r  rM   r;  rZ  itemsr  )
r  r   
var_rangesrY  rO  reduce_vars
index_sizereduce_sizerP  r   s
             r   get_default_sizes_body%ComputedBuffer.get_default_sizes_bodyD  s6    (::II((*DII,H,H,JSV
 \\.*;T__=NO'')0022Ra 	D P 
!#
$$&DAG|&&!!!$!!!$G|#|""1%""1% ' ($0III) POs   37D;;
E	c                X  ^ ^^ T R                  5       u  u  p4nu  pgU(       a  U" X44XVU45      u  u  p4nu  pg/ UR                  R                  5       QmUb  [        U[        5      (       a  [        U5      S:X  d   eUu  p[        U[        5      (       d   [        U5      5       e[        U	[        5      (       d   [        U	5      5       e[        S U	 5       5      (       d   eUR                  n
X:X  d	   U
U45       eU	 Vs/ s H  oT;  d  M
  UPM     n	nTU	-  m/ UR                  5       Qm[        R                  R                  T [        R                   5      (       d  TR#                  UR%                  5       5                  SUUU 4S jjnXg-   n['        [)        T 5      5      (       + =(       d    [*        R,                  (       + nU" UUUU5      u  nnnU" X}XN5      u  nnn[.        R0                  " UUSS9u  u  nnn[3        UU" U5      U" U5      /UUU5      nUU4U4$ s  snf )a  
This is a main place where we do loop transformations in a
backend-agnostic way.

Here we:
    1) Remove any 1 dimensions
    2) Fuse contiguous dimensions together
    3) Reorder dimensions based on stride orders

Optional argument extra_indexing_constraints can be used to append additional
indexing expressions to existing ones derived from buffer's body. This can be useful
to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
the scheduler node compatible with other nodes.
Optional argument recompute_sizes_body_func can be used to recompute sizes and body
on the default body. This can be useful to append additional loop transformations.
r   c              3  B   #    U  H  n[        U[        5      v   M     g 7fr   )r   r!   )r   fs     r   r   6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>  s     H4Gqz!T**4G   c           	        > TR                  XUT
5      u  p$nU" U 5      n U(       aD  [        R                  R                  R	                  U U[        T	X5      5      u  p&n[        XV5      nOUnX(U4$ r   )_apply_loop_reorderingrm   r  r  _simplify_loopsrF   r   )x_varssupport_varsr.  simplify_loopsreindex0r   r   _pruner   index_formulasmemory_addrsr  s            r   simplify_and_reorderAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reorder  s{     )-(C(Ce\)%EX f%F*+''*:*:*J*J,^VK+'
 *(="8++r   pr|   )
rf  Sequence[sympy.Symbol]rg  rp  r.  r  rh  r   r   dtuple[list[int], Callable[[Sequence[int]], Sequence[int]], Callable[[Sequence[int]], Sequence[int]]])r\  indexing_exprsr   r   r   r   r   r   r   r   rX  get_write_exprsrm   r  r  rC   PREFER_STORE_LOOP_ORDERextendget_read_exprsrd   r  rA   loop_ordering_after_fusionrB   index_vars_no_squeezerM   )r  extra_indexing_constraintsrecompute_sizes_body_funcrZ  r[  rY  rO  rY  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr  rm  rg  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsrX  rk  rl  s   `                     @@r   rm  #ComputedBuffer.simplify_and_reordere  s   4 '')		
%Z%Z %
 *)4k1J	))
 94..5578%15u==23q89 :T6!3T::WDAV<WW:1488S$?R:SS8H4GHHHHH"&//&? #%B ? /#.a>2I.   # 11N0--/0ww""4)O)OPP 3 3 56	,*	,0	, !	, !		,

	, 	,6 "/t,--VV5V5V1V 	 (<	(
$\1 ,@{,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11I#s   	H'H'c           
     n   SSK Jn  Uc  / n U Vs/ s H-  n[        R                  R                  R                  X`U5      PM/     nn[        U5      [        U5      :X  a  [        US   5      [        U 5      :X  d   e[        [        U" XrU5      5      5      nU V	s/ s H  oU	   PM	     nn	U[#        U5      [%        U5      4$ s  snf ! [         a^    [        R                  (       a)  [        R                  S[        [        X5      5      U5        [        [!        [        U5      5      5      n Nf = fs  sn	f )zE
Shuffle the order of loops around to hopefully improve performance.
r@   rL  r   z%Did not simplify complex index:
%s
%s)rN  rM  rm   r  r  r  r   r   r  	ExceptionrA   r  r  warningr   r   r   r   r   )
rO  rg  r.  rl  priority_idxrM  r  rQ  r   r   s
             r   rd  %ComputedBuffer._apply_loop_reordering  s#    	/L	, )(D   --dM(   w<3|#44WQZCM :   /',"OPQE $))5aq5)l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 4CAC D2C A%D/.D/c                6    U R                   R                  5       $ r   rI  r]  r  s    r   r]  !ComputedBuffer.get_reduction_size	      yy++--r   c                6    U R                   R                  5       $ r   rI  rZ  r  s    r   rZ  !ComputedBuffer.get_reduction_type  r  r   c                6    U R                   R                  5       $ r   )rI  r  r  s    r   rc  ComputedBuffer.is_no_op  s    yy))++r   c                    gr'  r   r  s    r   r  ComputedBuffer.should_allocate  r  r   c                8    U R                   R                  U5      $ )r_  rI  rf  r  s     r   rf  !ComputedBuffer.constant_to_device  s    yy++F33r   r   )r   Iterator[None]r  r  r  ru  r  r  r  r  r  )r   zCallable[..., None])r   Optional[list[int]]rt  )r   zMtuple[tuple[list[Expr], list[Expr]], LoopBody, tuple[list[Expr], list[Expr]]]NN)ry  *Optional[tuple[dict[Any, Any], list[Any]]]rz  Optional[Callable[..., Any]]r   z8tuple[tuple[list[Expr], list[Expr]], Optional[LoopBody]]r   )rO  rp  rg  rp  r.  r  rl  zlist[sympy.Expr]r  r  r   rq  r  r  )"r   r   r   r   r  r   r%  r  r  r  r)  r,  rP  r  r  rH  rX   rW  r  r>  r;  r   r  rW   r\  rm  rd  r]  rZ  rc  r  rf  r   rA  rB  s   @r   rw  rw    sg    K%*NN*6  6	%%** ,-$)!	! .6	%IG%N% J
J JD RVBFz2$Nz2 $@z2 
B	z2x  -1%B*%B,%B %B '	%B
 *%B
%B %BN..,4 4r   rw  c                     ^  \ rS rSrSr        SU 4S jjrSS jrSSS jjrSS jrSS jr	SS jr
  S     SS	 jjrS
rU =r$ )rz  i  zh
Represents a Triton (in the future other type) of template operator
that we can fuse an epilogue onto.
c                   > [         TU ]  S US9  [        R                  U5      U l        X0l        [        R                  R                  U 5      U l	        [        R                  R                  U 5        g r  )r  r  rx  unwrap_storagere  make_kernel_renderrm   r  register_bufferr   register_operation)r  rJ  re  r  r  s       r   r  TemplateBuffer.__init__   sW     	d62"11&9"4GG++D1		""4(r   c                     U R                  SS9$ )NT	normalize)rK   r  s    r   rH  TemplateBuffer.get_read_writes,  s    ''$'77r   c           
     z  ^^^ U R                  5       mU R                  5       R                  5       mSUU4S jjn[        R                  " X R                  5       SUS9nU R                   H  m[        T[        [        45      (       d   [        T5      5       e[        TR                  [        5      (       d   [        TR                  5      5       eTR                  R                  5       mSUU4S jjnU=R                  [        R                  " UTR                  5       SUS9R                  -  sl        M     U$ )Nc                b   > [        U5      S:X  d   e[        R                  " TT" U 5      S5      $ )Nr   fake)r   rk   rU  )r   r  rX  r   s     r   dummy1TemplateBuffer.extract_read_writes.<locals>.dummy3  s,    v;!###99T75>6::r   r   r  c                |   > [        U5      S:X  d   e[        R                  " TR                  5       T" U 5      5      $ r  )r   rk   r8  r  )r   r  rX  rg  s     r   r  r  A  s0    6{a'''xx??r   )r   Sequence[Any]r  r  r   r   )r  r  r  rB   rK   r
  re  r   rO  rY  r   rJ  r  rL  )r  r  r  depsrX  rg  r   s       @@@r   rK   "TemplateBuffer.extract_read_writes/  s    }}//#002	; 	; //==?B)
 ;;CcOV#<==HtCyH=cjj&11C4

3CC1jj--/G@ @ JJ,::s||~rYeJ  r   c                6    [         R                  R                  $ r   )r   r  r  r  s    r   r]  !TemplateBuffer.get_reduction_sizeK  s    ww{{r   c                    g r   r   r  s    r   rZ  !TemplateBuffer.get_reduction_typeN  r  r   c                    gr'  r   r  s    r   r  TemplateBuffer.should_allocateQ  r  r   c                *    U R                  5       / 4S 4$ r   r  )r  ry  rz  s      r   rm  #TemplateBuffer.simplify_and_reorderT  s$      
 	
r   )re  r  r   )rJ  r  re  Sequence[IRNode]r  r  r   r   r  r  )r  r   r   r  r  r  r  r  )ry  r  rz  r  r   z<tuple[tuple[Sequence[Expr], list[Expr]], Optional[LoopBody]])r   r   r   r   r  r  rH  rK   r]  rZ  r  rm  r   rA  rB  s   @r   rz  rz    s~    

)
) !
) 9	
)
 

)88
 RVBF
$N
 $@
 
F	
 
r   rz  c                     ^  \ rS rSr  S           S	U 4S jjjr\" S 5       S
   SU 4S jjj5       rSS jrSS jrSS jr	Sr
U =r$ )TritonTemplateBufferib  c                  > [         T
U ]  XU5        X@l        U /U l        UGb  [        R
                  R                  R                  [        R
                  R                  R                  4n[        R                  R                  R                  nXv;   d   SU SU 35       e[        U R                  S   [        5      (       d   [!        U R                  S   5      5       eU R                  S   R#                  5       nU =R                  U V	s/ s H  n	[%        ['        US9X5      PM     sn	-  sl        U(       a  UO	[)        5       U l        SU l        SU l        gs  sn	f )a  
NOTE:[TritonTemplates with multiple outputs]
We want the ability for TritonTemplates to output multiple tensors. Triton
kernels have no notion of outputs and this is done by creating tensors that
are then mutated by the kernel. Currently our STORE_OUTPUT codegen doesn't
support creating multinode outputs for triton templates.
We work around this by creating an extra input buffer during the lowering
and we mark them as mutated inputs.
Nz$Mutated inputs are only allowed for z	 but got r   r  )r  r  mutated_inputsoutputsr  rk   higher_orderflex_attentionflex_attention_backwardrm   r  current_noder  r   re  r   r   r  MutationOutputr  r;   allowed_prologue_inpssubgraph_inpssubgraph_outs)r  rJ  re  r  r  r  allowed_setr  r  r  r  s             r   r  TritonTemplateBuffer.__init__c  s:   " 	);<,&*V% 		&&55		&&>>K 77//66L. 6{m9\N[. dkk!nf55KtDKKN7KK5[[^..0FLL))C z8#D) L &;!
 	" SW?Cs   Ec                ,  > [         TU ]  U5      nU R                  (       a  U R                  O/ nU R                  (       a  U R                  O/ nU Hz  n[	        U[
        R                  5      (       a  UR                  [        XQ5      5        M>  [	        U[        5      (       a"  UR                  UR                  U5      5        Mu  Uc  Mz   e   U H?  n[	        U[        5      (       a"  UR                  UR                  U5      5        M:  Uc  M?   e   U$ r   )
r  rW  r  r  r   r   r!   updater)   r   )r  rd  resr  r  rg  r   r  s          r   rW  )TritonTemplateBuffer.get_free_symbol_uses  s     g*=9.2.@.@**b.2.@.@**b C#uzz**

+C?@C((

333MBC{"{ ! !C#v&&

333MBC{"{	 ! 
r   c                    U R                   $ r   r  r  s    r   r   TritonTemplateBuffer.get_outputs      ||r   c                    U R                   $ r   )r  r  s    r   get_allowed_prologue_inps.TritonTemplateBuffer.get_allowed_prologue_inps  s    )))r   c                &    SU R                    S3nU$ )NzTritonTemplateBuffer(layout=r  rC  )r  r   s     r   r  TritonTemplateBuffer.__str__  s    ,T[[M;
r   )r  r  r  r  r  r  )rJ  r  re  r  r  zOptional[Callable[_P, _T]]r  Optional[Iterable[IRNode]]r  zOptional[OrderedSet[str]]r   r   r  r  r  ru  r  )r   r   r   r   r  rX   rW  r  r  r  r   rA  rB  s   @r   r  r  b  s     6:;?*D*D !*D 7	*D
 3*D  9*D 
*D *DX 23$)!	! 4.* r   r  c                     ^  \ rS rSrSr          SU 4S jjrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSrU =r$ )ChoiceCalleri  a  
Represents a possible choice used in autotune_process.py.
During autotuning, self.benchmark() is first called to get benchmark result,
and if this choice is selected, self.output_node() is called to get the output_node.

Children classes: TritonTemplateCaller, CUDATemplateCaller.
c                R   > [         TU ]  5         Xl        X0l        X l        X@l        g r   )r  r  r   rJ  r   description)r  r   r   rJ  r  r  s        r   r  ChoiceCaller.__init__  s(     		& 'r   c                  ^^ U R                  5       m[        [        S.n[        R                  (       a  [        UU4S j40 UD6$ [        R                  " TTSU040 UD6$ )N)warmuprepc                    > T " T6 $ r   r   )algor   s   r   r  (ChoiceCaller.benchmark.<locals>.<lambda>  s	    D$Kr   r   )to_callableautotune_warmupautotune_reprA   /profile_bandwidth_with_do_bench_using_profilingr]   rR   	benchmark)r  r   r   benchmark_configsr  s     ` @r   r  ChoiceCaller.benchmark  s[    !%
 AA+,?UCTUU$$T4%SARSSr   c                    [         er   r  r  s    r   	call_nameChoiceCaller.call_name  r  r   c                    [         er   r  r  s    r   r  ChoiceCaller.to_callable  r  r   c                "    U R                  5       $ )z
Hash key for the underlying kernel. By default, we assume there are no
runtime params, so kernel hash key defaults to choice caller's hash key.
)hash_keyr  s    r   kernel_hash_keyChoiceCaller.kernel_hash_key  s    
 }}r   c                    [         er   r  r  s    r   r  ChoiceCaller.hash_key  r  r   c                    [         er   r  r  s    r   r`  ChoiceCaller.output_node  r  r   c                    0 $ )zRInformation returned here is logged to the autotune log file when that is enabled.r   r  s    r   	info_dictChoiceCaller.info_dict  s    	r   c                    g)Nunsupported_choicer   r  s    r   autoheuristic_idChoiceCaller.autoheuristic_id  s    #r   )r  r   rJ  r   )
r   r   r   r  rJ  r  r  r   r   r   )r   r   r   r'  r   rG  r  )r   r  )r   r:  )r   z<dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType]]])r   r   r   r   r  r  r  r  r  r  r  r`  r  r  r   rA  rB  s   @r   r  r    sk    '' "' 	'
 ' 
'T""""$ $r   r  c                      \ rS rSrSS jrSrg)TritonTemplateCallerBasei  c                    [         er   r  r  s    r   get_make_kernel_render/TritonTemplateCallerBase.get_make_kernel_render  r  r   r   N)r   r   )r   r   r   r   r 	  r   r   r   r   r  r    s    "r   r  c                     ^  \ rS rSrSr            SU 4S jjr\SS j5       r S   SS jjr\	R                  SS j5       rSS jr S   SS jjr    SS	 jrS
rU =r$ )MultiTemplateBufferi  a3  
Represents a Buffer with multiple backing implementation choices.

Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
Otherwise, the fastest base choice will be chosen.
c                   > [         TU ]  UUS US9  X0l        0 U l        X l        [        S U 5       5      U l        0 U l        g )N)rJ  re  r  r  c              3     #    U  H]  n[        U[        5      =(       dA    [        U[        R                  R                  R
                  5      =(       a    UR                  v   M_     g 7fr   )r   r  r  r  select_algorithmExternKernelCallerhas_out_variant)r   choices     r   r   /MultiTemplateBuffer.__init__.<locals>.<genexpr>  sU      %
 - v78 65??#C#C#V#VW +**
 -s   A%A')r  r  _choice_timings_fn_choice_timingsoriginal_inputsr   _output_plannable_make_kernel_renders)r  rJ  re  choice_timings_fnunfiltered_choicesr  r  s         r   r  MultiTemplateBuffer.__init__   s`     	#"7	 	 	
 #4OQ%!$ %
 -%
 "
 ?A!r   c                    U R                   $ )zN
Are all possible choices TritonTemplates or Extern Kernels with out variants
)r	  r  s    r   output_plannable$MultiTemplateBuffer.output_plannable  s    
 %%%r   c                z    XR                   ;  a  U R                  U5      U R                   U'   U R                   U   $ r   )r	  r	  )r  hint_overrides     r   choice_timings"MultiTemplateBuffer.choice_timings"  s<      4 44262I2I-2XD  /##M22r   c              #  8  #    [        U[        R                  R                  R                  5      (       d   [        U5      5       eU R                  UR                  :X  d   eU R                  nUR                  5       U l         S v   X l        g ! X l        f = f7fr   )	r   r  r  r	  TritonTemplateCallerr   rJ  r  r 	  )r  callerrenders      r   swap_as_triton_caller)MultiTemplateBuffer.swap_as_triton_caller)  s     EOO44II
 
 	<	 
 {{fmm+++(("("?"?"A	-&,#f#s   BBB BBBc                \   [        U[        R                  R                  R                  5      (       d   [        U5      5       eU R                  5       UR                  R                  :X  d   eU R                  5       UR                  R                  :X  d   eUR                  5       U l        g r   )r   r  r  r	  r	  r   r
  rJ  r  r/  r  r 	  r  )r  r	  s     r   finalize_as_triton_caller-MultiTemplateBuffer.finalize_as_triton_caller7  s    EOO44II
 
 	<	 
 }}&--"4"4444 FMM$8$8888"("?"?"Ar   c                R    U R                  US9n[        X"R                  S9nX2U   4$ )N)r	  r  )r	  ru  r  )r  r	  timings
min_choices       r   get_min_choice"MultiTemplateBuffer.get_min_choice?  s3     %%M%Bkk2
J/00r   c                    UR                  5        H"  u  p#UR                  5       U R                  U'   M$     U R                  S   U l        g)z;Finalize with multiple callers for different hint overridesN)rW  r 	  r	  r  )r  callersr	  r	  s       r   finalize_as_triton_callers.MultiTemplateBuffer.finalize_as_triton_callersF  sE     &-]]_!M7=7T7T7VD%%m4 &5 #'";";D"Ar   )r	  r	  r	  r	  r  r	  )rJ  r  re  r  r	  z4Callable[[Optional[int]], dict[ChoiceCaller, float]]r	  zlist[ChoiceCaller]r  rv  r   r   r  r   )r	  r  r   zdict[ChoiceCaller, float])r	  r  r   r  )r	  r  r   r   )r	  r  r   ztuple[ChoiceCaller, float])r)	  z-dict[Optional[int], TritonTemplateCallerBase]r   r   )r   r   r   r   r  r  r  r	  r	  r  r  r	  r!	  r&	  r*	  r   rA  rB  s   @r   r	  r	    s    AA !A P	A
 /A  /A 
A6 & & .23*3	"3 - -B .21*1	#1BDB	B Br   r	  c                  \   ^  \ rS rSr              SU 4S jjrSS jrSS jrSrU =r$ )	CUDATemplateBufferiQ  c                J   > [         TU ]  XU5        X@l        XPl        X`l        g r   )r  r  workspace_sizetemplatesupports_epilogue_fusion)r  rJ  re  r  r/	  r0	  r1	  r  s          r   r  CUDATemplateBuffer.__init__R  s&     	);<, (@%r   c                8    U R                   b  U R                   $ S$ r  )r/	  r  s    r   r  %CUDATemplateBuffer.get_workspace_sizea  s    &*&9&9&Et""L1Lr   c                ~    U R                  5        H)  n[        R                  " UR                  5       S S 5        M+     g r   )r  rk   rU  r  )r  rX  s     r   emulate_store_fn#CUDATemplateBuffer.emulate_store_fnd  s,    &&(FIIfoo't4 )r   )r1	  r0	  r/	  )rJ  r  re  r  r  Callable[_P, _T]r/	  r   r0	  rq   r1	  r   r   r   r  rt  )	r   r   r   r   r  r  r6	  r   rA  rB  s   @r   r-	  r-	  Q  sd    AA !A -	A
 A A #'A 
AM5 5r   r-	  c                  T   ^  \ rS rSr            SU 4S jjrSU 4S jjrSrU =r$ )CppTemplateBufferii  c                L   > [         TU ]  XU5        X@l        XPl        S U l        g r   )r  r  r0	  r		  r  )r  rJ  re  r  r0	  r		  r  s         r   r  CppTemplateBuffer.__init__j  s&     	);< /3r   c                  > [        U R                  [        5      (       a  [        U R                  [        5      (       d   [        U R                  5      5       eU R                  S   n[        U[        5      (       d   [        U5      5       eUR                  n[        U[        5      (       d   [        U5      5       eU$ [        TU ]%  5       $ r  )
r   rJ  MultiOutputLayoutr  r   r   rY  r  r  r  )r  first_outputrJ  r  s      r   r  CppTemplateBuffer.get_layoutw  s    dkk#455dllH55ItDLL7II5<<?LlF33GT,5GG3!((Fff--;tF|;-M7%''r   )r		  r  r0	  )rJ  r  re  r  r  r8	  r0	  rq   r		  r   r   r   r}  )r   r   r   r   r  r  r   rA  rB  s   @r   r:	  r:	  i  sL    44 !4 -	4
 4 4 
4	( 	(r   r:	  c                  V   ^  \ rS rSrSr S           SU 4S jjjrSS jrSrU =r$ )	CuteDSLTemplateBufferi  z
Buffer for CuteDSL (CUTLASS Python DSL) template kernels.
Similar to other template buffers but specialized for CuteDSL operations.
c                  > [         TU ]  XU5        X@l        XPl        U /U l        Ub  [        U R                  S   [        5      (       d   [        U R                  S   5      5       eU R                  S   R                  5       nU =R                  U Vs/ s H  n[        [        US9Xp5      PM     sn-  sl        g g s  snf )Nr   r  )r  r  r0	  r  r  r   re  r   r   r  r  r  )	r  rJ  re  r  r0	  r  r  r  r  s	           r   r  CuteDSLTemplateBuffer.__init__  s     	);< ,&*V%dkk!nf55KtDKKN7KK5[[^..0FLL))C z8#D) L &s   B>c                    U R                   $ r   r  r  s    r   r  !CuteDSLTemplateBuffer.get_outputs  r  r   )r  r  r0	  r   )rJ  r  re  r  r  r8	  r0	  r   r  r  r   r   r  )	r   r   r   r   r  r  r  r   rA  rB  s   @r   rB	  rB	    s[     6: ! -	
  3 
 * r   rB	  c                &    [        S U  5       5      $ )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   r   r   r   r  s     r   r   #is_node_sequence.<locals>.<genexpr>  s     4ez!V$$erb  )r   )r   s    r   is_node_sequencerL	    s     4e444r   c                      \ rS rSr% S\S'   SS jrSS jrSS jr\SS j5       r	\
    SS j5       rSS	 jrSS
 jr\" S 5       S   SS jj5       rSrg)rx  i  )Sequence[Union[IRNode, Sequence[IRNode]]]re  c                n    U R                   U   n[        U[        5      (       d   eUR                  5       $ r   re  r   r   r  )r  r   inputs      r   
input_nameInputsKernel.input_name  s/    A%((((~~r   c                  ^ [         [        R                     " 5       n[        R                  mU R                   Hq  n[        U[        5      (       a  UR                  U4S jU 5       5        M5  [        U[        5      (       a  ML  UR                  T" UR                  5       5      5        Ms     [         [        R                     " U4S jU R                  5        5       5      n[        R                  " UU[        5       S9$ )Nc              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7fr   r  )r   r   rI  s     r   r   /InputsKernel.get_read_writes.<locals>.<genexpr>  s     BEqWQZZ\22E   #&c              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7fr   r  )r   r  rI  s     r   r   rV	    s#      .
/AGCLLN##/ArW	  r7  )r;   rB   rH   rI  re  r   r   r  r   r  r  r  r:  )r  rL  rQ	  r8  rI  s       @r   rH  InputsKernel.get_read_writes  s    <++,.&&[[E%**BEBBE#899		'%.."234 ! L,,- .
/3/?/?/A.
 
 &&"
 	
r   c                6    U R                  5       R                  $ r   rK  r  s    r   r  InputsKernel.get_reads  rN  r   c                   [        U[        5      (       a  UR                  n[        U[        5      (       a  UR                  n[        U[        5      (       a*  [        U[
        5      (       d  [        R                  U5      n[        U[        5      (       a  U R                  U5      $ [        U[        5      (       a  U$ [        U[        [
        45      (       d   [        U5      5       eU$ r   )r   r   rI  rX  rd  rO  r  realize_inputunwrap_storage_for_inputTorchBindObjectrY  r   r	  r   s     r   r^	  %InputsKernel.unwrap_storage_for_input  s    a##Aa$$Aa"":a+I+I**1-Aa##
 //22a))H!fo677@a@7r   c                    / nU  Hd  n[        U[        5      (       a&  U Vs/ s H  n[        R                  U5      PM     nnO[        R                  U5      nUR	                  U5        Mf     U$ s  snf r   )r   r   rx  r^	  r  )re  
inputs_newr   r   s       r   r  InputsKernel.unwrap_storage  sm     =?
A!X&&GHIq!\::1=qI 99!<a   	 Js   A/c                    gr'  r   r  s    r   r`  InputsKernel.is_extern  r  r   c                    gr  r   r  s    r   rP  InputsKernel.num_reads  rP  r   c                    [         [        R                     " 5       nU R                   HI  n[	        U[
        5      (       a  X#R                  U5      -  nM-  U H  nX$R                  U5      -  nM     MK     U$ r   )r;   r   r#   re  r   r   rW  )r  rd  r  rg  	inner_inps        r   rW  !InputsKernel.get_free_symbol_uses  sg     u||$&;;C#v&&--m<<!$I77FFA "%	  r   r   N)r   r   r   r   r  r  r   r   r   r   )re  rN	  r   z%list[Union[IRNode, Sequence[IRNode]]]r  r  r  r  )r   r   r   r   r   rR	  rH  r  r?  r^	  r  r  r`  rP  rX   rW  r   r   r   r   rx  rx    s    55 

,,  $ 
9
	.
 
 N+$)
!
	!
 ,
r   rx  c                  (    \ rS rSrSS jrSS jrSrg)	NopKerneli  c                    gr'  r   r  s    r   rc  NopKernel.is_no_op  r  r   c                    [        5       $ r   r:   r  s    r   r  NopKernel.get_reads  r  r   r   Nr  r  )r   r   r   r   rc  r  r   r   r   r   rn	  rn	    s    r   rn	  c                      \ rS rSrSr\S
S j5       r\ S     SS jj5       r\" S 5       S   SS jj5       r	\SS j5       r
SS jrS	rg)ConcatKerneli  zb
There isn't actually a real kernel for concat, we just change the
storage for the upstream data.
c                   US   R                  5       nUS   R                  5       n[        US   R                  5       5      nS/nXR   /nSUs=::  a  [	        U5      :  d   e   e[        S[	        U5      5       H  nX   R                  5       n	UR                  XR   5        [	        U	5      [	        U5      :X  d   eX   R                  5       U:X  d   eX   R                  5       U:X  d   e[        [	        U5      5       HE  n
X:X  a  XZ   X   -   XZ'   M  [        R                  R                  R                  XZ   X   5      XZ'   MG     UR                  XR   5        M     [        R                  U5      n[        R                  (       a#  [        R!                  XUS   R"                  5      n[        [	        U5      5       H|  nX   n[%        U5      (       d  M  UR'                  5       n[)        U[*        5      (       d  M@  [        R-                  UR.                  UR0                  5      (       d  Mq  [3        U5      n  O   [5        S U 5       5      n[        R                  R6                  R8                  S   n[)        U[        5      (       d   [;        U5      5       eUSL a"  [5        S U 5       5      (       a  [3        U5      n[=        S U 5       5      nUc   e[?        S[+        UUUUUS9/ S	9n[A        U5      n/ n[C        U5       GH  u  nn[)        U[D        [F        45      (       d   [;        U5      5       eU RI                  U[J        RM                  UX&U   Xx   SS
95      n[)        U[N        5      (       d   [;        U5      5       e[)        URP                  [        5      (       d   [;        URP                  5      5       eURP                  R                  U5        [)        URR                  [D        5      (       a  URR                  RU                  5       nOURR                  n[)        U[@        5      (       d  GM4  URW                  5       (       d  GML  UR                  5       =nc  GMb  [Y        UR:                  5      (       d  GM  [[        U5      (       a  GM  UR                  UR]                  5       5        GM     [	        U5      S:  aR  [        R                  R_                  U[`        Rb                  5      (       a  [        R                  Re                  U5        [        R                  Rg                  U5      Ul4        U Rk                  URP                  5      Ul(        [        R                  Rm                  U5        U$ )z&
Create the concat kernel from inputs
r   r@   c              3  8   #    U  H  n[        U5      v   M     g 7fr   )r  rg  s     r   r   &ConcatKernel.create.<locals>.<genexpr>8  s     -WPV1.CA.F.FPVr  Fc              3    #    U  Hv  nS UR                   ;   =(       a[    UR                   S    R                  [        R                  S9=(       d*    UR                   S    R                  [        R                  S9v   Mx     g7f)r'  r=  N)r]  rP  r  r8  r:  r   args     r   r   rw	  <  sr      <
 $ SXX --E<O<O-P W88E?00u?U?U0V
 $s   A>B c              3  z   #    U  H1  n[        U5      =(       a    UR                  5       R                  v   M3     g 7fr   )r  r  rN  rg  s     r   r   rw	  F  s-      
KQa!!$A)A)AA6s   9;N)r  r  r  r  rN  r   rJ  re  )rq  )7r  r  r   r
  r   r   r  rm   r  r  r  r  r  rA   r  r  r  r  r  r  r   rL  r  r  r  r-   rt  r  r   r   r   rt	  rX  r   rd  r  r  rd  r  rY  re  rI  r/  r  rd   rc   rl  r  rC   FOREACHregister_operation_listr  r   r  r  )r	  re  r>  r  r  r  offsets_startoffsets_endr   
input_sizer
  output_strider   rJ  any_input_is_storage_and_layoutfx_node_argsrN  concat_kernelkernelop_namesrg  rv  input_unwrappeddevs                           r   r  ConcatKernel.create  s   
 %%'q	##%q	**,-}oC'#h-'''''q#f+&A++-J  /z?c(m3339&&(E1119'')V3333x=)8"*+
"=HK"#''"2"2"L"L Z]#HK	 * x}- ' (6'H'H'R''"//M
 s6{#A	A$Q''K 88fmmTT$B8$LM $ +.-WPV-W*W'ww++003,--AtL/AA-*e3 <
 $<
 9
 9
 ;8DM 
KQ
 
	 !!!$$# 

 M*'FAscHj#9::EDIE:++  Cq!1;> ! L lF33GT,5GG3m22D99U4@T@T;UU9  ''5#((H--"%(("6"6"8"%(( ?J77#3355NN,,S9388$$"<00 ? ? AB1 (4 x=1!4!4V^=S=S!T!TGG++H5WW44]C"11-2F2FG	""=1r   Nc                2   [        U[        5      (       a  U R                  UR                  U5      $ [        U[        [
        45      (       d   [        U5      5       e[        UR                  [        5      (       a  [        UR                  R                  [        5      (       a  UR                  R                  (       d  gUc  g[        UR                  5       5      [        UR                  5       5      :X  d  g[        S [        UR                  5       UR                  5       5       5       5      $ [        UR                  S5      =(       aJ    [        UR                  R                  [         5      =(       a    [        UR                  ["        5      (       + $ )NFTc              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7fr   rD  rE  s      r   r   =ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  s1      EFB   88@@ErG  rJ  )r   r   can_realize_into_without_copyrI  rd  rX  r   r	  rJ  rL  r	  r   r/  r   r   r  r  ExternKernelAlloc)r	  r  r  s      r   r	  *ConcatKernel.can_realize_into_without_copy{  s!    c9%%44SXXsCC#*566AS	A6chh 344sxx<<xx00 { s~~'(C0@,AA !#.."2CNN4DE   CHHh' <388??N;<sxx):;;	
r   c                ,    [         R                  X5      $ r   )rn	  rW  rV  s     r   rW  !ConcatKernel.get_free_symbol_uses  s     --dBBr   c                   [        U[        5      (       d&  [        U5      (       a  [        U5      u  p4[        X4S9n[        U[        5      (       d   [	        U5      5       e[        U[
        5      (       a  U R                  UR                  U5      $ [        U[        5      (       ai  UR                  5         [        UR                  S5      (       d   eU R                  X5      (       a&  [        U5      UR                  l        UR                  $ [        R                  UR!                  5       UR#                  5       UR%                  5       ['        UR)                  5       UR)                  5       5       VVs/ s H.  u  pV[*        R,                  R.                  R1                  XV5      PM0     snnS9nU R                  Xr5      $ s  snnf )NrH  rJ  ra  )r   rO  r  rK  r   r   r  rI  rX  r  r  r	  ri  rJ  rJ  r  r  r  r  r   r
  rm   r  r  r  )r	  r  r  rR  rJ  r  r  pws           r   r  ConcatKernel.realize_into  sZ   
 #//$S))"7"<%7B#//:c:/c9%%##CHHc22c:&&KKM388X....00::"1#"6xx>>#--/__&  ??DA   ::1@?	  
 ((s   75Gc                    gr'  r   r  s    r   r  ConcatKernel.should_allocate  r  r   r   )re  r  r>  r   r   rX  r   )r  r   r  r  r   r   r  r  )r  r   r  r   r   r   r  )r   r   r   r   r  r?  r  r	  rX   rW  r  r  r   r   r   r   rt	  rt	    s    
 l l\ 26!
!
/!
	!
 !
F N+$)C!C	!C ,C
 ) )@r   rt	  c                  
  ^  \ rS rSr% SrSrS\S'   \R                  " \	S9r
S\S'   S	rS
\S'   S	rS\S'   S	rS\S'   \R                  " \S9rS\S'   S	rS\S'   S	rS\S'   \R                  " \	S9rS\S'   S	rS\S'   \R                  " \	S9rS\S'   \R                  " \S9rS\S'          SB                     SCU 4S jjjrSDS jrSES jrSFS  jrSFS! jrSGS" jrSGS# jrSHSIS$ jjrSJS% jrSKS& jr\ SLS' j5       r!\"        SMS( j5       r#\"SNS) j5       r$\"SOS* j5       r%\"SOS+ j5       r&\"   SP         SQS, jj5       r'\" SR       SSS- jj5       r(\" SR       STS. jj5       r)\"SOS/ j5       r*\"SOS0 j5       r+\"SOS1 j5       r,\"SOS2 j5       r-SFS3 jr.      SUS4 jr/SHSVS5 jjr0SWS6 jr1SXS7 jr2SRSYS8 jjr3SKS9 jr4SGS: jr5SGS; jr6SGS< jr7SZS= jr8S[S> jr9\:" S 5       SR   S\S? jj5       r;SKS@ jr<\<r=SAr>U =r?$ )]r  i  z
A class that represents Kernels which are not directly lowered to Inductor
Loop Level IR, such as custom operators, or aten operators which we fallback to.
r   r  constant_argsr  dict[str, Any]r   NOptional[ReinterpretView]output_viewr  python_kernel_namecpp_kernel_nameIterable[str]ordered_kwargs_for_cpp_kernelOptional[_OpOverloads]op_overloadzOptional[list[dict[str, Any]]]arg_propertieszdict[str, dict[str, Any]]allarg_propertiesz#Optional[dict[str, dict[str, Any]]]kwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]mutation_outputsc                2  > [         TU ]  UUUS9  X@l        U(       a  UO0 U l        X`l        Xl        U R                  U5        U R                  U5        Xl        U R                  5         0 U l
        / U l        [        R                  R                  U l        g Nr}	  )r  r  r	  r   r	  r	  set_cpp_kernel_nameset_python_kernel_namer	  collect_arg_kwarg_propertiesr	  r	  rm   r  r  fx_node)r  r   rJ  re  r	  r   r	  r	  r	  r	  r	  r  s              r   r  ExternKernel.__init__  s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww++r   c                     U /U R                   Q$ r   r	  r  s    r   r  ExternKernel.get_outputs  s    -t,,--r   c                    [        5       $ r   r:   r  s    r   r  %ExternKernel.get_unbacked_symbol_defs  r  r   c                   [        U R                  [        R                  R                  5      (       af  U R                  R
                  R                   Vs/ s H:  nUR                  (       a  M  UR                  UR                  UR                  S.PM<     snO.[        [        U R                  5      5       Vs/ s H  n0 PM     snU l        [        U R                  [        R                  R                  5      (       aS  U R                  R
                  R                   Vs0 s H'  nUR                  UR                  UR                  S._M)     snO0 U l        [        U R                  [        R                  R                  5      (       a  U R                   (       dR  U R                  R
                  R                   Vs/ s H!  oR                  (       d  M  UR                  PM#     snU l        U R                  R
                  R                   Vs/ s H  oR                  (       d  M  UPM     snU l        g / U l        g s  snf s  snf s  snf s  snf s  snf )N)r   r   rJ  )r   rJ  )r   r	  r  _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typerJ  r   r   re  r	  r	  r	  schema_kwargs)r  r   r   s      r   r	  )ExternKernel.collect_arg_kwarg_properties
  s    $**EJJ,A,ABB ))11;; <A||FFKK%&__
 < $C$4565"56 	$ $**EJJ,A,ABB ))11;;;A qOO;
  	 d&&

(=(=>>55$($4$4$<$<$F$F6$Fq,,FAFF$F62  ++33=="=a="D "$D? 76"s0   I/(I:I#.IIII+Ic                    [        U R                  [        5      (       a!  U R                  5         U R	                  5         g g r   )r   rJ  r  apply_constraintr2  r  s    r   r  ExternKernel.decide_layout/  s0    dkk>22!!#  3r   c                P    [        X5      u  p#U(       a  UR                  U5        g g r   )r`   make_comment)r  wrapper
origin_str_detailed_origin_strs       r   codegen_commentExternKernel.codegen_comment4  s%    +>t+M(
  , r   c                    [         er   r  r  r	  s     r   codegenExternKernel.codegen9  r  r   c                   Xl         [        R                  R                  (       a3  [	        U R
                  [        R                  R                  5      (       d  g U R
                  nU R                   c  UR                  S:X  aV  UR                  S:X  a  UR                  R                  S5      S   OUR                  R                  SS5      nSU S3U l         g UR                  R                  U l         g g )Natenr_  .r   r   z
at::_ops::z::call)r	  rm   r  cpp_wrapperr   r	  r  r	  r	  	namespace_overloadnamer   r  replacer	  r   )r  r	  r	  opnames       r   r	   ExternKernel.set_cpp_kernel_name<  s    .ww""*ejj33+
 +
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ (r   c                   Xl         Ub  g U R                  nUc  g [        U[        R                  R
                  5      (       a  SUR                   3U l         g UR                  R                  SS5       SUR                   3U l         g )Nztorch.ops.higher_order.._ops..ops.r	  )	r	  r	  r   r  r	  HigherOrderOperatorr   r   r	  )r  r	  r	  s      r   r	  #ExternKernel.set_python_kernel_nameT  s    "4)!!>

 > >??(??P&QD# $$,,Xw?@&//ARS #r   c                   SSK Jn  U R                  5       =n(       a  UR                  O[        R
                  R                  n[        R
                  R                  (       a  U R                  c   eU R                  $ [        R
                  R                  (       a  [        [        R
                  R                  U5      (       d(   [        [        R
                  R                  5      5       eU R                  c   e[        R
                  R                  R                  U R                  U5      $ U R                  c   eU R                  $ )Nr@   )CppWrapperCpu)codegen.cpp_wrapper_cpur	  r  r   rm   r  device_type
fx_wrapperr	  r	  r   rJ  r	  get_c_shim_func_name)r  r	  dr  s       r   get_kernel_nameExternKernel.get_kernel_namec  s    :!%!22A29L9L77**666***WW  agg22MBB D$$E B ''33377''<<$$f  **666***r   c           	         [         R                  U R                  5       U R                  5       U R	                  5       U R                  5       U R                  5       U R                  5       S9nUR                  5         U$ )N)r  r  r  r  rq  ro  )	rJ  r  r  r  r  r
  r  r  r  )r   r	  s     r   
copy_inputExternKernel.copy_inputv  sa    <<>++-]]_::<))+oo'  
 	

	r   c                	  ^^ X#S.n[         R                  " U5      u  nm/ m/ n/ nU H  nTR                  [        U[        5      =(       a    [        U[
        5      (       + 5        TS   (       a  UR                  U5        M[  [        U[        5      (       a2  [        R                  R                  R                  R                  US S9nUR                  U5        M           SUU4S jjn	U V
s/ s H  oR                  U
5      PM     nn
U H  n
[        U
5      (       d  M  [        U
SS9  M!     / nU GH  n
[        U
[        5      (       dh  U
R!                  5       [        R                  R"                  ;   a<  UR                  [        R                  R"                  U
R!                  5          5        M  [        U
[        5      (       dh  U
R!                  5       [        R                  R$                  ;   a<  UR                  [        R                  R$                  U
R!                  5          5        M  [        U
[&        5      (       a"  UR                  U
R)                  5       5        GM5  [        U
[*        R,                  R.                  R
                  5      (       ar  U
R0                  R2                  nU
R0                  R4                  S:X  a  Uc   eUR                  [*        R6                  R8                  U   R;                  5       5        GM  UR                  [=        U
SS95        GM     U	" X5      u  pU" U0 UD6nS n[        R>                  R                  =n(       a  [        R@                  RB                  RE                  S	5      n[G        5       n[        R@                  RH                  [*        RJ                  RL                  RN                  :X  a  US
   n[Q        [        R@                  5      nU   [S        U[        R@                  U5        S S S 5        [U        UUU5      n[        U[V        [X        45      (       d  U/OUnU H  n[        U[*        RZ                  5      (       d  M$  UR\                  (       d  M7  Sn[        R                  R@                  RB                  RE                  SS 5      =n(       a  U SU 3nU[        R                  l/        M     UUUU	U4$ s  sn
f ! , (       d  f       N= f)N)r   r   r2  )r=  c                6  > / n[        U 5      n[        U5      nT H@  nU(       a  UR                  [        U5      5        M&  UR                  [        U5      5        MB     [        R                  " UT5      nUR                  S/ 5      UR                  S0 5      4$ )Nr   r   )iterr  nextpytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsr  
it_tensorsit_non_tensors	is_tensorr  	args_specis_arg_tensors	          r   unflatten_args3ExternKernel.process_kernel.<locals>.unflatten_args  s     Fo.J!"56N*	MM$z"23MM$~"67	 +
 %%fi8A55$aeeHb&999r   TrJ  r"  )r   r'  r@   zEsparsity not handled. Please file issue for sparse inference weights.r  z Found from : 
 )r	  r   r	  r   r   ztuple[list[_T], dict[str, _T]])0r	  tree_flattenr  r   r   GeneratorStater!   rm   r  r  r   create_symintnoder]	  r  rK  rd  r  	constantstorchbind_constantsr_	  	get_valuer  r  irr  r   r   r"  default_generatorsclone_stater   r  r  r]  r  r
   r  _higher_order_opseffectswith_effectsr0   r5   r1   r   r   Tensor	is_sparsedisable_cudagraphs_reason)r	  r	  r   r   binded_args	args_flattensor_argsnon_tensor_argsr{	  r	  r   example_argsdevice_indexnew_args
new_kwargsexample_outputr	  r   node_meta_valctxexample_out_lir  msgr  r	  r	  s                           @@r   process_kernelExternKernel.process_kernel  s     $6%22;?	9%'C  3'O
30O,O R ""3'c4((''**44FFsQUFVC&&s+ 	:)	:@L	:+	: 	: 6AA[((+[A A$Q''%a5  	 	 A a**qzz|qww?P?P/P##AGG$5$5ajjl$CDq(++JJLAGG$?$??##AGG$?$?

$MNA//##AKKM2Au11@@AA xx~~xx}}.<3KKK##JJ11,?KKM ##$5aT$JK' *  .lL8Z8JN---9-NN//33E:M0;C~~$$(?(?(G(G(T(TT -a 0<Q^^L	1>>>J  9>=! ntUm<<  	
  A!U\\**q{{{]"#''"6"6";";"?"?t"TT;T E!2;-@C471   
 	
O Bj s   2S%7S**
S8c                \   [        U[        5      (       d   [        U5      5       e[        U[        5      (       a  U$ UR	                  5       n[
        R                  R                  UR                  5       5      nUc   eUR                  5       nUb  SUR                  ;   a  [        U[        [        [        45      (       a  [        UR                  [        5      (       a  UR                  S   R                  [         R"                  S9(       d/  UR                  S   R                  [         R$                  S9(       a)  UR'                  [)        UR+                  5       5      5        OUR-                  5         [.        R0                  " UR+                  5       SS9u  pVUS   nUR3                  5       " U5      n[
        R                  R4                  R7                  X5      n[
        R                  R4                  R9                  X5      n	[
        R                  R4                  R;                  X5      n
[=        Xy5      U
-   nX:w  a  [>        RA                  SU	U
U5        [B        e[        URD                  [G        URI                  5       URK                  5       UR+                  5       U	U
SS9S	9$ )
z
In order to pass this to an extern kernel we need a
ReinterpretView not a View.  This allows us to avoid some
unneeded copies.
r'  ry	  r  r|   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sFr  rH  )&r   rd  r   rO  r/  rm   r  r  r  r  r]  rY  r  rJ  r  rP  r  r8  r:  r@  r-   r
  r2  rB   r  r  r  r  stride_vars
offset_varre   r  r  r  rI  rL  r  r  )r	  r   x_unwrap_viewr  x_unwrap_view_fx_node
index_argsrX  r  r   rQ  rM  expecteds               r   convert_to_reinterpret_view(ExternKernel.convert_to_reinterpret_view  s4    !X&&/Q/&a))H gg  !7!7!9: # 3 3 5 "-.333=?FJ*OPP=//@@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

  ]
 ,  55eH''""..uA!!,,U?Z1F:IIR	 &%,,.kkmZZ\

 
	
r   c                   Uc
  [        5       $ [        U[        [        R                  R
                  R                  [        45      (       a	  [        US9$ [        U[        5      (       aY  [        R                  R                  [        R                  " UR                  UR!                  5       UR#                  5       S95      $ [        U[$        5      (       a  U$ [        U[&        5      (       a  U R)                  UR*                  5      $ [        U[,        5      (       a1  [-        U R)                  UR*                  5      UR/                  5       S9$ [        U[0        5      (       a@  UR3                  5         [5        UR7                  5       5      (       a   U R9                  U5      $ [        U[<        5      (       a  UR3                  5         U$ [        U[>        [        45      (       a  U$ U RA                  U5      $ ! [:         a     Naf = f)N)r  )r  r  rH  )!r  r   r!   r   r   r   r   r   r   r  rm   r  add_tensor_constantr  rP  r  r  r  rb  r   r]	  rI  rO  r  rd  r  r  r/  r
  r  rX  NonTensorObjr	  r`	  s     r   r]	  ExternKernel.realize_inputC  s   9'))a$ 3 3 ; ;SABB(a00a""77..QWWAKKM!,,.Q  a((Ha##$$QVV,,a))"&&qvv.q||~  a""IIK$Q]]_55::1== a$$IIKHa,(=>??H~~a   + s   G: :
HHc                    [        U5      (       a@  [        UR                  5       5      S:X  a  U$ UR                  5        H  nUS:X  d  M  Us  $    U R                  U5      $ r  )r  r   r/  r	  )r	  r   r  s      r   require_stride1ExternKernel.require_stride1d  sR     ##1<<>"a',,.Q;H ) ~~a  r   c                
   Uc  Uc   eUR                  5       S;   a	  U(       d  U$ [        U5      (       Ga/  [        UR                  5       [        5      (       a  U(       a  [        X5      =(       a(    [        UR                  5       R                  5      (       + n[        USSU(       aJ  [        [        R                  R                  R                  UR                  5       R                  5      5      OUUS9  U$ [        USSS UUS9  U$ [        UR                  5       [        [        45      (       ay  U(       a$  UR                  5       R!                  U5      (       d>  U(       aG  [#        X1R                  5       R                  UR%                  5       5      (       a  Ub  ['        X5      $ U$ [        UR                  5       =n[(        5      (       a  [        UR+                  5       =n[        5      (       a  [-        S5      e[        U[        5      (       aO  U(       a  UR!                  U5      (       d0  U(       a+  [#        X7R                  UR%                  5       5      (       a  U$ [        U[.        5      (       ak  U(       a$  UR                  5       R!                  U5      (       d>  U(       a9  [#        X1R                  5       R                  UR%                  5       5      (       a  U$ [        U[0        5      (       a  [        UR2                  [4        5      (       a  [        UR2                  [6        5      (       d  [        UR9                  5       =n5      (       a  [;        US5      (       ao  [        UR2                  [<        5      (       dP   U R?                  UR2                  5      Ul        U(       a  U RA                  XUS9$ U(       a  U RC                  XUS9$  S n	UR%                  5       n
Ub  [        R                  R                  n[G        [I        UR%                  5       5      5       Vs/ s HJ  nURK                  X<   S	5      (       d  M  URM                  UR%                  5       U   S
5      (       d  MH  UPML     n	nU	 H.  n[N        RP                  RR                  RU                  XS	S5      nM0     U RW                  U5      n[        USSUUUS9  U(       a  [        X5      (       d   e U$ U	(       a<  U
b  Uc   e[N        RP                  RR                  RY                  X5      n['        X5      $ U$ ! [D         a     GNgf = fs  snf )N)r   r@   TF)rK  rU  rV  r6  rT  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutrI  rW  r   r   r@   )-r  r  r   r  r  r]  r3   r  rK  r   rm   r  r  size_hints_or_throwrL  ri  r\  rA  r
  rV  r  r  r  ry  r   rI  rd  rO  r/  r  r	  r
  require_stride_orderrequire_exact_stridesr  r   r   r9  rj  r  r  loweringslice_r	  r  )r	  r   r   rD  r6  use_current_stride_ordermutation_layoutr  r/  expanded_dims	orig_sizer  r   r>  s                 r   require_stridesExternKernel.require_stridesn  s     M$===;;=F"=H !##!,,..99 0R0 0K3ALLN4I4IJJ - *#(-  8 - ! 0 0 D D$%LLN$9$9!" "'&3 H *#(-%)&3&3 HALLN[/,JKK1<<>;;EBB!1%||~'<'<ajjl  %0 4AE 
 $%LLN25O  $3$?$?$AA[N  )b   [99{<<UCC%5)+=+=qzz| 
 H a%%q||~77>>-!<<>#8#8!**, 
 Hq)$$1668,,qvv77%Q]]_&DkEEV,,{//1BCC88@33 4   #44 5   # .2JJL	$ww''H s1::<011A33M4DaH  11!**,q/1E 1   %OO,,33AAqA %
 NN1!''	
 5a????  (]-FFF((//=A21DDW ' s*   
6T3 T3 ,U&U7U3
U Uc                "    U R                  XUS9$ )N)rD  r6  r-
  )r	  r   rD  r6  s       r   r&
  "ExternKernel.require_exact_strides  s!     ""- # 
 	
r   c                "    U R                  XUS9$ )N)r   r6  r0
  )r	  r   r   r6  s       r   r%
  !ExternKernel.require_stride_order  s     ""1"OOr   c                .    U R                  U[        5      $ r   )r%
  r9  r`	  s     r   require_channels_last"ExternKernel.require_channels_last   s    ''+<==r   c                .    U R                  U[        5      $ r   )r%
  r;  r`	  s     r   require_channels_last_3d%ExternKernel.require_channels_last_3d$  s    ''+=>>r   c                    SS jnU" U5      (       a  U$ U R                  U[        R                  UR                  5       5      5      $ )Nc                     U R                  5       nU[        R                  R
                  ;   =(       a'    [        R                  R
                  U   R                  $ ! [        [        4 a     gf = fr  )r  AttributeErrorr  rm   r  r	  	is_mkldnn)r   r   s     r   is_mkldnn_tensor9ExternKernel.require_contiguous.<locals>.is_mkldnn_tensor*  s]    zz| 177,,,R1B1B41H1R1RR #$78 s   A A0/A0r   r   r   r   r&
  r  r  r
  )r	  r   r>
  s      r   r   ExternKernel.require_contiguous(  sC    	S AH,,>44QZZ\B r   c                h    U R                  U[        R                  UR                  5       5      5      $ r   rA
  r`	  s     r   require_contiguous_strides'ExternKernel.require_contiguous_strides:  s-     ((~00>
 	
r   c                    g r   r   r  s    r   r	  ExternKernel.apply_constraintB  r*  r   c                   [        U[        5      (       d   [        U5      5       e[        U[        5      (       d  [        U5      nU R                  (       d   S5       e[        U5      n[        U R                  5      nX4:  aq  [        R                  SU R                  XC-
  5        [        X45       H?  nU R                  U   S   nUR                  Xb;   a  X&   OU R                  U   S   5        MA     U$ )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   rJ  )r   r   r   r   r	  r   r  r  r	  r   r  )r  r   r   n_args
n_pos_argsr   arg_names          r   fill_non_provided_args#ExternKernel.fill_non_provided_argsE  s     $))54:5)$%%:D""U$UU"T,,-
 II^  #	 6...q1&9) $,,Q/@ / r   c                   [         R                  R                  (       Ga`  / nS nU(       ae  U R                  (       aT  [	        U R
                  5      [	        U5      :X  d   S5       eU R                   Vs0 s H  oDR                  S5      U_M     nn[        U R
                  5       H  u  pVUb3  Uc   eUR                  X   5      nU(       a  UR                  S5      OS nOb[	        U R                  5      U-   n	U R                  (       a7  U	[	        U R                  5      :  a  U R                  U	   R                  S5      OS nUR                  [         R                  R                  R                  Xh5      5        M     U$ U R
                   V
s/ s H,  n
[         R                  R                  R                  U
5      PM.     sn
$ s  snf s  sn
f )NzDnames passed to codegen_const_args does not match self.constant_argsr   r   )rm   r  r	  r	  r   r	  r  r   re  r  rJ  val_to_arg_str)r  r  r  name_to_arg_propertiesr{	  r   r   proptype_r   r  s              r   codegen_const_argsExternKernel.codegen_const_argsi  s   77F
 &*",,4--.#e*< Z< 594G4G*4GSGGFOS(4G ' * "$"4"45)5 ,,,155eh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!KL 6 MDHDVDVWDVqAGG((77:DVWW'*& Xs   4G3G
c                    [         R                  R                  (       aD  U R                  b7  U R	                  / U R
                  QU R                  QU R                  5      nSnOU R
                  nSn/ n[        U5       H  u  pE[         R                  R                  (       a  U R                  (       a  U[        U R                  5      :  d   S5       eU R                  U   R                  S5      nUR                  [         R                  R                  R                  XV5      5        M  UR                  [         R                  R                  R                  U5      5        M     U(       a  UR                  U R!                  5       5        U$ )NFTz-Invalid access to ExternKernel.arg_propertiesr   )rm   r  r	  r	  rL
  re  r	  r   r   r	  r   r  r  rJ  rO
  ru  rS
  )r  re  need_codegen_constant_argsr   r   r   rR
  s          r   codegen_argsExternKernel.codegen_args  s*   774#3#3#?003$++3 2 23T[[F */&[[F)-&f%DAww""**q3t7J7J3K/K CK ++A.226:AGG00??IJAGG00??BC & &KK//12r   c                    X;   a  UR                  U5      $ XR                  ;   a  U R                  R                  U5      $ U R                  R                  U5      =nb  UR                  S5      $ [        U S35      e)zGiven an argument name, queries for values in (in order):
1. any provided kwargs for this function.
2. the class self.kwargs member.
3. any available default arguments in self.allarg_properties.rJ  z not in self.allarg_properties)r  r   r	  r  )r  rK
  r   r{	  s       r   get_kwargs_valueExternKernel.get_kwargs_value  st    
 ::h''{{";;??8,,))--h77CD77?++z)GHIIr   c           	        [         R                  R                  (       a  U R                  b  [	        U R
                  5      S:X  a  / $ / nU R                   H  nU(       a  US:X  a  M  U R                  U5      n[        U[        5      (       a  UR                  U5        MK  U R                  c   eU R                  R                  U0 5      R                  S5      nUR                  [         R                  R                  R                  XE5      5        M     U$ U R                  R!                  5        VVs/ s H3  u  pdU S[         R                  R                  R                  U5       3PM5     nnnU$ s  snnf )Nr   r   r   r  )rm   r  r	  r	  r   r	  r	  rZ
  r   r!   r  r	  r  rJ  rO
  r   rW  )r  skip_outr   rK
  rP  rR
  ks          r   codegen_kwargsExternKernel.codegen_kwargs  s8   77+D4F4F0G10L	F >>E 1))(3a&&MM!$11=== 2266xDHHPEMM!''"6"6"E"Ea"OP ?"  !KK--//DA #Qqww++::1=>?/   	s   6:E5c                    U R                   bS  U R                   R                  n[        USS5      nUR                  SS5      nUR	                  SS5      S   nU SU 3nU$ SnU$ )	Nr   unknown_namespacer	  r	  r	  r@   r   
unknown_op)r	  r  r   r	  rsplit)r  r  op_namespaceop_names       r   get_op_nameExternKernel.get_op_name  sv    <<#\\((F"6<9LML'//'BL'..sA6q9L%ax0G  #Gr   c                   [         R                  (       a  [        R                  R                  (       d  [        U R                  5       5      S:X  a  g [        R                  R                  R                  U R                  5       5      n[        R                  R                  R                  U R                  5       5      nU R                  5       nUR                  SU R                  5        SU SU SU< S3	5        g g g )Nr   zassert_size_stride(r  r  )rA   size_assertsrm   r  r	  rh   r
  rJ  codegen_shape_tupler/  rg
  rL  r  )r  r	  r  r  rf
  s        r   codegen_size_asserts!ExternKernel.codegen_size_asserts  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF&&(G%dmmo%6bb7+UVW (;r   c           	     j   [         R                  (       a  [        R                  R                  (       d~  U R                  5       nU[        R                  R                  ;  nU R                  5       nU(       a!  UR                  SU S[         SU< S35        g UR                  SU SU S35        g g g )Nzassert_alignment(r  r  z	# buffer z (op: z) is assumed to be not aligned)
rA   alignment_assertsrm   r  r	  r  ra  rg
  rL  ra   )r  r	  r   alignedrf
  s        r   codegen_alignment_asserts&ExternKernel.codegen_alignment_asserts  s    ##AGG,?,?==?D!''";";;G&&(G!!'vR/@7+QO !!vVG94RS -@#r   c                    [         R                  R                  (       a  [        R                  R
                  (       a  gUR                  5         U R                  5       nUR                  SU SU S35        g)zS
Track outputs of fallback operators if config.test_configs.track_memory_lifecycle
Nztrack_tensor(z, 'z'))	rA   test_configstrack_memory_lifecyclerm   r  r	  "write_memory_track_allocation_oncer  rL  )r  r	  r   s      r   codegen_memory_tracking$ExternKernel.codegen_memory_tracking  sV     ""99QWW=P=P224}}M$s4&;<r   c                N    U R                  5       nU R                  5       nU/ /U4$ )z4
get output sizes and strides, for template_codegen
)r
  r/  )r  r  r  s      r   get_group_strideExternKernel.get_group_stride  s*     //#r{G##r   c                   [         R                  R                  nU R                  5       nU R	                  5       nU Vs/ s H  oAR                  U5      PM     nn[        [        U5      5       Vs/ s H  n[        SU 35      PM     nn[        [        [        U5      5      UR                  SS9n[        U5       VV	s0 s H  u  pX_M	     n
nn	[        [        U
5      5       Vs/ s H  oZU   PM	     nnU Vs/ s H  oVU   PM	     nnU R                  5       nU" U5      n[         R                  R                  R                  XbU/5      u  pn[        S5      u  nn[        [!        Xo" U Vs/ s H  nU" U5      PM     sn5      5      5      n[#        [$        R&                  " U5      U5      nU[)        U5      4$ s  snf s  snf s  sn	nf s  snf s  snf s  snf )z3
Manually get canonicalization of the output index
r	  T)r  r  c)rm   r  r  r
  r/  r	  r   r   rf   r  rA  r   r  re  rL   r   r   ri   r   r  r   )r  r  r.  rQ  r   r   rO  index_orderr   r   r   r   rX  r   	new_sizesr   rj  r   add_varreplacements                       r   canonicalizeExternKernel.canonicalize  s   
 77##//#29:'Q%%a(':;@U;LM;La(1QC1;L
MU3w<0g6I6ISWX+4[+AB+Axs#(+AB$)#f+$67$6q$67-23UmU
3##%
#%&WW%5%5%E%Ew&
"	F !%
73z7	3R	1GAJ	3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F=1G G'G;G/Gc                    U(       a  [         O[        n[        R                  X5      nU R                   H  nX2" U5      -  nM     U R
                  R                  5        H  nX2" U5      -  nM     U$ r   )maybe_free_unbacked_symbolsmaybe_free_symbolsrx  rW  r	  r   r   )r  rd  maybe_get_symbolsr  r{	  s        r   rW  !ExternKernel.get_free_symbol_uses#  sp     ,9'>P 	 --dB%%C"3''A &;;%%'C"3''A (r   c           
     ,   [        U SS 5      nSU< 3/nU[        R                  " U 5       Vs/ s H'  nUR                   S[        XR                  5       3PM)     sn-  nUR	                  SU R
                  < 35        U R                  U5      $ s  snf )Nr	  zpython_kernel_name=r  r  )r   r  fieldsr   r  rq  r  )r  kernel_namer  r  s       r   r  ExternKernel.__str__3  s    d$8$?!+1
 	$++D1
1 zzl!GD**5671
 	
 	|D$4$4#789u%%
s   .B)r	  r	  r	  r	  r	  r   r	  r	  r	  r	  r	  r	  r	  r   NNNNr   N)r   r  rJ  r  re  rN	  r	  r  r   Optional[dict[str, Any]]r	  r	  r	  r  r	  r  r	  r	  r	  r	  r   r   r  r  rt  r	  rr   r   r   r   r	  r  r   r   )r	  r  r   r   r  )r   r   r   r:  )r	  r{   r   r   r   r   r   zituple[Any, list[Any], list[Any], Callable[[Any, Any], Any], Optional[dict[sympy.Symbol, pytree.KeyPath]]])r   r   r   rO  rl	  )NNF)
r   r   r   Optional[Sequence[int]]rD  r  r6  r   r   r   r  )r   r   rD  r  r6  r   r   r   )r   r   r   r  r6  r   r   r   )r   r  r   r	  r   r  )r  rn  r   r   r   r   )rK
  r   r   r   r   r   )r]
  r   r   r   )r   z'tuple[list[Sequence[Expr]], list[Expr]])r   ztuple[Expr, Sequence[Expr]]r  )@r   r   r   r   r  r	  r   r  r  r   r   r	  r	  r	  r   r	  r	  r	  r	  r	  r	  r	  r  r  r  r	  r  r	  r	  r	  r	  r	  r  r	  r?  r
  r
  r]	  r!
  r-
  r&
  r%
  r5
  r8
  r   rD
  r	  rL
  rS
  rW
  rZ
  r_
  rg
  rl
  rq
  rw
  rz
  r
  rX   rW  r  r>  r   rA  rB  s   @r   r  r    s   
 $&M=%(..tDFND-1K*1(,,%)O]) 4?3D3D4!=  +/K'.59N293>3D3D40  =A9@<G<M<M=9  .9->->t-T*T (*+/15,0)-79.2,, , :	,
 %, ), /, *, ', (5, ,, 
, ,<.#$J!
-
";0+& 
 
 w
!w
*-w
9<w

w
 w
r C
 C
J ! !@ ! !  *.6:#aa 'a 4	a
 a 
a aF QV

'9
JN
	
 
 DIPP,P=AP	P P
 > > ? ?  " 
 
"!"+9"	"HXB4J4	
	=$'> N+$)!	! ,
& Hr   r  c                  x   ^  \ rS rSrSS jr       S                   SU 4S jjjrS	S jrSrU =r$ )
ExternKernelOutiB  c                &    UR                  U 5        g r   )generate_extern_kernel_outr	  s     r   r	  ExternKernelOut.codegenD  s    **40r   c
                :  > U R                  U5      n
[        U
[        5      (       d   [        U
5      5       e[        TU ]  S UU
UU=(       d    0 S UUUU	5
        [        R                  R                  U 5      U l	        [        R                  R                  U 5        g r   )r  r   r   r   r  r  rm   r  r  r   r  )r  rJ  re  r	  r   r	  r	  r	  r	  r	  unwrapped_inputsr  s              r   r  ExternKernelOut.__init__G  s      ..v6*H55Mt<L7MM5Lb)	
 GG++D1		""4(r   c                    gr'  r   r  s    r   r  ExternKernelOut.should_allocated  r  r   r  r
  r
  )rJ  r  re  r  r	  r  r   r
  r	  r	  r	  r  r	  r  r	  r  r	  r	  r   r   r  )	r   r   r   r   r	  r  r  r   rA  rB  s   @r   r
  r
  B  s    1 (*+/15,0)-79.2)) !) %	)
 )) /) *) ') (5) ,) 
) ): r   r
  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )RandomSeedsih  c           	       > [         R                  " [         R                  5      n[        TU ]  [        U[         R                  U/S9/ UR                  UR                  U//SS[        R                  R                  S9  g )Nr  zaten.randint.low_outzat::_ops::randint_low_out::call)rJ  re  r	  r	  r	  r	  )r  rC  r  r  r  rL  ru  r%  r	  randintlow_out)r  countr  limitsr  s       r   r  RandomSeeds.__init__i  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
r   r   )r
  r   r  r  r   r   r   r   r   r   r  r   rA  rB  s   @r   r
  r
  h  s    
 
r   r
  c                  |   ^  \ rS rSrSS jr      S                 S	U 4S jjjrS
S jrSS jrSrU =r	$ )r	  i|  c                &    UR                  U 5        g r   )generate_extern_kernel_allocr	  s     r   r	  ExternKernelAlloc.codegen}      ,,T2r   c	                Z  > U R                  U5      n	[        S U	 5       5      (       d   e[        T
U ]  S U[	        [
        [           U	5      UU=(       d    0 S UUUU5
        / U l        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   rI	  )r   r   s     r   r   -ExternKernelAlloc.__init__.<locals>.<genexpr>  s     C2BQ:a((2Brb  )r  r   r  r  r   r   r   r  rm   r  r  r   r  )r  rJ  re  r	  r   r	  r	  r	  r	  r
  r  s             r   r  ExternKernelAlloc.__init__  s      ..v6C2BCCCCC&!#34Lb)	
 ')GG++D1		""4(r   c                    gr  r   r  s    r   r  !ExternKernelAlloc.should_allocate  r	  r   c                    [         er   r  r  s    r   r	  "ExternKernelAlloc.apply_constraint  r  r   )r   r  r
  )r   NNNr   N)rJ  r  re  r  r	  r  r   r
  r	  r  r	  r  r	  r  r	  r	  r   r   r  rt  )
r   r   r   r   r	  r  r  r	  r   rA  rB  s   @r   r	  r	  |  s    3 (*+/,0)-79.2)) !) %	)
 )) *) ') (5) ,) 
) )@" "r   r	  c                  h   ^  \ rS rSrSr        S	U 4S jjrS
S jrSS jrSS jrSS jr	Sr
U =r$ )r  i  zH
An output buffer that represents the mutation of a pre-existing buffer
c                   > [         TU ]  S US9  UR                  5       n[        R                  R                  U5        U/U l        X0l        [        R                  R                  U 5      U l	        g r  )
r  r  r  rm   r  r  mutation_namesmutating_noder  r   )r  rJ  mutated_noder
  mutated_node_namer  s        r   r  MutationOutput.__init__  s`     	d62(113	##$5601(5GG++D1	r   c                    U R                   $ r   )r
  r  s    r   r  MutationOutput.get_defining_op  s    !!!r   c                    U R                   $ r   )r
  r  s    r   ri  !MutationOutput.get_mutation_names  r  r   c                    gr  r   r  s    r   r  MutationOutput.should_allocate  r	  r   c                j    U R                  5       nS U 5        Vs/ s H
  nUc  M  UPM     sn$ s  snf )Nc              3  `   #    U  H$  n[         R                  R                  U5      v   M&     g 7fr   )rm   r  try_get_buffer)r   r   s     r   r   6MutationOutput.get_mutation_buffers.<locals>.<genexpr>  s"     P..t44   ,.)ri  )r  r
  r  s      r   get_mutation_buffers#MutationOutput.get_mutation_buffers  s@    002 QP
P P
 	
 
s   00)r
  r
  r   )rJ  r  r
  r   r
  r  r   r   r  r  r  r   r  )r   r   r   r   r  r  r  ri  r  r
  r   rA  rB  s   @r   r  r    sF    2 2062GP2	2"#
 
r   r  c                     ^  \ rS rSr% Sr0 rS\S'   \      SS j5       r\      SS j5       r	        SU 4S jjr
SS jrSS	 jrS
rU =r$ )TMADescriptori  aL  
An IR node representing a generic host-side TMA descriptor in the Triton API
Mostly useful for user-defined Triton kernels relying on host-side TMA;
but can, in principle, be used for Inductor's Triton templates, too.

See TMADescriptorExperimental and TMADescriptorStable for the two implementations
(the old API and the new API)
zdict[Any, TMADescriptor]_CACHEc                    [        U5      S:X  d   eUS   S:X  a  [        U/US   Q76 $ US   S:X  d   e[        U/US   Q76 $ )Nr   r   experimentalr@   r#  )r   TMADescriptorExperimentalTMADescriptorStable)r	  rP  tma_metas      r   _create_implTMADescriptor._create_impl  s\     8}!!!A;.(,VBhqkBBA;(***&v<<<r   c                    [        U5      U4nX0R                  ;  a  U R                  X5      U R                  U'   U R                  U   $ r   )idr
  r
  )r	  rP  r
  r  s       r   r  TMADescriptor.create  sB     &z8$jj !..v@CJJsOzz#r   c           
     8  > [         TU ]  S [        [        UUR	                  5       S95      [        [        [           U5      [        U5      S 5        Xl	        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )NrH  )r  r  ri  rO  r  r   r   rY  r   rP  rm   r  r  r   r  )r  rP  re  r	  r  s       r   r  TMADescriptor.__init__  s     	 !,,. &!6*- 	
  GG++D1		""4(r   c                &    UR                  U 5        g r   )generate_tma_descriptorr	  s     r   r	  TMADescriptor.codegen      ''-r   c                    U R                   $ r   )rP  r  s    r   
get_tensorTMADescriptor.get_tensor  r  r   )r   rP  )rP  r   r
  ztuple[str, tuple[Any, ...]]r   r
  )rP  r   re  r  r	  r  r   r   r
  r  )r   r   r   r   r  r
  r   r?  r
  r  r  r	  r
  r   rA  rB  s   @r   r
  r
    s     (*F$)=='B=	= = 'B	 ))&3)DQ)	).. r   r
  c                  H   ^  \ rS rSrSr S         SU 4S jjjrSrU =r$ )r
  i  z
the new host-side TMA Descriptor API:
(the ones obtained via create_{1d,2d}_tma_descriptor calls).

See also TMADescriptorStable for the new API.
c                ^  > [        U5      S;   d   e[        U5      [        U5      :X  d   eUc  UR                  5       R                  nX l        X0l        X@l        [        U R                  5      U l        U/n/ U R                  QU R                  QU R
                  Pn[        TU ]!  UUUS9  g )N)r@   r   rP  re  r	  )	r   r  r  r  
block_dimselement_sizer  r  r  )r  rP  r  r
  r
  re  r	  r  s          r   r  "TMADescriptorExperimental.__init__  s     4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	' 	 	
r   )r
  r  r
  r  r   )
rP  r   r  list[Union[int, torch.SymInt]]r
  r
  r
  r  r   r   r   r   r   r   r  r  r   rA  rB  s   @r   r
  r
    sG     '+

 -
 3	

 $
 

 
r   r
  c                  0   ^  \ rS rSrSrSU 4S jjrSrU =r$ )r
  i0  z
the new host-side TMA descriptor API
(the ones obtained via TensorDescriptor.from_tensor).

See also TMADescriptorExperimental for the old API.
c                2   > X l         [        TU ]	  UU/US9  g )Nr
  )block_shaper  r  )r  rP  r
  r  s      r   r  TMADescriptorStable.__init__8  s&    &8% 	 	
r   )r
  )rP  r   r
  r
  r
  rB  s   @r   r
  r
  0  s    
 
r   r
  c                  J   ^  \ rS rSr          SU 4S jjrSS jrSrU =r$ )SubgraphBufferiB  c                  > [         T
U ]  S X5        X0l        X@l        [        R
                  R                  U 5      U l        [        R
                  R                  U 5        [        R
                  R                  U R                  XE5      U l
        [        U R                  5      (       d   e[        U R                  5      nU HT  nXpR                  R                  UR                  '   U R                  R                  R!                  UR                  5        MV     U Vs/ s H  oR                  PM     snU l        SS KJs  Jn	  [        R*                  " U R                  5         U	R-                  SSSS9   U R                  R.                  " U R                  6   S S S 5        S S S 5        g s  snf ! , (       d  f       N= f! , (       d  f       g = f)Nr   FATEN)max_autotunemax_autotune_gemmmax_autotune_gemm_backends)r  r  r_  example_inputsrm   r  r  r   r  make_subgraphsubgraphrL	  re  rh  r  graph_input_namesr  
sym_inputstorch._inductor.configr  rA   set_graph_handlerr    run)r  rJ  r   r_  r
  subgraph_namer
  sym_inpsym_varinductor_configr  s             r   r  SubgraphBuffer.__init__C  sR    	v3,GG++D1		""4(--dgg~U,,,,(5
!G7>MM&&w||4MM++227<<@ " 8BBzG<<zB88  / &&""'+1 ' 
 !!4#6#67 0/	 C  0/s*   F,%G7$F1G1
F?	;G
Gc                $    " S S5      n[        U R                  5      (       d   eU R                   Vs/ s H  o3R                  5       PM     nnUR                  U" U R                  5      / U R
                  QUQU R                  /5        g s  snf )Nc                      \ rS rSrSS jrSrg),SubgraphBuffer.codegen.<locals>.CodegenGraphih  c                2    Xl         UR                  U l        g r   r  r   )r  r  s     r   r  5SubgraphBuffer.codegen.<locals>.CodegenGraph.__init__i  s    "
!JJ	r   r  N)r  rs   )r   r   r   r   r  r   r   r   r   CodegenGraphr   h  s    'r   r  )rL	  re  r  'codegen_subgraph_with_flattened_outputsr
  r
  r   )r  r	  r  r  outer_inputss        r   r	  SubgraphBuffer.codegeng  s|    	' 	'
  ,,,,7;{{C{!++-{C77'-doo--YYK	
 Ds   B)r
  r_  r   r
  r
  )
rJ  r  r   r  r_  torch.fx.GraphModuler
  	list[Any]r
  r   r
  r   r   r   r   r  r	  r   rA  rB  s   @r   r
  r
  B  sC    "8"8 ""8 !	"8
 ""8 "8H
 
r   r
  c                     ^  \ rS rSrS
S jr\SS j5       r\" S 5       S   SU 4S jjj5       rSS jr	          SU 4S jjr
SS jrSS jrS	rU =r$ )UserDefinedTritonKerneliv  c                z  ^ SSK Jn  SSKJn  UR	                  U R
                  5      m/ n/ n/ n[        TU5      (       a  [        TS5      (       a&  UR                  U4S jTR                   5       5        O.[        TS5      (       d   eUR                  TR                  5        [        TS5      (       a<  TR                   H+  nUR                  TR                  R                  U   5        M-     O.[        TS5      (       d   eUR                  TR                  5        TR                   nTR                  mTX4U4$ )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  V   >#    U  H  nTR                   R                  U   v   M      g 7fr   )r   	arg_namesr   r   r	  s     r   r   BUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>  s$      *4FqFII''*4Fs   &)restore_value	reset_idxreset_to_zero)triton.runtime.autotunerr  *torch._higher_order_ops.triton_kernel_wrapr  
get_kernel
kernel_idxr   r  ru  r  r  r  r  r   r  r  configs)r  r  r  r  restore_value_argsreset_to_zero_argsr   r	  s          @r   get_kernel_and_metadata/UserDefinedTritonKernel.get_kernel_and_metadataw  s   6P"--doo>(*(*fi(( v}--")) *4:4F4F*  v7777"))&*>*>?v{++))A&--fii.A.A!.DE * v7777"))&*>*>?nnGYYFw4FFFr   c                  ^ SSK Jn  U R                  5       u  mnnnUR                  TUU R                  UUU R
                  5      u  nnnU R                   V	s0 s H  oU R                  U	5      _M     n
n	[        TS5      (       a  [        TS5      (       d   [        T5      5       e[        U4S jTR                   5       5      n/ n/ n/ n/ n[        R                  " U
R                  5       [        [        R                   " S5      U5      5       GH  u  nnUU;   a  U" 5       (       a  M  UR#                  U5        UR#                  U5        [%        U[&        5      (       a@  UR#                  UR)                  5       5        UR#                  UR+                  5       5        M  [%        U[,        [.        [0        [2        R4                  45      (       a-  UR#                  U5        UR#                  [        U5      5        M  UU;   a)  UR#                  S5        UR#                  [,        5        GM  UcY   U" 5       (       a)  UR#                  S5        UR#                  [,        5        GMV  UR7                  5         UR7                  5         GMy  [9        S	[        U5       S
U 35      e   U R;                  U5        UR=                  UUUUUUSU R?                  5       U R@                  RB                  S9	  gs  sn	f )QOverrides the parent member.
See https://github.com/pytorch/pytorch/issues/151692r   )triton_version_uses_attrs_dictr  
constexprsc              3  B   >#    U  H  nTR                   U   v   M     g 7fr   )r  r  s     r   r   2UserDefinedTritonKernel.codegen.<locals>.<genexpr>  s     $TBSQV%5%5a%8BSs   r  r2  NzUnsupported arg type: r  T)	arg_typesraw_argsraw_keystriton_metar$  r  original_fxnode_name)"torch._inductor.utilsr#  r  !define_user_defined_triton_kernelr   gridr	  rZ
  r  r   r;   r$  r  chainrW  r   repeatr  r   r   r  r  r   rG  r   r   r!   r  r  r	  generate_kernel_callr  r	  r   )r  r	  r#  r  r  r  new_namer*  extra_launch_argsr^
  
named_argsconstexpr_namesr   r'  raw_keys_filteredraw_args_filteredr   r{	  r	  s                     @r   r	  UserDefinedTritonKernel.codegen  s   
 	I ((*	
 55KKII
		
 261S1S
1SAt$$Q''1S 	 
 v{++0M0M 	
tP
 	
M %$T&BSBS$TT!	')')"I$4$4R$8:K L
ID# &+I+K+K$$T*$$S)#v&&C1134  1C#udEJJ!?@@C   c+( B  % 233KKO$$S)%))+%))+),B49+RPSu*UVVI
L 	W%$$&&#??$!%!2!2 	% 
	
g
s   K2c                P   > [         TU ]  U5      [        U R                  U5      -  $ r   )r  rW  r)   r.  r  s     r   rW  ,UserDefinedTritonKernel.get_free_symbol_uses  s-     w+M:=MII}>
 
 	
r   c                    [        5       $ r   r:   r  s    r   r  0UserDefinedTritonKernel.get_unbacked_symbol_defs  r  r   c          	     h  > / n0 n/ nUR                  5        H  u  p[        U	[        5      (       aX  [        R	                  U R                  U	5      5      n
X;   a  [        R                  XU   5      n
UR                  U
5        XU'   Mr  UR                  U	5        XU'   M     [        U5      S:w  d   eUS   R                  5       U l        [        U[        5      (       d   [        U5      5       e[        TU ]=  S [!        U R                  S9U[#        U5      U5        Xl        X l        U R)                  5       u  p  n[+        US5      (       d   eUR,                   Vs/ s H  oU;   d  M
  UPM     snU l        SSKJn  [        U5      S:  a  US   R4                  O0 nU" U0 UEUEU5       Vs/ s H  nUU   PM
     snU l        U R6                   Vs/ s H!  n[9        [!        U R                  S9UU 5      PM#     snU l        [<        R>                  RA                  U 5        g s  snf s  snf s  snf )Nr   r  r  )identify_mutated_tensors)!rW  r   r   rx  r^	  r]	  r
  r  r  r   r  r  r   r   r  r  r  r   r  r.  r  r  r  r	  r  r>  r   mutable_argsr  r	  rm   r  r  )r  r  r.  tma_descriptor_metadatakernel_argsre  r   r	  r^
  rP  r  r	  r  r   r{	  r>  autotuned_kwargsr  r  r  s                      r   r   UserDefinedTritonKernel.__init__  s$     "$&&(%%'DA!Y'' 99$:L:LQ:OP/%,,Q0JKAa q	$$Q'q	 ( 6{aQi**,&(++9T&\9+dkk*- 	
 %	 $ < < >A v{++++!++.
+Ck/AC+.
* 	X03Gq0@71:,,b 0;;;*:;=T
 
 ((!
( :T[[93E(!
 	
""4(%.

!
s   	H%,H%0H*(H/c                ,    [        U R                  5      $ r   )r   r	  r  s    r   r  #UserDefinedTritonKernel.get_outputs:  s    D))**r   c                    U R                   $ r   r  r  s    r   r  "UserDefinedTritonKernel.get_device=  r  r   )r  r.  r  r?  r	  r	  )r   z(tuple[Kernel, Any, list[str], list[str]]r
  r  r  r  )
r  r   r.  r   r@  r	  rA  r	  r   r   r  r  )r   r   r   r   r  r   r	  rX   rW  r  r  r  r  r   rA  rB  s   @r   r  r  v  s    G> W
 W
r 56$)
!
	!
 7
;) ;) 	;)
 "0;) $;) 
;)z+ r   r  c                  h   ^  \ rS rSrSrS	S jrS
S jrSS jrSS jr        SU 4S jjr	Sr
U =r$ )InplaceBernoulliFallbackiA  =
This needs to be a custom class to handle mutation properly
c                    [        S U R                   5       5      (       d   eS U R                   5       u  n[        R                  R                  (       a\  UR                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g UR                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   rI	  r   r  s     r   r   3InplaceBernoulliFallback.codegen.<locals>.<genexpr>G  s     >+Q:a((+rb  c              3  ^   #    U  H#  n[        [        U5      R                  5       v   M%     g 7fr   )r   r   r  rM  s     r   r   rN  H  s"     I[VQ1133[s   +-r  r  z, NULL)r  )r   re  rm   r  r	  rL  r	  r  r  reprr	  ending)r  r	  r   s      r   r	   InplaceBernoulliFallback.codegenF  s    >$++>>>>>IT[[I77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klr   c                    gr  r   r  s    r   r  (InplaceBernoulliFallback.should_allocateU  r	  r   c                &    U R                  S5      /$ r  rR	  r  s    r   ri  +InplaceBernoulliFallback.get_mutation_namesX      "##r   c                    [        5       $ r   r:   r  s    r   r  1InplaceBernoulliFallback.get_unbacked_symbol_defs[  r  r   c                R  > [         TU ]  S [        UR                  5       S9U R	                  U/5      UUS9  [
        R                  R                  UR                  5       5        [
        R                  R                  U 5      U l
        [
        R                  R                  U 5        g )Nr  r	  )r  r  r  r  r  rm   r  r  r  r  r   r  )r  r	  r   r	  r  s       r   r  !InplaceBernoulliFallback.__init__^  s     	alln-$# 	 	
 	
##AJJL1GG++D1		""4(r   r  r
  r  r  r  )r	  r{   r   r   r	  r   r   r   r   r   r   r   r  r	  r  ri  r  r  r   rA  rB  s   @r   rI  rI  A  sF    $)'),2)DG)	) )r   rI  c                     ^  \ rS rSrSrS
S jrSS jrSS jrSS jr        SU 4S jjr	\
 S       SS jj5       rS	rU =r$ )InplaceCopyFallbackin  rJ  c                N    U R                  5       u  p#nUR                  X2U5        g r   )rW
  codegen_device_copy)r  r	  r  r  non_blockings        r   r	  InplaceCopyFallback.codegens  s%    #'#4#4#6 <##Cl;r   c                    gr  r   r  s    r   r  #InplaceCopyFallback.should_allocatew  r	  r   c                &    U R                  S5      /$ r  rV  r  s    r   ri  &InplaceCopyFallback.get_mutation_namesz  rX  r   c                    [        5       $ r   r:   r  s    r   r  ,InplaceCopyFallback.get_unbacked_symbol_defs}  r  r   c           	       > [         TU ]  S UUUSSS9  [        R                  R	                  US   R                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )Nz
aten.copy_aoti_torch_copy_)r	  r	  r   )	r  r  rm   r  r  r  r  r   r  )r  rJ  re  r	  r  s       r   r  InplaceCopyFallback.__init__  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(r   c                    X4 Vs/ s H  o@R                  U5      PM     nnU4n[        [        UR                  5       S9UU5      nU$ s  snf r  )r]	  r`  r  r  )r	  r  r  rc  r  re  r	  r  s           r   r  InplaceCopyFallback.create  sU     25
;
1##A&
;%$cnn./

  <s   A	r  r
  r  r  r  )rJ  r  re  r  r	  r  r   r   r  )r  r   r  r   rc  r   r   r`  )r   r   r   r   r  r	  r  ri  r  r  r?  r  r   rA  rB  s   @r   r`  r`  n  s~    <$)) !) %	)
 
)$ <A

%
59
	
 
r   r`  c                  J    \ rS rSrSrS
S jrSS jrSS jrSS jrSS jr	Sr
g	)MutatingFirstArgExternKerneli  rJ  c                   [        U R                  5      (       d   e/ S U R                   5       Q[        [        U R                  5      QnUR                  U R                  5        SSR                  U5       SUR                   35        g )Nc              3  @   #    U  H  oR                  5       v   M     g 7fr   r  rM  s     r   r   7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>  s     9[!!##[   r  r  r  )	rL	  re  r  rP  r	  rL  r	  r  rQ  )r  r	  argrefss      r   r	  $MutatingFirstArgExternKernel.codegen  s    ,,,,
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
r   c                    gr  r   r  s    r   r  ,MutatingFirstArgExternKernel.should_allocate  r	  r   c                &    U R                  S5      /$ r  rV  r  s    r   ri  /MutatingFirstArgExternKernel.get_mutation_names  rX  r   c                    [        5       $ r   r:   r  s    r   r  5MutatingFirstArgExternKernel.get_unbacked_symbol_defs  r  r   c                    gr'  r   r  s    r   has_side_effects-MutatingFirstArgExternKernel.has_side_effects  r  r   r   Nr
  r  r  r  )r   r   r   r   r  r	  r  ri  r  r  r   r   r   r   rq  rq    s     
$r   rq  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )ResizeStorageBytesi  c                  > [        U[        5      (       d   S5       e[        TU ]  S [	        UR                  5       S9U R                  U/5      U4S9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        SU l        SU l        [        U[         ["        [$        45      (       d   ['        U5      5       e[        R                  R(                  R+                  UR,                  R                  5       5        g )NzTODO: dynamic shapesr  )r	  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)r   r   r  r  r  r  r  rm   r  r  r  r  r   r  r	  r	  rd  rX  r   r   never_reuse_buffersr  rI  )r  variabler  r  s      r   r  ResizeStorageBytes.__init__  s    (C((@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG(Xz9$EFFVXVF	##''(>(>(@Ar   )r	  r   r	  )r  r   r  r   r   r   r
  rB  s   @r   r  r    s    B Br   r  c                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )SetSourceTensorKerneli  c                  > UR                  5         [        TU ]	  UR                  5       X/S[        R
                  R                  R                  R                  S9  [        U[        [        [        45      (       d   [        U5      5       e[        R                  R                   R#                  UR$                  R'                  5       5        [        R                  R                   R#                  UR'                  5       5        [        R                  R                   R#                  U R'                  5       5        UR)                  5       n[+        [-        US9X5      [+        [-        US9X 5      /U l        g )Nz!torch.ops.aten.set_.source_Tensor)r	  r	  r  )r2  r  r  r  r  rk   r	  set_source_Tensorr   rd  rX  r   r   rm   r  r  r  rI  r  r  r  r  r	  )r  self_tensorstorage_tensorr  r  s       r   r  SetSourceTensorKernel.__init__  s   $$&%%')B		++99	 	 	
 +*i'HII 	
4L
 	
I 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4kH:V4nK!
r   c                F    U R                  S5      U R                  S5      /$ r  rV  r  s    r   ro  2SetSourceTensorKernel.get_inputs_that_alias_output  s    "DOOA$677r   r	  )r  r   r  r   r   r   r  )r   r   r   r   r  ro  r   rA  rB  s   @r   r  r    s    
(8 8r   r  c                     ^  \ rS rSrSrSS jrSS jrSS jrSS jrSSS	.               SU 4S
 jjjr	Sr
U =r$ )ScatterFallbacki  z
This needs to be a custom class to handle mutation properly.
This class handles both aten.scatter_ and aten.scatter_reduce_.
It also handle the case `src` being a scalar properly.
c           
        U R                   S   n[        R                  R                  (       a  SSS.nX#;   a  X2   n[	        U R
                  5      (       d   eU R                  (       a  S U R
                   5       u  pEnO$S U R
                   5       u  pEU R                  S   nUR                  UX@R                  S   XV/U R                  U R                  U R                  UU R                  5       5        g )	Nr  rw  rv  )r  multiplyc              3  @   #    U  H  oR                  5       v   M     g 7fr   rt  rM  s     r   r   *ScatterFallback.codegen.<locals>.<genexpr>  s     Jk2244krv  c              3  @   #    U  H  oR                  5       v   M     g 7fr   rt  rM  s     r   r   r    s     EA--//rv  r@   r   )r   rm   r  r	  rL	  re  src_is_tensorr	  generate_scatter_fallbackr	  r	  r_
  )r  r	  r  get_operator_enumr   r   r  s          r   r	  ScatterFallback.codegen  s    X&77(-6 B**2,,,,JdkkJOQsEEJQ$$Q'C))""1%u2  ##!	
r   c                    gr  r   r  s    r   r  ScatterFallback.should_allocate  r	  r   c                p    U R                   S   n[        U[        5      (       d   eUR                  5       /$ r  rP	  r  rg  s     r   ri  "ScatterFallback.get_mutation_names	  s1    kk!n#v&&&&r   c                    [        5       $ r   r:   r  s    r   r  (ScatterFallback.get_unbacked_symbol_defs  r  r   NTr  include_selfc               d  > [        U[        5      U l        U R                  (       a&  X$U4 Vs/ s H  oR                  U5      PM     n	nU4n
O$X$4 Vs/ s H  oR                  U5      PM     n	nX54n
[        TU ]  S [        UR                  5       S9U R                  U	5      U
XgS.[        U5      SS/US9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R!                  U 5        g s  snf s  snf )Nr  r  r  r  )r	  r	  r	  )r   r   r  r]	  r  r  r  r  r  r   rm   r  r  r  r  r   r  )r  r	  r   r>  r   r  r  r  r  tensorsr	  r  s              r   r  ScatterFallback.__init__  s    (Y7 78oFo))!,oGF FM78jAj))!,jGA JMalln-(<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D(D-)r   r  r
  r  r
  r  )r	  r{   r   r   r>  r   r   r   r  r   r  r  r  r   r   r   r^  rB  s   @r   r  r    s|    
0 
 !%!!)!!) !) 	!)
 !) !) !) !) 
!) !)r   r  c                  p   ^  \ rS rSrSrS	S jrS
S jrSS jrSS jr            SU 4S jjr	Sr
U =r$ )IndexPutFallbacki5  zI
This needs to be a custom class to handle mutation and indices properly
c                   [        U R                  5      (       d   eS U R                   5       tp#n/ n[        U5      n[        U R                  5       Hd  u  pxU R                  U   b  UR                  [        U5      5        M1  UR                  [        R                  R                  R                  5        Mf     UR                  " U R                  5       X%U/U R                  5       Q76   g )Nc              3  @   #    U  H  oR                  5       v   M     g 7fr   rt  rM  s     r   r   +IndexPutFallback.codegen.<locals>.<genexpr><  s     &Rk':':'<'<krv  )rL	  re  r	  r   r  r  r	  rm   r  rJ  r  generate_index_put_fallbackr	  rS
  )	r  r	  r   r   valid_indicesr  iter_valid_indicesr   r   s	            r   r	  IndexPutFallback.codegen:  s    ,,,,&Rdkk&R#]!-0dll+DA||A*t$678qww33<<=	 , 	++  "A	
9=9P9P9R	
r   c                    gr  r   r  s    r   r   IndexPutFallback.should_allocateI  r	  r   c                &    U R                  S5      /$ r  rV  r  s    r   ri  #IndexPutFallback.get_mutation_namesL  rX  r   c                    [        5       $ r   r:   r  s    r   r  )IndexPutFallback.get_unbacked_symbol_defsO  r  r   c           
       > X0l         U Vs/ s H	  ofc  M  UPM     nnX$/UQ Vs/ s H  o R                  U5      PM     nnSn	[        T
U ]  S [	        WR                  5       S9U R                  U5      U4SU	US9  [        R                  R                  U R                  S5      5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g s  snf s  snf )Naoti_torch_index_put_outr  zaten.index_put_)r	  r	  r	  r   )r  r]	  r  r  r  r  r  rm   r  r  rR	  r  r   r  )r  r	  r   r  r   
accumulater   r  r  r	  r  s             r   r  IndexPutFallback.__init__R  s     $+=GqG=342M}2MN2MQ%%a(2MN4alln-(M0+# 	 	
 	
##DOOA$67GG++D1		""4( >Ns   C-C-C2)r  r   r
  r  r  r  )r	  ztorch._ops.OpOverloadr   r   r  r	  r   r  r  r   r   r   r^  rB  s   @r   r  r  5  s`    
$)*) ) 	)
 ) ) 
) )r   r  c                  2    \ rS rSr\SS j5       rSS jrSrg)
DeviceCopyil  c           
        UR                  5       (       dU  [        S UR                  5        5       5      (       a0  [        R                  R
                  (       d  UR                  U5      $ [        R                  R                  U5        UR                  5       nUc   e[        R                  R                  U5        [        S5        U4n[        R                  U5      nS nUR                  5       (       a  UR                  5       n[!        UR"                  5      =(       a    UR"                  S:H  =(       a    UnUR"                  S:H  =(       a    [!        UR"                  5      =(       a    UnU(       a%  [%        U5      (       a  SUR'                  5       l        [+        [-        UUR/                  5       UR                  5       UUS9U R1                  U5      /U5      $ )Nc              3  Z   #    U  H!  o[         R                  R                  ;   v   M#     g 7fr   )rm   r  r	  rk  s     r   r   $DeviceCopy.create.<locals>.<genexpr>q  s     G4Fq***4Fs   )+zDeviceCopy in input programr!  Tre  )r`  r   r  rA   aot_inductoruse_runtime_constant_foldingrf  rm   r  add_device_infor  r\   r  r   r
  r/  rd   r   r  r  rN  r  rL  r  r]	  )	r	  r   r  rc  x_devicer	  r  is_destination_pinnedis_source_pinneds	            r   r  DeviceCopy.createm  sm    GA4D4D4FGGG''DD''//	'<<>###	)78%++A.::<<\\^F8==!KfkkU&:K| 	 MMU"Kvfkk':K| 	  5a 8 8'+ALLN$

/ q!"

 
	
r   c                   U R                  5       n[        U5      S:X  d   eU R                  (       a2  UR                  US   U R                  R	                  5       US   5        g UR                  US   U R	                  5       US   5        g )Nr   r   r@   )rW
  r   r	  rb  r  )r  r	  r   s      r   r	  DeviceCopy.codegen  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Sr   r   N)r   r   r  r  rc  r   r   r   r
  )r   r   r   r   r?  r  r	  r   r   r   r   r  r  l  s    '
 '
RTr   r  c                     ^  \ rS rSrSrS
S jrSS jr            SU 4S jjrSS jr\	" S 5       S   SS jj5       r
SS jrS	rU =r$ )DynamicSelectStorageOffseti  a  
The result of computing a dynamic selection index is determined as follows: when the index in the
select operation is unbacked, the actual index calculation is ambiguous for negative indices
(index + size) versus non-negative indices (just index). To resolve this, we allocate an unbacked
SymInt to represent the storage offset and decompose the select operation into a call to as_strided,
computing the storage offset at runtime with this node.
c                    [        5       $ r   r:   r  s    r   r  $DynamicSelectStorageOffset.get_reads  r  r   c                    gr  r   r  s    r   r  *DynamicSelectStorageOffset.should_allocate  r	  r   c                   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        X0l        X@l        XPl	        g Nr!  r  )
r  r  r  r  r  unbacked_offset_symbolr   base_offsetbase_dim_strider  )r  r  r   r  r  r  r  s         r   r  #DynamicSelectStorageOffset.__init__  sB     	ze1DErJ '=#
&.	r   c                .    [        U R                  /5      $ r   )r;   r  r  s    r   r  3DynamicSelectStorageOffset.get_unbacked_symbol_defs  s    466788r   c                .    [        U R                  U5      $ r   )r)   r   rV  s     r   rW  /DynamicSelectStorageOffset.get_free_symbol_uses  s      

M::r   c                &    UR                  U 5        g r   )codegen_dynamic_select_indexr	  s     r   r	  "DynamicSelectStorageOffset.codegen  r
  r   )r  r  r   r  r  r  r  )r  sympy.Symbolr   r  r  Union[sympy.Symbol, int]r  r  r  r  r   r   r  r  r  r
  )r   r   r   r   r  r  r  r  r  rX   rW  r	  r   rA  rB  s   @r   r  r    s     ,  .	
 2 ' 
"9 89$);!;	!; :;
3 3r   r  c                  h   ^  \ rS rSrSrS	S jrS
S jr        SU 4S jjrSS jrSS jr	Sr
U =r$ )r   i  z3
The result of a call to aten._local_scalar_dense.
c                    [        5       $ r   r:   r  s    r   r  DynamicScalar.get_reads  r  r   c                    gr  r   r  s    r   r  DynamicScalar.should_allocate  r	  r   c                   > UR                  5         [        TU ]	  S [        [        R
                  " S5      S9U R                  U/5      5        Xl        X l        g r  )	r  r  r  r  r  r  r  symkeypath)r  r  r  rI  r  s       r   r  DynamicScalar.__init__  sI     	*ELL$78$:M:Mtf:U	
 r   c                .    [        U R                  /5      $ r   )r;   r  r  s    r   r  &DynamicScalar.get_unbacked_symbol_defs  s    488*%%r   c                &    UR                  U 5        g r   )codegen_dynamic_scalarr	  s     r   r	  DynamicScalar.codegen  s    &&t,r   )r  r  r  r  )r  r  r  zpytree.KeyPathrI  r   r   r   r  r
  )r   r   r   r   r  r  r  r  r  r	  r   rA  rB  s   @r   r   r     sF    *8@F	&- -r   r   c                     ^  \ rS rSrSrS
S jrSS jrSU 4S jjrSS jr\	" S 5       S   SS jj5       r
SS jrS	rU =r$ )r   i  z-
The result of a call to aten._assert_scalar
c                    [        5       $ r   r:   r  s    r   r  AssertScalar.get_reads  r  r   c                    gr  r   r  s    r   r  AssertScalar.should_allocate  r	  r   c                v   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        g r  )r  r  r  r  r  scalarr
  )r  r  r
  r  s      r   r  AssertScalar.__init__  s3    ell512	
 r   c                    gr'  r   r  s    r   r  AssertScalar.has_side_effects  r  r   c                .    [        U R                  U5      $ r   )r)   r  rV  s     r   rW  !AssertScalar.get_free_symbol_uses  s      ];;r   c           	        [         R                  (       d  g [        [        U R	                  SS95      5      n[
        R                  R                  (       a  g [
        R                  R                  (       a^  SU S3n[
        R                  R                  R                  U R                  SS9nUR                  SU SU R                   SU S	35        g [
        R                  R                  R                  U R                  SS9nUR                  S
U S35        UR                  S[        U R                  5       S35        UR                  U R!                  5        S35        g )NFrc  zstd::to_string(r  )r9  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not (z):z    raise RuntimeError(z = None)rA   scalar_assertsr	  r	  rW  rm   r  r	  r	  rJ  codegen_cpp_sizevarr  rL  r
  codegen_python_sizevarrP  r  )r  r	  symbol
symbol_strsizevars        r   r	  AssertScalar.codegen
  s4   $$ d44454IJK77WW  *6(!4Jgg**>>e ? G 	!J488*Tfgqfrrwx gg**AAe B G 	45 7TXX7GqIJ  19:r   )r
  r  r  r  )r  ro   r
  r   r   r   r  r  r
  )r   r   r   r   r  r  r  r  r  rX   rW  r	  r   rA  rB  s   @r   r   r     sR    	 N+$)<!<	!< ,<
; ;r   r   c                  *    \ rS rSr% S\S'   S\S'   Srg)ExternKernelNodei*  r   r   zexport_schema.Noder   r   Nr   r   r   r   r  r  *  s    
I
r   r  c                    ^  \ rS rSrSr SSS.               SU 4S jjjjrSU 4S jjrSS jrSS jrSS	 jr	\
      SS
 j5       rSS jrSS jrSS jrS r\SS j5       r\
SS j5       r\SS j5       rSU 4S jjrSrU =r$ ) FallbackKerneli0  z
A class that represents a fallback kernel for handling operators that are not
directly support by inductor. It currently supports functional ops, view ops,
inplace aten ops, and mutating ops that are auto-functionalizable.
Nr	  c                 >^  [         TT ]  U[        U5      [        U5      US9  ST l        U=(       d    0 T l        [        U[        R                  R                  [        R                  R                  45      (       d   SU S[        U5       S35       eUT l        UT l        Uc  0 OUT l        T R                  c   e[        R                   R#                  T R                  5        / T l        / T l        [        T R                  [        R                  R                  5      (       a  g ST R                  R)                  5       ;   a  g T R                  R*                  n[        R,                  R.                  R1                  T R                  5      (       a-  T R&                  R3                  US   R5                  5       5        g UR6                  (       a  [9        U5      (       d  [;        SU 35      eT R                  T R<                  T R>                  5      u  pS
U 4S	 jjn
[        R,                  R.                  RA                  XU5       H  u  pU
" X5        M     g )Nr\  F#Fails to create FallbackKernel for r   not supported_c10d_functionalr   z'NYI: Can't generate FallbackKernel for c                >  >^  [        T R                  [        R                  5      (       a+  [        U[        [
        45      (       d   [        U5      5       e[        R                  " T R                  5      (       a  [        U[
        [        45      (       a   eUc  g T R                  c  g SU U4S jjn[        R                  " T R                  5      (       a  Ub  U H  nU" U5        M     g g [        R                  " T R                  5      (       d   eU" U5        g )Nc                $  > TR                   R                  U R                  5       5        TR                  c   eTR                  R                  (       a<  TR
                  R                  [        [        U R                  5       S9U T5      5        g g r  )	alias_namesr  r  
alias_infois_writer	  r  r  r  )r  infor  s    r   	add_aliasPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias  sj      ''

5222??++))00&z'H!TR ,r   )r  r   r   r   )
r   r   r  ListTyper   r   library_utilsis_tensor_like_typer  is_tensorlist_like_type)r  r{	  r  optional_tensor_argr  s   `   r   handle_aliasing_and_mutation=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation  s    $))U^^44!#e}55@tCy@500;; &cE4=9999{&  44TYY???/2+!"56 03 # %88CCCC#r   )r  ztorch._C.Argumentr{	  r   r   r   )!r  r  r   use_runtime_dispatchr	  r   r  r	  r	  r	  r   r	  r	  r   r	  rm   r  warn_fallbackr  r
  r   r	  _libraryrq  mutates_and_returns_first_argr  r  
is_mutabler&   r  re  r	  
zip_schema)r  rJ  r	  r
  nontensor_argsr	  r   r	  schemar   r  r  r{	  r  s   `            r   r  FallbackKernel.__init__7  s    	+.!	 	 	
 %*!!2!8bUZZ**EJJ,J,JK
 
 	X04<.W	X 
 ","Nb&&222	d556 '))+d&&

(F(FGG !1!1!6!6!88
 !!)) >>==d>N>NOO&&{1~'>'>'@A%;F%C%C%9&B  **4;;8J8JK	< --88vNID(3 Or   c                @  > [         TU ]  5       nU R                  [        R                  R
                  R                  L a]  U R                   HM  n[        U[        5      (       d  M  UR                  [        R                  " UR                  5       5      5      nMO     U$ r   )r  rH  r	  r  _prims	rng_primsgraphsafe_run_with_rng_stater	  r   r	  	with_readrB   rI  r  )r  r  r{	  r  s      r   rH  FallbackKernel.get_read_writes  sw    g-/u||55RRR))c>22"-"7"7$,,S\\^<#K * r   c           	     n    UR                  U R                  5       U R                  [        U SS 5      5      $ Nr	  )(codegen_unbacked_symbol_defs_for_outputsr  r  r   r	  s     r   codegen_unbacked_symbol_defs+FallbackKernel.codegen_unbacked_symbol_defs  s0    ??MMOT\\749Ld+S
 	
r   c                    [        U SS 5      =n(       aC  [        [        R                  R                  R
                  U5      nUc   eUR                  5       $ [        5       $ r)  )r   r6   rm   r  r  r   r  r;   r  r	  resolveds      r   r  'FallbackKernel.get_unbacked_symbol_defs  sZ     '.A4 HHH0  **,=H '''==?"<r   c                ~   [         R                   " S S5      5       n[        U R                  5      (       d   eU R                   Vs/ s H  o!" UR	                  5       5      PM     nnU R                  X0R                  5      u  pE[        R                  R                  (       a  [        U R                  [        R                  R                  5      (       a  U R                  XE5      n[!        U R                  R"                  R$                  U5       VVs/ s H8  u  pb[        R                  R&                  R)                  X&R*                  5      PM:     nnnO9U Vs/ s H,  n[        R                  R&                  R)                  U5      PM.     nnU R,                  R/                  U5        U$ s  snf s  snnf s  snf )Nc                  *    \ rS rSr% S\S'   SS jrSrg))FallbackKernel.codegen_args.<locals>.Shimi  r   refc                    U R                   $ r   )r4  r  s    r   r>  2FallbackKernel.codegen_args.<locals>.Shim.__repr__  s    xxr   r   Nr  )r   r   r   r   r   r>  r   r   r   r   Shimr3    s    H r   r7  )r  	dataclassrL	  re  r  r	  r	  rm   r  r	  r   r	  r  r	  r	  rL
  r   r	  r	  rJ  rO
  r	  r   r  )r  r7  r   r
  r   r   params          r   rW
  FallbackKernel.codegen_args  s\   				  	  
	   ,,,,<@KKHKqtA//12KH**;8J8JK77:d.>.>

@U@U#V#V..t<D !$D$4$4$<$<$F$F M MHE $$33AG M  D
 EIIDqAGG((77:DDI 	6" I
 Js   F/?F43F:c                   U (       a*  U  Vs/ s H  n[        U[        5      (       a  M  UPM     snOS nU(       aD  U (       d   eU  Vs/ s H)  oDR                  5       (       d  M  UR                  5       PM+     nnUS   $ [        U[        R                  5      (       a  UR
                  $ [        U[        [        45      (       a  [        S U 5       5      nU Vs/ s H  ow(       d  M  UPM     nn[        U5      S:X  a  US   $ U HB  n[        U[        R
                  5      (       d   e[        UR                  5      (       d  M@  Us  $    US   $ g s  snf s  snf s  snf )Nr   c              3  N   #    U  H  n[         R                  S U5      v   M     g 7fr   )r  find_devicerg  s     r   r   -FallbackKernel.find_device.<locals>.<genexpr>  s#      $=K**433^r  r@   )r   r_	  r  r  r
  r  r   r   r;   r   rd   r   )r
  r
  r  non_torch_bind_tensor_argsr{	  devices
device_setr  s           r   r=  FallbackKernel.find_device  s1     $J1:a+IQJ 	#
 &;3>S;C..BR's~~';GS1:nell33!(((ntUm44# $=K$ J -7AJ&&vJGA7|q qz!!!&%,,7777&++&&!M " 1:/ K T Bs"   EEE$E
E)Ec                    [        U R                  [        R                  R                  5      (       a  g[        U R                  5      R                  5       $ r  )r   r	  r  r	  r	  r/   r  r  s    r   r  FallbackKernel.has_side_effects  s<    d&&

(F(FGGt//0;;==r   c                .   [        U R                  [        R                  R                  [        R                  R
                  45      (       d+   SU R                   S[        U R                  5       S35       e[        U R                  [        R                  R
                  5      (       d_  SU R                  R                  5       ;  aA  U R                  R                  R                  (       a  [        U R                  5      (       a  / $ U R                  $ )Nr  r  r  r	  )r   r	  r  r	  r	  r	  r   r   r	  r  r&   r  r  s    r   ro  +FallbackKernel.get_inputs_that_alias_output  s    uzz44ejj6T6TU
 
 	
 2$2B2B1C2D$$%&n6	
 
 4++UZZ-K-KLL"$*:*:*?*?*AA  ((33&t'7'788I###r   c                P    [        U R                  5      S::  d   eU R                  $ r  )r   r
  r  s    r   ri  !FallbackKernel.get_mutation_names  s'    4&&'1,,,"""r   c           
         [         R                  SU R                  5       U R                  5        [	        U [
        5      (       d   [        U 5      5       eU R                  U R                  U R                  5      u  pU R                  X5      nU R                   Vs/ s H  nU R                  " U40 UD6PM     nnU R                  n[        R                  R                  (       d  / UQUQ$ [!        S/ 5      nUR#                  XQU5      n      SS jn[	        U[$        R&                  R(                  R*                  5      (       a#  UR-                  US   US   5      R.                  n	OUR0                  R.                  n	[3        U	5      S:X  aB  U R4                  (       a  U R4                  OU R6                  n
U	S   R8                  nU" X5      /nO:[;        XR4                  5       VVs/ s H  u  pU" UR8                  U5      PM     nnnU R                  c   e[=        U R                  5       [>        R@                  " U R                  RC                  5       UU0 S9S9n[        RD                  RG                  U5        / UQUQ$ s  snf s  snnf )	a  
ProxyExecutor Design Note
We export the ExternFallbackNodes (for custom ops) into a serialized file
and run it with a host side proxy executor to address the ABI problem
This is currently only implemented for fbcode. Eventually, we will also make this work for OSS.
Detailed design doc can be found at
https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing
z4Extern kernel node added for node %s with target %s.Nc           	     
   [        U [        R                  [        R                  45      (       a  Un[        U[        [
        45      (       a  [        U5      S:X  d   eUS   n[        U [        R                  5      (       aT  [        U[        5      (       d   e[        R                  R                  [        R                  " UR                  5       S9S9$ Ub   e[        R                  R                  SS9$ [        U [        R                  5      (       a  [        U R                  5       [        R                  5      (       as  [        U[        5      (       d   [!        U5      5       e[        R                  R                  U Vs/ s H%  n[        R                  " UR                  5       S9PM'     snS9$ [        U [        R"                  5      (       a  [        U R                  5       [        R                  5      (       a  Uc8  [        R                  R                  [        R$                  R                  SS9S9$ [        U[        5      (       d   e[        R                  R                  [        R$                  R                  [        R                  " UR                  5       S9S9S9$ [        U [        R&                  5      (       a  [        R                  R                  US	9$ [)        S
[!        U 5       35      es  snf )Nr@   r   r  )	as_tensorT)as_none)
as_tensors)as_optional_tensor)as_intzUnsupported return type )r   r  
TensorTypeNoneTyper   r   r   r   export_schemarp   r  TensorArgumentr  r  getElementTyper   r   OptionalTypeOptionalTensorArgumentIntTypeRuntimeError)return_typerX  r   s      r   handle_single_outputFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output0  s    +(8(8%..'IJJftUm44v;!+++ )Ck5+;+;<<%c62222(1188"/">">CLLN"S 9   ;&;(11888FFK88Z**,e.>.>> > "&(33AT&\A3$--44 $* #)C &44#,,.I#)  5   K););<<**,e.>.>B B >(1188+8+O+O+V+V$( ,W , 9   &ff5555(1188+8+O+O+V+V&3&B&B%+__%6' ,W , 9   K77$--44F4CC"%=d;>O=P#QRR7 s   ,L r   r@   )r  re  r  metadata)r   r   )rY  z6Union[torch.TensorType, torch.ListType, torch.JitType]rX  Union[IRNode, Sequence[IRNode]]r   zexport_schema.Argument)$r  r  r  r	  r   r  r   r	  re  r	  rL
  r	  rZ
  rm   r  aot_moder%   serialize_inputsr  r	  	torchbindCallTorchBindr   returnsr	  r   r  r	  r	  r   r  rR  r9   r   extern_kernel_nodesr  )r  r   r   r  ordered_kwargsr  
serializernamed_argumentsrZ  rb  r  rY  output_argumentsreturn_schemarX  r   s                   r   export_extern_kernel_node(FallbackKernel.export_extern_kernel_node  sl    			BMMO	
 $//;d;/**4;;8J8JK**48 99
9 !!#009 	 
 !!ww+T+N++*44
$55fFK3	SO3	S33	S $3	Sj fe55??MMNNmmDGT!W5==Gnn,,Gw<1 '+lldll8M8MG!!*..K 4[ JK .1,,-G 
 .H)M	 %!++ .H    +++##'',,.&(	
 	
$$T*''''K
` s   J*J
c                  ^ ^^ T R                   nUc   eUR                  S:X  a  [        U[        R                  R
                  5      (       d   [        U5      5       e[        R                  R                  (       a2  SSK
Jn  [        U5      U;  a  [        R                  SU5        ST l        OUR                  S:X  a:  [        U[        R                  R
                  5      (       d   [        U5      5       eOA[        R                  R                  (       a"  U[         R"                  R$                  ;  T l        [        R                  R                  (       a  [        U[        R                  R
                  5      (       a  T R                  (       d  SU4S jjmT R'                  T R(                  T R*                  5      u  nm[,        R.                  " UUU 4S	 jT R0                   5       5      n[3        U4S
 j[5        XRR6                  R8                  5       5       5      T l        T R;                  U5        T R                  (       a  T R=                  5       nT R>                  c   eT R                   c   eURA                  T RC                  5       T R>                  U 4S jT R                   UT RD                  (       a  T RD                  OT RF                  5        OcURI                  T 5        [        T RJ                  [L        5      (       a3  T RO                  U5        T RQ                  U5        T RS                  U5        T RU                  U5        g)r"  Nr	  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedc                   > [        U [        R                  5      (       a  T" U R                  5       5      $ [        U [        R                  5      $ r   )r   r  rU  rT  
NumberType)r  	is_numbers    r   rp  )FallbackKernel.codegen.<locals>.is_number  s=    a!3!344$Q%5%5%788!!U%5%566r   c              3  J   >#    U  H  nTR                   " U40 TD6v   M     g 7fr   )rZ
  )r   r^
  r   r  s     r   r   )FallbackKernel.codegen.<locals>.<genexpr>  s(      ? ))!6v6?s    #c              3  z   >#    U  H0  u  p[        U[        5      =(       a    T" UR                  5      v   M2     g 7fr   )r   complexr	  )r   rP  r  rp  s      r   r   rs    s2      ,DDA 1g&A9Q[[+AADs   8;c                 H   > / T R                  5       QT R                  5       Q$ r   )rW
  r_
  r  s   r   r  (FallbackKernel.codegen.<locals>.<lambda>  s"    F$++-F0C0C0EFr   )r  ztorch.JitTyper   r   )+r	  r	  r   r  r	  r	  r   rm   r  r	  torchgen.aoti.fallback_opsrl  r   r  r  r  rA   r  custom_ops_to_c_shimsr	  re  r	  r  r/  r	  rt  r   r	  r	  r	  ri  r	  ,generate_fallback_kernel_with_runtime_lookupr  r  r	  generate_fallback_kernelrJ  r  rl
  rq
  rw
  r+  )	r  r	  r	  rl  r   	args_iterexported_argsrp  r   s	   `      @@r   r	  FallbackKernel.codegen  sz    !!!!!v%fejj&;&;<<Jd6lJ<ww""Lv;&;; KKa 15D--fejj&;&;<<Jd6lJ<WW   f11GGG % GG65::#8#899--7  ..t{{D<N<NOLD& "!??I ), ,	>>+C+CD, )D%
 	W%$$ ::<M**666##///@@''F   $$2G2G ,,T2$++v..))'2..w7,,W5))'2r   c           	         Sn U R                  5       n[        U R                  U R                  [        U R                  5       5      [        U R                  5       5      US9$ ! [         a     N[f = f)NFre  )rN  rX  rL  r  r  rZ   r  r  )rX  rN  s     r   tensor_to_layoutFallbackKernel.tensor_to_layout  sj    		((*I MMLL%fkkm4%fmmo6
 	
  		s   A# #
A0/A0c           	     :  ^ ^^^ [         R                  4nX;  a,  [        [        S   [        R
                  R                  5      nO
[        5       nU   T R                  " U/UQ70 UD6u  nnnn	n
SSS5        [        S W 5       5      mT R                  UW5      nU(       dI  [        U[        R                  R                  R                  5      (       a  [        R                   " S5      nUc  T " [#        US9UUWW	W
S9mO U(       d   S5       eT " [%        US9UUWW	W
S9mSU UUU4S jjmT" U/ 5      n[        U[&        [(        45      (       a	  UTl        U$ [        U[,        5      (       a  [)        U5      Tl        U$ U/Tl        U$ ! , (       d  f       GN"= f)	z9Create an instance of FallbackKernel from an _OpOverloadsNc              3  8   #    U  H  n[        U5      v   M     g 7fr   )r_  rz	  s     r   r   (FallbackKernel.create.<locals>.<genexpr>  s     !K{,s"3"3{r  r!  r  r  z"Not sure where to find device infoc                J  >^ ^ [        T [        [        45      (       a/  [        T 5      " UUU 4S j[	        [        T 5      5       5       5      $ [        T [        5      (       a<  T R                  5        VVs0 s H  u  p#UT" UT[        T 5      U4/-   5      _M      snn$ [        T [        R                  5      (       a}  [        TR                  T 5      TT5      n[        R                  (       d  T(       d  [        T 5      (       d3  [        R                   R"                  R%                  UR&                  5        U$ [        T [(        5      (       a  T $ [        T [        R*                  5      (       a  T R,                  R.                  $ T b   S[        T 5       S35       eg s  snnf )Nc              3  Z   >#    U  H   nT" TU   T[        T5      U4/-   5      v   M"     g 7fr   )r   )r   r   generate_outputr  rX  s     r   r   AFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>$  s7      $/ $F1Iw4<:K9L/LMM/s   (+zFallbackKernel output type z is not supported)r   r   r   r   r   r   r   rW  r  r
  MultiOutputr  rA    assume_unaligned_fallback_outputrj   rm   r  ra  r  r   r   SymIntr   r  )	rX  r  r  r'  r  r	  r  has_unaligned_inputpackeds	   ``   r   r  .FallbackKernel.create.<locals>.generate_output"  s[   &4-00F| $"3v;/$   FD)) %+LLN$2 g$v,9L8M.MNN$2  FELL11!((0 ;;*,V44GG--11#((;
FC((FELL11{{'''~ 1$v,?PQ~ 3s   6%F)rX  r   r  zlist[tuple[Any, int]]r   r   )r	  *_fused_moving_avg_obs_fq_helper_functionalr   r	   rm   r  r  r
   r
  rt  r=  r   r  r	  r`  ra  r  r  r>	  r   r   r  r   )r	  r	  r   r   fake_incorrect_kernelscontextr
  r
  r
  r	  r	  r  r  r  r  r  s   `            @@@r   r  FallbackKernel.create  s    #'"Q"Q!S/1$79J9JKG!mG ""6;D;F;!  "!K{!KKn=*E++55CC
 
 \\%(F!&)"3F ???6!0"3F 	  	D "."5ge}--$FN
 	 &&"7^FN  &YFNg Ws   F
Fc                    > [         TU ]  5       $ r   )r  r	  r  s    r   r	  FallbackKernel.apply_constraintM  s    w'))r   )r  r   r
  r	  r	  r	  r  r   rJ  r  r	  r{   r
  r  r  r  r	  r  r   r
  r	  ,Optional[dict[sympy.Symbol, pytree.KeyPath]]r   r   r  r
  )r   zContainer[sympy.Symbol]r
  )r
  z Optional[Sequence[torch.Tensor]]r
  r  r   r   r  r  )rX  r'  r   rL  )r	  r{   r   r   r   r   r   r  rt  )r   r   r   r   r  r  rH  r+  r  rW
  r  r=  r  ro  ri  ri  r   r	  r  r?  r  r	  r   rA  rB  s   @r   r  r  0  s     ,0h4 KOh4h4 h4 &	h4
 &h4 +h4 )h4 Hh4 
h4 h4T


 0 5GT	 :>
$*#w(r S3 S3j 
 
 [ [z* *r   r  c                  f   ^  \ rS rSrSrS	S jrS
S jrSS.             SU 4S jjjrSrU =r	$ )ComplexViewiQ  z9View a complex number as two dtyped numbers or vice versac                    gr  r   r  s    r   r  ComplexView.should_allocateU  r	  r   c                &    U R                  S5      /$ r  rV  r  s    r   ro  (ComplexView.get_inputs_that_alias_outputX  s    "##r   Nr  c          	     *   > [         TU ]  UUUUUUS9  g )Nr  )r  r  )r  rJ  r	  r
  r  r	  r	  r  s          r   r  ComplexView.__init__\  s)     	/ 	 	
r   r   r  r  )rJ  r  r	  r{   r
  r  r  r  r	  r  r	  r  r   r   )
r   r   r   r   r  r  ro  r  r   rA  rB  s   @r   r  r  Q  sf    C$ KO

 
 &	

 &
 +
 H
 

 
r   r  c                  "    \ rS rSrSrSS jrSrg)MemoryCheckKernelip  z
Custom kernel for memory checking that generates direct function calls

TODO - the custom op was erroring with str inputs. should be able to custom op directly.
c                    UR                  5         U R                  u  p#n[        U5      n[        U5      nU(       a  UR                  S5        SU SU SU S3nO	SU SU S3nUR                  U5        g)z.Override codegen to write direct function callzV# note: dont currently distinguish between buffers returned and dealloc'd in last stepzcheck_memory_step(allocated=z, freed=z, is_final_step=r  N)rv
  r	  rP  rL  )r  r	  
alive_list	dead_listis_final_step
alive_repr	dead_reprcalls           r   r	  MemoryCheckKernel.codegenw  s     	224/3/A/A,
}*%
O	h 2*Xi[P`an`oopqD1*Xi[PQRD$r   r   Nr
  )r   r   r   r   r  r	  r   r   r   r   r  r  p  s     r   r  c                  *    \ rS rSr% S\S'   SS jrSrg)r>	  i  r  r  c                    U R                   $ r   r  r  s    r   r  MultiOutputLayout.get_device  r  r   r   Nr  )r   r   r   r   r   r  r   r   r   r   r>	  r>	    s    r   r>	  c                     ^  \ rS rSrSS jr S	         S
U 4S jjjr\" S 5       S	   SS jj5       rSS jrSS jr	Sr
U =r$ )r  i  c                    UR                  U 5        U R                  (       d#  U R                  U5        U R                  U5        g g r   )codegen_multi_output!skip_size_stride_alignment_checksrl
  rq
  r	  s     r   r	  MultiOutput.codegen  s:    $$T*55%%g.**73 6r   c                   > [         TU ]  S X/S5        [        R                  R	                  U 5      U l        [        R                  R                  U 5        X0l        X@l        g r  )	r  r  rm   r  r  r   r  r  r  )r  rJ  rQ	  r  r  r  s        r   r  MultiOutput.__init__  sK     	vw3GG++D1		""4(1R.r   c                z    U R                   S   n[        U[        5      (       d   U5       eUR                  U5      $ r  )re  r   r   rW  )r  rd  r  s      r   rW   MultiOutput.get_free_symbol_uses  s:     [[^
*f--9z9-..}==r   c                z    [        U R                  5      S:H  =(       a    [        U R                  S   [        5      $ )Nr@   r   )r   re  r   r:	  r  s    r   r  MultiOutput.should_allocate  s0    4;;1$ 
t{{1~'89	
r   c                    U R                    Vs/ s HI  n[        U[        5      (       d  M  [        UR	                  5       5      S:  d  M9  UR                  5       PMK     sn$ s  snf r  )re  r   r  r   ro  r  r  s     r   ro  (MultiOutput.get_inputs_that_alias_output  s\     {{
"#~.  C4467!; CLLN"
 	
 
s   A"A"A")r  r   r  r
  r  )
rJ  r  rQ	  r   r  zlist[tuple[Any, ...]]r  r   r   r   r  r  r  )r   r   r   r   r	  r  rX   rW  r  ro  r   rA  rB  s   @r   r  r    s    4 38SS S '	S
 ,0S 
S S M*$)>!>	!> +>


 
r   r  c                     \ rS rSr% SrS\S'   S.S jrS/S jrS0S jrS1S jr	S2S	 jr
S3S
 jrS4S5S jjrS6S jrS7S jrS8S jrS.S jrS7S jr S9     S:S jjrS;S jrS<S jr S9     S=S jjrS>S jrS?S jrS@S jrSAS jrSBS jrSCS jrS.S jrS.S jrSDS jrSES jrS3S  jr SES! jr!SBS" jr"\#" S 5       S9   SFS# jj5       r$SGS$ jr%SHS% jr&S4SIS& jjr'\(SJS' j5       r)SKS( jr*SJS) jr+SCS* jr,\(SLS+ j5       r-S3S, jr.\.r/S-r0g)Mr  i  z;
TensorBox / StorageBox allow in-place mutation of Tensors
r   rI  c                6    U R                   R                  5       $ r   r  r  s    r   r  !MutableBox.has_exceeded_max_reads  r  r   c                6    U R                   R                  5       $ r   r}  r  s    r   r  MutableBox.get_device  r{  r   c                6    U R                   R                  5       $ r   ra  r  s    r   r  MutableBox.make_loader      yy$$&&r   c                6    U R                   R                  5       $ r   )rI  r  r  s    r   r  MutableBox.make_indexer      yy%%''r   c                6    U R                   R                  5       $ r   )rI  r/  r  s    r   r/  MutableBox.get_stride  r{  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_name  r  r   Nc                8    U R                   R                  U5      $ r   )rI  r#  r!  s     r   r#  MutableBox.has_large_inner_fn  s    yy++I66r   c                8    U R                   R                  U5      $ r   r  r&  s     r   r(  MutableBox.mark_reuse  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r,  MutableBox.realize_hint  r  r   c                6    U R                   R                  5       $ r   )rI  r/  r  s    r   r/  MutableBox.unwrap_view  r  r   c                6    U R                   R                  5       $ r   )rI  r  r  s    r   r  MutableBox.is_input_buffer      yy((**r   c                6    U R                   R                  5       $ r   )rI  r2  r  s    r   r2  MutableBox.freeze_layout  s    yy&&((r   c                8    U R                   R                  X5      $ r   )rI  r7  r5  s      r   r7  *MutableBox.freeze_layout_with_stride_order  s     yy88NNr   c                8    U R                   R                  U5      $ r   )rI  r<  r;  s     r   r<  (MutableBox.freeze_layout_with_fill_order  s    yy66u==r   c                8    U R                   R                  U5      $ r   )rI  r@  r?  s     r   r@  (MutableBox.freeze_layout_with_same_order  s    yy66v>>r   c                8    U R                   R                  X5      $ r   )rI  rE  rC  s      r   rE  +MutableBox.freeze_layout_with_exact_strides  s     yy99-WWr   c                6    U R                   R                  5       $ r   )rI  rH  r  s    r   rH  MutableBox.get_read_writes  r  r   c                6    U R                   R                  5       $ r   r2  r  s    r   r  MutableBox.get_reads  r  r   c                6    U R                   R                  5       $ r   r/  r  s    r   rP  MutableBox.num_reads  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rS  MutableBox.get_storage_numel   r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rZ  MutableBox.get_reduction_type   r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r]  MutableBox.get_reduction_size   r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r`  MutableBox.is_extern   r  r   c                6    U R                   R                  5       $ r   )rI  rc  r  s    r   rc  MutableBox.is_no_op   r  r   c                8    U R                   R                  U5      $ r   r  r  s     r   rf  MutableBox.constant_to_device   s    yy++F33r   c                6    U R                   R                  5       $ r   )rI  ri  r  s    r   ri  MutableBox.get_mutation_names   r  r   c                6    U R                   R                  5       $ r   )rI  rl  r  s    r   rl  MutableBox.get_operation_name   r  r   c                6    U R                   R                  5       $ r   )rI  ro  r  s    r   ro  'MutableBox.get_inputs_that_alias_output   s    yy5577r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.realize   r  r   c                8    U R                   R                  U5      $ r   rf  rV  s     r   rW  MutableBox.get_free_symbol_uses    s     yy--m<<r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_read_names&   r  r   c                6    U R                   R                  5       $ r   )rI  r  r  s    r   r  MutableBox.get_defining_op)   r  r   c                8    U R                   R                  U5      $ r   )rI  r  r  s     r   r  MutableBox.codegen_reference,   s    yy**622r   c                6    U R                   R                  5       $ r   rI  r  r  s    r   rJ  MutableBox.layout/   s     yy((**r   c                6    U R                   R                  5       $ r   ry  r  s    r   r  MutableBox.get_layout4   r{  r   c                6    U R                   R                  5       $ r   r   r  s    r   r  MutableBox.get_output_spec7   r  r   c                6    U R                   R                  5       $ r   r\  r  s    r   r
  MutableBox.get_size:   r  r   c                .    U R                   R                  $ r   )rI  r  r  s    r   r  MutableBox.dtype=   s    yyr   c                ~   [        U R                  [        5      (       aQ  [        U 5      R                   S[        U R                  5      R                   S3nSnU R                  R                  nO&[        U 5      R                   S3nU R                  nSnU[        [        U5      5      U/nSR                  U5      $ )Nr  z))r  r  )r   rI  r  r   r   r  r   r  )r  line0endlrn  r  s        r   r  MutableBox.__str__A   s    dii,,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyr   r   r  r  r  r  r  r  r   r  r  rt  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  ru  rx  r  r~  r}  r{  )1r   r   r   r   r  r   r  r  r  r  r/  r  r#  r(  r,  r/  r  r2  r7  r<  r@  rE  rH  r  rP  rS  rZ  r]  r`  rc  rf  ri  rl  ro  r  rX   rW  r  r  r  r  rJ  r  r  r
  r  r  r>  r   r   r   r   r  r    sb    L2&'(&$7+('+) ;@O"O37O	O
>? HMX/X@DX	X
+%%-..%$4..8# L)$)=!=	!= *=
*+3 + +&+$   " Hr   r  c                  (    \ rS rSr\SS j5       rSrg)r   iU   c                X    [        U [        5      (       a  U $ [        [        U 5      5      $ r   )r   r   r   rX  rI  s    r   r  TensorBox.createV   s%    d122KD)**r   r   N)rI  r   r   r:  )r   r   r   r   r  r  r   r   r   r   r   r   U   s    + +r   c                  r    \ rS rSrSrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSrg)rX  i]   z/
StorageBox allow in-place mutation of Tensors
c                    [        U R                  [        [        45      (       a5  U R                  R	                  5       [
        R                  R                  ;   $ gr  )r   rI  ry  rO  r  rm   r  r  r  s    r   r  StorageBox.is_input_bufferb   s=    dii+!?@@99%%'177+?+???r   c                    [        U R                  [        5      =(       a5    U R                  R                  5       [        R
                  R                  ;   $ r   )r   rI  rb  r  rm   r  r	  r  s    r   r  StorageBox.is_module_bufferg   s9    tyy>3 :		""$(9(99	
r   c           
        [         R                  U R                  5      (       a  U R                  R                  5       $ [	        U R                  [
        [        [        [        45      (       d   [        U R                  5      5       eU R                  R                  5       nU R                  R                  5       nU R                  R                  5       nUc   e[        S [        UU R                  R                  5       U R                  R!                  5       SS9U R                  S9U l        ["        R$                  R'                  U R                  5      U R                  l        ["        R$                  R+                  U R                  5        U R,                  U R                  l        XR                  l        X R                  l        U R                  R(                  $ )NF)r  r  r  rN  r  )r   r{  rI  r  r   rJ  r  r  r!  r   r  r  r  rw  r  r  r
  rm   r  r  r   r  rm  rq  ro  )r  rq  ro  r  s       r   r  StorageBox.realizem   sR   ""499--99%%''$))iD$%GHH 	
$IIK
 	
H ii//1II++-	%%'!!!"!ii))+YY'')	 	
	 00;			""499- LL		 +		'		yy~~r   c                    [        U R                  [        [        45      (       a:  U R                  R	                  5       R
                  S:  a  U R                  5         ggg)z<
Called on buffers we expect to be forced to realize later.
r@   N)r   rI  rJ  r  r  nontrivial_read_countr  r  s    r   r,  StorageBox.realize_hint   sI    
 tyy9i"899		**,BBQFLLN G :r   c                H    [        S U R                  5        5       5      U:  $ )Nc              3  `   #    U  H$  n[         R                  R                  U5      v   M&     g 7fr   )rm   r  get_dep_size_hintr  s     r   r   BStorageBox.has_accumulated_enough_reads_by_size.<locals>.<genexpr>   s$     K:J3))#..:Jr
  )rw  r  r!  s     r   $has_accumulated_enough_reads_by_size/StorageBox.has_accumulated_enough_reads_by_size   s!    K$..:JKKiW	
r   c                2   [        U R                  [        5      =(       aw    U R                  5       [        R
                  :  =(       dO    U R                  5       =(       d8    [        R                  S L=(       a    U R                  [        R                  5      $ r   )	r   rI  rJ  rP  rA   realize_acc_reads_thresholdr#   realize_acc_reads_size_thresholdr   r  s    r   r  !StorageBox.has_exceeded_max_reads   sq    $))Y/ 	
NNvAAA &&( 77tC ==;;		
r   c                r  ^ US:  a  [        U R                  [        [        45      (       a  [	        U R                  5      (       a9  U R                  R                  5       mSS/n[        U4S jU 5       5      (       a  gU R                  5       [        R                  :  =(       d    U R                  5       $ g)zR
A heuristic to decide if we should realize a tensor
that is used multiple times.
r@   expsigmoidc              3  @   >#    U  H  oTR                   ;   v   M     g 7fr   )used_ops)r   r   opcounts     r   r   5StorageBox.should_realize_on_reuse.<locals>.<genexpr>   s     @iG,,,is   TF)r   rI  rJ  r  r+  r  rt  rP  rA   realize_reads_thresholdr#  )r  r'  	heavy_opsr+  s      @r   should_realize_on_reuse"StorageBox.should_realize_on_reuse   s    
 19DII	9/EFFdii  ))446"I.	@i@@@ 6#A#AA -**, r   c                R    U R                  U5      (       a  U R                  5         g g r   )r/  r  r&  s     r   r(  StorageBox.mark_reuse   s!    ''..LLN /r   c                6    U R                   R                  5       $ r   r/  r  s    r   rP  StorageBox.num_reads   r  r   r  Nr  r  rt  )r"  r   r   r   )r'  r   r   r   r  r  )r   r   r   r   r  r  r  r  r,  r   r  r/  r(  rP  r   r   r   r   rX  rX  ]   s4    

:



$%r   rX  c                  8    \ rS rSr% S\S'   S\S'   SrS\S'   S	rg)
Subgraphi   r   r   r  graph_moduleNzOptional[GraphLowering]r  r   )r   r   r   r   r   r  r   r   r   r   r6  r6     s    
I&&%)E")r   r6  c                    U  Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     n n[        [	        S U  5       5      5      [        U 5      :  $ s  snf )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   )r
  )r   rZ  s     r   r   '_has_aliased_buffers.<locals>.<genexpr>   s     ;7"V**7r  )r   rO  r/  r   r;   )buffersrZ  s     r   _has_aliased_buffersr<     sd     F !+6? C CO  
 z;7;;<s7|KKs   1Ac                     ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'           SU 4S	 jjr	\
      SS
 j5       rSS jrSrU =r$ )InvokeSubgraphi   z&
Ir node for the invoke_subgraph HOP.
NOptional[Subgraph]r
  Optional[Sequence[IRNode]]operandsr  c                   > [         TU ]  S UUS9  Xl        [        R                  R                  U 5      U l        [        R                  R                  U 5        g r	  )r  r  r
  rm   r  r  r   r  )r  r
  rA  rJ  r  s       r   r  InvokeSubgraph.__init__   sO     	 	 	

 !GG++D1		""4(r   c                  ^ SSK Jn  [        R                  R                  nSnUR
                  R                  S5      =n(       a	  US   SS nO-UR                  SS nU Vs/ s H  oR
                  S   PM     nnU Vs/ s H  oR                  U5      PM     nn/ n	[        U5       HM  u  p[        U[        [        45      (       a  U	R                  U5        M3  U	R                  U" XU
   5      5        MO     U	nUR                  cz  [        R                  R                  UR                  UUR                   S9Ul        [        R"                  " UR                  5         UR                  R$                  " U6   SSS5        UR                  R&                  nSnU H*  n[        U[        5      (       a  M  UR)                  5       n  O   Uc   e[+        UU[-        US	9S
9m      SU4S jjn[        U5       VVs/ s H  u  nnU" UU5      PM     nnnUTl        U$ s  snf s  snf ! , (       d  f       N= fs  snnf )z|For each operand, get a realized input, force it to have the same
strides as the subgraph inputs, then use an InvokeSubgraphr@   )constrain_to_fake_tensorNeager_input_valsr   r   r'  r_  r
  r
  r  )r
  rA  rJ  c                Z  > [        U [        [        45      (       a  U $ U R                  5       nUc   e[	        [        UU R                  5       U R                  5       U R                  5       U R                  5       R                  U R                  5       R                  S9T[        U4/SS9$ )Nr  T)r  )r   r   r  r  r  rL  r  r
  r/  r  rM  rN  r   )rX  indr  invoke_subgraphs      r   create_output,InvokeSubgraph.create.<locals>.create_output$!  s     &#8:N"OPP**,)))"%$..0#__.%002%00299"("3"3"5"?"? $C[M6: r   )rX  r   rI  r   r   z?Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput])r'
  rE  rm   r  r  r]  r  r   r]	  r   r   r   r	  r  r
  r7  r   r
  r
  graph_outputsr  r>  r>	  r  )r	  r
  rA  rE  r  fake_operandsrF  fx_operandsr   new_operandsr   operandr  r  rK  r   rX  outsrJ  s                     @r   r  InvokeSubgraph.create   s.    	7 ww+++00445GHHH,Q/3M '++AB/K4?@KqVVE]KM@
 AI!I1"3"3A"6!I%'%h/LC'$9>#JKK##G,##,WC6HI	 0  >>!WW22((,&mm 3 HN
 $$X^^4""M2 5 .... Gg'<== ++-   !!!($F3
		!$	L	. ;DG:LM:LYQfa(:LM"&K A
 "J( 54T Ns   "H  H%!H*>H;*
H8c                &    UR                  U 5        g r   )codegen_invoke_subgraphr	  s     r   r	  InvokeSubgraph.codegen?!  r
  r   )r   r
  )r
  r6  rA  r  rJ  r>	  r   r   )r
  r6  rA  r   r   zElist[Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput]]r
  )r   r   r   r   r  r
  r   rA  r  r  r?  r  r	  r   rA  rB  s   @r   r>  r>     s     $(H '+/H(/*.G'.
) 
),<
)FW
)	
) WW,2W	NW Wr. .r   r>  c                     ^  \ rS rSr% SrS\S'   SrS\S'   SrS\S'   SrS\S	'   Sr	S
\S'                 SU 4S jjr
\SS j5       r\          SS j5       rSS jrSS jrSrU =r$ )ConditionaliC!  Nr  	predicater@  rA  r?  true_subgraphfalse_subgraphOptional[Sequence[MultiOutput]]r  c                  > Xl         X l        X0l        X@l        [	        U/UQ5      u  px[
        T	U ]  S UUUS9  Ub  X`l        [        R                  R                  U 5      U l        [        R                  R                  U 5        g N)r   rJ  re  r	  )rY  rA  rZ  r[  _split_by_sym_typer  r  r	  rm   r  r  r   r  )
r  rY  rA  rZ  r[  rJ  r	  sym_argsr
  r  s
            r   r  Conditional.__init__K!  s     # *, 2I3I3I J"	 	 	
 (%6"GG++D1		""4(r   c                \    [        U [        5      (       a  U $ U R                  R                  $ r   )r   r   r   r  )r   s    r   _maybe_exprConditional._maybe_exprg!  s"    aHvv{{r   c                	   U R                  U5      nU Vs/ s H  oPR                  U5      PM     nn[        R                  R                  R                  S   n[        U[        5      (       d   [        U5      5       e[        S U 5       5      (       d   eU Vs/ s H   n[        [        U5      R                  S   PM"     nnX#4 H  nUR                  b  M  [        R                  R                  UR                  UUR                  S9Ul        [        R                  " UR                  5         UR                  R                   " U6   SSS5        M     UR                  c   eUR                  c   eUR                  R"                  n	UR                  R"                  n
SU	4SU
44 H&  u  p[%        U	5      (       d  M  ['        SU S	U 35      e   [)        U	5      [)        U
5      :X  d   X45       e[+        [-        X5      5       H  u  nu  pUR/                  5       UR/                  5       :X  d	   XU45       eUR1                  5       UR1                  5       :X  d	   XU45       eUR3                  5       R4                  UR3                  5       R4                  :X  a  M   XU45       e   [7        S
 U/U-    5       5      n[9        [        R                  R:                  R<                  [        R                  R                  R                  R?                  SS5      5      nUc   S5       e[A        UUUU[C        US9US9n[+        [-        U	[        R                  R                  R                  S   5      5       VVVVs/ s H  u  nu  nn[E        [G        UUR1                  5       URI                  5        Vs/ s H  n[@        RK                  U5      PM     snURM                  5        Vs/ s H  n[@        RK                  U5      PM     snUR3                  5       R4                  UR3                  5       RN                  S9U[P        U4/5      PM     nnnnnUUl)        U$ s  snf s  snf ! , (       d  f       GM  = fs  snf s  snf s  snnnnf )zNCreate a Sequence of IRNodes from a conditional statement (see .lowering.cond)r2  c              3  B   #    U  H  n[        U[        5      v   M     g 7fr   )r   r9   rJ	  s     r   r   %Conditional.create.<locals>.<genexpr>{!  s     <1:a&&rb  r'  NrG  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  p   #    U  H,  n[        U[        5      (       a  M  UR                  5       v   M.     g 7fr   )r   r   r  )r   os     r   r   rg  !  s,      
+a!67 ALLNN+s   66r	  zcannot determine devicer  )rY  rA  rZ  r[  rJ  r	  r  )*r]	  rm   r  r  r   r   r   r   r   r   r9   r]  r
  r7  r   r
  r
  rM  r<  r  r   r   r   r  r  r  rM  r	  r6   r  r   r  rX  r>	  r  rL  r  rc  r  rN  r   r  )r	  rY  rh  ri  rA  r   rO  rN  r
  true_outputsfalse_outputsr   r  r   t_of_or  r	  conditionalrX  merged_outputr  s                         r   r  Conditional.createm!  s    %%i0	2:;(Q%%a((; ! 4 4 9 9" =+x00C${2CC0<<<<<<<GHKqdA++E2KH +H~~%!"!6!6,,#0"*-- "7 "
 ((8NN&&6 98 , }}(((~~)))}}22 44(,7*m9TUMD#L11$**./TU\T]_  V < C$66U8UU6&s<'GHMAz>>#s~~'77F!#F7==?cmmo5D}D5>>#**cnn.>.E.EETPS}TE I
  
[8+
 

 6GG&&GG  %%))*=tD
 !<#<<!!!#$F3/
4 /8L!''"6"6";";E"BC/#
"/**FM! ! **,@M@R@R@TU@T"+11"5@TU>K>R>R>T>T//3>T ",,.55$//1;;	  /# 	 
, &_ <
 I 98\ V
s<   Q'Q!1Q&<8R
4Q9R
'Q>AR
&
Q6	9
R
c           	         UR                  U 5        UR                  U R                  5       U R                  [	        U S0 5      5        g r)  )codegen_conditionalr*  r  r  r   r	  s     r   r	  Conditional.codegen!  s9    ##D)88MMOT\\749Lb+Q	
r   c                    [        U SS 5      =n(       aL  [        [        R                  R                  R
                  U5      nUc   e[        UR                  5       5      $ [        5       $ r)  r   r6   rm   r  r  r   r;   r  r.  s      r   r  $Conditional.get_unbacked_symbol_defs!  _     '.A4 HHH0  **,=H '''hmmo..<r   )r[  r   rA  rY  rZ  r	  )rY  r   rA  r  rZ  r6  r[  r6  rJ  r>	  r	  r  r   r   )r   zUnion[int, torch.SymInt]r   zUnion[int, sympy.Expr])
rY  r   rh  r6  ri  r6  rA  z-list[Union[TensorBox, ShapeAsConstantBuffer]]r   r  r
  r  )r   r   r   r   rY  r   rA  rZ  r[  r  r  r  rc  r?  r  r	  r  r   rA  rB  s   @r   rX  rX  C!  s    "&I&+/H(/(,M%,)-N&-/3G,3)) #)  	)
 !) ") H) 
)8  
 XX X 	X
 @X 
X Xt
   r   rX  c                    / n/ nU  HF  n[        U[        5      (       a  UR                  UR                  5        M5  UR                  U5        MH     X!4$ r   )r   r   r  r  )r   non_sym_argsr`  r{	  s       r   r_  r_  !  sO     LHc011OOCHH%$	  !!r   c                     ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   Sr
S
\S'                   SU 4S jjr\SS j5       r\            SS j5       rSS jrSS jrSrU =r$ )	WhileLoopi!  zSThe IR node for while_loop and while_loop_stack_output. It supports input mutation.Nr@  carried_inputsadditional_inputsr?  cond_subgraphbody_subgraphr\  r  c                  > Xl         X l        X0l        X@l        [	        / UQUQ5      u  p[
        T
U ]  S UU	US9  Ub  X`l        Xpl        [        R                  R                  U 5      U l        [        R                  R                  U 5        g r^  )r~  r  r  r  r_  r  r  r	  stack_outputrm   r  r  r   r  )r  r~  r  r  r  rJ  r	  r  r`  r
  r  s             r   r  WhileLoop.__init__!  s     -!2** 21n101!
 	"	 	 	
 (%6"(GG++D1		""4(r   c                   [        U 5      (       d  U $ SSKJn  U  Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     nn[        5       n/ n[        [        X5      5       H[  u  nu  px[        U5      U;   a  UR                  U" U5      5        M0  UR                  [        U5      5        UR                  U5        M]     U$ s  snf )Nr@   )clone)r<  r'
  r  r   rO  r/  r;   r   r   r
  r  r  )	r~  r  rZ  unwrapped_buffersseen_buffersr  r   original_inputunwrapped_buffers	            r   _clone_aliased_inputsWhileLoop._clone_aliased_inputs"  s    #N33!! 	$
 )
( %/v$G$GF VS( 	 
 )35>26
1A1 "#|3eN34  $4!56n-6
 %
s   1Cc                   SSK Jn        SS jn[        R                  R                  R
                  S   n[        R                  R                  R
                  S   n	X-   n
U
 Vs/ s H  oR                  S   PM     nnU Vs/ s H  oR                  S   PM     nnU	 Vs/ s H  oR                  S   PM     nnU Vs/ s H  oR                  U5      PM     nn[        R                  U5      nU" X5      nU Vs/ s H  oR                  U5      PM     nnU" UU5      nUU-   nX4 GH  nUR                  b  M  [        U
[        5      (       d   [        U
5      5       e[        R                  R                  UR                  U
UR                  S9Ul        [        R                   " UR                  5         UR                  R"                  " U6   UUL aZ  [%        UR                  R&                  5      [%        U5      :X  d   eU" UR                  R&                  U5      UR                  l        SSS5        GM     UR                  (       a  UR                  (       d   eUR                  R&                  nUR                  R&                  n[)        U5      (       a  [+        S	U 35      e[%        U5      S
:X  d   U5       eUS   n[        U[,        5      (       dM  UR/                  5       [0        R2                  :X  d   U5       e[%        UR5                  5       5      S:X  d   U5       e[%        U5      S:  d   S5       eUS   R7                  5       nUc   e[%        U5      [%        U5      :X  d	   UU45       e[9        [;        UU5      5       H  u  nu  nn      SS jnU" UR5                  5       UR5                  5       5        U" UR=                  5       UR=                  5       5        UR7                  5       UR7                  5       :X  d   UUUU45       eUR/                  5       UR/                  5       :X  a  M   UUU45       e   Uc   e[?        [        R                  R@                  RB                  [        R                  R                  R                  RE                  SS5      5      n[        UUUU[G        US9UUS9nUR                  b=  [        UR                  RH                  [0        RJ                  RL                  5      (       d   eU" UR                  RH                  U5      S   n[O        U5      nU Vs/ s H  nUU   PM
     n n[Q        U 5      n!/ n"/ Ul)        / Ul*        U(       Ga  [%        U5      S:X  d   S5       e[9        [        R                  R                  R                  S   5       H  u  nn#[W        [Y        U#RZ                  U#R\                  U#R_                  5        V$s/ s H  n$[`        Rc                  U$5      PM     sn$U#Re                  5        V%s/ s H  n%[`        Rc                  U%5      PM     sn%S9U[f        U4/5      n&URR                  Ri                  U&5        U"Ri                  U&5        M     GO[9        U5       GH  u  nn#UU;   ad  U[%        U5      :  d   S5       e[k        U!5      n'URT                  Ri                  [m        U'Rn                  U'U5      5        U"Ri                  U'5        Mq  [W        [Y        U#R7                  5       U#R/                  5       U#R5                  5       U#R=                  5       U#Rq                  5       Rr                  S9U[f        U4/5      n&URR                  Ri                  U&5        U"Ri                  U&5        GM     [;        UU"5       Hk  u  n(n)U(Ru                  5       [        R                  Rv                  ;   d  M4  [        R                  Rx                  R{                  U)Ru                  5       5        Mm     U"$ s  snf s  snf s  snf s  snf s  snf ! , (       d  f       GM   = fs  snf s  sn$f s  sn%f )zcreate the while_loop IR node. stack_output controls whether it stack
each iterations' output, which is necessary for training.
r   )check_input_alias_and_mutationc           	     ,   [        U 5      [        U5      :X  d   e/ n[        X5       Hh  u  p4[        U[        R                  5      (       a3  UR                  [        R                  X4R                  5       SS95        MW  UR                  U5        Mj     U$ )NFrW  )	r   r   r   r  r
  r  r  r&
  r  )tensor_boxesfake_tensorsretr
  fks        r   _require_exact_strides0WhileLoop.create.<locals>._require_exact_strides>"  s     |$L(9999Cl9b%,,//JJ$::		5 ;  JJrN : Jr   r2  r'  NrG  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: r@   z9torch.while_loop is assumed to have at least one operand.c                    [        U 5      [        U5      :X  d   e[        X5       H.  u  p#[        R                  R                  R                  X#5        M0     g r   )r   r   rm   r  r  r  )	lhs_exprs	rhs_exprslhsrhss       r   _guard_list_equals,WhileLoop.create.<locals>._guard_list_equals"  sC     9~Y777 #I 9HCGG$$11#; !:r   r	  r  )r~  r  r  r  rJ  r	  r  r   z-NYI: while_loop_stack_output input mutations.)r  r  r  r  zonly carries can be mutated.)r  r  r  r  rM  )r  r  r  z,list[Union[int, torch.SymInt, torch.Tensor]]r   r   )r   Sequence[Union[int, sympy.Expr]]r  r  r   r   )>torch._higher_order_ops.utilsr  rm   r  r  r   r]  r]	  r}  r  r   r   r   r
  r7  r   r
  r
  r   rM  r<  r  r   r  r  r   r
  r  r   r   r/  r6   r  r   r  r>	  modulefxGraphModuler;   r	  r  r	  r  rL  r  r  r  rX  rc  r  r   r  r	  r  rJ  r  rM  r  r  r  r  )*r	  cond_fnbody_fnr~  r  r  r  r  fx_carried_inputsfx_additional_inputsfx_all_inputsr   fake_all_inputsfake_carried_inputsfake_additional_inputscarried_inputs_additional_inputs_
all_inputsr
  cond_outputsbody_outputsro  r  r   rY  bor  r	  
while_loopmutated_idxsmutated_idx_setr   r  mutated_inputs_iterall_outputsrX  r  r  	multi_outmutated_inputrg  r   s*                                             r   r  WhileLoop.create0"  s    	Q	*	F	 	" GG0055b9 ww3388<)@2?@-Q66%=-@6GH6Gvve}6GH9M!N9MA&&-9M!N9GHA,,Q/H#99/J0V<MN<Mq//2<MN3 6
 %'99
 *H~~%!-::OD<OO:!"!6!6,,#0"*-- "7 "
 ((8NN&&8  7*"8>>#?#?@C/E      8N$NN88/84 98 +4 }}..}}22}}22-- XXdWeg  < A%3|3%O!233;;=EJJ.11.qzz|$),1,):" 	
G	
" A))+!!!?#s<'88 	
;
 	
8 %S,%GHKAxB<;<;< < r{{}bkkm<r}}@ ==?bmmo5J2r67JJ5<<>R\\^3@aR[@3 I" !!!5GG&&GG  %%))*=tD

 *0!!$F3/%	

 }}(ZMM  %(("6"6.
 .
 	
 

 6MM  /

 %\25DE_c*S/_E #>2$&
&(
#'1, ?,  ))=)=)B)B5)IJV'%}}$llDJKKMRMbk55b9MRFLmmoVo 7 7 ;oV	 C[M		 "")))4""9-  K  )6V/)^!44T6TT4$()<$=M//66&}';';]JW  &&}5 +##)#4#4#6"("2"2"4!'!2#)#4#4#6#)#4#4#6#=#= #
!I &&--i8&&y1-  70 NK8HC||~!5!55 ++//? 9 U AH!NH O 98t F" SVs=   (`#`($`-`2?`7<A9`<a4a'a<
a	c           	         UR                  X R                  5        UR                  U R                  5       U R                  [        U S0 5      5        g r)  )codegen_while_loopr  r*  r  r  r   r	  s     r   r	  WhileLoop.codegen"  s?    ""4):):;88MMOT\\749Lb+Q	
r   c                    [        U SS 5      =n(       aL  [        [        R                  R                  R
                  U5      nUc   e[        UR                  5       5      $ [        5       $ r)  rw  r.  s      r   r  "WhileLoop.get_unbacked_symbol_defs#  ry  r   )r  r  r~  r  r   r  r	  )r~  r  r  r  r  r6  r  r6  rJ  r>	  r	  r  r  r   r   r   )r~  r  r   r  )r  r6  r  r6  r~  r  r  r  r  r   r   r]  r
  r  )r   r   r   r   r  r~  r   r  r  r  r  r  r  r  r?  r  r	  r  r   rA  rB  s   @r   r}  r}  !  s   ]15N.54818(,M%,(,M%,/3G,3)() ,)  	)
  ) ") H) ) 
)D  8 KK K )	K
 ,K K 
)K KZ
   r   r}  c                  r   ^  \ rS rSr SSS.               S	U 4S jjjjrS
U 4S jjrSS jrSrU =r$ )r   i#  Nr  c          
     t  > [         TU ]  UUUUUS US9  SSKJn  U V	s/ s H&  n	[	        U	[
        5      (       a  U	R                  OU	PM(     n
n	U" U/ UQU
Q7U5      nUc   eXl        [        R                  R                  R                  US 5      U l        U [        R                  R                  U'   g s  sn	f )Nr   r	  r   )get_effect_key)r  r  torch._higher_order_ops.effectsr  r   r_	  r  effect_typerm   r  effectful_opsr  prev_effect_buffer)r  rJ  r	  r
  r  r	  r   r	  r  r  uncovered_argsr  r  s               r   r  EffectfulKernel.__init__#  s     	/ 	 	
 	C GR
FQz!_55AGG1<k 	 
 %V-O~-O-OQWX&&&&"#''"7"7";";K"N-1k*
s   -B5c                   > [         TU ]  5       nU R                  bG  UR                  R	                  [
        R                  " U R                  R                  5       5      5        U$ r   )r  rH  r  rL  r  rB   rI  r  )r  r  r  s     r   rH  EffectfulKernel.get_read_writes0#  sU    g-/"".!!$$T%<%<%E%E%GH r   c                    gr'  r   r  s    r   r   EffectfulKernel.has_side_effects:#  r  r   )r  r  r   r  r  r  )	r   r   r   r   r  rH  r  r   rA  rB  s   @r   r   r   #  s}     ,02 KO22 2 &	2
 &2 +2 )2 H2 
2 2@ r   r   c                  @    \ rS rSr\" S 5       S   SS jj5       rSrg)r
  i>#  c                    [        5       $ r   r:   rV  s     r   rW  !NonTensorObj.get_free_symbol_uses?#  r  r   r   Nr  r  )r   r   r   r   rX   rW  r   r   r   r   r
  r
  >#  s,    N+$)!	! ,r   r
  c                  `    \ rS rSr% S\S'   S\S'   SS jrSSS jjrSS	 jrSS
 jrSS jr	Sr
g)r_	  iF#  r   r   +Union[FakeScriptObject, torch.ScriptObject]r  c                    U R                   $ r   r  r  s    r   r  TorchBindObject.get_nameK#  r  r   Nc                    U R                   $ r   r  r  s     r   r  !TorchBindObject.codegen_referenceN#  r  r   c                    U R                   $ r   r  r  s    r   r	  TorchBindObject.get_valueQ#  r  r   c                    [        U R                  [        R                  5      (       a  U R                  $ U R                  R                  $ r   )r   r  r  ScriptObjectreal_objr  s    r   get_real_objTorchBindObject.get_real_objT#  s3    djj%"4"455::::&&&r   c                   U R                  5       n[        US5      (       d   e[        UR                  5       5      n[        R
                  " U5      S   nU Vs/ s HE  n[        U[        R                  5      (       d  M$  UR                  5       UR                  5       -  PMG     nn[        R                  " [        R                  US5      $ s  snf )N__obj_flatten__r   )r  r  r   r  r	  r	  r   r  r
  r
  numelr  r  operatorr  )r  real_script_obj	flat_dict
flat_elemsr   
flat_sizess         r   get_buf_bytesTorchBindObject.get_buf_bytesZ#  s    ++-(9::::88:;	((3A6
  
!U\\* )ANNqwwy( 	 

 j!<<
s   #C%Cr   r  r   r  )r   r  )r   ztorch.ScriptObjectr  )r   r   r   r   r   r  r  r	  r  r  r   r   r   r   r_	  r_	  F#  s&    
I66'=r   r_	  c                  B    \ rS rSr% S\S'   S\S'   S
S jrSSS jjrS	rg)r	  ih#  r   r   r  r  c                    U R                   $ r   r  r  s    r   r  GeneratorState.get_namem#  r  r   Nc                    U R                   $ r   r  r  s     r   r   GeneratorState.codegen_referencep#  r  r   r   r  r   r  )r   r   r   r   r   r  r  r   r   r   r   r	  r	  h#  s    
I r   r	  c                      \ rS rSrS	S jrS	S jrS
SS jjr\          SS j5       r\          SS j5       r	Sr
g)_CollectiveKernelit#  c                    gr  r   r  s    r   r  !_CollectiveKernel.should_allocateu#  r	  r   c                    gr'  r   r  s    r   r  "_CollectiveKernel.has_side_effectsx#  r  r   Nc                n   [        U R                  5      [        R                  R                  L d   S5       eU R                  nUb  Xl        OUR                  R                  U l        UR                  R                   Vs/ s H!  o3R                  (       d  M  UR                  PM#     snU l
        g s  snf )Nz,Setting cpp kernel needs a valid op_overload)r   r	  r  r	  r	  r	  r	  r   r	  r	  r	  )r  r	  r	  r   s       r   r	  %_CollectiveKernel.set_cpp_kernel_name}#  s    D$$%)>)>> 	
:	
> !!&#2 #)>>#6#6D  #NN44.
4qFAFF4.
* .
s   B2B2c                <   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H  n
U
R	                  5         M     US   R                  5       nU " [        US9UUWW5      n[        R                  " U5      nUR                  R                  U Vs/ s H  n[        [        US9X5      PM     sn5        UR                  R                  U Vs/ s H  oR                  5       PM     sn5        SU;   a]  UR                  R                  [        [        US9US   U5      5        UR                  R                  US   R                  5       5        g g ! , (       d  f       GNQ= fs  snf s  snf )Nr  r   r  r   )rm   r  r  r
  r  r  r  r	  tree_leavesr	  ru  r  r  r  r  )r	  r	  re  r   r   _example_outputr
  r
  r	  r	  
tensor_argr  r  inpsr  rg  s                   r   create_inplace _CollectiveKernel.create_inplace#  s    WW ""6CDCFC!  %E2C1D&EE$%J  & Q**,f%
 !!&)&&OSTt^Jf5sCtT	

 	!!T"BTc<<>T"BCF?##**z8&-P %%fUm&<&<&>? 9 . U #Cs   FF>F
Fc           
        [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H  n
U
R	                  5         M     [        W[        5      (       a  U R                  Xe5      nUc   eU " [        US9UUWW5      n[        U5       VVs/ s H(  u  p[        U R                  U5      U[        U4/5      PM*     snnUl        [        UR                  U5       H_  u  p[        R                  (       d  [!        U5      (       a  M,  [         R                  R"                  R%                  UR&                  5        Ma     UR                  $ U " U R                  U5      UUWW5      n[        R                  (       d  [!        U5      (       d3  [         R                  R"                  R%                  UR&                  5        U/Ul        U$ ! , (       d  f       GN= fs  snnf )Nr  r  )rm   r  r  r
  r  r   r   r=  r>	  r   r  r  r  r   rA   r  rj   ra  r  r   )r	  r	  re  r   r   r
  r
  r
  r	  r	  r  r  r  r   rP  r  s                   r   create_out_of_place%_CollectiveKernel.create_out_of_place#  s    WW ""6CDCFC!  %F3D2E&FF$%J  & nd++__[AF%%%!0F "+>!: ";IA ((0AYK
 ";FN  #6>>>B::BSC C GG--11#((;	  C
 >>!$$^4F 66>O? ? ))--fkk:$XFNMc ,s   G28/H2
H)r	  r	  r  r   r
  )
r	  r{   re  zUnion[IRNode, list[IRNode]]r   r   r   r   r   r   )
r	  r{   re  z!Union[TensorBox, list[TensorBox]]r   r   r   r   r   z+Union[list[MultiOutput], _CollectiveKernel])r   r   r   r   r  r  r	  r?  r  r  r   r   r   r   r  r  t#  s    

( (@(@ ,(@ 	(@
 (@ 
(@ (@@ 88 28 	8
 8 
58 8r   r  c                  b   ^  \ rS rSr SSS.               SU 4S jjjjrS	S jrSrU =r$ )
_AllReduce_Kerneli$  Nr  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  +aoti_torch_cpu__c10d_functional_all_reduce_r  r  r	  	r  rJ  r	  r
  r  r	  r   r	  r  s	           r   r  _AllReduce_Kernel.__init__$  =     	/ 	 	
 	  !NOr   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hinclude_extra_headerr
  r   rJ  r  rl
  r	  s     r   r	  _AllReduce_Kernel.codegen%$  C    $$%RS,,T2dkk6**%%g. +r   r   r   r  r
  r
  rB  s   @r   r  r  $  s     ,0P KOPP P &	P
 &P +P )P HP 
P P,/ /r   r  c                  b   ^  \ rS rSr SSS.               SU 4S jjjjrS	S jrSrU =r$ )
_AllReduceKerneli-$  Nr  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  *aoti_torch_cpu__c10d_functional_all_reducer  r  s	           r   r  _AllReduceKernel.__init__.$  s=     	/ 	 	
 	  !MNr   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g r  r  r	  s     r   r	  _AllReduceKernel.codegenD$  r
  r   r   r   r  r
  r
  rB  s   @r   r  r  -$  s     ,0O KOOO O &	O
 &O +O )O HO 
O O,/ /r   r  c                     ^  \ rS rSr S
SS.               SU 4S jjjjrSS jrSS jr\SS j5       rSU 4S jjr	S	r
U =r$ )_WaitKerneliL$  Nr  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  +aoti_torch_cpu__c10d_functional_wait_tensorr  r  s	           r   r  _WaitKernel.__init__M$  r  r   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g r  r  r	  s     r   r	  _WaitKernel.codegenc$  r
  r   c                   U R                   S   n[        U[        5      (       d   e[        U[        5      (       a7  UR                   S   n[        U[        5      (       d   [	        U5      5       eU/$ [        U[
        5      (       aG  UR                   S   n[        U[        5      (       a!  UR                  S   u  pEUR                   U   /$ / $ / $ r  )re  r   r   r  r   r  r  )r  rg  r   collr   r   s         r   get_volatile_reads_WaitKernel.get_volatile_readsj$  s    kk!n#v&&&&c,--

1Aa((1$q'1(3J[)) ::a=D$ 122QC())I Ir   c                v   [         R                  R                     U R                  X5      u  nnnnnS S S 5        W(       a   U SU 35       eU " [	        UR                  5       S9UWWW5      nUR                  R                  [        [	        UR                  5       S9X(5      5        g ! , (       d  f       N}= f)Nr  r  )	rm   r  r  r
  r  r  r	  r  r  )	r	  r	  rg  r  r
  r
  r	  r	  r  s	            r   create_wait_WaitKernel.create_wait$  s    WW ""6/!  %E2C1D&EE$cnn./
 	&&:S^^-=>L	
! s   B**
B8c                   > [         TU ]  5       nU R                  5       nU H@  nUR                  R	                  [
        R                  " UR                  5       5      5        MB     U$ r   )r  rH  r  rL  r  rB   rI  r  )r  r  volatile_readsvrr  s       r   rH  _WaitKernel.get_read_writes$  sS    g-/002 B!!,"6"6r{{}"EF !r   r   r   r  r
  r
  )r	  r{   rg  r   r   r   r  )r   r   r   r   r  r	  r  r?  r  rH  r   rA  rB  s   @r   r  r  L$  s     ,0P KOPP P &	P
 &P +P )P HP 
P P,/0 
 
* r   r  c                V   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a5  [        [        R                     " 5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ r   )r   r8   r!   r3   r   r   r;   r   r#   r
  r  r
  r   r  r  s      r   r
  r
  $  s    !h%&&$Q''	At}	%	%u||$&A,Q//A 	Au||	$	$$Q''|r   c                V   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a5  [        [        R                     " 5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ r   )r   r8   r!   r2   r   r   r;   r   r#   r
  r  r
  r%  s      r   r
  r
  $  s    !h%&&A	At}	%	%u||$&A#A&&A 	Au||	$	$A|r   )r   r   r   r   )r   r   r   r   )r   r   r   r  )r   r  r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   r$  r   )r   z(Sequence[Union[int, torch.SymInt, Expr]]r   zOptional[ShapeEnv]r   r  )r   Sequence[Union[int, Integer]]r   r  ry  )r   zLiteral[None]r   r   r   r   )r   r   r   r   r   r'  )r   r  r   r   r   zOptional[torch.Tensor])r  zOptional[Sequence[_T]]r   z Optional[Sequence[Optional[_T]]])r   z2Union[IRNode, OutputSpec, torch.device, None, str]r   r  )r   z&Union[IRNode, torch.device, None, str]r   r   )r   zUnion[Buffer, TensorBox]r0  r   r   r   )r;  r  r<  r  r=  r  r   r   )rP  r   rQ  z"Sequence[Union[int, torch.SymInt]]r   r   )r_  r  r   r   )re  r  r   r  )r   zUnion[Expr, Sequence[Expr]]r  r|  r   rl   )r  r   r  r|  r  r   r   r  )r  r  r  r
  rM  r!   r   r  r@
  )TFNFN)r   r   rK  r   rU  r   rV  rg  r6  r   rD  rg  r   ztuple[StorageBox, Layout])r   r   rV  r'  r   r   rs  )r  r  r=  r  r   r   )r  r|  r   r   )r   rN	  r   zTypeIs[Sequence[IRNode]])r;  r  r   r   )r   r	  r   z-tuple[list[ShapeAsConstantBuffer], list[Any]])r   r   r   r=  (L  
__future__r   r  r  r  r  loggingr  ostextwrapro  collections.abcr   r   r   r   r   r	   r
   enumr   r   typingr   r   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   r   unittest.mockr    r   r!   r"   r#   torch._export.serde.schema_exportserder   rR  torch._library.utilsr  rq  r  torch._loggingr  torch.fxtorch.utils._pytree_pytreer	  torch._dynamo.utilsr$   torch._export.serde.serializer%   *torch._higher_order_ops.auto_functionalizer&   torch._inductorr'   r,  r)   torch._prims_commonr*   r+   r,   r-   r.   torch._subclasses.fake_tensorr/   %torch.fx.experimental.symbolic_shapesr0   r1   r2   r3   r4   r5   r6   r7   r8   torch.fx.noder9   torch.utils._ordered_setr;   torch.utils._sympy.functionsr<   r=   r>   torch.utils._sympy.symbolr?   r  rA   rB   codegen.commonrC   rD   rE   rF   rG   rH   rI   rJ   rK   rL   	loop_bodyrM   ops_handlerrN   rO   rP   rQ   runtime.benchmarkingrR   runtime.hintsrS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   virtualizedrk   rl   rm   "torch._library.fake_class_registryrn   ro   rp   codegen.cuda.cuda_templaterq   codegen.wrapperrr   r  rs   rt   r   r   r$  __version__r  r  ImportErrorru   rv   rw   rx   r   ry   rG  rz   r	  r	  r	  r{   	getLoggerr   r  r  r	  getenvr  r  r   r   r   r   r8  r   r   r   r   r   r   r9  r;  r   r   r   r   r  r  r)  r+  r6  rA  rV  ra  rh  r   r  r  rH  rJ  rg  ry  r  r  r"  r  r  r  r  r  r  r!  r  rR  rK  r]  r_  rd  r   r  r  r  rd  rO  rR  rd  r  r  r  r  r  r  r  rL  r  ri  ry  r}  r  r  rY  r  ry  r  rb  r  r   rw  rz  r  r   r   PrimitiveInfoTyper  r  r	  r-	  r:	  rB	  rL	  rx  rn	  rt	  r  r
  r
  r	  r  r
  r
  r
  r
  r  rI  r`  rq  r  r  r  r  r  r  r   r   r  r  r  r  r>	  r  r  r   rX  r6  r<  r>  rX  r_  r}  r   r
  r_	  r	  r  r  r  r  r
  r
  r   r   r   <module>rR     s   "       	   N N :           ' ' 2 2 , ,   $ $ ( ? M # 2  :
 
 
  / L L * "     N N - :     0 * ) CB&85$% %L)$''NJ t_T]T]T]CI&) &C,-) -

 5 5uzz7U7U UVi V!			8??4	8yy~~bii 7<=2991378'T  k	sDk!12K8STU	i 	) d#  $$$D44 , ! $  TX	1>P	 TX
	1
>P

 
 N 
 N 
 O 
 O .2&*8!%	>9	>	>;('00,/0	00    
	.$G$G/$G $GN'u, u,p	 UH H HV A
F A
 A
H&  
  
  
F 
i 
 
F |$y!y!u=)< 8  JN<N<N +<NBF<N<N~ i
 i
 i
\ '+1:
#  &	& "8D>8D>"BH"LMY M7S9 7St#1 #L[
+ [
| E
5 E
 E
R 	 	 	 V5 V Vr	 !<@=A999 9 :	9
 9 ;9 9x:	$ ^
v ^
 ^
B N N Nb -( - -` :9( :9 :9z !( ! !H s; s sl Sh S Sl & & &R_A _AD 6  " K| K K$ S| S S'9	(<7 7  O
Z O
 O
dC& CRHV RHj!Gf !GHT %%{ %%P   .V* V*r UEV] E EP U&fi & & & 
K 
[ 
& 6  ( F    Uo4_ o4 o4dE
_ E
PM> M` #udCeCeT<Q6R1SST :$ :$z"| "
WB. WBt5 50( (4N >5455 UR? R Rj A9 AH Ut< t tn U"l " "J
/ 
((" ("V
V 
B=L =@%
 %
P
- 
$1
\ 1
hHl HV))| ))Z/, /d< 8B5 B$8- 82K)l K)\4)| 4)n3T 3Tl*3 *3Z-L -8<;< <;~ U  
^*& ^*B U
. 
 
<   2 
  '
, '
X T T Tn+
 +_% _%D U*v * *L Up.\ p. p.f UR , R  R j"
"2" Ud  d  d N	,n ,^6  =l = =B \  W Wt/) />/( />R# Rp  Ka  NJs   j 
j('j(