
    i                        % S SK r S SKrS SKrS SKrS SKJr  S SKJrJr  S SK	J
r
JrJrJrJrJrJrJrJrJr  S SKrS SKJr  S SKJr  S SKJr  S SKJrJr  S S	KJ r J!r!  S S
K"J#r#  S SK$J%r%  S SKJ&r&  S SK'J(r(J)r)J*r*  S SK+J,r,  S SK-J.r.  0 r/\\0\S\\1   4   4   \2S'    " S S5      r3S\RL                  RX                  S\44S jr5S\RL                  RX                  S\44S jr6 S`S\RL                  RX                  S\%S\1S\1S\\)   SS4S jjr7S\\
   S\\,   4S jr8 S`S\,S\\)   S\44S  jjr9S!\\,   S\\)   S\\%   4S" jr:S\\)   S\44S# jr; S`S!\\,   S\\)   S\\%   4S$ jjr<S!\\,   S\\%   4S% jr=         SaS!\\,   S\\)   S'\4S(\4S)\4S*\4S+\4S,\4S-\4S.\4S\\%   4S/ jjr> S`S0\RL                  R~                  S\\)   S\\%   4S1 jjr@\ " S2 S35      5       rA\ " S4 S55      5       rB\ " S6 S75      5       rC\ " S8 S95      5       rD SbS:\\B   S;\1S\14S< jjrES=\BS\%S\44S> jrFS=\BS\%S\14S? jrG ScS:\\B   S\%S@\4S\B4SA jjrHS\RL                  RX                  S\\\%   \\%   4   4SB jrIS0\RL                  R~                  S\44SC jrJ SbS&SD.SE\1SF\\%   S0\RL                  R~                  S\)SG\1S@\4S\D4SH jjjrK " SI SJ5      rLSE\1SF\\%   S0\RL                  R~                  S\)SG\1S\D4SK jrMS0\RL                  R~                  S\\,   4SL jrNS0\RL                  R~                  S\\,   4SM jrOS0\RL                  R~                  S\\,   4SN jrPS0\RL                  R~                  S\\,   4SO jrQS0\&R~                  S\\%   4SP jrRS0\&R~                  S\\%   4SQ jrSS0\&R~                  SR\\%   SS4SS jrTST\U\1   SU\U\1   S\U\1   4SV jrV  SdSW\S\U\1   4   SX\RL                  R~                  SE\1SY\RL                  RX                  S\\)   SZ\4S\U\1   4S[ jjrW S`SW\S\U\1   4   S0\RL                  R~                  SE\1S\\)   S\U\1   4
S\ jjrX    SeSW\S\U\1   4   S0\RL                  R~                  SE\1S\\)   SZ\4S]\4S^\4S\U\1   4S_ jjrYg)f    N)defaultdict)	dataclassfield)
AnyCallablecastDictIterableListOptionalSetTupleUnion)memory)
while_loop)executorch_call_delegate)internal_assertInternalError)is_inplace_variantis_out_variant)TensorShapeDynamism)
TensorSpec)fx)ConstantArgumentExportGraphSignature	InputKind)Node)tree_flatten.REGISTERED_ALGOSc                   F   \ rS rSrSr SS\R                  R                  S\S\S\S\	\
   S	S4S
 jjr\ SS\S\S\S	\4S jj5       r\S\\   S\\   S	\4S j5       r\S\S\S	\4S j5       r\S\S\S	\4S j5       r\S\S\S	\4S j5       r SS\S	\4S jjrSS jrSrg)Verifier0   z
Verify if the outcome of a memory planning algorithm makes sense.
E.g., make sure tensors having overlapping lifetime does not have overlapping
storage/buffer.
Ngraph_modulealloc_graph_inputalloc_graph_outputalloc_mutable_buffersgraph_signaturereturnc                 @    Xl         XPl        X l        X0l        X@l        g N)r#   r'   r$   r%   r&   )selfr#   r$   r%   r&   r'   s         ^/var/www/html/ai-image-ml/venv/lib/python3.13/site-packages/executorch/exir/memory_planning.py__init__Verifier.__init__7   s"     ).!2"4%:"    lhs_specrhs_specaccept_both_nonec                     UR                   UR                   :w  a  gUR                  c  UR                  c  U$ UR                  UR                  :H  $ )z
Given two `TensorSpec`, return if their `mem_obj_id` are the same. Note that if
both are None, this function will return True if `accept_both_none` is True and
False otherwise.
F)mem_id
mem_obj_id)clsr0   r1   r2   s       r,   mem_obj_id_matchVerifier.mem_obj_id_matchE   sM     ??hoo- &8+>+>+F##""h&9&999r/   lhs_ivlrhs_ivlc                     US   US   :  d  US   US   :  a  gUS   US   :  =(       a    US   US   :*  =(       d    US   US   :  =(       a    US   US   :*  $ )zW
The passed in intervals are inclusive in both sides. Return if they have
overlapping.
r      F )r6   r9   r:   s      r,   has_overlapVerifier.has_overlapW   st     1:
"gaj71:&=
gaj(EWQZ71:-E 
AJ'!*$Awqz)A	
r/   c                     UR                   nUR                   n[        US   S L=(       a    US   S LU S35        [        US   S L=(       a    US   S LU S35        U R                  X45      $ )Nr   r<   z  should have valid start and end)lifetimer   r>   )r6   r0   r1   lhs_lifetimerhs_lifetimes        r,   lifetime_overlapVerifier.lifetime_overlape   s    ((((O4'GLO4,Gj89	
 	O4'GLO4,Gj89	
 |::r/   c                    / nUR                   UR                   :w  a  gX4 H  n[        UR                  S:  U S35        [        [        UR                  [
        5      =(       a    UR                  S:  U S35        UR                  UR                  UR                  UR                  -   S-
  /5        M     U R                  " U6 nU$ )NFr   z& should have non-zero allocated memoryz$ should have specified memory offsetr<   )r4   r   allocated_memory
isinstance
mem_offsetintappendr>   )r6   r0   r1   	intervalsspecr>   s         r,   storage_overlapVerifier.storage_overlaps   s    	??hoo-(D%%*&>? 4??C0IT__5I&<= $//D4I4I"IA"MN ) ooy1r/   c                     SUR                    SUR                    S3nUSUR                   SUR                   SUR                   S3-  nUSUR                   SUR                   SUR                   3-  nU$ )Nzlhs life time: z, rhs lifetime:  zlhs: mem_id z
 storage: z, zrhs: mem_id )rA   r4   rI   rG   )r6   r0   r1   messages       r,   _debug_message_from_specs"Verifier._debug_message_from_specs   s    
 h//00@ARAR@SSTU 	 	\(//!2*X=P=P<QQST\TmTmSnnopp\(//!2*X=P=P<QQST\TmTmSnoor/   "allow_lifetime_and_storage_overlapc                    Sn[        [        U R                  R                  R                  U R
                  SU R                  (       + U R                  (       + U R                  (       + SSSS9	5      n[        U5       H  u  pEX4S-   S  H  nUR                  SL UR                  SL :w  a  [        S5      e[        R                  XV5      nU(       d  MJ  U(       d7  U R                  XV5      (       a!  [        S[        R                  XV5       35      e[        R!                  XV5      (       d  [        S	U S
U 35      eUS-  nM     M     U$ )a  
'allow_lifetime_and_storage_overlap' allows tensors to overlap in both
lifetime and storage. If is it False, and two tensors have both overlapping
lifetime and storage, throw an exception.
Returns:
    Number of pairs of tenors that have overlapping storage.
r   TF)ignore_constignore_graph_inputignore_graph_outputignore_mutable_buffersdo_assertionignore_out_var_nodededupr<   Nz4Specs do not agree on whether mem_obj_id is defined.zUnexpected storage overlap: z$Unexpected mem_obj_id mismatch: lhs z, rhs )listcollect_specs_from_nodesr#   graphnodesr'   r$   r%   r&   	enumerater5   r   r!   rN   rD   rS   r7   )r+   rU   num_reuse_pairs	all_specslhs_spec_idxr0   r1   has_storage_overlaps           r,   verify_storage_reuseVerifier.verify_storage_reuse   s]     $!!''--$$!'+'='=#=(,(?(?$?+/+E+E'E"$)

	 '0	&:"L%Q&6&89''4/X5H5HD5PQ'N  '/&>&>x&R#*9d>S>S? ? (6x7Y7YZb7m6no   00DD'>xjxjY   1$3 : ';8 r/   c                    ^ U R                   n[        UR                  R                  U R                  5      [        UR                  R                  5      -  nU(       a1  U R                  U R                  :w  a  [        R                  " S5        gSnSnSnSnSS1UR                  R                   Vs1 s H  owR                  iM     sn-  nSU;   d
   SU 35       e[        UR                  R                  U R                  5      mUR                  R                   H  n	U	R                  U;   d  M  [        U	5      =n
(       d  M)  [        XR                  5      (       a  ME  [        [        U4S jU
5      5      n
[!        U
5      S:X  a  U R                  nM{  [#        S	 U
 5       5      n[#        S
 U
 5       5      n[%        S U
 5       5      U:X  d   S5       eU	R                  S:X  a  UnX\-  nM  UnXl-  nM     ['        U R                  S9nSU;   a>  U(       a7  Uc   S5       eU(       d&  X0R                  :X  d   SU SU R                   35       eUc   S5       eU(       d'  X@R                  :X  d   SU SU R                   35       eggs  snf )z
alloc_graph_input / alloc_graph_output indicates if memory for graph
input/output is allocated by the compiler. If not, the runtime will
set them using buffers provided by users.
zlHaving overlapping graph input/output tensors while the allocation decision for graph input/output mismatch.NFplaceholderoutputzgraph module has no output: c                 >   > U R                   (       + =(       a    U T;  $ r*   )const)rM   mutable_buffer_specss    r,   <lambda>4Verifier.verify_graph_input_output.<locals>.<lambda>   s    ^ &= (<<&=r/   r   c              3   R   #    U  H  oS L =(       d    UR                   S Lv   M     g 7fr*   rI   .0rM   s     r,   	<genexpr>5Verifier.verify_graph_input_output.<locals>.<genexpr>  s&       LQDDL?DOO4$??E   %'c              3   r   #    U  H-  nUS L =(       d    UR                   [        R                  :H  v   M/     g 7fr*   )shape_dynamismr   DYNAMIC_UNBOUNDrs   s     r,   ru   rv     s>      1 !& DL R**.A.Q.QQR %s   57c              3   R   #    U  H  oS L =(       d    UR                   S Lv   M     g 7fr*   rr   rs   s     r,   ru   rv     s%     VPUCt(CCPUrw   z;Either all or non of the tensors should be allocated memory)r'   zgraph_input_allocated not setzMisallocate graph input: z v.s. zgraph_output_allocated not setzMisallocate graph output )r#   get_graph_input_tensorsr`   ra   r'   get_graph_output_tensorsr$   r%   loggingdebugop_get_mutable_buffer_specsget_node_tensor_specs_is_mutable_bufferr^   filterlenanyall_do_user_inputs_exist)r+   r#   overlapgraph_input_allocatedgraph_output_allocatedhas_dynamic_unbound_inputhas_dynamic_unbound_outputnode
check_listndspecs	allocatedhas_dynamic_unbound_tensoruser_inputs_existrn   s                 @r,   verify_graph_input_output"Verifier.verify_graph_input_output   s    (( *$$d&:&:
$\%7%7%=%=>? ..$2I2IIMM~  $!%$)!%*"#X. , 2 2 8 82
 8GG 82
 

 :%T)El^'TT%  9$$d&:&: 
 $$**Buu
"!6r!:::%b*>*>??= u:?-1-D-D*  LQ  	 .1 1 !&1 .* VPUVV !Q QQ! 55M),5)-K--6*.L.E +J 2$BVBVWJ&+<(4U6UU4,)-C-CCe./D.EVDLbLbKcdeC &1S3SS1)&*A*AAc*+A*B&I`I`HabcA *s2
s   3J)r$   r%   r&   r#   r'   r*   TF)r(   N)__name__
__module____qualname____firstlineno____doc__torchr   GraphModuleboolr   r   r-   classmethodr   r7   r   rJ   r>   rD   rN   strrS   rg   r   __static_attributes__r=   r/   r,   r!   r!   0   sc    ;?;hh**;  ; !	;
  $; ""67; 
; RV:!:-7:KO:	: :" 
$s) 
d3i 
D 
 
 ;
 ;j ;T ; ; z Z D  ( !-7	  :?7267	7rYcr/   r!   r   r(   c                 (   U R                   S:H  =(       a}    [        U R                  [        R                  R
                  5      =(       aH    [        U R                  R                  R                  U R                  R                  R                  5      $ Ncall_function)
r   rH   targetr   _ops
OpOverloadr   _schemanameoverload_namer   s    r,   _is_out_var_noder   (  sa    ?" 	Xt{{EJJ$9$9:	X4;;..33T[[5H5H5V5VWr/   c                 (   U R                   S:H  =(       a}    [        U R                  [        R                  R
                  5      =(       aH    [        U R                  R                  R                  U R                  R                  R                  5      $ r   )
r   rH   r   r   r   r   r   r   r   r   r   s    r,   _is_inplace_noder   0  sb    ?" 	
t{{EJJ$9$9:	
KK$$dkk&9&9&G&G
r/   rM   node_idxmax_node_idxgsc                     UR                   u  pVU R                  S:X  a  SnOUb  XR:  a  UOUnU R                  S:X  a  [        X5      (       a  UnOUb  Xb:  a  UOUnXV/Ul         g)a   
Update the lifetime of the tensor to cover node_idx. A tensor's lifetime
are represented by the index of the first and last node referring
that tensor in its inputs/outputs.

Arguments:
    spec: the TensorSpec for the tensor
    node_idx: extend the tensor's lifetime to cover node_idx
rj   r   N)rA   r   r   )r   rM   r   r   r   startends          r,   update_tensor_lifetimer   :  sd      JEww-!MU-=5ww-$6t$@$@+hSLDMr/   inputsc                     [        [        U 5      5      S    Vs/ s H  n[        U[        5      (       d  M  UPM     sn$ s  snf )zH
This method need return Node object embedded inside List/Dict as well.
r   )r   r^   rH   r   )r   r   s     r,   filter_nodesr   Y  s5     &d6l3A6O62*R:NB6OOOs
   A A r'   c                    Uc  gU R                   S:X  ap  [        U R                  [        5      (       aQ  U R                  UR                  ;   a7  UR                  U R                     nX!R
                  R                  5       ;   a  gg)zP
Check if the node is mutable buffer according to the provided graph signature.
Frj   T)r   rH   r   r   inputs_to_buffersbuffers_to_mutatevalues)r   r'   fqns      r,   r   r   `  sk     ww-dkk3''{{o???%77D;;BBDDr/   ra   c                     [        5       nU  H8  n[        X15      (       d  M  [        U5       H  nUR                  U5        M     M:     U$ r*   )setr   r   add)ra   r'   rn   r   rM   s        r,   r   r   s  sF     -0Ed44-d3$((. 4   r/   c                 n    U c  g[        [        S U R                  5      5      n[        S U 5       5      $ )NFc                 <    U R                   [        R                  :H  $ r*   )kindr   
USER_INPUT)inputs    r,   ro   '_do_user_inputs_exist.<locals>.<lambda>  s    %**	(<(<<r/   c              3      #    U  Hd  n[        UR                  [        5      (       + =(       d9    [        UR                  R                  [        [
        [        [        45      (       + v   Mf     g 7fr*   )rH   argr   valuerJ   floatr   r   )rt   r   s     r,   ru   (_do_user_inputs_exist.<locals>.<genexpr>  sR       !E uyy"233 	D%))//Cc+BCC	D s   A,A.)r^   r   input_specsr   )r'   user_inputss     r,   r   r   ~  sG    <''	
K   !  r/   c                     [        5       nU  HJ  nUR                  S:X  d  M  [        X15      (       a  M'  [        U5       H  nUR	                  U5        M     ML     U$ )Nrj   )r   r   r   r   r   )ra   r'   graph_input_tensorsr   rM   s        r,   r|   r|     sR     %77m#,>t,U,U-d3#''- 4 
 r/   c                     [        5       nU  H8  nUR                  S:X  d  M  [        U5       H  nUR                  U5        M     M:     U$ )Nrk   )r   r   r   r   )ra   graph_output_tensorsr   rM   s       r,   r}   r}     sE    577h-d3$((. 4 
  r/   TrX   rY   rZ   rW   r\   r]   r[   ignore_dynamic_unbound_tensorc
              #     #    [        5       n
U(       a  [        X5      O	[        5       nU(       a  [        U 5      O	[        5       n[        5       nU(       a  [        X5      nU  GH  nUR                  S;   a  M  U(       a  [        U5      (       a  M/  [        U5      =n(       d  MC  [        U5      (       a  MU  [        X5      (       a	  U(       a  Mn  U(       a  [        UR                  S;   =(       d    UR                  [        R                  [        R                  [        R                  [         R"                  R$                  R&                  [(        [         R"                  R$                  R*                  [,        4;   SUR                   SUR                   35        U H  nUc  M  U	(       a   UR.                  [0        R2                  :X  a  M/  U(       a  UU;   a  M>  U(       a  UU;   a  MM  U(       a  UU;   a  M\  U(       a4  UR4                  (       a#  UR6                  R9                  SS5      (       d  M  U(       a  UU
;   a  M  U
R;                  U5        Uv   M     GM     g7f)a  
Collect specs from the passed in nodes. Do filtering as controlled by
arguments.
Arguments:
    ignore_graph_input: ignore graph input tensors from placeholder nodes
    ignore_const: whether to ignore the const
    ignore_out_var_node: whether to ignore out variant node
    dedup: whether do dedup
    do_assertion: whether to assert the filtered nodes belong to a resticted set like alloc, getitem
)get_attr)rj   rk   zUnexpected op z	, target Nweight_has_gradientF)r   r|   r}   r   r   r   r   r   r   r   r   r   allocviewoperatorgetitemr   opshigher_ordercond
exir_whilemap_implr   ry   r   rz   rm   metagetr   )ra   r'   rX   rY   rZ   rW   r\   r]   r[   r   unique_specr   r   rn   r   r   rM   s                    r,   r_   r_     s    , %K;M7SVSX  ,? 'CE  -0E8P77l" #3D#9#9.t444D!!d449O44 
;;LLKK$$II**//II**33,	 !	4;;-@ D| .''+>+N+NN
 "d.A&A"t/C'C%$2F*FJJ		&;UCC;&OOD)J? E s   I#I%r#   c                    [        5       n[        U R                  R                  5      S-
  n[	        U R                  R                  5       H|  u  pE[        [        [        R                  " U/UR                  UR                  R                  5       5      5      USSSSSSS9 H!  n[        XVXCU5        UR                  U5        M#     M~     U$ )zC
Set the lifetime for all the tensors encountered in the Fx graph.
r<   F)rX   rW   r\   r]   r[   r   )r   r   r`   ra   rb   r_   r   	itertoolschainargskwargsr   r   r   )r#   r'   r   r   r   r   rM   s          r,   update_all_tensors_lifetimer     s     EE|))//014L#L$6$6$<$<=,$DKK<N<N<PQR$ %*/	
D #4xWIIdO	
 > Lr/   c                   .    \ rS rSr% Sr\\S'   \\S'   Srg)AllocationSpeci,  zA
AllocationSpec is used to represent the allocation of a tensor.
offsetrM   r=   N)	r   r   r   r   r   rJ   __annotations__r   r   r=   r/   r,   r   r   ,  s    
 K
r/   r   c                   v    \ rS rSr% Sr\\S'   \\S'   \\S'   \\S'   \\S'   \" \S9r	\
\   \S	'   S
\4S jrSrg)SharedObjecti8  aC  
We define the concept of shared object, which represents a segment
in the memory buffer that can be shared by multiple tensors. In order to
check if a shared object is available for a tensor, we maintain the
last_used_index attribute. The shared object will be available for nodes
with index greater than last_used_index.
idxr   sizefirst_used_indexlast_used_index)default_factoryallocationsr(   c           	          SU R                    SU R                   SU R                   SU R                  U R                  4 S3	$ )NzSharedObject(idx=z	, offset=z, size=z, lifetime=[z]))r   r   r   r   r   )r+   s    r,   __repr__SharedObject.__repr__O  s_    "488*Idkk]'$))T`aeavavx|  yM  yM  bM  aN  NP  Q  	Qr/   r=   N)r   r   r   r   r   rJ   r   r   r^   r   r   r   r   r   r   r=   r/   r,   r   r   8  sF     
HK
I(-d(CKn%CQ# Qr/   r   c                   8    \ rS rSr% Sr\\S'   \\S'   \\S'   Srg)SpecAllocResultiS  a!  These are the values that a memory plannig algorithm assigns to a spec.
These are not directly written back into the spec object, but are used to
track the allocation decisions and assigned back to the spec object in the
end, based on which algorithm is picked as the best performing one.
r4   r5   rI   r=   N)r   r   r   r   r   rJ   r   r   r=   r/   r,   r   r   S  s     KOOr/   r   c                   >    \ rS rSr% Sr\\\4   \S'   \	\
   \S'   Srg)MemoryAlgoResulti`  a  This is the result returned by a memory planning algorithm that is
invoked by memory_planning_algorithm_suite. It contains the allocation
decisions of that algorithm for all the specs, and the size of the buffer
that was used for different memory hierarchies.
	spec_dictbufsizesr=   N)r   r   r   r   r   r	   r   r   r   r   rJ   r   r=   r/   r,   r   r   `  s#     J/003ir/   r   shared_objectsinput_total_sizec                 D    UnU  H  nX#l         X#R                  -  nM     U$ )z
Assign concrete location in the buffer for each SharedObject.offset.

Assuming all the passed in shared objects belong to the same memory buffer.
)r   r   )r   r   
total_sizesobjs       r,   materialize_bufferr  l  s,     "J ii
  r/   r  c                     U R                    H[  nUR                  S   UR                  R                  S   :  a  M/  UR                  S   UR                  R                  S   :  a  M[    g   g)z7
Check if a shared object and a tensor do not overlap.
r<   r   FT)r   rA   rM   )r  rM   r   s      r,   _does_not_overlapr  {  s\     !!MM!uzz22155}}Q%**"5"5a"88 " r/   c                 6   SnU R                    H  nUR                  S   UR                  R                  S   :  d*  UR                  S   UR                  R                  S   :  a  MY  [        UR                  UR                  R
                  -   U5      nM     U$ Nr   r<   )r   rA   rM   maxr   rG   )r  rM   
max_offsetr   s       r,   (_find_max_overlapping_allocations_offsetr    s     J!!MM!uzz22155}}Q%**"5"5a"88

(C(CCZP
 " r/   allow_overlapping_allocationsc                 *   SnU  H  n[        XA5      (       d  M  UR                  UR                  :  d   S5       eUn[        UR                  UR
                  S   5      Ul        [        UR                  UR
                  S   5      Ul        [        SU5      nUR                  R                  U5          O   Uc  U(       a  U  H  n[        XA5      nUS:  d  M  XaR                  -   UR                  ::  d  M4  Un[        UR                  UR
                  S   5      Ul        [        UR                  UR
                  S   5      Ul        [        Xa5      nUR                  R                  U5          O   Uc  [        [        U 5      SUR                  UR
                  S   UR
                  S   5      n[        SU5      nUR                  R                  U5        UR
                  S   Ul        UR
                  S   Ul        U R                  U5        U$ )a  
Pick the available shared object to which to assign this spec,
or create a new one
Algorithm details
Previous: Look at every spec in chronological order. Find if previously allocated object
allows it to fit in. If not, allocate a new object.
New:
- Sort all the specs by allocation size
- Process the specs in order
- If the spec's size in smaller than previously allocated buckets:
    - Conditions under which previously allocated bucket can be used:
      - Lifetime of the spec does not overlap with lifetime of the bucket.
          - In this case allocate spec to that bucket and expand its lifetime.
          - Spec is allocated at offset = 0 in this bucket.
          - Add this spec to allocated object's list of specs.
      - Lifetime of the spec overlaps with lifetime of the bucket,
        partially or fully (e.g. spec's lifetime subset of bucket's lifetime)
          - If none of the specs in the bucket overlaps with spec's lifetime.
            - Allocate spec to the bucket at offset = 0.
            - Add this spec to the bucket's list of specs.
            - Expand bucket's lifetime accounting for added spec's lifetime.
          - If one or more specs in the bucket overlaps with spec's lifetime.
            - Collect offsets (at which the given overlapping spec is allocated in the bucket).
              of all the overlapping specs, and find the max offset.
            - Allocate spec to the bucket at offset = max_offset + max_offset_spec_size.
            - Add this spec to the bucket's list of specs.
            - Expand bucket's lifetime accounting for added spec's lifetime.
    - If none of these conditions are met, allocate a new bucket.
        - Add spec to this bucket.
        - Update bucket's lifetime to that of the spec.
- If the spec's size is larger than previously allocated buckets, allocate a new bucket.
    - Size and lifetime of this bucket is that of the spec

Proof of correctness:
- If allocating a new bucket, it is correct.
- If allocating spec to an existing bucket, whose lifetime does not overlap with any
  of the previously allocated specs' lifetime, then the allocation is correct.
Proof of correctness by induction when adding spec to an existing bucket:
- If all previous allocations in the given bucket are correct:
    - Then the new one being added must be correct because when the requested allocation
      overlaps with one or more previous allocations, we find the largest offset among
      all the overlapping allocations, and allocate the new spec at that offset. Hence,
      the allocation at such an offset, will not overlap with any previous allocations.
Base case: A newly added allocation within a bucket with single allocation is correct:
because a) it must fit and b) its lifetime must not overlap with object's lifetime.
This holds true because of the following invariants:
- Once a bucket is created, it is never resized.
- All the allocations within a bucket follow this:
  - Span, defined by allocation's offset + size, of two allocations can only overlap,
    if their timelines do not overlap.
NzAllocation specs are not sortedr   r<   )r  r   rG   minr   rA   r	  r   r   r   rK   r  r   r   )r   rM   r  pickedr  allocation_specr
  s          r,   pick_shared_objr    s   p FT((99 5 55X7XX5F$'(=(=t}}Q?O$PD!#&t';';T]]1=M#ND ,Q5O%%o6  ~7"DA$MJA~ 5 55B!F,/0E0Et}}UVGW,XD)+.t/C/CT]]STEU+VD(&4Z&FO&&--o> # ~!!MM!MM!
 )D1!!/2"&--"2!%q!1f%Mr/   c                    U R                   [        R                  :X  aV  U R                  S   n[	        U[
        R                  R                  5      (       d   eUR                  R                  S5      nOU R                  R                  S5      n[	        U[        5      (       a  U/n[	        U[        [        45      (       d  / $ U Vs/ s H6  n[	        U[        [        [        [         [#        S5      45      (       a  M4  UPM8     sn$ s  snf )za
Return the list of the tensor specs for the node or empty list if the node
has no tensor specs.
r   rM   N)r   r   r   r   rH   r   r   r   r   r   r   r^   tuplerJ   r   r   r   type)r   baser   rM   s       r,   r   r     s     {{fkk!yy|$....		f%		f%%$$edE]++	 
dS%sDJ$GH 
 	
 
s   	3D	 D	c                    U R                   R                   Hp  nUR                  [        :X  d  M  [	        U R                   R
                  UR                  S   R                  5      nSUR                  R                  5       ;   d  Mp    g   g)Nr   xnnpackTF)	r`   ra   r   r   getattrowning_moduler   
backend_idlower)r#   r   lowered_modules      r,   _contains_xnnpack_delegater    so    ""((;;22$""00$))A,2E2EN N55;;== ) r/   )r  	alignmentr   extra_paddingc          	         [        0 / 5      n0 n[        [        5      nSSKn	/ n
U H  nU	R	                  XS S9  M     U
R                  5         U
 H  nUR                  R                  U[        SSS5      5      nUR                  c  SUl	        OUR                  Ul	        XR                  U'   UR                  U 5        [        XR                     UU5      X{'   M     [        U5      S:X  a  SS/nGO6S/[        UR                  5       5      S-   -  nSnU H  nSn[        USS5      =n(       a+  [!        U[        5      (       d   e[        U5      U:  a  UU   n[#        X   U5      X'   X==   U-  ss'   X    H  nUR$                   Hm  nUR&                  nUR                  R                  US5      nUc   SU S35       eUR(                  Ul        UR,                  UR,                  -   Ul        US-  nMo     M     M     [        U5      U:X  d   S	[        U5       S
U S35       e[0        R2                  " SU 35        Xl        U$ )a  Greedy algorithm to allocate memory for tensors in the graph.

Args:
    alignment: Memory alignment requirement
    specs: Set of TensorSpec objects with updated lifetimes
    graph_module: Graph module
    graph_signature: Graph signature
    extra_padding: Additional padding to add to each memory buffer (in bytes)
    allow_overlapping_allocations: If set to true, allows for allocations that overlap
        in their lifetime but are at different offsets in the storage. By default true.
        This flag is added to allow for Vulkan to use MemoryPlanningPass with overlapping
        allocations disabled

Returns:
    MemoryAlgoResult containing the allocation decisions
r   Nc                     U R                   $ r*   )rG   )xs    r,   ro   greedy.<locals>.<lambda>E  s
    8J8Jr/   keyr<   input_mem_buffer_sizeszSpec z not found.z-All specs should be processed but there were z specs and processed z specsz#greedy algorithm returns bufsizes: )r   r   r^   bisectinsortreverser   r   r   r4   realignr  r   r	  keysr  rH   r  r   rM   r   r5   r   rI   r~   r   r   )r  r   r#   r'   r   r  greedy_resultspec2objr   r(  sorted_specsrM   spec_alloc_resulttotal_sizesnum_specs_processedr4   r   r   r  r   s                       r,   greedyr3  !  ss   2 %R,MH &N
 Ll.JK   *3377oaQRTU>VW;;'($'+{{$(9%Y(334)
   >a !fcS!4!4!67!;<$F "<1I4PPxP!(D1111x=6)'/'7$"4&(8#K =0
 '.!--E ::D )6(?(?(C(CD$(O%,8SE${:SS83788%037;;3M%0'1,' . / %2 M00	{:3x=/I^_r^ssyz	{0 MM7}EF(r/   c                       \ rS rSr SS\\\S\4         SS4S jjrS\	S\
\   S	\R                  R                  S
\S\	S\\	   4S jrSrg)MemoryPlanningAlgorithmSuitei  N	algo_list.r(   c                 $    Uc  [         /nXl        g r*   )r3  r6  )r+   r6  s     r,   r-   %MemoryPlanningAlgorithmSuite.__init__  s     I@Ir/   r  r   r#   r'   r   c                   ^ 0 mU R                    HU  n[        U[        R                  5      (       a  UR                  R
                  nO[        USS5      nU" UUUUU5      TU'   MW     [        TR                  5        Vs1 s H  n[        UR                  5      iM     sn5      S:X  d   S5       e[        TU4S jS9n	[        R                  " SU	 35        TU	   R                  n
TU	   R                   HH  nTU	   R                  U   nUR                  Ul        UR                  Ul        UR                   Ul        MJ     U
$ s  snf )a  
Memory planning algorithm suite that runs a list of memory planning algorithms
and returns the result of the algorithm that minimizes the total memory usage.

Args:
    graph_module: The graph module to allocate memory for
    alignment: Memory alignment requirement
    graph_signature: Optional graph signature
    alloc_graph_input: Whether to allocate memory for graph input
    alloc_graph_output: Whether to allocate memory for graph output
    allow_overlapping_allocations: Whether to allow overlapping allocations
    algo_list: List of memory planning algorithms to run
    specs: Optional set of TensorSpec objects with updated lifetimes. If None, they will be
        calculated from the graph_module.

Returns:
    List of buffer sizes for each memory hierarchy
r   Nr<   zVDifferent memory planning algorithms should have the same number of buffers allocated.c                 4   > [        TU    R                  5      $ r*   )sumr   )kmem_algo_resultss    r,   ro   7MemoryPlanningAlgorithmSuite.__call__.<locals>.<lambda>  s    C0@0C0L0L,Mr/   r%  z,Best memory planning algo for this model is )r6  rH   	functoolspartialfuncr   r  r   r   r   r  r~   r   r   r4   rI   r5   )r+   r  r   r#   r'   r   algor   mem_algo_result	best_algor   rM   r0  r=  s                @r,   __call__%MemoryPlanningAlgorithmSuite.__call__  s\   6 NND$	 1 122yy))tZ6%)&T" #   ,<+B+B+D+D 001+D 	d d	d "M
	 	DYKPQ#I.77 %Y/99D 0 ; E Ed K+22DK/::DO/::DO	 : /s    E)r6  r*   )r   r   r   r   r   r   r   r   r-   rJ   r   r   r   r   r   r   rE  r   r=   r/   r,   r5  r5    s     FJJD#/?*?!@ABJ 
JDD :D hh**	D
 .D D 
cDr/   r5  c           	      (   [        0 / 5      nS[        [           S[        S[        S[        4S jn[        USS5      nUc  SS/n[	        [        [           U5      nU H  nUR
                  R                  U[        SSS5      5      n	UR                  c  S	U	l        OUR                  U	l        XR
                  U'   UR                  U 5        U" XyR                  UR                  5      U	l        M     [        R                  " S
U 35        Xul        U$ )a  Naive algorithm to allocate memory for tensors in the graph.

This algorithm simply allocates memory for each tensor sequentially without reusing memory.

Args:
    alignment: Memory alignment requirement
    specs: Set of TensorSpec objects with updated lifetimes
    graph_module: Graph module
    graph_signature: Graph signature
    extra_padding: Additional padding to add to each memory buffer (in bytes)

Returns:
    MemoryAlgoResult containing the allocation decisions
r   r4   r   r(   c                     U[        U 5      :  a$  U R                  S/U[        U 5      -
  S-   -  5        X   nX==   U-  ss'   U$ r  )r   extend)r   r4   r   rets       r,   _allocate_bufnaive.<locals>._allocate_buf  sJ    S]"OOQC6CM#9A#=>?I%
r/   r'  Nr   r<   z"naive algorithm returns bufsizes: )r   r   rJ   r  r   r   r   r   r4   r+  rG   rI   r~   r   r   )
r  r   r#   r'   r   naive_resultrK  r   rM   r0  s
             r,   naiverN    s   * $B+LS	 3 3 3  |%=tDHq6DIx(H(2266t_QPQST=UV;;'($'+{{$'8t$ 	Y'4..0E0E(
$   MM6xjAB$r/   c              #      #    U R                   R                   H:  nUR                  [        R                  R
                  R                  L d  M6  Uv   M<     g 7fr*   )r`   ra   r   r   r   r   r   r#   r   s     r,   get_cond_nodesrQ    <       &&99		..333H '   A	A	Ac              #   x   #    U R                   R                   H  nUR                  [        L d  M  Uv   M     g 7fr*   )r`   ra   r   r   rP  s     r,   get_while_nodesrU    s,       &&99
"H 's   +:	:c              #      #    U R                   R                   H:  nUR                  [        R                  R
                  R                  L d  M6  Uv   M<     g 7fr*   )r`   ra   r   r   r   r   r   rP  s     r,   get_map_nodesrW    s<       &&99		..777H 'rS  c              #      #    U R                   R                   H:  nUR                  [        R                  R
                  R                  L d  M6  Uv   M<     g 7fr*   )r`   ra   r   r   r   r   scanrP  s     r,   get_scan_nodesrZ    rR  rS  c                    [        5       nU R                  R                  n[        U5      S:  aP  [	        [        [        U5      5      5      n[        UR                  S   5      S    H  nUR                  U5        M     U$ )Nr   rM   )
r   r`   ra   r   nextiterreversedr   r   r   )r#   return_specsra   	last_noderM   s        r,   get_return_specsra  #  si    5L$$E
5zA~huo./	 !78;DT" <r/   c                     [        5       nU R                  R                  nU HH  nUR                  S:X  d  M  [	        UR
                  S   5      S    H  nUR                  U5        M     MJ     U$ )Nrj   rM   r   )r   r`   ra   r   r   r   r   )r#   r   ra   r   rM   s        r,   get_input_specsrc  -  sa    %K$$E77m#$TYYv%67:% ;  r/   allspecsc                    [        U 5      n[        U 5      n[        [        5      nU HQ  nUR                  [
        R                  :X  d  M#  XR;  d  M*  XS;  d  M1  XER                  S      R                  U5        MS     [        U R                  R                  5      n[        [        U R                  R                  5      [        US-
  SS5      5       Hx  u  pxUR                  U/ 5      n	U	(       d  M   U R                  R!                  U5         U	 H.  nU R                  R#                  [$        R&                  U45        M0     SSS5        Mz     U R)                  5         g! , (       d  f       M  = f)a  
Insert calls to free for dynamic unbound tensors that goes out of lifetime.

Only handle the module itself. Submodule is handles in separate calls of
this function.

NOTE: this method will invalidate lifetime recorded in TensorSpec because
of extra free node added to the graph.
r<   r  N)ra  rc  r   r^   ry   r   rz   rA   rK   r   r`   ra   zipr^  ranger   inserting_afterr   r   free	recompile)
r#   rd  r_  r   idx_to_dead_specsrM   	num_nodesr   r   
dead_specss
             r,   insert_calls_to_freern  7  s+    $L1L ",/K#D)#6#F#FF('mmA./66t<  L&&,,-I ##))*E)a-R,H '**8R8
//5"""00tgF # 65  65s   5E  
E/	r   new_bufsizesc                     [        U 5      [        U5      :  a*  U R                  S/[        U5      [        U 5      -
  -  5        [        [        U5      5       H  n[        X   X   5      X'   M     U $ )zCombine two buffer size lists.r   )r   rI  rg  r	  )r   ro  is      r,   _merge_bufsizesrr  `  s_    
8}s<((s<03x=@AB3|$%(+|7 &Or/   rB  parent_graph_modulesubmodule_noder$   c           	      >   UR                   S:X  d   e[        XR                  5      n[        R                  " SUR
                   S35        [        U UUUUSS9nUR                  R                  SU05        [        R                  " SUR
                   SU 35        U$ )	z7Apply algo to nodes in a submodule of the graph module.r   zPlanning memory for submodule z...T)r$   r%   non_const_buffer_sizeszBuffer sizes for submodule z: )	r   r  r   r~   r   r   
apply_algor   update)rB  rs  r  rt  r'   r$   	submoduler   s           r,   _handle_submodulerz  i  s     
***+-B-BCIMM2>3F3F2GsKL+H NN3X>?MM/0C0C/DBxjQROr/   c                   ^ ^^^^	 / m	 SS[         R                  R                  S[        SS4U UU	UU4S jjjn[	        T5       Hq  nU" [        [         R                  R                  UR                  S   5      5        U" [        [         R                  R                  UR                  S   5      5        Ms     [        T5       Hq  nU" [        [         R                  R                  UR                  S   5      5        U" [        [         R                  R                  UR                  S   5      5        Ms     [        T5       H9  nU" [        [         R                  R                  UR                  S   5      S	S
9  M;     [        T5       H9  nU" [        [         R                  R                  UR                  S   5      S	S
9  M;     T	$ )zApply algo to map/cond/while/scan nodes in the graph module.

This method will popuate graph_module.meta["non_const_buffer_sizes"] for
all submodules and return a bufsizes list that is the maximum size of all
buffers.
rt  r$   r(   Nc           	      :   > [        TTTU TUS9n[        TU5        g )Nr$   )rz  rr  )rt  r$   current_bufsizesrB  r  r   r#   r'   s      r,   _handle*_apply_algo_to_submodules.<locals>._handle  s/     -/
 	"23r/   r<      r   Tr}  r   )
r   r   r   r   rQ  r   r   rU  rW  rZ  )
rB  r#   r  r'   r  	cond_node
while_nodemap_node	scan_noder   s
   ````     @r,   _apply_algo_to_submodulesr    sJ    H #(444 
4 4 $L1	UXX]]INN1$567UXX]]INN1$567 2 &l3
UXX]]JOOA$678UXX]]JOOA$678 4 ",/UXX]]HMM!$45N 0 $L1	UXX]]INN1$56$O 2 Or/   r%   r&   c           	      2   [        X5      n[        UR                  R                  USU(       + U(       + U(       + S9n[	        XX#5      n	Xl        Sn
[        U5      (       a  Sn
U " UUUUU
5      n[        X5        UR                  R                  SU05        U$ )a  
Recursively apply algo to graph_module and its submodules for control flow.

Algo implementation should handle one of two meta entries for submodules:
1. input_mem_buffer_sizes: List of int offset bytes. Memory allocated by
   `algo` should start at the offset specified by this list;
OR
2. non_const_buffer_sizes: List of bufsizes for planned memory in submodule.
   `algo` should reserve the space specified by this list for the lifetime
   of the submodule node (e.g. cond, while, map).

TODO: Missing optimizations:
1. To handle maps, we set `alloc_graph_input=True`, which allocates
appropriate space for mapped arg but ends up allocating extra space for
`operand` arg. The memory for operands is unused.
F)r[   rX   rY   rZ   r   @   rv  )
r   r_   r`   ra   r  r'  r  rn  r   rx  )rB  r#   r  r'   r$   r%   r&   _r   submodule_bufsizesr   r   s               r,   rw  rw    s    8 	$LBA %  00 22#88E 3I +=' M!,// H -6ABOr/   r*   )	NFFFTTTTT)r   r   )NF)NTTT)Zr?  r   r~   r   collectionsr   dataclassesr   r   typingr   r   r   r	   r
   r   r   r   r   r   r   executorch.exirr   executorch.exir.control_flowr   r   executorch.exir.delegater   executorch.exir.errorr   r    executorch.exir.operator.convertr   r   executorch.exir.schemar   executorch.exir.tensorr   r   torch.export.exported_programr   r   r   torch.fxr   torch.utils._pytreer   r   r   rJ   r   r!   r   r   r   r   r   r   r   r   r|   r}   r_   r   r   r   r   r   r   r  r  r  r  r   r  r3  r5  rN  rQ  rU  rW  rZ  ra  rc  rn  r^   rr  rz  r  rw  r=   r/   r,   <module>r     s>       # (    " A = @ O 6 -  
  ,8: $sHS$s)^445 :uc ucp588== T 588== T  *.!
((--!
! ! 	!
 	%&! 
!>P# P8D> P CG
!)*>!?	& D> ,45I,J _ 84H+I d 0 NR	D>	,45I,J	_	 HTN  s:   7;$ %#( $*.eD>e23e e 	e
 !e e e e e $(e jeT 7;((&&23 	_2    Q Q Q4 	 	 	    AB&:=
L 

 
t 

(" +/]&]
] $(] 	]@

((--

4
U:../
@UXX-A-A d   ] +/]]z?] ((&&] *	]
 ] $(] ]@M M`77z?7 ((&&7 *	7
 7 7t!5!5 (4. %(("6"6 8D>  4 4 $ !5!5 (4. 2>> c*o ".. S_ &..&,/
O&	&Rd3i tCy T#Y  7;#
3S	>
"--  HHMM	
 23  
#Y< 7;	0
3S	>
"0((&&0 0 23	0
 
#Y0n 7;"#"&G
3S	>
"G((&&G G 23	G
 G G  G 
#YGr/   