
    9i                       % S SK r S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	rS SK
Jr  S SKJrJr  S SKJrJrJrJrJr  S SKrS SKrS SKrS SKJr  S SKJs  Jr  S SKJrJ r   S SK!J"r"  S SK#J$r%  S SK&J'r'  S S	K(J)r)  S S
K*J+r+  S SK,J-r-J.r.  S SK/J0r0J1r1  S SK2J3r3J4r4J5r5J6r6J7r7J8r8  S SK9J:r:  S SK;J<r<  S SK=J>r>  SSK?J$r$  SSK@JArA  SSKBJCrCJDrDJErE  SSKFJGrG  SSKHJIrIJJrJ  SSKKJLrL  SSKMJNrNJOrO  SSKPJQrQJRrRJSrS  \(       a  S SKTrT\$R                  rV\W\XS'   \R                  " \Z5      r[\R                  \XS'   \R                  R                  r^\R                  R                  r_\ " S S5      5       r`\ " S S5      5       ra\ " S S 5      5       rbS!\R                  S"\W4S# jrdS$\R                  S"\W4S% jrfS$\R                  S"\W4S& jrgS!\R                  S"\h4S' jri " S( S)5      rj\j" 5       rk SS*\R                  S+\m\R                     S,\m\R                     S-\m\I   S.\\n   S"\R                  4S/ jjroS!\R                  S"\W4S0 jrpS!\R                  S"\W4S1 jrqS!\R                  S"\W4S2 jrrS!\R                  S"\W4S3 jrsS!\R                  S"\W4S4 jrtS!\R                  S"\W4S5 jruS!\R                  S"\W4S6 jrvS!\R                  S"\W4S7 jrwS!\R                  S"\W4S8 jrxS!\R                  S"\W4S9 jryS:\R                  S"\z\m\R                     \m\R                     \m\I   \m\I   4   4S; jr{S<\m\R                     S=\n4S> jr|S?\\m\R                     \z\R                     4   S"\h4S@ jr}  SSA\R2                  R                  S!\R2                  R                  SB\~SC\~4SD jjrSA\R2                  R                  S!\R2                  R                  SE\R2                  R                  SF\GR                   SG\~SH\~S"\R2                  R                  4SI jrSJ\GR                  S"\~4SK jrS"\m\GR                      4SL jrS!\R2                  R                  S"\W4SM jrS"\GR                   4SN jrSO\GR                   S"\z4SP jrSA\R2                  R                  S"S4SQ jrSA\R2                  R                  S"S4SR jrSS\R                  ST\R                  SU\\n\R                  4   S"S4SV jr SS<\m\R                     SS\R                  ST\R                  SW\\<\R                        S"S4
SX jjrSSY.S:\R                  S<\m\R                     SZ\m\R                     S[\hSW\\<\R                        S"\z\R                  \R                  4   4S\ jjrSSS].S:\R                  S^\\m\h      SW\\<\R                        S"\z\R                  \R                  4   4S_ jjr\h" S`5      rSa\hS"\h4Sb jrS!\R                  S"\h4Sc jrSA\R                  4Sd jr\GR&                  Se 5       rSf\\R                  \h4   S"\m\z\R                  \h4      4Sg jrSh\R                  S"\R                  4Si jrSj\R2                  R                  Sk\R2                  R                  Sl\R2                  R                  Sm\R2                  R                  Sn\GR.                  So\hSp\R2                  R                  Sq\R2                  R                  4Sr jrS:\R                  Sj\R                  Sk\R                  Ss\hS"\z\R                  \R                  4   4
St jrS:\R                  S"S4Su jrS:\R                  S"S4Sv jrS:\R                  S"\R                  4Sw jr SS*\R                  Sx\aSy\bSz\\<\R                        4S{ jjrS| rS"\`4S} jrSA\R                  4S~ jrS*\R                  S\m\~   S\m\~   S\~Sx\aS\m\R                     S"\z\~\m\h   \m\h   4   4S jrS SKJr  S\GR                  S\hS"\GR                  4S jrS r SS*\R                  Sx\aS"\m\R                     4S jjrS*\R2                  R                  S<\m\R2                  R                     4S jrS r SSS.S:\R                  S^\\m\h      S"\z\R                  \R                  4   4S jjjr     SS\R2                  R                  S\nS\nS\WS\\\n\m\n   4      S\WS\\n   S"S4S jjrg)    Ndefaultdict)	dataclassreplace)AnyCallableOptionalTYPE_CHECKINGUnion)countersis_node_meta_valid)(create_structured_trace_for_min_cut_info)config)trace_structured)extract_tensor_metadata)BackwardState)is_sym_nodepy_sym_types)magic_methodsmethod_to_operator)find_symbol_binding_fx_nodesfree_symbolshint_intis_symbol_binding_fx_nodestatically_known_falsestatically_known_true)graph_drawer)
OrderedSet)CheckpointPolicy   )GraphInfoProvider)dp_knapsackgreedy_knapsackilp_knapsack)KnapsackEvaluator)	AOTOutputSavedForBackwardsAOTOutput)get_aot_graph_name)get_cuda_generator_meta_valis_with_effects)fx_graph_cseget_aten_targetraise_getitemsAOT_PARTITIONER_DEBUGlogc                      \ rS rSr% Sr\\   \S'   \\   \S'   \\   \S'   \\   \S'   \\   \S'   S\R                  4S	 jr
S\R                  4S
 jrS\R                  4S jrS\R                  4S jrS\R                  4S jrSrg)OpTypesD   z8Class for keeping track of different operator categoriesfusible_opscompute_intensive_ops
random_opsview_opsrecomputable_opsnodec                 2    [        U5      U R                  ;   $ N)r,   r3   selfr8   s     ]/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/torch/_functorch/partitioners.py
is_fusibleOpTypes.is_fusibleN   s    t$(8(888    c                 2    [        U5      U R                  ;   $ r:   )r,   r4   r;   s     r=   is_compute_intensiveOpTypes.is_compute_intensiveQ   s    t$(B(BBBr@   c                 2    [        U5      U R                  ;   $ r:   )r,   r5   r;   s     r=   	is_randomOpTypes.is_randomT   s    t$77r@   c                 2    [        U5      U R                  ;   $ r:   )r,   r6   r;   s     r=   is_viewOpTypes.is_viewW   s    t$55r@   c                 2    [        U5      U R                  ;   $ r:   )r,   r7   r;   s     r=   is_recomputableOpTypes.is_recomputableZ   s    t$(=(===r@    N)__name__
__module____qualname____firstlineno____doc__r   r   __annotations__fxNoder>   rB   rE   rH   rK   __static_attributes__rM   r@   r=   r1   r1   D   s    BH%%%h//8$$"" **9rww 9C C8bgg 86BGG 6>BGG >r@   r1   c                      \ rS rSr% \\R                     \S'   \\R                     \S'   \\R                     \S'   \\R                     \S'   \	\R                  \
4   \S'   \\R                     \S'   \R                  S\\R                     4S	 j5       rS
\R                  S\4S jrS
\R                  S\4S jrS
\R                  S\4S jrS
\R                  S\
4S jrSrg)NodeInfo^   inputs_required_fw_nodesrequired_bw_nodesunclaimed_nodesfw_orderstatic_lifetime_input_nodesreturnc                 B   ^  [        S T R                   5       U 4S jS9$ )Nc              3   $   #    U  H  ov   M     g 7fr:   rM   .0ns     r=   	<genexpr>-NodeInfo.required_fw_nodes.<locals>.<genexpr>m   s     0/1Q/s   c                 "   > TR                   U    $ r:   )r^   )re   r<   s    r=   <lambda>,NodeInfo.required_fw_nodes.<locals>.<lambda>m   s    a@Pr@   key)sortedr[   r<   s   `r=   required_fw_nodesNodeInfo.required_fw_nodesj   s!    0//06P
 	
r@   re   c                     XR                   ;   $ r:   )r[   r<   re   s     r=   is_required_fwNodeInfo.is_required_fwp   s    ++++r@   c                     XR                   ;   $ r:   )r\   rr   s     r=   is_required_bwNodeInfo.is_required_bws   s    ****r@   c                     XR                   ;   $ r:   )r]   rr   s     r=   is_unclaimedNodeInfo.is_unclaimedv   s    ((((r@   c                 T    XR                   ;   d   SU S35       eU R                  U   $ )NNode z not in fw nodes!)r[   r^   rr   s     r=   get_fw_orderNodeInfo.get_fw_ordery   s2    +++IuQC7H-II+}}Qr@   rM   N)rN   rO   rP   rQ   listrT   rU   rS   r   dictint	functoolscached_propertyro   boolrs   rv   ry   r}   rV   rM   r@   r=   rX   rX   ^   s     M"277++!"''**((277C<  !+BGG!44
4= 
 

, ,D ,+ +D +)bgg )$ ) bgg  #  r@   rX   c                   H    \ rS rSr% \\S'   \\S'   \\S'   \\S'   \\S'   Srg)	MinCutOptions~   ban_if_used_far_apartban_if_long_fusible_chainsban_if_materialized_backwardban_if_not_in_allowlistban_if_reductionrM   N)rN   rO   rP   rQ   r   rS   rV   rM   r@   r=   r   r   ~   s      $$"&&!!r@   r   r8   r`   c                 |    U R                   R                  SS 5      [        R                  [        R                  4;   $ )N	recompute)metagetr   MUST_RECOMPUTEPREFER_RECOMPUTEr8   s    r=   must_recomputer      s5    99==d+''))0  r@   fx_gc                 b    U R                   R                   H  n[        U5      (       d  M    g   g)NTF)graphnodesr   r   r8   s     r=   has_recomputable_opsr      s)    

  $ ! r@   c                    U R                   R                   Hf  n[        U5      (       d  M  [        UR                  S5      (       d  M2  [
        R                  R                  UR                  R                  ;   d  Mf    g   g)NtagsTF)	r   r   r   hasattrtargettorchTagnondeterministic_seededr   r   s     r=   has_recomputable_rng_opsr      sV    

  4  V,,		11T[[5E5EE ! r@   c                     [        U R                  S   [        R                  [        R                  45      (       a  g[        U R                  S   [        R
                  5      (       d   eg)Nvalr       )
isinstancer   r   SymIntSymBoolSymFloatr   s    r=   sym_node_sizer      sK    $))E"U\\5==$ABBdii&7777r@   c                       \ rS rSrS rSrg)InvalidNodeBase   c                     g)NzInvalid NoderM   rn   s    r=   __repr__InvalidNodeBase.__repr__   s    r@   rM   N)rN   rO   rP   rQ   r   rV   rM   r@   r=   r   r      s    r@   r   joint_graphrZ   outputsoutputs_descssubgraphc                 4  ^ [         R                  " 5       n0 mU H4  nUR                  UR                  5      nUR                  Ul        UTU'   M6     U R
                   GH^  n[        U5      (       a  US:w  a  [        TU'   M%  [        U5      (       a  US:w  a  [        TU'   MF  UT;   a  MN  UR                  S:X  a  [        TU'   Mi  UR                  S:X  a  [        R                  " UR                  0 UR                  D6nU V	s/ s H7  n	[        U	[         R                  5      (       d  M$  [        TU	   [         5      PM9     nn	[#        U5      (       a  [        TU'   GM  UR%                  UU4S j5      TU'   GM  UR                  S:X  a  UR%                  UU4S j5      TU'   GMK  UR                  S:X  d  GM^  GMa     / n
U H  n	[        U	[         R                  5      (       aN  U	T;  a  ['        S	U	 S
35      e[        TU	   [         5      (       a   S	U	 S35       eU
R)                  TU	   5        Mp  U
R)                  U	5        M     UR+                  [-        U
5      5      nX;R                  S'   UR/                  5         UR1                  5         U$ s  sn	f )au  
Given a graph, extracts out a subgraph that takes the specified nodes as
inputs and returns the specified outputs.

This includes specifying non-placeholder nodes as inputs.

The general strategy is to initialize all inputs with proxies as we
encounter them, and trace through the graph, only keeping values which take
in valid proxies. Then, all dead code is eliminated.
backwardforwardplaceholdercall_functionc                    > TU    $ r:   rM   xenvs    r=   ri   4_extract_graph_with_inputs_outputs.<locals>.<lambda>   	    CFr@   get_attrc                    > TU    $ r:   rM   r   s    r=   ri   r      r   r@   outputr|   z couldn't be found in envz was invalid, but is outputdesc)rT   Graphr   namer   r   _must_be_in_backwardInvalidNode_must_be_in_forwardoppytreearg_tree_leavesargskwargsr   rU   r   any	node_copyRuntimeErrorappendr   tupleeliminate_dead_codelint)r   rZ   r   r   r   	new_graphr8   new_nodeall_argsr   output_valuesoutr   s               @r=   "_extract_graph_with_inputs_outputsr      sM   " 
I
C ((3		D		  !!%%(j*@#CIt$$Y)>#CI3; WW%#CIWW'--tyyHDKKHH "!Aa) 4
3q6?3!  
 8}}'D	!++D2BCCIWW
"!++D2BCCIWW = "> Ma!!|"U1#-F#GHH!#a&/:: s56:   Q(  #  

5/
0C$HHV!!#NN;s   #J-Jc                     U R                   S:H  =(       aF    S[        U R                  5      ;  =(       a'    [        U 5      (       + =(       a    [	        U 5      (       + $ Nr   tangents)r   strr   _is_bwd_seed_offset_is_fwd_seed_offsetr   s    r=   
_is_primalr      sK    =  	*c$++..	*#D))	* $D))	r@   c                 ^    U R                   S:H  =(       a    S[        U R                  5      ;   $ r   r   r   r   r   s    r=   _is_tangentr     s$    77m#F
c$++6F(FFr@   c                     U R                   S:H  =(       a7    S[        U R                  5      ;   =(       d    S[        U R                  5      ;   $ )Nr   bwd_seedbwd_base_offsetr   r   s    r=   r   r   	  =    77m# c$++&&O*;s4;;?O*Or@   c                     U R                   S:H  =(       a7    S[        U R                  5      ;   =(       d    S[        U R                  5      ;   $ )Nr   fwd_seedfwd_base_offsetr   r   s    r=   r   r     r   r@   c                     U R                   S:H  =(       a)    [        U R                  R                  S5      [        5      $ )Nr   r   )r   r   r   r   r   r   s    r=   _is_backward_stater     s*    77m#W
499==3G(WWr@   c                 @    U R                   R                  SS 5      S:H  $ )Npartitioner_tagis_backwardr   r   r   s    r=   _has_tag_is_backwardr     s    99==*D1]BBr@   c                 @    U R                   R                  SS 5      S:H  $ )Nr   must_be_in_forwardr   r   s    r=   _has_tag_must_be_in_forwardr     s    99==*D15IIIr@   c                 @    U R                   R                  SS 5      S:H  $ )Nr   must_be_in_backwardr   r   s    r=   _has_tag_must_be_in_backwardr   !  s    99==*D15JJJr@   c                     [        U 5      $ r:   )r   r   s    r=   r   r   %  s    &t,,r@   c                 `    [        U 5      =(       d    [        U 5      =(       a    [        U 5      $ r:   )r   r   r*   r   s    r=   r   r   )  s&    '- T"<t'<r@   joint_modulec          
      \   [         R                  " S U R                  R                  SS9 5       6 n[         R                  " [	        [        U R                  R                  SS95      5      R                  R                  SS /[        U5      -  5      5      nUS U nX!S  nUS U nX1S  nXEXg4$ )Nc              3   8   #    U  H  oR                   v   M     g 7fr:   r   rd   r8   s     r=   rf   +_extract_fwd_bwd_outputs.<locals>.<genexpr>3  s     	K J)) J   r   r   r   )	r   r   r   
find_nodesnextiterr   r   len)r   num_fwd_outputsr   r   fwd_outputsbwd_outputsfwd_outputs_descsbwd_outputs_descss           r=   _extract_fwd_bwd_outputsr  /  s     $$	K 2 2 = = = J	KG **T,$$//8/<=>CCGGTFS\)	
M
 *?+K*+K%&67%&67%6IIr@   saved_valuesr   c                 \    U  H&  nUR                   U:X  d  M  U R                  U5          g    g r:   )r   remove)r  r   saved_values      r=   _remove_by_namer  A  s+    #t#, $r@   fwd_module_outputsc                     [        U 5      n[        [        U 5      S-
  SS5       H  n[        X   5      (       a  M  US-   n  U$    U$ )Nr    )r  ranger   )r  idxis      r=   find_first_sym_noder  H  sT      
!C3)*Q.B7-011a%CJ	 8 Jr@   r   maxminc           	         U R                  U5         U R                  [        R                  R                  R
                  R                  U4S9n[        R                  R                  R
                  R                  UR                  S   5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  US/S4S9n[        R                  R                  R                  R                  UR                  S   S/S5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  U[        R                  4S9n[        R                  R                  R                  R                  UR                  S   [        R                  5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  Xc4S9n[        R                  R                  R                  R                  UR                  S   U5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  U4S9n[        R                  R                  R                  R                  UR                  S   5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                   X4S9n	[        R                  R                  R                  R!                  UR                  S   U5      U	R                  S'   [        U	R                  S   5      U	R                  S'   S S S 5        U R                  W	5         U R                  [        R                  R                  R                  R                  U	[        R"                  4S[%        UR&                  5      -   S9n
[        R                  R                  R                  R                  U	R                  S   [        R"                  5      U
R                  S'   [        U
R                  S   5      U
R                  S'   S S S 5        U
$ ! , (       d  f       GN"= f! , (       d  f       GNe= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN!= f! , (       d  f       GNi= f! , (       d  f       W
$ = f)Nr   r   tensor_metar  T
fp8_scale_r   r   )inserting_afterr   r   opsatenabsdefaultr   r   amaxprimsconvert_element_typefloat64	clamp_min
reciprocalmulTensorfloat32r   r   )r   r8   r  r  abs_node	amax_nodeamax_64_nodeclamp_min_nodereciprocal_nodemul_node
scale_nodes              r=   calculate_quantization_scalingr4  S  s    
		t	$&&IINN&& ' 
  %yy~~1199$))E:JKe'>x}}U?S'Tm$ 
% 
		x	(''IINN''RD$' ( 
	 !&		 3 3 ; ;MM% 2$!
	u )@	u@U(V	}% 
) 
		y	)**IIOO0088U]]+ + 
 $)99??#G#G#O#ONN5!5==$
%  ,Ce$,
-( 
* 
		|	,,,IINN$$,,$ - 
 &+YY^^%=%=%E%Ee$c&
E" .E&.
M* 
- 
		~	.--IINN%%-- " . 
 ',iinn&?&?&G&G&'
U# /F  '/
]+ 
/ 
			/&&IINN%%!' ' 
  %yy~~1188  ' 
e (?x}}U?S'Tm$ 
0 
		x	(((IIOO0088EMM*DII. ) 


 "'!E!E!M!MMM% %--"

 *AQVAW)X
& 
) I 
%	$ 
)	( 
*	) 
-	, 
/	. 
0	/ 
)	( sZ   B0WB6W/*CXB1XB0X%$B1X7.C%Y	
W,/
W>
X
X"%
X47
Y	
Yr3  
quant_typer(  	clamp_maxc           	       	   U R                  U5         U R                  [        R                  R                  R
                  R                  U[        R                  4S9n[        R                  R                  R
                  R                  UR                  S   [        R                  5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  Xb4S9n[        R                  R                  R                  R                  UR                  S   UR                  S   5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  Xt4S9n[        R                  R                  R                  R                  UR                  S   U5      UR                  S'   [        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [        R                  R                  R                  R                  X4S9n	[        R                  R                  R                  R                  UR                  S   U5      U	R                  S'   [        U	R                  S   5      U	R                  S'   S S S 5        U R                  W	5         U R                  [        R                  R                  R
                  R                  X4S[        UR                   5      -   S9n
[        R                  R                  R
                  R                  U	R                  S   U5      U
R                  S'   [        U
R                  S   5      U
R                  S'   S S S 5        U
$ ! , (       d  f       GN]= f! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GN(= f! , (       d  f       W
$ = f)Nr   r   r  
fp8_quant_r  )r  r   r   r   r%  r&  r#  r,  r   r   r!  r*  r+  r(  r6  r   r   )r   r8   r3  r5  r(  r6  target_node_32scaled_target_nodeclamp_min_scaled_nodeclamp_max_scaled_nodequant_activation_nodes              r=   perform_quantizationr>    s    
		z	*,,IIOO0088& - 
 &+YY__%I%I%Q%QIIeemm&
E" .E&.
M* 
+ 
		~	."00IINN%% - 1 
 */););)B)B&
(>*
& 2I##E*2
. 
/ 
		1	2 % 3 3IINN$$,,$0 !4 !
 -2IINN,D,D,L,L##E*I-
""5) 5L!&&u-5
""=1 
3 
		4	5 % 3 3IINN$$,,'3 !4 !
 -2IINN,D,D,L,L!&&u-y-
""5) 5L!&&u-5
""=1 
6 
		4	5 % 3 3IIOO0088'4DII. !4 !
 IIOO0088%**51: 	""5)
 5L!&&u-5
""=1 
6 ! u 
+	* 
/	. 
3	2 
6	5 
6	5 ! s@   CP69B>QB1QB1Q,$CQ>6
Q
Q
Q),
Q;>
Rtensorc                 P    U R                  5       nU R                  5       nX-  S-  $ )z
Calculate the size of a PyTorch tensor in megabytes (MB).

Args:
    tensor (torch.Tensor): Input tensor

Returns:
    float: Memory size in MB
i   )numelelement_size)r?  num_elementsrB  s      r=   calculate_tensor_sizerD    s+     <<>L&&(L'K88r@   c            	          [         R                  R                  R                  S   R	                  SS5      n U R                  S5       Vs/ s H%  n[        [         UR                  S5      S   5      PM'     n nU $ s  snf )N!activation_quantization_aten_passallowed_dtypesztorch.bfloat16;.r  )r   	_inductorr   post_grad_fusion_optionsr   splitgetattr)rG  dtypes     r=   get_allowed_dtypesrO    s}    __++DD+	c
,-  ;I:N:Ns:S:Su{{3'+,:S   s   ,A;c                 n   [        5       n[        U 5      (       a  U R                  S   R                  U;  a  g[        R
                  R                  R                  S   R                  SS5      n[        U R                  S   5      n[        R
                  R                  R                  S   R                  SS5      (       d  X2:  $ [        R
                  R                  R                  S   R                  SS5      (       a&  [        X2:  5      =(       d    [        X2:  5      (       + $ [        X2:  5      $ )Nr   FrF  
size_in_mbd   skip_dynamo_guardsquantize_dynamic_shape)rO  r   r   rN  r   rJ  r   rK  r   rD  r   r   )r8   rG  size_thresholdrQ  s       r=   should_quantizerV     s   ')Nd##tyy'7'='=^'S__++DD+	c,  'tyy'78J??!!::+	c
&' ++ ??!!::/

#&
./ ), J+J,HIIJ
 ))EFFr@   c                      [         R                  R                  R                  S   R	                  SS5      n [        [         U R                  S5      S   5      $ )NrF  r5  ztorch.float8_e5m2rI  r  )r   rJ  r   rK  r   rM  rL  )r5  s    r=   get_quant_typerX    sN    ''@@+	c,+,  5***3/344r@   rN  c                 ^    [         R                  " U 5      nUR                  UR                  4$ )z
Calculate the range of values for a given torch.dtype.
Args:
    dtype (torch.dtype): The input dtype.
Returns:
    tuple: A tuple containing the minimum and maximum values.
)r   finfor  r  )rN  infos     r=   calculate_ranger\  "  s%     ;;uD88TXXr@   c           
         U R                  SS9S   nUR                  S   n[        5       n[        U5      u  pE[	        5       n/ / pU GH  n	U	R
                  R                  SS5      (       d  M'  [        R                  R                  R                  S   R                  SS5      (       aN  [        X	US	5      n
[        X	XXE5      n[        U
5      (       d  UR                  U
5        OUR                  U
5        OU R                  U	5         U R!                  [        R"                  R$                  R&                  R(                  X4S
[+        U	R,                  5      -   S9n[        R"                  R$                  R&                  R)                  U	R
                  S   U5      UR
                  S'   [/        UR
                  S   5      UR
                  S'   S S S 5        WXi'   GM     U V	s/ s H  oU;   a  Xi   OU	PM     nn	[1        U5      nXx-   nU(       a  US U U-   XS  -   nUR3                  S[5        U5      5        [6        S   S==   S-  ss'   g ! , (       d  f       N= fs  sn	f )Nr   r  r   saved_for_quantizationFrF  use_scalingT-q=r8  r  r   r  inductor%activation_quantization_fwd_aten_passr    )r  r   rX  r\  r   r   r   r   rJ  r   rK  r4  r>  r   r   r  r   r   r%  r&  r#  r   r   r   r  
update_argr   r   )r   r   r  r5  r(  r6  node_to_quanttensor_scale_nodessym_scale_nodesr8   r3  
quant_nodeoutput_updated_argsr  scale_nodess                  r=   quantize_activation_fwrj  .  sC   *1-F++a.K!J*:6IFM*,b99==1599%%>>3c-&' <E
 2
 #:..&--j9#**:6 **40!&!4!4		<<DD"/)C		N: "5 "J 		<<DD IIe,j OOE*
 6M".6JOOM2 1 #-MG L LWKV4}4$>;   1
2C$6K%36I$6OO 	 a234Z@AQFA9 10 s   
CI'I"
I	c           
      R
  ^	 U R                    Vs/ s H  oR                  S:X  d  M  UPM     nnS nU GH  nUR                  R                  SS5      (       d  M'  UR                  R	                  S5        UR                  R	                  S5      n[
        R                  R                  R                  S   R                  SS5      (       Ga  U R                  U5         SUR                  R                  SS	5      -   m	[        U	4S
 jU 5       5      nS S S 5        U R                  W5         U R                  [
        R                  R                  R                   R"                  X4S9n[
        R                  R                  R                   R#                  UR                  S   U5      UR                  S'   [%        UR                  S   5      UR                  S'   S S S 5        U R                  U5         U R                  [
        R                  R&                  R(                  R*                  X54S9n[
        R                  R&                  R(                  R+                  UR                  S   UR                  S   5      UR                  S'   [%        UR                  S   5      UR                  S'   S S S 5        U R                  W5         U R                  [
        R                  R                  R                   R"                  Xd4S9n[
        R                  R                  R                   R#                  UR                  S   U5      UR                  S'   [%        UR                  S   5      UR                  S'   S S S 5        OU R                  U5         U R                  [
        R                  R                  R                   R"                  X4S[-        UR                  5      -   S9n[
        R                  R                  R                   R#                  UR                  S   U5      UR                  S'   [%        UR                  S   5      UR                  S'   S S S 5        [/        UR0                  R3                  5       5       H#  nUW:w  d  M  X:w  d  M  UR5                  X5        M%     GM     [6        S   S==   S-  ss'   g s  snf ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       GNH= f! , (       d  f       N= f! , (       d  f       N= f)Nr   r^  Fdequant_typerF  r_  r  r8   c              3   L   >#    U  H  nUR                   T:X  d  M  Uv   M     g 7fr:   r   )rd   	bwd_input
scale_names     r=   rf   )quantize_activation_bw.<locals>.<genexpr>x  s&      &)2I$>>Z7 "	)2s   $	$r   r   r  dequant_r  ra  %activation_quantization_bwd_aten_passr    )r   r   r   r   popr   rJ  r   rK  r  r   r   r  r   r   r%  r&  r#  r   r!  divr+  r   r   userskeysreplace_input_withr   )
r   r8   	bw_inputsactivation_noderl  r3  divided_target_node_32dequant_nodeuserrq  s
            @r=   quantize_activation_bwr  i  s,   "'++J+$M1I+IJO99==1599IIMM2399==8L%%>>3c-'( **40!-		0A0A,PR0S!SJ!% &)2& "J 1 **:6&+&9&9		<<DD"1 ': 'O
 		<<DD IIe,l $((/
 ;R',,U3;O((7 7 **?;-2-@-@		**11-: .A .* :?9K9K9R9R',,U3Z__U5K:*//6 00F0K0KE0RS +//> < **+AB#(#6#6		<<DD4C $7 $L
 		<<DD277> !%%e,
 8O$))%08L%%m4 CB **40#(#6#6		<<DD"1'#dii.8 $7 $L 		<<DD IIe,l !%%e,
 8O$))%08L%%m4 1 TZZ__./<'D,C++D? 0M T Z@AQFAY K 10 76 <; CB 10sJ   SS5S,B1S#6B>S5B1TCT
S 	#
S2	5
T	
T	
T&	
fwd_module
bwd_modulebwd_module_inputsc                   ^ ^ [        SS U 4S jS9  [        T R                  5        [        SS U 4S jS9  [        SS U4S jS9  T R                  R                  S	S
9S   R                  S   nU H  nSUR
                  ;   d  M  X$R
                  R                  SS5         nTR                  R                  U5         TR                  R                  UR
                  S9nS S S 5        UR                  S   nWR                  R                  UR                  5        SUR                  S'   XvR                  S'   UR                  U5        TR                  R                  U5        M     [        R                  R                  R                   S   R#                  SS5      (       a  [%        TR                  R                  SS
95      nUS   n	['        U5       H  n
[)        U
5      (       a  M  U
n	  O   T R                  R                  S	S
9S   R                  S   nU H  nSUR
                  ;   d  M  TR                  R                  U	5         TR                  R                  UR
                  S9nS S S 5        WR                  R                  UR                  5        Un	M     [+        TR                  5        [        SS U4S jS9  g ! , (       d  f       GN= f! , (       d  f       Nq= f)Nartifactc                      SSS.$ )N,before_activation_quantization_fwd_aten_passstringr   encodingrM   rM   r@   r=   ri   5perform_fp8_activation_quantization.<locals>.<lambda>      B 
r@   c                  &   > T R                  SSSS9$ NFT)print_outputinclude_strideinclude_deviceprint_readabler  s   r=   ri   r        :44tD 5 
r@   metadata_fn
payload_fnc                      SSS.$ )N+after_activation_quantization_fwd_aten_passr  r  rM   rM   r@   r=   ri   r        A 
r@   c                  &   > T R                  SSSS9$ r  r  r  s   r=   ri   r    r  r@   c                      SSS.$ )N,before_activation_quantization_bwd_aten_passr  r  rM   rM   r@   r=   ri   r    r  r@   c                  &   > T R                  SSSS9$ r  r  r  s   r=   ri   r    r  r@   r   r  r   r8  rm  ro  rl  Tr^  rF  r_  r   r  r  c                      SSS.$ )N+after_activation_quantization_bwd_aten_passr  r  rM   rM   r@   r=   ri   r    r  r@   c                  &   > T R                  SSSS9$ r  r  r  s   r=   ri   r    r  r@   )r   rj  r   r  r   r   r   r  r   r   updatereplace_all_uses_with
erase_noder   rJ  r   rK  r   r   reversedr   r  )r  r  r  quant_fwd_module_outputsfwd_noderp  quant_bwd_inputrl  quant_bwd_module_inputsbwd_input_locbw_inputscaled_fwd_module_outputsscale_bwd_inputs   ``           r=   #perform_fp8_activation_quantizationr    s   
 

	 :++,

	 

	  *//::h:GJOOPQR,8==()--*?*?b*QRI!!11)<","2"2">">HMM">"R =$>>.9L  ''6=AO  !9:3?  0++O<''	2 - 66+	c- #'z'7'7'B'Bm'B'T"U/3 !89Hx(( ( :
 %/$4$4$?$?8$?$LQ$O$T$TUV$W!1Hx}},%%55mD&0&6&6&B&B&B&VO E$$++HMM: / 2 :++,

	? =<0 EDs   $K$K!
K	!
K/	r_   c                 t   [         R                  R                  SS 5       c  g U(       a  U Vs/ s H  oDR                  PM     snO/ nU  Vs0 s H  oDR                  U_M     nn[        R
                  R                  R                  S   R                  SS5      (       a/  U  Vs0 s H"  nSUR                  ;  d  M  UR                  U_M$     nnUR                  R                  SS9S   R                  S   nUR                  R                  SS9 Vs0 s H  oDR                  U_M     nnSn	U H  nUR                  U;   d  M  [        U5      (       d  M'  UR                  U;   a"  [        R                  S	UR                  5        MY  S
UR                  S'   UR                  S   R                  UR                  S'   S
XR                     R                  S'   UR                  S   R                  XR                     R                  S'   S
n	M     U	(       a  [        XU5        g g s  snf s  snf s  snf s  snf )NrF  exclude_primalsFprimalsr   r  r   r   z*Skipping quantization of static input %s: Tr^  r   rl  )inductor_configrK  r   r   r   rJ  r   r   r  r   rV  r/   debugr   rN  r  )
r  r  r  r_   r8   static_input_namessaved_values_namesr  r  should_perform_fp8_quants
             r=   enable_activation_quantizationr    s    	0044/	
 	
 	 '  ;;:t:; 
 7CCld))T/lC66+	c
U#$ )5
(4	8RODIItO 	 
 $))444A!DII!L$.$4$4$?$?=$?$Q$QD		4$Q    %"99**t/D/Dyy..		F		R26DII./(,		%(8(>(>DIIn%JNii(--.FG@D		%@P@V@Vii(--n='+$ #  +JDUV  9 	< D
s   H&H+%H0=H0H5)r_   saved_sym_nodesr  c                   [        XS9u  pVpxU R                  R                  SS9n	/ [        [        U	5      Qn
/ [        [
        U	5      Qn/ [        [        U	5      Qn/ [        [        U	5      Qn/ [        [        U	5      Qn[        U R                  X!-   U-   U-   UUS5      n[        R                  R                  5       nUR                  SS9 H  nUR                  (       d.  [        UUR                  5        [        UUR                  5        MB  U(       aO  [!        S UR                   5       5      (       a.  [        UUR                  5        [        UUR                  5        M  [        U5      (       d  M  [        UUR                  5        U(       a  M   e   [#        5       n/ n/ nU HJ  n[%        U5      nU(       a$  UR'                  U5        UR)                  U5        M9  UR)                  U5        ML     [+        U R                  5      n[,        R.                  " UX5       Hc  nSUR0                  ;  a  M  [3        UR0                  S   5      U-
  n[5        US S9 H  nUU;  a  M  UR)                  UU   5        M!     UU-  nMe     UR7                  5         UR9                  UU-   5        [        U R                  X-   XQ-   U-   U[;        [=        U5      [=        U5      -   5       Vs/ s H  n[?        U5      PM     sn-   S	5      n[        U R                  UU-   U-   U-   U-   UUS5      n[@        RB                  RE                  U U5      n[@        RB                  RE                  X5      n[G        UUUU5        UU4$ s  snf )
Nr  r   r  r   c              3      #    U  H^  nUR                   [        R                  R                  R                  R
                  L =(       a    [        UR                  5      S :H  v   M`     g7fr   N)r   r   r   _c10d_functionalwait_tensorr#  r  rw  rc   s     r=   rf   +_extract_fwd_bwd_modules.<locals>.<genexpr>e  sR      )
   HH		22>>FFF "AGG!"s   A&A(r   c                     U R                   $ r:   ro  )ss    r=   ri   *_extract_fwd_bwd_modules.<locals>.<lambda>  s    166r@   rk   r   )$r  r   r  filterr   r   r   r   r   r   r   distributedis_availablerw  r  r   allr   r   addr   r   	itertoolschainr   r   rm   clearextendr  r  r'   rT   _lazy_graph_module_make_graph_moduler  )r   r  r  r  r_   r  r	  r
  r  placeholdersprimal_inputstangent_inputsfwd_seed_offset_inputsbwd_seed_offset_inputsbackward_state_inputs	bwd_graphdistributed_enabledr8   saved_symbolssaved_sym_nodes_bindingsaved_sym_nodes_derivedsymbolsymbol_bindingsnew_symbolsr  r  	fwd_graphr  r  s                                r=   _extract_fwd_bwd_modulesr  @  s    	!O CK/  %%00M0BL7fZ67M9vk<89NIv&9<HIIv&9<HIGf%7FG2&7:PPI  ++88:$$$6zzL$))4OTYY7
 !S )
 ZZ)
 &
 &

 L$))4OTYY7%%L$))4((((' 72 /9lM    *40f%#**40#**40   3<3E3EFO 7V		!"499U#34}D)9:A '#**?1+=> ; 	$ W" 25LLM 3."_4 3|,s?/CCD
D 'q)D
	

 	
I 3
	
	 !	!  		 
 	
I &&99,	RJ&&99,RJ"j*.I z!!/
s   ,N)static_lifetime_input_indicesr_   r  c                   [        U 5      (       a  [        U UUUS9$ [        [        [        U R
                  R                  5      5      n[        [        [        U R
                  R                  5      5      nXV-   n[        XS9u  pp[        U R
                  XxU
S5      n[        S UR                   5       5      n/ n/ nU R
                  R                   GH
  nUR                  U;  a  M  [        U5      (       a  UR                  U5        M9  SUR                  ;  aH  UR                  S:X  a8  UR                   n[#        S U 5       5      (       d   eUR%                  U5        M  UR                    Vs/ s H  nUR                  U;  d  M  UPM     nnSUR                  ;   a*  [#        S U 5       5      (       a  UR%                  U5        M  UR                  U5        GM     [        [&        R)                  U5      R+                  5       5      n[        [&        R)                  U5      R+                  5       5      n[-        U UUUUS	9$ s  snf )
a  
Partitions the :attr:`joint_module` in a manner that closely resembles the
behavior observed in the original ``.forward()`` and ``.backward()`` of the
callable, i.e., the resulting forward graph contains those operators that
are executed in the original ``.forward()`` callable passed to
:func:`aot_function`.

The default partitioner collects the operators that are between the forward
inputs and the forward outputs. This helps in finding the tensors which have
to be stashed for the backward pass. These stashed tensors become the output
of the generated forward graph. The remaining operators are then placed in
the backward graph.

.. warning::
    This API is experimental and likely to change.

Args:
    joint_module(fx.GraphModule): The joint forward and backward graph. This
        is the result of AOT Autograd tracing.

Returns:
    Returns the generated forward and backward Fx graph modules.
)r  r  r  r   c              3   \   #    U  H"  oR                   S :w  d  M  UR                  v   M$     g7fr   Nr   r   r   s     r=   rf   $default_partition.<locals>.<genexpr>  s#      $6d''X:M			6   ,,r  r   c              3   Z   #    U  H!  oR                   [        R                  :H  v   M#     g 7fr:   )r   operatorgetitemrd   r~  s     r=   rf   r    s     I54{{h&6&665s   )+c              3   8   #    U  H  n[        U5      v   M     g 7fr:   r   rc   s     r=   rf   r     s      2(71Ar  r  r  r_   )r   #min_cut_rematerialization_partitionr   r  r   r   r   r   r  r   r   r   r   r   r   r   rw  r  r  r   fromkeysrx  r  )r   _joint_inputsr  r  r_   r  r  rZ   r  r	  r
  r  forward_only_graphforward_node_namesr  r  r8   rw  re   backward_usagess                       r=   default_partitionr    s   > L))2+*G	
 	
 
L,>,>,D,DEFM!&)<l>P>P>V>V"WX3F O CK/ <F1BI $ $066$  LO""((99..t ""4($))+?0JJJEI5IIIII&  ::%a7I)I:   		)c 2(72 / /  &&7##D); )< l388:;L4==9>>@AO#''$? )s   4I	I	g    .ArA  c                     XR                   -  $ r:   )itemsize)rA  rN  s     r=   _tensor_nbytesr    s    >>!!r@   c                   ^ S[         4S jmSU R                  ;   a  U R                  S   n[        U[        5      (       a  g[        U[        [
        45      (       a  [        U4S jU 5       5      $ [        U[        5      (       a#  [        U4S jUR                  5        5       5      $ [        U[        R                  5      (       a  T" U5      $ [        S[        U5       SU  35      eU R                  S	:X  d;  U R                  [        R                  R                   R"                  R$                  L a  g
[        SU  S35      e)Nr`   c                     [        U [        R                  5      (       d  g[        [	        U R                  5       SS9U R                  5      $ )Nr      fallback)r   r   r+  r  r   rA  rN  r   s    r=   object_nbytes_size_of.<locals>.object_nbytes"  s4    !U\\**hqwwy4@!''JJr@   r   r    c              3   4   >#    U  H  nT" U5      v   M     g 7fr:   rM   )rd   re   r  s     r=   rf   _size_of.<locals>.<genexpr>/  s     5A}Q''   c              3   8   >#    U  H  u  pT" U5      v   M     g 7fr:   rM   )rd   _re   r  s      r=   rf   r   1  s     @KDA}Q''Ks   zUnknown metadata type z	 on node r   r   r|   zO didn't have `val` metadata; we should always have `val` metadata on the nodes.)r   r   r   r   r   r   sumr   itemsr   r+  r   typer   r   r   r!  _assert_scalarr#  )r8   r   r  s     @r=   _size_ofr  !  s   KC K
 		iic<(( dE]++5555T""@CIIK@@@U\\** %%3DI;ivNOOww*uyy~~/L/L/T/T T

vde r@   c           	      4   SSK Jn  U" [        5      nU R                   H5  nUR                  S:X  d  M  X#R
                  R                  ==   S-  ss'   M7     [        R                  S[        UR                  5       [        R                  " S5      SS95        g )Nr   r   r   r    z%sTrl   reverse)collectionsr   r   r   r   r   rN   r/   r[  rm   r  r  
itemgetter)r   r   cntr8   s       r=   
_count_opsr  >  sk    '%c*C77o%$$%*%  HHT6#))+8+>+>q+A4PQr@   c                     / n [        [        R                  R                  5       H  n[	        [        R                  R                  U5      n[        U[        R                  R                  5      (       d  MR  UR                  5        HJ  n[	        X#5      n[        R                  R                  UR                  ;   d  M8  U R                  U5          M     M     U $ r:   )dirr   r   r!  rM  r   _opsOpOverloadPacket	overloadsr   	pointwiser   r   )r   	attr_nameopoverloadpacketoverloadop_overloads        r=   pointwise_opsr  H  s    
C(	"599>>9=*EJJ,G,GHH(224H!"2=Kyy""k&6&66

+, 5 ) Jr@   	depth_mapc                     U  Vs0 s H=  n[        U[        R                  R                  R                  5      (       d  M8  X!U   _M?     nn[        UR                  5       [        R                  " S5      SS9$ s  snf )Nr    Tr
  )	r   r   rT   r8   rU   rm   r  r  r  )r   r  arg
arg_depthss       r=   sort_depthsr  Z  si    '+'+z#uxx}}?Q?Q/Rs^t   *""$(*=*=a*@$OOs   7A8 	A8gmc                   ^
^^ [         R                  " 5       m0 m
U R                  R                  SS9 H  nTR	                  UU
4S j5      T
U'   M     [        U R                  R                  5       VVs0 s H  u  p!X_M	     snnmU
UU4S jn[        [        [        U R                  R                  5      5      nSn[        R                  nU H(  nUR                   H  nTU   U:  d  M  TU   nUnM     M*     Uc  U $ [        U R                  R                  5      STU     H[  nUR                  S:X  d  M  UR                  [        R                   R"                  R$                  R&                  :X  d  MS  U" U5        M]     [        U R                  R                  5      TU   S  H  nU" U5        M     [        R                   R)                  U T5      n	U	$ s  snnf )a|  
This pass finds the first bwd node in the graph (by looking at users of
tangents) and then reorders the graph by walking from this node to all the
way to the end of the graph. At each op in this traversal, we insert this op
in a new graph and try to bring only the relevant subgraph from the other
non-bwd edges relevant for this op. This closely mimics the behavior of
autograd engine.

Why is this pass required in the first place?

This is an artifact of how partitioners work today. The starting point of
partitioner is a joint graph, which is fwd and then bwd graph. In the case
of checkpointing, we keep portions of fwd graph in their original place in
the joint graph, while obtaining a bwd graph. As a result, the resulting bwd
graph has copies of recomputed fwd subgraphs followed by the original bwd
graph. If we run this naively, this leads to bad memory footprint, because
the fwd subgraphs are live for way longer duration than necessary. This pass
reorders the operations such that we prioritize the ops for the original bwd
graph while only realizing those ops from the fwd graph that are necessary
at any given point in the graph.
r   r  c                    > TU    $ r:   rM   r   s    r=   ri   5reordering_to_mimic_autograd_engine.<locals>.<lambda>}  s	    Ar@   c                 8  > U /n[        5       n[        U5      S:  aM  UR                  5       n X;   d  U T;   a  M,  UR                  U 5        XR                  -  n[        U5      S:  a  MM  [        UU4S jS9nU H  n TR                  U U4S j5      TU '   M     g )Nr   c                    > TU    $ r:   rM   )re   orders    r=   ri   Sreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph.<locals>.<lambda>  s	    %(r@   rk   c                    > TU    $ r:   rM   r   s    r=   ri   r'    r   r@   )r   r  ru  r  all_input_nodesrm   r   )r8   	cur_nodesinsertable_nodesr   r   r&  s      r=   insert_node_in_graphAreordering_to_mimic_autograd_engine.<locals>.insert_node_in_graph  s    F	0:)nq ==?D'43;  & ---I )nq  ""28JK$D!++D2BCCI %r@   Nr   )rT   r   r   r  r   	enumerater   r   r  r   mathinfrw  r   r   r   r   r!  copy_r#  GraphModule)r   r8   r  r,  r  first_node_in_bwdminimum_ordertangentr~  new_gmr   r   r&  s             @@@r=   #reordering_to_mimic_autograd_enginer7  a  s   . 
I"$C ##}#5''.>?D	 6 )2"((..(AB(A93TY(ABED$ &bhhnn=>NHHM!MMDT{]* %d$(! " "  	 RXX^^$%?u->'?@77o%$++9M9M9U9U*U & A RXX^^$U+<%=%?@T" A XX!!"i0FMY Cs   6G	fw_module	bw_modulefw_nodebw_nodedevice	rng_countlast_fwd_inputlast_bwd_inputc                    UR                   nUc   eU R                  n	UR                  n
[        R                  R                  R
                  nU R                  R                  U5         U R                  R                  SU 35      n[        U5      UR                  S'   UnSSS5        UR                  R                  U5         UR                  R                  SU 35      n[        U5      UR                  S'   UnSSS5        [        UR                  5      nWUS'   U R                  R                  U5         U	R                  SUUR                  /UR                  Q7US9nSSS5        UR                  W5        U	R!                  U5        [        UR                  5      nWUS'   U
R#                  U5         U
R                  SUUR                  /UR                  Q7US9nUR                  U5        U
R!                  U5        SSS5        Xg4$ ! , (       d  f       GNx= f! , (       d  f       GN.= f! , (       d  f       N= f! , (       d  f       Xg4$ = f)a  
Note [CUDA Graph Safe RNG Functionalization]

CUDA Graph capture doesn't work with get_rng_state and set_rng_state because these functions operate on CPU values,
while CUDA Graph RNG capture uses on-device CUDA tensors. To solve this, we use graphsafe_set_state with a
CUDA Generator registered to the CUDA Graph before capture begins. graphsafe_set_state updates the generator's pointer
to reference a different GeneratorImpl, ensuring subsequent calls are correctly forwarded to the desired generator
(and its cuda-tensor RNG state during graph capture).

For each RNG operation's forward/backward pair:

- We create two generators initialized with identical values
- Each forward and backward call advances its respective generator equally
- This keeps generators synchronized so forward and backward operations use matching RNG values

When forward is called multiple times before backward (causing desynchronization):

- We save the forward RNG state
- We update the backward Generator's state before executing backward

Before each CUDA Graph replay, replay_prologue updates captured RNG pointers with current states, ensuring backward Generator
changes are reflected during replay.

This function modifies both forward and backward computation graphs by:

Creating RNG state placeholders for both passes
Updating the forward node to use graph-safe RNG state
Updating the backward node to use graph-safe RNG state

For more details: https://github.com/pytorch/pytorch/issues/113541
Nfwd_rng_state_r   bwd_rng_state_	rng_stater   r   r   )indexr   r   _prims	rng_primsgraphsafe_run_with_rng_stater  r   r)   r   r   r   create_noder   r   r  r  inserting_before)r8  r9  r:  r;  r<  r=  r>  r?  
device_idxfw_graphbw_graphrH  fwd_rng_statebwd_rng_state	fw_kwargsfunctional_fw_node
bwd_kwargs
rng_outputs                     r=   %apply_graphsafe_rng_functionalizationrT    s   R J!!!HH#(<<#9#9#V#V  
	(	(	8!33nYK4PQ$?
$K5!& 
9 
	(	(	8!33nYK4PQ$?
$K5!&	 
9 W^^$I*Ik		(	(	1%11(..07<<0	 2 
 
2 !!"45  gnn%J+J{		"	"7	+))(..07<<0	 * 

 	%%j1G$ 
, ))M 
9	8 
9	8 
2	1 
,	+ ))s1   )9H9H%;+H7;AI
H"%
H47
I
Inum_sym_nodesc                   ^' [         R                  " 5       nS nS[        [        R                     4S jm'S[        [        R                     4S jnU" U 5      nU" U5      nU" U5      n	0 n
U R
                  R                   H  n[        U5      (       d  M  [        UR                  S5      (       d  M2  [        R                  R                  UR                  R                  ;   d  Mf  X{R                     nXR                     nXR                     nXS.X'   M     [        R                  R                  R                   n[        R                  R                  R"                  nS nUR
                  R%                  SS	9 H  nS
UR                  ;   d  M  Un  O   Uc  ['        S5      e/ n[)        [+        UR
                  R%                  SS	95      5      n[)        [+        UR
                  R%                  SS	95      5      n[-        U'4S jU
R/                  5        5       5      nUR1                  [        R                  " S5      5        [3        U5      S:  n[        R4                  R6                  n[6        R8                  =(       a<    U(       + =(       a.    UR:                  (       + =(       d    UR<                  R>                  n[A        U
RC                  5       5       GH#  u  nu  nnUS   nUS   nT'" U5      nUR
                  nUR
                  nU(       a*  Ub'  URD                  S:X  a  [G        UUUUUUUU5      u  nnMe  URI                  U5         URK                  SUUR                  /URL                  Q7URN                  S9nURK                  S[P        RR                  US40 S9nU" U5      URT                  S'   URK                  S[P        RR                  US40 S9n [V        RV                  " URT                  5      U l*        URY                  U 5        UR[                  U5        UR]                  U5        S S S 5        URI                  U5         S[)        U5       3n!UR_                  U!5      n"U" U5      U"RT                  S'   S S S 5        URI                  U5         URK                  SUW"UR                  /URL                  Q7URN                  S9n URY                  U 5        UR[                  U5        S S S 5        GM&     U(       a  [)        [a        UR
                  R%                  SS	95      5      n#U#RL                  S   n$[3        U$5      U-
  n%U$S U% [c        U5      -   U$U%S  -   n&UR
                  Re                  U&5        UR
                  R[                  U#5        URg                  5         URg                  5         X4$ ! , (       d  f       GN= f! , (       d  f       GNV= f! , (       d  f       GM  = f)Nc                 &   0 nU R                   R                   Ht  nUR                  S:X  d  M  [        UR                  S5      (       d  M2  [
        R                  R                  UR                  R                  ;   d  Mf  X!UR                  '   Mv     U$ )Nr   r   )
r   r   r   r   r   r   r   r   r   r   )gmodrandom_nodesr8   s      r=   get_rng_ops*functionalize_rng_ops.<locals>.get_rng_ops&  sh    JJ$$D?*DKK00II559I9II*.TYY' % r@   r`   c                 B   SU R                   ;  a  gU R                   S   n[        U[        5      (       d  U4nU HL  n[        U[        R                  5      (       d  M$  UR
                  R                  S:X  d  M@  UR
                  s  $    [        R
                  " S5      $ )zF
Check the example value of the node outputs to find the device type.
r   Ncudacpu)r   r   r   r   r+  r<  r  )r8   
candidates	candidates      r=   
get_device)functionalize_rng_ops.<locals>.get_device1  s     		!YYu%
*e,,$J#I)U\\22##((F2$+++ $
 ||E""r@   r<  c                 :   SSK Jn  U" 5       nUc   eU   U bF  U R                  S:X  a6  UR                  [        R
                  R                  5       5      sS S S 5        $ UR                  [        R                  " 5       5      sS S S 5        $ ! , (       d  f       g = f)Nr   )detect_fake_moder]  )torch._guardsrd  r  from_tensorr   r]  get_rng_state)r<  rd  	fake_modes      r=   get_sample_rng_state3functionalize_rng_ops.<locals>.get_sample_rng_stateC  sr    2$&	$$$!fkkV&; ,,UZZ-E-E-GH Y (()<)<)>? YYs   A B#B
Br   )fwdbwdr   r  r5  zaCouldn't find tangent node in graph inputs. This is unexpected, please file a bug if you see thisc              3   :   >#    U  H  nT" US    5      v   M     g7f)rk  NrM   )rd   	node_pairra  s     r=   rf   (functionalize_rng_ops.<locals>.<genexpr>o  s"      6W
9U#$$6Ws   r^  r    rk  rl  r]  r   rD  r   r   rng_state_output_r   )4r  countr	   r   r<  r   r   r   r   r   r   r   r   r   rF  rG  run_and_save_rng_staterun_with_rng_stater  r   r  r  r   valuesdiscardr  rJ  r   graphsafe_rng_functionalizationfallback_randomtest_configs*graphsafe_rng_func_ignores_fallback_randomr.  r  r  rT  rJ  rI  r   r   r  r  r   copyr  r  r   r   r  r   r   	recompile)(r   r8  r9  rU  uidrZ  ri  joint_graph_rng_opsfw_graph_rng_opsbw_graph_rng_opsrecomputable_rng_ops_mapr8   	base_noder:  r;  run_and_save_rngrs  bw_tangent_start_nodefw_rng_state_outputsr>  r?  devicesmulti_cuda_devices
ind_config'use_rng_graphsafe_rng_functionalizationr=  rn  r<  rL  rM  rQ  staterS  
state_namebw_rng_state_nodefw_output_node
fw_outputssym_node_start_idxr   ra  s(                                          @r=   functionalize_rng_opsr    sx   2 //
C	#HU\\2 #$@Xell%; @ &l3"9-"9-!""((4  V,,		11T[[5E5EE+II6I&yy1G&yy1G:A2R$/ ) ||--DD//BB **m*<		!$(! = $o
 	
 (9??#=#=#=#OPQN(9??#=#=#=#OPQN 6N6U6U6W G OOELL'( W) ''J.. 	
""	
 *** R&&QQ , .7 &&(.)	)Iy E"E"G$???? 4"v%-R	.*NN **73%-%9%9#$!..87<<8">>	 &: &" !,,#$$,a0	 -  %9$@

5!%11#$$*  2 
 #'))GLL"9
--j9##G,$++E2; 4@ **+@A0c<
$,$8$8$D!0DV0L!&&u- B
 **73%11#&+W^^KgllK">>	 2 
 --j9##G, 43A.\ d9??#=#=#=#JKL#((+
 _}<**+()*+,-. 	
 	w'"">2A 43@ BA
 43s&   C&W5WAW'
W	
W$	'
W7	c                 >   U R                   R                   H  n[        UR                  [        R
                  R                  5      (       d  M8  UR                  R                  S:X  d  MT  [        U5      (       a  Mf  [        R                  UR                  S'   M     g)z
By default, the partitioner is not allowed to recompute collectives
unless they come from a user-annotated AC region.
See Note [Recomputing collectives in the partitioner]
r  r   N)r   r   r   r   r   r  
OpOverload	namespacer   r   	MUST_SAVEr   )r   r8   s     r=   force_save_collectivesr    sg     ""((t{{EJJ$9$9::%%);;"4((%5%?%?DIIk" )r@   c                    [        5       n[        U R                  R                  5       H  nUR                  S:X  a  M  UR
                  [        R                  R                  R                  R                  :H  nU(       a  [        U5      (       a  UR                  UR                  S   5        [        U5      (       aA  UR                  S   U;   a,  [        R                   UR                  S   R"                  S'   M  M  M    g    g )Nr   r   r    r   )r   r  r   r   r   r   r   r   r!  r1  r#  r   r  r   r   r   r  r   )r   has_mutation_in_bwr8   is_copy_s       r=   force_save_bw_mutation_srcr    s     5?L++11277h;;%)).."6"6">">>+D11"&&tyy|4*400TYYq\EW5W1A1K1K		!!!+. 6X0 ! 3r@   c                    U R                   R                   H  n[        U5      (       d  M  UR                   HT  n[        U5      (       d  M  UR                  S   UR                  S   :  d  M7  [
        R                  UR                  S'   MV     UR                  R                  SS5      (       d  M  [        S UR                   5       5      (       a  M  [
        R                  UR                  S'   M     U $ )z
If there are two consecutive checkpointed blocks with no operator in
between, we would still want to stash the tensor at the boundary of
checkpointed blocks. The following pass makes the last output node
non-recomputable to allow for that.
ac_graph_idr   has_backward_hookFc              3   8   #    U  H  n[        U5      v   M     g 7fr:   )r   r  s     r=   rf   )cleanup_recompute_tags.<locals>.<genexpr>  s      E1;t$$r  )	r   r   r   rw  r   r   r  r   r   )r   r8   r~  s      r=   cleanup_recompute_tagsr    s     ""(($

"4((		-0499]3KK-=-G-GDIIk* # yy}}0%88 E15E B B& *:)C)C		+&7 )8 r@   	node_infomin_cut_optionsdont_banc                 F  ^^^^%^&^'^(^)^*^+^,^-^.^/^0 Tc
  [        5       m[        5       m/[        (       aQ  [        S U R                   5       5      nU[        S T/R                   5       5      -
  n[
        R                  SU5        S m&S m'U&U'U/4S jm( SS KnU(UU/4S	 jm*U*UU/4S
 jnU(4S jm)S[        4U)U/4S jjn	UR                  5       m.[        5       m%U%UU.U/4S jn
U R                   GH  nUR                  S:X  a  M  UTR                  ;   aj  UTR                  ;  a.  T.R                  UR                   S-   S["        R$                  S9  Md  T.R                  UR                   S-   S["        R$                  S9  ['        U5      (       a.  T.R                  UR                   S-   S["        R$                  S9  M  [)        U5      (       d  [+        U5      (       a  U
" U5        TR-                  U5      (       a  U" U5      (       a  U
" U5        SUR.                  ;  =(       a    SUR.                  ;  =(       dB    SUR.                  ;   =(       a,    [1        UR.                  S   [2        R4                  5      (       + n[7        U5      (       a  [        [9        U5      5      nO[U(       aA  [1        UR.                  R;                  S5      [<        5      (       a  SO["        R$                  nOU	" UTR>                  5      nT.R                  UR                   S-   UR                   S-   US9  UR@                   H<  nT.R                  UR                   S-   UR                   S-   ["        R$                  S9  M>     GM     S[B        [D        RF                     S[H        S[H        4U(U4S jjnTRJ                  (       GaR  TRL                   GHA  nUR@                   Vs/ s H,  nTR-                  U5      (       d  M  TRO                  U5      PM.     nnUR@                   Vs/ s H  nTR-                  U5      (       d  M  UPM     nn[Q        U5      S:  d  M  U" U[S        U5      5      n[U        UR@                  5       H  nTR-                  U5      (       d  M  TRO                  U5      U:  d  M2  T(" UU5      (       d  MB  UT%;   a  MJ  [
        R                  SUTRO                  U5      UUTRO                  U5      5        U
" U5        M     GMD     TRV                  (       Ga  [        5       nU R                   GHi  nTR-                  U5      (       d  M  TRO                  U5      U4/nTRO                  U5      n[Q        U5      S:  d  MR  [X        RZ                  " U5      u  nnUU;   a  M2  UR]                  U5        TRO                  U5      US-   :  aP  [Q        U5      S:X  aA  [
        R                  SUUTRO                  U5      TRO                  U5      5        U
" U5        M  UR@                   H[  nTR-                  U5      (       d  M  T(" UU5      (       d  M+  UT%;  d  M3  [X        R^                  " UTRO                  U5      U45        M]     [Q        U5      S:  a  GM  GMl      URa                  T.SS5      u  nnUu  nm-[        5       nU.4S  jU 5        H"  u  m0nURo                  U-U04S! jU 5       5        M$     [        5       nU H*  u  n n!U S S" U!S S# :X  d   eU S S" n"UR]                  U"5        M,     [q        U 5      m+[s        U R                  5       V#Vs0 s H  u  n#oU#_M
     snn#m,[u        U+4S$ jU 5       U,4S% jS&9n$U$T%4$ ! [         a  n[        S5      UeS nAff = fs  snf s  snf ! [b         ai    [
        R                  S5        [
        R                  SRe                  URf                  Rh                  Rk                  T.5      5      5        [m        T.5        e f = fs  snn#f )'Nc              3      #    U  HS  nUR                   S :X  d  M  [        UR                  S5      (       d  M2  [        UR                  R                  5      v   MU     g7f)r   _overloadpacketN)r   r   r   r   r  r   s     r=   rf    solve_min_cut.<locals>.<genexpr>7  sK      &
)ww/) -.5dkkCT.U -C++,,)s   AA&Ac              3   8   #    U  H  n[        U5      v   M     g 7fr:   )r   rd   r  s     r=   rf   r  <  s      4
5qCFF5r  z&Ops banned from re-materialization: %sc                 X   UR                   [        R                  R                  R                  :w  a  gUR
                  S   n[        R                  R                  R                  U5      u  nnU H6  nUR                  U   nXL a    g[        U[        5      (       d  M/  X;   d  M6    g   gNFr   T)r   r   r   higher_orderauto_functionalizedr   _higher_order_opsauto_functionalizeget_mutable_argsr   r   r   )ab
mutable_opmutable_arg_namesr  r   r  s          r=   !can_fuse_into_auto_functionalized8solve_min_cut.<locals>.can_fuse_into_auto_functionalizedA  s    88uyy--AAAVVAY
 ##66GG
S	
%D((4.Cx#t$$8 & r@   c                     UR                   [        R                  R                  R                  :w  a  gUR
                  S   nU H  nUR
                  S   U   nXL d  M    g   g)NFtensors_to_cloner   T)r   r   r   r   triton_kernel_wrapper_functionalr   )r  r  r  r   r  s        r=   .can_fuse_into_triton_kernel_wrapper_functionalEsolve_min_cut.<locals>.can_fuse_into_triton_kernel_wrapper_functionalR  s[    88uyy--NNNHH%78%D((8$T*Cx & r@   c                   > [        U5      [        R                  :X  a  gT" X5      (       a  gT" X5      (       a  gU R                  [        R
                  L a?  U R                  S   R                  [        R                  R                  R                  L a  gTR                  U 5      =(       a    TR                  U5      $ )NTr   F)r,   r!  catr   r  r  r   r   r   r  r  r>   )r  r  r  r  op_typess     r=   r>   !solve_min_cut.<locals>.is_fusible\  s     1),Q229!??HH(((q	  yy%%FFG
 ""1%@(*=*=a*@@r@   r   zANeed networkx installed to perform smart recomputation heuristicsc                 n  > TR                  U 5      (       a  g[        U /5      n[        U5      S:  a  UR                  5       nUR                   HQ  nTR                  U5      (       d  T" X#5      (       d    gTR                  U5      (       d  M@  UR                  U5        MS     [        U5      S:  a  M  gr  )rH   r   r  ru  rw  rs   r  )r8   r*  curr~  r>   r  r  s       r=   is_materialized_backwards0solve_min_cut.<locals>.is_materialized_backwardsv  s    D!!v&	)nq --/C		 //55j>S>S##D))MM$'	 " )nq  r@   c                   > U R                   S:w  a  gU R                  [        R                  :X  a  gU R                  R                  SS 5      [        R                  :X  a  g[        R                  (       a  TR                  U 5      (       a  gU R                  [        R                  R                  [        R                  R                  4;   a  gTR                  (       a  TR!                  U 5      (       d  gO-TR#                  U 5      (       d  TR%                  U 5      (       a  gTR&                  (       a8  T" U 5      (       a+  [(        R+                  SU [-        U R.                  5      5        gU R0                  S:  a  U R0                  [        R2                  :  a  gTR4                  (       a/  [7        S U R8                   5       5      n[;        U 5      nUS-  U:  $ g)	Nr   Fr   Tzmaterialized backwards: %s %si  c              3   z   #    U  H1  n[        U[        R                  5      (       d  M$  [        U5      v   M3     g 7fr:   )r   rT   rU   r  r  s     r=   rf   Bsolve_min_cut.<locals>.should_ban_recomputation.<locals>.<genexpr>  s(      %%.*Q2HYs   #;;r   )r   r   r  r  r   r   r   r  r   recompute_viewsrH   r!  lift_fresh_copyr#  
lift_freshr   rK   rE   rB   r   r/   r  r   rw  dist_from_bwmax_dist_from_bwr   r  r   r  )r8   input_tensors_sizeoutput_sizer  r  r  s      r=   should_ban_recomputation/solve_min_cut.<locals>.should_ban_recomputation  s{   77o%;;(***99==d+/?/I/II!!h&6&6t&<&<;;4//779P9PQQ22++D11 2 !!$''8+H+H+N+N 77<U=
 =
 II5tU4::=NO t#(9(9F<S<S(S ++!$ %%)YY% " #4.K?%777r@   c                 r   >^  T R                   S:X  a  g[        UU 4S jT R                   5       5      (       + $ )Nr   Tc              3   6   >#    U  H  nT" TU5      v   M     g 7fr:   rM   )rd   r~  r>   r8   s     r=   rf   9solve_min_cut.<locals>.is_materialized.<locals>.<genexpr>  s     E*$z$--*s   )r   r  rw  )r8   r>   s   `r=   is_materialized&solve_min_cut.<locals>.is_materialized  s*    77m#E$**EEEEr@   r`   c           
        > [         R                  (       a  X;   a  g[        U 5      n[         R                  (       a&  TR	                  U 5      (       a  [
        R                  $ [        U R                  S   [        5      (       a2  [        U R                  S   [        R                  5      (       d  [        $ [        US[        [        U R                   S5      S5      -  -  5      nT" U 5      (       a  U$ US-  $ )Nr   r   g?rR  r       )r    treat_parameters_as_free_to_saver  r  rH   r/  r0  r   r   r   r   r   INT_INFr   r  r  r  )r8   r_   mem_szr  r  s      r=   get_node_weight&solve_min_cut.<locals>.get_node_weight  s    333$!!h&6&6t&<&< 88Odii&55dii.== Vsc#d.?.?*Eq&IIJK4  MA:r@   c                 ,  > TR                  U 5      (       a  gU T;   ak  [        U R                  [        R                  R
                  5      =(       a    U R                  R                  S:H  n[        R                  (       d  U(       d  g[        U 5      (       a  gSU R                  ;   a-  [        U R                  S   [        R                  5      (       a  gTR                  U 5        TR                  SU R                  S-   [        R                   S9  g)NFr  r   source_incapacityT)rH   r   r   r   r  r  r  r   (unsafe_allow_optimization_of_collectivesr   r   r   r  add_edger   r/  r0  )r8   is_collectivebanned_nodesr  nx_graphr  s     r=   ban_recomputation_if_allowed3solve_min_cut.<locals>.ban_recomputation_if_allowed  s    D!!8 4;;

(=(=> @KK))-??  >>m $DII*TYYu-=u~~"N"N
 	(DII$5Ir@   r   r  sinkr  _outr   r          start_nodes	max_rangec                   > / nU  H,  n[         R                  " UT
R                  U5      US45        M.     [        U5      S:  a  [         R                  " U5      u  pEnU(       d  T
R                  U5      $ UR
                   Hi  nT
R                  U5      (       d  M  T
R                  U5      U:  a  M2  T
R                  U5      UT	" XW5      4nX;  d  MS  [         R                  " X(5        Mk     [        U5      S:  a  M  U$ )zl
Finds the first unfusible node in the chain of nodes starting from
`start_nodes` and returns its position.
Tr   )heapqheappushr}   r  heappoprw  rs   )r  r  sorted_nodesre   r  r8   node_is_fusibler~  r   r>   r  s            r=   find_first_unfusible+solve_min_cut.<locals>.find_first_unfusible>  s    
 9;ANN<)*@*@*CQ)MN  ,!#',}}\'B$A_" --d33

++D11 --d3i? !..t4"4.6C
 .|9 #	 ,!# r@   z1used above/below fusible %s:(%s) -> %s -> %s:(%s)rR  ztoo long %s %s %s %sr  z-Failed to compute min-cut on following graph:
c              3   0   >#    U  H  oTU   4v   M     g 7fr:   rM   )rd   re   r  s     r=   rf   r    s     8i$is   c              3   :   >#    U  H  oT;   d  M
  TU4v   M     g 7fr:   rM   )rd   vnon_reachableus     r=   rf   r    s     Ad=.@fq!fds   	c              3   .   >#    U  H
  nTU   v   M     g 7fr:   rM   rd   r8   name_to_nodes     r=   rf   r    s     2	d		s   c                    > TU    $ r:   rM   )r   node_idxs    r=   ri   solve_min_cut.<locals>.<lambda>  s	    (1+r@   rk   );r   get_default_op_listr.   r   r7   r/   r[  networkxImportErrorr   floatDiGraphr   r\   rZ   r  r   r/  r0  r   r   r   rs   r   r   r   r+  r   r   r   r   r_   rw  r   rT   rU   r   r   ro   r}   r  r  r   r   r  r  r  r  minimum_cut	Exceptionjoin	readwriteedgelistgenerate_edgelistvisualize_min_cut_graphr  get_name_to_noder.  rm   )1r   r  r  r  joint_module_opsops_ignorednxer  r  r  r8   is_non_tensor_nodeweightr~  r  	used_nodeordersfw_usersfirst_unfusible_usevisited
start_nodefusiblestart_orderr  r  	cut_value	partition	reachablecutsetnbrs	cut_nodesnode_innode_out	node_namer  r  r  r  r  r>   r  r  r  r  r  r  r  r  s1    ```                                 @@@@@@@@@@@@r=   solve_min_cutr%  ,  s    <"$H% &
#))&
 

 ' 4
$554
 *
 
 	9;G"A&0dFe  < zz|H(2L 6 !!77h9...9+++!!$))e"3Vdhh!O dii&0&488L$
 dii%/$((Kd2488(.
 ##D)).Ft.L.L(. "E}DII'EUtyy SDIIe4Dell)S%S 	 t=./F!$))--"6FFDHH  %T9+P+PQF$))e+TYY-?&QJJDdii&0$))e2CdhhW e "L$rww- C C  4 ,,,"44I &OO+D++D1 -	&&t,+   "+!0I4L4LT4R   6{Q&:8S[&Q#!)//2D!0066%2248;NN&y$77</$O%%229=/ %2248 5T:! 3 5P 111'1|%++J++J77''
3Z@2G $00<Kg,"w/3'>C  **3/+2CCG)HH."!..s3!..z: 15IID!0066&sD11 4w1G1G1Mt0TU &) g,"" ,F!~~h&I	9  )I}*4,F8i84AdAA 9 ",I#s|x},,,CRL	i  $
 $K0L+4[5F5F+GH+Gic4c	+GHH2	28ML %%]
  O
	R
R  @A2<<00BB8LMN)	& IsC   ]? ^/^^"2^"^' `?
^	^^'A3`c                    SS K nSS KnUR                  R                  U 5      R	                  5       nUR                  U5      S   nUR                  5        He  nXR                  5          UR                  5          S   nUR                  [        U5      5        U[        S5      :X  d  MT  UR                  S5        Mg     [        R                  S5        UR                  S5        g )Nr   r  r0  redz2Visualizing the failed graph to min_cut_failed.svgzmin_cut_failed.svg)r  pydotnx_pydotto_pydot	to_stringgraph_from_dot_data	get_edges
get_sourceget_destination	set_labelr   r  	set_colorr/   r[  	write_svg)r  r  r(  
dot_format	dot_graphedger  s          r=   r  r    s    %%h/99;J))*5a8I##%//+,T-A-A-CDZPs6{#U5\!NN5! & HHAB,-r@   c                  |   / [         R                  P[         R                  P[         R                  P[         R                  P[         R
                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                   P[         R"                  P[         R$                  P[         R&                  P[         R(                  P[         R*                  P[         R,                  P[         R.                  P[         R0                  P[         R2                  P[         R4                  P[         R6                  P[         R8                  P[         R:                  P[         R<                  P[         R>                  P[         R@                  P[         RB                  P[         RD                  P[         RF                  P[         RH                  P[         RJ                  P[         RL                  P[         RN                  P[         RP                  P[         RR                  P[         RT                  P[         RV                  P[         RX                  P[         RZ                  P[         R\                  P[         R^                  P[         R`                  P[         Rb                  P[         Rd                  P[         Rf                  P[         Rh                  P[         Rj                  P[         Rl                  P[         Rn                  P[         Rp                  P[         Rr                  P[         Rt                  P[         Rv                  P[         Rx                  P[         Rz                  P[         R|                  P[         R~                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[        R                  P[         R                  P[         R                  P[         R                  P[         R                  Pn [         R                  [         R                  [         R                  /nU[         R                  [         R                  [         R                  [        R                  [         R                  [         R                  [         R                  [         R                  [         R                  /	-  nUnU / [        R                  P[        R                  P[         R                  P[         R                  P[         R                  P[        R                  P[        R                  P[         R                  P[         R                  P[        R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[         R                  P[        R                  P[        R                  P-  n U [         R                  [         R                  /-  n X-  n U [        5       -  n U [         R                  /-  n U [         Vs/ s H  n[        U5      PM     sn-  n [        U 5      n[        [        S[        4      " [         R                  [         R                  [         R                  /5      n[         R                  [         R                  [         R                  [         GR                   [         GR                  [         GR                  [         GR                  [         GR                  [         GR
                  [         GR                  [         GR                  /nXE-  nG[        U[        U5      U[        U5      U5      $ s  snf )N.)r!  r  subrv  atan2r*  r  r  pow	remainderfmod__and____or____xor__
__lshift__
__rshift__eqnegegtleltr"  bitwise_notceilfloorfracnegreluroundsilutruncr/   log10log1plog2lgammaexpexpm1erferfccosacoscoshsinasinsinhtanatantanhatanhsqrtrsqrtr)  sigmoidsoftplus	thresholdthreshold_backwardclampwherelerpaddcmulgelugelu_backwardr  mean_grad_sum_to_sizesum_to_sizer$  totype_asr  r  squeeze	unsqueezersub_to_copyaliasviewslicetr%  broadcast_in_dimexpand
as_stridedpermuteselectrL  r&  clone	full_likevarstd_unsafe_viewreshapebroadcast_tensorsscalar_tensorones	new_zerosr  arangetriuvar_meanisinfr   fullzerosempty
empty_likeargmaxmaximumiota'_low_memory_max_pool_offsets_to_indicesrE  gatherr  
zeros_liker   r   r   r   r   native_dropout	rand_like
randn_likemmconvolutionconvolution_backwardbmmaddmm#_scaled_dot_product_flash_attention'_scaled_dot_product_efficient_attention_flash_attention_forward_efficient_attention_forwardupsample_bilinear2d
_scaled_mmr1   )default_recomputable_opsrecomputable_view_opsr6   mr7   r5   r4   r3   s           r=   r  r    s   L0L0L0 	L0 	

	L0
 	L0 	L0 	L0 	L0 	L0 			L0 	L0 	L0 	L0 	L0 	L0  	!L0" 	#L0$ 	%L0& 	'L0( 	)L0* 	+L0, 	-L0. 	/L00 			1L02 	

3L04 			5L06 	7L08 			9L0: 	

;L0< 			=L0> 	

?L0@ 	AL0B 	

CL0D 	

EL0F 			GL0H 	IL0J 	KL0L 	

ML0N 	OL0P 			QL0R 	SL0T 			UL0V 			WL0X 	YL0Z 			[L0\ 			]L0^ 	_L0` 			aL0b 			cL0d 	

eL0f 			gL0h 	

iL0j 	kL0l 	mL0n 	oL0p 	qL0r 	sL0t 	

uL0v 	

wL0x 			yL0z 	{L0| 			}L0~ 	L0@ 	AL0B 			CL0D 	EL0F 	GL0H 			IL0J 	KL0L 	ML0N 	OL0P 	QL0R 	SL0T 			UL0V 	WL0Z "\\4>>4::F		




 
 %H $!		$!""$! 	

$! 		$!
 	$! 			$! 			$! 	$! 	$! 	$! 	$! 	$! 			$! 	$! 	

$!  	!$!" 	#$!$ 	%$!& 			'$!( 	)$!* 	+$!, 	-$!. 			/$!0 	1$!2 	

3$!4 	5$!6 			7$!8 	9$!: 	

;$!< 	

=$!> 	?$!@ 	A$!B 	C$!D 	

E$!F 	55G$! $L T[[ 99(/!   N1!3A!6 NN!":;HS#X./			dnndoo>J 	!!

0044%%))   #/K()8 + !Os   6d9c                 L    0 nU R                    H  nX!UR                  '   M     U$ r:   )r   r   )r   r  r8   s      r=   r  r  {  s'    L"&TYY r@   memoryruntimes
max_memoryall_recomputable_banned_nodesc           	         [         R                  nUS:X  a  [        XU5      $ US:X  a  [        XU5      $ US:X  a  [	        XU5      $ US:X  aR  [
        R                  S5        [        R                  " U UUUS9n[	        UU[        US9R                  [        US95      $ [        U5      (       a  U" XX4U5      u  pS	X4$ [        S
U 35      e)Ngreedyilpdpdynamic_memory_budget_dpzdynamic_memory_budget_dp is an experimental solver. It does not guarantee performance improvements. Additionally, it is not guaranteed to be stable.)r   r   recorded_knapsack_input_memories recorded_knapsack_input_runtimes)graph_info_provider)knapsack_algomax_mem_budgetr  z,Not aware of memory budget knapsack solver: )r   activation_memory_budget_solverr#   r$   r"   r/   warningr!   inialize_from_graphr%   get_knee_point_memory_budgetcallabler   )
r   r  r  r  r  r  SOLVERr  saved_node_idxrecomp_node_idxs
             r=   #_optimize_runtime_with_given_memoryr    s     33Fv<<	5Fj99	46Z88	-	-?	

 0CC#*G-3-5	
 $7**)) + 	
 		
 
&		*08U+
' ^55I&RSSr@   no_dispatchr   r  c                    ^ [        U R                  5      nU4S jnU Vs/ s H
  oC" U5      PM     nnU R                  5        Vs/ s H
  oC" U5      PM     nnU R                  X%S9$ s  snf s  snf )Nc                    > [        U TS9$ )Nr  )r   )dr  s    r=   realize_symbol8_remove_symbols_without_guarding.<locals>.realize_symbol  s    H--r@   )stride)r   shaper  new_empty_strided)r   r  r  r  r  r  s    `    r=    _remove_symbols_without_guardingr    sk    ME. )..1^AE.)*4AnQF4u44 /4s   A(A-c                 `  ^ ^^	 [         R                  nS nUS:X  a  gUS:X  ac  [        5          SSKJn  [
        R                  " UT R                  T R                  45      u  mm	UR                  UU	U 4S j5      nUsS S S 5        $ US:X  as  SS	K
Jn  [
        R                  " UT R                  T R                  45      u  mm	U" S
S9 nT R                  " T0 T	D6  S S S 5        WR                  5       n[        US5      $ [        SU 35      e! , (       d  f       g = f! , (       d  f       NI= f)Nc                    [        U [        R                  5      (       aC  [        U R                  S   [        R
                  5      (       a  [        U R                  S   SS9$ [        U [        R                  5      (       aC  [        U R                  S   [        R                  5      (       a  [        U R                  S   SS9$ [        U [        R                  5      (       a-  [        U R                  S   [        R                  5      (       a  g[        U [        R                  5      (       a-  [        U R                  S   [        R                  5      (       a  gU $ )Nr   r  r        ?T)r   rT   rU   r   r   r+  r  r   r   r   r   r  s    r=   materialize_arg)estimate_runtime.<locals>.materialize_arg  s    a!!j&M&M3AFF5MDQQ277##
166%=%,,(O(OAFF5MD99277##
166%=%..(Q(Q277##
166%=%--(P(PHr@   testingr    profiler   )benchmarkerc                  (   > TR                   " T 0 TD6$ r:   )r   )r   r   r8   s   r=   ri   "estimate_runtime.<locals>.<lambda>  s    4;;3O3Or@   flops)FlopCounterModeF)displayz Not aware of runtime estimator: )r   *activation_memory_budget_runtime_estimatorr  $torch._inductor.runtime.benchmarkingr  r   tree_mapr   r   benchmark_gputorch.utils.flop_counterr  r   get_total_flopsr  r   )
r8   RUNTIME_MODEr  r  msr  modecounted_flopsr   r   s
   `       @@r=   estimate_runtimer    s    DDL
 y 		"]H!???TYY<TULD&**+OPB ] 
	 <DKK8PQfU+tKK(( ,,,.=!$$=l^LMM# ] ,+s   AD	D
D
D-c           	        ^ ^^^ ^!^"^#^$^%^&^'^(^)^* US:  d  US:  a  [        SU 35      e[        [        R                  [        R                  [        R
                  [        R                  [        R                  S9n[        R                  (       a  [        USSSSS9nUS:X  a  TR                  $ [        T TU5      u  pEUS:X  a  U$ S[        [        R                     S[        4S	 jm!T!" TR                  5      m(T!" U5      m&T&T(::  a  U$ U&U(4S
 jnS[        [        R                     4U!U&U(4S jjm"[        USSSS9n[        T TU5      u  pT"" U5      U:  a  U$ [        USS9m[        T TT5      u  pT"" U	5      U:  a  U	$ SSKJm#  [%        U#4S jTR                   5       5      m%S[$        [        R                     S[        [        R                     4U#U%4S jjnU" U
5      nU Vs/ s H5  nUR&                  R)                  SS5      [*        R,                  :X  d  M3  UPM7     nnU Vs/ s H  oU;  d  M
  UPM     nn[/        U[0        SS9m [3        T 5      S:X  a  TR                  U-   $ T  Vs/ s H  o" [1        U5      5      PM     snm'T  Vs/ s H  n[5        U5      PM     snm*SSKJm)  UU U'U)U*4S jm$[        R:                  (       Ga  U"U$U UU*4S jnU" S5      U" S5      /nUS   SS  US   SS  :w  a  US   US   4/nU(       a  UR=                  5       u  nnUS   US   -
  S:  a$  UR?                  U5        UR?                  U5        MM  U" US   US   -   S-  5      nUSS  USS  :w  a  UR?                  UU45        USS  USS  :w  a  UR?                  UU45        U(       a  M  URA                  5         SS K!J"n  U Vs/ s H  nUS   PM
     nnU Vs/ s H  nUS   PM
     nnURG                  SS9  URI                  UUSS 9  [K        U5       H   u  nnURM                  US! UUU   4S"S#S$S%9  M"     URO                  S&5        URQ                  S'5        URS                  S(5        URU                  S5        URW                  5       nURY                  5         [Z        R\                  " 5       n[        R^                  b%  [        R^                  n[Z        R`                  " USS)9  S*n[b        Rd                  Rg                  5       (       aD  [b        Rd                  Ri                  5       (       a!  S+[b        Rd                  Rk                  5        3n[Z        Rl                  Ro                  US,U S-[q        5        S.35      nURs                  U5        [t        Rw                  S/U5        T$" UTT S09S   $ s  snf s  snf s  snf s  snf s  snf s  snf )1Nr    r   zJThe valid ranges for memory budget are 0 <= m <= 1. The provided value is )r   r   r   r   r   F)r   r   r   r   r  r`   c                 :    [        [        [        U 5      5      S-  $ N    eA)r  mapr  )r  s    r=   estimate_activations_size:choose_saved_values_set.<locals>.estimate_activations_size	  s    3x./#55r@   c                    > U S-  TT-
  -  $ r  rM   )szmax_act_sizemin_act_sizes    r=   get_normalized_size4choose_saved_values_set.<locals>.get_normalized_size	  s    S\L899r@   activationsc                 &   > T" U 5      T-
  TT-
  -  $ r:   rM   )r  r  r  r  s    r=   get_mem_ratio.choose_saved_values_set.<locals>.get_mem_ratio	  s"    )+6E<'
 	
r@   )r   r   r   )r   )get_node_storagec              3   4   >#    U  H  nT" U5      v   M     g 7fr:   rM   )rd   r8   r  s     r=   rf   *choose_saved_values_set.<locals>.<genexpr>5	  s     TCS4 0 6 6CSr  r  c                    > U  Vs/ s H.  nUR                   [        S5      :  d  M  T" U5      T;  d  M,  UPM0     sn$ s  snf r  )r  r   )r  r  r  input_storagess     r=   get_recomputable_banned_nodes>choose_saved_values_set.<locals>.get_recomputable_banned_nodes7	  sP    
 "
! S)	 
 %Q'~= !
 	
 
s   >
>>r   Tr
  r  c                   > T" 5          [        UTT[        U S5      UT5      u  nnnS S S 5        [        5       nW H  n UR                  TU   5        M     UR                  T5      (       d   e[        UUT
U5      u  p[        (       a  [        UTWUWTTUS9  UW4$ ! , (       d  f       N}= f! [         a     M  f = f)Nr   )r   r  saved_node_idxsrecomputable_node_idxsexpected_runtimememories_banned_nodesruntimes_banned_nodesmin_cut_saved_values)	r  r  r   r  BaseExceptionissubsetr%  r.   r   )memory_budgetr  r   r  r  r  r  r  r  r  aggressive_optionsr  r   r  r  s             r=   get_saved_values_knapsack:choose_saved_values_set.<locals>.get_saved_values_knapsack_	  s    ]
 4%%M1%-	 &	  )3)C:3?@ *   !>????'	
 ! 4'.K /'=!1&;&;%1	 ---Q ]$ ! s   B B0
B-0
B>=B>c                 F   > T" U TTS9u  pU [        T5      U-
  T" U5      4$ )N)r  r   )r  )r  r  r  r  r  r   r  r  s      r=   estimate_for_budget4choose_saved_values_set.<locals>.estimate_for_budget	  s=    -FYK.*L )*-==l+ r@   r  r  gMbP?r  )
      )figsizeo)markerz.4fzoffset points)r   r  center)
textcoordsxytexthazMemory Budgetz Runtime of Recomputed Componentsz:Pareto Frontier of Memory Budget vs. Recomputation Runtime)exist_okrm  _rank_memory_budget_paretor  z.svgz%Generated Pareto frontier curve at %s)r  r  r   )<r   r   r   ban_recompute_used_far_apart!ban_recompute_long_fusible_chains#ban_recompute_materialized_backwardban_recompute_not_in_allowlistban_recompute_reductionsaggressive_recomputationr   rZ   r%  r   rT   rU   r  torch._inductor.fx_utilsr  r   r   r   r   r  rm   r  r  r  torch.utils._mode_utilsr  visualize_memory_budget_paretoru  r   sortmatplotlib.pyplotpyplotfigureplotr.  annotatexlabelylabeltitlegridgcfshowosgetcwdmemory_budget_pareto_dirmakedirsr   r  r  is_initializedget_rankpathr  r(   savefigr/   r  )+r   r  r  r  runtime_optimized_saved_valuesr  r  more_aggressive_optionsmore_aggressive_saved_values%aggressive_recomputation_saved_valuesr  r  recomputable_banned_nodesr  must_save_nodesr8   r
  optionsbisectslhsrhsmidpltitemx_valuesy_valuestxtfigfig_dirrank_suffixfig_namer  r  r  r  r  r  r  r  r   r  r  r  s+   ``                             @@@@@@@@@@@@r=   choose_saved_values_setrI    s   
 qMA-XYfXgh
 	
 $$AA#)#K#K%+%O%O & E E88O &&!"'',).$)
 (5)%" --6RWW 6% 6 -Y-=-=>L,-KLL|#--:
4= 
 

 &##(%*	 '4Y 7'#  12]B++  % ;HY 2;7) :;mK449T9CSCSTTN
 )
	bgg
 
 !>l K +*A66::k5)-=-G-GG 	
*   -!,a0H,  ! %+!x%! ()Q./112O2OQHQK(2O ,I+H4+H 4). ).V ,,,	 	 's+-@-EF1:ab>WQZ^+
GAJ/0G";;=Sq6CF?T)NN3'NN3')3q6CF?a*?@qr7c!"g%NNC:.qr7c!"g%NNC:. ' 	'(/0DG0(/0DG0 	

7
#8C0  )FAsLLs)hqk"*   * 	

?#

56		NOggi
))+**655GKK$/))++0A0A0P0P0R0R"5#4#4#=#=#?"@AK77<<+K=:L:N9OtT
 	H;XF %#yk	 	]
!^ 10s0   +2V9!V9-	V>:V>3WWW Wc                   ^^ SSK Jm  S nU4S jn[        R                  R	                  5       (       Gaz  [        R                  R                  5       (       GaV  [        R                  R                  5       S:  Ga3  U" U 5      (       Ga%  U" U 5      (       Ga  [        5          T" 5          U Vs/ s H  oDR                  PM     sn/n[        [        R                  R                  5       5       Vs/ s H  n/ PM     nn[        R                  R                  XuS   5        [        U 5      n/ n	0 n
[        U5       H|  u  pU Vs/ s H  oU   PM	     nnSnU HF  n[        U5      nUU-  nU[        R                  R                  5       :X  d  M7  UU
UR                  '   MH     XS'   U	R                  U5        M~     [        R                   " U	[        R                  R"                  R%                  5       S9n[        R                  R'                  U[        R                  R"                  R(                  R*                  S9  [-        [        R.                  " U5      R1                  5       5      nS	U S
U
 3m[3        SS U4S jS9  UU    Vs/ s H  nUU   PM
     nnS S S 5        S S S 5        U$ U$ s  snf s  snf s  snf s  snf ! , (       d  f       N.= f! , (       d  f       U$ = f)Nr   )unset_fake_temporarilyc                     U R                    HT  n[        UR                  [        R                  R
                  5      (       d  M8  UR                  R                  S;   d  MT    g   g)N>   c10d_functionalr  TF)r   r   r   r   r  r  r  )r   r8   s     r=   has_collectives3_sync_decision_cross_ranks.<locals>.has_collectives	  sM    %%DUZZ22 ++''+RR	 &
 r@   c                   >^ SR                  S U R                   5       5      n[        R                  " UR	                  S5      5      R                  5       n[        [        R                  R                  5       5       Vs/ s H  nS PM     snm[        5          T" 5          [        R                  R                  TU5        S S S 5        S S S 5        [        U4S jT 5       5      $ s  snf ! , (       d  f       N0= f! , (       d  f       N9= f)N/c              3   8   #    U  H  oR                   v   M     g 7fr:   ro  )rd   r   s     r=   rf   E_sync_decision_cross_ranks.<locals>.has_same_nodes.<locals>.<genexpr>	  s     >,=qFF,=r  zutf-8c              3   4   >#    U  H  nTS    U:H  v   M     g7fr  rM   )rd   r   
all_inputss     r=   rf   rS  	  s     :z!:a=A%zr  )r  r   hashlibsha256encode	hexdigestr  r   r  get_world_sizer  all_gather_objectr  )r   node_strrZ   r  rU  rK  s       @r=   has_same_nodes2_sync_decision_cross_ranks.<locals>.has_same_nodes	  s    
 88>K,=,=>> 89CCE$)%*;*;*J*J*L$MN$Mqd$MN
]24//
FC 5] :z:::	 O44]]s*   C(C>"!C-C>-
C;	7C>>
Dr    z
total size)r<  r  zpicked_rank_idx=z, saved_nodes of current rank=r  c                      SSS.$ )N)aot_joint_graph_sync_decision_cross_ranksr  r  rM   rM   r@   r=   ri   ,_sync_decision_cross_ranks.<locals>.<lambda>
  s    G (%r@   c                     > T $ r:   rM   )sync_decision_cross_ranks_strs   r=   ri   ra  
  s    #@r@   r  )torch._subclasses.fake_tensorrK  r   r  r  r1  rZ  r  r   r  r[  r  r.  r  r2  r   r?  distributed_c10d_get_object_coll_device
all_reduceReduceOpMAXr   argminrA  r   )r   r  rN  r]  r   objectsr  saved_ops_names_all_ranksr  saved_sizessaved_ops_with_sizesr  saved_ops_namesop_namesaved_nodes
saved_sizer8   size_of_nodesaved_sizes_tensorpicked_rank_idxre   rc  rK  s                        @@r=   _sync_decision_cross_ranksrv  	  s    E; 	&&((,,..,,.2K((;'']24(45156G!%"3"3"B"B"DE:EqE & : //0IST:V+K8L%'K35 (12K(L$DSTOG4OT
'D#+D>L,.Je//88:::F,TYY7	 (
 6@\2"":. )M "'((99QQS" (("u'8'8'I'I'R'R'V'V )  "%,,/A"B"G"G"IJO.>>OOm  oC  nD  -E) A *C?)S)SAQ)S  Q 5]X <W 6: U:Q 54]]X sb   #K+K0J2-K2J7>AKJ<
9KC=KKKK2K
K	K
K&c                 J   SnU(       a  SOSn[        [        U R                  R                  SS95      5      nU R                  R                  S[        R
                  R                  R                  S9 GH   n[        XR                  S   R                  5      n[        U[        R                  5      (       d  MG  / n[        UR                  R                  SS95       H  u  pX9R                  ;   d  M  U R                  R!                  U5         U R                  R#                  U SU 35      n
US	-  nU	R$                  S
   U
R$                  S
'   U
nUR'                  U
5        SSS5        M     U(       d  GM  U R                  R!                  U5         U R                  R)                  S[        R
                  R                  R                  / UR                  QUQ70 5      nUR+                  USS9  SSS5        UR$                  R-                  S5      nU(       a9  Uu  p/ UQU Vs/ s H  oR$                  S
   PM     snQ7nUU4WR$                  S'   U R                  R/                  U5        GM     U $ ! , (       d  f       GM  = f! , (       d  f       N= fs  snf )u  
Graph-safe RNG lets torch.compile use CUDA Graphs for graphs with RNG ops.
For graphs without HOPs, the partitioner adds placeholder nodes
fwd_rng_state_* and bw_rng_state_* to the forward and backward graphs. At
runtime, the AOTDispatcher retrieves these RNG states and passes them to the
compiled graphs.

This works well for no-HOP graphs. With HOPs, the partitioner runs
recursively: it first partitions the HOP (producing forward/backward HOP
subgraphs) and then stitches them back into the outer joint graph. For HOPs
that contain RNG ops, the outer joint graph now includes HOP subgraph
modules with extra RNG placeholders. We must thread these placeholders
through the outer module partitioned forward and backward graphs—this
function does exactly that. It collects the RNG placeholder nodes from the
HOPs and creates corresponding placeholders in the outer forward and
backward graphs.

There is a catch: for a short period, the joint graph is in a “bad” state.
The HOP subgraphs expect additional inputs (because of the new
placeholders), but the outer graph call sites don't yet provide them. We
can't fix this in the joint graph because the joint graph's input signature
is fixed (primals, tangents). As a compromise, we keep the joint graph in
somewhat of a bad state for some time and, once the outer forward and
backward graphs are partitioned, insert the corresponding RNG placeholders
and wire up the calls.
r   rO  rN  r   r  r   )r   r   r  r    r   NT)propagate_metaeager_input_vals)r  r  r   r  r   r   r  invoke_subgraphrM  r   r   r   rT   r2  r.  r   r  r   r   r   rI  r  r   r  )moduler   r=  
rng_string
last_inputhop_noder   new_rng_inputsr  placeholder_noderC  new_hop_node_with_fixed_args
eager_vals
eager_argseager_kwargsinpnew_eager_argss                    r=   thread_graphsafe_rng_from_hopsr  )
  sZ   8 I$/_Jhv||66-6HIJJLL++599#9#9#I#I ,  6==#3#:#:;h//N)2))]);*% !6!66  55jA$*LL$<$<)l!I;7%	 "Q	0@0E0Ee0L	u-%.
&--i8 BA* ~\\11(;39<<3K3K'		..>>9(--9.9	40 224T 3  < &]]../AB
/9,J&#&5CD^c((5/^D&N
 '$M0556HI ''1]` MI BA <;" Es    AI<A J,J <
J
J	)r  c          	      p	  ^ U R                   R                  5         U R                  5         U R                   n[        R                  (       a  [        U5      nX`l         U R                   n[        U 5      n[        U 5      n	U(       a  [        U 5      n [        R                  (       d  [        U 5        [        U 5        U4S jn
Uc  / nU
" X5      n[        UR                  5      S:X  a  [        U UTUUR                  S9$ [!        U R                   R"                  5       H  nUR$                  S:X  a  ['        S5      Ul        M%  UR+                  U5      (       d	  SUl        MD  ['        S5      Ul        UR,                   H+  n[/        UR(                  UR(                  S-   5      Ul        M-     M     [        R0                  nUR"                   HC  n[3        UR4                  R7                  SS5      [8        5      (       d  M4  UR4                  S   n  O   [;        UUUS	9n[        R<                  (       a  [=        X5      n[?        [A        [B        U5      5      n[?        [A        S
 U5      5      n[E        U UUTUR                  S9u  nnU(       a!  U	(       a  [G        U UU[        U5      5      u  nn[I        U5      n[K        U5      n[K        U5      n[M        USS9n[M        USS9n[N        (       Ga  [Q        U Vs/ s H  n[S        U5      [U        U5      4PM     sn5      n[W        S U 5       5      S-  n[X        R[                  SU5        [X        R[                  SU5        []        S UR                   R"                   5       5      n[]        S UR                   R"                   5       5      nUU-  n[_        [&        5      nUR                   R"                   H\  nUR`                  U;   d  M  [c        URd                  S5      (       d  M2  U[U        URd                  Rf                  5      ==   S-  ss'   M^     [X        R[                  S[        U5      [        U5      [        U5      5        [Q        URi                  5       [j        Rl                  " S5      SS9n[X        R[                  SU5        UU4$ s  snf )a(  
Partitions the joint graph such that the backward recomputes the forward.
Recomputing helps in trading off memory bandwidth with computation.

To create the fwd and bwd graph, we copy the joint graph, manually set the
outputs to just original forward or backward outputs. And then we run the
resulting graphs through dead code elimination.

.. warning::
    This API is experimental and likely to change.

Args:
    joint_module(fx.GraphModule): The joint forward and backward graph. This
        is the result of AOT Autograd tracing.
    _joint_inputs: The inputs to the joint graph. This is unused.
    compiler: This option determines the default set of recomputable ops.
        Currently, there are two options: ``nvfuser`` and ``inductor``.
    recomputable_ops: This is an optional set of recomputable ops. If this
        is not None, then this set of ops will be used instead of the
        default set of ops.
    num_fwd_outputs: The number of outputs from the forward graph.

Returns:
    Returns the generated forward and backward Fx graph modules.
c                   >^^^^ [        U R                  5      m[        5       mU R                  R                   Hy  nUR                  S:X  a"  SUR
                  ;   a  TR                  U5        O![        U5      (       a  TR                  U5        UT;   d  M^  TR                  UR                  5        M{     [        [        [        U R                  R                  5      5      n[        [        [        U R                  R                  5      5      nX4-   n[        U TS9u  pgpTR                  S U 5       5        [        U R                  XVUS5      n
[        U4S jU
R                   5       5      m[        UU4S jU R                  R                   5       5      n[        U4S j[!        U5       5       5      nS	n0 nU R                  R                   H  nUT;   d  M  XU'   US
-  nM     [#        UTTUUU5      $ )Nr   r   r  c              3   R   #    U  H  oc  M  UR                   S:w  d  M  Uv   M     g 7f)Nr   r  )rd   r  s     r=   rf   Nmin_cut_rematerialization_partition.<locals>.classify_nodes.<locals>.<genexpr>
  s"      !
"!A8HAA{s   ''	'r   c              3   f   >#    U  H&  nUR                   S :w  d  M  TUR                     v   M(     g7fr  r  r  s     r=   rf   r  
  s/      <
0ww(" $L#0s   11c              3   H   >#    U  H  nUT;  d  M  UT;  d  M  Uv   M     g 7fr:   rM   )rd   r8   r\   ro   s     r=   rf   r  
  s1      :
0,, 15=N1N D0s   
""	"c              3   <   >#    U  H  u  pUT;   d  M  Uv   M     g 7fr:   rM   )rd   r  pr  s      r=   rf   r  
  s!      1
2$!a;X6XAA2s   	r   r    )r  r   r   r   r   r   r  r   r  rw  r   r  r   r   r  r   r.  rX   )r   r  r8   r  r  rZ   r  r	  r
  r  r  r]   r_   fw_cntr^   r  r\   ro   r  s    `             @@@r=   classify_nodes;min_cut_rematerialization_partition.<locals>.classify_nodes
  s   '(:(:;1; &&,,Dww-'J$++,E!%%d+%d++!%%d+((!((4 - VJ0B0B0H0HIJ!%&(:(:(@(@A"
 7$\?S 	G"3 	   !
"!
 	
 @5F	
 2< <
*00<
 2

 0: :
$**00:
 0

 '1 1
#M21
 '
#  &&,,D((!'! - '
 	
r@   Nr   )r  r  r_   r   r  r    r  )r  c                 "    [        U 5      (       + $ r:   r  )re   s    r=   ri   5min_cut_rematerialization_partition.<locals>.<lambda>  s    [^);r@   r  F)r   Tc              3   8   #    U  H  n[        U5      v   M     g 7fr:   )r  r  s     r=   rf   6min_cut_rematerialization_partition.<locals>.<genexpr>,  s     'J\\r  z'Theoretical Activations Stored: %.2f GBz,Theoretical Per Activation Storage Sizes: %sc              3   \   #    U  H"  oR                   S :X  d  M  UR                  v   M$     g7fr   Nr  r   s     r=   rf   r  1  #      %
"7$77o;UIDII"7r  c              3   \   #    U  H"  oR                   S :X  d  M  UR                  v   M$     g7fr  r  r   s     r=   rf   r  4  r  r  r  z# remat/fw/bw: %d/%d/%dr
  zCount of Ops Rematerialized: %s)7r   r   r{  r   cser+   r   r   r  r  r  r  r  r\   r  r_   r  r   r   r   r  rs   rw  r  activation_memory_budgetr   r   r   r  rI  rv  r   r  r   r  r  r7  r-   r  r.   rm   r  r   r  r/   r[  r   r   r   r   r   r  r  r  r  )r   r  compilerr  r  r   	cse_graphr   graph_has_recomputable_opsgraph_has_recomputable_rng_opsr  r  r8   r~  r  r  r  r8  r9  r  sorted_sizestotal_activations_size_gbfw_module_nodesbw_module_nodesremat_nodescountsrematerialized_opss      `                       r=   r  r  {
  s   D **,D zz &	&$$K!5l!C%=l%K"!-l;::|,|,4
l %,(*%|KI
 9&&'1, +*G(1(M(M
 	
 ++11277h #CD))$// !D #CD

$'(9(94;L;Lq;P$Q! # 3 33M!!diimmOT:EBB IIo6M " +#L
 ((1+L6+|<=O;\JKL 4''$-$I$IIy ")#8iC4H$ Iy 4I>I y)Iy)I.yeLI.ydKIlKlSV4lKL %('J\'J$JS$P!:<UV 	?N$ %
"+//"7"7%
 
 % %
"+//"7"7%
 
 &7!,S!1OO))DyyK'GDKKAR,S,Ss4;;6678A=8 * 	%  		
 $LLN 3 3A 6
 	24FGi= Ls   ; R3tracedfnamefigname
clear_metaprogparse_stack_tracedot_graph_shapec                 "   U(       aZ  [         R                  " U R                  5      n[        R                  " X5      n U R                  R
                   H
  n0 Ul        M     [        R                  R                  U5      u  pU
(       d  S[        R                  -   n
[        R                  SX5        [        R                  " U UUUS9nUR!                  5       n[#        USU
R%                  S5      -   5      nU	 U
 3nUc	  U" U5        g U" XS9  g )NrI  zWriting FX graph to file: %s%s)r  r  write_)r  )rz  deepcopyr   rT   r2  r   r   r-  r3  splitextr   torch_compile_graph_formatr/   r[  r   FxGraphDrawerget_main_dot_graphrM  lstrip)r  r  r  r  r  r  r  r   r8   baseextgr   write_methods                 r=   
draw_graphr  J  s     MM&,,/	2LL&&DDI '  'IDF555HH-t9""+'		A 	
A1hC89LfSENE|UU&r@   r:   )g      @r`  )r    )ra  )fx_graphTNFN)rz  r   rV  r  r  loggingr/  r  r-  os.pathr  r   dataclassesr   r   typingr   r   r	   r
   r   r   torch._inductor.inductor_primstorch.distributedtorch.fxrT   torch.utils._pytreeutils_pytreer   torch._dynamo.utilsr   r   ;torch._functorch._activation_checkpointing.ac_logging_utilsr   torch._inductorr   r  torch._loggingr   rd  r   %torch.fx.experimental._backward_stater   "torch.fx.experimental.proxy_tensorr   r   torch.fx.experimental.sym_noder   r   %torch.fx.experimental.symbolic_shapesr   r   r   r   r   r   torch.fx.passesr   torch.utils._ordered_setr   torch.utils.checkpointr   rm  -_activation_checkpointing.graph_info_providerr!   "_activation_checkpointing.knapsackr"   r#   r$   ,_activation_checkpointing.knapsack_evaluatorr%   _aot_autograd.descriptorsr&   r'   _aot_autograd.logging_utilsr(   _aot_autograd.utilsr)   r*   compile_utilsr+   r,   r-   sympydebug_partitionerr.   r   rS   	getLoggerrN   r/   Loggerr   r!  r%  r1   rX   r   rU   r   r2  r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r4  rN  r>  r+  rD  rO  rV  rX  r\  rj  r  r   r  r  r  r  r  r  r  r  cacher  r  r7  r<  rT  r  r  r  r  r%  r  r  r  r  r  r  r  r  rI  rv  r  r  r  rM   r@   r=   <module>r     s
           	  # * @ @  %   $ $ < 6 + A ? H L  ) / 3  L 
 L L ; M H H  %66 t 6''1W^^ 1yy~~		 > > >2      >    T r~~ $ 2>> d  C  
  #JJMJ "'']J 	?	J
 smJ XXJZRWW  Gbgg G$ Gbgg $ bgg $ XRWW X XCrww C4 CJbgg J$ JKrww K4 K-bgg -$ -rww 4 J..J
4=$rww-i$y/IJJ$$rww- s d277mU277^;< 	J88>>J
((--J 
J 
	JZB!88>>B!
((--B! B! 	B!
 B! B! XX]]B!J9%,, 95 9"D- G%((-- GD G45 5	5;; 	5 	8G%((.. 8GT 8GvMG%((.. MGT MG`UUU CL)U 
	Ux BF	,Wrww-,W,W ,W "**RWW*=!>	,W
 
,Wj BFx"..x"rww-x" "'']x"
 x" "**RWW*=!>x" 2>>2>>)*x"@ :>AE\..\
 $,DI#6\ "**RWW*=!>\ 2>>2>>)*\~ c("# " "277 s :Rbhh R  "Pbggsl!3 PU277C<=P8Q PJBNN Jr~~ JZZ*xx##Z*xx##Z* XX]]Z* XX]]	Z*
 LLZ* Z* HHMMZ* HHMMZ*zR ..R ~~R  ~~R  	R 
 2>>2>>)*R j@ @D @R^^  .# #BNN #T /3	S&S&S& #S& z"''*+	S&l."eW ePBHH +T+TK+T 5k+T 	+T
 +T $(=+T 5$s)T#Y&'+T\ 05 5 5 5$NT o	o	o	 
"'']	o	dNN/3EHHMM/BNbOj L  :>L ..L  $,DI#6L  2>>2>>)*L d ,0#%)'HH  '' ' 	'
 5d3i(
)' ' c]' 
'r@   