
    #Ki[F                     &   S SK r S SKrS SKJr  S SKJr  S SKrS SKJr  S SK	J
r
  S SKJrJrJr  / SQr\ R                   " \5      r Sr " S	 S
5      r " S S\5      r\" \R,                  " S5      \R.                  5      rS r " S S5      r " S S5      rS\
S\S\\
   4S jrS\R>                  S\S\S\\R>                     4S jr S r!  S"S\"\S4   S\#\$\4   S-  S\S\"\S4   S-  S\#\$\4   S-  S\"\\"   \\#   4   4S  jjr%S\\   4S! jr&g)#    N)Sequence)Anymap_aggregate)	BlockMask)tree_flattentree_maptree_unflatten)TensorChunkSpecsplit_args_kwargs_into_chunksmerge_chunksFc                       \ rS rSrSrS rSrg)_CustomReducer   a   
Custom reducer class that can be used to specify a custom operation that
reduces losses of multiple microbatches into one value.

Example:
>>> # xdoctest: +SKIP
>>> sum_reducer = _CustomReducer(
>>>     torch.tensor(0.0),
>>>     lambda a, b: a + b
>>> )
c                     Xl         X l        g N
init_value	reduce_fn)selfr   r   s      i/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/torch/distributed/pipelining/microbatch.py__init___CustomReducer.__init__+   s    $"    r   N)__name__
__module____qualname____firstlineno____doc__r   __static_attributes__ r   r   r   r      s    
#r   r   c                       \ rS rSrSrg)_LossReducer0   r!   Nr   r   r   r   r    r!   r   r   r#   r#   0       r   r#   g        c                   z    \ rS rSr% SrS r\\S'   S rS r	\
S\\S4   4S	 j5       r\
S\\\4   4S
 j5       rSrg)r   ;   z*
Class used to specify chunking of inputs
c                     Xl         g r   	split_dim)r   r+   s     r   r   TensorChunkSpec.__init__@   s    "r   r+   c                 |    U R                   R                   SU R                   R                   SU R                   S3$ )N.())	__class__r   r   r+   r   s    r   __repr__TensorChunkSpec.__repr__E   s9    ~~(()4>>+B+B*C1T^^DTTUV	
r   c                 "    SU R                    S3$ )NzTensorChunkSpec(r0   r*   r2   s    r   __str__TensorChunkSpec.__str__J   s    !$..!133r   
chunk_dims.c                      [        U S 5      nU$ )aJ  
A helper for creating a tuple of `TensorChunkSpec` from a tuple of chunk
dimensions (int's).
Example:
    >>> # xdoctest: +SKIP
    >>> # There are three positional arguments to the model, and
    >>> # we are chunking them along dimension 0, 0 and 1, respectively
    >>> args_chunk_spec = TensorChunkSpec.from_tuple((0, 0, 1))
c                     [        U 5      $ r   r   dims    r   <lambda>,TensorChunkSpec.from_tuple.<locals>.<lambda>\   	    ,r   r   )r8   args_chunk_specs     r   
from_tupleTensorChunkSpec.from_tupleM   s     (,
 r   c                      [        U S 5      nU$ )a$  
A helper for creating a dictionary of `TensorChunkSpec` from a
dictionary of chunk dimensions (int's).
Example:
    >>> # xdoctest: +SKIP
    >>> # Chunk dimension 0 for the "id" argument, 1 for the "mask" argument
    >>> kwargs_chunk_spec = TensorChunkSpec.from_dict({"id": 0, "mask": 1})
c                     [        U 5      $ r   r;   r<   s    r   r>   +TensorChunkSpec.from_dict.<locals>.<lambda>n   r@   r   r   )r8   kwargs_chunk_specs     r   	from_dictTensorChunkSpec.from_dict`   s     *,
 ! r   r*   N)r   r   r   r   r   r   int__annotations__r3   r6   staticmethodtuplerB   dictstrrH   r    r!   r   r   r   r   ;   se    # N

4 #s(O $ !cN! !r   r   c                       \ rS rSrSrg)
_Replicatet   r!   Nr%   r!   r   r   rQ   rQ   t   r&   r   rQ   
block_mask
num_chunksreturnc                   ^  T R                   R                  S5      S:X  a  T /U-  $ T R                   R                  S5      U:  d   S5       eSn[        R                  " T R                   X5      n[        R                  " T R                  X5      nT R
                  b!  [        R                  " T R
                  X5      OS/U-  nT R                  b!  [        R                  " T R                  X5      OS/U-  n/ nSn[        U5       Hj  n	U 4S jn
UR                  [        R                  " X9   XI   XY   Xi   T R                  U
" U5      T R                  S95        XU	   R                  S5      -  nMl     U$ )zGiven a block mask, split the block mask along the batch dimension (dim0).

Args:
    block_mask: Block mask to split
    num_chunks: Number of chunks to split the block mask into

Returns:
    chunk_block_masks: List of chunked block masks
r      z;Block mask has fewer batch size than the number of chunks. Nc                    >^  UU 4S jnU$ )Nc                 \   > [         R                  " U T5      nTR                  X-   XU5      $ r   )torch	full_likemask_mod)bhq_idxkv_idxb_offsetrS   idxs        r   batch_offset_mask_modI_split_block_mask.<locals>.create_mask_mod.<locals>.batch_offset_mask_mod   s*     ??1c2!**1<6JJr   r!   )rb   rc   rS   s   ` r   create_mask_mod*_split_block_mask.<locals>.create_mask_mod   s    K )(r   )kv_num_blocks
kv_indicesfull_kv_num_blocksfull_kv_indices
BLOCK_SIZEr\   seq_lengths)rg   sizerZ   tensor_splitrh   ri   rj   rangeappendr   from_kv_blocksrk   rl   )rS   rT   	batch_dimkv_num_blocks_chunkskv_indices_chunksfull_kv_num_blocks_chunksfull_kv_indices_chunkschunk_block_masksbatch_offset	chunk_idxre   s   `          r   _split_block_maskrz   x   s    $$Q'1,|j((##((+z9 E9 I --  * **:+@+@*X ((4 	:88*PVj   %%1 	:55zMVj   L:&		) 	  $$2=,7#<#G 6 A%00(6&22
	
 	Y7<<Q??) '* r   tensorspecc                 0   U R                  UR                  5      U:  d$   SU R                  UR                  5       S35       e[        R                  " XUR                  5      n[        (       d  U$ / nSnU H  n[        R
                  " U 5      nXVR                  UR                  5      -   n[        SSS5      /UR                  -  n	[        XX5      XR                  '   XgU	'   UR                  U5        XVR                  UR                  5      -  nM     U$ )zGiven a tensor, and a chunking spec, split the tensor.
Args:

    tensor: Tensor to split
    spec: Chunking spec
    num_chunks: Number of chunks to split the tensor into

Returns:
    chunk_tensors: List of chunked tensors
zTensor size z is smaller than num_chunksr   N)	rm   r+   rZ   rn   _debug_mask_minibatches
zeros_likeslicendimrp   )
r{   r|   rT   chunk_tensorsexpanded_chunkssplit_dim_idxchunk_tensornew_val	upper_idxslice_indicess
             r   _split_tensorr      s      ;;t~~&*4 
v{{4>>233NO4 &&v4>>JM""OM%""6*!$5$5dnn$EE	tT401GLL@(-m(Gnn%!-w'**4>>:: & r   c                    U (       d  [        U5       Vs/ s H  n0 PM     sn$ [        U 5      [        U5      :X  d;   S[        U R                  5       5       S[        UR                  5       5       35       eUc   e[	        U S S9u  pE[	        US S9u  pc/ n[        XFSS9 GHK  u  pU	[        L d  [        U	[        5      (       a  UR                  U5        M7  [        U[        R                  5      (       aC  [        U	[        5      (       d   eUR                  UR                  U	R                  5      5        M  [        U[        5      (       a  [        U	[        5      (       d   eU	R                  S:X  d   S	5       eUR                  R                  S5      S
:X  a  UR                  U5        GM  UR                  UR                  R                  S5      5        GM<  [!        SU	 SU S35      e   [#        / UQUP76 n
[        U
5       Vs/ s H  n/ PM     nn[        XFSS9 H  u  p/ nU	[        L d  [        U	[        5      (       a  U/U
-  nO_[        U[        R                  5      (       a  [%        XU
5      nO3[        U[        5      (       a  ['        X5      nO[!        SU	 SU S35      e[        XSS9 H  u  pUR                  U5        M     M     U Vs/ s H  n[)        X5      PM     sn$ s  snf s  snf s  snf )a3  
Given a dictionary of args, and a dictionary of chunking specs, shard the
args according to the chunking specs.

Args:
    args_dict: Dictionary of args
    args_chunk_spec: Dictionary of chunking specs
    num_chunks: Number of chunks to shard the args into

Returns:
    args_split: List of sharded args
zargs_dict.keys() = z args_chunk_spec.keys() = c                 "    [        U [        5      $ r   
isinstancer   xs    r   r>   %_shard_dict_of_args.<locals>.<lambda>   s    Z9%=r   is_leafc                 "    [        U [        5      $ r   r   r   s    r   r>   r      s    :a+Cr   Tstrictr   z#BlockMask only supports split_dim=0rW   zUnsupported chunk spec: z and value: z combination.)ro   lenlistkeysr   ziprQ   r   rp   rZ   Tensorr   rm   r+   r   rg   
ValueErrorminr   rz   r
   )	args_dictrA   rT   _values	tree_specchunk_specssplit_sizesvr|   result_num_chunksflat_split_resultsv_splits_flat_split_result_v_splits                  r   _shard_dict_of_argsr      s   $ !*-.-q-..y>S11 
d9>>#345 6$$()=)=)?$@#A	C1 &&&$=F "!CNK
 Kv48 :D*!=!=z*5<<((dO4444qvvdnn569%%dO4444>>Q&M(MM&##A&!+"":.""1??#7#7#:;*4&QC}M # 9( 5[5*5167H1I$J1IAR1I$Jv48"$:D*!=!=s..H5<<(($Q.?@H9%%(>H*4&QC}M  -0-
( %%h/-
 9( #5"4 	)5"4 u /N %K&s   K3K8K=args.kwargschunksrA   rG   c                 j  ^	 Uc  0 nS nUc  [        XPS S9nUc  [        XQS S9n[        [        [        U 5      5      [        [        U5      5      U5      n[	        U5      n[        UUU5      n[	        U5      U:  a<  [	        U5      n[        [        [        U 5      5      [        [        U5      5      U5      n[	        U5      [	        U5      :w  a#  [        S[	        U5       S[	        U5       35      eU V	^	s/ s H*  m	[        U	4S j[        [	        T	5      5       5       5      PM,     n
n	X4$ s  sn	f )a  
Given a sequence of args and kwargs, split them into a number of chunks
according to  their respective chunking specs.

Args:
    args: Tuple of args
    kwargs: Dict of kwargs
    chunks: Number of chunks to split the args and kwargs into
    args_chunk_spec: chunking specs for args, in same shape as args
    kwargs_chunk_spec: chunking specs for kwargs, in same shape as kwargs

Returns:
    args_split: List of sharded args
    kwargs_split: List of sharded kwargs
c                     [        U [        R                  [        -  5      (       a  [	        [
        5      $ [        5       $ r   )r   rZ   r   r   r   DEFAULT_CHUNK_DIMrQ   r   s    r   default_spec3split_args_kwargs_into_chunks.<locals>.default_specq  s,    a	122"#455<r   c                 "    [        U [        5      $ r   r   r   s    r   r>   /split_args_kwargs_into_chunks.<locals>.<lambda>y  s    *Q	2Jr   r   c                 "    [        U [        5      $ r   r   r   s    r   r>   r   ~  s    Jq)4Lr   z;args and kwargs are split into different number of chunks: z, c              3   .   >#    U  H
  nTU   v   M     g 7fr   r!   ).0i
chunk_argss     r   	<genexpr>0split_args_kwargs_into_chunks.<locals>.<genexpr>  s     <%;jm%;s   )r	   r   rN   	enumerater   RuntimeErrorrM   ro   )r   r   r   rA   rG   r   args_split_dictreal_num_chunkskwargs_splitr   
args_splits            ` r   r   r   4  sT   p ~  "(J
  $*L
 *Yt_Y'(O
 /*O&L <?* l+-4!?+,
 ?s<00I?#$Bs<'8&9;
 	
 *)J 	<U3z?%;<<)  
 ##s   :1D0c                    Ub  [        U5      u  p#O,[        U S   5      u  pC[        [        5      /[        U5      -  n/ nU  HJ  n[        U5      u  px[        U5      [        U5      :w  a  [	        SU SU 35      eUR                  U5        ML     / n	[        U5       GH5  u  p[        U[        5      (       Gap  [        [        U5      5       Vs/ s H
  nX\   U
   PM     nn[        (       Ga  US   R                  nUSS  H  nUR                  U:X  a  M   e   [        R                  " [        R                  " USS06[        U5      UR                  S9n/ nSn[        U5      [        U5      :X  d   e[        UUS	S
9 Hp  u  nnUUR!                  UR                  5      -   n[#        SSS5      /UR$                  -  n[#        UU5      UUR                  '   UU   nUR                  U5        UnMr     OUnU	R                  [        R&                  " UUR                  S95        GM  [        U[(        5      (       aR  UR*                  n[        [        U5      5       H  nUR-                  UX\   U
   5      nM     U	R                  U5        GM  US   U
   n[        S[        U5      5       H  nX\   U
   U:X  a  M   e   U	R                  U5        GM8     [/        X5      $ s  snf )z
Given a list of chunks, merge them into a single value according to
the chunk spec.

Args:
    chunks: list of chunks
    chunk_spec: Chunking spec for the chunks

Returns:
    value: Merged value
Nr   zChunk z did not match chunk spec rW   devicemeta)sectionsr=   Tr   r<   )r   r   r   r   r   rp   r   r   ro   r~   shaperZ   rn   emptyr+   r   rm   r   r   catr   r   r   r
   )r   
chunk_specspec_flattenedflatten_specchunk0_flatchunks_flattenedchunkchunk_flattenedr   args_flattenedarg_idxargry   partial_valuesoverall_shapevalmeta_chunksvalues_to_catchunk_start_idxpartial_value
meta_chunkchunk_end_idxr   slicedreduced_valvalues                             r   r   r     s   Z '3J'?$ %1$;!)*;<=K@PP )%03~#66veW,FzlSTT0  N!.1c?++ "'s+;'<!=!=I !+G4!=  
 '& .q 1 7 7)!"-C99555 .#00KK>v> 0 !#"#>*c+.>>>>14"K2-M: %4joocmm6T$TM%*4t%<$=@R@R$RM38-3XM#--0*=9F!((0&3O2 !/!!%))Ms}}"MN^,,..K"3'7#89	!mm!1!<W!E :
 !!+.$Q'0E"1c*:&;<	'27;uDDD =!!%(e 2j .77gs   K")NN)'loggingoperatorcollections.abcr   typingr   rZ   torch.fx.noder   !torch.nn.attention.flex_attentionr   torch.utils._pytreer   r	   r
   __all__	getLoggerr   loggerr~   r   r#   r{   addsum_reducerr   r   rQ   rJ   r   rz   r   r   r   rM   rN   rO   r   r   r!   r   r   <module>r      s     $   ' 7 F F 
		8	$
   # #$	> 	 5<<,hll;  5! 5!r	 	=== 
)_=@&LL&
& & ell	&RPn ;?;?p$
S/p$cNT!p$ p$ ?C/047	p$
 C01D8p$ 4;T
"#p$fy8Iy8r   