
    9iC                         S SK r S SKJr  S SKJr  S SKJrJr  S SK	J
r
  S SKJr  S SKJr   " S S\5      r " S	 S
\5      rg)    N)mpu)_flatten_dense_tensors_unflatten_dense_tensors)Variable)Module)DistributedDataParallelc                   >    \ rS rSrS	S\S\4S jjrS
S jrSS jrSr	g)PyTorchDistributedDataParallel   prefixrecursec                 4    U R                   R                  XS9$ N)r   r   modulenamed_parametersselfr   r   s      l/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/mglm/model/distributed.pyr   /PyTorchDistributedDataParallel.named_parameters       {{++6+KK    Nc                 >    U R                   R                  XU5      nU$ Nr   
state_dictr   destinationr   	keep_varssds        r   r   )PyTorchDistributedDataParallel.state_dict       [[##KC	r   c                 4    U R                   R                  XS9$ N)strictr   load_state_dictr   r   r%   s      r   r'   .PyTorchDistributedDataParallel.load_state_dict!       {{**:*EEr     TNr-   FT)
__name__
__module____qualname____firstlineno__strboolr   r   r'   __static_attributes__r+   r   r   r
   r
      s#    Ls L$ LFr   r
   c                   X   ^  \ rS rSrU 4S jrS rS
S jrSS jrSS\S\	4S jjr
S	rU =r$ )r   %   c                 ^  >^ ^ [         [        T ]  5         [        R                  [        R
                  R                  :X  a  SOST l        UT l        [        R                  " 5       T l        [        R                  " 5       nT R                  R                  5        H?  n[        R                  " U5      (       d  M   [        R                   " X2T R                  S9  MA        SU 4S jjm/ T l        / T l        ['        T R                  R                  5       5       H	  nU4S jnM     TT l        g )NTFgroupc                    > TR                   (       Ga  STl         0 nTR                  R                  5        H]  u  pEUR                  (       d  M  UR                  c  M'  UR
                  R                  5       nXc;  a  / X6'   X6   R                  U5        M_     TR                  (       a0  [        R                  R                  U;   a  [        S5        STl        U GH  nX6   nU Vs/ s H  oUR                  R
                  PM     nn[        U5      n	U(       a  U	R                  5       n	U(       d(  U (       d!  U	[        R                   " TR"                  S9-  n	[        R$                  " U	TR"                  S9  [        R                  R'                  5         U(       d(  U (       a!  U	[        R                   " TR"                  S9-  n	[)        U[+        X5      5       H  u  pU
R-                  U5        M     GM     g g s  snf )NFz}WARNING: gloo dist backend for half parameters may be extremely slow. It is recommended to use the NCCL backend in this case.r:   )needs_reductionr   r   requires_gradgraddatatypeappendwarn_on_halftorchcuda
HalfTensorprintr   floatdistget_world_sizedata_parallel_group
all_reducesynchronizezipr   copy_)reduce_afterno_scalefp32_allreducebucketsnameparamtpbucketgrads	coalescedbufsyncedr   s               r   allreduce_params:DistributedDataParallel.__init__.<locals>.allreduce_params2   s    $$$',$#';;#?#?#AKD***uzz/E#jjoo/,*,GK**51 $B $$zz,,7 \ -2)!B$[F:@A&ZZ__&EA 6u =I%$-OO$5	#L!T%8%8"&":":&< <	OOIT5M5MNJJ**,#!T%8%8"&":":&< <	'*!#;I#M(O		&)(O " %" Bs   'G;c                  D   > [         R                  R                  T5        g r   )r   _execution_enginequeue_callback)unusedr\   s    r   allreduce_hook8DistributedDataParallel.__init__.<locals>.allreduce_hookZ   s    **99:JKr   )TFF)superr   __init__rI   _backenddist_backendGLOOrC   r   r   get_data_parallel_grouprK   get_model_parallel_rank
parametersrD   	is_tensor	broadcasthook_handleshookslistr\   )r   r   src_rankprU   rb   r\   	__class__s   `     @r   re    DistributedDataParallel.__init__'   s    %t57$(MMT5F5F5K5K$KDQV#&#>#>#@ ..0'')Aq!!q$2J2JK * +/&+,1"	*H 
$++0023EL 4
 !1r   c                 4    SU l         U R                  " U0 UD6$ )NT)r=   r   )r   inputskwargss      r   forwardDistributedDataParallel.forward_   s    #{{F-f--r   c                 >    U R                   R                  XU5      nU$ r   r   r   s        r   r   "DistributedDataParallel.state_dictc   r"   r   c                 4    U R                   R                  XS9$ r$   r&   r(   s      r   r'   'DistributedDataParallel.load_state_dictg   r*   r   r   r   c                 4    U R                   R                  XS9$ r   r   r   s      r   r   (DistributedDataParallel.named_parametersj   r   r   )r\   rK   rn   ro   r   r=   rC   r.   r/   r,   )r0   r1   r2   r3   re   rx   r   r'   r4   r5   r   r6   __classcell__)rs   s   @r   r   r   %   s4    61p.FLs L$ Lr   r   )rD   torch.distributeddistributedrI   megatron_utilr   torch._utilsr   r   torch.autogradr   torch.nn.modulesr   torch.nn.parallel.distributedr   DDPr
   r+   r   r   <module>r      s8        I # # H
FS 
FYf Yr   