
    9iN                         S SK r S SKrS SKJr  S SKJr  S SKJr  S SKJ	r
  SSKJr  SSKJr    SS	 jrS
 rSS jr    SS jrS rg)    N)mpu)Float16Module)unwrap_model)DistributedDataParallel   )logger)MoEc           	      j   Uc  [         R                  " 5       n[        R                  R	                  XSUS 35      nUS   S:  aK  [        R                  R	                  US5      n[        R                  R	                  XSU SUS S35      nXg4$ [        R                  R	                  US5      =pgXg4$ )	z8Determine the directory name for this rank's checkpoint.mp_rank_02dr   zmodel_rng.pt
expp_rank_	_mp_rank_z_optim_states.ptzmodel_optim_rng.ptr   get_tensor_model_parallel_rankospathjoin)checkpoints_pathpath_load_tagnum_expertstensor_rank	expp_rankcommon_path
model_name
optim_names           k/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/gpt_moe/checkpointing.pyget_checkpoint_namesr      s     88:'',,/!)+c):;=K 1~WW\\+~>
WW\\9[,==MNP
 !! #%'',,{/C#E 	E
 !!    c           
          [         R                  " 5       n[        R                  R	                  [        R                  R	                  U S5      SU SU SUS S35      nU$ )Nmodellayer__expert_r   r   z_model_states.ptr   )r   layer_id	expert_idmp_rank	ckpt_names        r   _get_expert_ckpt_namer'   4   sX    002G
%w/

(9+YwsmCSTI r   c                     [         R                  " 5       n[         R                  " U5      n[        U UUUS9nUu  pg[        R
                  " SU 35        [        R                  " USS9nU$ )zwLoad the base state_dict from the given directory

If rank0 is true, just loads rank 0 checkpoint, ignoring arguments.
)r   r   r   zLoading model checkpoint from cpumap_location)r   get_max_expert_size_nameget_expert_parallel_rankr   r   infotorchload)	load_dirr   r   largest_group_namer   checkpoint_namesmodel_checkpoint_nameoptim_checkpoint_namemodel_state_dicts	            r   _load_base_checkpointr7   =   su    
 557,,-?@I+#	
 4D0
KK01F0GHIzz"7eLr   c                 z   [        U [        [        45      n [        XUS9nUc   eU(       a  [	        XS   U5        O[	        XS   U5        U(       a  U R                  US   US9  OU R                  US   US9  [        R                  R                  5       (       a  [        R                  R                  5         g g )N)r   r   moduler    )strict)
r   torchDDPr   r7   load_moe_checkpointload_state_dictr/   distributedis_initializedbarrier)r    r1   r   r:   r   load_ds_ckptsr6   s          r   load_checkpointrB   Q   s     = 9:E,;H '''EH#=xHEG#<hG.x8H.w7G''))!!# *r   c                 `   SnU R                  5        GH  u  pE[        U[        5      (       d  M  UR                  nUR                  n[
        R                  " U5      n[        U5       H  n	X-  U	-   n
[        X#U
5      n[        R                  " SU 35        [        R                  " U[        R                  " S5      S9nSn[        UR                  5       5       H0  nUR!                  U U
 3U U	 35      nUR#                  U5      X'   M2     UR%                  U5        M     US-  nGM     g )Nr   zLoading expert states from r)   r*   z).deepspeed_moe.experts.deepspeed_experts.r   )named_modules
isinstancer	   expert_group_namenum_local_expertsr   r-   ranger'   r   r.   r/   r0   devicelistkeysreplacepopupdate)r    
state_dictr1   moe_layer_idn_moduler9   
group_namerG   r   local_expert_idglobal_expert_idmoe_load_pathexpert_state_dictmoe_str_prefixkey	local_keys                   r   r<   r<   l   s1   L!//1fc""11J & 8 844Z@I#():#;#,#@?#R  5h6F!H9-IJ$)JJ!U0C%E! "M 1 6 6 89C #)*+;*<=)*?*;<!>I 4E3H3H3M%0	 :
 !!"34 $< AL+ 2r   )NN)NTr    T)r   r/   megatron_utilr   megatron_util.modelr   megatron_util.utilsr   torch.nn.parallelr   r;   configurationr   	moe.layerr	   r   r'   r7   rB   r<    r   r   <module>ra      sN     
   - , A !  &*#'	"0, !%")"&$6r   