
    9iB                        S SK r S SKrS SKrS SKJr  S SKrS SKrS SKJr  S SKJ	r	J
r
  S SKJr  S SKJr  S SKJr  S SKJr  S S	KJrJr  S S
KJr  S SKJr  S SKJrJr  S SKJrJr  S SK J!r!  S SK"J#r#  S SK$J%r%  S SK&J'r'  S SK(J)r)  S SK*J+r+J,r,J-r-   " S S\5      r.S r/ " S S\\\5      r0\Rb                  " \Rd                  S9 " S S\5      5       r2g)    N)partialmethod)DeepSpeedEngine)mpuprint_rank_0)HfTrainerDeepSpeedConfig)Hooks)LoadCheckpointHook)HOOKS)BestCkptSaverHookCheckpointHook)CheckpointProcessor)Hook)LrSchedulerHookLrSchedulerProcessor)OptimizerHookOptimizerProcessor)Priority)save_checkpoint)DistributedParallelType)create_device)
get_logger)get_dist_infoget_local_rank	init_distc                   $    \ rS rSrSrS rS rSrg)DeepSpeedConfig#   z
The `DeepSpeedConfig` object is meant to be created during `TrainingArguments` object creation and has the
same lifespan as the latter.
c                 6    U R                  U5      nUc  gUS:H  $ )NFauto)	get_value)selfds_key_longvals      t/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/hooks/distributed/deepspeed_hook.pyis_autoDeepSpeedConfig.is_auto)   s"    nn[);&=     c                    / SQnU Vs/ s H  oPR                  U5      (       d  M  UPM     nn[        U5      S:  a  [        UR                  S5      (       a  UR                  R                  nOJ[        UR                  S5      (       a   [        UR                  R                  5      nO[        SU S35      eU R                  SXw-  5        U R                  5       (       a-  U R                  SS	U-  U-  5        U R                  S
SU-  5        UR                  R                  R                  S0 5      nUR                  S0 5      n	U	R                  SS5      n
U	R                  SS5      nU
S:  a  U
O[        R                  " X;-  5      n
U R                  SU5        U R                  SU
5        [        U R                   5      S:  a*  SR#                  U R                   5      n[        SU S35      egs  snf )zv
This stage runs after we have the model and know num_training_steps.

Now we can complete the configuration process.
)$zero_optimization.reduce_bucket_size-zero_optimization.stage3_prefetch_bucket_size4zero_optimization.stage3_param_persistence_thresholdr   hidden_sizehidden_sizeszThe model's config file has neither `hidden_size` nor `hidden_sizes` entry, therefore it's not possible to automatically fill out the following `auto` entries in the DeepSpeed config file: zb. You can fix that by replacing `auto` values for these keys with an integer value of your choice.r)   r*   ?r+   
   optionswarmupwarmup_stepswarmup_ratio        z scheduler.params.total_num_stepsz!scheduler.params.warmup_num_steps
z]Please correct the following DeepSpeed config values that mismatch TrainingArguments values:
zF
The easiest method is to set these DeepSpeed config values to 'auto'.N)r%   lenhasattrconfigr,   maxr-   
ValueError	fill_onlyis_zero3train	optimizergetmathceil
fill_match
mismatchesjoin)r!   argsmodelnum_training_stepshidden_size_based_keysxhidden_size_auto_keysr,   r0   r1   r2   r3   rC   s                r$   trainer_config_finalize'DeepSpeedConfig.trainer_config_finalize0   s   "
 .!
-!aA- 	 !
 $%)u||]33#ll66~66!%,,";";< 55J4K LYY  NNA&46}}N"[0;>@J$&
 **&&**9b9Xr*zz.!4zz.#6'3a'7|TYY->/:<NO;\Jt!#4??3J'L(oq  $M!
s
   G3G3 N)__name__
__module____qualname____firstlineno____doc__r%   rK   __static_attributes__rM   r'   r$   r   r   #   s    
!9r'   r   c                     UR                   nS nSU;  a;  UR                  5       (       a  [        R                  S5        U R                  nSUS'   S nSU;  a  U R
                  nXE4$ )Nr>   zDetected ZeRO Offload and non-DeepSpeed optimizers: This combination should work as long as the custom optimizer has both CPU and GPU implementation (except LAMB)Tzero_allow_untested_optimizer	scheduler)r8   
is_offloadloggerinfor>   rV   )trainerhf_deepspeed_configrG   r8   r>   lr_schedulers         r$   deepspeed_optim_schedr]   l   st     ''FI& ))++KKV %%	26./L& ((""r'   c                   `    \ rS rSrSrS rSS jr  SS jrS rS r	S	 r
S
 rS rS rS rSrg)DeepspeedProcessor   rF   c                      [         R                  " 5       nUS:X  a  g[         R                  " 5       nSR                  U5      $ ! [        [
        4 a     gf = f)N    z_mp_rank_{:02d})r   $get_tensor_model_parallel_world_sizeget_tensor_model_parallel_rankformatImportErrorAssertionError)r!   tp_world_sizemp_ranks      r$   	rank_nameDeepspeedProcessor.rank_name   sV    	DDFM!88:G$++G44^, 		s   A %A AAc                 j    U(       d  g[         R                  " 5       nSR                  U5      nSU S3$ )Nzpytorch_model.binz{:02d}mp_rank_z_model_states.pt)r   re   rf   )r!   with_mpurj   ranks       r$   get_bin_filename#DeepspeedProcessor.get_bin_filename   s4    &88:G??7+DdV#344r'   Nc           
      0   UR                  UR                  5      nX R                  5       -   [        R                  -   n[        XgS S USS9  [        R                  R                  U5      n[        R                  R                  U5      n	[        R                  " 5       (       + n
U R                  U
5      n[        R                  R                  X+5      nU R                  S:X  d  U
(       a  UR                  R                  X5        O[        XlS S S SS9  U R                  S:X  a  g U
(       a+  [        R                  R                  X0R                  U5      nO[        R                  R                  X;5      n[        R                  R!                  U5      (       a  [        R"                  " U5         [        R$                  " X5        g ! [&         aC  n[)        5       R+                  SU SU SU S35        [,        R.                  " X5         S nAg S nAff = f)	NF)meta
with_model   )rt   	with_metazLink z to z error: z@, changing to copy the bin file, this may case more space usage.)unwrap_modulerF   rk   r   TRAINER_STATE_SUFFIXr   ospathdirnamebasenamer   is_unitializedrq   rD   
zero_stage_BIN_FILE_DIRisfileunlinklinkOSErrorr   errorshutilcopyfile)r!   rZ   checkpoint_path_prefix
output_dirrt   save_optimizersrF   _train_state_filesave_dirprefixro   bin_filesrc_file	dest_filees                  r$   save_checkpoints#DeepspeedProcessor.save_checkpoints   s    %%gmm42^^ 6
 
445 	dDt	O 77??#9:!!"89))++((277<< 6A??a8MM))(;t$%I ??aZ1C1CXNIZ:I77>>)$$IIi 	1GGH( 	1Lzi[ <Q Q OOH00	1s   1G 
H9HHc                     X R                  5       -   [        R                  -   n[        R                  R                  U5      (       a  [        R                  " U5        [        R                  " USS9  g )NT)ignore_errors)	rk   r   ry   rz   r{   r   remover   rmtree)r!   rZ   r   r   s       r$   remove_checkpoints%DeepspeedProcessor.remove_checkpoints   sT    2^^ 6
 
44577>>+,,II'(,DAr'   c                    [         R                  R                  U5      (       d   e[         R                  R                  U5      n[         R                  R	                  U5      n0 nXR                  5       -   [        R                  -   n[         R                  R                  U5      (       a  U R                  X(U5      n[        UR                  [        5      (       a#  UR                  R                  UUUU(       + S9  U$ Un	U R                  5       n
[         R                  R                  X5      n[         R"                  " US S9nUS   nUR%                  UR                  5      R'                  5       nU H4  nXR)                  5       ;  a  [+        SU-   5        M&  [+        SU-   5        M6     UR%                  UR                  5      R-                  XS9  U$ )N)load_module_strictload_module_onlyc                     U $ NrM   )storagelocs     r$   <lambda>5DeepspeedProcessor.load_checkpoints.<locals>.<lambda>   s    gr'   )map_locationmodulez
Skip key: zLoading key: )strict)rz   r{   isdirr|   r}   rk   r   ry   r   load_trainer_state
isinstancerF   r   load_checkpointrq   rD   torchloadrx   
state_dictkeysr   load_state_dict)r!   r   rZ   load_all_stater   r{   tagrt   r   r   r   
model_file
checkpoint
model_dictkeys                  r$   load_checkpoints#DeepspeedProcessor.load_checkpoints   s   ww}}34444ww56gg562^^ 6
 
44577>>+,,**7+9;D gmm_55MM))#)%3!3	 * ,  .H,,.Hh9J)EGJ#H-J ..w}}=HHJJ!oo// !34 3!67	 "
 !!'--0@@ A +r'   c                     U H-  nUR                   U   nUR                  R                  U5        M/     UR                  R                  5         g r   )train_outputsrF   backwardstep)r!   rZ   	loss_keyscumulative_iters	grad_clipklosss          r$   r   DeepspeedProcessor.backward   sB     A((+DMM""4(  	r'   c                     g r   rM   r!   rZ   s     r$   initialize_optimizer'DeepspeedProcessor.initialize_optimizer      r'   c                     g r   rM   r   s     r$   r   DeepspeedProcessor.step  r   r'   c                     gNTrM   r   s     r$   should_save_on_rank&DeepspeedProcessor.should_save_on_rank  s    r'   c                    [        UR                  [        R                  R                  5      (       d)  [        UR                  [
        R                  5      (       a*  UR                  R                   Vs/ s H  o"S   PM	     nnU$ [        UR                  [        5      (       aP  [        5       nUR                  R                  5        H&  u  pEUR                   Vs/ s H  o"S   PM	     snX4'   M(     U$ [        S5      es  snf s  snf )Nlrz6lr is not applicable because optimizer does not exist.)r   r>   r   optim	Optimizer	deepspeedDeepSpeedOptimizerparam_groupsdictitemsRuntimeError)r!   rZ   groupr   namer   s         r$   get_current_lr!DeepspeedProcessor.get_current_lr
  s    g'')>)>??:!!9#?#?DA DA+2+<+<+I+IJ+I%++IBJ 	 ))400B&006685:5G5GH5GE$K5GH  9
 	 HJ J K Is   5DDrM   )Tr   )rN   rO   rP   rQ   r   rk   rq   r   r   r   r   r   r   r   r   rS   rM   r'   r$   r_   r_      sF     M	5 #)-(1TB%Nr'   r_   )module_namec                   d    \ rS rSr\R
                  r     SS jrS rS r	S r
S rS rS	 rS
rg)DeepspeedHooki  Nc                 `    X0l         X l        X@l        Xl        Ub  US;   d   S5       eXPl        g )N)r   rb      rv   z zero_stage must in (0, 1, 2, 3)!)save_zero_checkpoint"deepspeed_activation_checkpointingro   deepspeed_configr   )r!   r8   r   r   ro   r   s         r$   __init__DeepspeedHook.__init__  sG     %9!2T/  &! "% % I&HI %$r'   c                    [        5       nUR                  [        5      n[        U5      S:  a6  [	        US   R
                  [         5      (       d  US   R                  U5        UR                  [        5      n[        U5      S:  a6  [	        US   R
                  [         5      (       d  US   R                  U5        UR                  [        5      n[        U5      S:  a6  [	        US   R
                  [         5      (       d  US   R                  U5        UR                  [        5      n[        U5      S:  a6  [	        US   R
                  [         5      (       d  US   R                  U5        UR                  [        5      n[        U5      S:  a6  [	        US   R
                  [         5      (       d  US   R                  U5        X l        g )Nr   )r_   get_hookr   r6   r   	processorset_processorr   r   r	   r   )r!   rZ   r   optimizer_hook	ckpt_hookbest_ckpt_hookload_ckpt_hooklr_scheduler_hooks           r$   register_processor DeepspeedHook.register_processor+  s   &(	 ))-8~":q!++-?,A ,A1++I6$$^4	y>Aj11G1G1C'E 'EaL&&y1 ))*;<~":q!++-?,A ,A1++I6 ))*<=~":q!++-?,A ,A1++I6#,,_= !A%j!!$..0B/D /Da ..y9"r'   c                 z  ^ TR                   R                  R                  SS5      Tl        TR                   R                  SS5      Tl        TR                   R
                  R                  SS5      Tl        TR                   R
                  R                  SS5      Tl        TR                   R
                  R                  S	S
5      Tl        TR                   R
                  R                  SS5      Tl	        TR                   R
                  R                  SS5      Tl
        TR                   R                  SS5      Tl        TR                   R                  SS5      Tl        TR                   R                  SS5      Tl        TR                   R                  SS5      Tl        TR                   R                  SS 5      Tl        [!        U4S jTR                   R"                   5       TR                  5      Tl        TR                  (       d  STl        TR                   R                  SS5      Tl        g )Nbatch_size_per_gpu   	clip_gradg      ?r   gh㈵>
adam_beta1r.   
adam_beta2g+?adam_epsilong:0yE>weight_decayr4   use_fp16Ffp16_backendampsave_on_each_nodefp16_opt_levelc              3   r   >#    U  H,  nUS    S:X  d  M  UR                  STR                  5      v   M.     g7f)typeApexAMPOptimizerHook	opt_levelN)r?   r   ).0itemrE   s     r$   	<genexpr>-DeepspeedHook.prepare_args.<locals>.<genexpr>T  s=      $O0@'+F|7M'M %ODHH[$:M:M$N$N0@s   7#7O1bf16)r=   
dataloaderr?   per_device_train_batch_sizemax_grad_normr>   learning_rater   r   r   r   fp16fp16_full_evalr   r   r   nexthooksr  )r!   rE   s    `r$   prepare_argsDeepspeedHook.prepare_argsF  s   +/::+@+@+D+D !,%(!ZZ^^K=!ZZ1155dDA**..22<E**..22<G JJ0044^TJ JJ0044^SIJJNN:u5	"jjnnZ? JJNN>5A!%0CU!K"jjnn-=tD" $O04

0@0@$O $(#6#68 """&DJJNN651	r'   c                 N   [        5       u  oBl        U R                  U5        [        R                  R                  U R                  5      (       a  U R                  nO4[        R                  R                  UR                  U R                  5      n[        R                  R                  U5      (       d  [        SU R                   S35      eU R                  R                  SU 35        [        U5      nUR                  U5        UR                  X!R                  U5        U$ )Nz$No such DeepSpeed json config file: .zLoading deepspeed config from )r   
world_sizer  rz   r{   existsr   rD   	model_dirr   rX   rY   r   trainer_config_processrK   rF   )r!   rZ   rE   	max_steps_r   	ds_configs          r$   get_deepspeed_config"DeepspeedHook.get_deepspeed_config\  s    *_?$77>>$//00#44!ww||G,=,=,0,A,A Cww~~.//6t7L7L6MQO  	9:J9KLM#$45	((.))$yIr'   c                     [        S5        [        5       n[        SU 35      Ul        UR                  R                  UR                  5        S UR                  [        R                  '   g )Npytorchzcuda:)	r   r   r   devicerF   toparallel_groupsr   DP)r!   rZ   
local_ranks      r$   
after_initDeepspeedHook.after_initp  sR    )#%
&zl';<(>B 7 : :;r'   c                     g r   rM   r   s     r$   
before_valDeepspeedHook.before_valw  r   r'   c                    [        US5      (       d  [        5       U l        OUR                  U l        UR                  nUR                  R
                  R                  S0 5      R                  SS5      Ul        UR                  UR                  -  n[        R                  " UR                  U-  5      nU R                  XU5      n[        XU5      u  pgUR                  nU R                  b  U R                  US   S'   US   R                  SS5      U R                   l        ["        R$                  " UR&                  UUUS9u  Ul        Ul        ol        g )	NrX   r0   r   rb   zero_optimizationstager   )rF   r>   r8   r\   )r7   r   rX   cfgr=   r>   r?   gradient_accumulation_stepsiters_per_epochr@   rA   _max_epochsr  r]   r8   r   r   r   
initializerF   r\   )
r!   rZ   rE   num_update_steps_per_epochr  r  r>   r\   r8   r  s
             r$   
before_runDeepspeedHook.before_runz  s-   w))$,DK!..DK {{+/::+?+?+C+Cr,3115 	(%,%<%<@`@`%`"IIg114NNO	--gYG	"7	#+	!!??&37??F&'0$*+>$?$C$CGQ$O!DMDXDX--%	E'Aw(!-Ar'   )r   r   rX   r   r   ro   r   )NTFTN)rN   rO   rP   rQ   r   	VERY_HIGHPRIORITYr   r   r  r  r"  r%  r0  rS   rM   r'   r$   r   r     sB    !!H 48&+ %#62,(C'r'   r   )3r@   rz   r   	functoolsr   r   r   r   megatron_utilr   r   transformers.deepspeedr   modelscope.metainfor   modelscope.trainers.hooksr	   !modelscope.trainers.hooks.builderr
   4modelscope.trainers.hooks.checkpoint.checkpoint_hookr   r   9modelscope.trainers.hooks.checkpoint.checkpoint_processorr   modelscope.trainers.hooks.hookr   +modelscope.trainers.hooks.lr_scheduler_hookr   r   (modelscope.trainers.hooks.optimizer.baser   r   "modelscope.trainers.hooks.priorityr   modelscope.utils.checkpointr   modelscope.utils.constantr   modelscope.utils.devicer   modelscope.utils.loggerr   modelscope.utils.torch_utilsr   r   r   r   r]   r_   register_moduler   rM   r'   r$   <module>rF     s     	  #   % + ; % 8 3' /OJ 7 7 = 1 .5 5F. FR#.R,.B+Rj 5#6#67{'D {' 8{'r'   