
    9i                         S SK r S SKJr  S SKrS SKJr  S SKJr  S SKJr  S SK	J
r
  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  SSKJr  SSKJr  \R2                  " \
R4                  S9 " S S\5      5       rg)    N)Union)DeepSpeedEngine)mpu)nn)Trainers)
TorchModel)DistributedPlug)BertLayerNorm)TextGenerator)ModeKeys   )TRAINERS)NlpEpochBasedTrainer)module_namec                       \ rS rSrS\\R                  \4   4S jrS\\R                  \4   4S jr	S r
S rS rS rS	 rS
rg)PlugTrainer   returnc                    [        [        R                  R                  SS5      5      n[        R                  R                  SS5      n[        R                  R                  SS5      n[	        U R
                  U4UUS.U R                  R                  D6nU R
                  U R                  UR                  5      l        UR                  $ )N
LOCAL_RANKMASTER_ADDRz	127.0.0.1MASTER_PORT29500)	master_ipmaster_port)	intosenvirongetr	   	model_dircfgmodelunwrap_module)selfrankr   r   r#   s        d/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/nlp/plug_trainer.pybuild_modelPlugTrainer.build_model   s    2::>>,34JJNN=+>	jjnn]G<NN  #	
 hhnn 59NN5;;'1{{    c                     SSK Jn  U" U5      $ )Nr   )DistributedDataParallel) modelscope.utils.nlp.distributedr,   )r%   r#   DDPs      r'   to_parallelPlugTrainer.to_parallel#   s    S5zr*   c           	         S/ 0n/ SS.nUR                  5        GH>  n[        U[        [        R                  R
                  45      (       aM  US   R                  [        UR                  R                  5       5       Vs/ s H
  nUc  M  UPM     sn5        M  US   R                  [        UR                  R                  5       5       VVs/ s H$  u  peUc  M
  SU;  d  M  SU;  d  M  US:w  d  M"  UPM&     snn5        US   R                  [        UR                  R                  5       5       VVs/ s H  u  peUc  M
  US:X  d  M  UPM     snn5        GMA     X#4$ s  snf s  snnf s  snnf )Nparams        )r2   weight_decay
mask_scoremaskbias)modules
isinstancer
   torchr   	LayerNormextendlist_parametersvaluesitems)r%   moduleweight_decay_paramsno_weight_decay_paramsmodule_pns          r'   )_get_params_for_weight_decay_optimization5PlugTrainer._get_params_for_weight_decay_optimization'   sf   'n,.!D~~'G'M5883E3E#FGG&x077#G$7$7$>$>$@A9A! A9 
 $H-44"&w':':'@'@'B"C6"C$! )5Q)> a ,-K "C6 
 'x077"&w':':'@'@'B"C9"C$! )*f "C9  (" #::9
6
9s<    E 
E 
	E%E%#E%+E%3E%5	E+E+
E+c           	      X   U R                   u  pU R                  R                  R                  SS 5      nUb  UR	                  S0 5      nSSKJn  U R                  nUR                  R                  R                  R                  nUR                  R                  R                  R                  R                  nUR                  R                  R                  R                  n	/ n
U
[        U R                  U5      5      -  n
U
[        U R                  U5      5      -  n
U
[        U R                  U	5      5      -  n
U
 H)  nUS    H  n[!        US5      (       a  M  SUl        M     M+     U" U
UR$                  UR&                  S9nU R                  R                  R                  S	S 5      nUb  Uc   eUR	                  S0 5      nSS
KJn  U R,                  nU" UUR$                  UR.                  U-  UUR0                  SS9nXl        X l        U R2                  U R4                  WW4$ )N	optimizeroptionsr   )DeepSpeedCPUAdamr2   model_parallelF)lrr4   lr_scheduler)AnnealingLRr   )start_lrwarmup_iter	num_itersdecay_style	last_iter)
optimizersr"   trainr    popdeepspeed.ops.adamrL   r#   rA   bert
embeddingsencoderlayerdecoderr=   rG   hasattrrM   rN   r4   &modelscope.models.nlp.plug.AnnealingLRrP   	max_iterswarmuprT   rJ   rO   )r%   rJ   rO   optimizer_cfgoptim_optionsrL   r#   r[   layers
dec_layersparam_groupsparam_groupparamlr_scheduler_cfg
lr_optionsrP   rS   s                    r'   create_optimizer_and_scheduler*PlugTrainer.create_optimizer_and_scheduler>   s   "&//	**;=$)--i<M7

\\'',,77
##((0066\\''//77
::6BD 	D:::FH 	H:::FH 	H (K$X.u&677+0E( / ( %&335	
  88>>--ndC'((()--i<JFNN	""%%(//);(44 #(~~t00-KKr*   c                    UR                  5       u  p4Sn[        R                  " [        R                  " XTU4UR                  S95      R                  USUU5      n[        R                  " UR                  5       [        R                  UR                  S9nSXqU:H  '   [        R                  " U[        R                  UR                  S9nUR                  S5      R                  U5      nXgU4$ )N   )device)dtyperp   r3   r   )sizer:   trilonesrp   viewfloatarangelong	unsqueeze	expand_as)	r%   data	eod_token
batch_size
seq_lengthatt_mask_batchattention_mask	loss_maskposition_idss	            r'   _get_masks_and_position_ids'PlugTrainer._get_masks_and_position_idsn   s    !%
 JJJ?"kk+,,0DJ1;-= 	 JJIIKu{{4;;@	'*	)#$ ||ejj>#--a0::4@,66r*   c           
         [         R                  U l        [        U R                  R
                  SS5      nUS   S S 2S S24   R                  5       nUS   S S 2SS 24   R                  5       nU R                  US5      u  pgn[        U R                  R
                  SS 5      (       a  UR                  5       nU" US   S US	   UUUUS
9u  p[        R                  " U
R                  5       R                  5       U5      nUR                  S5      n[        R                  " UR                  S5      U-  5      UR                  5       -  nSU0U l        U R                   R#                  U R                  5        g )Ncheckpoint_activationsTlabelsr   ro   r   fp16	input_idsr   )r   loss)r   TRAIN_modegetattrr"   rW   
contiguousr   halfr   vocab_parallel_cross_entropyrv   ru   r:   sumtrain_outputs
log_bufferupdate)r%   r#   inputsr   
tgt_tokens
tgt_labelstgt_attention_maskdec_loss_maskr   _outputlossesr   s                r'   
train_stepPlugTrainer.train_step   s]   ^^
!()A4"IH%a"f-88:
H%ae,779
:>:Z:Z;7<488>>6400!3!8!8!: ;#$#9;	 11&2C2C2E2K2K2M2<>%**2.yyR=89M<M<M<OO %d^t112r*   c                 n   [        U R                  [        5      (       a  U R                  R                  nOU R                  nUR	                  5         U R                  U R                  5      R                  R                  nUS   R                  S   n[        UU R                  R                  S 5      n[        R                  " 5          US   R                  5       nUS   R                  5       nUS   R                  5       nUS S 2SS 24   R!                  5       n	US U/n
UR#                  U
5      nUS   nU	R%                  5       R'                  5       R)                  5       n/ US'   / US'   [+        U5       H  nX   S   nS	XUS-
  :  '   UR%                  5       R'                  5       R)                  5       nU R                  R-                  X   S
S9nU R                  R-                  US
S9nUS   R/                  U5        US   R/                  U5        M     S S S 5        U$ ! , (       d  f       W$ = f)Nr   r   r   r   ro   predictionspredstgtsd   T)skip_special_tokens)r9   r#   r   rA   evalr$   configoriginal_vocab_sizeshaper   eval_preprocessornlp_tokenizerr:   no_gradrx   byter   translate_batchcpunumpytolistrangedecodeappend)r%   r{   r#   
vocab_sizer}   beam_generatortokenspadding_mask
target_idstarget_labelsencoder_inputsresult	pred_listtarget_listipred_idsgold_stringpred_strings                     r'   evaluation_stepPlugTrainer.evaluation_step   s	    djj/22JJ%%EJJE

 ''

3::NN
+&,,Q/
&u'+'='='K'K'+- ]]_+&++-F 01668Lh,,.J&q!"u-88:M$dL9N#33NCF}-I'++-335<<>K F7ODL:&$<?69JN23#<<>//188:"44;;N < >"44;;$ < 8w&&{3V##K0 ' , - _, s   
EH%%
H4)r   rO   rJ   r   N)__name__
__module____qualname____firstlineno__r   r   Moduler   r(   r/   rG   rl   r   r   r   __static_attributes__ r*   r'   r   r      sQ    U299j#89 E"))Z*?$@ ;..L`7,3>'r*   r   )r   typingr   r:   	deepspeedr   megatron_utilr   r   modelscope.metainfor   modelscope.models.baser   modelscope.models.nlp.plugr	   #modelscope.models.nlp.plug.backboner
   $modelscope.models.nlp.plug.generatorr   modelscope.utils.constantr   baser   nlp_trainerr   register_modulenlp_plug_trainerr   r   r*   r'   <module>r      s`    	   %   ( - 6 = > .  . 
h&?&?@v& v Avr*   