
    9i                         S SK r S SKJr  S SKJrJrJrJr  S SKrS SKJ	r	  S SK
Jr  S SKJrJr  S SKJr  S SKJr  S S	KJr  S S
KJr  S SKJr  S SKJr  \R6                  " \R8                  S9 " S S\5      5       rg)    N)deepcopy)AnyDictListUnion)nn)Trainers)Model
TorchModel)GPT3ForTextGeneration)TRAINERS)NlpEpochBasedTrainer)build_parallel)Config)is_megatron_initialized)module_namec                      ^  \ rS rSrS\4U 4S jjrS\\R                  \	4   4S jr
S rS rS\S	\\\4   S\\\4   4S
 jrS\S	\\\4   S\\\4   4S jrS\	4S jrSrU =r$ )GPT3Trainer   cfgc                    > [         TU ]  U5      n[        [        R                  R                  SS5      5      UR                  l        U$ )NRANKr   )superrebuild_configintosenvirongetmodelrank)selfr   	__class__s     d/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/nlp/gpt3_trainer.pyr   GPT3Trainer.rebuild_config   s6    g$S)RZZ^^FA67		
    returnc                    U R                   R                  SS 5      bY  [        U R                   S   5      nUR                  [	        U[
        R                  R                  5       /S95        [        U5      $ [	        SUS[
        R                  R                  5       /S9n[        5       (       aD  SSK
Jn  UR                  [
        R                  R                  5       UR                  5       S.5        [        U5      $ )	Nparallel)module
device_idsDistributedDataParallelT)typer)   find_unused_parametersr*   r   )mpu)output_deviceprocess_group)r   r   r   updatedicttorchcudacurrent_devicer   r   megatron_utilr.   get_data_parallel_group)r!   r   dp_cfgr.   s       r#   to_parallelGPT3Trainer.to_parallel   s    88<<
D)5dhhz23FMMEuzz/H/H/J.KLN!&))*#'

1134	6 #$$)MM!&!:!:!<!$!<!<!> 
 f%%r%   c                 l    U R                   R                  nUR                  UR                  5       5      $ N)eval_preprocessor	tokenizer
detokenizetolist)r!   tokensr>   s      r#   _decodeGPT3Trainer._decode3   s*    **44	##FMMO44r%   c                     U R                   (       a  U R                  R                  OU R                  nUR                  5         SU;   a  U R	                  X!5      $ U R                  X!5      $ )N
inputs_len)_distr   r)   eval_generate_eval_forward_eval)r!   datar   s      r#   evaluation_stepGPT3Trainer.evaluation_step7   sP    %)ZZ

!!TZZ

4&&u33%%e22r%   r   rJ   c           	      P   UR                  SSS9  UR                  U5      nUS   n[        US   U5       VVs/ s H  u  pVU R                  XVS  5      PM     snnUS'   [        US   U5       VVs/ s H  u  pVU R                  XVS-
  S  5      PM     snnUS'   U$ s  snnf s  snnf )	N   g        )top_ktop_pprompts_len	sequencespredslabelstgts)r1   generateziprB   )r!   r   rJ   resultrQ   seqskip_lens          r#   rH   GPT3Trainer._generate_eval@   s     	!2&%!%m!4 "%VK%8+!F
!F LLY(!F
w "%T(^[!A
!A LL\]+,!A
V 

s   B1"B"c                 $    UR                  U5      $ r<   )forward)r!   r   rJ   s      r#   rI   GPT3Trainer._forward_evalQ   s    }}T""r%   c                 ~    [         R                  " U R                  U R                  U R                  R                  S9$ )N)cfg_dictmegatron_cfg)r
   from_pretrained	model_dirr   megatron)r!   s    r#   build_modelGPT3Trainer.build_modelU   s0    $$NNTXXDHH<M<MO 	Or%    )__name__
__module____qualname____firstlineno__r   r   r   r   Moduler   r9   rB   rK   r   r   strr   rH   rI   re   __static_attributes____classcell__)r"   s   @r#   r   r      s    & 
&E"))Z*?$@ &.53$9 !#s(^04S#X"##8 # cN#/3CH~#OZ O Or%   r   )r   copyr   typingr   r   r   r   r3   r   modelscope.metainfor	   modelscope.models.baser
   r   modelscope.models.nlpr   modelscope.trainers.builderr   modelscope.trainers.nlp_trainerr   $modelscope.trainers.parallel.builderr   modelscope.utils.configr   modelscope.utils.megatron_utilsr   register_modulegpt3_trainerr   rg   r%   r#   <module>r|      sd    
  ) )   ( 4 7 0 @ ? * C 
h&;&;<BO& BO =BOr%   