
    9i                        S SK r S SKrS SKJr  S SKJr  S SKJr  S SKJ	r	  S SK
JrJrJrJrJrJr  S SKrS SKrS SKJr  S SKJr  S S	KJrJrJr  S S
KJr  S SKJr  S SKJr  S SK J!r!  S SK"J#r#J$r$  S SK%J&r&  S SK'J(r(J)r)  S SK*J+r+  S SK,J-r-  S SK.J/r/  S SK0J1r1  S SK2J3r3  S SK4J5r5  S SK6J7r7J8r8  S SK9J:r:  S SK;J<r<  S SK=J>r>J?r?J@r@  S SKAJBrBJCrCJDrDJErEJFrFJGrGJHrHJIrIJJrJ  S SKKJLrL  S SKMJNrN  S SKOJPrP  S SKQJRrR  S S KSJTrT  S S!KUJVrV  S S"KWJXrXJYrYJZrZJ[r[J\r\J]r]J^r^J_r_  S#S$K`Jara  S#S%KbJcrc  S#S&KdJereJfrfJgrg  S#S'KhJiri  S#S(KjJkrk  S#S)KlJmrm  \S*   rn\cR                  " \!R                  S+9 " S, S-\a5      5       rqS. rrg)/    N)Mapping)deepcopy)LooseVersion)partial)CallableDictListOptionalTupleUnion)distributed)nn)
DataLoaderDatasetSampler)default_collate)DistributedSampler)check_local_model_is_latest)Trainers)build_metrictask_default_metrics)PredictionSavingWrapper)Model
TorchModel)TorchCustomDataset)build_custom_dataset)	MsDataset)ModelOutputBase)Preprocessor)HOOKS)Priorityget_priority)build_lr_scheduler)build_optimizer)Config
ConfigDictJSONIteratorEncoder)	DEFAULT_MODEL_REVISIONConfigFields
ConfigKeysDistributedParallelTypeInvokeModeKeys	ModelFile
ThirdPartyTrainerStages)	to_device)create_device)func_receive_dict_inputs)is_swift_available)
get_logger)build_from_cfg)compile_modelget_dist_infoget_local_rank	init_distis_dist	is_masteris_on_same_deviceset_random_seed   )BaseTrainer)TRAINERS)	merge_cfgmerge_hooks
update_cfg)Hook)build_parallel)is_parallel)zswift.SwiftConfigzswift.PeftConfig)module_namec                      ^  \ rS rSrSrSSSSSSSSS\SSSS4S\\\\	R                  \4      S\\   S\\   S	\\   S
\\\\\\4   4      S\\\\4      S\\\\4      S\\\\\\4   4      S\\R&                  R(                  \R&                  R*                  R,                  4   S\\   S\S\\\      S\\\\\\4   4      S\\\\4   \4   4U 4S jjjrS rS rS]S jrS^S jr S^S jr!S r"S\#4S jr$\%S 5       r&\%S 5       r'\%S 5       r(S  r)S! r*S" r+\%S# 5       r,\%S$\\   4S% j5       r-\%S$\4S& j5       r.\%S$\4S' j5       r/\%S$\4S( j5       r0\%S) 5       r1\%S* 5       r2\%S+ 5       r3 S^S,\\\\\   4   S-\#S.\S\\   4S/ jjr4S0\S.\S\S$\54S1 jr6\7 S^S-\#S.\S\4S2 jj5       r8S$\\\4   4S3 jr9S$\\\\4      4S4 jr:S5 r;  S_S6 jr<  S`S7\\\\   4   4S8 jjr=SS9 jr>\%S: 5       r?S$\\	R                  \4   4S; jr@S$\\	R                  \4   4S< jrAS$\\	R                  \4   4S= jrBS> rCS? rDS@ rESA rFS7\\\\   4   4SB jrGS^S\HSC\I4SD jjrJS^S\HSC\I4SE jjrKSF rLSG rM    SaS0\SH\SI\SJ\NSK\NS\S$\O4SL jjrPSM rQSN rRSO rSSP rTSQ\S$S4SR jrUSS\S$\4ST jrVSU rWSV rXSW\S$S4SX jrYSY rZSZ r[S$\4S[ jr\S\r]U =r^$ )bEpochBasedTrainer<   a<  Epoch based Trainer, a training helper for PyTorch.

Args:
    cfg_file(str): The local config file.
    model (:obj:`torch.nn.Module` or :obj:`TorchModel` or `str`): The model to be run, or a valid model dir
        or a model id. If model is None, build_model method will be called.
    data_collator (`Callable`, *optional*):
        The function to use to form a batch from a list of elements of `train_dataset` or `eval_dataset`.
    train_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*):
        The dataset to use for training.

        Note that if it's a `torch.utils.data.IterableDataset` with some randomization and you are training in a
        distributed fashion, your iterable dataset should either use a internal attribute `generator` that is a
        `torch.Generator` for the randomization that must be identical on all processes (and the Trainer will
        manually set the seed of this `generator` at each epoch) or have a `set_epoch()` method that internally
        sets the seed of the RNGs used.
    eval_dataset (`MsDataset` or `torch.utils.data.Dataset`, *optional*): The dataset to use for evaluation.
    preprocessor (:obj:`Preprocessor`, *optional*): The optional preprocessor.
        NOTE: If the preprocessor has been called before the dataset fed into this trainer by user's custom code,
        this parameter should be None, meanwhile remove the 'preprocessor' key from the cfg_file.
        Else the preprocessor will be instantiated from the cfg_file or assigned from this parameter and
        this preprocessing action will be executed every time the dataset's __getitem__ is called.
    optimizers (`Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler._LRScheduler]`, *optional*): A tuple
        containing the optimizer and the scheduler to use.
    seed (int): The optional random seed for torch, cuda, numpy and random.
    max_epochs: (int, optional): Total training epochs.
    cfg_modify_fn: An input fn which is used to modify the cfg read out of the file.
    remove_unused_data: Automatically remove unused data keys in mini-batches.
        The remove action based on the `inspect` on the model's forward method, the removed columns will be
        moved to the mini-batch's attributes.
    compile (bool, optional): Compile the model with torch 2.0, default False
    compile_options (dict, optional): The compile options if compile=True,
        default None to use the default params of 'TorchModel.compile'.
    efficient_tuners (dict, optional): The tuners to use to train the model
    samplers: (:obj:`Sampler` or `Dict[Sampler]`, *optional*): samplers used in the train/eval DataLoader.
    Examples of cfg_modify_fn:
        >>> def cfg_modify_fn(cfg):
        >>>     cfg.preprocessor.first_sequence= 'text1'
        >>>     cfg.preprocessor.second_sequence='text2'
        >>>     return cfg
NNN*   modelcfg_filecfg_modify_fnarg_parse_fndata_collatortrain_dataseteval_datasetpreprocessor
optimizersmodel_revisionseed	callbackssamplersefficient_tunersc           
      8  > Xl         [        U R                   5        S U l        Xl        [        R
                  U l        / U l        SU l        SU l	        SU l
        SU l        UR                  SS5      U l        S U l        S U l        S U l        Xl        [%        U[&        5      (       aw  U R)                  XUR+                  [,        R.                  S 5      5      U l        Uc8  [2        R4                  R7                  U R0                  [8        R:                  5      nXl        OUc   S5       e[2        R4                  R?                  U5      U l        S U l        [A        US5      (       a`  [C        UR0                  [D        R.                  [D        RF                  [,        R.                  UR+                  [,        R.                  S 5      0S9  [H        TU ]  X$5        X0l&        [O        U RP                  5        U RS                  U RP                  5      U l(        SU;   a  U RP                  RU                  US   5        [W        U RP                  5      U l(        [%        U[X        [Z        R\                  45      (       a  Xl/        OU Ra                  5       U l/        U R                  (       a1  UR                  S5      nUc  0 n[c        U R^                  40 UD6U l/        UR                  S	S 5      Gb  US	   U l2        S
U RP                  ;  a  [g        5       U RP                  S
'   U Rd                  U RP                  S
   S	'   SU RP                  S
   ;   ap  SU RP                  S
   S   ;   a"  U Rd                  U RP                  S
   S   S   S'   SU RP                  S
   S   ;   a"  U Rd                  U RP                  S
   S   S   S'   SU RP                  S
   ;   a  U Rd                  U RP                  S
   S   S'   O+U RP                  Rh                  R                  S	S5      U l2        U Rk                  U5      u  U l6        U l7        [2        R4                  Rq                  U Rd                  5      (       d  [2        Rr                  " U Rd                  SS9  [2        R4                  R7                  U Rd                  SRu                  U Rv                  5      5      n[y        UU RP                  R                  SS5      S9U l=        U R|                  " S,UU RP                  [        R
                  U Rl                  S.UD6U l?        U R|                  " S,UU RP                  [        R                  U Rn                  S.UD6U lA        U R                  UUR                  SS5      S9u  U lC        U lD        UR                  SU RP                  R                  S5      5      U lF        U R                  c   S5       eUR                  SU RP                  R                  S5      5      U lG        UR                  S U RP                  R                  S!5      5      U lH        UR                  S"S5      U lI        UR                  S#5      U lJ        UR                  S$5      U lK        U R                  U5        0 U lM        U R                  b  U RP                  R                  S%5      (       dh  S&U RP                  Rh                  ;  a  / U RP                  Rh                  S&'   U RP                  Rh                  S&   R                  S'U R                  S(.5        [        U RP                  5      nU R                  U5        [        U5      (       a  U/nU=(       d    /  H  nU R                  U5        M     U R                  [        R                  5        U R                  5       =(       a#    [        R                  " U R                  5      S):  U lZ        U R                  5       U l\        U R                  (       d  U R                  b  U R                  OS*n[        U5      U lK        U R                  R                  S+:X  a?  [        U R^                  5      (       a%  U R^                  R                  U R                  5        U R                  5         g )-Nr   Fcompilez?Config file should not be None if model is not from pretrained!	model_dir)
user_agentcfg_optionscompile_optionswork_dirtrain
checkpointperiodsave_dirbestloggingout_dirz
./work_dirT)exist_okz{}.log	log_levelINFO)log_filerk   datasets	model_cfgmoderU   remove_unused_data)rr   
max_epochsztrain.max_epochszzmax_epochs should be provided by the init arguments or configured in the `train.max_epochs` key in the configuration file.train_iters_per_epochztrain.train_iters_per_epochval_iters_per_epochzevaluation.val_iters_per_epochuse_fp16launcherdeviceztrain.hooks.DDPHookhooksDDPHook)typerw   r?   gpucuda )b_seedr>   _metric_valuesrV   r-   TRAIN_mode_hooks_epoch_iter_inner_iter_stop_trainingget_compiletrain_dataloadereval_dataloaderdata_loader	_samplers
isinstancestrget_or_download_model_dirpopr/   KEYr^   ospathjoinr.   CONFIGURATIONinput_model_iddirnamehasattrr   r,   LOCAL_TRAINERsuper__init__rP   rB   cfgrebuild_configmerge_from_dictrD   r   r   ModulerN   build_modelr7   rb   r&   rc   get_preprocessorstrain_preprocessoreval_preprocessorexistsmakedirsformat	timestampr5   loggerbuild_datasetrS   EVALrT   get_data_collatortrain_data_collatoreval_data_collatorsafe_get_max_epochs_train_iters_per_epoch_eval_iters_per_epochrv   rw   rx   tune_moduleparallel_groupsappendrC   register_hook_from_cfgcallableregister_hookinvoke_hookr0   
after_initis_dp_group_availabledistget_world_sizedp_group_distget_metricsmetricsr2   r{   r=   to	print_cfg)selfrN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   kwargsra   rm   ry   callbackdevice_name	__class__s                        [/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/trainer.pyr   EpochBasedTrainer.__init__h   s   . 


#"$^^
"$
#

9e4 $#!eS!!!;;vzz*..$'GIDN77<<(1(?(?A"''j)jj'WW__X6DN"&Duk**+OO

F$8$8"

:>>4(H  	0*$((&&txx0F"HH$$VM%:;dhh'ej"))455J))+DJ==$jj):;O&"$&tzzE_EDJ::j$'3":.DMdhh&$.L!,0MMDHHWj)txx00txx0>>&*mm HHW%l3H="$TXXg.|<<&*mm HHW%l3F;"$DHHW--:>--!),Y7 HHNN..z<HDM:>:P:P;7!7 ww~~dmm,,KK5 77<<xt~~/NO k6)JL "// "hh00	
  !.. !hh//	
  =A<R<R%zz*>F =S =H9 $"9 "::l&*hh&7&78J&KM+ 	h .h 	h+&,jj#HH;<'># &,ZZ!HH>?&A" 

:u5

:.jj*)*  "==$TXX->->%.' .' dhhnn,*,w'HHNN7#**! MM, 
 DHH%##E*I"I!RHx( (112 //1 d6I6IMM77
 '')##)-)@$++eK'4DK{{6).?

.K.K

dkk*    c                     UbB  [        5       (       d  [        S5      eSSKJn  UR	                  U R
                  U5      U l        g g )NzGPlease install swift by `pip install ms-swift` to use efficient_tuners.r   )Swift)r4   
ValueErrorswiftr   prepare_modelrN   )r   r[   r   s      r   r   EpochBasedTrainer.tune_module  sD    '%'' ]  $,,TZZ9IJDJ (r   c                    U R                   R                  S:X  as  U R                  R                  U R                   5        [	        U R                  5      (       d3  U R
                  (       a!  U R                  U R                  5      U l        gggg)z)Place model to device, or to DDP
        r}   N)rx   r{   rN   r   rG   r   to_parallelr   s    r   place_modelEpochBasedTrainer.place_model&  sb     ;;v%JJMM$++&tzz**tzz!--djj9
 0:* &r   c                   ^  Su  p4[        U[        5      (       a  [        R                  U;   a;  [        U[        R                     [        5      (       d   eU[        R                     n[        R
                  U;   a;  [        U[        R
                     [        5      (       d   eU[        R
                     nOUc  [        OUnUnUnU(       a$  SSKJn  U 4S jnU" 5       nU" X85      nU" UU5      nX44$ )a  Get the data collator for both training and evaluating.

Args:
    data_collator: The input data_collator param.
    remove_unused_data: Remove the unused data with 'RemoveColumnsCollator'.
Returns:
    The train_data_collator and eval_data_collator, can be None.
rL   r   )RemoveColumnsCollatorc                     > [         R                  " TR                  R                  5      n [	        U R
                  R                  5       5      $ N)inspect	signaturerN   forwardlist
parameterskeys)r   r   s    r    _set_signature_columns_if_neededMEpochBasedTrainer.get_data_collator.<locals>._set_signature_columns_if_neededH  s7    #--djj.@.@A	I0055788r   )	r   r   r*   rc   r   valr   modelscope.utils.data_collatorsr   )	r   rR   rr   r   r   
collate_fnr   r   model_inputss	   `        r   r   #EpochBasedTrainer.get_data_collator.  s     3=/mW--=0!-
0@0@"A8LLLL&3J4D4D&E#~~.!-
"?JJJJ%2:>>%B",9,A}J",!+M9 <=L"7##3!67I7C"E"66r   c                 D    Ub  [        U5        [        5       u  p#US:  nU$ )zInit dist and returns the dist information.

Args:
    launcher: The launcher info.

Returns:
    _dist: If world_size is greater than 1.
r?   )r:   r8   )r   rw   _
world_sizer   s        r   r:   EpochBasedTrainer.init_distS  s)     h%Qr   c                 b    Ub  UOSn[        5       (       a  [        5       nSU 3n[        U5      $ )zxGet the device information.

Args:
    device: The input device info.

Returns:
    device_name: The final device name.
r|   zcuda:)r;   r9   r2   )r   rx   r   
local_ranks       r   
get_deviceEpochBasedTrainer.get_devicec  s6     !' 2f99')J!*.K[))r   c                    SnSn[        U[        5      (       a  UnUnGO[        U[        5      (       a  [        R                  U;   a;  [        U[        R                     [
        5      (       d   eU[        R                     n[        R                  U;   a;  [        U[        R                     [
        5      (       d   eU[        R                     nOR[        U R                  [        R                  5      (       a)  U R                  R                  b  U R                  5       u  p#Ub  [        R                  Ul        Ub  [        R                  Ul        X#4$ )zGet the preprocessors information.

Args:
    preprocessor: The input preprocessor info.

Returns:
    The train_preprocessor and eval_preprocessor, can be None.
N)r   r   r   r*   rc   r   r   r   r   r)   rU   build_preprocessorr-   r   rq   r   )r   rU   r   r   s       r   r   #EpochBasedTrainer.get_preprocessorss  s    " lL11!- ,g..</!,z/?/?"@(KKKK%1*2B2B%C"~~-!,z~~">IIII$0$@!TXX|88  8800<484K4K4M1)&.nn#(%-]]"!44r   r   c                 d    [        U S5      (       a  U R                  b  U R                  U5      nU$ )zjA method used to rebuild the config, any subclass can override this method.

Returns: The rebuilt config

rP   )r   rP   r   r   s     r   r    EpochBasedTrainer.rebuild_config  s1     4))d.@.@.L$$S)C
r   c                 <    U R                   [        R                     $ )z
Get the data parallel group.
)r   r+   DPr   s    r   r   EpochBasedTrainer.dp_group      
 ##$;$>$>??r   c                 <    U R                   [        R                     $ )z 
Get the tensor parallel group.
)r   r+   TPr   s    r   tp_groupEpochBasedTrainer.tp_group  r   r   c                 <    U R                   [        R                     $ )z"
Get the pipeline parallel group.
)r   r+   PPr   s    r   pp_groupEpochBasedTrainer.pp_group  r   r   c                 <    [         R                  U R                  ;   $ )z5
Get whether the data parallel group is initialized.
)r+   r   r   r   s    r   r   'EpochBasedTrainer.is_dp_group_available       '))T-A-AAAr   c                 <    [         R                  U R                  ;   $ )z7
Get whether the tensor parallel group is initialized.
)r+   r   r   r   s    r   is_tp_group_available'EpochBasedTrainer.is_tp_group_available  r  r   c                 <    [         R                  U R                  ;   $ )z9
Get whether the pipeline parallel group is initialized.
)r+   r   r   r   s    r   is_pp_group_available'EpochBasedTrainer.is_pp_group_available  r  r   c                     U R                   $ r   )r   r   s    r   rq   EpochBasedTrainer.mode  s    zzr   returnc                     U R                   $ )z.list[:obj:`Hook`]: A list of registered hooks.)r   r   s    r   ry   EpochBasedTrainer.hooks       {{r   c                     U R                   $ )zint: Current epoch.)r   r   s    r   epochEpochBasedTrainer.epoch  r  r   c                     U R                   $ )zint: Current iteration.)r   r   s    r   iterEpochBasedTrainer.iter  s     zzr   c                     U R                   $ )zint: Iteration in an epoch.)r   r   s    r   
inner_iterEpochBasedTrainer.inner_iter       r   c                     U R                   $ )zint: Maximum training epochs.)r   r   s    r   rs   EpochBasedTrainer.max_epochs  r  r   c                 4    U R                   U R                  -  $ )z!int: Maximum training iterations.)r   iters_per_epochr   s    r   	max_itersEpochBasedTrainer.max_iters  s     $"6"666r   c                 6  ^  U 4S jnT R                   [        R                  :X  a+  T R                  b  T R                  $ U" T R                  5      $ T R                   [        R
                  :X  a+  T R                  b  T R                  $ U" T R                  5      $ g)z"int: Total iterations of one epochc                    >  [        U 5      $ ! [         a+  nTR                  R                  U5        [	        S5      eS nAff = f)NzPlease implement ``__len__`` method for your dataset, or add `train_iters_per_epoch` and `train_iters_per_epoch` to your configuration file or kwargs)len	Exceptionr   errorr   )r   er   s     r   _get_data_len8EpochBasedTrainer.iters_per_epoch.<locals>._get_data_len  sG    <;'' <!!!$ ;< <<s   
 
A&>AN)rq   r-   r   r   r   r   r   r   )r   r'  s   ` r   r  !EpochBasedTrainer.iters_per_epoch  s    	< 99&**6222$T%:%:;;YY(--'))5111$T%9%9::	 (r   ro   rp   rq   c           	      4    U(       d  [         R                  X#US9$ [        U[        5      (       a  U$ [        U[        5      (       a3  UR
                  (       d  UR                  " SUUUS.UD6  UR                  $ [        U[        5      (       a|  [        US   [        5      (       ad  / nU HE  nUR
                  (       d  UR                  " SUUUS.UD6  UR                  UR                  5        MG     [        SUUSS.UD6nXl
        U$ U[        R                  :X  a  SOSn	UR                  SU	 35      n
U
c  0 n
[        UR                  R                   UUUS	9nUR#                  U
5        [%        UUR&                  5      nXl
        U$ ! [(         a  n[+        S
U5        [        U[        [,        45      (       d  UbJ  [        U4UUS.[/        US5      (       a  [1        UR                  R                   S9O0 D6nXl
        Us SnA$ Us SnA$ SnAff = f)a  Build input datasets by given model configuration and preprocessor.

Args:
    datasets (Union[Dataset, MsDataset, List[Dataset]]): The input datasets.
    model_cfg (Config): The model configuration.
    mode (str): `train`, `eval` or `inference`. See modelscope.utils.constant.ModeKeys
    preprocessor (Preprocessor, Optional): The preprocessor for input data samples.

Returns:
    Preprocessed datasets.
)rp   rq   rU   
custom_cfgrU   rq   r   N)ro   rq   rU   rc   r   zdataset.)r{   rq   ro   rU   z** build_dataset error log:rq   rU   rN   )r{   r~   )rJ   build_dataset_from_cfgr   r   r   	is_customto_custom_datasetds_instancer	   r   trainerr-   r   r   r&   rN   r{   updater   taskr$  printr   r   dict)r   ro   rp   rq   rU   r   custom_datasetsdatasettorch_custom_datasetdataset_mode_keydata_configdata_build_configcustom_datasetr&  s                 r   r   EpochBasedTrainer.build_dataset  sE   ">	 (??' @ O O ($677Hi00)).. "#,%1!" !	"
  +++Hd++
QK1, 1,"$'G",,11 &'0)5!%& %	&
 $**7+>+>?  ( (: (,!%( 	($
 04,++.2hnn.D7% '008<L;M1NO&"$K$."--%!-	%/!
 "((5!56G6?nn"F)-&%% 	 /3(T5M22l6N!3"6!-"6 ;B!7;, ;,t!5!5613"6 *.&%%	 sC   F F AF <BF A?F 
HA.HHHHHr8  c                 d    U R                   R                  S5        [        U4X#S.UD6nXl        U$ )z
@deprecated
This method is deprecated and may be removed in future releases, please use `build_dataset()` instead. Could be
compatible with methods that override the to_task_dataset in other classes.
zLThis to_task_dataset method is deprecated, please use build_dataset instead.r-  )r   warningr   r2  )r   r8  rq   rU   r   task_datasets         r   to_task_dataset!EpochBasedTrainer.to_task_datasetO  sG     	Z	
 *EE=CE#r   c                    S nU R                  S5      nU R                  SSS9nU R                  SU 35      nU(       a  U(       d  U$ [        R                  " UUUU S9nUR                  (       d  UR	                  XUS9  UR
                  $ )Nzdataset.namezdataset.subsetdefault)rE  zdataset.split_)dataset_namesubset_namesplitr,  r+  )r   r   loadr/  r0  r1  )rp   rq   rU   r8  rF  rG  
split_names          r   r.  (EpochBasedTrainer.build_dataset_from_cfg`  s      )).9(()99(M''.(?@
:N..%# 	"
   %%$d & L """r   c                     [         R                  " U R                  U R                  [        R
                  S9n[         R                  " U R                  U R                  [        R                  S9nX4$ )zBuild train and eval preprocessor.

User can override this method to implement custom logits.

Returns: The train preprocessor and eval preprocessor instance.

)cfg_dictpreprocessor_mode)r   from_pretrainedr^   r   r-   r   r   )r   r   r   s      r   r   $EpochBasedTrainer.build_preprocessoru  sZ     *99NNXX&nn. )88NNTXXP!44r   c                    [        U R                  S5      (       aE  [        U R                  R                  S5      (       a   U R                  R                  R                  OSnUb  UO)[        R
                  " U R                  R                  5      nUc0  U R                  b#  [        SU R                  R                   S35      e[        U[        [        45      (       a  U/nU$ )a  Get the metric class types.

The first choice will be the metrics configured in the config file, if not found, the default metrics will be
used.
If no metrics is found and the eval dataset exists, the method will raise an error.

Returns: The metric types.


evaluationr   NzwMetrics are needed in evaluation, please try to either add metrics in configuration.json or add the default metric for .)r   r   rR  r   r   r   r4  rT   r   r   r   r   )r   r   s     r   r   EpochBasedTrainer.get_metrics  s     29HHl2$ 2$(/0C0C09); ); ((%%--@D 	 %0'6J6N6NHHMM7?t00<SSWS[S[S`S`Raabd  gW~..iGr   c                   ^ Ub}  SSK Jm  [        [        U4S jU R                  5      5      n[        U5      S:X  a)  T" 5       nU R                  U5        UR                  U5        XS   l        X$S   l	        X4S   l
        g g )Nr   LoadCheckpointHookc                    > [        U T5      $ r   )r   )hookrW  s    r   <lambda>?EpochBasedTrainer.set_checkpoint_file_to_hook.<locals>.<lambda>  s    Jt5G$Hr   )modelscope.trainers.hooksrW  r   filterry   r#  r   r   checkpoint_fileload_all_statestrict)r   checkpoint_pathr_  r`  load_ckpt_hooksload_ckpt_hookrW  s         @r   set_checkpoint_file_to_hook-EpochBasedTrainer.set_checkpoint_file_to_hook  s    &D"Hzz#$O ?#q(!3!5"">2&&~61@A.0>A-(.A% 'r   c                    [         R                  U l        U R                  5       U l        U R                  U l        U R                  5         U R                  5         U R                  5         U R                  XUR                  SS5      5        U R                  R                  5         U R                  U R                  5        g)a0  Start training.

Args:
    checkpoint_path(`str`, `optional`): The previous saving checkpoint to read,
        usually it's a `some-file-name.pth` file generated by this trainer.
    load_all_state(`bool`: `optional`): Load all state out of the `checkpoint_path` file, including the
        state dict of model, optimizer, lr_scheduler, the random state and epoch/iter number. If False, only
        the model's state dict will be read, and model will be trained again.
    kwargs:
        strict(`boolean`): If strict, any unmatched keys will cause an error.
r`  FN)r-   r   r   get_train_dataloaderr   r   register_optimizers_hookregister_processorsprint_hook_inford  r   rN   rc   
train_loop)r   ra  r_  argsr   s        r   rc   EpochBasedTrainer.train  s    " ^^
 $ 9 9 ;00%%'  "(()/He)D	F

--.r   predict_datasetsc                 J   U R                  5         U R                  5         Ub  SSKJn  UR	                  X0US9  U R
                  R                  5         [        R                  U l	        U R                  U5      n[        US9/nU H	  nXl        M     U R                  Xg5        g)a  Start prediction.

Args:
    predict_datasets(Union[Dataset, List[Dataset]]): The datasets used to predict ground truth.

    saving_fn(`Callable`): The callable used to save the prediction values to files. Like:
        >>> class SavingFn:
        >>>     def __init__(self):
        >>>         self.filename = '/tmp/results.txt'
        >>>
        >>>     def __call__(self, inputs, outputs):
        >>>         import numpy as np
        >>>         ids = inputs.ids
        >>>         predictions = np.argmax(outputs['logits'].cpu().numpy(), axis=1)
        >>>         with open(self.filename, 'a') as f:
        >>>             for id, pred in zip(ids, predictions):
        >>>                 f.writelines(f'{id}, {pred}')

        This saving_fn's result will not be collected to one file, Training with multiprocessing please
        consider combining these files manually.

    checkpoint_path(`str`, `optional`): The previous saving checkpoint to read,
        usually it's a `some-file-name.pth` file or a pure PyTorch `some-file.bin` file
        generated by this trainer.

    strict(`boolean`): If strict, any unmatched keys will cause an error.
Nr   rV  r`  	saving_fn)ri  rj  r\  rW  load_checkpointrN   evalr-   r   r   get_predict_dataloaderr   r2  evaluation_loop)	r   rn  rr  ra  r`  rW  predict_dataloadermetric_classesms	            r   predictEpochBasedTrainer.predict  s    @ 	  "&D..f / 6

]]
!889IJ1IFGAI   	/@r   c                 .   U R                  5         U R                  5         Ub&  SSKJn  UR	                  XUR                  SS5      S9  U R                  R                  5         [        R                  U l
        U R                  5       U l        U R                  U l        U R                   Vs/ s H  n[        U5      PM     nnUb  UR!                  [#        US95        U H	  nXl        M     U R'                  U R                  U5      nXl        U$ s  snf )a  Start evaluation.

Args:
    checkpoint_path(`str`, `optional`): The previous saving checkpoint to read,
        usually it's a `some-file-name.pth` file or a pure PyTorch `some-file.bin` file
        generated by this trainer.

    saving_fn(`Callable`): The callable used to save the prediction values to files. Like:
        >>> class SavingFn:
        >>>     def __init__(self):
        >>>         self.filename = '/tmp/results.txt'
        >>>
        >>>     def __call__(self, inputs, outputs):
        >>>         import numpy as np
        >>>         ids = inputs.ids
        >>>         predictions = np.argmax(outputs['logits'].cpu().numpy(), axis=1)
        >>>         with open(self.filename, 'a') as f:
        >>>             for id, pred in zip(ids, predictions):
        >>>                 f.writelines(f'{id}, {pred}')
    kwargs:
        strict(`boolean`): If strict, any unmatched keys will cause an error.
r   rV  r`  Frp  rq  )ri  rj  r\  rW  rs  r   rN   rt  r-   r   r   get_eval_data_loaderr   r   r   r   r   r   r2  rv  r   )	r   ra  rr  r   rW  metricrx  ry  metric_valuess	            r   evaluateEpochBasedTrainer.evaluate  s    . 	  "&D..fjj5.I / K

]]
#88://=A\\J\6,v.\J !!"9I"NOAI   ,,T-A-A-;= , Ks   -Dc                     U R                   $ r   )r   r   s    r   r  EpochBasedTrainer.metric_values&  s    """r   c                    [         R                  " U R                  U R                  S9n[	        U[
        R                  5      (       d  [        US5      (       a  UR                  $ [	        U[
        R                  5      (       a  U$ g)zInstantiate a pytorch model and return.

By default, we will create a model using config from configuration file. You can
override this method in a subclass.

)rM  rN   N)	r   rO  r^   r   r   r   r   r   rN   r   rN   s     r   r   EpochBasedTrainer.build_model*  s`     %%dnntxxH%++w0G0G;;ryy))L *r   c                 l   U R                   R                  SS 5      bY  [        U R                   S   5      nUR                  [	        U[
        R                  R                  5       /S95        [        U5      $ [	        SUS[
        R                  R                  5       /U R                  S9n[        U5      $ )Nparallel)module
device_idsDistributedDataParallelT)r{   r  find_unused_parametersr  process_group)
r   r   r   r3  r6  torchr}   current_devicerF   r   )r   rN   dp_cfgs      r   r   EpochBasedTrainer.to_parallel7  s    88<<
D)5dhhz23FMMEuzz/H/H/J.KLN!&))*#'

1134--) f%%r   c                     [        US5      (       a  U R                  UR                  5      $ [        U[        R
                  R                  5      (       d   eU$ )zLUnwrap the model until it's a naked nn.Module.

Args:
    model: An module.
r  )r   unwrap_moduler  r   r  r   r   r  s     r   r  EpochBasedTrainer.unwrap_moduleH  sF     5(##%%ell33eUXX__5555Lr   c                 L   UR                  5         [        R                  U l        [	        U R                  U R                  5      R                  5      n[        U[        5      (       a  U(       d  UR                  " S0 UD6nOUR                  U5      n[        U[        5      (       a  UR                  5       n[        U[        5      (       d  [        S5      eSU;  Ga  S/n[        / 5      nU H:  nUR                  UR!                  5        Vs/ s H  oU;   d  M
  UPM     sn5        M<     0 n	U H  nUR#                  US5      n
U
c  M  [%        5       (       aa  U
R&                  R)                  5       R+                  S5      n
[,        R.                  " U
R1                  [,        R2                  " 5       5      5        U	R                  XR5                  5       05        M     U R6                  R                  U	5        OU R6                  R                  US   5        X@l        gs  snf )a	  Perform a training step on a batch of inputs.

Subclass and override to inject custom behavior.

Args:
    model (`TorchModel`): The model to train.
    inputs (`Dict[str, Union[torch.Tensor, Any]]`):
        The inputs and targets of the model.

        The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
        argument `labels`. Check your model's documentation for all accepted arguments.

Return:
    `torch.Tensor`: The tensor with training loss on this batch.
z$"model.forward()" must return a dictlog_varslossNr}   r~   )rc   r-   r   r   r3   r  rN   r   r   r   r   to_dictr6  	TypeErrorsetr3  r   r   r;   datacloner   r   
all_reducediv_r   item
log_buffertrain_outputs)r   rN   inputsreceive_dict_inputsr  default_keys_pattern
match_keyskey_pkeyr  values              r   
train_stepEpochBasedTrainer.train_stepT  s   $ 	^^
 7tzz*224 fg&&/B!MM3F3M!MM&1Mm_55)113M-..BCC ]*$*8 RJ-!!$1$6$6$8I$8SSLS$8IK . H!%))#t4$yy %

 0 0 2 5 5f =

43F3F3H(IJOOS**,$78 " OO""8,OO""=#<=* Js   	H!
H!
c                 D    U R                   R                  S5        [        e)zDeprecated method
        z*This prediction_step method is deprecated.)r   warnNotImplementedError)r   rN   r  s      r   prediction_step!EpochBasedTrainer.prediction_step  s     	EF!!r   c                    U R                   c  Se0 nU R                  bK  [        U R                  [        5      (       a  U R                  [        R
                     OU R                  US'   U R                  " U R                   4U R                  U R                  U R                  S.UDU R                  R
                  R                  S0 5      D6nU$ )a  Builder torch dataloader for training.

We provide a reasonable default that works well. If you want to use something else, you can change
the config for data.train in configuration file, or subclass and override this method
(or `get_train_dataloader` in a subclass.
z!The train_dataset cannot be None.samplerr   rX   r   
dataloader)rS   r   r   r6  r*   rc   _build_dataloader_with_datasetr   r   r   r   r   )r   sampler_cfgr   s      r   rg  &EpochBasedTrainer.get_train_dataloader  s     %55>>%%/04&6 &6 &*^^  &";?>> 	" 994//	4
 4 hhnn  r24 r   c                    U R                   c  Se0 nU R                  bK  [        U R                  [        5      (       a  U R                  [        R
                     OU R                  US'   SS0nUR                  U R                  R                  R                  S0 5      5        U R                  " U R                   4U R                  U R                  U R                  S.UDUD6nU$ )zBuilder torch dataloader for evaluation.

We provide a reasonable default that works well. If you want to use something else, you can change
the config for dataset.eval in configuration file, or subclass and override this method in a subclass.
pass
z The eval_dataset cannot be None.r  shuffleFr  r  )rT   r   r   r6  r*   r   r3  r   rR  r   r  r   r   r   )r   r  default_configr   s       r   r}  &EpochBasedTrainer.get_eval_data_loader  s     $44>>%#-dnn.2$4 $4 &*^^& 9= 	" $U+dhh1155lBGH99..	
   r   c                    U R                  UU R                  [        R                  U R                  S9n0 nU R
                  bK  [        U R
                  [        5      (       a  U R
                  [        R                     OU R
                  US'   SS0nUR                  U R                  R                  R                  S0 5      5        U R                  " U4U R                  U R                  U R                   S.UDUD6nU$ )zBuilder torch dataloader for prediction with the config of evaluation.

Args:
    predict_datasets(Union[Dataset, List[Dataset]]): The datasets used to predict ground truth.
rn   r  r  Fr  r  )r   r   r-   r   r   r   r   r6  r*   r   r3  rR  r   r  r   r   r   )r   rn  r8  r  r  r   s         r   ru  (EpochBasedTrainer.get_predict_dataloader  s     $$%hh//	 % 1 >>%#-dnn.2$4 $4 &*^^& 9= 	" $U+dhh1155lBGH99..	
   r   default_argsc                      [        U R                  U R                  5      UUS9$ ! [         a7  nU R                  R                  SU S[        R                   S35        UeS nAff = f)Nr   r  z%Build optimizer error, the optimizer G is a torch native component, please check if your torch with version:  matches the config.)r$   r  rN   KeyErrorr   r%  r  __version__r   r   r  r&  s       r   r$   !EpochBasedTrainer.build_optimizer  sx    
	"""4::.)+ +  	KK7u =<<A<M<M;NNbd G	s   #& 
A'2A""A'c                      [        XS9$ ! [         a7  nU R                  R                  SU S[        R
                   S35        UeS nAff = f)Nr  z+Build lr_scheduler error, the lr_scheduler r  r  )r#   r  r   r%  r  r  r  s       r   r#   $EpochBasedTrainer.build_lr_scheduler  s]    	%#II 	KK=cU C<<A<M<M;NNbd G	s    
A2AAc                    U R                   u  pUc0  [        U R                  R                  R	                  SS5      5      nOSn0 nUb!  UR                  S0 5      nU R                  US9nUc0  [        U R                  R                  R	                  SS5      5      nOSn0 nUb)  Uc   eUR                  S0 5      nU R                  USU0S9nXl        X l	        U R                  U R                  XF4$ )zCreate optimizer and lr scheduler

We provide a default implementation, if you want to customize your own optimizer
and lr scheduler, you can either pass a tuple through trainer init function or
subclass this class and override this method.
N	optimizeroptions)r   lr_schedulerr  )
rV   r   r   rc   r   r   r$   r#   r  r  )r   r  r  optimizer_cfgoptim_optionslr_scheduler_cfg
lr_optionss          r   create_optimizer_and_scheduler0EpochBasedTrainer.create_optimizer_and_scheduler  s    #'//	$TXX^^%7%7T%JKM M$)--i<M,,,?I'"">48 :  $
'((()--i<J22$K3K 3 ML #(~~t00-KKr   c                 j  ^ ^^^^	 T R                  5       u  pm	mT R                  R                  R                  S0 5      mT R                  R                  R                  S0 5      mSSKJn  [        X#5      (       a5  T(       d.  SSSSS.00n[        R                  " US	S
SS9n[        SU-   5      eUUUU	U 4S jnU" 5       u  m	mT	b  T R                  [        SSS0T	D6/5        Tb  T R                  [        SSS0TD6/5        T R                  (       a  T R                  [        SSS0T	D6/5        gg)z7Register optimizer hook and lr scheduler hook.
        optimizer_hooklr_scheduler_hookr   )ReduceLROnPlateaurc   PlateauLrSchedulerHookz*Metric Key used for PlateauLrSchedulerHook)r{   
metric_keyF   ),:)	sort_keysindent
separatorsz_Must add `lr_scheduler_hook` to configuration for `ReduceLROnPlateau` lr scheduler as follows:
c                  ~  > T(       a  TR                  T/5        Sn T(       a  TR                  S5      S:X  a  TR                  SS5        0 TETEn T(       a  TR                  T/5        SnTR                  S5      S;   a  STl        T(       a  TR                  S5      S;   a  TR                  SS5        0 TETEnX4$ )a  This function used to fit `optimizer_hook` key and `lr_scheduler_hook` key for easycv configs.

The logic is:
    If the optimizer_hook is provided and it's not TorchAMPOptimizerHook or ApexAMPOptimizerHook,
    (which means the hook is a complete one for optimization, which does not need the OptimizerHook),
    The OptimizerHook will not be registered, or else the OptimizerHook will be registered.

    Same logic to the LrSchedulerHook, the only difference is the condition of lr_scheduler_hook is
    PlateauLrSchedulerHook.

    If TorchAMPOptimizerHook or ApexAMPOptimizerHook is provided, self.use_fp16 will be set to False
    in case of the duplication of registration.

Nr{   r  )TorchAMPOptimizerHookApexAMPOptimizerHookF)r   r   r   rv   )_lr_options_optim_optionslr_hookr  
optim_hookr  r   s     r   _fit_to_old_keysDEpochBasedTrainer.register_optimizers_hook.<locals>._fit_to_old_keys7  s     ++WI6Kgkk&15MMFD)77w7++ZL9!N~~f% *B B %!7 <E "Evt,!@M!@Z!@!..r   Nr{   OptimizerHookLrSchedulerHookr  r~   )r  r   rc   r   torch.optim.lr_schedulerr  r   jsondumpsr   r   r6  rv   )
r   r   r  r  plateau_cfgr  r  r  r  r  s
   `     @@@@r   rh  *EpochBasedTrainer.register_optimizers_hook  sO    6:5X5X 6
2
 XX^^''(8"=
((..$$%8"= 	?l66w' 8D*K **uQ:OK$%& &#	/ #	/J %5$6!z$''<?<m<=?!'';,;
;<>==''D2DmDEG r   batch_size_per_gpuworkers_per_gpur   r  c                 .   Sn	Sn
U R                  5       (       aR  [        R                  R                  U R                  5      n	[        R                  R                  U R                  5      n
U(       a  UnUnOUnUnUR                  SS5      nUc  U(       a@  [        U[        R                  R                  R                  5      (       d  [        UU
U	US9nO9Sn[        U[        R                  R                  R                  5      (       d  XXS'   SnUb  [        [        XUS9OSn[        [        R                  5      [        S5      :  a  XxS	'   O US
L a  U R                   R#                  S5        [%        U4UUUUUR                  SS5      US.UD6nU$ )a  Build dataloader using input dataset and cfg. Used by `EpochBasedTrainer.train()`
and `EpochBasedTrainer.evaluate()`.

In distributed training, each GPU/process has a dataloader.
In non-distributed training, there is only one dataloader for all GPUs.

Args:
    dataset (Dataset): A PyTorch dataset.
    batch_size_per_gpu (int): Number of training samples on each GPU, i.e.,
        batch size of each GPU.
    workers_per_gpu (int): How many subprocesses to use for data loading
        for each GPU.
    dist (bool): Distributed training/test or not. Default: True.
    shuffle (bool): Whether to shuffle the data at every epoch.
        Default: True.
    seed (int, Optional): Seed to be used. Default: 0.
    runner_type (str): Type of runner. Default: `EpochBasedRunner`
    persistent_workers (bool): If True, the data loader will not shutdown
        the worker processes after a dataset has been consumed once.
        This allows to maintain the workers `Dataset` instances alive.
        This argument is only valid when PyTorch>=1.7.0. Default: False.
    kwargs: any keyword argument to be used to initialize DataLoader

Returns:
    DataLoader: A PyTorch dataloader.
r   r?   r  N)num_replicasrankr  r  )num_workersr  rX   z1.7.0persistent_workersTzNpersistent_workers is invalid because your pytorch version is lower than 1.7.0
pin_memoryF)
batch_sizer  r  batch_samplerr  worker_init_fn)r   r  r   get_rankr   r   r   r   utilsr  IterableDatasetr   r   r  r   r  r   r@  r   )r   r8  r  r  r   r  rX   r  r   r  r   r  r  r  r  init_fnr   s                    r   r  0EpochBasedTrainer._build_dataloader_with_dataseth  s   F 
%%''$$--dmm<D**99$--HJ ,J)K+J)K**Y-?Jw',{{'7'7'G'GI I,!+#	% !'5;;+;+;+K+KLL(/9% * 04 	 ))*l7.CC+='(4'KK./ !!#'zz,6"  r   c                    U R                  [        R                  5        U R                  R	                  5         [        U R                  U R                  5       GHR  nU R                  [        R                  5        [        U5       H  u  p4X0R                  :  a  M  [        X@R                  5      nX@l        X0l        U R                  [        R                  5        U R!                  U R                  U5        U R                  [        R"                  5        U ?U =R$                  S-  sl        [&        R(                  U l        US-   U R,                  :  d  M    O   U R                  [        R.                  5        SU l        U =R                  S-  sl        U R0                  (       d  GMS    O   U R                  [        R2                  5        g)z:Training loop used by `EpochBasedTrainer.train()`
        r?   r   N)r   r0   
before_runrN   rc   ranger   r   before_train_epoch	enumerater  r1   rx   
data_batchr   before_train_iterr  after_train_iterr   r-   r   r   r  after_train_epochr   	after_run)r   r   r   ir  s        r   rk  EpochBasedTrainer.train_loop  sK    	112

t{{D$4$45A]==>!*;!7&&z;;?
",#$   !@!@A

J7  !?!?@O

a
%^^
q5D000! "8$ ]<<= DKK1K"""3 66 	001r   c                    U R                   R                  5         [        U R                  U R                   5      R                  5      n[
        R                  " 5          [        U[        5      (       a$  U(       d  U R                   R                  " S0 UD6nOU R                   R	                  U5      nSSS5        U$ ! , (       d  f       W$ = f)zaPerform a training step on a batch of inputs.

Subclass and override to inject custom behavior.

Nr~   )	rN   rt  r3   r  r   r  no_gradr   r   )r   r  r  results       r   evaluation_step!EpochBasedTrainer.evaluation_step  s     	

6tzz*224 ]]_$((1D++3d3++D1	 
  _
 s   AB==
Cc                    Sn[        U R                  R                  S5      (       aB  U R                  R                  R                  n[	        U R                  4SU R
                  0UD6nU R                  [        R                  5        U R                  (       ap  SSK
Jn  U" U UU R                  UUU R                  R                  R                  SS5      U R                  R                  R                  SS5      U R                  S	9nO%SS
K
Jn  U" U UU R                  UUU R                  S9nU R                  [        R                   5        U$ )zAEvaluation loop used by `EpochBasedTrainer.evaluate()`.

        Nvisualizationr8  r   )multi_gpu_test	cache_dirgpu_collectF)rx   rx  vis_closuretmpdirr  data_loader_iters_per_gpu)single_gpu_test)rx   rx  r  data_loader_iters)r   r   rR  r  r   rT   r   r0   
before_valr   #modelscope.trainers.utils.inferencer  rx   r   r   r  	after_val)r   r   rx  r  vis_cfgr  r  r  s           r   rv  !EpochBasedTrainer.evaluation_loop  s$    488&&88hh))77G!""J,0,=,=JAHJK 	112::J*{{-'xx**..{DA HH//33M5I*.*D*DFM L+{{-'"&"<"<>M 	001r   c                     [        S5      e)aB  visualization function for evaluation results.

Examples:
    >>> # draw list of images as numpy array
    >>> images = draw_images(num_of_visualization)

    >>> # set displayed name for each image
    >>> filenames = get_image_display_names()
    >>> vis_results = {'images': images, 'filenames' : filenames}

    >>> # visualization results will be displayed in group named eva_vis
    >>> self.visualization_buffer.output['eval_vis'] = vis_results

Args:
    results (list(dict)):  a list of result dict.
    dataset (Dataset): torch dataset object to access original data.
z<visualization for evaluation will be supported in the future)r  )r   batch_resultr8  r   s       r   r  EpochBasedTrainer.visualization  s    & "JL 	Lr   rY  c                    Sn[        [        U R                  5      S-
  SS5       H  n[        US5      (       a  UR                  O[
        R                  n[        U R                  U   S5      (       a  U R                  U   R                  O[
        R                  n[        U5      [        U5      :  d  M  U R                  R                  US-   U5        Sn  O   U(       d  U R                  R                  SU5        gg)aB  Register a hook into the hook list.

The hook will be inserted into a priority queue, with the specified
priority (See :class:`Priority` for details of priorities).
For hooks with the same priority, they will be triggered in the same
order as they are registered.

Args:
    hook (:obj:`Hook`): The hook to be registered.
Fr?   PRIORITYTr   N)	r   r#  r   r   r#  r!   NORMALr"   insert)r   rY  insertedr  pp_is         r   r   EpochBasedTrainer.register_hook0  s     s4;;'!+R4A!(z!:!:A-4A
., .,$++a.))19  Ac!22""1q5$/ 5 KKq$' r   hook_cfgc                     UR                  5       n[        U[        5      (       d   e/ nU H5  n[        U[        5      nU R                  U5        UR                  U5        M7     U$ )aR  Register a hook from its cfg.

Args:
    hook_cfg (dict): Hook config. It should have at least keys 'type'
      and 'priority' indicating its type and priority.

Note:
    The specific hook class to register should not use 'type' and
    'priority' arguments during initialization.

Returns:
    A list of instances of registered hooks.
)copyr   r   r6   r    r   r   )r   r*  ry   cfg_irY  s        r   r   (EpochBasedTrainer.register_hook_from_cfgI  s^     ==?(D))))E!%/Dt$LL  r   c                 r    U R                    H'  n[        US5      (       d  M  UR                  U 5        M)     g)z%Register processors to hooks
        register_processorN)ry   r   r0  )r   rY  s     r   ri  %EpochBasedTrainer.register_processors`  s.     JJDt122''- r   c                 h    U R                    Vs/ s H  o"R                  U:X  d  M  UPM     sn$ s  snf r   )r   r   )r   clshs      r   get_hookEpochBasedTrainer.get_hookg  s'    ;;=;a++*<;===s   //fn_namec                 p    U R                    H&  n[        X!5      (       d  M  [        X!5      " U 5        M(     g)z}Call all hooks.

Args:
    fn_name (str): The function name in each hook to be called, such as
        "before_train_epoch".
N)r   r   getattr)r   r7  rY  s      r   r   EpochBasedTrainer.invoke_hookj  s*     KKDt%%&t,  r   c                 j   [        5       (       a  [        U R                  5      nU R                  UR                  l        U R
                  R                  S5        U R
                  R                  [        R                  " UR                  S[        S95        U R
                  R                  S5        g g )NzI==========================Training Config Start==========================r  )r  r3  zI===========================Training Config End===========================)r<   r   r   rb   rc   r   infor  r  	_cfg_dictr'   r   s     r   r   EpochBasedTrainer.print_cfgu  s    ;;488$C!%CIIKK[ KK

3==8KLNKK[ r   c                     [        5       (       aD  [        U SS5      (       d1  U R                  R                  U R	                  5       5        SU l        g g g )N_hook_info_printedFT)r<   r9  r   r<  get_hook_infor@  r   s    r   rj  !EpochBasedTrainer.print_hook_info  sC    ;;wt-A5IIKKT//12&*D#  J;r   c                    [         R                   Vs0 s H  o/ _M     nnU R                   H  n [        UR                  5      R
                  nUR                  R                  nSUS SUS 3n[        US5      (       d  MV  UR                  5        H  nX'   R                  U5        M     M     / n[         R                   HH  nX!   n	[        U	5      S:  d  M  SU S3n
U
S	R                  U	5      -  n
U
S
-  n
UR                  U
5        MJ     SR                  U5      nU$ s  snf ! [         a    [        R                  n Nf = f)N(z<12z) z<35get_triggered_stagesr   zStage: z:
    z
    z
 -------------------- 
)rE   stagesry   r!   r#  namer$  r$  r   __name__r   rE  r   r#  r   )r   stagestage_hook_maprY  priority	classname	hook_infotrigger_stagestage_hook_infos
hook_infosr<  s              r   rA  EpochBasedTrainer.get_hook_info  s9   BF++*N+"9+*NJJD+#DMM277 //IHS>Ic?;It344%)%>%>%@M"188C &A  [[E'.J:" w/j1122 ''- !  99%56+ +O  +#??+s   D"D''EE))r   r   r   r   r@  r   r   r   r   r   r   r   r   r   r   r   rP   r  r   rx   r   r   rT   r   r   rw   r   r  r   rN   r^   r  rV   r   r   r   rS   r  r   rv   rb   )Fr   )NT)NF)FTr   F)_rI  
__module____qualname____firstlineno____doc__r(   r
   r   r   r   r   r   r   r   r   r   r   r   r  optim	Optimizerr  _LRSchedulerintr	   rE   r   TunerConfigr   r   r   r   r:   r   r   r%   r   propertyr   r   r   r   r  r	  rq   ry   r  r  r  rs   r  r  r   r   rB  staticmethodr.  r   r   rd  rc   rz  r  r  r   r   r  r  r  rg  r}  ru  r&   r6  r$   r#   r  rh  boolr   r  rk  r  rv  r  r   r   ri  r5  r   r   rj  rA  __static_attributes____classcell__)r   s   @r   rJ   rJ   <   s   (X BF&*04/3GKAE@DEIHN,B.2EI37)sE*bii"<=>s sms $H-	s
 #8,s $E(D9A:B 5C +C %D Es $E)W*<$=>s #5G);#<=s #5)-c<.?)@*A $B Cs ekk33#kk66CCD Es %SMs  !s"  T
+#s$ uWd3<.@%@AB%s& $Dk)9$:$/%0 1's sjK:#7J * 5>&  @ @ @ @ @ @BBB   tDz   s   c    C         7 7 ; ;8 >B	O  %gy$w-&G HO !'O   O  %-\$:	O bw c &2%7"  =A#& #%(#-9# #(5E,*D$E 5 T%T	"23 0/  #!/@ !%	.A"'g(>"?.A`+Z # #U299j#89 &E"))Z*?$@ &"
eBIIz,A&B 
7+r"04uW=A']>K 8L :: T j  !LFJG` 5:7;34:?X07X;>X 9<X .2	X
 15X .1X 5?Xt 2D$"HL,($ (4 (2t  ..>	-3 	-4 	-+
 s    r   rJ   c                 .    X-  U -   U-   n[        U5        g r   )r>   )	worker_idr  r  rX   worker_seeds        r   r  r    s     $y047KK r   )sr   r   collections.abcr   r,  r   distutils.versionr   	functoolsr   typingr   r   r	   r
   r   r   r  r  r   r   r   torch.utils.datar   r   r   torch.utils.data.dataloaderr   torch.utils.data.distributedr   modelscope.hub.check_modelr   modelscope.metainfor   modelscope.metricsr   r   ,modelscope.metrics.prediction_saving_wrapperr   modelscope.models.baser   r   1modelscope.msdatasets.dataset_cls.custom_datasetsr   9modelscope.msdatasets.dataset_cls.custom_datasets.builderr    modelscope.msdatasets.ms_datasetr   modelscope.outputsr   modelscope.preprocessors.baser   !modelscope.trainers.hooks.builderr    "modelscope.trainers.hooks.priorityr!   r"   'modelscope.trainers.lrscheduler.builderr#   %modelscope.trainers.optimizer.builderr$   modelscope.utils.configr%   r&   r'   modelscope.utils.constantr(   r)   r*   r+   r,   r-   r.   r/   r0   modelscope.utils.data_utilsr1   modelscope.utils.devicer2   modelscope.utils.file_utilsr3   modelscope.utils.import_utilsr4   modelscope.utils.loggerr5   modelscope.utils.registryr6   modelscope.utils.torch_utilsr7   r8   r9   r:   r;   r<   r=   r>   baser@   builderrA   r  rB   rC   rD   
hooks.hookrE   parallel.builderrF   parallel.utilsrG   r[  register_modulerE  rJ   r  r~   r   r   <module>r     s     	 #  *  ? ?   %  9 9 7 ; B ( A 4 6 . 6 3 E F A K K6 6 6 2 1 @ < . 4; ; ;   > >  , ';< 
h&6&67a  a  8a H+!r   