
    9iE                     :   S SK r S SKJr  S SKJrJrJr  S SKJrJ	r	  S SK
r
S SKrS SKJr  S SKJr  S SKJr  S\	\\\   4   4S	 jr\ " S
 S5      5       r\ " S S5      5       r\ " S S5      5       r\" SS9 " S S\\\5      5       rS rS\S\	\\\S4   4S jrg)    N)deepcopy)	dataclassfieldfields)ListUnion)CliArgumentParser)Config)DEFAULT_DATASET_NAMESPACEvaluesc                     [        U [        5      (       a  U R                  S5      OU n0 nU=(       d    /  HB  n[        UR	                  5       5      S:X  a  M"  UR                  S5      u  pE[        U5      X$'   MD     U$ )N,r   =)
isinstancestrsplitlenstripparse_value)r   pairs_paramskvkeyvalues         a/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/training_args.pyset_flatten_valuer      sj    !+FC!8!8FLLfEGkrkrxxz?aXXc]
"5)	 
 N    c                      \ rS rSr% \" SSS0S9r\\S'   \" SSS0S9r\\S'   \" SSS	0S9r	\\S
'   \" SSS0S9r
\\S'   \" SSS0S9r\\S'   \" SSS0S9r\\S'   \" \SS0S9r\\S'   \" \SS0S9r\\S'   \" SSS0S9r\\S'   Srg)DatasetArgs   NhelpzNThe dataset name used for training, can be an id in the datahub or a local dirdefaultmetadatatrain_dataset_namezOThe subset name used for evaluating, can be an id in the datahub or a local dirval_dataset_namez.The subset name used for training, can be Nonetrain_subset_namez0The subset name used for evaluating, can be Noneval_subset_namezThe split of train datasettrain_splitzThe split of val dataset	val_splitz'The dataset namespace used for trainingtrain_dataset_namespacez)The dataset namespace used for evaluatingval_dataset_namespacea  The json file to parse all datasets from, used in a complex dataset scenario,the json format should be like:
                    [
                        {
                            "dataset": {
                                # All args used in the MsDataset.load function
                                "dataset_name": "xxx",
                                ...
                            },
                            # All columns used, mapping the column names in each dataset in same names.
                            "column_mapping": {
                                "text1": "sequence1",
                                "text2": "sequence2",
                                "label": "label",
                            },
                            # float or str, float means to split the dataset into train/val,
                            # or just str(train/val)
                            "split": 0.8,
                        }
                    ]
                    dataset_json_file )__name__
__module____qualname____firstlineno__r   r%   r   __annotations__r&   r'   r(   r)   r*   r   r+   r,   r-   __static_attributes__r.   r   r   r   r      s3    $\
  "]
c  #D
s  !F
OS  0 
K 
 . 
Is 
 $))=
$S  "')?
"3  #
s r   r   c                       \ rS rSr% \" SSSS.S9r\\S'   \" SSS0S9r\\S	'   \" SSS
0S9r	\\S'   \" SSSS.S9r
\\S'   Srg)	ModelArgsj   NzThe task code to be usedtaskr!   cfg_noder"   r!   zA model id or model dirmodelzthe revision of modelmodel_revisionzJThe mode type, if load_model_config is False, user need to fill this fieldz
model.type
model_typer.   )r/   r0   r1   r2   r   r8   r   r3   r;   r<   r=   r4   r.   r   r   r6   r6   j   s    .
D#  - 
E3 
  + 
NC 
  Y$
J r   r6   c                      \ rS rSr% \" SSS0S9r\\S'   \" SSS	S
.S9r\\S'   \" SSSS
.S9r	\\S'   \" SSSS
.S9r
\\S'   \" SSSS
.S9r\\S'   \" SSSS
.S9r\\S'   \" SSSS
.S9r\\S'   \" SSSS
.S9r\\S'   \" SS S!S
.S9r\\S"'   \" S#S$S%S
.S9r\\S&'   \" S'S(S)S
.S9r\\S*'   \" S+S,S-S
.S9r\\S.'   \" S/S0S1S
.S9r\\S2'   \" S3S4S5S
.S9r\\S6'   \" S7S8S9\S:.S9r\\S;'   \" S7S<S=\S:.S9r\\S>'   \" S?S@SA/ SBQSC.S9r\\SD'   \" SSSE0S9r\\SF'   \" S#SGSHSI.S9r\\SJ'   \" S?SKSL/ SBQSM.S9r\\SN'   \" SOSPSQSI.S9r\\SR'   \" S7SSSTSI.S9r \\SU'   \" S?SVSW/ SBQSM.S9r!\\SX'   \" SOSYSZSI.S9r"\\S['   \" SS\S]SI.S9r#\\S^'   \" S7S_S`SI.S9r$\\Sa'   \" SbScSdSI.S9r%\\Se'   \" S7SfSgSI.S9r&\\Sh'   \" SOSiSjSI.S9r'\\Sk'   \" SSlSmSI.S9r(\\Sn'   \" S7SoSpSI.S9r)\\Sq'   \" S7SrSsSI.S9r*\\St'   \" SuSvSwSI.S9r+\\Sx'   \" SySzS{SI.S9r,\\S|'   \" SSlS}SI.S9r-\\S~'   \" S7SoSSI.S9r.\\S'   \" S7SrSSI.S9r/\\S'   \" SuSvSSI.S9r0\\S'   \" SySzSSI.S9r1\\S'   Sr2g7)	TrainArgs   *   r!   zThe random seedr"   seed   z#train.dataloader.batch_size_per_gpuz:The `batch_size_per_gpu` argument for the train dataloader)r:   r!   per_device_train_batch_sizer   z train.dataloader.workers_per_gpuz7The `workers_per_gpu` argument for the train dataloadertrain_data_workerFztrain.dataloader.shufflez/The `shuffle` argument for the train dataloadertrain_shuffleztrain.dataloader.drop_lastz1The `drop_last` argument for the train dataloadertrain_drop_lastz(evaluation.dataloader.batch_size_per_gpuz9The `batch_size_per_gpu` argument for the eval dataloaderper_device_eval_batch_sizez%evaluation.dataloader.workers_per_gpuz6The `workers_per_gpu` argument for the eval dataloadereval_data_workerzevaluation.dataloader.shufflez.The `shuffle` argument for the eval dataloadereval_shufflezevaluation.dataloader.drop_lastz0The `drop_last` argument for the eval dataloadereval_drop_last   ztrain.max_epochszThe training epochs
max_epochsz./train_targetztrain.work_dirz%The directory to save models and logswork_dirg-C6
?ztrain.optimizer.lrz"The learning rate of the optimizerlrLinearLRztrain.lr_scheduler.typezThe lr_scheduler type in torchlr_schedulerAdamWztrain.optimizer.typez+The optimizer type in PyTorch, like `AdamW`	optimizerNztrain.optimizerzThe optimizer params)r:   r!   
cfg_setteroptimizer_paramsztrain.lr_schedulerzThe lr scheduler paramslr_scheduler_paramsby_epochz&train.lr_scheduler.options.lr_strategyzThe lr decay strategy)rW   by_stepno)r:   r!   choiceslr_strategyzThe local rank
local_rankz+The interval of iter of logging informationztrain.logging.intervalr9   logging_intervalz5Eval strategy, can be `by_epoch` or `by_step` or `no`zevaluation.period.eval_strategy)r!   r:   rZ   eval_strategy   zEval intervalzevaluation.period.intervaleval_intervalzThe metric name for evaluationzevaluation.metricseval_metricsz>Checkpointing strategy, can be `by_epoch` or `by_step` or `no`z%train.checkpoint.period.save_strategysave_strategyz9The interval of epoch or iter of saving checkpoint periodz train.checkpoint.period.intervalsave_intervalz;Save the checkpoint(if it's the best) after the evaluation.ztrain.checkpoint.best.save_bestsave_best_checkpointz%The metric used to measure the model.z train.checkpoint.best.metric_keymetric_for_best_modelmaxzDThe rule to measure the model with the metric, can be `max` or `min`ztrain.checkpoint.best.rulemetric_rule_for_best_modelzBThe max number of checkpoints to keep, older ones will be deleted.z*train.checkpoint.period.max_checkpoint_nummax_checkpoint_numzGThe max number of best checkpoints to keep, worse ones will be deleted.z(train.checkpoint.best.max_checkpoint_nummax_checkpoint_num_bestz$Push to hub after each checkpointingz#train.checkpoint.period.push_to_hubpush_to_hubz<The repo id in modelhub, usually the format is "group/model"z#train.checkpoint.period.hub_repo_idrepo_idzYThe modelhub token, you can also set the token to the env variable `MODELSCOPE_API_TOKEN`z!train.checkpoint.period.hub_token	hub_tokenTzUpload to a private hubz#train.checkpoint.period.private_hubprivate_hubmasterzWhich branch to commit toz$train.checkpoint.period.hub_revisionhub_revisionz!train.checkpoint.best.push_to_hubpush_to_hub_bestz!train.checkpoint.best.hub_repo_idrepo_id_bestztrain.checkpoint.best.hub_tokenhub_token_bestz!train.checkpoint.best.private_hubprivate_hub_bestz"train.checkpoint.best.hub_revisionhub_revision_bestr.   )3r/   r0   r1   r2   r   rB   intr3   rD   rE   rF   boolrG   rH   rI   rJ   rK   rM   rN   r   rO   floatrQ   rS   r   rU   rV   r[   r\   r]   r^   r`   ra   rb   rc   rd   re   rg   rh   ri   rj   rk   rl   rm   ro   rp   rq   rr   rs   rt   r4   r.   r   r   r?   r?      sE    %
D# 
 (-=H
(  #:M
s   2E
M4  "4G
OT  ',BG
'  "?L
c  7D
L$  !9F
ND  *)
J   (;
Hc  ,8
B  14
L#  .A
Is  ")*+
c   %,-+
   @+4
K  $
J 
 "A0
c  K94
M3  #4
M3  4,
L#   M?4	
M3   H:
M3  "' K9
"$  "';:
"3  ', S4
'  $ QD
  $) VB
$S  :=
K   K=
GS   h;
Is  -=
K  />
L#  #:;
d   K;
L#    h9
NC  #-;
d  #/<
s r   r?   F)initc                   R    \ rS rSr% \" SSS0S9r\\S'   S rSS	 jr	SS
 jr
S rSrg)TrainingArgsi  Fr!   zeUse the configuration of the model, default will only use the parameters in the CLI and the dataclassr"   use_model_configc                     [        UR                  5       5      U l        [        U 5       H7  nUR                  U;   d  M  [        XR                  XR                     5        M9     0 U l        g N)listkeysmanual_argsr   namesetattr_unknown_args)selfkwargsfs      r   __init__TrainingArgs.__init__  sM    .AvvfffVVn5   r   Nc                 ,   [        U 5      nUR                  U5      u  p4U Vs/ s H  nUS;  d  M  SU;  d  M  UPM     nn0 n[        S[        U5      S5       H(  n[	        XGS-      5      XdU   R                  SS5      '   M*     [        U5      nU =R                  UR                  -  sl        U R                  R                  U5        [        U5      R                  5        H(  u  pU	c  M
  [        X	5      (       d  M  [        X	U
5        M*     U $ s  snf )zKConstruct a TrainingArg class by the parameters of CLI.

Returns:
    Self
)\
z--local-rank=r      r_   - )r	   parse_known_argsranger   r   replacevarsr   r   updater   itemshasattrr   )r   parser_argsparserargsunknownitem_unknowni	args_dictr   r   s              r   	parse_cliTrainingArgs.parse_cli  s	    #4(//<$
$T<' ,;4,G W 	 
 q#g,*A4?A4OHQZ''R01 +J	F...!!(+"9-335JC74#5#55) 6 
s   
DDDc                 h   [        5       n[        R                  " 5       nUc  U R                  n[	        U 5       H  nUR
                  R                  S5      nUR
                  R                  S5      =(       d    S nUbq  UR                  U R                  ;   d  U(       dN  [        U[        5      (       a  U/nU H.  nUR                  Xv" [        XR                  5      5      05        M0     M  M  [        XR                  5      X4R                  '   M     UR                  U R                  5        X#4$ )zaConvert the TrainingArgs to the `Config`

Returns:
    The Config, and extra parameters in dict.
r:   rT   c                     U $ r}   r.   xs    r   <lambda>(TrainingArgs.to_config.<locals>.<lambda>  s    Ar   )r
   addictDictr{   r   r$   getr   r   r   r   merge_from_dictgetattrr   )r   ignore_default_configcfgr   r   r:   rT   _nodes           r   	to_configTrainingArgs.to_config  s     hKKM	 ($($9$9!Azz~~j1H5F+J#66T---5J!(C00$,:!)++"JwtVV/D$EFH "* 6K %,D&&$9	&&!  	D../~r   c                 T    [        U 5      nU H  nUR                  U:X  d  M  Us  $    g r}   )r   r   )r   r   _fieldsr   s       r   get_metadataTrainingArgs.get_metadata  s*    ,Avv}  r   )r   r   r}   )r/   r0   r1   r2   r   r{   rv   r3   r   r   r   r   r4   r.   r   r   rz   rz     s;     #P
d  .8r   rz   c                 h   SSK Jn  / n/ n[        U S5       n[        R                  " U5      nU GH  nUR                  " S0 US   D6R                  5       nUR                  nUS   R                  5       n	U V
s/ s H  oU	;  d  M
  U
PM     nn
SSKJ	n  SSKJ
n  SSKJn  UR                  R                  5        Vs/ s H  oDS   U	;   d  M  UPM     nn0 nU H?  n[        US	   U5      (       a  U" US	   R                  5      UUS   '   M4  US	   UUS   '   MA     U" U5      nUR!                  S
 UUS9R#                  US   5      nUS   n[        U[$        5      (       a6  US;   d   eUS:X  a  UR'                  U5        GMT  UR'                  U5        GMh  [        U[(        5      (       a  SUs=:  a  S	:  d   e   eUR+                  US9nUR'                  US   5        UR'                  US   5        GM     SSS5        SSKJn  U" U5      U" U5      4$ s  sn
f s  snf ! , (       d  f       N.= f)a  
The filename format:
[
    {
        "dataset": {
            "dataset_name": "xxx",
            ...
        },
        "column_mapping": {
            "text1": "sequence1",
            "text2": "sequence2",
            "label": "label",
        }
        "usage": 0.8,
    }
]
r   )	MsDatasetrdatasetcolumn_mapping)Features)Value)
ClassLabelr_   c                     U $ r}   r.   r   s    r   r   )build_dataset_from_file.<locals>.<lambda>"  s    !r   )remove_columnsfeaturesusage)trainvalr   )
train_sizetestN)concatenate_datasetsr.   )
modelscoper   openjsonloadto_hf_datasetcolumn_namesr   datasetsr   r   r   r   r   r   dtypemaprename_columnsr   appendrw   train_test_splitr   )filenamer   	train_seteval_setr   ds_jsondsr   all_columnskeep_columnscolumnr   r   r   r   r   new_featuresr   ds_dictr   s                       r   build_dataset_from_filer     s8   $ %IH	h	))A,Bnn5r)}5CCEG!..K./446L%0%06,4N[   *&+"++1133at|7K3   LadJ//).qtzz):L1&)*1L1&	 
 $L1Lkk-% " ' (6~b9I6J'K  wKE%%% 0000G#$$W-OOG,!%//AMMAAMAA!22e2D  !120G  
N .	*,@,JJJE 
	s7   A#H#:	HH0H#=HHD(H#
H##
H1r   returnc                 $   SSSSS S S S.nX;   a  X   $ SU ;   d  SU ;   a"  U R                  SS5      R                  SS5      $ [        R                  " SU 5      (       a  [        U 5      $ [        R                  " SU 5      (       a  [	        U 5      $ U $ )	NTF)TruetrueFalsefalseNonenonenull"'r   z^\d+$z4[+-]?(?=\d*[.eE])(?=\.?\d)\d*\.?\d*(?:[eE][+-]?\d+)?)r   rematchru   rw   )r   	const_maps     r   r   r   6  s    I 	}}S"%--c266	(E	"	"5z	I
 
U|r   )r   copyr   dataclassesr   r   r   typingr   r   r   r   'modelscope.trainers.cli_argument_parserr	   modelscope.utils.configr
   modelscope.utils.constantr   r   r   r   r6   r?   rz   r   rw   rv   r   r.   r   r   <module>r      s    	  0 0    E * ?eCcN3  L L L^   6 ^ ^ ^B	 I;	9 I IX>KBs uS%t%;< r   