
    9i                     4   S SK r S SKJrJr  S SKrS SKJr  S SKJ	r	  S SK
Jr  S SKJrJr  S SKJr  S SKJr  S S	KJr  S
SKJr  S
SKJr  \R2                  " \	R4                  S9 " S S\5      5       r\R2                  " \	R8                  S9 " S S\5      5       rg)    N)TupleUnion)nn)Trainers)build_metric)Model
TorchModel)Preprocessor)Config)ModeKeys   )TRAINERS)EpochBasedTrainer)module_namec                      ^  \ rS rSrSrU 4S jrS rS\4S jrS\	\
R                  \4   4S jrS\\\4   4S	 jrS
rU =r$ )NlpEpochBasedTrainer   aY  Add code to adapt with nlp models.

This trainer will accept the information of labels&text keys in the cfg, and then initialize
the nlp models/preprocessors with this information.

Labels&text key information may be carried in the cfg like this:

>>> cfg = {
>>>     ...
>>>     "dataset": {
>>>         "train": {
>>>             "first_sequence": "text1",
>>>             "second_sequence": "text2",
>>>             "label": "label",
>>>             "labels": [1, 2, 3, 4],
>>>         },
>>>         "val": {
>>>             "first_sequence": "text3",
>>>             "second_sequence": "text4",
>>>             "label": "label2",
>>>         },
>>>     }
>>> }

To view some actual finetune examples, please check the test files listed below:
tests/trainers/test_finetune_sequence_classification.py
tests/trainers/test_finetune_token_classification.py
c                 l   > S U l         S U l        S U l        S U l        S U l        [
        TU ]  " U0 UD6  g N)label2idid2label
num_labels
train_keys	eval_keyssuper__init__)selfargskwargs	__class__s      _/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/nlp_trainer.pyr   NlpEpochBasedTrainer.__init__2   s8    $)&)    c                     UR                   R                  R                  n[        U5       VVs0 s H  u  p4XC_M	     snnU l        [        U5       VVs0 s H  u  p4X4_M	     snnU l        [        U5      U l        S nU" UR                  S5      5      U l
        U" UR                  S5      5      U l        [        U R                  5      S:X  a  U R                  U l        g g s  snnf s  snnf ! [         a     Nf = f)Nc                     U b(  [        U SS 5      [        U SS 5      [        U SS 5      S.nO0 nUR                  5        VVs0 s H  u  p#Uc  M
  X#_M     snn$ s  snnf )Nfirst_sequencesecond_sequencelabel)r&   r'   r(   )getattritems)cfg
input_keyskvs       r!   build_dataset_keys?NlpEpochBasedTrainer.prepare_labels.<locals>.build_dataset_keysC   sf    &-c3CT&J'.s4Et'L$S'48
  
%/%5%5%7I%7TQ1DAD%7IIIs   	AAzdataset.trainzdataset.valr   )datasettrainlabels	enumerater   r   lenr   AttributeErrorsafe_getr   r   )r   r+   r3   idxr(   r/   s         r!   prepare_labels#NlpEpochBasedTrainer.prepare_labels:   s    	[[&&--F:CF:KL:KJCUZ:KLDM:CF:KL:KJCSZ:KLDM!&kDO
	J -S\\/-JK+CLL,GHt~~!#!__DN $) ML 		s-   .C/ C#C/ C)#C/ #C/ /
C<;C<r+   c                 h   U R                   b  U R                  U5      nU R                  U5        [        UR                  S5      (       dg  [        UR                  S5      (       dL  U R                  b  U R                  UR                  S'   U R
                  b  U R
                  UR                  S'   U$ )Nr   r   )cfg_modify_fnr9   hasattrmodelr   r   )r   r+   s     r!   rebuild_config#NlpEpochBasedTrainer.rebuild_configT   s    )$$S)CC syy*--g		:7' 7'}}((,		*%}}((,		*%
r#   returnc                 R   U R                   c  0 OSU R                   0n[        R                  " U R                  4SU R                  0UD6n[        U[        R                  5      (       d  [        US5      (       a  UR                  $ [        U[        R                  5      (       a  U$ g)zInstantiate a pytorch model and return.

By default, we will create a model using config from configuration file. You can
override this method in a subclass.

Nr   cfg_dictr>   )
r   r   from_pretrained	model_dirr+   
isinstancer   Moduler=   r>   )r   
model_argsr>   s      r!   build_model NlpEpochBasedTrainer.build_model`   s      ??2R$//9

 %%NN=%)XX=1;=%++w0G0G;;ryy))L *r#   c                    U R                   c  0 OSU R                   0n[        R                  " U R                  4U R                  [
        R                  S.UDU R                  D[
        R                  SS.D6n[        R                  " U R                  4U R                  [
        R                  S.UDU R                  D[
        R                  SS.D6nX#4$ )zyBuild the preprocessor.

User can override this method to implement custom logits.

Returns: The preprocessor instance.

r   )rC   preprocessor_modeT)modeuse_fast)
r   r
   rD   rE   r+   r   TRAINr   EVALr   )r   
extra_argstrain_preprocessoreval_preprocessors       r!   build_preprocessor'NlpEpochBasedTrainer.build_preprocessorq   s      ==0R7

 *99NNXX&nn 	
 oo  )88NNXX&mm 	
 nn  "44r#   )r   r   r   r   r   )__name__
__module____qualname____firstlineno____doc__r   r9   r   r?   r   r   rG   r	   rI   r   r
   rT   __static_attributes____classcell__)r    s   @r!   r   r      sS    :*-4
& 
U299j#89 "5E,*D$E 5 5r#   r   c                       \ rS rSrSS jrSrg)VecoTrainer   Nc           	      .   SSK Jn  Ub  SSKJn  UR	                  X5        U R
                  R                  5         [        R                  U l	        0 nU R                  c4  U R                  U R                  U R                  U R                  S9U l
        SnSn[        U R                  U5      (       a:  U R                  R                  U5        [!        U R                  R"                  5      n U R$                  " U R                  40 U R                  R&                  R)                  S0 5      D6U l        U R*                  U l        U R.                   Vs/ s H  n[1        U5      PM     nnU H	  n	X	l        M     U R5                  U R*                  U5        [7        U5       H?  u  pSU S3U;  a	  0 USU S3'   UR9                  5       USU S3   U R.                  U
   '   MA     US-  nXV:  a  U R                  R                  U5        OOGM  U R.                   Hj  nUR;                  5        V	s/ s H  oU   PM	     nn	US   R=                  5        H.  n[>        R@                  " U Vs/ s H  owU   PM	     sn5      XN'   M0     Ml     U$ s  snf s  sn	f s  snf )	z1Veco evaluates the datasets one by one.

        r   )VecoDataset)LoadCheckpointHook)	model_cfgrM   preprocessorr   
dataloaderzeval_dataset[])!1modelscope.msdatasets.dataset_cls.custom_datasetsra   modelscope.trainers.hooksrb   load_checkpointr>   evalr   rP   _modeeval_datasetbuild_dataset_from_cfgr+   rS   rF   switch_datasetr5   datasets_build_dataloader_with_dataset
evaluationgeteval_dataloaderdata_loadermetricsr   trainerevaluation_loopr4   evaluatevalueskeysnpaverage)r   checkpoint_pathra   rb   metric_valuesr8   dataset_cntmetricmetric_classesmm_idx
metric_clsmetric_nameall_metricskeys                  r!   rx   VecoTrainer.evaluate   sj    	R&D..E

]]
$ $ ; ;((ZZ!33 !< !5D
 d''55,,S1d//889K#'#F#F!!$P%)XX%8%8%<%<\2%N$PD #33DAENvl62NN# 	 $  !5!5~F%.~%>!"3%q)><>MM#a"89+5+>+>+@ cU!45LL') &? 1HC !!005) ,  <<K3@3G3G3IJ3Ia[>3IKJ"1~**,%'ZZ/:;{VC[{;&=" - ( / O$ K <s   J-J'J)rk   rt   rs   rl   r   )rV   rW   rX   rY   rx   r[    r#   r!   r^   r^      s    4r#   r^   )ostypingr   r   numpyr{   torchr   modelscope.metainfor   modelscope.metrics.builderr   modelscope.models.baser   r	   modelscope.preprocessorsr
   modelscope.utils.configr   modelscope.utils.constantr   baser   rv   r   register_modulenlp_base_trainerr   nlp_veco_trainerr^   r   r#   r!   <module>r      s    
    ( 3 4 1 * .  & 
h&?&?@{5, {5 A{5| 
h&?&?@6& 6 A6r#   