
    9i4                         S SK r S SKrS SKrS SKJrJrJr  S SKrS SKJ	r	  S SK
Jr  S SKJr  S SKJr  S SKJr  S SKJrJrJrJrJrJr  S S	KJr  \" 5       r\R8                  " \R:                  S
9 " S S\5      5       rg)    N)DictOptionalUnion)build_trainer)Trainers)	MsDataset)BaseTrainer)TRAINERS)DEFAULT_DATASET_NAMESPACEDEFAULT_DATASET_REVISIONDEFAULT_MODEL_REVISION	ModelFileTasksTrainerStages)
get_logger)module_namec                       \ rS rSrSrSSSS\SSSS4	S\S\S\S	\S
\\	\
\4      S\\   S\\   S\\   S\\   S\\   4S jjrS rS rSS jrS rS\S\\\4   4S jrSrg)
ASRTrainer   dataNFsmallmodelwork_dirdistributeddataset_typedata_dirmodel_revision
batch_bins	max_epochlrmate_paramsc                    U(       ds  [         R                  " 5       R                  U l        [        R
                  R                  U R                  5      (       d   [        R                  " U R                  5        OX l        [        R
                  R                  U R                  5      (       d  [        U R                   S35      e[        R                  SU R                   35        [        R
                  R                  U R                  U R                  5      U l        SU l        X0l        X@l        ["        R$                  " U R                  SS9  [        R                  " U R                  SS9  [        R
                  R                  U5      (       a  UnOU R'                  X5      nXl        [        R
                  R                  U R(                  S5      U l        U R-                  U R*                  5      U l        SU;  a)  U R1                  XPR                  5      u  U l        U l        O
US   U l        [6        R6                  " U R.                  U R                  U R                  U R                  U R                   UUU	U
S	9	U l        g
)a  ASR Trainer.

Args:
    model (str) : model name
    work_dir (str): output dir for saving results
    distributed (bool): whether to enable DDP training
    dataset_type (str): choose which dataset type to use
    data_dir (str): the path of data
    model_revision (str): set model version
    batch_bins (str): batch size
    max_epoch (int): the maximum epoch number for training
    lr (float): learning rate
    mate_params (dict): for saving other training args
Examples:

>>> import os
>>> from modelscope.metainfo import Trainers
>>> from modelscope.msdatasets import MsDataset
>>> from modelscope.trainers import build_trainer
>>> ds_dict = MsDataset.load('speech_asr_aishell1_trainsets')
>>> kwargs = dict(
>>>     model='damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',
>>>     data_dir=ds_dict,
>>>     work_dir="./checkpoint")
>>> trainer = build_trainer(
>>>     Trainers.speech_asr_trainer, default_args=kwargs)
>>> trainer.train()

z not existszSet workdir to  Tignore_errorsexist_okzconfiguration.jsonraw_data_dir)	modelscope_dictr   
output_dirr   r   r   r   r    r!   N)tempfileTemporaryDirectorynamer   ospathexistsmakedirs	ExceptionloggerinfojoinDATA_DIRr   raw_dataset_pathr   r   shutilrmtreeget_or_download_model_dir	model_dir	model_cfg	parse_cfgcfg_dictload_dataset_raw_pathtrain_data_dirdev_data_dirr   trainer)selfr   r   r   r   r   r   r   r   r    r!   kwargsr;   s                e/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/trainers/audio/asr_trainer.py__init__ASRTrainer.__init__   s   R $779>>DM77>>$--00DMM*$Mww~~dmm,,t}}o[9::odmm_56T]]DMMB "&(dmm48
DMMD177>>%  I66uMI"dnn6JKt~~6)595O5O--6)2D!2 %^4DM$22 MM]]}}((**!#	%    c                 v   [         R                  R                  U5      n[        5       n[	        USSS9 n[
        R                  " U5      nUS   S   S   US'   X#S'   [         R                  R                  X%S   S   5      US	'   [         R                  R                  X%S   S   S
   5      US
'   [         R                  R                  US5      US'   [         R                  R                  X%S   S   S   5      US'   [         R                  R                  US5      US'   SUS   S   ;   a,  [         R                  R                  X%S   S   S   5      US'   OS US'   SUS   S   ;   a,  [         R                  R                  X%S   S   S   5      US'   OUS	   US'   S S S 5        U$ ! , (       d  f       U$ = f)Nrzutf-8)encodingr   model_configmoder;   am_model_nameam_model_fileam_model_configzfinetune.yamlfinetune_configmvn_file	cmvn_fileseg_dictbpemodel
init_model)r.   r/   dirnamedictopenjsonloadr5   )rC   cfg_filecur_dirr>   fconfigs         rE   r=   ASRTrainer.parse_cfgq   s   ''//(+6(C'2aYYq\F%g~>vFHV$+[!(*9);H_%*,'',,89JK+MH&'*,'',,w7F+HH&'$&GGLL8D%FH[!#%77<<#DHZ VG_^<<')ww||G_^<ZH(J$ (,$vg~>>)+G_^<\J*L& *2/)B&- 3. / 32. s   E*F))
F8c                     SU;  a  [        SR                  U5      5      eU R                  XSS9nSU;  a  [        SR                  U5      5      eU R                  XSS9nX44$ )Ntrainz*dataset {0} does not contain a train split)split
validationz(dataset {0} does not contain a dev split)r2   formatprepare_data)rC   datasetoutput_data_dirr@   rA   s        rE   r?    ASRTrainer.load_dataset_raw_path   s    '!<CCGLN N**G + 5w&:AA'JL L((L ) :++rH   c                    [         R                  R                  X#5      n[        R                  " USS9  [         R
                  " USS9  [        X   5      n[        [         R                  R                  US5      S5      n[        [         R                  R                  US5      S5      n[        U5       H  nX   U   n	U	S   n
U	S   nUR                  S	R                  [         R                  R                  U
5      U
/5      S
-   5        UR                  S	R                  [         R                  R                  U
5      U/5      S
-   5        M     UR                  5         UR                  5         U$ )NTr$   r&   zwav.scpwtextz
Audio:FILEz
Text:LABEL	
)r.   r/   r5   r8   r9   r1   lenrY   rangewritebasenameclose)rC   rg   out_base_dirrc   out_dirdata_cnt
fp_wav_scpfp_texticontentwav_filerl   s               rE   rf   ASRTrainer.prepare_data   s   '',,|3gT2
Gd+w~&"'',,w	:C@
rww||GV4c:xAnQ'G|,H<(DTYY(8(8(BH'MN#$ %MM$))RWW%5%5h%?$FG$NO ! 	rH   c                 8    U R                   R                  5         g N)rB   run)rC   argsrD   s      rE   rb   ASRTrainer.train   s    rH   checkpoint_pathreturnc                     [         er~   )NotImplementedError)rC   r   r   rD   s       rE   evaluateASRTrainer.evaluate   s    !!rH   )r>   r   r   rA   r   r<   r;   r7   r@   rB   r   )rb   )__name__
__module____qualname____firstlineno__r6   r   strboolr   r   r   intfloatrX   rF   r=   r?   rf   rb   r   r   __static_attributes__ rH   rE   r   r      s    H "&%*%,=A1G-1,0'+/3T%T%T% #T%  #	T%
 $E)S.$9:T% "*#T% &c]T% %SMT% e_T% 'tnT%l8,$" ""3:."rH   r   )r.   r8   r+   typingr   r   r   rZ   
funasr.binr   modelscope.metainfor   modelscope.msdatasetsr   modelscope.trainers.baser	   modelscope.trainers.builderr
   modelscope.utils.constantr   r   r   r   r   r   modelscope.utils.loggerr   r3   register_modulespeech_asr_trainerr   r   rH   rE   <module>r      sl    	   ( (  $ ( + 0 0= = /	 
h&A&ABY" Y" CY"rH   