
    9i                         S SK r S SKrS SKJr  S SKJrJr  S SKrS SKr	S SK
r
S SKJr  S SKJr  S SKJr  S SKJr  SS	KJr  SS
KJr   " S S\5      rg)    N)Path)AnyDict)CompositeAudioFeatureTransform)S2TDataConfig)pre_chinese)ModeKeys   )OfaBasePreprocessor)
Text2Phonec                      ^  \ rS rSr\R
                  4U 4S jjrS\\\	4   S\\\	4   4S jr
S\\\	4   S\\\	4   4S jrS\\\	4   S\\\	4   4S jrS rS	 rS
rU =r$ )OfaASRPreprocessor   c                 N  > [         [        U ]
  " XU/UQ70 UD6  [        [	        [
        R                  R                  US5      5      5      U l        [        R                  " U R                  R                  SS5      5      U l        [        R                  " U R                  R                  SS5      5      U l        [        [
        R                  R                  US5      5      U l        U R!                  [
        R                  R                  US5      5      u  U l        U l        g)	zpreprocess the data

Args:
    cfg(modelscope.utils.config.ConfigDict) : model config
    model_dir (str): model path,
    mode: preprocessor mode (model mode)
zfbank_config.yamltrainTtestFztext2phone_dict.txtzphone_dict.txtN)superr   __init__r   r   ospathjoindata_cfgr   from_config_dictget_feature_transformstrain_audio_feature_transformstest_audio_feature_transformsr   text2phone_tokenizerbuild_phone_dictphone_to_idid_to_phone)selfcfg	model_dirmodeargskwargs	__class__s         `/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/preprocessors/ofa/asr.pyr   OfaASRPreprocessor.__init__   s     	 $0 	; 	;39	; &i)<=>@.L.].]MM00$?/A+-K-\-\MM00?.A*$.GGLL$9:%<!-1-B-BGGLL$45.7*$*    datareturnc                     U R                   [        R                  :X  a  U R                  U5      $ U R	                  U5      $ )N)r$   r	   TRAIN_build_train_sample_build_infer_sample)r!   r+   s     r(   __call__OfaASRPreprocessor.__call__2   s4    99&++D11++D11r*   c                    [         R                  " / SQ5      nU R                  XR                  S      5      n[        R
                  " USSS9u  pEU R                  [        R                  " U/[        R                  S9UUSSS9n[        R                  " S/5      nUUXR                  S      S	.nUS
   n	U R                  S:X  a)  [        XR                  5      n	U R                  U	SS9US'   OxU	R                  U R                  5      R!                  5       n	U	R!                  5       R#                  5       n
SR%                  U
S U R                   5      n	U R                  U	SS9US'   U R'                  U	5      S-   n[        R                  " S/5      nUS-   US'   XS'   XS'   [        R(                  " U R*                  US   S S /5      US'   U$ )N)g?      ?g?wav>  Tsrmonodtypetarget_sample_rateis_traintext)fbank
fbank_masklabelrB   zhF)add_bostarget r
      
phone_itemphone_target
phone_maskprev_output_tokens)randomchoiceget_audio_bytes
column_maplibrosaloadprepare_fbanktorchtensorfloat32languager   max_tgt_lengthtokenize_text	translatetranstabstripsplitr   to_phonecatbos_item)r!   r+   speedaudio_bytesr5   r8   r@   rA   samplerE   target_token_listrH   rJ   s                r(   r/   &OfaASRPreprocessor._build_train_sample8   s   o.**40F+GH,,{u4@""LL#emm4$ #  \\4&)
$//&12
 ==D  )<)<=F#11&%1HF8%%dmm4::<F & 4 4 6XX/0D1D1DEFF#11&%1HF8]]6*Q.
\\5'*
)A~|!+~)|',yy]]F8,Sb12(4#$r*   c                    SnU R                  XR                  S      5      n[        R                  " USSS9u  pEU R	                  [
        R                  " U/[
        R                  S9UUSSS9n[
        R                  " S/5      nXgS	.nS
U R                  ;   a'  U R                  S
   U;   a  XR                  S
      US'   [
        R                  " / SQ5      US'   [
        R                  " S/5      US'   U$ )Nr4   r5   r6   Tr7   r:   Fr<   )r@   rA   r?   rB   )   rg   rg   rH   rJ   )rO   rP   rQ   rR   rS   rT   rU   rV   )	r!   r+   ra   rb   r5   r8   r@   rA   rc   s	            r(   r0   &OfaASRPreprocessor._build_infer_sample^   s    **40F+GH,,{u4@""LL#emm4$ #  \\4&)
 ;T__$)@D)H"??6#:;F7O  %||I6|$||UG4|r*   c                     U R                   R                  U5      n[        R                  " UR	                  S5       Vs/ s H  o0R
                  U   PM     sn5      nU$ s  snf )NrF   )r   transrT   rU   r]   r   )r!   r?   phonesxidss        r(   r^   OfaASRPreprocessor.to_phoneu   sT    **006llc9JK9JA,,Q/9JKL
 Ls   A c                     [        5       n[        5       n[        US5       n[        U5       H/  u  pVUR                  5       R	                  S5      S   nXRU'   X#U'   M1     S S S 5        X#4$ ! , (       d  f       X#4$ = f)NrrF   r   )dictopen	enumerater\   r]   )r!   phone_dict_pathr   r    phone_dict_fileilinephones           r(   r   #OfaASRPreprocessor.build_phone_dictz   s{    ff/3'?$_5

**3/2%&E"!,A 6 (
 '' ('
 ''s   ?A++
A;)r   r    r   r   r   r   )__name__
__module____qualname____firstlineno__r	   	INFERENCEr   r   strr   r1   r/   r0   r^   r   __static_attributes____classcell__)r'   s   @r(   r   r      s    
 ((762T#s(^ 2S#X 2$S#X $4S> $LS#X 4S> .
( (r*   r   )r   rM   pathlibr   typingr   r   rQ   	soundfilesfrT   %fairseq.data.audio.feature_transformsr   )fairseq.data.audio.speech_to_text_datasetr   modelscope.utils.chinese_utilsr   modelscope.utils.constantr	   baser   utils.text2phoner   r    r*   r(   <module>r      s?    
      # C 6 . % (m(, m(r*   