
    9i.                         S SK r S SKrS SKrS SKrS SKJr  S SKrS SKrS SK	r	S SK
r
S SKJr  S SKJr  S SKJrJr  S SKJr  S SKJr  SS	KJrJrJr  SS
KJr  SSKJr   " S S5      rg)    N)path)Image)File)OFATokenizerOFATokenizerZH)
load_image)Trie   )_get_kaldi_fbank_get_torchaudio_fbankconvert_waveform)OFA_TASK_KEY_MAPPING)set_torch_seedc                       \ rS rSrSrS rSS jr\SS j5       r\S 5       r	S r
S	 rS
 r  SS jrS\R                  4S jrSrg)OfaBasePreprocessor   z
OFA base preprocessor for
c                    Xl         X0l        U R                   R                  R                  SS5      U l        [
        R                  R                  U5      (       a  [
        R                  R                  U5      nU R                  S:X  a  [        R                  " U5      nO-U R                  S;   a  [        R                  " U5      nO[        eUR                  [        S5       Vs/ s H  nSR                  U5      PM     sn5        UR                  [        S5       Vs/ s H  nSR                  U5      PM     sn5        U R                   R                  R                  SS	5      S
:X  a  UR                  SS/5        X`l        ["        R$                  " UR&                  /5      U l        ["        R$                  " UR*                  /5      U l        ["        R$                  " UR.                  /5      U l        UR3                  5       R5                  5        VV	s0 s H  u  pX_M	     sn	n=U l        U l        UR                  R                  SS5      U l        UR                  R                  SS5      U l        UR                  R                  SS5      U l        U R                   R                  R                  SS5      U l        U R                   R                  R                  SS5      U l         U R                   R                  R                  SS5      n
[B        RD                  RG                  U
5        [I        U
5        U R                   R                  R                  SS5      nU(       a  / SQU l%        / SQU l&        O/ SQU l%        / SQU l&        U R                   R                  R                  SS5      U l'        [P        U R                   RR                      Vs0 s H  nX_M     snU l*        [W        U R                   S5      (       af  U R                   RX                  RT                  bE  U R                   RX                  RT                  R5                  5        H  u  pXRT                  U'   M     [Z        R]                  [^        R`                   Vs0 s H  nUS_M     sn5      U l1        SU l2        U R                   R                  R                  SS5      (       Ga/  [f        Rh                  " X R                   R                  Rj                  5      n[m        US S!S"9 n[n        Rp                  " U5      nSSS5        WU l9        U Rr                  R5                  5        VVs0 s H  u  pX_M	     snnU l:        [w        UR.                  5      U l2        [y        UR{                  5       5       H_  u  nnU R}                  S#U-   SSS$9nU Rd                  R                  UR&                  /UR                  5       -   UR.                  /-   5        Ma     SU lA        SU lB        gs  snf s  snf s  sn	nf s  snf s  snf ! , (       d  f       GN	= fs  snnf )%zpreprocess the data via the vocab.txt from the `model_dir` path

Args:
    cfg(modelscope.utils.config.ConfigDict) : model config
    model_dir (str): model path
languageen)zhcni    z	<code_{}>i  z<bin_{}>multimodal_typedefaulttext2sqlz>=z<=max_src_length   max_tgt_lengthmax_image_sizei   prompt_typenoneseed   imagenet_default_mean_and_stdF)g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?)      ?r$   r$   patch_image_sizei  datasetNanswer2labelrzutf-8)encoding )add_bosadd_eos)Ccfgmodemodelgetr   osr   existsabspathr   from_pretrainedr   NotImplementedError
add_tokensrangeformat	tokenizertorch
LongTensorbos_token_idbos_itempad_token_idpad_itemeos_token_ideos_item	get_vocabitemstgt_dictsrc_dictr   r   r   r   nprandomr!   r   meanstdr%   r   task
column_maphasattrr&   str	maketransstringpunctuationtranstabconstraint_trieospjoinr'   openjsonload	ans2label	label2ansr	   	enumeratekeystokenize_textinserttolisttrain_audio_feature_transformstest_audio_feature_transforms)selfr-   	model_dirr.   argskwargsr9   ikeyvaluer!   r#   kvans2label_filereaderans2label_dictansweranswer_items                      a/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/preprocessors/ofa/base.py__init__OfaBasePreprocessor.__init__   s    	**:t<77>>)$$	2I==D $44Y?I]]l*&66yAI%% 	U4[I[k003[IJE$KHKqj//2KHI88>>/;zI  $."(()*@*@)AB(()*@*@)AB(()*@*@)AB (11399;)
;
 J;)
 	
 "iimm,<cB!iimm,<cB!iimm,<cB**:t<88>>--mVDxx~~!!&!,
		tt(,(:(:+U)4%(-DI,DH'DI&DH $ 2 23Es K ,DHHMM:
: H:
 488 "&(("2"2"="="I((3399;%&" <**,* $Y*,-  $88>>nd33 XXi1L1LMNncG<!%6!2 =+DN/3~~/C/C/EF/Etqad/EFDN#'	(>(>#?D &~':':'<=	6"00&L% 1 @$$++Y-C-C,D.9.@.@.B-C/8/E/E.F-G H > /3+-1*q JH)
*
,
 =< Gs0   'X!X!X&X,X1X6 Y6
Yc                    Uc  gU R                  UU R                  SSSS9S   R                  S5      nU(       a"  [        R                  " U R
                  U/5      nU(       a!  [        R                  " X@R                  /5      nU$ )a  
Using `OFATokenizer` to tokenize text input.

Args:
    text (`str`): Input text.
    add_bos ('bool', **optional**, default to `True`)
        Whether or not to add beginning of sentence token in
        the front of sentence.
    add_eos ('bool', **optional**, default to `True`)
        Whether or not to add ending of sentence token in
        the end of sentence.
Returns:
    A list of tokens with the max length of `max_src_length + 2`
NFTpt)
max_lengthadd_special_tokens
truncationreturn_tensors	input_idsr   )r9   r   squeezer:   catr=   rA   )ra   textr+   r,   inputss        ro   r\   !OfaBasePreprocessor.tokenize_textk   s     <**$   !
 "-.
 /6gaj 	 YYv67FYY67F    Nc                    U R                  5       R                  S5      R                  SS5      R                  SS5      R                  SS5      n [        R                  " SSU 5      n U R                  S5      n U R                  S5      n U R                  S5      nUb#  [        U5      U:  a  SR                  US	U 5      n U $ )
a  
Preprocessing for text sentence.

step 1. Get the lower case of input text.
step 2. Remove the words within `,.!?*#:;~ ` in the beginning
    of the sentence.
step 3. Replace the words within `-/` or pattern `\s{2,}` with word ` `
    and replace tag `<person>` with `person`.
step 4. Remove the `\n` in the end of the sentence.
step 5. Split the sentence with token ` `, If `max_words` is not None,
    make a length truncation.

Args:
    caption (`str`): Input text.
    max_words (`int`, **optional**, default `None`):
        The max length of input text. If None, do nothing, else
        make a truncation.

Returns:
    A sequence of `str`.
	,.!?*#:;~-r*   /z<person>person\s{2,}
N
lowerlstripreplaceresubrstripstripsplitlenrT   )caption	max_wordscaption_wordss      ro   pre_captionOfaBasePreprocessor.pre_caption   s    . --/((5==c3GWS#wwz8< 	 &&

 ..&--$  c* S%7)%Chh}Zi89Gr~   c                 `   U R                  5       R                  S5      R                  SS5      R                  SS5      n [        R                  " SSU 5      n U R                  S5      n U R                  S5      n U R                  S5      n[        U5      U:  a  SR                  USU 5      n U $ )a  
Preprocessing for text sentence.
Note that this function is very similar to `pre_caption`, should be merged in the future version.

step 1. Get the lower case of input text.
step 2. Remove the words within `,.!?*#:;~ ` in the beginning
    of the sentence.
step 3. Replace the words within `-/` or pattern `\s{2,}` with word ` `.
step 4. Remove the `\n` in the end of the sentence.
step 5. Split the sentence with token ` `, If `max_words` is not None,
    make a length truncation.

Args:
    question (`str`): Input text.
    max_ques_words (`int`, **optional**, default `None`):
        The max length of input text. If None, do nothing, else
        make a truncation.

Returns:
    A sequence of `str`.
r   r   r*   r   r   r   Nr   )questionmax_ques_wordsquestion_wordss      ro   pre_question OfaBasePreprocessor.pre_question   s    . >>#**;7??@CEELWDGFN 	 66

 ??4(>>#& ",~/xx ?@Hr~   c                    US   nUR                  U R                  5      R                  SS9R                  5       nU R                  (       a  [
        R                  " [        U5      [        U R                  5      45      R                  5       n[        U5      U-
  n[        U[        U5      5       HR  nU R                  R                  5       X%U R                  5       -   nU R                  R                  U5      nSXF   U'   MT     XAS'   gg)z
Add constraint mask.
targetr   )dimTconstraint_maskN)ner?   sumitemrR   r:   zerosr   rD   boolr7   r=   r^   get_next_layer)	ra   sample
target_itmlen_label_itmr   	start_idxre   constraint_prefix_tokenconstraint_nodess	            ro   add_constraint_mask'OfaBasePreprocessor.add_constraint_mask   s     H%
"dmm488Q8?DDF#kkZ#dmm"4577;tv J-7I9c*o6*.--*>*> ++224+5'#'#7#7#F#F+$- 7;"#34 7 )8$%  r~   c                 b    [        U[        R                  5      (       a  UnU$ [        U5      nU$ )a&  
Get the pillow image. If the input is not a pillow image ,it will load
image from a local path or an external url.

Args:
    path_or_url_or_pil (`Union[str, Image]`):
        Can be:
            - A path or url reference to an image
            - A pillow image.
Returns:
    A pillow image.
)
isinstancer   r   )ra   path_or_url_or_pilimages      ro   get_img_pilOfaBasePreprocessor.get_img_pil   s5     '11CU[[&Q&Q" ./ 	r~   c                    [        U[        5      (       a  [        R                  " U5      nU$ [        U[        5      (       a.  [
        R                  " U5      n[        R                  " U5      nU$ [        S[        U5       S35      e)NzUnsupported input type: .)	r   bytesioBytesIOrM   r   read	TypeErrortype)ra   path_or_urlaudio_bytes
file_bytess       ro   get_audio_bytes#OfaBasePreprocessor.get_audio_bytes   su    k5))**[1K  S));/J**Z0K  6tK7H6IKLLr~   c           	         [         R                  R                  XS[        U5      /S[        U5      //5      u  p[	        XSSS9u  pgUS-  nUR                  5       n[        XbS5      nUc  [        XbS5      nUc  [        S5      eU(       a  U R                  b  U R                  U5      nO,U) (       a$  U R                  U5       b  U R                  U5      n[        R                  " U5      R                  5       nU R                  U5      nU$ )NspeedrateT)to_mononormalize_volumei   P   zGPlease install pyKaldi or torchaudio to enable fbank feature extraction)
torchaudiosox_effectsapply_effects_tensorrM   r   numpyr   r   ImportErrorr_   r`   r:   
from_numpyfloatpack_frames)	ra   waveformsample_rater   target_sample_rateis_train	_waveform_fbanks	            ro   prepare_fbank!OfaBasePreprocessor.prepare_fbank  s    !+ 6 6 K Ks5z"VS1C-D$EF!H (4$H	 '	OO%	 <=))"EE=Y  ;;G77>EY4=="#66u=E  '--/  'r~   featurec                     U R                   R                  S:X  a  U$ UR                  S   U R                   R                  -  nUS U R                   R                  U-   nUR                  US5      $ )Nr
   r   )r-   n_frames_per_stepshapereshape)ra   r   n_packed_framess      ro   r   OfaBasePreprocessor.pack_frames'  sc    88%%*N!--*dhh.H.HHG48855GH33r~   )rX   r=   r-   rK   rR   rA   rY   r   r   r   r   rH   r.   r?   r%   r   rE   rI   r`   rD   r9   r_   rQ   )TT)N)i>  F)__name__
__module____qualname____firstlineno____doc__rp   r\   staticmethodr   r   r   r   r   r   r:   Tensorr   __static_attributes__ r~   ro   r   r      si    L2\: & &P ' 'R8$" */$>45<< 4r~   r   )r   r1   r   rO   r   rS   rV   r   rF   r:   r   PILr   modelscope.fileior   !modelscope.models.multi_modal.ofar   r   modelscope.preprocessors.imager   modelscope.utils.trier	   utils.audio_helperr   r   r   utils.constantr   utils.random_helpr   r   r   r~   ro   <module>r      sK    	 	 	        " J 5 &3 3 0 -T4 T4r~   