
    9i!                        S SK r S SKrS SKJrJrJrJr  S SKrS SK	J s  J
r  S SKrS SKJr  S SKJr  S SKJr  S SKJrJr   " S S\5      rS	 r " S
 S5      r\R2                  " \R4                  5       " S S\5      5       rg)    N)AnyDictTupleUnion)File)Preprocessor)PREPROCESSORS)FieldsModeKeysc                   x   ^  \ rS rSrSr\R                  4S\S\4U 4S jjjrS\	\\
4   S\	\\
4   4S jrS	rU =r$ )
AudioBrainPreprocessor   zA preprocessor takes audio file path and reads it into tensor

Args:
    takes: the audio file field name
    provides: the tensor field name
    mode: process mode, default 'inference'
takesprovidesc                    > [         [        U ]
  " U/UQ70 UD6  Xl        X l        SS KnUR                  R                  R                  U l        g )Nr   )superr   __init__r   r   speechbraindataio
read_audio)selfr   r   modeargskwargssb	__class__s          ^/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/preprocessors/audio.pyr   AudioBrainPreprocessor.__init__   sC     	$d4TKDKFK
  ))**55    datareturnc                 \    U R                  XR                     5      nX!U R                  '   U$ N)r   r   r   )r   r    results      r   __call__AudioBrainPreprocessor.__call__&   s(    jj!12$T]]r   )r   r   r   )__name__
__module____qualname____firstlineno____doc__r   	INFERENCEstrr   r   r   r%   __static_attributes____classcell__)r   s   @r   r   r      sU     ((
6
6
6 
6T#s(^ S#X  r   r   c                    [        U SSS9nUR                  5       nUR                  S5      nUR                  SU5      nUR                  SU5      n[        R                  " X$S-   U [        R
                  SS	9nUR                  S
5      nUR                  SU5      nUR                  SU5      n[        R                  " X$S-   U [        R
                  SS	9nUR                  5         Xg4$ )Nrzutf-8)encodingAddShift[]    )dtypesepRescale)openreadfindnp
fromstringfloat32close)filenamefpall_strpos1pos2pos3meanscales           r   load_kaldi_feature_transformrJ   ,   s    	hg	.BggiG<<
#D<<T"D<<T"D==$/rzzsKD<<	"D<<T"D<<T"DMM'(40

LEHHJ;r   c                   4    \ rS rSrSr   SS jrS rS rSrg)	Feature;   z%Extract feat from one utterance.
    Nc                    Xl         X l        US   US   -  S-  U l        US   US   -  S-  U l        [        R
                  " U R                  SS9U l        SU l        Ub|  [        R                  R                  U5      (       aX  [        SU 35        [        U5      u  pV[        R                  " U5      U l        [        R                  " U5      U l        S	U l        U(       ap  U R                  R!                  5       U l        U R                  (       a?  U R                  R!                  5       U l        U R                  R!                  5       U l        ggg)
z

Args:
    fbank_config (dict):
    feat_type (str):
        raw: do nothing
        fbank: use kaldi.fbank
        spec: Real/Imag
        logpow: log(1+|x|^2)
    mvn_file (str): the path of data file for mean variance normalization
    cuda:
frame_lengthsample_frequencyi  frame_shiftF)periodicNzloading mvn file: T)fbank_config	feat_typen_fft
hop_lengthtorchhamming_windowwindowmvnospathexistsprintrJ   
from_numpyshiftrI   cuda)r   rS   rT   mvn_filera   r`   rI   s          r   r   Feature.__init__?   s   " )"!.1L5   #'(
&}59   #'(**4::FBGGNN8$<$<&xj127ALE))%0DJ))%0DJDH++**,DKxx!ZZ__.
!ZZ__.
  r   c           
      :   U R                   S:X  a  U$ U R                   S:X  aR  SSKJs  Jn  [	        UR
                  5      S:X  a  UR                  S5      nUR                  " U40 U R                  D6nU$ U R                   S:X  a  [        R                  " US-  U R                  U R                  U R                  U R                  SS	S
9n[        R                  " UR                  UR                   /SS9R#                  SS5      nU$ U R                   S:X  a  [        R                  " UU R                  U R                  U R                  U R                  SS	S
9n[        R$                  " U5      S-  n[        R&                  " SU-   5      R#                  SS5      nW$ )zE

Args:
    utt: in [-32768, 32767] range

Returns:
     [..., T, F]
rawfbankr   Nr6   speci   FT)centerreturn_complexdimlogpow   )rT   torchaudio.compliance.kaldi
compliancekaldilenshape	unsqueezerf   rS   rW   stftrU   rV   rY   catrealimagpermuteabslog)r   uttrr   featrg   abspows         r   computeFeature.computee   sV    >>U"J^^w&77399~"mmA&;;s8d&7&78D, + ^^v%::e



#%D 99dii3<DDRLD  ^^x'::



#%D YYt_a'F99QZ(00R8Dr   c                 `    U R                   (       a  XR                  -   nXR                  -  nU$ r#   )rZ   r`   rI   )r   r~   s     r   	normalizeFeature.normalize   s&    88**$D**$Dr   )rS   rT   rV   rZ   rU   rI   r`   rY   )rg   NF)	r'   r(   r)   r*   r+   r   r   r   r.    r   r   rL   rL   ;   s#    
 "	$/L&Pr   rL   c                   ^    \ rS rSrSrS rS\\\\	\
4   4   S\\	\
4   4S jr\S 5       rSrg	)
LinearAECAndFbank   i>  c                     SS K nSU R                  -  U l        US   U l        [	        US   US   US   5      U l        UR                  5       U l        US   S:H  U l        g )	Nr   i   linear_aec_delayrS   rT   rZ   mask_onnearend_mic)	MinDAECSAMPLE_RATEtrunc_lengthr   rL   featureloadmitaecmask_on_mic)r   	io_configr   s      r   r   LinearAECAndFbank.__init__   sh     4#3#33 )*< =y8(5y7GIlln$Y/=@r   r    r!   c           	         [        U[        5      (       aC  U R                  US   5      u  p#U R                  US   5      u  pC[        R                  " U5      nO_U R                  US   5      u  p#U R                  US   5      u  pCSU;   a  U R                  US   5      u  pSO[        R                  " U5      nU R
                  R                  X$5      u  pgp[        R                  " [        U R                  U-  5      /5      n
[        R                  " X/5      n[        [        U5      [        U5      [        U5      [        U	5      [        U5      5      nSn[        XR                  5      nXlU X|U XU XU X\U 4u  p'pn[        R                  " 5       n[        R                   " [        R"                  " U5      5      nU R$                  R'                  U5      n[        R(                  " X/SS9n[        R                   " [        R"                  " U5      5      nU R$                  R'                  U5      n[        R(                  " X/SS9n[        R                   " [        R"                  " U	5      5      n	U R$                  R'                  U	5      n[        R(                  " UU/SS9nU R$                  R+                  U5      nUb*  [        R                   " [        R"                  " U5      5      nU R,                  (       a  UnOUnUX]S.nU$ )u  Linear filtering the near end mic and far end audio, then extract the feature.

Args:
    data: Dict with two keys and correspond audios: "nearend_mic" and "farend_speech".

Returns:
    Dict with two keys and Tensor values: "base" linear filtered audio，and "feature"
r   r6   r   farend_speechnearend_speechrk   )basetargetr   )
isinstancetupleload_wavr>   
zeros_liker   do_linear_aeczerosintr   concatenateminrs   r   rW   FloatTensorr_   r@   r   r   rw   r   r   )r   r    r   fsr   r   out_micout_ref
out_linearout_echoextra_zerosflenfstartr~   fbank_nearend_micfbank_out_linearfbank_out_echor   out_datas                      r   r%   LinearAECAndFbank.__call__   s    dE"""mmDG4OK $d1g 6M]];7N #mmD,?@OK $d?.C DM4'%)]]48H3I%J"!#{!;151J1J2(.* hhD$9$9B$> ?@A(EFL#g,JX! 4**+4 '"6d#XT%:$'F)BjN   "&&rzz+'>? LL00=yy$2:%%bjj&<=
<<//
;yy$1q9##BJJx$89--h7yy$/Q7 ||%%d+ %"--bjj.HINDD NLr   c                 T   SS K n[        U [        5      (       a  [        R                  " U 5      n OZ[        U [
        5      (       a-  [        R                  " U 5      n[        R                  " U5      n O[        S[        U 5       S35      e[        R                  " U 5      u  p4[        UR                  5      S:  a  [        S5      eU[        R                  :w  a   UR!                  XC[        R                  5      nUR#                  [$        R&                  5      [        R                  4$ )Nr   zUnsupported input type: .r6   z(modelscope error:The audio must be mono.)librosar   bytesioBytesIOr-   r   r<   	TypeErrortypewavrs   rt   
ValueErrorr   r   resampleastyper>   r@   )inputsr   
file_bytessample_rater    s        r   r   LinearAECAndFbank.load_wav   s    fe$$ZZ'F$$6*JZZ
+F6tF|nAFGGHHV,tzz?QGHH+777##D$5$A$ACD{{2::&(9(E(EEEr   )r   r   r   r   r   N)r'   r(   r)   r*   r   r   r   r   r   r-   r   r%   staticmethodr   r.   r   r   r   r   r      sT    KAAU5$sCx.#89 Ad38n AF F Fr   r   )r   r[   typingr   r   r   r   numpyr>   scipy.io.wavfilewavfiler   rW   modelscope.fileior   modelscope.preprocessorsr    modelscope.preprocessors.builderr	   modelscope.utils.constantr
   r   r   rJ   rL   register_moduleaudior   r   r   r   <module>r      sy    
 	 * *     " 1 : 6\ 6V Vr v||,_F _F -_Fr   