
    9i                     "   S SK r S SKrS SKrS SKrS SKJrJr  S SKrS SKr	S SK
rS SKrS SKJr  S SKJr  S SKJr  S SKJrJr  S SKJr  S SKJrJr  S	rS
rSrS
rSrS
r \RB                  " \RD                  \RF                  S9 " S S\5      5       r$g)    N)AnyDict)File)	Pipelines)
OutputKeys)InputPipeline)	PIPELINES)	ModelFileTasksi  i  hammingi   )module_namec                      ^  \ rS rSrSrSrU 4S jrS\S\\	\
4   4S jrS rS\\	\
4   S\\	\
4   4S	 jrS
 rS\\	\
4   S\\	\
4   4S jrSrU =r$ )ANSDFSMNPipeline   a\  ANS (Acoustic Noise Suppression) inference pipeline based on DFSMN model.

Args:
    stream_mode: set its work mode, default False
    In stream model, it accepts bytes as pipeline input that should be the audio data in PCM format.
    In normal model, it accepts str and treat it as the path of local wav file or the http link of remote wav file.
i  c                   >^	 [         T
U ]  " SSU0UD6  [        R                  R	                  U R
                  R                  [        R                  5      n[        R                  R                  U5      (       a:  [        R                  " X0R                  SS9nU R
                  R                  U5        U R
                  R                  5         UR                  SS5      U l        U R                   (       a  ["        [$        U R
                  R&                  S-
  -  -   S-  n[(        R*                  " US9U l        [/        [$        S-  5       H  nU R,                  R1                  S	5        M      [$        S-  ["        -
  S-  U l        SU l        ["        [$        -
  S-  U l        [        R8                  " [:        SU R                  S
9m	U	4S jnS nXpl        Xl        g )NmodelT)map_locationweights_onlystream_modeF      )maxlen    )periodicdevicec           
      P   > [         R                  " U [        [        [        STSS9$ )NF)centerwindowreturn_complex)torchstftN_FFT
HOP_LENGTHSTFT_WIN_LEN)xr   s    m/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/pipelines/audio/ans_dfsmn_pipeline.pyr"   'ANSDFSMNPipeline.__init__.<locals>.stftC   s)    ::$& &    c           	      L    [         R                  " U [        [        [        SUS9$ )NF)
hop_length
win_lengthr   r   length)librosaistftr$   r%   WINDOW_NAME_HAM)r&   slens     r'   r/   (ANSDFSMNPipeline.__init__.<locals>.istftM   s%    ==%'& r)    ) super__init__ospathjoinr   	model_dirr   TORCH_MODEL_BIN_FILEexistsr!   loadr   load_state_dictevalgetr   WINLENSTRIDElordercollectionsdequebufferrangeappendbyte_length_remainfirst_forwardtensor_give_up_lengthhamming_windowr%   r"   r/   )selfr   kwargsmodel_bin_file
checkpointbyte_buffer_lengthir"   r/   r   	__class__s            @r'   r5   ANSDFSMNPipeline.__init__)   si   /u//djj&:&:&/&D&DF77>>.))[[tMJJJ&&z2

!::mU; &DJJ$5$5$9::a? %++3EFDK6A:&""5) ' (.zF':a&?D#!%D*06/a)?D&%%5>	&	 	
r)   inputsreturnc           	         U R                   (       GaR  [        U[        5      (       d  [        S5      e[	        U5      U R
                  R                  :  a.  [        S[	        U5       SU R
                  R                   35      e/ nSnU R                  [	        U5      -   U-
  [        S-  :  GaG  [        S-  U R                  -
  n[        UXE-   5       H=  nU R
                  R                  X   R                  S[        R                  SS95        M?     [        R                   " 5       nU R
                   H  nUR#                  U5        M     [$        R&                  " UR)                  5       [$        R*                  S	9n	[,        R.                  " U	5      R1                  [,        R2                  5      n
UR                  U
5        SU l        XE-  nU R                  [	        U5      -   U-
  [        S-  :  a  GMG  [        U[	        U5      5       HR  nU R
                  R                  X   R                  S[        R                  SS95        U =R                  S-  sl        MT     S
U0$ [        U[4        5      (       a  [6        R8                  " U5      nO0[        U[        5      (       a  UnO[        S[1        U5       S35      eU R;                  U5      n
S
U
0$ )Nz"Only support bytes in stream mode.zinputs length too large: z > r   r   r   F)	byteordersigneddtypeaudiozUnsupported type .)r   
isinstancebytes	TypeErrorlenrE   r   
ValueErrorrH   rA   rF   rG   to_bytessysrW   ioBytesIOwritenp
frombuffer	getbufferint16r!   
from_numpytypeFloatTensorstrr   readbytes2tensor)rL   rT   preprocess_paramstensor_listcurrent_indexbyte_length_to_addrQ   bytes_iobdatadata_tensor
data_bytess               r'   
preprocessANSDFSMNPipeline.preprocessY   sz   fe,, DEE6{T[[/// /F}C@R@R?ST  KM))C- +,/5z:%+aZ$2I2I%I"},ACAKK&&vy'9'9S]]5 (: (B CC ::<ANN1% %}}X%7%7%9J#..t499%:K:KL"";/*+'3 ))C- +,/5z: =#f+6""69#5#5u $6 $> ?''1,' 7 [))&#&&!YYv.
FE**#
"3DL> CDD++J7K[))r)   c                    [         R                  " [        R                  " U5      5      u  p#UR	                  [
        R                  5      n[        UR                  5      S:  a	  US S 2S4   nX0R                  :w  a  [        R                  " X#U R                  S9nUS-  n[        R                  " U5      R                  [        R                  5      nU$ )Nr   r   )orig_sr	target_sri   )sfro   rd   re   astyperg   float32r`   shapeSAMPLE_RATEr.   resampler!   rk   rl   rm   )rL   
file_bytesdata1fsrw   rx   s         r'   rp   ANSDFSMNPipeline.bytes2tensor   s    GGBJJz23	RZZ(u{{a!Q$KE!!!$$T-=-=?Eu}&&t,11%2C2CDr)   c                 d   U R                   (       a  [        R                  " 5       nUS    H  nU R                  U5      nU R                  (       a  US U R
                  *  nSU l        O#U[        * S  nXPR
                  U R
                  *  nUR                  UR                  [        R                  5      R                  5       5        M     UR                  5       nOCUS   nU R                  U5      nUR                  [        R                  5      R                  5       n[        R                  U0$ )Nr[   F)r   rd   re   _forwardrI   rJ   r@   rf   r   rg   rj   tobytesgetvaluer   
OUTPUT_PCM)rL   rT   forward_paramsru   origin_audio
masked_sigoutputss          r'   forwardANSDFSMNPipeline.forward   s    zz|H &w!]]<8
%%!+,Hd.H.H-H!IJ).D&!+VGH!5J!+,F,F,A,AHB "CJz00:BBDE !0 '')G!'?L|4J ''199;G%%w//r)   c                 V   [         R                  " 5          UR                  S5      nSS KnUR                  R
                  R                  USSSSU R                  [        S9nUR                  S5      nU R                  U5      nU R                  U5      nUR                  SSS5      nXe-  R                  5       nS S S 5        WR                  5       R                  5       nUS S 2S S 2S4   S	US S 2S S 2S4   -  -   nU R                  U[!        U5      5      n	U	$ ! , (       d  f       Ne= f)
Nr   g      ?g      D@g      4@x   )ditherframe_lengthframe_shiftnum_mel_binssample_frequencywindow_typer   r   y              ?)r!   no_grad	unsqueeze
torchaudio
compliancekaldifbankr   r0   r   r"   permutecpudetachnumpyr/   r`   )
rL   r   audio_inr   fbanksmasksspectrummasked_specmasked_spec_complexr   s
             r'   r   ANSDFSMNPipeline._forward   s   ]]_#--a0H**0066!  !%!1!1+ 7 -F %%a(FJJv&Eyy.HMM!Q*E#+002K   "((*002)!Q'2R+aAg:N5NNZZ 3S5FG
' _s   B%D
D(c                     U R                   (       dk  SUR                  5       ;   aW  [        R                  " US   [        R
                  " U[        R                     [        R                  S9U R                  5        U$ )Noutput_pathrY   )
r   keysr   rf   rg   rh   r   r   rj   r   )rL   rT   rM   s      r'   postprocessANSDFSMNPipeline.postprocess   sY    MV[[]$BHH}%fZ%:%:;288L  " r)   )rE   rH   rI   r/   r"   r   rJ   )__name__
__module____qualname____firstlineno____doc__r   r5   r   r   rn   r   rz   rp   r   r   r   __static_attributes____classcell__)rR   s   @r'   r   r      s     K.`&* &*S#X &*P
0d38n 0%)#s(^0*,$sCx. tCH~  r)   r   )%rC   rd   r6   rc   typingr   r   r.   r   rg   	soundfiler   r!   modelscope.fileior   modelscope.metainfor   modelscope.outputsr   modelscope.pipelines.baser   r	   modelscope.pipelines.builderr
   modelscope.utils.constantr   r   r$   r#   r0   r%   r@   rA   register_moduleacoustic_noise_suppressionspeech_dfsmn_ans_psm_48k_causalr   r3   r)   r'   <module>r      s     	 	 
      " ) ) 5 2 6
		 	$$99;_x _;_r)   