
    9i(                     :   S SK r S SKJr  S SKrS SKJr  S SKJs  Jr  S SK	J
r
  S SKJr  S SKJr  S SKJr  S SKJrJr  SS	KJrJr  SS
KJr  \R2                  " \R4                  \
R6                  S9 " S S\5      5       r " S S\R:                  5      rS rSS jr g)    N)Dict)Models)
TorchModel)Tensor)MODELS)	ModelFileTasks   )	ConviSTFTConvSTFT)UNet)module_namec                   Z   ^  \ rS rSrSrS\4U 4S jjrS\\\4   S\\\4   4S jr	Sr
U =r$ )	FRCRNDecorator   z?A decorator of FRCRN for integrating into modelscope framework 	model_dirc                   > [         TU ]  " U/UQ70 UD6  [        U0 UD6U l        [        R
                  R                  U[        R                  5      n[        R
                  R                  U5      (       at  [        R                  " U[        R                  " S5      S9n[        U[        5      (       a  SU;   a  U R                  US   5        gU R                  R                  USS9  gg)zbinitialize the frcrn model from the `model_dir` path.

Args:
    model_dir (str): the model path.
cpu)map_location
state_dictF)strictN)super__init__FRCRNmodelospathjoinr   TORCH_MODEL_BIN_FILEexiststorchloaddevice
isinstancedictload_state_dict)selfr   argskwargsmodel_bin_file
checkpoint	__class__s         a/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/audio/ans/frcrn.pyr   FRCRNDecorator.__init__   s     	4T4V4D+F+
i&/&D&DF77>>.))U\\%-@BJ*d++
0J$$Z%=> 

**:e*D *    inputsreturnc                    U R                   R                  US   5      nUS   US   US   US   US   US   S.nS	U;   a  U R                   R                  US   US	   US
S9nUR                  U5        U R                   R                  US   US	   USS9nUR                  U5        U Vs0 s H  ofXF   R	                  5       _M     snUS'   US   R                  U Vs0 s H  nXeU   R	                  5       _M     sn5        U$ s  snf s  snf )Nnoisyr   r
               )spec_l1wav_l1mask_l1spec_l2wav_l2mask_l2cleanMix)modeSiSNRlog_vars)r   forwardlossupdateitem)r'   r0   result_listoutput
mix_resultsisnr_resultks          r-   rC   FRCRNDecorator.forward,   s6   jj((9"1~!!n"1~"1~!!n"1~
 fw+E ) KJMM*%::??w+G + MLMM,'CM!N:aZ]%7%7%9"9:!NF::%%&(&Q O((**&() 	 "O(s   *DD	)r   )__name__
__module____qualname____firstlineno____doc__strr   r   r   rC   __static_attributes____classcell__r,   s   @r-   r   r      sA     LE# E(d3;/ Df4E  r/   r   c                   b   ^  \ rS rSrSr    S
U 4S jjrS rS rSS jrSS jr	SS jr
S	rU =r$ )r   E   zFrequency Recurrent CRN c
           	        > [         TU ]  5         US-  S-   U l        X`l        Xpl        Xl        Xl        Sn[        U R                  U R                  U R
                  U R                  SUS9U l        [        U R                  U R                  U R
                  U R                  SUS9U l
        [        SUUUUS9U l        [        SUUUUS9U l        g)aL  
Args:
    complex: Whether to use complex networks.
    model_complexity: define the model complexity with the number of layers
    model_depth: Only two options are available : 10, 20
    log_amp: Whether to use log amplitude to estimate signals
    padding_mode: Encoder's convolution filter. 'zeros', 'reflect'
    win_len: length of window used for defining one frame of sample points
    win_inc: length of window shifting (equivalent to hop_size)
    fft_len: number of Short Time Fourier Transform (STFT) points
    win_type: windowing type used in STFT, eg. 'hanning', 'hamming'
r4   r
   Tcomplex)feature_typefix)rY   model_complexitymodel_depthpadding_modeN)r   r   feat_dimwin_lenwin_incfft_lenwin_typer   stftr   istftr   unetunet2)r'   rY   r\   r]   log_ampr^   r`   ra   rb   rc   r)   r[   r,   s               r-   r   FRCRN.__init__H   s    . 	1q( LLLLLLMM"	 LLLLLLMM"
 -#%'	 -#%'
r/   c           	         / nU R                  U5      n[        R                  " US5      n[        R                  " US S 2S S 2S U R                  2S S 24   US S 2S S 2U R                  S 2S S 24   /S5      n[        R                  " US5      n[        R
                  " USS5      nU R                  U5      n[        R                  " U5      nU R                  U5      n[        R                  " U5      nU R                  X55      u  pn
UR                  U5        UR                  U	5        UR                  U
5        Xu-   nU R                  X75      u  pn
UR                  U5        UR                  U	5        UR                  U
5        U$ )Nr
   r6   )rd   r!   	unsqueezecatr_   	transposerf   tanhrg   
apply_maskappend)r'   r0   out_listcmp_spec	unet1_out	cmp_mask1	unet2_out	cmp_mask2est_specest_wavest_masks              r-   rC   FRCRN.forward   sF   99V$??8Q/ 99Q>DMM>1,-Q4==>1,-
  ??8Q/??8Q2IIh'	JJy)	JJy)	JJy)	&*ooh&J#8! !)	&*ooh&J#8! !r/   c                    [         R                  " US S 2S S 2S S 2S S 2S4   US S 2S S 2S S 2S S 2S4   -  US S 2S S 2S S 2S S 2S4   US S 2S S 2S S 2S S 2S4   -  -
  US S 2S S 2S S 2S S 2S4   US S 2S S 2S S 2S S 2S4   -  US S 2S S 2S S 2S S 2S4   US S 2S S 2S S 2S S 2S4   -  -   /S5      n[         R                  " US S 2SS S 2S S 24   US S 2SS S 2S S 24   /S5      n[         R                  " US5      n[         R                  " US S 2S S 2S S 2S4   US S 2S S 2S S 2S4   /S5      nU R                  U5      n[         R                  " US5      nX4U4$ )Nr   r
   )r!   rl   squeezere   )r'   rr   cmp_maskrw   rx   s        r-   ro   FRCRN.apply_mask   sW   99Q1a]#hq!Q1}&==q!Q1}%Aq!Q(??@Q1a]#hq!Q1}&==q!Q1}%Aq!Q(??@

  99hq!Qz2HQ1aZ4HI1M==1-99hq!Qz2HQ1aZ4HI1M**X&--+(**r/   c                 v    / / p2U R                  5        H  u  pESU;   a  X5/-  nM  X%/-  nM     UUS.USS./nU$ )Nbias)paramsweight_decay        )named_parameters)r'   r   weightsbiasesnameparamr   s          r-   
get_paramsFRCRN.get_params   sa    b002KD~'!7"	 3 (
 
 r/   c                    US:X  a_  SnU[        U5      :  aE  X5   nUS-   nX5   nUS-   nX5   nUS-   nUS:w  a  U R                  XXrX5      n	U[        U5      :  a  ME  [        W	S9$ US:X  ak  SnU[        U5      :  aO  X5   nUS-   nX5   nUS-   nX5   nUS-   nUS:w  a  U R                  XXrX5      u  pnX-   U-   n	U[        U5      :  a  MO  [        W	W
WS9$ g )NrA   r   r
   r5   )sisnrr?   )rD   amp_loss
phase_loss)lenloss_1layerr%   )r'   r3   labelsrq   r@   countrw   rx   ry   rD   r   r   
SiSNR_losss                r-   rD   
FRCRN.loss   s    7?E#h-'#?	"/	#?	A:++EW,4<D #h-' d##U]E#h-'#?	"/	#?	A:7;7G7G(8J4H*#0:=D #h-' THLL r/   c                    US:X  ab  UR                  5       S:X  a  [        R                  " US5      nUR                  5       S:X  a  [        R                  " US5      n[        X45      * $ US:X  Ga  UR                  5       S:X  a  [        R                  " US5      nUR                  5       S:X  a  [        R                  " US5      n[        X45      * nUR	                  5       u  pn
U R                  U5      nUSS2SU R                  2SS24   nUSS2U R                  S2SS24   nU R                  U5      nUSS2SU R                  2SS24   nUSS2U R                  S2SS24   nUS-  US-  -   n[        R                  " X-  UU-  -   US-   -  X-  UU-  -
  US-   -  /S5      nSUUS:  '   SUUS	:  '   [        R                  " USS2SU R                  2SS24   USS2SU R                  2SS24   5      U	-  n[        R                  " USS2U R                  S2SS24   USS2U R                  S2SS24   5      U	-  nUUU4$ g)
zCompute the loss by mode
mode == 'Mix'
    est: [B, F*2, T]
    labels: [B, F*2,T]
mode == 'SiSNR'
    est: [B, T]
    labels: [B, T]
rA   r5   r
   r?   Nr4   :0yE>)
dimr!   r|   si_snrsizerd   r_   rl   Fmse_loss)r'   r3   estrx   r   r}   r@   r   bdtSSrSiYYrYiY_powgth_maskr   r   s                        r-   r   FRCRN.loss_1layer   s:    7?zz|q vq1{{}!--37+++U]zz|q vq1{{}!--3 11JhhjGA!		&!A1nt}}na'(B1dmmna'(B		% A1nt}}na'(B1dmmna'(BEBEMEyy27R"W#4"F#%7R"W#4"F"HIJLH%&HX\"&(HX]#zz(1nt}}na+?"@"*1nt}}na+?"@BDEFHHQ-A$B$,Q-A$BDFGHJZ331 r/   )	r_   rb   re   rd   rf   rg   ra   r`   rc   )i  d   i   hann)r   )r?   )rM   rN   rO   rP   rQ   r   rC   ro   r   rD   r   rS   rT   rU   s   @r-   r   r   E   s:    $  9'v@+"M<'4 '4r/   r   c                 6    [         R                  " X-  SSS9nU$ )Nr   T)keepdim)r!   sum)s1s2norms      r-   l2_normr     s    99RWb$/DKr/   c                     [        X5      n[        X5      nX4U-   -  U-  nX-
  n[        XU5      n[        Xf5      nS[        R                  " XxU-   -  U-   5      -  n	[        R                  " U	5      $ )N
   )r   r!   log10mean)
r   r   eps
s1_s2_norm
s2_s2_norms_targete_noisetarget_norm
noise_normsnrs
             r-   r   r     so    JJ#-.3HmG(-K*J
u{{K,<=CD
DC::c?r/   )r   )!r   typingr   r!   torch.nnnntorch.nn.functional
functionalr   modelscope.metainfor   modelscope.modelsr   modelscope.models.baser   modelscope.models.builderr   modelscope.utils.constantr   r	   	conv_stftr   r   rf   r   register_moduleacoustic_noise_suppressionspeech_frcrn_ans_cirm_16kr   Moduler   r   r    r/   r-   <module>r      s    	      & ( ) , 6 *  	$$002-Z -2-`C4BII C4L
r/   