
    9i*                         S r SSKrSSKrSSKJrJrJr  SSKrSSK	r	SSK
Jr  SSKJs  Jr  SSKJs  Jr  SSKJs  Js  Js  Jr  SSKJr  SSKJrJr  SSKJr  SSKJ r    " S S	\RB                  5      r" " S
 S\RF                  5      r$ " S S\RF                  5      r%\RL                  " \RN                  \RP                  S9 " S S\5      5       r)g)ag  Res2Net implementation is adapted from https://github.com/Res2Net/Res2Net-PretrainedModels.
Res2Net is an advanced neural network architecture that enhances the capabilities of standard ResNets
by incorporating hierarchical residual-like connections. This innovative structure improves
performance across various computer vision tasks, such as image classification and object
detection, without significant computational overhead.
Reference: https://arxiv.org/pdf/1904.01169.pdf
Some modifications from the original architecture:
1. Smaller kernel size for the input layer
2. Smaller expansion in BasicBlockRes2Net
    N)AnyDictUnion)Models)MODELS
TorchModel)Tasks)create_devicec                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )ReLU   c                 0   > [         [        U ]  SSU5        g )Nr      )superr   __init__)selfinplace	__class__s     b/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/audio/sv/Res2Net.pyr   ReLU.__init__   s    dD"1b'2    c                 j    U R                   (       a  SOSnU R                  R                  S-   U-   S-   $ )Nr    z ())r   r   __name__)r   inplace_strs     r   __repr__ReLU.__repr__"   s6    #'<<iR~~&&-  	 r    )F)r   
__module____qualname____firstlineno__r   r   __static_attributes____classcell__r   s   @r   r   r      s    3   r   r   c                   6   ^  \ rS rSrSrSU 4S jjrS rSrU =r$ )BasicBlockRes2Net(      c                 F  > [         [        U ]  5         [        [        R
                  " X$S-  -  5      5      n[        R                  " XU-  SUSS9U l        [        R                  " Xe-  5      U l
        US-
  U l        / n/ n[        U R                  5       HN  n	UR                  [        R                  " XfSSSS95        UR                  [        R                  " U5      5        MP     [        R                  " U5      U l        [        R                  " U5      U l        [#        SS9U l        [        R                  " Xe-  X R&                  -  SSS	9U l        [        R                  " X R&                  -  5      U l        [        R,                  " 5       U l        US:w  d  XR&                  U-  :w  a`  [        R,                  " [        R                  " UU R&                  U-  SUSS9[        R                  " U R&                  U-  5      5      U l        X0l        X`l        XPl        g )
Ng      P@   F)kernel_sizestridebias   )r,   paddingr.   T)r   )r,   r.   )r   r'   r   intmathfloornnConv2dconv1BatchNorm2dbn1numsrangeappend
ModuleListconvsbnsr   relu	expansionconv3bn3
Sequentialshortcutr-   widthscale)r   	in_planesplanesr-   	baseWidthrF   rE   r=   r>   ir   s             r   r   BasicBlockRes2Net.__init__+   s   /1DJJvT)9:;<YYu}!FP
>>%-0AI	tyy!ALL		%AquMOJJr~~e,- " ]]5)
==%&	YYM6NN2O
>>&>>"9:Q;)~~'>>MM		NNV+ !! 
 "$0G!HJDM 

r   c                    UnU R                  U5      nU R                  U5      nU R                  U5      n[        R                  " X0R
                  S5      n[        U R                  5       Hp  nUS:X  a  XE   nOWXE   -   nU R                  U   " U5      nU R                  U R                  U   " U5      5      nUS:X  a  UnMX  [        R                  " X64S5      nMr     [        R                  " X4U R                     4S5      nU R                  U5      nU R                  U5      nU R                  U5      nX2-  nU R                  U5      nU$ )Nr+   r   )r6   r8   r?   torchsplitrE   r:   r9   r=   r>   catrA   rB   rD   )r   xresidualoutspxrJ   sps          r   forwardBasicBlockRes2Net.forwardL   s   jjmhhsmiinkk#zz1-tyy!AAvV#&[Ar"B488A;r?+BAvii	1- " ii$))n-q1jjohhsm==#iin
r   )r8   rB   r>   r6   rA   r=   r9   r?   rF   rD   r-   rE   )r+       r)   )	r   r    r!   r"   r@   r   rU   r#   r$   r%   s   @r   r'   r'   (   s    IB r   r'   c                   J   ^  \ rS rSr\/ SQSSSSS4U 4S jjrS	 rS
 rSrU =r	$ )Res2Netk   )r/         r/   rW   P      TSTPFc           	        > [         [        U ]  5         X0l        X@l        XPl        [        US-  5      U-  S-  U l        Xpl        [        R                  " SUSSSSS9U l        [        R                  " U5      U l        U R                  XUS   SS9U l        U R                  XS-  US   SS9U l        U R                  XS	-  US   SS9U l        U R                  XS-  US   SS9U l        US
:X  d  US:X  a  SOSU l        [)        [*        U5      " U R                  UR,                  -  S9U l        [        R0                  " U R                  UR,                  -  U R&                  -  U5      U l        U R                  (       a6  [        R4                  " USS9U l        [        R0                  " XU5      U l        g [        R:                  " 5       U l        [        R:                  " 5       U l        g )N   r+   r/   F)r,   r-   r0   r.   r   )r-   r)   r[   TAPTSDP)in_dim)affine)r   rY   r   rG   feat_dimembedding_sizer1   	stats_dimtwo_emb_layerr4   r5   r6   r7   r8   _make_layerlayer1layer2layer3layer4n_statsgetattrpooling_layersr@   poolLinearseg_1BatchNorm1dseg_bn_1seg_2Identity)	r   block
num_blocks
m_channelsrf   rg   pooling_funcri   r   s	           r   r   Res2Net.__init__m   s    	gt%'# ,X\*Z7!;*YYzqAEK
>>*-&&z!}Q ' 8&&>:a= ' <&&>:a= ' <&&>:a= ' < )E1\V5KqQRNL9>>EOO35	YYt~~?$,,N-/
NN>%HDM>BDJKKMDMDJr   c                     U/S/US-
  -  -   n/ nU H8  nUR                  U" U R                  X$5      5        X!R                  -  U l        M:     [        R                  " U6 $ )Nr+   )r;   rG   r@   r4   rC   )r   ry   rH   rz   r-   strideslayerss          r   rj   Res2Net._make_layer   s^    (aSJN33FMM%?@#oo5DN  }}f%%r   c                    UR                  SSS5      nUR                  S5      n[        R                  " U R	                  U R                  U5      5      5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  (       a:  [        R                  " U5      nU R                  U5      nU R                  U5      nU$ U$ )Nr   r)   r+   )permute
unsqueeze_Fr?   r8   r6   rk   rl   rm   rn   rr   rt   ri   rv   rw   )r   rP   rR   statsembed_aembed_bs         r   rU   Res2Net.forward   s    IIaALLOffTXXdjjm,-kk#kk#kk#kk#		#**U#&&/C--$CjjoGNNr   )r8   r6   rg   rf   rG   rk   rl   rm   rn   ro   rr   rt   rw   rv   rh   ri   )
r   r    r!   r"   r'   r   rj   rU   r#   r$   r%   s   @r   rY   rY   k   s/     )( #$$&'P& r   rY   )module_namec                   T   ^  \ rS rSrSrS\\\4   4U 4S jjrS r	S r
S	S jrSrU =r$ )
SpeakerVerificationResNet   zG
Args:
    model_dir: A model dir.
    model_config: The model config.
model_configc                   > [         TU ]  " X/UQ70 UD6  X l        U R                  S   U l        U R                  S   U l        X@l        SU l        [        U R
                  S   5      U l        [        U R                  U R                  S9U l
        US   nU R                  U5        U R                  R                  U R                  5        U R                  R                  5         g )N	embed_dimchannelsr]   device)rg   r{   pretrained_model)r   r   r   r   r{   other_configfeature_dimr
   r   rY   embedding_model,_SpeakerVerificationResNet__load_check_pointtoeval)r   	model_dirr   argskwargspretrained_model_namer   s         r   r   "SpeakerVerificationResNet.__init__   s    B4B6B(**;7++J7"#D$5$5h$?@&>>doo G !''9 : 56,!!#r   c                    [        U[        R                  5      (       a  [        R                  " U5      n[        UR                  5      S:X  a  UR                  S5      n[        UR                  5      S:X  d   S5       eU R                  U5      nU R                  UR                  U R                  5      5      nUR                  5       R                  5       $ )Nr+   r   r)   zFmodelscope error: the shape of input audio to model needs to be [N, T])
isinstancenpndarrayrM   
from_numpylenshape	unsqueeze+_SpeakerVerificationResNet__extract_featurer   r   r   detachcpu)r   audiofeature	embeddings       r   rU   !SpeakerVerificationResNet.forward   s    eRZZ(($$U+Eu{{q OOA&EKK
 	YX	Y  ((/((DKK)@A	!%%''r   c                     [         R                  " XR                  S9nX"R                  SSS9-
  nUR	                  S5      nU$ )N)num_mel_binsr   T)dimkeepdim)Kaldifbankr   meanr   )r   r   r   s      r   __extract_feature+SpeakerVerificationResNet.__extract_feature   s@    ++e2B2BCLLQL==##A&r   c                     U(       d  [         R                  " S5      nU R                  R                  [         R                  " [
        R                  R                  U R                  U5      US9SS9  g )Nr   )map_locationT)strict)	rM   r   r   load_state_dictloadospathjoinr   )r   r   r   s      r   __load_check_point,SpeakerVerificationResNet.__load_check_point   sW    \\%(F,,JJT^^-BC#% 	 	- 	r   )r   r   r   r   r{   r   r   )N)r   r    r!   r"   __doc__r   strr   r   rU   r   r   r#   r$   r%   s   @r   r   r      s0    $S#X $&( r   r   )*r   r2   r   typingr   r   r   numpyr   rM   torch.nnr4   torch.nn.functional
functionalr   torchaudio.compliance.kaldi
compliancekaldir   )modelscope.models.audio.sv.pooling_layersmodelsr   svrq   modelscope.metainfor   modelscope.modelsr   r   modelscope.utils.constantr	   modelscope.utils.devicer
   Hardtanhr   Moduler'   rY   register_modulespeaker_verification
res2net_svr   r   r   r   <module>r      s   	  	 # #      + + B B & 0 + 1 2;;  @		 @FEbii EP 	F,=,=?5
 5?5r   