
    9i                     ~   S SK r S SKJr  S SKJrJrJr  S SKrS SK	r	S SK
Jr  S SKJs  Jr  S SKJs  Jr  S SKJr  S SKJrJr  S SKJrJrJrJrJrJrJr  S SK J!r!  S SK"J#r#   " S	 S
\RH                  5      r% " S S\RH                  5      r&\RN                  " \!RP                  \RR                  S9 " S S\5      5       r*g)    N)OrderedDict)AnyDictUnion)Models)MODELS
TorchModel)BasicResBlockCAMDenseTDNNBlock
DenseLayer	StatsPool	TDNNLayerTransitLayerget_nonlinear)Tasks)create_devicec                   D   ^  \ rS rSr\SS/SS4U 4S jjrS rS rSrU =r	$ )	FCM          P   c           	        > [         [        U ]  5         X0l        [        R
                  " SUSSSSS9U l        [        R                  " U5      U l        U R                  XUS   SS9U l
        U R                  XUS   SS9U l        [        R
                  " UUSSSSS9U l        [        R                  " U5      U l        X4S	-  -  U l        g )
N      F)kernel_sizestridepaddingbiasr   r   )r   )r   r      )superr   __init__	in_planesnnConv2dconv1BatchNorm2dbn1_make_layerlayer1layer2conv2bn2out_channels)selfblock
num_blocks
m_channelsfeat_dim	__class__s        `/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/audio/sv/DTDNN.pyr"   FCM.__init__   s    
 	c4!##YYzqAEK
>>*-&&z!}Q ' 8&&z!}Q ' 8 YY
 >>*-&a-8    c                     U/S/US-
  -  -   n/ nU H8  nUR                  U" U R                  X$5      5        X!R                  -  U l        M:     [        R                  " U6 $ )Nr   )appendr#   	expansionr$   
Sequential)r/   r0   planesr1   r   strideslayerss          r5   r)   FCM._make_layer4   s^    (aSJN33FMM%?@#oo5DN  }}f%%r7   c                    UR                  S5      n[        R                  " U R                  U R	                  U5      5      5      nU R                  U5      nU R                  U5      n[        R                  " U R                  U R                  U5      5      5      nUR                  nUR                  US   US   US   -  US   5      nU$ )Nr   r   r   r   )	unsqueezeFrelur(   r&   r*   r+   r-   r,   shapereshape)r/   xoutrD   s       r5   forwardFCM.forward<   s    KKNffTXXdjjm,-kk#kk#ffTXXdjjo./		kk%(E!HuQx$7qB
r7   )r(   r-   r&   r,   r#   r*   r+   r.   )
__name__
__module____qualname____firstlineno__r
   r"   r)   rH   __static_attributes____classcell__r4   s   @r5   r   r      s)     %F	94&	 	r7   r   c                   B   ^  \ rS rSr        SU 4S jjrS rSrU =r$ )CAMPPlusH   c	                   > [         [        U ]  5         [        US9U l        U R                  R
                  n	Xl        [        R                  " [        S[        U	USSSSUS94/5      5      U l        Un	[        [        SS	S
5      5       Hw  u  n
u  pn[        UU	UXC-  UUUUS9nU R                  R                  SU
S-   -  U5        XU-  -   n	U R                  R                  SU
S-   -  [!        XS-  SUS95        U	S-  n	My     U R                  R                  S[#        Xi5      5        U R                  S:X  aM  U R                  R                  S[%        5       5        U R                  R                  S['        U	S-  USS95        OU R                  S:X  d   S5       eU R)                  5        H  n[+        U[        R,                  [        R.                  45      (       d  M4  [        R0                  R3                  UR4                  R6                  5        UR8                  c  Mv  [        R0                  R;                  UR8                  5        M     g )N)r3   tdnn   r   r   )r   dilationr   
config_str)   r      )r   r   r   )r   r   r   )
num_layersin_channelsr.   bn_channelsr   rX   rY   memory_efficientzblock%dz	transit%dF)r   rY   out_nonlinearsegmentstatsdense
batchnorm_)rY   framez6`output_level` should be set to 'segment' or 'frame'. )r!   rR   r"   r   headr.   output_levelr$   r;   r   r   xvector	enumeratezipr   
add_moduler   r   r   r   modules
isinstanceConv1dLinearinitkaiming_normal_weightdatar   zeros_)r/   r3   embedding_sizegrowth_ratebn_sizeinit_channelsrY   r_   rg   channelsir\   r   rX   r0   mr4   s                   r5   r"   CAMPPlus.__init__J   s    	h&(*	99))(}}" *,-
 
 !6?L)Y7792A2
%%$(#1'!%!13E LL##IQ$7?{"::HLL##q1u%!m%)+,
 NH%79( 	 -j C	E 	)LL##GY[9LL##qL.\KL
 $$/m1mm/A!bii344''666%GGNN166*	  r7   c                     UR                  SSS5      nU R                  U5      nU R                  U5      nU R                  S:X  a  UR	                  SS5      nU$ )Nr   r   r   re   )permuterf   rh   rg   	transpose)r/   rF   s     r5   rH   CAMPPlus.forward   sQ    IIaAIIaLLLO'Aq!Ar7   )rf   rg   rh   )r   i   r         zbatchnorm-reluTra   )rJ   rK   rL   rM   r"   rH   rN   rO   rP   s   @r5   rR   rR   H   s.      #","&'@+D r7   rR   )module_namec                   P   ^  \ rS rSrSrS\\\4   4U 4S jjrS r	S r
S rSrU =r$ )	SpeakerVerificationCAMPPlus   zA fast and efficient speaker embedding model, using a 2-dimensional convolution residual network as the head
and a densely connected time delay neural network as the backbone.
Args:
    model_dir: A model dir.
    model_config: The model config.
model_configc                   > [         TU ]  " X/UQ70 UD6  X l        X@l        U R                  S   U l        U R                  S   U l        [        U R                  S   5      U l        [        U R                  U R
                  5      U l	        US   nU R                  U5        U R                  R                  U R                  5        U R                  R                  5         g )N	fbank_dimemb_sizedevicepretrained_model)r!   r"   r   other_configfeature_dimr   r   r   rR   embedding_model._SpeakerVerificationCAMPPlus__load_check_pointtoeval)r/   	model_dirr   argskwargspretrained_model_namer4   s         r5   r"   $SpeakerVerificationCAMPPlus.__init__   s    B4B6B(",,[9))*5#D$5$5h$?@'(8(8$--H &'9 : 56,!!#r7   c                    [        U[        R                  5      (       a  [        R                  " U5      n[        UR                  5      S:X  a  UR                  S5      n[        UR                  5      S:X  d   S5       eU R                  U5      nU R                  UR                  U R                  5      5      nUR                  5       R                  5       $ )Nr   r   r   zFmodelscope error: the shape of input audio to model needs to be [N, T])rm   npndarraytorch
from_numpylenrD   rA   -_SpeakerVerificationCAMPPlus__extract_featurer   r   r   detachcpu)r/   audiofeature	embeddings       r5   rH   #SpeakerVerificationCAMPPlus.forward   s    eRZZ(($$U+Eu{{q OOA&EKK
 	YX	Y  ((/((DKK)@A	!%%''r7   c                    / nU Hc  n[         R                  " UR                  S5      U R                  S9nXDR	                  SSS9-
  nUR                  UR                  S5      5        Me     [        R                  " U5      nU$ )Nr   )num_mel_binsT)dimkeepdim)KaldifbankrA   r   meanr9   r   cat)r/   r   featuresaur   s        r5   __extract_feature-SpeakerVerificationCAMPPlus.__extract_feature   st    BkkQd.>.>@GD AAGOOG--a01	 
 99X&r7   c                     U R                   R                  [        R                  " [        R
                  R                  U R                  U5      [        R                  " S5      S9SS9  g )Nr   )map_locationT)strict)	r   load_state_dictr   loadospathjoinr   r   )r/   r   s     r5   __load_check_point.SpeakerVerificationCAMPPlus.__load_check_point   sO    ,,JJT^^-BC"\\%02 	 	- 	r7   )r   r   r   r   r   r   )rJ   rK   rL   rM   __doc__r   strr   r"   rH   r   r   rN   rO   rP   s   @r5   r   r      s0    $S#X $"( r7   r   )+r   collectionsr   typingr   r   r   numpyr   r   torch.nnr$   torch.nn.functional
functionalrB   torchaudio.compliance.kaldi
compliancekaldir   modelscope.metainfor   modelscope.modelsr   r	   'modelscope.models.audio.sv.DTDNN_layersr
   r   r   r   r   r   r   modelscope.utils.constantr   modelscope.utils.devicer   Moduler   rR   register_modulespeaker_verificationcampplus_svr    r7   r5   <module>r      s    
 # # #      + + & 0D D D
 , 1-")) -`Jryy JZ 	F,>,>@5* 5@5r7   