
    9i:                        S r SSKrSSKrSSKJrJrJr  SSKrSSK	r	SSK
Jr  SSKJs  Jr  SSKJs  Jr  SSKJr  SSKJrJr  SSKJr  SSKJr  S!S jrS	\S
\S\S\4S jr " S S\R>                  5      r  " S S\R>                  5      r! " S S\R>                  5      r" " S S\	R                  R>                  5      r# " S S\R>                  5      r$ " S S\R>                  5      r% " S S\R>                  5      r& " S S\R>                  5      r'\RP                  " \RR                  \RT                  S9 " S S \5      5       r+g)"z[This ECAPA-TDNN implementation is adapted from https://github.com/speechbrain/speechbrain.
    N)AnyDictUnion)Models)MODELS
TorchModel)Tasks)create_devicec                    [        U R                  5      S:X  d   eUc,  U R                  5       R                  5       R	                  5       n[
        R                  " XR                  U R                  S9R                  [        U 5      U5      U R                  S5      :  nUc  U R                  nUc  U R                  n[
        R                  " XBUS9nU$ )N   )devicedtype)r   r   )lenshapemaxlongitemtorcharanger   r   expand	unsqueeze	as_tensor)lengthmax_lenr   r   masks        e/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/audio/sv/ecapa_tdnn.pylength_to_maskr      s    v||!!!**,##%**,<<V\\;;A6K<"$*$4$4Q$78D }~??4V<DK    L_instridekernel_sizedilationc                     US:  a9  [         R                  " XU-  -
  U-  S-   5      nXS-
  -  X#-  -   nUS-  US-  /nU$ XUS-
  -  -
  S-
  U-  S-   nX-
  S-  X-
  S-  /nU$ )Nr      )mathceil)r   r    r!   r"   n_stepsL_outpaddings          r   get_padding_elemr*   '   s    z))d8%;;vEJKA+&)??!#[A%56 N K!O44q8VCaGLQ&!(;<Nr   c                   T   ^  \ rS rSr      S	U 4S jjrS rS\S\S\4S jrSrU =r	$ )
Conv1d4   c
                    > [         T
U ]  5         X l        X@l        XPl        X`l        Xl        [        R                  " UUU R                  U R                  U R                  SUUS9U l	        g )Nr   )r    r"   r)   groupsbias)
super__init__r!   r    r"   r)   padding_modennr,   conv)selfout_channelsr!   in_channelsr    r"   r)   r/   r0   r3   	__class__s             r   r2   Conv1d.__init__6   sa     	& (II;;]]	
	r   c                    U R                   S:X  a2  U R                  XR                  U R                  U R                  5      nOnU R                   S:X  a5  U R                  S-
  U R                  -  n[
        R                  " XS45      nO)U R                   S:X  a  O[        SU R                   -   5      eU R                  U5      nU$ )Nsamecausalr   r   validz1Padding must be 'same', 'valid' or 'causal'. Got )	r)   _manage_paddingr!   r"   r    Fpad
ValueErrorr5   )r6   xnum_padwxs       r   forwardConv1d.forwardT   s    <<6!$$Q(8(8$--%)[[2A \\X%''!+t}}<Ga1&A\\W$ C,,    YYq\	r   r!   r"   r    c                 z    UR                   S   n[        XTX#5      n[        R                  " XU R                  S9nU$ )N)mode)r   r*   r@   rA   r3   )r6   rC   r!   r"   r    r   r)   s          r   r?   Conv1d._manage_paddingi   s8     wwr{"4GEE!4#4#45r   )r5   r"   r!   r)   r3   r    )r   r   r<   r   Treflect)
__name__
__module____qualname____firstlineno__r2   rF   intr?   __static_attributes____classcell__r9   s   @r   r,   r,   4   sI     
<*  	
  r   r,   c                   6   ^  \ rS rSr  SU 4S jjrS rSrU =r$ )BatchNorm1dw   c                 X   > [         TU ]  5         [        R                  " UUUS9U l        g )N)epsmomentum)r1   r2   r4   rV   norm)r6   
input_sizerY   rZ   r9   s       r   r2   BatchNorm1d.__init__y   s)     	NN
	r   c                 $    U R                  U5      $ Nr[   r6   rC   s     r   rF   BatchNorm1d.forward   s    yy|r   r`   )gh㈵>g?rM   rN   rO   rP   r2   rF   rR   rS   rT   s   @r   rV   rV   w   s    
 	
 r   rV   c                   J   ^  \ rS rSr\R
                  S4U 4S jjrS rSrU =r	$ )	TDNNBlock   r   c                    > [         [        U ]  5         [        UUUUUS9U l        U" 5       U l        [        US9U l        g )N)r8   r7   r!   r"   r/   r\   )r1   re   r2   r,   r5   
activationrV   r[   )r6   r8   r7   r!   r"   ri   r/   r9   s          r   r2   TDNNBlock.__init__   sD     	i')#%#
	 %,<8	r   c                 `    U R                  U R                  U R                  U5      5      5      $ r_   )r[   ri   r5   ra   s     r   rF   TDNNBlock.forward   s"    yy1677r   )ri   r5   r[   )
rM   rN   rO   rP   r4   ReLUr2   rF   rR   rS   rT   s   @r   re   re      s     779(8 8r   re   c                   8   ^  \ rS rSr   SU 4S jjrS rSrU =r$ )Res2NetBlock   c                    > [         [        U ]  5         X-  S:X  d   eX#-  S:X  d   eX-  nX#-  n[        R                  " [        US-
  5       Vs/ s H  n[        UUUUS9PM     sn5      U l        X0l        g s  snf )Nr   r   )r!   r"   )	r1   ro   r2   r4   
ModuleListrangere   blocksscale)
r6   r8   r7   ru   r!   r"   
in_channelhidden_channelir9   s
            r   r2   Res2NetBlock.__init__   s     	lD*,"a'''#q((( )
%.mm UQY'%
 (! '!	
 (%
  
%
s   A9c                 >   / n[        [        R                  " XR                  SS95       HY  u  p4US:X  a  UnO:US:X  a  U R                  US-
     " U5      nOU R                  US-
     " UW-   5      nUR                  U5        M[     [        R                  " USS9nU$ )Nr   dimr   )	enumerater   chunkru   rt   appendcat)r6   rC   yrx   x_iy_is         r   rF   Res2NetBlock.forward   s    Azzq ABFAAvakk!a%(-kk!a%(s3HHSM C IIaQr   )rt   ru   )      r   rc   rT   s   @r   ro   ro      s    
 . r   ro   c                   2   ^  \ rS rSrU 4S jrSS jrSrU =r$ )SEBlock   c                    > [         [        U ]  5         [        XSS9U l        [
        R                  R                  SS9U l        [        X#SS9U l	        [
        R                  R                  5       U l        g )Nr   r8   r7   r!   T)inplace)r1   r   r2   r,   conv1r   r4   rm   reluconv2Sigmoidsigmoid)r6   r8   se_channelsr7   r9   s       r   r2   SEBlock.__init__   s^    gt%'#1N
HHMM$M/	#AO
xx'')r   c                 h   UR                   S   nUbM  [        X#-  X1R                  S9nUR                  S5      nUR	                  SSS9nX-  R	                  SSS9U-  nOUR                  SSS9nU R                  U R                  U5      5      nU R                  U R                  U5      5      nXa-  $ )NrI   r   r   r   r$   Tr|   keepdim)
r   r   r   r   summeanr   r   r   r   )r6   rC   lengthsLr   totalss          r   rF   SEBlock.forward   s    GGBK!'+qJD>>!$DHHDH1E1d3e;A1d+AIIdjjm$LLA'ur   )r   r   r   r   r_   rc   rT   s   @r   r   r      s    * r   r   c                   6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )AttentiveStatisticsPooling   c                    > [         TU ]  5         SU l        X0l        U(       a  [	        US-  USS5      U l        O[	        XSS5      U l        [        R                  " 5       U l        [        UUSS9U l
        g )Ng-q=r   r   r   )r1   r2   rY   global_contextre   tdnnr4   Tanhtanhr,   r5   )r6   channelsattention_channelsr   r9   s       r   r2   #AttentiveStatisticsPooling.__init__   sg    ,!(Q,0BAqIDI!(1EDIGGI	*!	r   c                 L   UR                   S   nSU R                  4S jnUc,  [        R                  " UR                   S   UR                  S9n[        X#-  X1R                  S9nUR                  S5      nU R                  (       a  UR                  SSS	9R                  5       nU" XU-  5      u  pxUR                  S5      R                  SSU5      nUR                  S5      R                  SSU5      n[        R                  " XU/SS
9n	OUn	U R                  U R                  U R                  U	5      5      5      n	U	R                  US:H  [        S5      5      n	[         R"                  " U	SS
9n	U" X5      u  px[        R                  " Xx4SS
9n
U
R                  S5      n
U
$ )NrI   r$   c                     X-  R                  U5      n[        R                  " XUR                  U5      -
  R	                  S5      -  R                  U5      R                  U5      5      nXE4$ )Nr$   )r   r   sqrtr   powclamp)rC   mr|   rY   r   stds         r   _compute_statistics?AttentiveStatisticsPooling.forward.<locals>._compute_statistics   s^    E;;s#D**$..--22155::3?EEcJLC9r   r   )r   r   r   Tr   r{   z-inf)r   rY   r   onesr   r   r   r   r   floatrepeatr   r5   r   r   masked_fillr@   softmax)r6   rC   r   r   r   r   r   r   r   attnpooled_statss              r   rF   "AttentiveStatisticsPooling.forward   sn   GGBK*+ 	 ?jjAHH=G gk1XXF~~a   HHDH1779E+Ae|<ID>>!$++Aq!4D--"))!Q2C99as^3DD yy499T?34 	5=9yy1%'0	yy$!4#--a0r   )r5   rY   r   r   r   )   Tr_   rc   rT   s   @r   r   r      s    ) )r   r   c                   j   ^  \ rS rSrSSSS\R
                  R                  S4U 4S jjrSS jrSr	U =r
$ )	SERes2NetBlocki$  r   r   r   c	           	         > [         T	U ]  5         X l        [        UUSSUUS9U l        [        X"X5U5      U l        [        UUSSUUS9U l        [        X$U5      U l	        S U l
        X:w  a  [        UUSS9U l
        g g )Nr   )r!   r"   ri   r/   r   )r1   r2   r7   re   tdnn1ro   res2net_blocktdnn2r   se_blockshortcutr,   )
r6   r8   r7   res2net_scaler   r!   r"   ri   r/   r9   s
            r   r2   SERes2NetBlock.__init__&  s     	(!

 *,*7hP!

  <H&"')DM 'r   c                     UnU R                   (       a  U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  X5      nX-   $ r_   )r   r   r   r   r   )r6   rC   r   residuals       r   rF   SERes2NetBlock.forwardO  s\    ==}}Q'HJJqMq!JJqMMM!%|r   )r7   r   r   r   r   r   r_   )rM   rN   rO   rP   r   r4   rm   r2   rF   rR   rS   rT   s   @r   r   r   $  s/     88=='R
 
r   r   c                      ^  \ rS rSrSrSS\R                  R                  / SQ/ SQ/ SQSS	SS
/ SQ4U 4S jjrSS jr	Sr
U =r$ )
ECAPA_TDNNi\  zAn implementation of the speaker embedding model in a paper.
"ECAPA-TDNN: Emphasized Channel Attention, Propagation and Aggregation in
TDNN Based Speaker Verification" (https://arxiv.org/abs/2005.07143).
cpu   )   r   r   r   i   )   r   r   r   r   )r   r$   r      r   r   r   T)r   r   r   r   r   c                   > [         TU ]  5         [        U5      [        U5      :X  d   e[        U5      [        U5      :X  d   eXPl        [        R
                  " 5       U l        U R                  R                  [        UUS   US   US   UUS   5      5        [        S[        U5      S-
  5       H9  nU R                  R                  [        X]S-
     X]   U	U
Xm   X}   UX   S95        M;     [        US   US   US   US   UUS   S9U l        [        US   UUS9U l        [        US   S-  S9U l        [!        US   S-  USS	9U l        g )
Nr   r   )r   r   r!   r"   ri   r/   rI   )r/   )r   r   r$   rh   r   )r1   r2   r   r   r4   rr   rt   r   re   rs   r   mfar   asprV   asp_bnr,   fc)r6   r\   r   lin_neuronsri   r   kernel_sizes	dilationsr   r   r   r   r/   rx   r9   s                 r   r2   ECAPA_TDNN.__init__b  sp     	8}L 11118}I... mmo 	Q!q		 q#h-!+,AKKUOK"/ + ,&\)!9	
 - RLRLbM":
 .RL1)

 "Xb\A-=>  q($
r   c                    UR                  SS5      n/ nU R                   H  n U" XS9nUR                  U5        M     [        R
                  " USS SS9nU R                  U5      nU R                  XS9nU R                  U5      nU R                  U5      nUR                  SS5      R                  S5      nU$ ! [         a    U" U5      n Nf = f)zpReturns the embedding vector.

Arguments
---------
x : torch.Tensor
    Tensor of shape (batch, time, channel).
r   r$   )r   Nr{   )	transposert   	TypeErrorr   r   r   r   r   r   r   squeeze)r6   rC   r   xllayers        r   rF   ECAPA_TDNN.forward  s     KK1[[E!- IIaL ! IIbf!$HHQK HHQH(KKN GGAJKK1%%a(!  !Hs   B==CC)r   r   rt   r   r   r   r_   )rM   rN   rO   rP   __doc__r   r4   rm   r2   rF   rR   rS   rT   s   @r   r   r   \  sD     88==+$!F
P r   r   )module_namec                   L   ^  \ rS rSrS\\\4   4U 4S jjrS rS r	S r
SrU =r$ )SpeakerVerificationECAPATDNNi  model_configc                   > [         TU ]  " X/UQ70 UD6  X l        X@l        U R                  S   S:w  a  [	        S5      eSU l        / SQn[        U R                  S   5      U l        [        U R                  5        [        U R
                  US9U l
        US   nU R                  U5        U R                  R                  U R                  5        U R                  R                  5         g )	Nchannel   zFmodelscope error: Currently only 1024-channel ecapa tdnn is supported.P   )r   r   r   r   i   r   )r   pretrained_model)r1   r2   r   other_configrB   feature_dimr
   r   printr   embedding_model/_SpeakerVerificationECAPATDNN__load_check_pointtoeval)r6   	model_dirr   argskwargschannels_configpretrained_model_namer9   s          r   r2   %SpeakerVerificationECAPATDNN.__init__  s    B4B6B("Y'4/X  8#D$5$5h$?@dkk) 8 &'9 : 56,!!#r   c                    [        U[        R                  5      (       a  [        R                  " U5      n[        UR                  5      S:X  a  UR                  S5      n[        UR                  5      S:X  d   S5       eU R                  U5      nU R                  UR                  U R                  5      5      nUR                  5       R                  5       $ )Nr   r   r$   zFmodelscope error: the shape of input audio to model needs to be [N, T])
isinstancenpndarrayr   
from_numpyr   r   r   ._SpeakerVerificationECAPATDNN__extract_featurer   r   r   detachr   )r6   audiofeature	embeddings       r   rF   $SpeakerVerificationECAPATDNN.forward  s    eRZZ(($$U+Eu{{q OOA&EKK
 	YX	Y  ((/((DKK)@A	!%%''r   c                    / nU Hc  n[         R                  " UR                  S5      U R                  S9nXDR	                  SSS9-
  nUR                  UR                  S5      5        Me     [        R                  " U5      nU$ )Nr   )num_mel_binsTr   )Kaldifbankr   r   r   r   r   r   )r6   r   featuresaur   s        r   __extract_feature.SpeakerVerificationECAPATDNN.__extract_feature  st    BkkQd.>.>@GD AAGOOG--a01	 
 99X&r   c                     U R                   R                  [        R                  " [        R
                  R                  U R                  U5      [        R                  " S5      S9SS9  g )Nr   )map_locationT)strict)	r   load_state_dictr   loadospathjoinr   r   )r6   r   s     r   __load_check_point/SpeakerVerificationECAPATDNN.__load_check_point  sO    ,,JJT^^-BC"\\%02 	 	- 	r   )r   r   r   r   r   )rM   rN   rO   rP   r   strr   r2   rF   r   r   rR   rS   rT   s   @r   r   r     s+    $S#X $.( r   r   )NNN),r   r%   r  typingr   r   r   numpyr   r   torch.nnr4   torch.nn.functional
functionalr@   torchaudio.compliance.kaldi
compliancekaldir  modelscope.metainfor   modelscope.modelsr   r   modelscope.utils.constantr	   modelscope.utils.devicer
   r   rQ   r*   Moduler,   rV   re   ro   r   r   r   r   register_modulespeaker_verificationecapa_tdnn_svr    r   r   <module>r&     s#    	 # #      + + & 0 + 1&
3 
 
# 
 
@RYY @F")) &8		 84$588?? $Nbii 8: :z5RYY 5pl l^ 	F,@,@B6: 6B6r   