
    9i                      
   S SK rS SKrS SKJr  S SKJs  Jr   " S S\R                  5      r	 " S S\	5      r
S\S\S\S	\4S
 jr " S S\R                  5      r " S S\5      r " S S\R                  R                  5      rg)    Nc                   `   ^  \ rS rSr         S
U 4S jjrS rS\S\S\4S jrS rS	r	U =r
$ )Conv1d_O	   c                 >  > [         TU ]  5         X l        XPl        X`l        Xpl        Xl        SU l        Xl        Uc  Uc  [        S5      eUc  U R                  U5      n[        R                  " UUU R                  U R                  U R                  SUU	S9U l        g )NFz.Must provide one of input_shape or in_channelsr   )stridedilationpaddinggroupsbias)super__init__kernel_sizer   r   r	   padding_mode	unsqueezeskip_transpose
ValueError_check_input_shapennConv1dconv)selfout_channelsr   input_shapein_channelsr   r   r	   r
   r   r   r   	__class__s               _/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/audio/sv/TDNN.pyr   Conv1d_O.__init__   s     	& (,;#6MNN11+>KII;;]]	
	    c                    U R                   (       d  UR                  SS5      nU R                  (       a  UR                  S5      nU R                  S:X  a2  U R	                  XR
                  U R                  U R                  5      nOnU R                  S:X  a5  U R
                  S-
  U R                  -  n[        R                  " XS45      nO)U R                  S:X  a  O[        SU R                  -   5      eU R                  U5      nU R                  (       a  UR                  S5      nU R                   (       d  UR                  SS5      nU$ )zReturns the output of the convolution.

Arguments
---------
x : torch.Tensor (batch, time, channel)
    input to convolve. 2d or 4d tensors are expected.
   samecausalr   validz1Padding must be 'same', 'valid' or 'causal'. Got )r   	transposer   r	   _manage_paddingr   r   r   Fpadr   r   squeeze)r   xnum_padwxs       r   forwardConv1d_O.forward3   s	    ""Ar"A>>AA<<6!$$Q(8(8$--%)[[2A \\X%''!+t}}<Ga1&A\\W$ C,,    YYq\>>AB""a$B	r   r   r   r   c                 z    UR                   S   n[        XTX#5      n[        R                  " XU R                  S9nU$ )Nr!   )mode)shapeget_padding_elemr'   r(   r   )r   r*   r   r   r   L_inr	   s          r   r&   Conv1d_O._manage_padding\   s<     wwr{ #4G EE!4#4#45r   c                 &   [        U5      S:X  a
  SU l        SnOLU R                  (       a  US   nO5[        U5      S:X  a  US   nO [        S[	        [        U5      5      -   5      eU R
                  S-  S:X  a  [        SU R
                  -  5      eU$ )zIChecks the input shape and returns the number of input channels.
           Tr       z"conv1d expects 2d, 3d inputs. Got r   z4The field kernel size must be an odd number. Got %s.)lenr   r   r   strr   )r   r1   r   s      r   r   Conv1d_O._check_input_shapen   s     u:?!DNK  (KZ1_(KA"3u:/ 0 0 a1$F!!#$ $ r   )r   r   r   r	   r   r   r   r   )	NNr    r    r"   r    TreflectF)__name__
__module____qualname____firstlineno__r   r-   intr&   r   __static_attributes____classcell__r   s   @r   r   r   	   sY     &
P'R  	
 $ r   r   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )r      c                 *   > [         TU ]  " USS0UD6  g Nr   Tr   r   r   argskwargsr   s      r   r   Conv1d.__init__       t>>v>r    r<   r=   r>   r?   r   rA   rB   rC   s   @r   r   r          ? ?r   r   r3   r   r   r   c                     US:  a8  [         R                  XU-  -
  U-  S-   5      nXS-
  -  X#-  -   nUS-  US-  /nU$ XUS-
  -  -
  S-
  U-  S-   nX-
  S-  X-
  S-  /nU$ )zThis function computes the number of elements to add for zero-padding.

Arguments
---------
L_in : int
stride: int
kernel_size : int
dilation : int
r    r6   )mathceil)r3   r   r   r   n_stepsL_outr	   s          r   r2   r2      s     z))d8%;;vEJKA+&)??!#[A%56 N K!O44q8VCaGLQ&!(;<Nr   c                   B   ^  \ rS rSr        SU 4S jjrS rSrU =r$ )BatchNorm1d_O   c	                    > [         T	U ]  5         Xpl        Xl        Uc  U(       a  US   nOUc  US   n[        R
                  " UUUUUS9U l        g )Nr    r!   )epsmomentumaffinetrack_running_stats)r   r   combine_batch_timer   r   BatchNorm1dnorm)
r   r   
input_sizerZ   r[   r\   r]   r^   r   r   s
            r   r   BatchNorm1d_O.__init__   s[     	"4,.$QJ$RJNN 3
	r   c                    UR                   nU R                  (       aR  UR                  S:X  a  UR                  US   US   -  US   5      nOFUR                  US   US   -  US   US   5      nO#U R                  (       d  UR                  SS5      nU R                  U5      nU R                  (       a  UR                  U5      nU$ U R                  (       d  UR                  SS5      nU$ )zReturns the normalized input tensor.

Arguments
---------
x : torch.Tensor (batch, time, [channels])
    input to normalize. 2d or 3d tensors are expected in input
    4d tensors can be used when combine_dims=True.
r7   r   r    r6   r!   )r1   r^   ndimreshaper   r%   r`   )r   r*   shape_orx_ns       r   r-   BatchNorm1d_O.forward   s     77""vv{IIhqkHQK7!EIIhqkHQK7!&qk+ $$B"Aiil""++h'C 
 $$--2&C
r   )r^   r`   r   )NNgh㈵>g?TTFF)r<   r=   r>   r?   r   r-   rA   rB   rC   s   @r   rW   rW      s,       
8 r   rW   c                   (   ^  \ rS rSrU 4S jrSrU =r$ )r_      c                 *   > [         TU ]  " USS0UD6  g rG   rH   rI   s      r   r   BatchNorm1d.__init__   rM   r   rN   rO   rC   s   @r   r_   r_      rP   r   r_   c                   ~   ^  \ rS rSrSrS\R                  R                  S/ SQ/ SQ/ SQSS	4U 4S
 jjrSS jr	Sr
U =r$ )Xvector   a  This model extracts X-vectors for speaker recognition and diarization.

Arguments
---------
device : str
    Device used e.g. "cpu" or "cuda".
activation : torch class
    A class for constructing the activation layers.
tdnn_blocks : int
    Number of time-delay neural (TDNN) layers.
tdnn_channels : list of ints
    Output channels for TDNN layer.
tdnn_kernel_sizes : list of ints
    List of kernel sizes for each TDNN layer.
tdnn_dilations : list of ints
    List of dilations for kernels in each TDNN layer.
lin_neurons : int
    Number of neurons in linear layers.

Example
-------
>>> compute_xvect = Xvector('cpu')
>>> input_feats = torch.rand([5, 10, 40])
>>> outputs = compute_xvect(input_feats)
>>> outputs.shape
torch.Size([5, 1, 512])
cpu   )   rr   rr   rr   i  )rq   r7   r7   r    r    )r    r6   r7   r    r    rr   P   c	           
         > [         TU ]  5         [        R                  " 5       U l        [        U5       HC  n	XI   n
U R                  R                  [        UU
XY   Xi   S9U" 5       [        U
S9/5        XI   nME     g )N)r   r   r   r   )ra   )	r   r   r   
ModuleListblocksrangeextendr   r_   )r   device
activationtdnn_blockstdnn_channelstdnn_kernel_sizestdnn_dilationslin_neuronsr   block_indexr   r   s              r   r   Xvector.__init__  s     	mmo !-K(5LKK +!- 1 >+8	 |4	  	 (4K .r   c                     UR                  SS5      nU R                   H
  n U" XS9nM     UR                  SS5      nU$ ! [         a    U" U5      n M7  f = f)z=Returns the x-vectors.

Arguments
---------
x : torch.Tensor
r    r6   )lengths)r%   rv   	TypeError)r   r*   lenslayers       r   r-   Xvector.forward  sb     KK1[[E!* !
 KK1  !Hs   AAA)rv   )N)r<   r=   r>   r?   __doc__torchr   	LeakyReLUr   r-   rA   rB   rC   s   @r   rn   rn      s;    < 88%%0)&5< r   rn   )numpynpr   torch.nnr   torch.nn.functional
functionalr'   Moduler   r   r@   r2   rW   r_   rn   rN   r   r   <module>r      s        yryy yz?X ?3  #  ,9BII 9x?- ?Kehhoo Kr   