
    9i.                         S r SSKrSSKJr   " S S\R                  5      r " S S\R                  5      r " S S\R                  5      r " S	 S
\R                  5      rg)zLThis implementation is adapted from https://github.com/wenet-e2e/wespeaker.
    Nc                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )TAP   z?
Temporal average pooling, only first-order mean is considered
c                 *   > [         [        U ]  5         g N)superr   __init__selfkwargs	__class__s     i/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/audio/sv/pooling_layers.pyr	   TAP.__init__   s    c4!#    c                 B    UR                  SS9nUR                  SS9nU$ )Ndim   	start_dim)meanflatten)r   xpooling_means      r   forwardTAP.forward   s*    vv"v~#++a+8r    	__name__
__module____qualname____firstlineno____doc__r	   r   __static_attributes____classcell__r   s   @r   r   r      s    $ r   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )TSDP   zJ
Temporal standard deviation pooling, only second-order std is considered
c                 *   > [         [        U ]  5         g r   )r   r)   r	   r
   s     r   r	   TSDP.__init__       dD"$r   c                 |    [         R                  " [         R                  " USS9S-   5      nUR                  SS9nU$ Nr   r   g:0yE>r   r   )torchsqrtvarr   )r   r   pooling_stds      r   r   TSDP.forward   s9    jj1"!5!<=!))A)6r   r   r   r'   s   @r   r)   r)      s    % r   r)   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )TSTP&   z
Temporal statistics pooling, concatenate mean and std, which is used in
x-vector
Comment: simple concatenation can not make full use of both statistics
c                 *   > [         [        U ]  5         g r   )r   r6   r	   r
   s     r   r	   TSTP.__init__-   r-   r   c                     UR                  SS9n[        R                  " [        R                  " USS9S-   5      nUR	                  SS9nUR	                  SS9n[        R
                  " X#4S5      nU$ r/   )r   r0   r1   r2   r   cat)r   r   r   r3   statss        r   r   TSTP.forward0   sm    vv"v~jj1"!5!<=#++a+8!))A)6		<5q9r   r   r   r'   s   @r   r6   r6   &   s    % r   r6   c                   6   ^  \ rS rSrSrSU 4S jjrS rSrU =r$ )ASTP;   zkAttentive statistics pooling: Channel- and context-dependent
statistics pooling, first used in ECAPA_TDNN.
c                    > [         [        U ]  5         X0l        U(       a  [        R
                  " US-  USS9U l        O[        R
                  " XSS9U l        [        R
                  " X!SS9U l        g )N   r   )kernel_size)r   r?   r	   global_context_attnnConv1dlinear1linear2)r   in_dimbottleneck_dimrD   r   s       r   r	   ASTP.__init__@   sg    dD"$"4 99
NDL 99DL yyr   c                    [        UR                  5      S:X  aJ  UR                  UR                  S   UR                  S   UR                  S   -  UR                  S   5      n[        UR                  5      S:X  d   eU R                  (       ay  [        R
                  " USSS9R                  U5      n[        R                  " [        R                  " USSS9S	-   5      R                  U5      n[        R                  " XU4SS
9nOUn[        R                  " U R                  U5      5      n[        R                  " U R                  U5      SS
9n[        R                  " XQ-  SS
9n[        R                  " XQS-  -  SS
9US-  -
  n[        R                  " UR                  S	S95      n[        R                  " Xh/SS
9$ )z
x: a 3-dimensional tensor in tdnn-based architecture (B,F,T)
    or a 4-dimensional tensor in resnet architecture (B,C,F,T)
    0-dim: batch-dimension, last-dim: time-dimension (frame-dimension)
   r   r      rB   r   T)r   keepdimg|=r   )min)lenshapereshaperD   r0   r   	expand_asr1   r2   r;   tanhrG   softmaxrH   sumclamp)	r   r   context_meancontext_stdx_inalphar   r2   stds	            r   r   ASTP.forwardR   sb    qww<1		!''!*aggaj1771:&=qwwqzJA177|q   "" ::aR>HHKL**		!T2U:<<EIaL 99a{;CDD 

LL dll51q9yy*iiAA.q8jju-.yy$!,,r   )rD   rG   rH   )   Fr   r'   s   @r   r?   r?   ;   s    $- -r   r?   )	r$   r0   torch.nnrE   Moduler   r)   r6   r?   r   r   r   <module>rb      sP     ")) 299 299 *0-299 0-r   