
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.layers import DropPath, trunc_normal_
from timm.models.registry import register_model


class Block(nn.Module):
    r"""ConvNeXt Block. There are two equivalent implementations:
    (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
    (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
    We use (2) as we find it slightly faster in PyTorch

    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
    """

    def __init__(self, dim, drop_path=0., layer_scale_init_value=1e-6):
        super().__init__()
        # depthwise conv
        self.dwconv = nn.Conv2d(
            dim, dim, kernel_size=7, padding=3, groups=dim)
        self.norm = LayerNorm(dim, eps=1e-6)
        # pointwise/1x1 convs, implemented with linear layers
        self.pwconv1 = nn.Linear(dim, 4 * dim)
        self.act = nn.GELU()
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.gamma = nn.Parameter(
            layer_scale_init_value * torch.ones((dim, )),
            requires_grad=True) if layer_scale_init_value > 0 else None
        self.drop_path = DropPath(
            drop_path) if drop_path > 0. else nn.Identity()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.pwconv2(x)
        if self.gamma is not None:
            x = self.gamma * x
        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)

        x = input + self.drop_path(x)
        return x


class ConvNeXt(nn.Module):
    r"""ConvNeXt
        A PyTorch impl of : `A ConvNet for the 2020s`  -
          https://arxiv.org/pdf/2201.03545.pdf

    Args:
        in_chans (int): Number of input image channels. Default: 3
        num_classes (int): Number of classes for classification head. Default: 1000
        depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
        dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
        drop_path_rate (float): Stochastic depth rate. Default: 0.
        layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
        head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
    """

    def __init__(self,
                 in_chans=3,
                 depths=[3, 3, 9, 3],
                 dims=[96, 192, 384, 768],
                 drop_path_rate=0.,
                 layer_scale_init_value=1e-6):
        super().__init__()

        # stem and 3 intermediate downsampling conv layers
        self.downsample_layers = nn.ModuleList()
        stem = nn.Sequential(
            nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
            LayerNorm(dims[0], eps=1e-6, data_format='channels_first'))
        self.downsample_layers.append(stem)
        for i in range(3):
            downsample_layer = nn.Sequential(
                LayerNorm(dims[i], eps=1e-6, data_format='channels_first'),
                nn.Conv2d(dims[i], dims[i + 1], kernel_size=2, stride=2))
            self.downsample_layers.append(downsample_layer)

        # 4 feature resolution stages, each consisting of multiple Blocks
        self.stages = nn.ModuleList()
        dp_rates = [
            x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))
        ]
        cur = 0
        for i in range(4):
            stage = nn.Sequential(*[
                Block(
                    dim=dims[i],
                    drop_path=dp_rates[cur + j],
                    layer_scale_init_value=layer_scale_init_value)
                for j in range(depths[i])
            ])
            self.stages.append(stage)
            cur += depths[i]

        self.dims = dims
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Return the feature maps of all four stages (no classification head).
        xs = []
        for i in range(4):
            x = self.downsample_layers[i](x)
            x = self.stages[i](x)
            xs.append(x)
        return tuple(xs)


class LayerNorm(nn.Module):
    r"""LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
    with shape (batch_size, channels, height, width).
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format='channels_last'):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ['channels_last', 'channels_first']:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape, )

    def forward(self, x):
        if self.data_format == 'channels_last':
            return F.layer_norm(x, self.normalized_shape, self.weight,
                                self.bias, self.eps)
        elif self.data_format == 'channels_first':
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x
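

# A small illustrative check (not part of the original module; the helper name
# is hypothetical): with freshly initialized affine parameters, the
# channels_first branch above agrees with channels_last applied on a permuted
# tensor, since both normalize over the channel dimension.
def _check_layernorm_formats(channels=16):
    ln_last = LayerNorm(channels, data_format='channels_last')
    ln_first = LayerNorm(channels, data_format='channels_first')
    x = torch.randn(2, channels, 5, 5)  # (N, C, H, W)
    out_first = ln_first(x)
    out_last = ln_last(x.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
    return torch.allclose(out_first, out_last, atol=1e-5)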


@register_model
def convnext_tiny(pretrained=False, in_22k=False, **kwargs):
    # `pretrained` and `in_22k` are kept for signature compatibility; this
    # factory only builds the backbone and does not load weights here.
    model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
    return model