
    9i                        S SK Jr  S SKJr  S SKrS SKrS SKJr  S SK	Js  J
r  S SKJs  Jr  S SKJr   S SKJr  SSS/S	S/S
S//SS/S	S/S
S/// SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ S Q/ S!Q/ S"Q// S#Q/ S$QS%.r  S>S& jrS' r " S( S)\R.                  5      r " S* S+\R.                  5      rS?S,\S-\4S. jjr " S/ S0\R.                  5      rS@S1 jr " S2 S3\R.                  5      rSAS4 jr S5 r!S6 r"S7 r# " S8 S9\R.                  5      r$ " S: S;\R.                  5      r% " S< S=\R.                  5      r&g! \ a    Sr GN.f = f)B    )OrderedDict)partialN)trunc_normal_)checkpoint_wrapper      g       @      )r      r   r   )r   r   r   r   )r   r   r   r   )   r   r   r   )   r   r   r   )r	   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )	   r   r   r   )
   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )   r   r   r   )r
   r   r   r   )   r   r   r   )   r   r   r   )r   r   r   )r   r   r   )depthdim_mulhead_mulpool_q_stridepool_kvq_kernelpool_kv_stride_adaptivec           	         SS/nU(       a  US/-  nU R                  5       nU R                  5        GH  u  pg[        U Vs/ s H  oU;   PM	     sn5      (       d  M+  X   n	UR                  S   U	R                  S   :w  a  [        R
                  " UR                  SUR                  S   S5      R                  SSS5      U	R                  S   SS	9n
U
R                  SU	R                  S   5      R                  SS5      n	U(       a/  [        S
R                  XgR                  XiR                  5      5        OUn	U	R                  5       XV'   GM     U$ s  snf )N	rel_pos_h	rel_pos_w	rel_pos_tr   r   r   linearsizemodezInflate {}: {} -> {}: {})copyitemsanyshapeFinterpolatereshapepermuteprintformatclone)state_dict_originstate_dict_modeltemporalverboserel_pos_embed_typesstate_dict_inflatedkv2dxv3drel_pos_resizeds              h/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/multi_modal/mplug/mvit.pyinterpolate_rel_pos_embedrE   #   s8    '4},+002#))+ 34 31Q 3455"%Cyy|syy|+"#--KK399Q<4<<Q1E1!#
 &--b#))A,?GG1M4;;99a4 5 %(YY[" ,   5s   E
c           	      n   U S   n[         R                  " US-   5      [         R                  " US-   5      p2[        [        U S   5      5       H  nU S   U   S   X S   U   S   '   M     [        [        U S   5      5       H  nU S   U   S   X0S   U   S   '   M     [        U5       Vs/ s H  n/ PM     nn[        U5       Vs/ s H  n/ PM     nn[        U5       Vs/ s H  n/ PM     nn[        U5       Vs/ s H  n/ PM     nn[        [        U S   5      5       H)  nU S   U   SS  XpS   U   S   '   U S   XPS   U   S   '   M+     U S   b  U S   n	/ U S	'   [        U S   5       Hd  n[        Xt   5      S:  a8  [        [        U	5      5       V
s/ s H  n
[	        X   Xt   U
   -  S5      PM     n	n
U S	   R                  U/U	-   5        Mf     [        [        U S	   5      5       H)  nU S	   U   SS  XS	   U   S   '   U S   X`S	   U   S   '   M+     X#XVXx4$ s  snf s  snf s  snf s  snf s  sn
f )
Nr   r   r    r   r!   r"   r#   r$   pool_kv_stride)torchonesrangelenmaxappend)cfgr   r    r!   ipool_qpool_kvstride_q	stride_kv
_stride_kvds              rD   _prepare_mvit_configsrV   ?   s   LE

519-uzz%!)/DX3s9~&'(+Iq(9!(<Iq!!$% (3s:'(*-j/!*<Q*?Z#A&' )  ,',Qb,F' <(<ar<G(!%L)LqLH)"5\*\\I*3s?+,-/2?/CA/Fqr/J_%a(+,-01B-C?#A&q)* . $%123
 "s7|$A8;!# #3z?33 
Q7;3    !((!z)9: % 3s+,-.145E1Fq1I!"1M	&'*1-./23D/E$%a(+, / fxBB1 (()*s   ,HH#"H(=H- H2c                   N   ^  \ rS rSrSS\R
                  S4U 4S jjrS rSrU =r	$ )Mlpb   N        c                 6  > [         TU ]  5         XPl        U=(       d    UnU=(       d    Un[        R                  " X5      U l        U" 5       U l        [        R                  " X#5      U l        U R                  S:  a  [        R                  " U5      U l	        g g NrZ   )
super__init__	drop_ratennLinearfc1actfc2Dropoutdrop)selfin_featureshidden_featuresout_features	act_layerr_   	__class__s         rD   r^   Mlp.__init__d   ss     	"#2{)8[99[:;99_;>>C

9-DI      c                     U R                  U5      nU R                  U5      nU R                  S:  a  U R                  U5      nU R	                  U5      nU R                  S:  a  U R                  U5      nU$ r\   )rb   rc   r_   rf   rd   rg   rA   s     rD   forwardMlp.forwardv   s`    HHQKHHQK>>C		!AHHQK>>C		!Arn   )rc   rf   r_   rb   rd   )
__name__
__module____qualname____firstlineno__r`   GELUr^   rq   __static_attributes____classcell__rl   s   @rD   rX   rX   b   s$    
 ''.$ rn   rX   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )Permute   c                 .   > [         TU ]  5         Xl        g N)r]   r^   dims)rg   r   rl   s     rD   r^   Permute.__init__   s    	rn   c                 4    UR                   " U R                  6 $ r   )r5   r   rp   s     rD   rq   Permute.forward   s    yy$))$$rn   )r   )rs   rt   ru   rv   r^   rq   rx   ry   rz   s   @rD   r|   r|      s    % %rn   r|   	drop_probtrainingc                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )z
Stochastic Depth per sample.
rZ   r   r   )r   )dtypedevice)r1   ndimrH   randr   r   floor_div)rA   r   r   	keep_probr1   maskoutputs          rD   	drop_pathr      s     CxIIWWQZNU	
 Euzz%wwqxxHHDKKMUU9$FMrn   c                   6   ^  \ rS rSrSrSU 4S jjrS rSrU =r$ )DropPath   zYDrop paths (Stochastic Depth) per sample  (when applied in main path of residual blocks).c                 6   > [         [        U ]  5         Xl        g r   )r]   r   r^   r   )rg   r   rl   s     rD   r^   DropPath.__init__   s    h&("rn   c                 B    [        XR                  U R                  5      $ r   )r   r   r   rp   s     rD   rq   DropPath.forward   s    NNDMM::rn   )r   r   	rs   rt   ru   rv   __doc__r^   rq   rx   ry   rz   s   @rD   r   r      s    c#; ;rn   r   c                 2   U(       d  U $ X-  n U=(       d    UnU(       aA  [        SU 35        [        SU  SU 35        [        S[        XS-  -   5      U-  U-   35        [        U[        XS-  -   5      U-  U-  5      nUSU -  :  a  XS-  n[        U5      $ )Nz
min width zwidth z	 divisor zother r   g?)r6   intrL   )width
multiplier	min_widthdivisorr<   	width_outs         rD   round_widthr      s    	E$WI
9+&'ugYwi01s5Q;./7:WDEFGIs5Q;#677BWLMI3;	y>rn   c                   B   ^  \ rS rSrSr      SU 4S jjrS rSrU =r$ )
PatchEmbed   z
PatchEmbed.
c                    > [         TU ]  5         U(       a  [        R                  nO[        R                  nU" UUUUUS9U l        g )N)kernel_sizestridepadding)r]   r^   r`   Conv2dConv3dproj)	rg   dim_indim_outkernelr   r   conv2dconv_functionrl   s	           rD   r^   PatchEmbed.__init__   s@     	IIMIIM!
	rn   c                 ~    U R                  U5      nUR                  S5      R                  SS5      UR                  4$ )Nr   r   )r   flatten	transposer1   rp   s     rD   rq   PatchEmbed.forward   s3    IIaLyy|%%a+QWW44rn   )r   )r   i   )r   r   )r   r   )r   r   Fr   rz   s   @rD   r   r      s+     
05 5rn   r   c                    Uc  X4$ U R                   nUS:X  a  O0US:X  a  U R                  S5      n O[        SU R                   35      eU(       a!  U S S 2S S 2S S2S S 24   U S S 2S S 2SS 2S S 24   pU R                  u  pxpUu  pnU R	                  Xx-  XX5      R                  SSSSS5      R                  5       n U" U 5      n U R                  S   U R                  S   U R                  S   /nU R                  S   U R                  S   -  U R                  S   -  nU R	                  XxX5      R                  SS5      n U(       a  [        R                  " WU 4SS9n Ub  U" U 5      n US:X  a   X4$ U R                  S5      n X4$ )Nr   r   r   zUnsupported input dimension r   r   dim)r   	unsqueezeNotImplementedErrorr1   r4   r5   
contiguousr   rH   catsqueeze)tensorpool	thw_shapehas_cls_embednorm
tensor_dimcls_tokBNLCTHWL_pooleds                  rD   attention_poolr      s   |  JQ	q!!!$!*6<<.9; 	;  Arr1-vaABk/BJA!GA!quaA)11!Q1a@KKM  &\Fa&,,q/6<<?CI||Aa06<<?BH^^A!.88A>FGV,!4fQ  "rn   c                    [        U[        5      (       ap  U R                  S   nX!:X  a  U $ [        R                  " U R                  SUS5      R                  SSS5      USS9nUR                  SU5      R                  SS5      $ g )Nr   r   r)   r   r*   r+   )
isinstancer   r1   r2   r3   r4   r5   )rel_posrU   ori_dnew_pos_embeds       rD   get_rel_posr      s    !Sa :N MM5"-55aA>M !((Q/771== rn   c                 |   U(       a  SOSnUu  pnUu  pn[        S[        X5      -  S-
  5      n[        S[        X5      -  S-
  5      n[        X-  S5      n[        X-  S5      n[        R                  " U
5      SS2S4   U-  [        R                  " U5      SSS24   U-  -
  nUUS-
  U-  -  n[        X-  S5      n[        X-  S5      n[        R                  " U5      SS2S4   U-  [        R                  " U5      SSS24   U-  -
  nUUS-
  U-  -  n[	        Xo5      n[	        UU5      nUUR                  5          nUUR                  5          nUR                  u  nnnnUSS2SS2US24   R                  UUXUU5      n[        R                  " SUU5      n[        R                  " SUU5      nU SS2SS2US2US24   R                  USXXX5      USS2SS2SS2SS2SS2SSS2S4   -   USS2SS2SS2SS2SS2SSSS24   -   R                  USX-  U-  X-  U-  5      U SS2SS2US2US24'   U $ )	z4
Decomposed Spatial Relative Positional Embeddings.
r   r   r         ?Nzbythwc,hkc->bythwkzbythwc,wkc->bythwkr)   )
r   rL   rH   aranger   longr1   r4   einsumview) attnqr?   r   q_shapek_shaper&   r'   sp_idxq_tq_hq_wk_tk_hk_wdhdw	q_h_ratio	k_h_ratiodist_h	q_w_ratio	k_w_ratiodist_wRhRwr   n_headq_Nr   r_qrel_h_qrel_w_qs                                    rD   cal_rel_pos_spatialr     sn   
  QQFMCcMCc	QS"	#B	QS"	#B CIs#ICIs#IS!T'"Y.
,,s
D!G
$y
0	1  sQw)##FCIs#ICIs#IS!T'"Y.
,,s
D!G
$y
0	1  sQw)##F I*IIr*I	6;;=	!B	6;;=	!B''AvsC
Aq&'M

"
"1fcS
ACll/Gll/G 	Q67FG#$))!R3SN
!Q1aq$.
/	0
!Q1atQ.
/	015aSY_69i#o2G 	Avw	  Krn   c           
         U(       a  SOSnUu  pxn	Uu  pn[        S[        Xz5      -  S-
  5      n[        X]5      n[        X-  S5      n[        Xz-  S5      n[        R                  " U5      SS2S4   U-  [        R                  " U
5      SSS24   U-  -
  nUU
S-
  U-  -  nUUR                  5          nUR                  u  nnnnUSS2SS2US24   R                  UUXxU	U5      nUR                  SSSSSS5      R                  UUU-  U-  U	-  U5      n[        R                  " UUR                  SS5      5      R                  SS5      nUR                  UUXXz5      R                  SSSSSS5      nU SS2SS2US2US24   R                  US	XxXX5      USS2SS2SS2SS2SS2SS2SS4   -   R                  US	Xx-  U	-  X-  U-  5      U SS2SS2US2US24'   U $ )
z*
Temporal Relative Positional Embeddings.
r   r   r   r   Nr   r   r	   r)   )r   rL   r   rH   r   r   r1   r4   r5   matmulr   r   )r   r   r   r   r   r(   r   r   r   r   r   r   r   dt	q_t_ratio	k_t_ratiodist_tRtr   r   r   r   r   rels                           rD   cal_rel_pos_temporalr   =  s
     QQFMCcMCc	QS"	#BI*I CIs#ICIs#IS!T'"Y.
,,s
D!G
$y
0	1  sQw)##F	6;;=	!B''AvsC
Aq&'M

"
"1fcS
AC
++aAq!Q
'
/
/QZ#5E5K035C ,,sBLLA.
/
9
9!Q
?C
((1fc
1
9
9!Q1a
KC 	Q67FG#$))!R3SN
aAq!Qd*
+	,-1T!RS25)c/.C 	Avw	 
 Krn   c                   f   ^  \ rS rSrSSSSSSS\R
                  SSSSSSSS4U 4S jjrS	 rS
rU =r	$ )MultiScaleAttentionie  r   FrZ   r   r   r   Tconvc           
      j	  > [         TU ]  5         Xl        UU l        X`l        X@l        X l        X$-  nUS-  U l        Xl        U Vs/ s H  n[        US-  5      PM     nnU Vs/ s H  n[        US-  5      PM     nnU(       d  U(       aO  [        R                  " XUS9U l        [        R                  " XUS9U l        [        R                  " XUS9U l        O[        R                  " XS-  US9U l        [        R                  " X"5      U l        US:  a  [        R"                  " U5      U l        [&        R(                  " U5      S:X  a  [&        R(                  " U	5      S:X  a  Sn[&        R(                  " U5      S:X  a  [&        R(                  " U
5      S:X  a  SnXl        US;   a  US	:X  a  [        R,                  O[        R.                  n[1        U5      S
:  a  U" XyUSS9OS U l        [1        U5      S
:  a  U" XUSS9OS U l        [1        U5      S
:  a  U" XUSS9OS U l        GO&US:X  d  US:X  Ga  U(       a  US:X  a  X-  OUnOUS:X  a  X$-  OUn[1        U5      S
:  a  [        R8                  " UUUU	UUSS9OS U l        [1        U5      S
:  a  U" U5      OS U l        [1        U5      S
:  a  [        R8                  " UUUU
UUSS9OS U l        [1        U5      S
:  a  U" U5      OS U l        [1        U5      S
:  a  [        R8                  " UUUU
UUSS9OS U l        [1        U5      S
:  a  U" U5      OS U l        O[A        SU 35      eXl!        UU l"        U RB                  (       a  US   US   :X  d   eUS   n[1        U	5      S
:  a  UU	S   -  OUn[1        U
5      S
:  a  UU
S   -  OUnS[G        UU5      -  S-
  n[        RH                  " [J        RL                  " UU5      5      U l'        [        RH                  " [J        RL                  " UU5      5      U l(        U(       d(  [S        U RN                  SS9  [S        U RP                  SS9  U RD                  (       a9  [        RH                  " [J        RL                  " SUS
   -  S-
  U5      5      U l*        UU l+        g s  snf s  snf )Ng      r   )biasr   rZ   r    )avgrL   rL   r   F	ceil_moder  conv_unshared)r   r   groupsr  zUnsupported model {Gz?std),r]   r^   
pool_firstseparate_qkvr_   	num_headsr   scaler   r   r`   ra   r   r?   vqkvr   re   	proj_dropnpprodr-   	MaxPool3d	AvgPool3drK   rP   pool_kpool_vr   norm_qnorm_knorm_vr   rel_pos_spatialrel_pos_temporalrL   	ParameterrH   zerosr&   r'   r   r(   residual_pooling) rg   r   r   
input_sizer  qkv_biasr_   kernel_q	kernel_kvrR   rS   
norm_layerr   r-   r  r  r  rel_pos_zero_initr!  r  head_dimr   	padding_qkv
padding_kvpool_opdim_convr,   q_sizekv_size
rel_sp_dimrl   s                                   rD   r^   MultiScaleAttention.__init__g  sD   0 	$(""'t^
**23(QSa[(	3-67Yrc"'lY
7YYs(;DFYYs(;DFYYs(;DFyykADHIIg/	s?ZZ	2DN 778!bggh&71&<H779"rwwy'9Q'>I	>!&*embllG x=1$ IG*. K
 y>A% 	jEJ+/ K
 y>A% 	jEJ+/ K V^t6/3v~3+3376>7/w ]Q& 		#%# -1 K 36h-!2C*X.DK ^a' 		$&# .2 K 36i.12D*X.$DK ^a' 		$&# .2 K 36i.12D*X.$DK%(:4&&ABB. 0a=JqM111a=D,/MA,=TXa[(4F.1)nq.@dil*dGS11A5J\\%++j(*KLDN\\%++j(*KLDN$dnn$7dnn$7  \\A
1-18<>DN
 !1{ 47s   R+%R0c           
         UR                   u  p4nU R                  (       aK  U R                  S:X  a  SnOU R                  nUR	                  X4US5      R                  SSSS5      nU=n=pGO1U R                  S:w  d   eU R                  (       dO  U R                  U5      R	                  X4SU R                  S5      R                  SSSSS5      n
U
S   U
S   U
S   pnOU=n=pU R                  U5      R	                  X4U R                  S5      R                  SSSS5      nU R                  U5      R	                  X4U R                  S5      R                  SSSS5      nU R                  U	5      R	                  X4U R                  S5      R                  SSSS5      n	[        UU R                  UU R                  [        U S5      (       a  U R                  OS S	9u  p{[        UU R                   UU R                  [        U S
5      (       a  U R"                  OS S	9u  p[        U	U R$                  UU R                  [        U S5      (       a  U R&                  OS S	9u  pU R                  (       Ga  U R                  (       a  [(        R*                  " U5      S-   O[(        R*                  " U5      nU R                  (       a  [(        R*                  " U5      S-   O[(        R*                  " U5      nU R                  (       a  [(        R*                  " U5      S-   O[(        R*                  " U5      nUR                  SSSS5      R	                  X>S5      nU R                  U5      R	                  X>U R                  S5      R                  SSSS5      nU	R                  SSSS5      R	                  UUS5      n	U R                  U	5      R	                  UUU R                  S5      R                  SSSS5      n	UR                  SSSS5      R	                  X?S5      nU R                  U5      R	                  X?U R                  S5      R                  SSSS5      nUR                   S   nXpR,                  -  UR/                  SS5      -  nU R0                  (       a0  [3        UUUU R                  UUU R4                  U R6                  5      nU R8                  (       a$  [;        UUU R                  UUU R<                  5      nUR?                  SS9nUU	-  nU R@                  (       a>  U R                  (       a)  US S 2S S 2SS 2S S 24==   US S 2S S 2SS 2S S 24   -  ss'   OX-   nUR/                  SS5      R	                  USU RB                  5      nU RE                  U5      nU RF                  S:  a  U RI                  U5      nX4$ )Nr  r   r)   r   r   r   r   r  )r   r   r  r  r   rZ   )%r1   r  r-   r  r4   r5   r  r  r   r?   r  r   rP   r   hasattrr  r  r  r  r  r  r  r  r   r  r   r&   r'   r  r   r(   softmaxr!  r   r   r_   r  )rg   rA   r   r   r   _fold_dimr   r?   r  r  r   r   v_shaper   k_Nv_Nr   s                     rD   rq   MultiScaleAttention.forward  s   ''a??yyO+>>		!"-55aAqAAMAMA99///$$HHQK''a(*,,3GAq!Q,B  a&#a&#a&aaAFF1I%%aDNN&(**1'!Q1*=  FF1I%%aDNN&(**1'!Q1*=  FF1I%%aDNN&(**1'!Q1*=  $KK,, 'h 7 7T

 $KK,, 'h 7 7T

 $KK,, 'h 7 7T

 ??? ))  /1www/? 
 ))  /1www/? 
 ))  /1www/?  		!Q1%--ab9Aq	!!!$.."$&&-gaAq&9  		!Q1%--ab9Aq	!!!S$.."$&&-gaAq&9  		!Q1%--ab9Aq	!!!$.."$&&-gaAq&9  GGAJJJ!++b""55&""	D   '""D |||#1H  !!!QA+!Aq!"aK.0EKK1%%aT\\:IIaL>>Cq!Azrn   )r   r_   r   r?   r-   r  r  r  r  r  r  rP   r  r   r  r   r  r&   r  r(   r  r'   r!  r  r  r  )
rs   rt   ru   rv   r`   	LayerNormr^   rq   rx   ry   rz   s   @rD   r   r   e  sL     <<-~1@t trn   r   c                      ^  \ rS rSrSSSSS\R
                  \R                  SSSSSSSSSSSSSSS4U 4S	 jjrS
 rSr	U =r
$ )MultiScaleBlocki^        @FNrZ   r   r  Tc                   > [         T#U ]  5         Xl        X l        U" U5      U l        UU l        U Vs/ s H  nUS:  a  US-   OUPM     nnUnU Vs/ s H  n[        US-  5      PM     nnU(       a  UOUn UU l        [        UU 40 SU_SU_SU_SU_SU_SU_S	U_S
U_SU_SU_SU_SU_SU_SU_SU_SU_SU_6U l	        U	S:  a  [        U	5      O[        R                  " 5       U l        U" U 5      U l        [        U U-  5      n!UU l        Ub  US:  a  X-  n"OUn"[!        U U!U"U
US9U l        X:w  a  [        R$                  " X5      U l        [)        U5      S:  a  [        R*                  " UUUSS9U l        g S U l        g s  snf s  snf )Nr   r   r  r"  r#  r_   r$  r%  rR   rS   r&  r   r-   r  r  r  r'  r!  r  rZ   )rh   ri   rj   rk   r_   r   Fr  )r]   r^   r   r   norm1dim_mul_in_attr   use_grad_checkpointr   r   r   r`   Identityr   norm2r   rX   mlpra   r   rK   r  	pool_skip)$rg   r   r   r  r"  	mlp_ratior#  qk_scaler_   r   rk   r&  up_rater$  r%  rR   rS   r-   r   r  r  r  r'  r!  rB  r  rC  skernel_skipstride_skipskippadding_skipatt_dimmlp_hidden_dimmlp_dim_outrl   s$                                      rD   r^   MultiScaleBlock.__init__`  s   : 	_
,6>?hAq1u1,h?3>?;4DAI;?+'#6 '
  
 "	

 
  
 
  
 
  
 "
 (
 
 "
 ,
  .!
" 0#
$ .%
& &'
	, $-s?HY 	(
Wy01*7Q;-K!K*$
 >		#/DI
 ;!# LL[,%I 	 *. 	a @?s   FF
c                    U R                  U5      nU R                  (       a$  [        R                  " U R                  X25      u  pEOU R                  X25      u  pEU R                  (       a+  U R
                  U R                  :w  a  U R                  U5      n[        XR                  X R                  S9u  pgX`R                  U5      -   nU R                  U5      nU R                  (       a"  [        R                  " U R                  U5      nOU R                  U5      nU R                  (       d+  U R
                  U R                  :w  a  U R                  U5      nXR                  U5      -   nX4$ )N)r   )rA  rC  
checkpointr   rB  r   r   r   r   rG  r   r   rE  rF  )	rg   rA   r   x_normx_blockthw_shape_newx_resr6  x_mlps	            rD   rq   MultiScaleBlock.forward  s   A##%/%:%:		6&."G] &*YYv%A"G488t||#;		&!A!~~y8J8JLNN7++A##))$((F;EHHV$E""txx4<<'?		&!Au%%rn   )r   r   rB  r   r   r   rF  rA  rE  rG  r   rC  )rs   rt   ru   rv   r`   rw   r<  r^   rq   rx   ry   rz   s   @rD   r>  r>  ^  s`     ''<<!7U/n   rn   r>  c                      ^  \ rS rSrSrSSSSSS/ S	Q/ S
Q/ SQSSSSSSSSSSSSSSSSSSS4U 4S jjrS r\R                  R                  S 5       r
S rS rS rSrU =r$ )MViTv2i  a  
Improved Multiscale Vision Transformers for Classification and Detection
Yanghao Li*, Chao-Yuan Wu*, Haoqi Fan, Karttikeya Mangalam, Bo Xiong, Jitendra Malik,
    Christoph Feichtenhofer*
https://arxiv.org/abs/2112.01526
Multiscale Vision Transformers
Haoqi Fan*, Bo Xiong*, Karttikeya Mangalam*, Yanghao Li*, Zhicheng Yan, Jitendra Malik,
    Christoph Feichtenhofer*
https://arxiv.org/abs/2104.11227
   `   i  r   r   r   )r   r   r   )r   r   r   )r   r   r   NrZ   r?  Tr  Fc                 
  > [         T0U ]  5         SnXl        X0l        X l        XPl        X`l        UU l        UU l        UU l	        UU l
        UU l        Xl        [        [        R                  SS9nU(       a  [!        [#        UUUUU	S95      U l        O[#        UUUUU	S9U l        XHS   -  XS   -  XS   -  /n[&        R(                  " U5      n [*        R,                  " SX5       V!s/ s H  n!U!R/                  5       PM     n"n!U R                  (       a7  [        R0                  " [*        R2                  " SSU5      5      U l        U S-   n#OU n#U R                  (       a1  [        R0                  " [*        R2                  " SU#U5      5      U l        U R                  (       Ga  U R                  (       a  [        R0                  " [*        R2                  " SU R8                  S   U R8                  S   -  U5      5      U l        [        R0                  " [*        R2                  " SU R8                  S   U5      5      U l        U R                  (       a1  [        R0                  " [*        R2                  " SSU5      5      U l        O1[        R0                  " [*        R2                  " SU#U5      5      U l        U
c   e[A        U
5      u  n$n%n&n'n(n)Un*U(       a  U" U5      OS U l!        [        RD                  " 5       U l#        [I        U5       GHs  n+[K        UU%U+   5      nU(       a  [K        UU$U+   [K        UU%U+   5      S9n,O![K        UU$U+S-      [K        UU%U+S-      5      S9n,[M        S$0 S	U_S
U,_SU_SU*_SU_SU_SU R                  _SU"U+   _SU_S[O        U&5      U+:  a  U&U+   O/ _S[O        U'5      U+:  a  U'U+   O/ _S[O        U(5      U+:  a  U(U+   O/ _S[O        U)5      U+:  a  U)U+   O/ _SU_SU R                  _SU_SU_SU_SU_SU_SU_SU_SS _6n-U(       a
  [!        U-S S!9n-U RF                  RQ                  U-5        [O        U(U+   5      S:  a'  [S        U*U(U+   5       V.V/s/ s H  u  n.n/U.U/-  PM     n*n.n/U,nGMv     U" U5      U l*        [        RV                  " 5       U l,        U R                  (       as  U R                  (       aN  [[        U R:                  S"S#9  [[        U R<                  S"S#9  U R                  (       a  [[        U R>                  S"S#9  O[[        U R6                  S"S#9  U R                  (       a  [[        U R4                  S"S#9  U R]                  U R^                  5        g s  sn!f s  sn/n.f )%Nr   gư>)eps)r   r   r   r   r   r   r   r   )r   r   r   r  r"  rH  r#  r_   r   r&  r$  r%  rR   rS   r-   r   r  r  r  r'  r!  rB  r  rC  F)offload_to_cpur
  r  r  )0r]   r^   img_sizenum_classes	embed_dimr  r   cls_embed_onuse_abs_poszero_decay_pos_clsrC  sep_pos_embedr_   r   r`   r<  r   r   patch_embedr  r  rH   linspaceitemr  r   	cls_token	pos_embed
patch_dimspos_embed_spatialpos_embed_temporalpos_embed_classrV   	norm_stem
ModuleListblocksrJ   r   r>  rK   rM   zipr   rD  headr   apply_init_weights)1rg   rc  re  rd  
num_framesr  r   patch_kernelpatch_stridepatch_paddingconfigdropout_ratedrop_path_raterH  r#  r-   rf  rg  r  r  r'  r!  rB  r  rh  r  rs  ri  rC  in_chansr&  ro  num_patchesrA   dprpos_embed_dimr    r!   rP   rQ   rR   rS   r"  rO   r   attention_blockr,   r   rl   s1                                                   rD   r^   MViTv2.__init__  sZ   > 	 &""
(&"4#6 *%R\\t4
1#%'') D  *!##% D q/)Q'Q'


 ggj)!&>!I !IAqvvx!I  \\%++aI*FGDN'!OM'M\\A}i8:DN !!)+KK4??1#58J#J )+*,& +-,,KK4??1#5yA+C'$$+-<<Aq)4,6D( "$KK=)<"> !!!BWC?67Hi
2;I.mmouA#Ix{;I%AJ'	8A;? &AEN'	8AE?C
 . +++ $+ &	+
 $+ "+ ..+ a&+ &+ '*&kAo2+ ),Gq(8'!*b+ ),H(9!r+ +.i.1*<)A,"+ + #//+  &!+" !0#+$ "2%+& #4'+( "2)+*  .++, *-+. %*/+O0 #"4#E#;KK/8A;!# ),J(D(Df FN(D    Ia d y)	KKM	!!d44$?d554@$$!$"6"6DAdnn$7$..d3

4%%&I`s   )U,$U1c                 ,   [        U[        R                  5      (       a  [        R                  R	                  UR
                  SS9  [        U[        R                  5      (       a9  UR                  b+  [        R                  R                  UR                  S5        g g g [        U[        R                  5      (       aU  [        R                  R                  UR                  S5        [        R                  R                  UR
                  S5        g g )Nr
  r  r   r   )	r   r`   ra   initr   weightr  	constant_r<  )rg   ms     rD   ry  MViTv2._init_weights  s    a##GG!!!((!5!RYY''AFF,>!!!&&!, -?'2<<((GGaffa(GGahh, )rn   c                    / nU R                   (       a  U R                  (       a7  U R                  (       a  UR                  / SQ5        OUR	                  S/5        U R
                  (       a  UR                  / SQ5        U R                  (       a  UR                  S/5        U R                  (       a  UR	                  S5        U$ )N)rp  rq  rr  rn  )r&   r'   
rel_pos_hwr(   rm  )rh  rg  ri  extendrM   r  r  rf  )rg   namess     rD   no_weight_decayMViTv2.no_weight_decay  s    ""%%LL "  LL+/##EF$$k]+  [)rn   c           	         US   US   US   pTnU R                   (       a  US S 2SS2S S 24   nUS S 2SS 24   nUR                  S   nU R                  u  pn
X-  U
-  U:X  d   eXU
4X4U4:w  ar  [        R                  " US S 2S S 2S S 24   R                  SXU
S5      R                  SSSSS5      X4U4S	S
9nUR                  SSX4-  U-  5      R                  SSS5      nU R                   (       a  [        R                  " WU4SS9nU$ )Nr3  r)   r   r   r   r   r   	trilinearr+   r   )	rf  r1   ro  r2   r3   r4   r5   rH   r   )rg   rn  bcthwthwcls_pos_embedtxy_nump_tp_hp_wr   s               rD   _get_pos_embedMViTv2._get_pos_embed  s'   )U2Yb	a%a1ai0M!!QR%(I//!$#y3')))c?qQi'MM!Q'"**1c+-//6wq!Q1/EAY 	M &--a./eai99@Aq9I  		=)"<!DIrn   c                    UR                  SSSSS5      nU R                  U5      u  pUS   US   US   pTnUR                  u  pgnU R                  (       a3  U R                  R                  USS5      n	[        R                  " X4SS	9nU R                  (       a  U R                  (       a  U R                  R                  SU R                  S   S5      [        R                  " U R                  U R                  S   U R                  S   -  SS	9-   n
U R                  (       a#  [        R                  " U R                  U
/S5      n
U R!                  X5      n
X-   nO U R!                  U R"                  U5      n
X-   nU R$                  (       a  U R'                  U5      nU R(                  (       a  U R)                  U5      nX4U/nU R*                   H  nU" X5      u  pM     U R-                  U5      nU$ )
Nr   r   r   r   r   r  r3  r)   r   )r5   rj  r1   rf  rm  expandrH   r   rg  ri  rp  repeatro  repeat_interleaverq  rr  r  rn  r_   pos_droprs  ru  r   )rg   rA   r  r   r   r   r   r   r   
cls_tokensrn  thwblks                rD   forward_featuresMViTv2.forward_features  s   IIaAq!$##A&)U2Yb	a''a..2rJ		:/q1A!! 2299tq)1.050G0G//*T__Q-??1	
 $$ %		4+?+?*KQ OI //	A	M //F	M>>a A>>q!AQi;;C[FAs  IIaLrn   c                 J    U R                  U5      nU R                  U5      nU$ r   )r  rw  rp   s     rD   rq   MViTv2.forward  s$    !!!$IIaLrn   )ru  rf  rm  r   r_   re  rw  rc  r   rs  rd  r  rj  rn  rr  rp  rq  ri  rg  rC  rh  )rs   rt   ru   rv   r   r^   ry  rH   jitignorer  r  r  rq   rx   ry   rz   s   @rD   r]  r]    s    	   ;o'b- YY *0&P rn   r]  )TF)rZ   F)r   r   F)TN)'collectionsr   	functoolsr   numpyr  rH   torch.nnr`   torch.nn.functional
functionalr2   torch.utils.checkpointutilsrU  timm.models.layersr   fairscale.nn.checkpointr   ImportErrorMViTv2_Base_configrE   rV   ModulerX   r|   floatboolr   r   r   r   r   r   r   r   r   r>  r]  r  rn   rD   <module>r     s}   $       + + ,: C1c(RI.SAs8b#Y/<|\<|\M=-M=-M=-	A
 !( $ (,&+8 CF")) >%bii %E 4 ;ryy ;  5  5F"J> -`%Pv")) vro bii o d_RYY _A  s   E E)(E)