
    9i-                         S SK Jr  S SKJrJr  S SKrS SKrS SKJ	s  J
r  S SKJs  Jr  S SKJ	r	  S SKJrJr  S SKJr   " S S\	R(                  5      r " S	 S
\	R*                  5      r " S S\	R*                  5      r " S S\	R*                  5      r " S S\	R*                  5      r " S S\	R*                  5      r " S S\	R*                  5      r " S S\	R*                  5      r " S S\	R*                  5      r " S S\	R*                  5      rg)    )OrderedDict)TupleUnionN)nn)
BertConfigBertForMaskedLM)compatible_position_idsc                   H   ^  \ rS rSrSrS\R                  4U 4S jjrSrU =r	$ )	LayerNorm   z*Subclass torch's LayerNorm to handle fp16.xc                    > UR                   n[        TU ]	  UR                  [        R
                  5      5      nUR                  U5      $ N)dtypesuperforwardtypetorchfloat32)selfr   	orig_typeret	__class__s       h/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/multi_modal/team/utils.pyr   LayerNorm.forward   s6    GG	goaffU]]34xx	""     )
__name__
__module____qualname____firstlineno____doc__r   Tensorr   __static_attributes____classcell__r   s   @r   r   r      s    4# # #r   r   c                   6    \ rS rSrS\R
                  4S jrSrg)	QuickGELU   r   c                 :    U[         R                  " SU-  5      -  $ )NgZd;?)r   sigmoidr   r   s     r   r   QuickGELU.forward!   s    5==+++r   r   N)r   r   r    r!   r   r#   r   r$   r   r   r   r(   r(      s    , ,r   r(   c                      ^  \ rS rSr S
S\S\S\R                  4U 4S jjjrS\R                  4S jrS\R                  4S jr	S	r
U =r$ )ResidualAttentionBlock%   d_modeln_head	attn_maskc                 l  > [         TU ]  5         [        R                  " X5      U l        [        U5      U l        [        R                  " [        S[        R                  " XS-  5      4S[        5       4S[        R                  " US-  U5      4/5      5      U l        [        U5      U l        X0l        g )Nc_fc   geluc_proj)r   __init__r   MultiheadAttentionattnr   ln_1
Sequentialr   Linearr(   mlpln_2r3   )r   r1   r2   r3   r   s       r   r9   ResidualAttentionBlock.__init__'   s     	))':	g&	==&"))Gq["AB )+."BIIgk7$CDF GH g&	"r   r   c                     U R                   b.  U R                   R                  UR                  UR                  S9OS U l         U R	                  XUSU R                   S9S   $ )Nr   deviceF)need_weightsr3   r   )r3   tor   rD   r;   r,   s     r   	attention ResidualAttentionBlock.attention6   so     !% : **''88 + @D 	 yy!%4>>  CCDF 	Fr   c                     XR                  U R                  U5      5      -   nXR                  U R                  U5      5      -   nU$ r   )rG   r<   r?   r@   r,   s     r   r   ResidualAttentionBlock.forward=   s9    tyy|,,1&&r   )r;   r3   r<   r@   r?   r   )r   r   r    r!   intr   r#   r9   rG   r   r$   r%   r&   s   @r   r/   r/   %   sW    
 ,0### "LL# #F5<< F  r   r/   c            	       z   ^  \ rS rSr  S
S\S\S\S\R                  4U 4S jjjrS\R                  4S jrS	r	U =r
$ )TransformerC   widthlayersheadsr3   c           
         > [         TU ]  5         XPl        Xl        X l        [
        R                  " [        U5       Vs/ s H  n[        XU5      PM     sn6 U l	        g s  snf r   )
r   r9   use_gcrO   rP   r   r=   ranger/   	resblocks)r   rO   rP   rQ   r3   rS   _r   s          r   r9   Transformer.__init__E   sZ     	
6])
" #5;")
  )
s   Ar   c                     U R                   (       a+  U R                   H  n[        R                  " X!5      nM     U$ U R                  U5      $ r   )rS   rU   
checkpoint)r   r   
each_blocks      r   r   Transformer.forwardT   s;    ;;"nn
))*8 -H>>!$$r   )rP   rU   rS   rO   )NF)r   r   r    r!   rK   r   r#   r9   r   r$   r%   r&   s   @r   rM   rM   C   sR     ,0  "LL	 % % %r   rM   c                   l   ^  \ rS rSr SS\S\S\S\S\S\4U 4S jjjrS	\R                  4S
 jrSr	U =r
$ )VisionTransformer]   input_resolution
patch_sizerO   rP   rQ   
output_dimc                   > [         T	U ]  5         Xl        X`l        [        R
                  " SUUUSS9U l        US-  n[        R                  " U[        R                  " U5      -  5      U l
        [        R                  " U[        R                  " X-  S-  S-   U5      -  5      U l        [        U5      U l        [        X4XWS9U l        [        U5      U l        [        R                  " U[        R                  " X65      -  5      U l        g )N   F)in_channelsout_channelskernel_sizestridebiasg            )rS   )r   r9   r_   ra   r   Conv2dconv1	Parameterr   randnclass_embeddingpositional_embeddingr   ln_prerM   transformerln_postproj)
r   r_   r`   rO   rP   rQ   ra   rS   scaler   s
            r   r9   VisionTransformer.__init___   s     	 0$YY"
 t!||EEKK4F,FG$&LL+a/!3U:< 2< %=!&&ueK 'LLU)G!GH	r   r   c           	         U R                  U5      nUR                  UR                  S   UR                  S   S5      nUR                  SSS5      nU R                  R                  UR                  5      [        R                  " UR                  S   SUR                  S   UR                  UR                  S9-   n[        R                  " X!/SS9nXR                  R                  UR                  5      -   nU R                  U5      nUR                  SSS5      nU R                  U5      nUR                  SSS5      nU R                  US S 2SS S 24   5      nU R                  b  XR                  -  nU$ Nr   rj   ri   rC   dim)rl   reshapeshapepermutero   rF   r   r   zerosrD   catrp   rq   rr   rs   rt   )r   r   ro   s      r   r   VisionTransformer.forward|   s8   JJqMIIaggaj!''!*IIaA..11!'':KK
Aqwwr{!''!((STII*)),,QWW55KKNIIaAQIIaALL1a7$99 IIAr   )	ro   rl   r_   rs   rq   ra   rp   rt   rr   )F)r   r   r    r!   rK   r9   r   r#   r   r$   r%   r&   s   @r   r]   r]   ]   sg     I#&I I I 	I
 I !I I:  r   r]   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )CLIPVisionWrapper   c           	      H   > [         TU ]  5         [        SSSSSSS9U l        g )N                  )r_   r`   rO   rP   rQ   ra   )r   r9   r]   vision_transformer)r   r   s    r   r9   CLIPVisionWrapper.__init__   s-    "3 #r   c           	         U R                   R                  U5      nUR                  UR                  S   UR                  S   S5      nUR	                  SSS5      nU R                   R
                  R                  UR                  5      [        R                  " UR                  S   SUR                  S   UR                  UR                  S9-   n[        R                  " X!/SS9nXR                   R                  R                  UR                  5      -   nU R                   R                  U5      nUR	                  SSS5      nU R                   R                  U5      nUR	                  SSS5      nUR                  5       nU R                   R!                  US S 2SS S 24   5      nU R                   R"                  b  XR                   R"                  -  nX4$ rx   )r   rl   r|   r}   r~   ro   rF   r   r   r   rD   r   rp   rq   rr   cloners   rt   )r   r   ro   x_tensors       r   r   CLIPVisionWrapper.forward   s   ##))!,IIaggaj!''!*IIaA11AADDQWWMKK
Aqwwr{!''!((STII*''<<??HH##**1-IIaA##//2IIaA779##++AaAgJ7""''3++000A{r   )r   r   r   r    r!   r9   r   r$   r%   r&   s   @r   r   r      s     r   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )BertWrapper   c                    > [         [        U ]  5         [        R                  " U5      n[        U5      R                  U l        [        R                  " SUSS9U l	        [        R                  " SU5      U l
        g )Nr   F)rh   )r   r   r9   r   from_json_filer   bertr   r>   	projectorprojector_token_embeds)r   config_jsonfeat_dim	token_dimbert_configr   s        r   r9   BertWrapper.__init__   sY    k4)+ //<#K055	3u=&(iiY&?#r   c                     UUS.nU R                   " S0 UDSS0D6nUS   nUS S 2SS S 24   nU R                  U5      U R                  U5      4$ )N)	input_idsattention_maskreturn_dictFr   r   r   r   r   )r   r   r   trans_featuresoutput_statesoutput_tokens
cls_tokenss          r   r   BertWrapper.forward   sg    ",
 		FNFF%a("1a7+
~~j)4+F+F,  	r   r   r   r&   s   @r   r   r      s    @ r   r   c                   N   ^  \ rS rSrSS\R
                  S4U 4S jjrS rSrU =r	$ )Mlp   Ng        c                   > [         TU ]  5         U=(       d    UnU=(       d    Un[        R                  " X5      U l        U" 5       U l        [        R                  " X#5      U l        [        R                  " U5      U l        g r   )	r   r9   r   r>   fc1actfc2Dropoutdrop)r   in_featureshidden_featuresout_features	act_layerr   r   s         r   r9   Mlp.__init__   s]     	#2{)8[99[:;99_;JJt$	r   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r   r   r,   s     r   r   Mlp.forward   sH    HHQKHHQKIIaLHHQKIIaLr   )r   r   r   r   )
r   r   r    r!   r   GELUr9   r   r$   r%   r&   s   @r   r   r      s$     "&"77% r   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )
CrossLayer   c                   > [         [        U ]  5         [        R                  " U5      U l        [        R                  " U5      U l        [        R                  " U5      U l        [        R                  " USS9U l	        [        R                  " USS9U l
        [        UX-  SS9U l        [        R                  " S5      U l        [        R                  " S5      U l        [        R                  " S5      U l        g )Nr   )	embed_dim	num_headsg?)r   r   r   )r   r   r9   r   r   norm1norm2norm3r:   	self_attn
cross_attnr   ffnr   dropout1dropout2dropout3)r   r   	mlp_ratior   s      r   r9   CrossLayer.__init__   s    j$(*\\(+
\\(+
\\(+
..".//". $0
 

3

3

3r   c           	      t   U R                  U5      nU R                  X-   R                  SSS5      X-   R                  SSS5      UR                  SSS5      US:H  S9S   R                  SSS5      nX@R                  U5      -   nU R	                  U5      nU R                  X-   R                  SSS5      UR                  SSS5      UR                  SSS5      5      S   R                  SSS5      nX@R                  U5      -   nU R                  U5      nX@R                  U R                  U5      5      -   nU$ )Nrj   r   ri   )key_padding_mask)
r   r   r~   r   r   r   r   r   r   r   )r   text_tensors
text_masksimage_tensorsretrieved_tensorsretrieved_tensors_ress         r   r   CrossLayer.forward   sh    $

+< = $1::1aC1::1aC!))!Q2(Ao	 !/ !

 !
 WQ1 	 .!1# # !%

+< = $1::1aC!!!Q*!!!Q*!, -.!/ 07wq!Q/? 	 .!1# # !%

+< =-HH*+1- - ! r   )	r   r   r   r   r   r   r   r   r   r   r&   s   @r   r   r      s    (&! !r   r   c                   8   ^  \ rS rSrU 4S jrSS jrS rSrU =r$ )TEAMi  c                 `  > [         [        U ]  5         Xl        X l        [
        R                  " [        SSS9/5      U l        [
        R                  " SS5      U l
        [
        R                  " SS5      U l        [        R                  " US5      n[        US5        U R                  USS9  g )	Nr   ri   )r   r   r   cpuz'text_model.bert.embeddings.position_idsT)strict)r   r   r9   
text_modelimage_modelr   
ModuleListr   cross_modelr>   image_tensor_fctext_tensor_fcr   loadr	   load_state_dict)r   r   r   
pretrainedparamsr   s        r   r9   TEAM.__init__  s    dD"$$&==346  "yys3 iic2J. I	KVD1r   c                     Ub*  U R                  X5      u  pE[        R                  " USSS9nOSu  pEUb*  U R                  U5      u  pg[        R                  " USSS9nOSu  pgXEXg4$ )N       @rj   pr{   )NN)r   F	normalizer   )r   	text_data	text_mask
img_tensortext_featurer   image_featurer   s           r   get_featureTEAM.get_feature,  sq     )-)N&L;;|sBL)3&L!+/+;+;J+G(MKK!DM+5(M=GGr   c           
         [         R                  " U5      n/ nU R                  U5      nUR                  UR                  5      nU R
                   H  nU" XUU5      nU R                  U5      n	[         R                  " [        R                  " U	SSS9[        R                  " USSS9-  SS9n
[         R                  " X-  SS9[         R                  " [         R                  " USS9SS9-  nUR                  U5        M     U$ )Nr   ri   r   rz   rj   g      ?)min)r   
zeros_liker   r   r   r   r   sumr   r   clampappend)r   r   r   r   r   pair_score_listtext_tensors_projtext_mask_floateach_cross_modelretrieved_tensors_proj
pair_scorepair_score_reduceds               r   get_cross_scoreTEAM.get_cross_score;  s   !,,\: //=#..):)@)@A $ 0 0 01>1B!D &*%9%9:K%L"2cqA++/3A>?J "',!"57<{{IIo1538@"@ ""#56 !1 r   )r   r   r   r   r   )NNN)	r   r   r    r!   r9   r   r   r$   r%   r&   s   @r   r   r     s    2 H r   r   )collectionsr   typingr   r   numpynpr   torch.nn.functionalr   
functionalr   torch.utils.checkpointutilsrY   transformersr   r   -modelscope.utils.compatible_with_transformersr	   r   Moduler(   r/   rM   r]   r   r   r   r   r   r   r   r   <module>r     s    $      + +  4# #,		 ,RYY <%")) %44		 4n"		 "J")) 0")) 2-! -!`4299 4r   