
    9ix.                        S SK r S SKJr  S SKJrJr  S SKrS SKrS SKJ	r	   " S S\	R                  5      r " S S\	R                  5      r " S	 S
\	R                  5      r " S S\	R                  5      r " S S\	R                  5      r " S S\	R                  5      rS\4S jr\R&                  R)                  5       (       a  SOSS4S\S\\\R,                  4   4S jjrg)    N)OrderedDict)TupleUnion)nnc                      ^  \ rS rSrS\S\S\\\\\\4   \4   S\S\S\S\S	\S
\S\4U 4S jjrS rS r	\
S 5       rS rS rS rSrU =r$ )CLIP   	embed_dimimage_resolutionvision_layersvision_widthvision_patch_sizecontext_length
vocab_sizetransformer_widthtransformer_headstransformer_layersc           	      |  > [         TU ]  5         X`l        US-  n[        UUUUUUS9U l        [        UU
U	U R                  5       S9U l        Xpl        [        R                  " Xx5      U l        [        R                  " [        R                  " U R                  U5      5      U l        [!        U5      U l        [        R                  " [        R                  " X5      5      U l        [        R                  " [        R&                  " / 5      [(        R*                  " S5      -  5      U l        U R/                  5         g )N@   )input_resolution
patch_sizewidthlayersheads
output_dim)r   r   r   	attn_maskg$I$I,@)super__init__r   VisionTransformervisualTransformerbuild_attention_masktransformerr   r   	Embeddingtoken_embedding	Parametertorchemptypositional_embedding	LayerNormln_finaltext_projectiononesnploglogit_scaleinitialize_parameters)selfr
   r   r   r   r   r   r   r   r   r   vision_heads	__class__s               i/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/multi_modal/soonet/clip.pyr   CLIP.__init__   s     	,#r)'-(  " '#%#//1	3 %!||JJ$&LLKK++->?%A!!"34!||KK)5 7<<

29I(IJ""$    c                 .   [         R                  R                  U R                  R                  SS9  [         R                  R                  U R
                  SS9  U R                  R                  S-  SU R                  R                  -  S-  -  nU R                  R                  S-  nSU R                  R                  -  S-  nU R                  R                   H  n[         R                  R                  UR                  R                  US9  [         R                  R                  UR                  R                  R                  US9  [         R                  R                  UR                  R                  R                  US9  [         R                  R                  UR                  R                  R                  US9  M     U R                   b@  [         R                  R                  U R                   U R                  R                  S-  S9  g g )Ng{Gz?)stdg{Gz?         )r   initnormal_r%   weightr)   r#   r   r   	resblocksattnin_proj_weightout_projmlpc_fcc_projr,   )r2   proj_stdattn_stdfc_stdblocks        r5   r1   CLIP.initialize_parameters<   s   
,,33>
11t<$$**D0!!(((4/1##))4/d&&,,,t3%%//EGGOOEJJ558ODGGOOEJJ//66HOEGGOOEIINN11vO>GGOOEII,,33OB	 0 +GGOO$$$*:*:*@*@$*F  H ,r7   c                     [         R                  " U R                  U R                  5      nUR                  [	        S5      5        UR                  S5        U$ )Nz-inf   )r'   r(   r   fill_floattriu_)r2   masks     r5   r"   CLIP.build_attention_maskN   s@     {{4..0C0CD

5=!

1r7   c                 V    U R                   R                  R                  R                  $ N)r    conv1r>   dtype)r2   s    r5   rU   
CLIP.dtypeV   s    {{  ''---r7   c                 V    U R                  UR                  U R                  5      5      $ rS   )r    typerU   )r2   images     r5   encode_imageCLIP.encode_imageZ   s    {{5::djj122r7   c                    U R                  U5      R                  U R                  5      nX R                  R                  U R                  5      -   nUR	                  SSS5      nU R                  U5      nUR	                  SSS5      nU R                  U5      R                  U R                  5      nU[        R                  " UR                  S   5      UR                  SS94   nU$ )NrL   r   r;   dim)r%   rX   rU   r)   permuter#   r+   r'   arangeshapeargmax)r2   textxs      r5   encode_textCLIP.encode_text]   s      &++JJ ))..tzz::IIaAQIIaAMM!!!$**- ell1771:&(;;<r7   c                    U R                  U5      nU R                  U5      nX3R                  SSS9-  nXDR                  SSS9-  nU R                  R	                  5       nXS-  UR                  5       -  nUR                  5       nXg4$ )NrL   T)r_   keepdim)rZ   rf   normr0   expt)r2   rY   rd   image_featurestext_featuresr0   logits_per_imagelogits_per_texts           r5   forwardCLIP.forwardm   s    **51((. (*=*=4 +> +! !%(:(:q$(:(OO &&**,&7-//:KK*,,.  00r7   )	r   r+   r0   r)   r,   r%   r#   r    r   )__name__
__module____qualname____firstlineno__intr   r   r   r1   r"   propertyrU   rZ   rf   rq   __static_attributes____classcell__r4   s   @r5   r   r      s    +%+% "	+%
 !sCc'9!:C!?@+% +%  #+%  +% +%  #+%  #+% !$+%ZH$ . .3 1 1r7   r   c                   H   ^  \ rS rSrSrS\R                  4U 4S jjrSrU =r	$ )r*      z*Subclass torch's LayerNorm to handle fp16.re   c                    > UR                   n[        TU ]	  UR                  [        R
                  5      5      nUR                  U5      $ rS   )rU   r   rq   rX   r'   float32)r2   re   	orig_typeretr4   s       r5   rq   LayerNorm.forward   s6    GG	goaffU]]34xx	""r7    )
rs   rt   ru   rv   __doc__r'   Tensorrq   ry   rz   r{   s   @r5   r*   r*      s    4# # #r7   r*   c                   6    \ rS rSrS\R
                  4S jrSrg)	QuickGELU   re   c                 :    U[         R                  " SU-  5      -  $ )NgZd;?)r'   sigmoidr2   re   s     r5   rq   QuickGELU.forward   s    5==+++r7   r   N)rs   rt   ru   rv   r'   r   rq   ry   r   r7   r5   r   r      s    , ,r7   r   c                      ^  \ rS rSr S
S\S\S\R                  4U 4S jjjrS\R                  4S jrS\R                  4S jr	S	r
U =r$ )ResidualAttentionBlock   d_modeln_headr   c                 l  > [         TU ]  5         [        R                  " X5      U l        [        U5      U l        [        R                  " [        S[        R                  " XS-  5      4S[        5       4S[        R                  " US-  U5      4/5      5      U l        [        U5      U l        X0l        g )NrD      gelurE   )r   r   r   MultiheadAttentionr@   r*   ln_1
Sequentialr   Linearr   rC   ln_2r   )r2   r   r   r   r4   s       r5   r   ResidualAttentionBlock.__init__   s     	))':	g&	==&"))Gq["AB )+."BIIgk7$CDF GH g&	"r7   re   c                     U R                   b.  U R                   R                  UR                  UR                  S9OS U l         U R	                  XUSU R                   S9S   $ )NrU   deviceF)need_weightsr   r   )r   torU   r   r@   r   s     r5   	attention ResidualAttentionBlock.attention   so     !% : **''88 + @D 	 yy!%4>>  CCDF 	Fr7   c                     XR                  U R                  U5      5      -   nXR                  U R                  U5      5      -   nU$ rS   )r   r   rC   r   r   s     r5   rq   ResidualAttentionBlock.forward   s9    tyy|,,1&&r7   )r@   r   r   r   rC   rS   )rs   rt   ru   rv   rw   r'   r   r   r   rq   ry   rz   r{   s   @r5   r   r      sW    
 ,0### "LL# #F5<< F  r7   r   c            	       x   ^  \ rS rSr S
S\S\S\S\R                  4U 4S jjjrS\R                  4S jrS	r	U =r
$ )r!      r   r   r   r   c           
         > [         TU ]  5         Xl        X l        [        R
                  " [        U5       Vs/ s H  n[        XU5      PM     sn6 U l        g s  snf rS   )	r   r   r   r   r   r   ranger   r?   )r2   r   r   r   r   _r4   s         r5   r   Transformer.__init__   sU    
 	
6])
" #5;")
  )
s   Are   c                 $    U R                  U5      $ rS   )r?   r   s     r5   rq   Transformer.forward   s    ~~a  r7   )r   r?   r   rS   )rs   rt   ru   rv   rw   r'   r   r   rq   ry   rz   r{   s   @r5   r!   r!      sO     ,0	  "LL	 ! ! !r7   r!   c                   f   ^  \ rS rSrS\S\S\S\S\S\4U 4S jjrS	\R                  4S
 jrSr	U =r
$ )r      r   r   r   r   r   r   c                   > [         TU ]  5         Xl        X`l        [        R
                  " SUUUSS9U l        US-  n[        R                  " U[        R                  " U5      -  5      U l
        [        R                  " U[        R                  " X-  S-  S-   U5      -  5      U l        [        U5      U l        [        X4U5      U l        [        U5      U l        [        R                  " U[        R                  " X65      -  5      U l        g )N   F)in_channelsout_channelskernel_sizestridebiasr:   r;   rL   )r   r   r   r   r   Conv2drT   r&   r'   randnclass_embeddingr)   r*   ln_prer!   r#   ln_postproj)	r2   r   r   r   r   r   r   scaler4   s	           r5   r   VisionTransformer.__init__   s     0$YY"
 t!||EEKK4F,FG$&LL+a/!3U:< 2< %=!&&ue< 'LLU)G!GH	r7   re   c           	         U R                  U5      nUR                  UR                  S   UR                  S   S5      nUR                  SSS5      nU R                  R                  UR                  5      [        R                  " UR                  S   SUR                  S   UR                  UR                  S9-   n[        R                  " X!/SS9nXR                  R                  UR                  5      -   nU R                  U5      nUR                  SSS5      nU R                  U5      nUR                  SSS5      nU R                  US S 2SS S 24   5      nU R                  b  XR                  -  nU$ )Nr   rL   r]   r;   r   r^   )rT   reshaperb   r`   r   r   rU   r'   zerosr   catr)   r   r#   r   r   )r2   re   class_tokens      r5   rq   VisionTransformer.forward   s8   JJqMIIaggaj!''!*IIaA**--agg6GGAJ1772;aggahh:H HII{&A.)),,QWW55KKNIIaAQIIaALL1a7$99 IIAr7   )	r   rT   r   r   r   r   r)   r   r#   )rs   rt   ru   rv   rw   r   r'   r   rq   ry   rz   r{   s   @r5   r   r      sQ    I I# Ic II%(I69I.  r7   r   
state_dictc                    U S   R                   S   n[        U R                  5        Vs/ s H5  nUR                  S5      (       d  M  UR	                  S5      (       d  M3  UPM7     sn5      nU S   R                   S   n[        U S   R                   S   S-
  S-  5      nXE-  nU S	   R                   S   nU S
   R                   S   nU S   R                   S   n	U S   R                   S   n
U
S-  n[        [        S U  5       5      5      n[        XvX1XHU	XU5
      nS H  nX;   d  M
  X	 M     UR                  U 5        UR                  5       $ s  snf )Nzvisual.conv1.weightr   zvisual.z.attn.in_proj_weightr]   zvisual.positional_embeddingrL   g      ?r,   r)   ztoken_embedding.weightzln_final.weightr   c              3   z   #    U  H1  nUR                  S 5      (       d  M  UR                  S5      S   v   M3     g7f)ztransformer.resblocks.r;   N)
startswithsplit).0ks     r5   	<genexpr>build_model.<locals>.<genexpr>   s1      6%/||34 AGGCLOZs   ;;)r   r   r   )
rb   lenkeysr   endswithroundsetr   load_state_dicteval)r   r   r   r   r   	grid_sizer   r
   r   r   r   r   r   modelkeys                  r5   build_modelr      s   34::1=L??$$a<<	" 	
'(zz2H'I 	
$ M ##89??C	1	2	8	8	;a	?#EGI(4,-33A6I 67==a@N45;;A>J"#45;;A>)R/ 6%/6 	67
 m"J"7IKE D D 
*%::<9s   EE EcudacpuTnamer   c                   ^^^ SnU n [         R                  R                  X2(       a  TOSS9R                  5       nS nU(       dR  [        U=(       d    WR                  5       5      R                  T5      n[        T5      S:X  a  UR                  5         U$ [         R                  R                  U4S j/ S9nUR                  R                  S5       Vs/ s H  nS	[        U5      ;   d  M  UPM     snS
   mU4S jnWR!                  U5        U" UR"                  5        U" UR$                  5        [        T5      S:X  a  [         R                  R                  S / S9n	['        U	R                  R)                  S5      R+                  5       5      S   n
U
R-                  5       mU4S jnUR!                  U5        U" UR"                  5        U" UR$                  5        UR                  5         U$ ! [         a<    U(       a  [
        R                  " SU S35        Sn[         R                  " USS9n GNf = fs  snf )NFr   )map_locationzFile z6 is not a JIT archive. Loading as a state dict insteadc                  v   > [         R                  " / 5      R                  [         R                  " T 5      5      $ rS   )r'   r-   r   r   )r   s   r5   <lambda>load_clip.<locals>.<lambda>)  s!    

2!!%,,v"67r7   )example_inputsprim::ConstantDevicer]   c                   > [        U S5      (       a  U R                  /O/ n[        U S5      (       a%  UR                  U R                  R                  5        U Hf  nUR	                  S5       HN  nSUR                  5       ;   d  M  [        US   5      R                  S5      (       d  M=  UR                  T5        MP     Mh     g )Ngraphforward1r   valuer   )	hasattrr   appendr   findAllNodesattributeNamesstrr   copyAttributes)modulegraphsr   nodedevice_nodes       r5   patch_deviceload_clip.<locals>.patch_device/  s    #*67#;#;&,,6:&&MM&////0E**+;<d1133W9''1z&'99:''4 = r7   c                  J    [         R                  " / 5      R                  5       $ rS   )r'   r-   rN   r   r7   r5   r   r   @  s    EJJrN((*r7   aten::torL   c                   > [        U S5      (       a  U R                  /O/ n[        U S5      (       a%  UR                  U R                  R                  5        U Hy  nUR	                  S5       Ha  n[        UR                  5       5      nS H?  nXE   R                  5       S   S:X  d  M  XE   R                  5       R                  T5        MA     Mc     M{     g )Nr   r   r   )rL   r;   r      )	r   r   r   r   r   listinputsr   r   )r   r   r   r   r   i
float_nodes         r5   patch_floatload_clip.<locals>.patch_floatD  s    '.vw'?'?fll^RFvz**foo334!..z:D!$++-0F#!9>>+G49"INN,;;JG $ ;  r7   )r'   jitloadr   RuntimeErrorwarningswarnr   r   r   r   rN   tracer   r   reprapplyrZ   rf   r   findNoder   r   )r   r   r   
model_pathr   r   device_holdernr   float_holderfloat_inputr   r   r   s    `          @@r5   	load_clipr    s    CJ
@		sV  ??Ctv 	
 J<%*:*:*<=@@Hv;%KKMIIOO7 $ LM !&&334DEEatAw 	
E 	
K
	5 
KK##$""#
6{eyy*2 ' ?<--66zBIIKLQO %%'

	H 	K E&&'E%%&Ly  @MM
|#YZ CZZ
?
@ s   6G5 H>.H>5AH;:H;)r   collectionsr   typingr   r   numpyr.   r'   r   Moduler   r*   r   r   r!   r   dictr   r   is_availabler   r   r  r   r7   r5   <module>r     s     #    o1299 o1d# #,		 ,RYY <!")) !&-		 -`D F ((** 2805FC FC-.Fr7   