
    9i                         S SK rS SKrS SKrS SKJr  S SKJr   " S S\R                  5      r
 " S S\R                  5      rg)    Nc                      ^  \ rS rSrSrSS/r      SU 4S jjrS rS rS r	SS	\
R                  4S
 jjrS rSrU =r$ )FrozenOpenCLIPEmbedder
   0
Uses the OpenCLIP transformer encoder for text
lastpenultimatec                 t  > [         T	U ]  5         X`R                  ;   d   e[        R                  " U[
        R                  " S5      US9u  n  nU?Xpl        X0l        X@l	        U(       a  U R                  5         X`l        U R                  S:X  a  SU l        g U R                  S:X  a  SU l        g [        5       e)Ncpudevice
pretrainedr   r   r      )super__init__LAYERS	open_clipcreate_model_and_transformstorchr   visualmodel
max_lengthfreezelayer	layer_idxNotImplementedError)
selfarchr   r   r   r   r   r   _	__class__s
            p/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/multi_modal/videocomposer/clip.pyr   FrozenOpenCLIPEmbedder.__init__   s     	###;;e,Eq!L
$KKM
::DNZZ=(DN%''    c                 ~    U R                   R                  5       U l         U R                  5        H
  nSUl        M     g NFr   eval
parametersrequires_gradr   params     r    r   FrozenOpenCLIPEmbedder.freeze*   -    ZZ__&
__&E"'E 'r"   c                     [         R                  " U5      nU R                  UR                  U R                  5      5      nU$ N)r   tokenizeencode_with_transformertor   )r   texttokenszs       r    forwardFrozenOpenCLIPEmbedder.forward/   s4    ##D)((4;;)?@r"   c                 4   U R                   R                  U5      nX R                   R                  -   nUR                  SSS5      nU R	                  X R                   R
                  S9nUR                  SSS5      nU R                   R                  U5      nU$ Nr   r      	attn_maskr   token_embeddingpositional_embeddingpermutetext_transformer_forwardr;   ln_finalr   r2   xs      r    r0   .FrozenOpenCLIPEmbedder.encode_with_transformer4       JJ&&t,

///IIaA))!zz7K7K)LIIaAJJ"r"   rC   c                    [        U R                  R                  R                  5       H  u  p4U[	        U R                  R                  R                  5      U R
                  -
  :X  a    U$ U R                  R                  R                  (       a1  [        R                  R                  5       (       d  [        XAU5      nM  U" XS9nM     U$ Nr:   	enumerater   transformer	resblockslenr   grad_checkpointingr   jitis_scripting
checkpointr   rC   r;   irs        r    r@   /FrozenOpenCLIPEmbedder.text_transformer_forward=       djj44>>?DAC

..889DNNJJ  zz%%88AWAW B BqY/a- @ r"   c                     U " U5      $ r.    r   r2   s     r    encodeFrozenOpenCLIPEmbedder.encodeH       Dzr"   )r   r   r   r   r   )ViT-H-14laion2b_s32b_b79kcudaM   Tr   r.   __name__
__module____qualname____firstlineno____doc__r   r   r   r5   r0   r   Tensorr@   rY   __static_attributes____classcell__r   s   @r    r   r   
   sV     m$F !/(4(

	%,, 	 r"   r   c                      ^  \ rS rSrSrSS/r       SU 4S jjrS rS rS r	SS	\
R                  4S
 jjrS rSrU =r$ )FrozenOpenCLIPVisualEmbedderL   r   r   r   c                 4  > [         TU ]  5         X`R                  ;   d   e[        R                  " U[
        R                  " S5      US9u  pn
U?Xl        [        R                  " U[        R                  S9S-  nU
" [        R                  " 5       " U5      5      R                  S5      U l        Xl        X0l        X@l        U(       a  U R%                  5         X`l        U R&                  S:X  a  SU l        g U R&                  S:X  a  SU l        g [+        5       e)	Nr
   r   )dtype   r   r   r   r   )r   r   r   r   r   r   r   rJ   r   nponesuint8T
ToPILImage	unsqueezeblack_image
preprocessr   r   r   r   r   )r   r   r   r   r   r   r   input_shaper   r   rw   
data_whiter   s               r    r   %FrozenOpenCLIPVisualEmbedder.__init__R   s     	###(DDe, E*
WW[9C?
%allnZ&@AKKAN$$KKM
::DNZZ=(DN%''r"   c                 ~    U R                   R                  5       U l         U R                  5        H
  nSUl        M     g r$   r%   r)   s     r    r   #FrozenOpenCLIPVisualEmbedder.freezep   r,   r"   c                 n    U R                   R                  UR                  U R                  5      5      nU$ r.   )r   encode_imager1   r   )r   imager4   s      r    r5   $FrozenOpenCLIPVisualEmbedder.forwardu   s(    JJ##EHHT[[$9:r"   c                 4   U R                   R                  U5      nX R                   R                  -   nUR                  SSS5      nU R	                  X R                   R
                  S9nUR                  SSS5      nU R                   R                  U5      nU$ r8   r<   rB   s      r    r0   4FrozenOpenCLIPVisualEmbedder.encode_with_transformerz   rE   r"   rC   c                    [        U R                  R                  R                  5       H  u  p4U[	        U R                  R                  R                  5      U R
                  -
  :X  a    U$ U R                  R                  R                  (       a1  [        R                  R                  5       (       d  [        XAU5      nM  U" XS9nM     U$ rG   rH   rQ   s        r    r@   5FrozenOpenCLIPVisualEmbedder.text_transformer_forward   rU   r"   c                     U " U5      $ r.   rW   rX   s     r    rY   #FrozenOpenCLIPVisualEmbedder.encode   r[   r"   )rv   r   r   r   r   r   rw   )r\   r]   r^   r_   Tr   )   r      r.   r`   ri   s   @r    rk   rk   L   sY     m$F !/*(<(

	%,, 	 r"   rk   )numpyrp   r   r   torch.nnnntorchvision.transforms
transformsrs   Moduler   rk   rW   r"   r    <module>r      s9        "?RYY ?DC299 Cr"   