
    9i                        S SK r S SKJs  Jr  S SK Jr  S SKJr  S SKJr  S SK	J
r
  S SKJr  S SKJr  SS	KJrJr   " S
 S\R$                  5      r\
R(                  " \R*                  \R,                  S9 " S S\5      5       rg)    N)nn)Models)Model)MODELS)SentencEmbeddingModelOutput)Tasks   )	BertModelBertPreTrainedModelc                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )Pooler   ae  
Parameter-free poolers to get the sentence embedding
'cls': [CLS] representation with BERT/RoBERTa's MLP pooler.
'cls_before_pooler': [CLS] representation without the original MLP pooler.
'avg': average of the last layers' hidden states at each token.
'avg_top2': average of the last two layers.
'avg_first_last': average of the first and the last layers.
c                 v   > [         TU ]  5         Xl        U R                  S;   d   SU R                  -  5       eg )N)clsavgavg_top2avg_first_lastzunrecognized pooling type %s)super__init__pooler_type)selfr   	__class__s     m/var/www/html/land-doc-ocr/venv/lib/python3.13/site-packages/modelscope/models/nlp/bert/sentence_embedding.pyr   Pooler.__init__   sD    & $
 
 	=)D,<,<<	= 
    c                    UR                   nUR                  nU R                  S;   a	  US S 2S4   $ U R                  S:X  aC  X2R                  S5      -  R	                  S5      UR	                  S5      R                  S5      -  $ U R                  S:X  aU  US   nUS   nXS-   S-  UR                  S5      -  R	                  S5      UR	                  S5      R                  S5      -  nU$ U R                  S:X  aU  US	   nUS   nX7-   S-  UR                  S5      -  R	                  S5      UR	                  S5      R                  S5      -  nU$ [
        e)
N)r   r   r   r	   r   g       @r   )last_hidden_statehidden_statesr   	unsqueezesumNotImplementedError)r   outputsattention_masklast_hiddenr    first_hiddenpooled_resultsecond_last_hiddens           r   forwardPooler.forward    sl   //--w&q!t$$& #;#;B#??DDQG$((,66r:; <!11(+L'+K*8C?-77;<"s1v(:(:2(>(H(H(LMM ! +!.r!2'+K)>#E-77;<"s1v(:(:2(>(H(H(LMM ! %%r   )r   )	__name__
__module____qualname____firstlineno____doc__r   r*   __static_attributes____classcell__r   s   @r   r   r      s    =& &r   r   )module_namec                   d   ^  \ rS rSrU 4S jrSS jr         SS jr\U 4S j5       rSr	U =r
$ )	BertForSentenceEmbedding;   c           	         > [         TU ]  U5        Xl        UR                  SS5      U l        [        U R                  5      U l        UR                  SS5      U l        [        X R                  [        USS95        g )Nemb_pooler_typer   	normalizeF)add_pooling_layer)r   r   configgetr   r   poolerr:   setattrbase_model_prefixr
   )r   r<   kwargsr   s      r   r   !BertForSentenceEmbedding.__init__>   sg     !::&7?T--.K7,,&E:	<r   c                    Su  pEUb  U R                   " S0 UD6nUb  U R                   " S0 UD6n[        XES9nUb  Uc  U$ U R                  R                  (       a  [        R
                  " 5       n[        R                  " XER                  5      nUcc  [        R                  " UR                  S5      UR                  [        R                  S9nUUR                  S5      UR                  S5      -  -  nU" X5      n	Xl        U$ )az  
Args:
    query (:obj: `dict`): Dict of pretrained models's input for the query sequence. See
        :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__`
        for details.
    docs (:obj: `dict`): Dict of pretrained models's input for the query sequence. See
        :meth:`transformers.PreTrainedTokenizer.encode` and :meth:`transformers.PreTrainedTokenizer.__call__`
        for details.
Returns:
    Returns `modelscope.outputs.SentencEmbeddingModelOutput
Examples:
    >>> from modelscope.models import Model
    >>> from modelscope.preprocessors import Preprocessor
    >>> model = Model.from_pretrained('damo/nlp_corom_sentence-embedding_chinese-base')
    >>> preprocessor = Preprocessor.from_pretrained('damo/nlp_corom_sentence-embedding_chinese-base')
    >>> print(model(**preprocessor('source_sentence':['This is a test'])))
)NN)query_embeddingsdoc_embeddingsr   )devicedtype )encoder   
base_modeltrainingr   CrossEntropyLosstorchmatmulTarangesizerF   longloss)
r   querydocslabelsrD   rE   r$   loss_fctscoresrS   s
             r   r*    BertForSentenceEmbedding.forwardG   s    $ ,6(#{{3U3![[040N--N#~'=N??##**,H\\"24D4DEF~KKN6==

L"''*.>.C.CA.FFHF+DLr   c
                     U R                   R                  UUUUUUUUU	S9	n
U R                  X5      n
U R                  (       a  [        R                  " U
SSS9n
U
$ )N)r%   token_type_idsposition_ids	head_maskinputs_embedsoutput_attentionsoutput_hidden_statesreturn_dict   r   )pdim)rJ   r*   r>   r:   F)r   	input_idsr%   r[   r\   r]   r^   r_   r`   ra   r$   s              r   rI   BertForSentenceEmbedding.encoden   sf     //))))%'/!5# * 	% ++g6>>kk'QB7Gr   c                    > UR                  S5      nUR                  SS5      UR                  SS5      S.n[        [        U ]  " SSU0UD6nX$l        U$ )	zInstantiate the model.

Args:
    kwargs: Input args.
            model_dir: The model dir used to load the checkpoint and the label information.

Returns:
    The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
	model_dirr9   r   r:   F)r9   r:   pretrained_model_name_or_pathrH   )r=   r   r   from_pretrainedri   )r   rA   ri   model_kwargsmodelr   s        r   _instantiate%BertForSentenceEmbedding._instantiate   sg     JJ{+	%zz*;UCK7
 eS1 E*3E7CE#r   )r<   r:   r>   r   )NNN)	NNNNNNNNN)r,   r-   r.   r/   r   r*   rI   classmethodrn   r1   r2   r3   s   @r   r6   r6   ;   sD    <%R !6  r   r6   )rM   torch.nn.functionalr   
functionalre   modelscope.metainfor   modelscope.modelsr   modelscope.models.builderr   modelscope.outputsr   modelscope.utils.constantr   backboner
   r   Moduler   register_modulesentence_embeddingbertr6   rH   r   r   <module>r}      sm        & # , : + 4)&RYY )&X 00fkkJ`2 ` K`r   