import torch
from transformers import BloomConfig
from transformers import BloomModel

from modelscope.metainfo import Models
from modelscope.models import MODELS, TorchModel
from modelscope.outputs import SentencEmbeddingModelOutput
from modelscope.utils.constant import Tasks


class DecoderPooler(torch.nn.Module):
    """
    Parameter-free poolers to get the sentence embedding.
    'last': the last token state.
    'weighted_mean': position weighted average of all token states.
    """

    def __init__(self, pooler_type):
        super().__init__()
        self.pooler_type = pooler_type
        assert self.pooler_type in ('last', 'weighted_mean'), \
            'unrecognized pooling type %s' % self.pooler_type

    def forward(self, outputs, attention_mask):
        last_hidden = outputs.last_hidden_state
        if self.pooler_type == 'last':
            n, l, h = last_hidden.shape
            # Index of the last attended token in each sequence: the first
            # padding position minus one; sequences without padding use the
            # final position.
            values, indices = torch.min(attention_mask, 1, keepdim=False)
            gather_indices = torch.where(values == 0, indices, l) - 1
            # Guard against empty sequences, whose index would become -1.
            gather_indices = torch.clamp(gather_indices, min=0)
            # [n] -> [n, 1, h] so we can gather along the sequence dim.
            gather_indices = gather_indices.unsqueeze(-1).unsqueeze(1).expand(
                n, 1, h)
            pooled_output = torch.gather(last_hidden, 1,
                                         gather_indices).squeeze(dim=1)
        elif self.pooler_type == 'weighted_mean':
            input_mask_expanded = attention_mask.unsqueeze(-1).expand(
                last_hidden.size()).float()
            # Position weights 1..seq_len, broadcast to [n, seq_len, h].
            weights = torch.arange(
                start=1, end=last_hidden.shape[1] + 1).unsqueeze(0).unsqueeze(
                    -1).expand(last_hidden.size()).float().to(
                        last_hidden.device)
            assert weights.shape == last_hidden.shape \
                == input_mask_expanded.shape
            input_mask_expanded = input_mask_expanded * weights
            sum_embeddings = torch.sum(last_hidden * input_mask_expanded, 1)
            sum_mask = input_mask_expanded.sum(1)
            sum_mask = torch.clamp(sum_mask, min=1e-9)
            pooled_output = sum_embeddings / sum_mask
        else:
            raise NotImplementedError
        return pooled_output

@MODELS.register_module(
    group_key=Tasks.sentence_embedding, module_name=Models.bloom)
class BloomForSentenceEmbedding(TorchModel, BloomModel):
    """
    This model represents a text as a dense vector, using either the last
    token state or the weighted mean of all token states.
    See `Language Models are Universal Embedders
    <https://arxiv.org/pdf/2310.08232.pdf>`_ for details.
    """

    def __init__(self, config, **kwargs):
        super().__init__(config)
        self.config = config
        self.pooler_type = kwargs.get('emb_pooler_type', 'last')
        self.pooler = DecoderPooler(self.pooler_type)
        self.normalize = kwargs.get('normalize', False)
        setattr(self, self.base_model_prefix, BloomModel(config))

    def forward(self, query=None, docs=None, labels=None):
        """
        Args:
            query (:obj:`dict`): Dict of the pretrained model's input for the
                query sequences. See
                :meth:`transformers.PreTrainedTokenizer.encode` and
                :meth:`transformers.PreTrainedTokenizer.__call__` for details.
            docs (:obj:`dict`): Dict of the pretrained model's input for the
                doc sequences. See
                :meth:`transformers.PreTrainedTokenizer.encode` and
                :meth:`transformers.PreTrainedTokenizer.__call__` for details.
            labels (:obj:`torch.LongTensor`, optional): Target indices for the
                contrastive loss; computed automatically when omitted.

        Returns:
            Returns `modelscope.outputs.SentencEmbeddingModelOutput`.

        Examples:
            >>> from modelscope.models import Model
            >>> from modelscope.preprocessors import Preprocessor
            >>> model = Model.from_pretrained('damo/nlp_udever_bloom_560m')
            >>> preprocessor = Preprocessor.from_pretrained(
            ...     'damo/nlp_udever_bloom_560m')
            >>> inputs = preprocessor({'source_sentence': ['This is a test']})
            >>> outputs = model(**inputs)
            >>> print(outputs)
        """
        query_embeddings, doc_embeddings = None, None
        if query is not None:
            query_embeddings = self.encode(**query)
        if docs is not None:
            doc_embeddings = self.encode(**docs)
        outputs = SentencEmbeddingModelOutput(
            query_embeddings=query_embeddings, doc_embeddings=doc_embeddings)
        if query_embeddings is None or doc_embeddings is None:
            return outputs
        if self.base_model.training:
            # In-batch contrastive loss: every other doc in the batch serves
            # as a negative for a given query.
            loss_fct = torch.nn.CrossEntropyLoss()
            scores = torch.matmul(query_embeddings, doc_embeddings.T)
            if labels is None:
                labels = torch.arange(
                    scores.size(0), device=scores.device, dtype=torch.long)
                labels = labels * (scores.size(1) // scores.size(0))
            loss = loss_fct(scores, labels)
            outputs.loss = loss
        return outputs

    def encode(self, input_ids=None, attention_mask=None):
        outputs = self.base_model.forward(
            input_ids, attention_mask=attention_mask)
        embeddings = self.pooler(outputs, attention_mask)
        if self.normalize:
            embeddings = torch.nn.functional.normalize(
                embeddings, p=2, dim=-1)
        return embeddings

    @classmethod
    def _instantiate(cls, **kwargs):
        """Instantiate the model.

        Args:
            kwargs: Input args.
                    model_dir: The model dir used to load the checkpoint and
                    the label information.

        Returns:
            The loaded model, which is initialized by
            transformers.PreTrainedModel.from_pretrained
        """
        model_dir = kwargs.get('model_dir')
        model_kwargs = {
            'emb_pooler_type': kwargs.get('emb_pooler_type', 'last'),
            'normalize': kwargs.get('normalize', False)
        }
        if model_dir is None:
            config = BloomConfig(**model_kwargs)
            model = cls(config)
        else:
            model = super(BloomForSentenceEmbedding, cls).from_pretrained(
                pretrained_model_name_or_path=model_dir, **model_kwargs)
        model.model_dir = model_dir
        return model
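
# A minimal usage sketch (not part of the original module), mirroring the
# docstring example above. It assumes a modelscope environment where the
# 'damo/nlp_udever_bloom_560m' checkpoint can be downloaded, and that the
# preprocessor accepts the 'source_sentence'/'sentences_to_compare' input
# format used by modelscope sentence-embedding tasks (an assumption here).
if __name__ == '__main__':
    from modelscope.models import Model
    from modelscope.preprocessors import Preprocessor

    model = Model.from_pretrained('damo/nlp_udever_bloom_560m')
    preprocessor = Preprocessor.from_pretrained('damo/nlp_udever_bloom_560m')

    # Encode one query against two candidate sentences; the preprocessor
    # packs them into the `query`/`docs` dicts consumed by forward().
    inputs = preprocessor({
        'source_sentence': ['how is the weather today'],
        'sentences_to_compare': [
            'the weather is sunny', 'bloom is a language model'
        ]
    })
    outputs = model(**inputs)
    print(outputs.query_embeddings.shape, outputs.doc_embeddings.shape)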