
    JKik"                        S SK Jr  S SKJr  S SKrS SKJs  Jr  S SKJ	r	Jr  S SK
Jr  S SKJr   " S S\R                  5      rg)	    )annotations)IterableN)Tensornn)util)SentenceTransformerc                  v   ^  \ rS rSr    S           SU 4S jjjrS	S jrS	S jr\S
S j5       rSr	U =r
$ )MegaBatchMarginLoss   c                   > [         TU ]  5         Xl        X l        X0l        XPl        U(       a  U R                  U l        gU R                  U l        g)a.
  
Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

Then train as with the triplet loss.

Args:
    model: SentenceTransformerModel
    positive_margin: Positive margin, cos(anchor, positive)
        should be > positive_margin
    negative_margin: Negative margin, cos(anchor, negative)
        should be < negative_margin
    use_mini_batched_version: As large batch sizes require a lot
        of memory, we can use a mini-batched version. We break
        down the large batch into smaller batches with fewer
        examples.
    mini_batch_size: Size for the mini-batches. Should be a
        divisor for the batch size in your data loader.

References:
    - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

Requirements:
    1. (anchor, positive) pairs
    2. Large batches (500 or more examples)

Inputs:
    +---------------------------------------+--------+
    | Texts                                 | Labels |
    +=======================================+========+
    | (anchor, positive) pairs              | none   |
    +---------------------------------------+--------+

Recommendations:
    - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
      ensure that no in-batch negatives are duplicates of the anchor or positive samples.

Example:
    ::

        from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
        from datasets import Dataset

        train_batch_size = 250
        train_mini_batch_size = 32

        model = SentenceTransformer('all-MiniLM-L6-v2')
        train_dataset = Dataset.from_dict({
            "anchor": [f"This is sentence number {i}" for i in range(500)],
            "positive": [f"This is sentence number {i}" for i in range(1, 501)],
        })
        loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

        args = SentenceTransformerTrainingArguments(
            output_dir="output",
            per_device_train_batch_size=train_batch_size,
        )
        trainer = SentenceTransformerTrainer(
            model=model,
            args=args,
            train_dataset=train_dataset,
            loss=loss,
        )
        trainer.train()
N)	super__init__modelpositive_marginnegative_marginmini_batch_sizeforward_mini_batchedforward_non_mini_batchedforward)selfr   r   r   use_mini_batched_versionr   	__class__s         r/var/www/html/dynamic-report/venv/lib/python3.13/site-packages/sentence_transformers/losses/MegaBatchMarginLoss.pyr   MegaBatchMarginLoss.__init__   sD    T 	
...4Lt00RVRoRo    c           
        Uu  p4[        UR                  5       5      n[        U[        [	        U5      5         5      n/ n[
        R                  " 5          U R                  R                  5         [        SX`R                  5       Hh  nXR                  -   n	UR                  5        V
Vs0 s H	  u  pXX _M     nn
nUR                  U R                  U5      S   R                  5       5        Mj     U R                  R                  5         S S S 5        [
        R                  " USS9n[
        R                   " [        U5      [        U5      UR"                  S9n[        S[        U5      U R                  5       GH  nXR                  -   n	U R                  U Vs0 s H	  oX>   X _M     sn5      S   nU Vs0 s H  o/ _M     nn[
        R                  " 5          [$        R&                  " X5      nUSXU	 -  -
  n[
        R(                  " USS9u  nnS S S 5        W H%  nU H  nUU   R                  XN   U   5        M     M'     U H  n[
        R*                  " UU   5      UU'   M!     U R                  U Vs0 s H	  oXN   X _M     sn5      S   nU R                  U5      S   nUR,                  UR,                  :X  d   eUR,                  UR,                  :X  d   e[.        R0                  " UU5      n[.        R0                  " UU5      n[.        R2                  " U R4                  U-
  5      [.        R2                  " UU R6                  -
  5      -   nUR9                  5       nU	[        W5      :  d  GM  UR;                  5         GM     W$ s  snn
f ! , (       d  f       GN= fs  snf s  snf ! , (       d  f       GN= fs  snf )Nr   sentence_embeddingdim)device      )listkeyslennextitertorchno_gradr   evalranger   itemsappenddetachtraincateyer    r   pytorch_cos_simmaxstackshapeFcosine_similarityrelur   r   meanbackward)r   sentence_featureslabelsanchorpositivefeature_names
batch_sizeall_positive_emb	start_idxend_idxkvinput_mini_batchdiagonal_matrixkey
anchor_embhard_negative_features
cos_scoresnegative_scoresnegatives_maxnegatives_idshard_negative_idpositive_embnegative_emb
pos_cosine
neg_cosinelossess                              r   r   (MegaBatchMarginLoss.forward_mini_batched_   s_   ,V[[]+$tH~"678
]]_JJOO"1j2F2FG	#&:&::HPHX#YHXA';$;HX #Y ''

3C(DEY(Z(a(a(cd H JJ  !99%51=))C(8$93?O;PYiYpYpq q#&6"79M9MNI"6"66GTa$bTaS&+i*H%HTa$bc$J :G%G#2g"%G!11*O
_w%G!GG   05yya/P,} ! %2 (C*3/66x}EU7VW ) %2 %.3kk:PQT:U.V&s+ %  ::Xe&fXeQTHM),L'LXe&fg$L  ::&<=>RSL##|'9'9999##|'9'9999 ,,ZFJ,,ZFJVVD00:=>
UYUiUiHiAjjF[[]F Z(!Q OT e $Z	 _ %c &H  'gs>   AN*N
:ANN)
:N.9N3=O
N
N&3
O	c                   U Vs/ s H  o0R                  U5      S   PM     nnUu  pV[        R                  " XV5      n[        R                  " U5      nUS[        R
                  " UR                  SUR                  06-  -
  n	[        R                  " U	SS9u  p[        R                  " U R                  U-
  5      [        R                  " XR                  -
  5      -   nUR                  5       $ s  snf )Nr   r!   r    r"   r   )r   r   r2   r(   diagonalr1   r5   r    r3   r6   r8   r   r   r9   )r   r;   r<   sentence_featurerepsembeddings_aembeddings_brK   positive_scoresrL   rM   _rT   s                r   r   ,MegaBatchMarginLoss.forward_non_mini_batched   s    [lm[lGW

+,-AB[lm%)")),E
..4$		:++FJ4E4EFF
 !99_!<,,>?!&&YmYmImBnn{{} ns   C1c                    g)Na  
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
 )r   s    r   citationMegaBatchMarginLoss.citation   s    r   )r   r   r   r   r   )g?g333333?T2   )r   r   r   floatr   rd   r   boolr   intreturnNone)r;   zIterable[dict[str, Tensor]]r<   r   rg   r   )rg   str)__name__
__module____qualname____firstlineno__r   r   r   propertyra   __static_attributes____classcell__)r   s   @r   r
   r
      s     "%!$)-!Op"Op Op 	Op
 #'Op Op 
Op Opb<~  r   r
   )
__future__r   collections.abcr   r(   torch.nn.functionalr   
functionalr6   r   sentence_transformersr   )sentence_transformers.SentenceTransformerr   Moduler
   r`   r   r   <module>rx      s/    " $     & In")) nr   