
from __future__ import annotations

from collections.abc import Iterable

from torch import Tensor, nn

from sentence_transformers.SentenceTransformer import SentenceTransformer

from .BatchHardTripletLoss import BatchHardTripletLoss, BatchHardTripletLossDistanceFunction


class BatchAllTripletLoss(nn.Module):
    def __init__(
        self,
        model: SentenceTransformer,
        distance_metric=BatchHardTripletLossDistanceFunction.eucledian_distance,
        margin: float = 5,
    ) -> None:
        """
        BatchAllTripletLoss takes a batch with (sentence, label) pairs and computes the loss for all possible, valid
        triplets, i.e., anchor and positive must have the same label, anchor and negative a different label. The labels
        must be integers, with the same label indicating sentences from the same class. Your train dataset
        must contain at least 2 examples per label class.
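
        For example, with labels ``[0, 1, 0]``, the valid triplets are
        ``(anchor=0, positive=2, negative=1)`` and ``(anchor=2, positive=0, negative=1)``.
        Each valid triplet contributes ``max(d(anchor, positive) - d(anchor, negative) + margin, 0)``
        to the total, which is then averaged over the triplets with a positive loss.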

        Args:
            model: SentenceTransformer model
            distance_metric: Function that returns a distance between
                two embeddings. The class ``BatchHardTripletLossDistanceFunction``
                contains pre-defined metrics that can be used.
            margin: Negative samples should be at least margin further
                apart from the anchor than the positive.

        References:
            * Source: https://github.com/NegatioN/OnlineMiningTripletLoss/blob/master/online_triplet_loss/losses.py
            * Paper: In Defense of the Triplet Loss for Person Re-Identification, https://arxiv.org/abs/1703.07737
            * Blog post: https://omoindrot.github.io/triplet-loss

        Requirements:
            1. Each sentence must be labeled with a class.
            2. Your dataset must contain at least 2 examples per label class.

        Inputs:
            +------------------+--------+
            | Texts            | Labels |
            +==================+========+
            | single sentences | class  |
            +------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.GROUP_BY_LABEL`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that each batch contains 2+ examples per label class.
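
              For instance, a minimal sketch of selecting this sampler through the
              training arguments (assuming the v3+ trainer API)::

                  from sentence_transformers.training_args import BatchSamplers, SentenceTransformerTrainingArguments

                  args = SentenceTransformerTrainingArguments(
                      output_dir="output",
                      batch_sampler=BatchSamplers.GROUP_BY_LABEL,
                  )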

        Relations:
            * :class:`BatchHardTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
            * :class:`BatchHardSoftMarginTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
              Also, it does not require setting a margin.
            * :class:`BatchSemiHardTripletLoss` uses only semi-hard, valid triplets, rather than all possible, valid triplets.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                # E.g. 0: sports, 1: economy, 2: politics
                train_dataset = Dataset.from_dict({
                    "sentence": [
                        "He played a great game.",
                        "The stock is up 20%",
                        "They won 2-1.",
                        "The last goal was amazing.",
                        "They all voted against the bill.",
                    ],
                    "label": [0, 1, 0, 0, 2],
                })
                loss = losses.BatchAllTripletLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()

        """
        super().__init__()
        self.sentence_embedder = model
        self.triplet_margin = margin
        self.distance_metric = distance_metric

    def forward(self, sentence_features: Iterable[dict[str, Tensor]], labels: Tensor) -> Tensor:
        rep = self.sentence_embedder(sentence_features[0])["sentence_embedding"]
        return self.batch_all_triplet_loss(labels, rep)

    def batch_all_triplet_loss(self, labels: Tensor, embeddings: Tensor) -> Tensor:
        """Build the triplet loss over a batch of embeddings.
        We generate all the valid triplets and average the loss over the positive ones.
        Args:
            labels: labels of the batch, of size (batch_size,)
            embeddings: tensor of shape (batch_size, embed_dim)
        Returns:
            Label_Sentence_Triplet: scalar tensor containing the triplet loss
        """
        # Get the pairwise distance matrix
        pairwise_dist = self.distance_metric(embeddings)

        # Shape (batch_size, batch_size, 1)
        anchor_positive_dist = pairwise_dist.unsqueeze(2)
        # Shape (batch_size, 1, batch_size)
        anchor_negative_dist = pairwise_dist.unsqueeze(1)

        # Broadcast to a 3D tensor of size (batch_size, batch_size, batch_size), where
        # triplet_loss[i, j, k] contains the loss for anchor=i, positive=j, negative=k
        triplet_loss = anchor_positive_dist - anchor_negative_dist + self.triplet_margin

        # Zero out the invalid triplets
        # (where label(a) != label(p), label(n) == label(a), or a == p)
        mask = BatchHardTripletLoss.get_triplet_mask(labels)
        triplet_loss = mask.float() * triplet_loss

        # Remove negative losses (i.e. the easy triplets)
        triplet_loss[triplet_loss < 0] = 0

        # Count the number of triplets with a positive loss
        valid_triplets = triplet_loss[triplet_loss > 1e-16]
        num_positive_triplets = valid_triplets.size(0)

        # Average the loss over the positive valid triplets
        triplet_loss = triplet_loss.sum() / (num_positive_triplets + 1e-16)

        return triplet_loss

    @property
    def citation(self) -> str:
        return """
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification},
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
"""