
from __future__ import annotations

from collections.abc import Iterable, Iterator
from contextlib import nullcontext
from functools import partial
from typing import Any

import torch
import tqdm
from torch import Tensor, nn

from sentence_transformers import SentenceTransformer, util
from sentence_transformers.losses.CachedMultipleNegativesRankingLoss import RandContext
from sentence_transformers.models import StaticEmbedding


def _backward_hook(
    grad_output: Tensor,
    sentence_features: Iterable[dict[str, Tensor]],
    loss_obj: CachedMultipleNegativesSymmetricRankingLoss,
) -> None:
    """A backward hook to backpropagate the cached gradients mini-batch by mini-batch."""
    assert loss_obj.cache is not None
    assert loss_obj.random_states is not None
    with torch.enable_grad():
        for sentence_feature, grad, random_states in zip(sentence_features, loss_obj.cache, loss_obj.random_states):
            # Re-embed each mini-batch with gradients enabled (replaying the stored RNG
            # state) and backpropagate the cached gradient via a dot-product surrogate.
            for (reps_mb, _), grad_mb in zip(
                loss_obj.embed_minibatch_iter(
                    sentence_feature=sentence_feature,
                    with_grad=True,
                    copy_random_state=False,
                    random_states=random_states,
                ),
                grad,
            ):
                surrogate = torch.dot(reps_mb.flatten(), grad_mb.flatten()) * grad_output
                surrogate.backward()


class CachedMultipleNegativesSymmetricRankingLoss(nn.Module):
    def __init__(
        self,
        model: SentenceTransformer,
        scale: float = 20.0,
        similarity_fct: callable[[Tensor, Tensor], Tensor] = util.cos_sim,
        mini_batch_size: int = 32,
        show_progress_bar: bool = False,
    ) -> None:
        """
        Boosted version of :class:`MultipleNegativesSymmetricRankingLoss` (MNSRL) by GradCache (https://arxiv.org/pdf/2101.06983.pdf).

        Given a list of (anchor, positive) pairs, MNSRL sums the following two losses:

        1. Forward loss: Given an anchor, find the sample with the highest similarity out of all positives in the batch.
        2. Backward loss: Given a positive, find the sample with the highest similarity out of all anchors in the batch.

        For example with question-answer pairs, the forward loss finds the answer for a given question and the backward loss
        finds the question for a given answer. This loss is common in symmetric tasks, such as semantic textual similarity.
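
        Concretely, with a score matrix ``S = scale * similarity_fct(anchors, positives)`` of shape
        ``(batch, batch)`` and ``labels = arange(batch)``, the total loss is
        ``(cross_entropy(S, labels) + cross_entropy(S.T, labels)) / 2``: cross-entropy over the rows
        (forward direction) plus cross-entropy over the columns (backward direction), averaged.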

        The caching modification allows for large batch sizes (which give a better training signal) with constant memory usage,
        allowing you to reach optimal training signal with regular hardware.

        Note: If you pass triplets, the negative entry will be ignored. Each anchor is only matched against its positive.

        Args:
            model: SentenceTransformer model
            scale: Output of similarity function is multiplied by scale value
            similarity_fct: similarity function between sentence embeddings. By default, cos_sim.
                Can also be set to dot product (and then set scale to 1)
            mini_batch_size: Mini-batch size for the forward pass. This denotes how much memory is actually used during
                training and evaluation. The smaller the mini-batch size, the more memory efficient the training is,
                but the slower the training will be.
            show_progress_bar: If True, shows progress bar during processing

        Requirements:
            1. (anchor, positive) pairs
            2. Should be used with large batch sizes for superior performance, but has slower training time than non-cached versions

        Inputs:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Recommendations:
            - Use ``BatchSamplers.NO_DUPLICATES`` (:class:`docs <sentence_transformers.training_args.BatchSamplers>`) to
              ensure that no in-batch negatives are duplicates of the anchor or positive samples.

        Relations:
            - Like :class:`MultipleNegativesRankingLoss`, but with an additional symmetric loss term and caching mechanism.
            - Inspired by :class:`CachedMultipleNegativesRankingLoss`, adapted for symmetric loss calculation.

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                train_dataset = Dataset.from_dict({
                    "anchor": ["It's nice weather outside today.", "He drove to work."],
                    "positive": ["It's so sunny.", "He took the car to the office."],
                })
                loss = losses.CachedMultipleNegativesSymmetricRankingLoss(model, mini_batch_size=32)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()

        References:
            - Efficient Natural Language Response Suggestion for Smart Reply, Section 4.4: https://arxiv.org/pdf/1705.00652.pdf
            - Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup: https://arxiv.org/pdf/2101.06983.pdf
        r   zCachedMultipleNegativesSymmetricRankingLoss is not compatible with a SentenceTransformer model based on a StaticEmbedding. Consider using MultipleNegativesSymmetricRankingLoss instead.N)super__init__
isinstancer   
ValueErrormodelscalesimilarity_fctr
   CrossEntropyLosscross_entropy_lossmini_batch_sizer   r   show_progress_bar)selfr.   r/   r0   r3   r4   	__class__s         r%   r+   z4CachedMultipleNegativesSymmetricRankingLoss.__init__)   sz    Z 	eAh0P 
 

,"$"5"5"7.04
=A!2    c                v   |rt         nt        j                  }|
t               n|}|j                         D 	
ci c]  \  }	}
|	|
||  }}	}
|5   |       5  |rt	        |j                          nd}| j                  |      d   }ddd       ddd       |fS c c}
}	w # 1 sw Y   xY w# 1 sw Y   |fS xY w)z Embed a mini-batch of sentences.Nsentence_embedding)r   r   no_graditemsr   valuesr.   )r5   r   beginendr   r   random_stategrad_contextrandom_state_contextkvsentence_feature_minibatchrepss                r%   embed_minibatchz;CachedMultipleNegativesSymmetricRankingLoss.embed_minibatch   s     '0{U]]0<0D{},BRBXBXBZ%[$!Qa5o%["%[! 	T TTe{,F,M,M,OPkozz"<=>RST	T \!! &\T T	T \!!s)   BB,/B B, B)	%B,,B8c           
   #     K   |d   }|j                   \  }}t        t        j                  d|| j                  d| j
                               D ];  \  }}	|	| j                  z   }
| j                  ||	|
|||dn||         \  }}||f = yw)z5Iterate over mini-batches of sentences for embedding.	input_idsr   zEmbed mini-batchesdescdisableN)r   r=   r>   r   r   r?   )shape	enumeratetqdmtranger3   r4   rF   )r5   r   r   r   r   rH   bszr"   iberE   r?   s                r%   r   z@CachedMultipleNegativesSymmetricRankingLoss.embed_minibatch_iter   s      -[9	QKK$$) 222
 	%DAq D(((A!%!5!5!1#"3%2%:Ta@P "6 "D, $$%	%s   BBc           	        t        j                  |d         }t        j                  |dd D cg c]  }t        j                  |       c}      }t        |      }t        j                  ||j                        }g }t        j                  d|| j                  d| j                         D ]  }t        || j                  z   |      }	| j                  |||	 |      | j                  z  }
| j                  |
|||	       }|
dd||	f   }| j                  |j                         |dt        |             }||z   dz  }|j                          |j                  |j!                                 t#        |      t        |      z  }|j%                         }|D cg c]  }|D cg c]  }|j&                   c} c}}| _        |S c c}w c c}w c c}}w )z1Calculate the symmetric loss and cache gradients.r      NdevicezPreparing cachesrI      )r   catlenarangerW   rN   rO   r3   r4   minr0   r/   r2   tr   appenddetachsumrequires_grad_r    r   )r5   rE   embeddings_arembeddings_b
batch_sizelabelslossesrR   rS   scoresforward_losspositive_scoresbackward_lossloss_mbatchlossrss                    r%   "calculate_loss_and_cache_gradientszNCachedMultipleNegativesSymmetricRankingLoss.calculate_loss_and_cache_gradients   s   yya)yyQR!A1%))A,!AB&
j1D1DE%'  #...
 	0A A,,,j9A!00a1BLQTXT^T^^F)-)@)@PQRS)UL$Q!VnO*.*A*A/BSBSBUW]^t`cds`tWu*vM'-71<K  "MM+,,./!	0$ 6{S[(""$59:rr*!qvv*:
; "B6 +:s   G
	G'G:GGc           	        t        j                  |d         }t        j                  |dd D cg c]  }t        j                  |       c}      }t        |      }t        j                  ||j                        }g }t        j                  d|| j                  d| j                         D ]  }t        || j                  z   |      }	| j                  |||	 |      | j                  z  }
| j                  |
|||	       }|
dd||	f   }| j                  |j                         |dt        |             }||z   dz  }|j                  |        t        |      t        |      z  }|S c c}w )zHCalculate the symmetric loss without caching gradients (for evaluation).r   rU   NrV   zCalculating lossrI   rX   )r   rY   rZ   r[   rW   rN   rO   r3   r4   r\   r0   r/   r2   r]   r^   r`   )r5   rE   rb   rc   rd   re   rf   rg   rR   rS   rh   ri   rj   rk   rl   rm   s                   r%   calculate_lossz:CachedMultipleNegativesSymmetricRankingLoss.calculate_loss   sd   yya)yyQR!A1%))A,!AB&
j1D1DE%'  #...
 	'A A,,,j9A!00a1BLQTXT^T^^F)-)@)@PQRS)UL$Q!VnO*.*A*A/BSBSBUW]^t`cds`tWu*vM'-71<KMM+&	'" 6{S[(1 "Bs   E-c                   g }g | _         |D ]  }g }g }| j                  |dd      D ]C  \  }}|j                  |j                         j	                                |j                  |       E |j                  |       | j                   j                  |        t        j                         r4| j                  |      }	|	j                  t        t        ||              |	S | j                  |      }	|	S )z"Forward pass of the loss function.FT)r   r   r   )r   r   )r   r   r^   r_   ra   r   is_grad_enabledro   register_hookr   r&   rq   )
r5   r   rf   rE   r   reps_mbsrandom_state_mbsr!   r?   rm   s
             r%   forwardz3CachedMultipleNegativesSymmetricRankingLoss.forward   s     1 	8H!)-)B)B!1"& *C * 6%
  0 ? ? AB ''56 KK!%%&67	8   "::4@Dw~IZeijk  &&t,Dr7   c                ^    | j                   | j                  j                  | j                  dS )z+Get the configuration of the loss function.)r/   r0   r3   )r/   r0   __name__r3   )r5   s    r%   get_config_dictz;CachedMultipleNegativesSymmetricRankingLoss.get_config_dict  s-     ZZ"11::#33
 	
r7   )r.   r   r/   floatr0   z"callable[[Tensor, Tensor], Tensor]r3   intr4   boolreturnNone)N)r   dict[str, Tensor]r=   r|   r>   r|   r   r}   r   r}   r?   zRandContext | Noner~   z!tuple[Tensor, RandContext | None])
r   r   r   r}   r   r}   r   zlist[RandContext] | Noner~   z+Iterator[tuple[Tensor, RandContext | None]])rE   zlist[list[Tensor]]r~   r	   )r   Iterable[dict[str, Tensor]]rf   r	   r~   r	   )r~   zdict[str, Any])ry   
__module____qualname__r   cos_simr+   rF   r   ro   rq   rw   rz   __classcell__)r6   s   @r%   r   r   (   s     =A\\!"'[3"[3 [3 ;	[3
 [3  [3 
[3H ,0"+" " 	"
 "  " )" 
+"0 37%+% %  	%
 0% 
5%< D:2
r7   )r   r	   r   r   r   r   r~   r   )
__future__r   collections.abcr   r   
contextlibr   	functoolsr   typingr   r   rN   r	   r
   sentence_transformersr   r   ?sentence_transformers.losses.CachedMultipleNegativesRankingLossr   sentence_transformers.modelsr   r&   Moduler    r7   r%   <module>r      sa    " . "      ; W 8%%2% :% 
	%.m
")) m
r7   
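

# The demo below is an illustrative sketch, not part of the module above: it strips
# the gradient-caching (GradCache) idea down to a toy linear encoder so the two-pass
# mechanics are easy to follow. Everything in it (encoder, xa, xp, mini) is a
# hypothetical stand-in, not the SentenceTransformer API.
if __name__ == "__main__":
    torch.manual_seed(0)
    encoder = nn.Linear(8, 4)  # hypothetical stand-in for the embedding model
    xa, xp = torch.randn(16, 8), torch.randn(16, 8)  # toy "anchor"/"positive" inputs
    mini = 4  # mini-batch size

    def embed_chunks(x: Tensor) -> list[Tensor]:
        # Pass 1: embed without building a graph, then mark each chunk as a
        # gradient leaf (mirrors reps_mb.detach().requires_grad_() in forward()).
        with torch.no_grad():
            chunks = [encoder(x[b : b + mini]) for b in range(0, len(x), mini)]
        return [c.requires_grad_() for c in chunks]

    reps = [embed_chunks(xa), embed_chunks(xp)]

    # Symmetric loss on the cached representations only: this graph contains the
    # similarity matrix but not the encoder, so memory stays constant in batch size.
    a, p = torch.cat(reps[0]), torch.cat(reps[1])
    scores = a @ p.t() * 20.0
    labels = torch.arange(len(a))
    loss = (nn.functional.cross_entropy(scores, labels) + nn.functional.cross_entropy(scores.t(), labels)) / 2
    loss.backward()
    cache = [[c.grad for c in chunks] for chunks in reps]

    # Pass 2: re-embed each mini-batch WITH gradients and push the cached gradient
    # through the encoder via a dot-product surrogate (cf. _backward_hook above).
    for x, grads in zip((xa, xp), cache):
        for grad_mb, b in zip(grads, range(0, len(x), mini)):
            reps_mb = encoder(x[b : b + mini])
            torch.dot(reps_mb.flatten(), grad_mb.flatten()).backward()

    print("encoder weight grad norm:", encoder.weight.grad.norm().item())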