
    +sg                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 erd dlmZ  ej                  e      Z G d de
      Zy)	    )annotationsN)nullcontext)TYPE_CHECKING)SentenceEvaluator)SentenceTransformerc                  h     e Zd ZdZ	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZdddZed	d       Z xZS )
MSEEvaluatora
  
    Computes the mean squared error (x100) between the computed sentence embedding
    and some target sentence embedding.

    The MSE is computed between ||teacher.encode(source_sentences) - student.encode(target_sentences)||.

    For multilingual knowledge distillation (https://arxiv.org/abs/2004.09813), source_sentences are in English
    and target_sentences are in a different language like German, Chinese, Spanish...

    Args:
        source_sentences (List[str]): Source sentences to embed with the teacher model.
        target_sentences (List[str]): Target sentences to embed with the student model.
        teacher_model (SentenceTransformer, optional): The teacher model to compute the source sentence embeddings.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 32.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation
            dimension. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import MSEEvaluator
            from datasets import load_dataset

            # Load a model
            student_model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
            teacher_model = SentenceTransformer('all-mpnet-base-v2')

            # Load any dataset with some texts
            dataset = load_dataset("sentence-transformers/stsb", split="validation")
            sentences = dataset["sentence1"] + dataset["sentence2"]

            # Given queries, a corpus and a mapping with relevant documents, the InformationRetrievalEvaluator computes different IR metrics.
            mse_evaluator = MSEEvaluator(
                source_sentences=sentences,
                target_sentences=sentences,
                teacher_model=teacher_model,
                name="stsb-dev",
            )
            results = mse_evaluator(student_model)
            '''
            MSE evaluation (lower = better) on the stsb-dev dataset:
            MSE (*100):  0.805045
            '''
            print(mse_evaluator.primary_metric)
            # => "stsb-dev_negative_mse"
            print(results[mse_evaluator.primary_metric])
            # => -0.8050452917814255
    c	                p   t         	|           || _        | j                  
t               n|j	                  | j                        5  |j                  |||d      | _        d d d        || _        || _        || _	        || _
        d|z   dz   | _        g d| _        || _        d| _        y # 1 sw Y   JxY w)NTshow_progress_bar
batch_sizeconvert_to_numpymse_evaluation_z_results.csv)epochstepsMSEnegative_mse)super__init__truncate_dimr   truncate_sentence_embeddingsencodesource_embeddingstarget_sentencesr   r   namecsv_filecsv_headers	write_csvprimary_metric)
selfsource_sentencesr   teacher_modelr   r   r   r   r   	__class__s
            `/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/MSEEvaluator.pyr   zMSEEvaluator.__init__G   s     	(   ( M;;D<M<MN	
 &3%9%9 4ER\os &: &D"	 !1!2$	)D0>A4",#	 	s   B,,B5c                (   |dk7  r|dk(  rd| }nd| d| d}nd}| j                   |d| j                    dz  }| j                   
t               n|j                  | j                         5  |j                  | j                  | j
                  | j                  d	
      }d d d        | j                  z
  dz  j                         }|dz  }t        j                  d| j                   d| d       t        j                  d|d       || j                  rt        j                  j                  || j                         }t        j                  j#                  |      }	t%        |d|	rdndd      5 }
t'        j(                  |
      }|	s|j+                  | j,                         |j+                  |||g       d d d        d| i}| j/                  || j                        }| j1                  ||       |S # 1 sw Y   \xY w# 1 sw Y   KxY w)Nz after epoch z
 in epoch z after z steps z (truncated to )Tr      d   z'MSE evaluation (lower = better) on the z dataset:zMSE (*100):	4fawzutf-8)newlinemodeencodingr   )r   r   r   r   r   r   r   r   meanloggerinfor   r   ospathjoinr   isfileopencsvwriterwriterowr   prefix_name_to_metrics store_metrics_in_model_card_data)r    modeloutput_pathr   r   out_txttarget_embeddingsmsecsv_pathoutput_file_existsfr;   metricss                r$   __call__zMSEEvaluator.__call__g   s   B;{)%1&ugWUG6BG():):(;1==G"//7[]U=_=_`d`q`q=r 	 %%%"&"8"8??!%	 !- !	 &&)::q@FFHs
=dii[QXPYYZ[\mC8,-"t~~ww||K?H!#!9h8JPS^ef 5jkA)OOD$4$45s 345 "C4(--gtyyA--eW=9	 	"5 5s   -4G;7AH;HHc                     y)NzKnowledge Distillation )r    s    r$   descriptionzMSEEvaluator.description   s    '    )NF    r'   TN)r!   	list[str]r   rN   r   boolr   intr   strr   rO   r   z
int | None)Nr&   r&   )r?   r   r@   rQ   returnzdict[str, float])rR   rQ   )	__name__
__module____qualname____doc__r   rH   propertyrK   __classcell__)r#   s   @r$   r	   r	      s    3r "'#'-#- $-
  - - - - !-@'R ( (rL   r	   )
__future__r   r:   loggingr5   
contextlibr   typingr   2sentence_transformers.evaluation.SentenceEvaluatorr   )sentence_transformers.SentenceTransformerr   	getLoggerrS   r3   r	   rJ   rL   r$   <module>r`      sA    " 
  	 "   PM			8	$A($ A(rL   