
    +sg                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	Z
d dlmZ erd dlmZ  ej                  e      Z G d de      Zy)	    )annotationsN)nullcontext)TYPE_CHECKING)SentenceEvaluator)SentenceTransformerc                  x     e Zd ZdZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 d	 	 	 	 	 	 	 	 	 ddZed	d       Z xZS )
MSEEvaluatorFromDataFrameu  
    Computes the mean squared error (x100) between the computed sentence embedding and some target sentence embedding.

    Args:
        dataframe (List[Dict[str, str]]): It must have the following format. Rows contains different, parallel sentences.
            Columns are the respective language codes::

            [{'en': 'My sentence in English', 'es': 'Oración en español', 'fr': 'Phrase en français'...},
             {'en': 'My second sentence', ...}]
        teacher_model (SentenceTransformer): The teacher model used to compute the sentence embeddings.
        combinations (List[Tuple[str, str]]): Must be of the format ``[('en', 'es'), ('en', 'fr'), ...]``.
            First entry in a tuple is the source language. The sentence in the respective language will be fetched from
            the dataframe and passed to the teacher model. Second entry in a tuple the the target language. Sentence
            will be fetched from the dataframe and passed to the student model
        batch_size (int, optional): The batch size to compute sentence embeddings. Defaults to 8.
        name (str, optional): The name of the evaluator. Defaults to "".
        write_csv (bool, optional): Whether to write the results to a CSV file. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. If None, uses the model's
            current truncation dimension. Defaults to None.
    c                   t         |           || _        || _        || _        |rd|z   }d|z   dz   | _        ddg| _        d| _        || _        || _	        i | _
        t        j                  d       t               }| j                  D ]  \  }	}
g }g }|D ]l  }||	   j                         dk7  s||
   j                         dk7  s1|j                  ||	          |j!                  ||	          |j!                  ||
          n ||f| j                  |	|
f<   | j                  j!                  |	 d	|
         t#        |      }| j                  
t%               n|j'                  | j                        5  |j)                  || j                  
      }d d d        t+        |      D ci c]  \  }}||
 c}}| _        y # 1 sw Y   /xY wc c}}w )N_mse_evaluationz_results.csvepochstepsnegative_msezCompute teacher embeddings -
batch_size)super__init__combinationsnamer   csv_filecsv_headersprimary_metric	write_csvtruncate_dimdataloggerinfosetstripaddappendlistr   truncate_sentence_embeddingsencodezipteacher_embeddings)self	dataframeteacher_modelr   r   r   r   r   all_source_sentencessrc_langtrg_langsrc_sentencestrg_sentencesrowall_src_embeddingssentemb	__class__s                    m/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/MSEEvaluatorFromDataFrame.pyr   z"MSEEvaluatorFromDataFrame.__init__)   s    	(	$:D(4/.@#W-,"(	01"u"&"3"3 	>HhMM  8x=&&(B.3x=3F3F3HB3N(,,S];!((X7!((X7	8 0=m.LDIIx*+##xj($<=	>  $$89   ( M;;D<M<MN	h
 "/!5!56JW[WfWf!5!g	h ?BBVXj>k"ls49"l	h 	h #ms   5G*GG
c           
        |j                          g }| j                  D ]"  \  }}| j                  ||f   \  }}	t        j                  |D 
cg c]  }
| j
                  |
    c}
      }| j                  
t               n|j                  | j                        5  t        j                  |j                  |	| j                              }d d d        |z
  dz  j                         }|dz  }|j                  |       t        j                  d| j                   d| d| d       t        j                  d|d	       % || j                   rt"        j$                  j'                  || j(                        }t"        j$                  j+                  |      }t-        |d
|rdndd      5 }t/        j0                  |      }|s|j3                  | j4                         |j3                  ||g|z          d d d        dt        j                  |      j7                          i}| j9                  || j                        }| j;                  ||       |S c c}
w # 1 sw Y   xY w# 1 sw Y   qxY w)Nr      d   zMSE evaluation on z dataset - r   :zMSE (*100):	4fr   awzutf-8)newlinemodeencodingr   )evalr   r   npasarrayr(   r   r   r%   r&   r   meanr#   r   r   r   r   ospathjoinr   isfileopencsvwriterwriterowr   itemprefix_name_to_metrics store_metrics_in_model_card_data)r)   modeloutput_pathr   r   
mse_scoresr-   r.   r/   r0   r3   src_embeddingstrg_embeddingsmsecsv_pathoutput_file_existsfrK   metricss                      r6   __call__z"MSEEvaluatorFromDataFrame.__call__Z   s$    	


"&"3"3 	2Hh+/99h5I+J(M=ZZS`(a4)@)@)F(abN"&"3"3";AcAcdhduduAv e!#ELLSWSbSbL,c!de #^39??AC3JCc"KK,TYYK{8*AhZWXYZKK-Bx01	2 "t~~ww||K?H!#!9h8JPS^ef =jkA)OOD$4$45 ;<= "BGGJ$7$<$<$>#>?--gtyyA--eW=3 )be e= =s   I
1I/A	I(I%	(I1c                     y)NzKnowledge Distillation )r)   s    r6   descriptionz%MSEEvaluatorFromDataFrame.description~   s    '    )   r   TN)r*   zlist[dict[str, str]]r+   r   r   zlist[tuple[str, str]]r   intr   strr   boolr   z
int | None)Nrc   )
rP   r   rQ   ra   r   r`   r   r`   returnzdict[str, float])rd   ra   )	__name__
__module____qualname____doc__r   rZ   propertyr]   __classcell__)r5   s   @r6   r	   r	      s    4 #'/m'/m +/m ,	/m
 /m /m /m !/md bd"("7:"JM"[^"	"H ( (r^   r	   )
__future__r   rJ   loggingrE   
contextlibr   typingr   numpyrB   2sentence_transformers.evaluation.SentenceEvaluatorr   )sentence_transformers.SentenceTransformerr   	getLoggerre   r   r	   r\   r^   r6   <module>rs      sD    " 
  	 "    PM			8	$m( 1 m(r^   