
    +sg~                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	Z
d dlZd dlmZ d dlmZ erd dlmZ  ej$                  e      Z G d d	e      Zy)
    )annotationsN)nullcontext)TYPE_CHECKING)SentenceEvaluator)pytorch_cos_sim)SentenceTransformerc                  n     e Zd ZdZ	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 d	 	 	 	 	 	 	 	 	 ddZ xZS )TranslationEvaluatora  
    Given two sets of sentences in different languages, e.g. (en_1, en_2, en_3...) and (fr_1, fr_2, fr_3, ...),
    and assuming that fr_i is the translation of en_i.
    Checks if vec(en_i) has the highest similarity to vec(fr_i). Computes the accuracy in both directions

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TranslationEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

            # Load a parallel sentences dataset
            dataset = load_dataset("sentence-transformers/parallel-sentences-news-commentary", "en-nl", split="train[:1000]")

            # Initialize the TranslationEvaluator using the same texts from two languages
            translation_evaluator = TranslationEvaluator(
                source_sentences=dataset["english"],
                target_sentences=dataset["non_english"],
                name="news-commentary-en-nl",
            )
            results = translation_evaluator(model)
            '''
            Evaluating translation matching Accuracy of the model on the news-commentary-en-nl dataset:
            Accuracy src2trg: 90.80
            Accuracy trg2src: 90.40
            '''
            print(translation_evaluator.primary_metric)
            # => "news-commentary-en-nl_mean_accuracy"
            print(results[translation_evaluator.primary_metric])
            # => 0.906
    c	                4   t         	|           || _        || _        || _        || _        || _        || _        || _        t        | j                        t        | j                        k(  sJ |rd|z   }d|z   dz   | _
        g d| _        || _        d| _        y)a  
        Constructs an evaluator based for the dataset

        The labels need to indicate the similarity between the sentences.

        Args:
            source_sentences (List[str]): List of sentences in the source language.
            target_sentences (List[str]): List of sentences in the target language.
            show_progress_bar (bool): Whether to show a progress bar when computing embeddings. Defaults to False.
            batch_size (int): The batch size to compute sentence embeddings. Defaults to 16.
            name (str): The name of the evaluator. Defaults to an empty string.
            print_wrong_matches (bool): Whether to print incorrect matches. Defaults to False.
            write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate sentence embeddings to. If None, the model's
                current truncation dimension will be used. Defaults to None.
        _translation_evaluationz_results.csv)epochstepssrc2trgtrg2srcmean_accuracyN)super__init__source_sentencestarget_sentencesname
batch_sizeshow_progress_barprint_wrong_matchestruncate_dimlencsv_filecsv_headers	write_csvprimary_metric)
selfr   r   r   r   r   r   r   r   	__class__s
            h/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/TranslationEvaluator.pyr   zTranslationEvaluator.__init__:   s    6 	 0 0	$!2#6 (4(()S1F1F-GGGG:D047.HC"-    c           	     &   |dk7  r|dk(  rd| }nd| d| d}nd}| j                   |d| j                    dz  }t        j                  d	| j                   d
| d       | j                   
t	               n|j                  | j                         5  t        j                  |j                  | j                  | j                  | j                  d            }t        j                  |j                  | j                  | j                  | j                  d            }d d d        t              j                         j                         j!                         }d}	d}
t#        t%        |            D ]  }t'        j(                  ||         }||k(  r|	dz  }	&| j*                  s3t-        d|d|d|       t-        d| j                  |          t-        d| j                  |   d||   |   dd       t-        d| j                  |   d||   |   dd       t/        ||         }t1        |d d      }|d d D ]%  \  }}t-        d|d|dd| j                  |          '  |j2                  }t#        t%        |            D ]%  }t'        j(                  ||         }||k(  s!|
dz  }
' |	t%        |      z  }|
t%        |      z  }t        j                  d|dz  d       t        j                  d |dz  d       || j4                  rt6        j8                  j;                  || j<                        }t6        j8                  j?                  |      }tA        |d|rd!nd"d#$      5 }tC        jD                  |      }|s|jG                  | jH                         |jG                  ||||g       d d d        ||||z   d%z  d&}| jK                  || j                        }| jM                  ||       |S # 1 sw Y   xY w# 1 sw Y   RxY w)'Nz after epoch z
 in epoch z after z steps z (truncated to )z=Evaluating translation matching Accuracy of the model on the z dataset:F)r   r   convert_to_numpyr      z
Incorrect  : Sourcezis most similar to targetzinstead of targetzSource     :zPred Target:z(Score: z.4fzTrue Target:c                    | d   S )Nr+    )xs    r#   <lambda>z/TranslationEvaluator.__call__.<locals>.<lambda>   s
    ! r$   T)keyreverse   	zAccuracy src2trg: d   z.2fzAccuracy trg2src: awzutf-8)newlinemodeencoding   )src2trg_accuracytrg2src_accuracyr   )'r   loggerinfor   r   truncate_sentence_embeddingstorchstackencoder   r   r   r   r   detachcpunumpyranger   npargmaxr   print	enumeratesortedTr   ospathjoinr   isfileopencsvwriterwriterowr   prefix_name_to_metrics store_metrics_in_model_card_data)r!   modeloutput_pathr   r   out_txtembeddings1embeddings2cos_simscorrect_src2trgcorrect_trg2srcimax_idxresultsidxscoreacc_src2trgacc_trg2srccsv_pathoutput_file_existsfrS   metricss                          r#   __call__zTranslationEvaluator.__call__h   s    B;{)%1&ugWUG6BG():):(;1==GSTXT]T]S^^fgnfoopqr"//7[]U=_=_`d`q`q=r 	++))&*&<&<#%*	  K  ++))&*&<&<#%*	  K	$ #;<CCEIIKQQSs8}% 	ZAii,GG|1$))-q2MwXkmnond&;&;A&>?nd&;&;G&DQYZ[Q\]dQefiPjjkFlmnd&;&;A&>(8TU;WX>Z]J^^_@`a#HQK0 ndK")"1+ ZJC$xc{!%<d>S>STW>XYZ	Z  ::s8}% 	%Aii,GG|1$	%
 &H5%H5(s):3(?@A(s):3(?@A"t~~ww||K?H!#!9h8JPS^ef JjkA)OOD$4$45{K HIJ !, +)K71<

 --gtyyA--eW=I	 	lJ Js   BO:.AP:PP)F   r'   FTN)r   	list[str]r   rl   r   boolr   intr   strr   rm   r   rm   r   z
int | None)Nr&   r&   )
rW   r   rX   ro   r   rn   r   rn   returnzdict[str, float])__name__
__module____qualname____doc__r   rj   __classcell__)r"   s   @r#   r
   r
      s    "P #($)#',.#,. $,.  	,.
 ,. ,. ",. ,. !,.^ bdS(S7:SJMS[^S	Sr$   r
   )
__future__r   rR   loggingrM   
contextlibr   typingr   rE   rG   r@   2sentence_transformers.evaluation.SentenceEvaluatorr   sentence_transformers.utilr   )sentence_transformers.SentenceTransformerr   	getLoggerrq   r=   r
   r-   r$   r#   <module>r~      sJ    " 
  	 "     P 6M			8	$f, fr$   