
from __future__ import annotations

import logging
import os
from typing import TYPE_CHECKING, Callable, Literal

import numpy as np
from torch import Tensor
from tqdm import tqdm

from sentence_transformers.evaluation.InformationRetrievalEvaluator import InformationRetrievalEvaluator
from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.similarity_functions import SimilarityFunction
from sentence_transformers.util import is_datasets_available

if TYPE_CHECKING:
    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)

DatasetNameType = Literal[
    "climatefever",
    "dbpedia",
    "fever",
    "fiqa2018",
    "hotpotqa",
    "msmarco",
    "nfcorpus",
    "nq",
    "quoraretrieval",
    "scidocs",
    "arguana",
    "scifact",
    "touche2020",
]

dataset_name_to_id = {
    "climatefever": "zeta-alpha-ai/NanoClimateFEVER",
    "dbpedia": "zeta-alpha-ai/NanoDBPedia",
    "fever": "zeta-alpha-ai/NanoFEVER",
    "fiqa2018": "zeta-alpha-ai/NanoFiQA2018",
    "hotpotqa": "zeta-alpha-ai/NanoHotpotQA",
    "msmarco": "zeta-alpha-ai/NanoMSMARCO",
    "nfcorpus": "zeta-alpha-ai/NanoNFCorpus",
    "nq": "zeta-alpha-ai/NanoNQ",
    "quoraretrieval": "zeta-alpha-ai/NanoQuoraRetrieval",
    "scidocs": "zeta-alpha-ai/NanoSCIDOCS",
    "arguana": "zeta-alpha-ai/NanoArguAna",
    "scifact": "zeta-alpha-ai/NanoSciFact",
    "touche2020": "zeta-alpha-ai/NanoTouche2020",
}

dataset_name_to_human_readable = {
    "climatefever": "ClimateFEVER",
    "dbpedia": "DBPedia",
    "fever": "FEVER",
    "fiqa2018": "FiQA2018",
    "hotpotqa": "HotpotQA",
    "msmarco": "MSMARCO",
    "nfcorpus": "NFCorpus",
    "nq": "NQ",
    "quoraretrieval": "QuoraRetrieval",
    "scidocs": "SCIDOCS",
    "arguana": "ArguAna",
    "scifact": "SciFact",
    "touche2020": "Touche2020",
}


class NanoBEIREvaluator(SentenceEvaluator):
    """
    This class evaluates the performance of a SentenceTransformer Model on the NanoBEIR collection of datasets.

    The collection is a set of datasets based on the BEIR collection, but with a significantly smaller size, so it can be used to quickly evaluate the retrieval performance of a model before committing to a full evaluation.
    The datasets are available on HuggingFace at https://huggingface.co/collections/zeta-alpha-ai/nanobeir-66e1a0af21dfd93e620cd9f6
    The Evaluator will return the same metrics as the InformationRetrievalEvaluator (i.e., MRR, nDCG, Recall@k) for each dataset and on average.
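    All metrics the evaluator returns are prefixed with "NanoBEIR_{aggregate_key}_" ("NanoBEIR_mean_" by default), e.g. "NanoBEIR_mean_cosine_ndcg@10".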


    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import NanoBEIREvaluator

            model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')

            datasets = ["QuoraRetrieval", "MSMARCO"]
            query_prompts = {
                "QuoraRetrieval": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\nQuery: ",
                "MSMARCO": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery: "
            }

            evaluator = NanoBEIREvaluator(
                dataset_names=datasets,
                query_prompts=query_prompts,
            )

            results = evaluator(model)
            '''
            NanoBEIR Evaluation of the model on ['QuoraRetrieval', 'MSMARCO'] dataset:
            Evaluating NanoQuoraRetrieval
            Information Retrieval Evaluation of the model on the NanoQuoraRetrieval dataset:
            Queries: 50
            Corpus: 5046

            Score-Function: cosine
            Accuracy@1: 92.00%
            Accuracy@3: 98.00%
            Accuracy@5: 100.00%
            Accuracy@10: 100.00%
            Precision@1: 92.00%
            Precision@3: 40.67%
            Precision@5: 26.00%
            Precision@10: 14.00%
            Recall@1: 81.73%
            Recall@3: 94.20%
            Recall@5: 97.93%
            Recall@10: 100.00%
            MRR@10: 0.9540
            NDCG@10: 0.9597
            MAP@100: 0.9395

            Evaluating NanoMSMARCO
            Information Retrieval Evaluation of the model on the NanoMSMARCO dataset:
            Queries: 50
            Corpus: 5043

            Score-Function: cosine
            Accuracy@1: 40.00%
            Accuracy@3: 74.00%
            Accuracy@5: 78.00%
            Accuracy@10: 88.00%
            Precision@1: 40.00%
            Precision@3: 24.67%
            Precision@5: 15.60%
            Precision@10: 8.80%
            Recall@1: 40.00%
            Recall@3: 74.00%
            Recall@5: 78.00%
            Recall@10: 88.00%
            MRR@10: 0.5849
            NDCG@10: 0.6572
            MAP@100: 0.5892
            Average Queries: 50.0
            Average Corpus: 5044.5

            Aggregated for Score Function: cosine
            Accuracy@1: 66.00%
            Accuracy@3: 86.00%
            Accuracy@5: 89.00%
            Accuracy@10: 94.00%
            Precision@1: 66.00%
            Recall@1: 60.87%
            Precision@3: 32.67%
            Recall@3: 84.10%
            Precision@5: 20.80%
            Recall@5: 87.97%
            Precision@10: 11.40%
            Recall@10: 94.00%
            MRR@10: 0.7694
            NDCG@10: 0.8085
            '''
            print(evaluator.primary_metric)
            # => "NanoBEIR_mean_cosine_ndcg@10"
            print(results[evaluator.primary_metric])
            # => 0.8084508771660436
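
        NanoBEIREvaluator can also be instantiated without any arguments, in which case it
        evaluates on all thirteen NanoBEIR datasets. Minimal sketch (assumes the optional
        datasets library is installed)::

            evaluator = NanoBEIREvaluator()
            results = evaluator(model)
            print(results["NanoBEIR_mean_cosine_ndcg@10"])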
    """

    def __init__(
        self,
        dataset_names: list[DatasetNameType] | None = None,
        mrr_at_k: list[int] = [10],
        ndcg_at_k: list[int] = [10],
        accuracy_at_k: list[int] = [1, 3, 5, 10],
        precision_recall_at_k: list[int] = [1, 3, 5, 10],
        map_at_k: list[int] = [100],
        show_progress_bar: bool = False,
        batch_size: int = 32,
        write_csv: bool = True,
        truncate_dim: int | None = None,
        score_functions: dict[str, Callable[[Tensor, Tensor], Tensor]] | None = None,
        main_score_function: str | SimilarityFunction | None = None,
        aggregate_fn: Callable[[list[float]], float] = np.mean,
        aggregate_key: str = "mean",
        query_prompts: str | dict[str, str] | None = None,
        corpus_prompts: str | dict[str, str] | None = None,
    ):
        """
        Initializes the NanoBEIREvaluator.

        Args:
            dataset_names (List[str]): The names of the datasets to evaluate on. Defaults to all NanoBEIR datasets when None.
            mrr_at_k (List[int]): A list of integers representing the values of k for MRR calculation. Defaults to [10].
            ndcg_at_k (List[int]): A list of integers representing the values of k for NDCG calculation. Defaults to [10].
            accuracy_at_k (List[int]): A list of integers representing the values of k for accuracy calculation. Defaults to [1, 3, 5, 10].
            precision_recall_at_k (List[int]): A list of integers representing the values of k for precision and recall calculation. Defaults to [1, 3, 5, 10].
            map_at_k (List[int]): A list of integers representing the values of k for MAP calculation. Defaults to [100].
            show_progress_bar (bool): Whether to show a progress bar during evaluation. Defaults to False.
            batch_size (int): The batch size for evaluation. Defaults to 32.
            write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate the embeddings to. Defaults to None.
            score_functions (Dict[str, Callable[[Tensor, Tensor], Tensor]]): A dictionary mapping score function names to score functions. Defaults to None, in which case the model's similarity method and similarity_fn_name are used at evaluation time.
            main_score_function (Union[str, SimilarityFunction], optional): The main score function to use for evaluation. Defaults to None.
            aggregate_fn (Callable[[list[float]], float]): The function to aggregate the scores. Defaults to np.mean.
            aggregate_key (str): The key to use for the aggregated score. Defaults to "mean".
            query_prompts (str | dict[str, str], optional): The prompts to add to the queries. If a string, will add the same prompt to all queries. If a dict, expects that all datasets in dataset_names are keys.
            corpus_prompts (str | dict[str, str], optional): The prompts to add to the corpus. If a string, will add the same prompt to all corpus. If a dict, expects that all datasets in dataset_names are keys.
        """
        super().__init__()
        if dataset_names is None:
            dataset_names = list(dataset_name_to_id.keys())
        self.dataset_names = dataset_names
        self.aggregate_fn = aggregate_fn
        self.aggregate_key = aggregate_key
        self.write_csv = write_csv
        self.query_prompts = query_prompts
        self.corpus_prompts = corpus_prompts
        self.show_progress_bar = show_progress_bar
        self.score_functions = score_functions
        self.score_function_names = sorted(list(self.score_functions.keys())) if score_functions else []
        self.main_score_function = SimilarityFunction(main_score_function) if main_score_function else None
        self.truncate_dim = truncate_dim
        self.name = f"NanoBEIR_{aggregate_key}"
        if self.truncate_dim:
            self.name += f"_{self.truncate_dim}"
        self.mrr_at_k = mrr_at_k
        self.ndcg_at_k = ndcg_at_k
        self.accuracy_at_k = accuracy_at_k
        self.precision_recall_at_k = precision_recall_at_k
        self.map_at_k = map_at_k

        self._validate_dataset_names()
        self._validate_prompts()

        ir_evaluator_kwargs = {
            "mrr_at_k": mrr_at_k,
            "ndcg_at_k": ndcg_at_k,
            "accuracy_at_k": accuracy_at_k,
            "precision_recall_at_k": precision_recall_at_k,
            "map_at_k": map_at_k,
            "show_progress_bar": show_progress_bar,
            "batch_size": batch_size,
            "write_csv": write_csv,
            "truncate_dim": truncate_dim,
            "score_functions": score_functions,
            "main_score_function": main_score_function,
        }
        self.evaluators = [self._load_dataset(name, **ir_evaluator_kwargs) for name in self.dataset_names]

        self.csv_file = f"NanoBEIR_evaluation_{aggregate_key}_results.csv"
        self.csv_headers = ["epoch", "steps"]
        self._append_csv_headers(self.score_function_names)

    def _append_csv_headers(self, score_function_names):
        for score_name in score_function_names:
            for k in self.accuracy_at_k:
                self.csv_headers.append(f"{score_name}-Accuracy@{k}")
            for k in self.precision_recall_at_k:
                self.csv_headers.append(f"{score_name}-Precision@{k}")
                self.csv_headers.append(f"{score_name}-Recall@{k}")
            for k in self.mrr_at_k:
                self.csv_headers.append(f"{score_name}-MRR@{k}")
            for k in self.ndcg_at_k:
                self.csv_headers.append(f"{score_name}-NDCG@{k}")
            for k in self.map_at_k:
                self.csv_headers.append(f"{score_name}-MAP@{k}")

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1, *args, **kwargs
    ) -> dict[str, float]:
        per_metric_results = {}
        per_dataset_results = {}
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"
        logger.info(f"NanoBEIR Evaluation of the model on {self.dataset_names} dataset{out_txt}:")

        if self.score_functions is None:
            self.score_functions = {model.similarity_fn_name: model.similarity}
            self.score_function_names = [model.similarity_fn_name]
            self._append_csv_headers(self.score_function_names)

        for evaluator in tqdm(self.evaluators, desc="Evaluating datasets", disable=not self.show_progress_bar):
            logger.info(f"Evaluating {evaluator.name}")
            evaluation = evaluator(model, output_path, epoch, steps)
            for k in evaluation:
                if self.truncate_dim:
                    dataset, _, metric = k.split("_", maxsplit=2)
                else:
                    dataset, metric = k.split("_", maxsplit=1)
                if metric not in per_metric_results:
                    per_metric_results[metric] = []
                per_dataset_results[dataset + "_" + metric] = evaluation[k]
                per_metric_results[metric].append(evaluation[k])

        agg_results = {}
        for metric in per_metric_results:
            agg_results[metric] = self.aggregate_fn(per_metric_results[metric])

        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            if not os.path.isfile(csv_path):
                fOut = open(csv_path, mode="w", encoding="utf-8")
                fOut.write(",".join(self.csv_headers))
                fOut.write("\n")
            else:
                fOut = open(csv_path, mode="a", encoding="utf-8")

            output_data = [epoch, steps]
            for name in self.score_function_names:
                for k in self.accuracy_at_k:
                    output_data.append(agg_results[f"{name}_accuracy@{k}"])
                for k in self.precision_recall_at_k:
                    output_data.append(agg_results[f"{name}_precision@{k}"])
                    output_data.append(agg_results[f"{name}_recall@{k}"])
                for k in self.mrr_at_k:
                    output_data.append(agg_results[f"{name}_mrr@{k}"])
                for k in self.ndcg_at_k:
                    output_data.append(agg_results[f"{name}_ndcg@{k}"])
                for k in self.map_at_k:
                    output_data.append(agg_results[f"{name}_map@{k}"])

            fOut.write(",".join(map(str, output_data)))
            fOut.write("\n")
            fOut.close()

        if not self.primary_metric:
            if self.main_score_function is None:
                score_function = max(
                    [(name, agg_results[f"{name}_ndcg@{max(self.ndcg_at_k)}"]) for name in self.score_function_names],
                    key=lambda x: x[1],
                )[0]
                self.primary_metric = f"{score_function}_ndcg@{max(self.ndcg_at_k)}"
            else:
                self.primary_metric = f"{self.main_score_function.value}_ndcg@{max(self.ndcg_at_k)}"

        avg_queries = np.mean([len(evaluator.queries) for evaluator in self.evaluators])
        avg_corpus = np.mean([len(evaluator.corpus) for evaluator in self.evaluators])
        logger.info(f"Average Queries: {avg_queries}")
        logger.info(f"Average Corpus: {avg_corpus}\n")

        for name in self.score_function_names:
            logger.info(f"Aggregated for Score Function: {name}")
            for k in self.accuracy_at_k:
                logger.info("Accuracy@{}: {:.2f}%".format(k, agg_results[f"{name}_accuracy@{k}"] * 100))
            for k in self.precision_recall_at_k:
                logger.info("Precision@{}: {:.2f}%".format(k, agg_results[f"{name}_precision@{k}"] * 100))
                logger.info("Recall@{}: {:.2f}%".format(k, agg_results[f"{name}_recall@{k}"] * 100))
            for k in self.mrr_at_k:
                logger.info("MRR@{}: {:.4f}".format(k, agg_results[f"{name}_mrr@{k}"]))
            for k in self.ndcg_at_k:
                logger.info("NDCG@{}: {:.4f}".format(k, agg_results[f"{name}_ndcg@{k}"]))

        agg_results = self.prefix_name_to_metrics(agg_results, self.name)
        self.store_metrics_in_model_card_data(model, agg_results)
        per_dataset_results.update(agg_results)

        return per_dataset_results

    def _get_human_readable_name(self, dataset_name: DatasetNameType) -> str:
        human_readable_name = f"Nano{dataset_name_to_human_readable[dataset_name.lower()]}"
        if self.truncate_dim is not None:
            human_readable_name += f"_{self.truncate_dim}"
        return human_readable_name

    def _load_dataset(self, dataset_name: DatasetNameType, **ir_evaluator_kwargs) -> InformationRetrievalEvaluator:
        if not is_datasets_available():
            raise ValueError("datasets is not available. Please install it to use the NanoBEIREvaluator.")
        from datasets import load_dataset

        dataset_path = dataset_name_to_id[dataset_name.lower()]
        corpus = load_dataset(dataset_path, "corpus", split="train")
        queries = load_dataset(dataset_path, "queries", split="train")
        qrels = load_dataset(dataset_path, "qrels", split="train")
        corpus_dict = {sample["_id"]: sample["text"] for sample in corpus if len(sample["text"]) > 0}
        queries_dict = {sample["_id"]: sample["text"] for sample in queries if len(sample["text"]) > 0}
        qrels_dict = {}
        for sample in qrels:
            if sample["query-id"] not in qrels_dict:
                qrels_dict[sample["query-id"]] = set()
            qrels_dict[sample["query-id"]].add(sample["corpus-id"])

        if self.query_prompts is not None:
            ir_evaluator_kwargs["query_prompt"] = self.query_prompts.get(dataset_name, None)
        if self.corpus_prompts is not None:
            ir_evaluator_kwargs["corpus_prompt"] = self.corpus_prompts.get(dataset_name, None)
        human_readable_name = self._get_human_readable_name(dataset_name)
        return InformationRetrievalEvaluator(
            queries=queries_dict,
            corpus=corpus_dict,
            relevant_docs=qrels_dict,
            name=human_readable_name,
            **ir_evaluator_kwargs,
        )

    def _validate_dataset_names(self):
        if missing_datasets := [
            dataset_name for dataset_name in self.dataset_names if dataset_name.lower() not in dataset_name_to_id
        ]:
            raise ValueError(
                f"Dataset(s) {missing_datasets} not found in the NanoBEIR collection. "
                f"Valid dataset names are: {list(dataset_name_to_id.keys())}"
            )

    def _validate_prompts(self):
        error_msg = ""
        if self.query_prompts is not None:
            if isinstance(self.query_prompts, str):
                self.query_prompts = {dataset_name: self.query_prompts for dataset_name in self.dataset_names}

            if missing_query_prompts := [
                dataset_name for dataset_name in self.dataset_names if dataset_name not in self.query_prompts
            ]:
                error_msg += f"The following datasets are missing query prompts: {missing_query_prompts}\n"

        if self.corpus_prompts is not None:
            if isinstance(self.corpus_prompts, str):
                self.corpus_prompts = {dataset_name: self.corpus_prompts for dataset_name in self.dataset_names}

            if missing_corpus_prompts := [
                dataset_name for dataset_name in self.dataset_names if dataset_name not in self.corpus_prompts
            ]:
                error_msg += f"The following datasets are missing corpus prompts: {missing_corpus_prompts}\n"

        if error_msg:
            raise ValueError(error_msg.strip())