
from __future__ import annotations

import heapq
import logging
import os
from contextlib import nullcontext
from typing import TYPE_CHECKING, Callable

import numpy as np
import torch
from torch import Tensor
from tqdm import trange

from sentence_transformers.evaluation.SentenceEvaluator import SentenceEvaluator
from sentence_transformers.similarity_functions import SimilarityFunction

if TYPE_CHECKING:
    from sentence_transformers.SentenceTransformer import SentenceTransformer

logger = logging.getLogger(__name__)


class InformationRetrievalEvaluator(SentenceEvaluator):
    """
    This class evaluates an Information Retrieval (IR) setting.

    Given a set of queries and a large corpus, it retrieves for each query the top-k most similar documents. It measures
    Accuracy@k, Precision@k, Recall@k, Mean Reciprocal Rank (MRR), Normalized Discounted Cumulative Gain (NDCG), and Mean Average Precision (MAP).
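
    As a brief sketch of the primary ranking metric: for a single query, ``DCG@k = sum(rel_i / log2(i + 2) for i in range(k))``
    over the ranked hits, where ``rel_i`` is 1 if the document at rank ``i`` is relevant and 0 otherwise, and NDCG@k divides
    this by the DCG of an ideal ranking (see ``compute_dcg_at_k`` below).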

    Example:
        ::

            import random
            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import InformationRetrievalEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('all-MiniLM-L6-v2')

            # Load the Touche-2020 IR dataset (https://huggingface.co/datasets/BeIR/webis-touche2020, https://huggingface.co/datasets/BeIR/webis-touche2020-qrels)
            corpus = load_dataset("BeIR/webis-touche2020", "corpus", split="corpus")
            queries = load_dataset("BeIR/webis-touche2020", "queries", split="queries")
            relevant_docs_data = load_dataset("BeIR/webis-touche2020-qrels", split="test")

            # For this dataset, we want to concatenate the title and texts for the corpus
            corpus = corpus.map(lambda x: {'text': x['title'] + " " + x['text']}, remove_columns=['title'])

            # Shrink the corpus size heavily to only the relevant documents + 30,000 random documents
            required_corpus_ids = set(map(str, relevant_docs_data["corpus-id"]))
            required_corpus_ids |= set(random.sample(corpus["_id"], k=30_000))
            corpus = corpus.filter(lambda x: x["_id"] in required_corpus_ids)

            # Convert the datasets to dictionaries
            corpus = dict(zip(corpus["_id"], corpus["text"]))  # Our corpus (cid => document)
            queries = dict(zip(queries["_id"], queries["text"]))  # Our queries (qid => question)
            relevant_docs = {}  # Query ID to relevant documents (qid => set([relevant_cids]))
            for qid, corpus_ids in zip(relevant_docs_data["query-id"], relevant_docs_data["corpus-id"]):
                qid = str(qid)
                corpus_ids = str(corpus_ids)
                if qid not in relevant_docs:
                    relevant_docs[qid] = set()
                relevant_docs[qid].add(corpus_ids)

            # Given queries, a corpus and a mapping with relevant documents, the InformationRetrievalEvaluator computes different IR metrics.
            ir_evaluator = InformationRetrievalEvaluator(
                queries=queries,
                corpus=corpus,
                relevant_docs=relevant_docs,
                name="BeIR-touche2020-subset-test",
            )
            results = ir_evaluator(model)
            '''
            Information Retrieval Evaluation of the model on the BeIR-touche2020-subset-test dataset:
            Queries: 49
            Corpus: 31923

            Score-Function: cosine
            Accuracy@1: 77.55%
            Accuracy@3: 93.88%
            Accuracy@5: 97.96%
            Accuracy@10: 100.00%
            Precision@1: 77.55%
            Precision@3: 72.11%
            Precision@5: 71.43%
            Precision@10: 62.65%
            Recall@1: 1.72%
            Recall@3: 4.78%
            Recall@5: 7.90%
            Recall@10: 13.86%
            MRR@10: 0.8580
            NDCG@10: 0.6606
            MAP@100: 0.2934
            '''
            print(ir_evaluator.primary_metric)
            # => "BeIR-touche2020-test_cosine_map@100"
            print(results[ir_evaluator.primary_metric])
            # => 0.29335196224364596
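
            # A minimal sketch of an optional variation (not part of the original example):
            # if the model supports Matryoshka-style truncation, ``truncate_dim`` runs the same
            # evaluation on truncated embeddings. The name and dimension below are illustrative.
            ir_evaluator_256 = InformationRetrievalEvaluator(
                queries=queries,
                corpus=corpus,
                relevant_docs=relevant_docs,
                name="BeIR-touche2020-subset-test-256dim",
                truncate_dim=256,
            )
            results_256 = ir_evaluator_256(model)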
    """

    def __init__(
        self,
        queries: dict[str, str],
        corpus: dict[str, str],
        relevant_docs: dict[str, set[str]],
        corpus_chunk_size: int = 50000,
        mrr_at_k: list[int] = [10],
        ndcg_at_k: list[int] = [10],
        accuracy_at_k: list[int] = [1, 3, 5, 10],
        precision_recall_at_k: list[int] = [1, 3, 5, 10],
        map_at_k: list[int] = [100],
        show_progress_bar: bool = False,
        batch_size: int = 32,
        name: str = "",
        write_csv: bool = True,
        truncate_dim: int | None = None,
        score_functions: dict[str, Callable[[Tensor, Tensor], Tensor]] | None = None,
        main_score_function: str | SimilarityFunction | None = None,
        query_prompt: str | None = None,
        query_prompt_name: str | None = None,
        corpus_prompt: str | None = None,
        corpus_prompt_name: str | None = None,
    ) -> None:
        """
        Initializes the InformationRetrievalEvaluator.

        Args:
            queries (Dict[str, str]): A dictionary mapping query IDs to queries.
            corpus (Dict[str, str]): A dictionary mapping document IDs to documents.
            relevant_docs (Dict[str, Set[str]]): A dictionary mapping query IDs to a set of relevant document IDs.
            corpus_chunk_size (int): The size of each chunk of the corpus. Defaults to 50000.
            mrr_at_k (List[int]): A list of integers representing the values of k for MRR calculation. Defaults to [10].
            ndcg_at_k (List[int]): A list of integers representing the values of k for NDCG calculation. Defaults to [10].
            accuracy_at_k (List[int]): A list of integers representing the values of k for accuracy calculation. Defaults to [1, 3, 5, 10].
            precision_recall_at_k (List[int]): A list of integers representing the values of k for precision and recall calculation. Defaults to [1, 3, 5, 10].
            map_at_k (List[int]): A list of integers representing the values of k for MAP calculation. Defaults to [100].
            show_progress_bar (bool): Whether to show a progress bar during evaluation. Defaults to False.
            batch_size (int): The batch size for evaluation. Defaults to 32.
            name (str): A name for the evaluation. Defaults to "".
            write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate the embeddings to. Defaults to None.
            score_functions (Dict[str, Callable[[Tensor, Tensor], Tensor]]): A dictionary mapping score function names to score functions. Defaults to the ``similarity`` function from the ``model``.
            main_score_function (Union[str, SimilarityFunction], optional): The main score function to use for evaluation. Defaults to None.
            query_prompt (str, optional): The prompt to be used when encoding the queries. Defaults to None.
            query_prompt_name (str, optional): The name of the prompt to be used when encoding the queries. Defaults to None.
            corpus_prompt (str, optional): The prompt to be used when encoding the corpus. Defaults to None.
            corpus_prompt_name (str, optional): The name of the prompt to be used when encoding the corpus. Defaults to None.
        """
        super().__init__()
        # Only keep queries that have at least one relevant document
        self.queries_ids = []
        for qid in queries:
            if qid in relevant_docs and len(relevant_docs[qid]) > 0:
                self.queries_ids.append(qid)

        self.queries = [queries[qid] for qid in self.queries_ids]

        self.corpus_ids = list(corpus.keys())
        self.corpus = [corpus[cid] for cid in self.corpus_ids]

        self.query_prompt = query_prompt
        self.query_prompt_name = query_prompt_name
        self.corpus_prompt = corpus_prompt
        self.corpus_prompt_name = corpus_prompt_name

        self.relevant_docs = relevant_docs
        self.corpus_chunk_size = corpus_chunk_size
        self.mrr_at_k = mrr_at_k
        self.ndcg_at_k = ndcg_at_k
        self.accuracy_at_k = accuracy_at_k
        self.precision_recall_at_k = precision_recall_at_k
        self.map_at_k = map_at_k

        self.show_progress_bar = show_progress_bar
        self.batch_size = batch_size
        self.name = name
        self.write_csv = write_csv
        self.score_functions = score_functions
        self.score_function_names = sorted(list(self.score_functions.keys())) if score_functions else []
        self.main_score_function = SimilarityFunction(main_score_function) if main_score_function else None
        self.truncate_dim = truncate_dim

        if name:
            name = "_" + name

        self.csv_file: str = "Information-Retrieval_evaluation" + name + "_results.csv"
        self.csv_headers = ["epoch", "steps"]

        self._append_csv_headers(self.score_function_names)

    def _append_csv_headers(self, score_function_names):
        for score_name in score_function_names:
            for k in self.accuracy_at_k:
                self.csv_headers.append(f"{score_name}-Accuracy@{k}")

            for k in self.precision_recall_at_k:
                self.csv_headers.append(f"{score_name}-Precision@{k}")
                self.csv_headers.append(f"{score_name}-Recall@{k}")

            for k in self.mrr_at_k:
                self.csv_headers.append(f"{score_name}-MRR@{k}")

            for k in self.ndcg_at_k:
                self.csv_headers.append(f"{score_name}-NDCG@{k}")

            for k in self.map_at_k:
                self.csv_headers.append(f"{score_name}-MAP@{k}")

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1, *args, **kwargs
    ) -> dict[str, float]:
        if epoch != -1:
            if steps == -1:
                out_txt = f" after epoch {epoch}"
            else:
                out_txt = f" in epoch {epoch} after {steps} steps"
        else:
            out_txt = ""
        if self.truncate_dim is not None:
            out_txt += f" (truncated to {self.truncate_dim})"

        logger.info(f"Information Retrieval Evaluation of the model on the {self.name} dataset{out_txt}:")

        # If no score functions were given, fall back to the model's own similarity function
        if self.score_functions is None:
            self.score_functions = {model.similarity_fn_name: model.similarity}
            self.score_function_names = [model.similarity_fn_name]
            self._append_csv_headers(self.score_function_names)

        scores = self.compute_metrices(model, *args, **kwargs)

        # Write results to disc
        if output_path is not None and self.write_csv:
            csv_path = os.path.join(output_path, self.csv_file)
            if not os.path.isfile(csv_path):
                fOut = open(csv_path, mode="w", encoding="utf-8")
                fOut.write(",".join(self.csv_headers))
                fOut.write("\n")
            else:
                fOut = open(csv_path, mode="a", encoding="utf-8")

            output_data = [epoch, steps]
            for name in self.score_function_names:
                for k in self.accuracy_at_k:
                    output_data.append(scores[name]["accuracy@k"][k])

                for k in self.precision_recall_at_k:
                    output_data.append(scores[name]["precision@k"][k])
                    output_data.append(scores[name]["recall@k"][k])

                for k in self.mrr_at_k:
                    output_data.append(scores[name]["mrr@k"][k])

                for k in self.ndcg_at_k:
                    output_data.append(scores[name]["ndcg@k"][k])

                for k in self.map_at_k:
                    output_data.append(scores[name]["map@k"][k])

            fOut.write(",".join(map(str, output_data)))
            fOut.write("\n")
            fOut.close()

        if not self.primary_metric:
            if self.main_score_function is None:
                score_function = max(
                    [(name, scores[name]["ndcg@k"][max(self.ndcg_at_k)]) for name in self.score_function_names],
                    key=lambda x: x[1],
                )[0]
                self.primary_metric = f"{score_function}_ndcg@{max(self.ndcg_at_k)}"
            else:
                self.primary_metric = f"{self.main_score_function.value}_ndcg@{max(self.ndcg_at_k)}"

        metrics = {
            f"{score_function}_{metric_name.replace('@k', '@' + str(k))}": value
            for score_function, values_dict in scores.items()
            for metric_name, values in values_dict.items()
            for k, value in values.items()
        }
        metrics = self.prefix_name_to_metrics(metrics, self.name)
        self.store_metrics_in_model_card_data(model, metrics)
        return metrics

    def compute_metrices(
        self, model: SentenceTransformer, corpus_model=None, corpus_embeddings: Tensor | None = None
    ) -> dict[str, float]:
        if corpus_model is None:
            corpus_model = model

        max_k = max(
            max(self.mrr_at_k),
            max(self.ndcg_at_k),
            max(self.accuracy_at_k),
            max(self.precision_recall_at_k),
            max(self.map_at_k),
        )

        # Compute embeddings for the queries
        with nullcontext() if self.truncate_dim is None else model.truncate_sentence_embeddings(self.truncate_dim):
            query_embeddings = model.encode(
                self.queries,
                prompt_name=self.query_prompt_name,
                prompt=self.query_prompt,
                batch_size=self.batch_size,
                show_progress_bar=self.show_progress_bar,
                convert_to_tensor=True,
            )

        queries_result_list = {}
        for name in self.score_functions:
            queries_result_list[name] = [[] for _ in range(len(query_embeddings))]

        # Iterate over chunks of the corpus
        for corpus_start_idx in trange(
            0, len(self.corpus), self.corpus_chunk_size, desc="Corpus Chunks", disable=not self.show_progress_bar
        ):
            corpus_end_idx = min(corpus_start_idx + self.corpus_chunk_size, len(self.corpus))

            # Encode a chunk of the corpus, unless precomputed embeddings were passed in
            if corpus_embeddings is None:
                with nullcontext() if self.truncate_dim is None else corpus_model.truncate_sentence_embeddings(
                    self.truncate_dim
                ):
                    sub_corpus_embeddings = corpus_model.encode(
                        self.corpus[corpus_start_idx:corpus_end_idx],
                        prompt_name=self.corpus_prompt_name,
                        prompt=self.corpus_prompt,
                        batch_size=self.batch_size,
                        show_progress_bar=False,
                        convert_to_tensor=True,
                    )
            else:
                sub_corpus_embeddings = corpus_embeddings[corpus_start_idx:corpus_end_idx]

            # Compute the similarity scores for each score function
            for name, score_function in self.score_functions.items():
                pair_scores = score_function(query_embeddings, sub_corpus_embeddings)

                # Get the top-k values per query
                pair_scores_top_k_values, pair_scores_top_k_idx = torch.topk(
                    pair_scores, min(max_k, len(pair_scores[0])), dim=1, largest=True, sorted=False
                )
                pair_scores_top_k_values = pair_scores_top_k_values.cpu().tolist()
                pair_scores_top_k_idx = pair_scores_top_k_idx.cpu().tolist()

                for query_itr in range(len(query_embeddings)):
                    for sub_corpus_id, score in zip(
                        pair_scores_top_k_idx[query_itr], pair_scores_top_k_values[query_itr]
                    ):
                        corpus_id = self.corpus_ids[corpus_start_idx + sub_corpus_id]
                        # The heap is ordered by the first tuple element, i.e. the score
                        if len(queries_result_list[name][query_itr]) < max_k:
                            heapq.heappush(queries_result_list[name][query_itr], (score, corpus_id))
                        else:
                            heapq.heappushpop(queries_result_list[name][query_itr], (score, corpus_id))

        for name in queries_result_list:
            for query_itr in range(len(queries_result_list[name])):
                for doc_itr in range(len(queries_result_list[name][query_itr])):
                    score, corpus_id = queries_result_list[name][query_itr][doc_itr]
                    queries_result_list[name][query_itr][doc_itr] = {"corpus_id": corpus_id, "score": score}

        logger.info(f"Queries: {len(self.queries)}")
        logger.info(f"Corpus: {len(self.corpus)}\n")

        # Compute the metrics for each score function
        scores = {name: self.compute_metrics(queries_result_list[name]) for name in self.score_functions}

        # Output
        for name in self.score_function_names:
            logger.info(f"Score-Function: {name}")
            self.output_scores(scores[name])

        return scores

    def compute_metrics(self, queries_result_list: list[object]):
        # Init score computation values
        num_hits_at_k = {k: 0 for k in self.accuracy_at_k}
        precisions_at_k = {k: [] for k in self.precision_recall_at_k}
        recall_at_k = {k: [] for k in self.precision_recall_at_k}
        MRR = {k: 0 for k in self.mrr_at_k}
        ndcg = {k: [] for k in self.ndcg_at_k}
        AveP_at_k = {k: [] for k in self.map_at_k}

        # Compute scores on results
        for query_itr in range(len(queries_result_list)):
            query_id = self.queries_ids[query_itr]

            # Sort the hits by descending score
            top_hits = sorted(queries_result_list[query_itr], key=lambda x: x["score"], reverse=True)
            query_relevant_docs = self.relevant_docs[query_id]

            # Accuracy@k - count the result as correct if at least one relevant doc is among the top-k documents
            for k_val in self.accuracy_at_k:
                for hit in top_hits[0:k_val]:
                    if hit["corpus_id"] in query_relevant_docs:
                        num_hits_at_k[k_val] += 1
                        break

            # Precision and Recall@k
            for k_val in self.precision_recall_at_k:
                num_correct = 0
                for hit in top_hits[0:k_val]:
                    if hit["corpus_id"] in query_relevant_docs:
                        num_correct += 1

                precisions_at_k[k_val].append(num_correct / k_val)
                recall_at_k[k_val].append(num_correct / len(query_relevant_docs))

            # MRR@k
            for k_val in self.mrr_at_k:
                for rank, hit in enumerate(top_hits[0:k_val]):
                    if hit["corpus_id"] in query_relevant_docs:
                        MRR[k_val] += 1.0 / (rank + 1)
                        break

            # NDCG@k
            for k_val in self.ndcg_at_k:
                predicted_relevance = [
                    1 if top_hit["corpus_id"] in query_relevant_docs else 0 for top_hit in top_hits[0:k_val]
                ]
                true_relevances = [1] * len(query_relevant_docs)

                ndcg_value = self.compute_dcg_at_k(predicted_relevance, k_val) / self.compute_dcg_at_k(
                    true_relevances, k_val
                )
                ndcg[k_val].append(ndcg_value)

            # MAP@k
            for k_val in self.map_at_k:
                num_correct = 0
                sum_precisions = 0

                for rank, hit in enumerate(top_hits[0:k_val]):
                    if hit["corpus_id"] in query_relevant_docs:
                        num_correct += 1
                        sum_precisions += num_correct / (rank + 1)

                avg_precision = sum_precisions / min(k_val, len(query_relevant_docs))
                AveP_at_k[k_val].append(avg_precision)

        # Average over all queries
        for k in num_hits_at_k:
            num_hits_at_k[k] /= len(self.queries)

        for k in precisions_at_k:
            precisions_at_k[k] = np.mean(precisions_at_k[k])

        for k in recall_at_k:
            recall_at_k[k] = np.mean(recall_at_k[k])

        for k in ndcg:
            ndcg[k] = np.mean(ndcg[k])

        for k in MRR:
            MRR[k] /= len(self.queries)

        for k in AveP_at_k:
            AveP_at_k[k] = np.mean(AveP_at_k[k])

        return {
            "accuracy@k": num_hits_at_k,
            "precision@k": precisions_at_k,
            "recall@k": recall_at_k,
            "ndcg@k": ndcg,
            "mrr@k": MRR,
            "map@k": AveP_at_k,
        }

    def output_scores(self, scores):
        for k in scores["accuracy@k"]:
            logger.info("Accuracy@{}: {:.2f}%".format(k, scores["accuracy@k"][k] * 100))

        for k in scores["precision@k"]:
            logger.info("Precision@{}: {:.2f}%".format(k, scores["precision@k"][k] * 100))

        for k in scores["recall@k"]:
            logger.info("Recall@{}: {:.2f}%".format(k, scores["recall@k"][k] * 100))

        for k in scores["mrr@k"]:
            logger.info("MRR@{}: {:.4f}".format(k, scores["mrr@k"][k]))

        for k in scores["ndcg@k"]:
            logger.info("NDCG@{}: {:.4f}".format(k, scores["ndcg@k"][k]))

        for k in scores["map@k"]:
            logger.info("MAP@{}: {:.4f}".format(k, scores["map@k"][k]))

    @staticmethod
    def compute_dcg_at_k(relevances, k):
        dcg = 0
        for i in range(min(len(relevances), k)):
            dcg += relevances[i] / np.log2(i + 2)  # +2 because ranks are 0-indexed
        return dcg