
    +sgn1                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl
Zd dlZd dlZd dlmZmZ d dlmZ d dlmZ erd dlmZ  ej.                  e      Z G d	 d
e      Zy)    )annotationsN)nullcontext)TYPE_CHECKINGCallable)average_precision_score
ndcg_score)SentenceEvaluator)cos_sim)SentenceTransformerc            	           e Zd ZdZdddedddddf		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZ	 d	 	 	 	 	 	 	 	 	 dd	Zd
 Zd Zd Z	 xZ
S )RerankingEvaluatora  
    This class evaluates a SentenceTransformer model for the task of re-ranking.

    Given a query and a list of documents, it computes the score [query, doc_i] for all possible
    documents and sorts them in decreasing order. Then, MRR@10, NDCG@10 and MAP is compute to measure the quality of the ranking.

    Args:
        samples (list): A list of dictionaries, where each dictionary represents a sample and has the following keys:
            - 'query': The search query.
            - 'positive': A list of positive (relevant) documents.
            - 'negative': A list of negative (irrelevant) documents.
        at_k (int, optional): Only consider the top k most similar documents to each query for the evaluation. Defaults to 10.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        similarity_fct (Callable[[torch.Tensor, torch.Tensor], torch.Tensor], optional): Similarity function between sentence embeddings. By default, cosine similarity. Defaults to cos_sim.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 64.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        use_batched_encoding (bool, optional): Whether or not to encode queries and documents in batches for greater speed, or 1-by-1 to save memory. Defaults to True.
        truncate_dim (Optional[int], optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation dimension. Defaults to None.
        mrr_at_k (Optional[int], optional): Deprecated parameter. Please use `at_k` instead. Defaults to None.
    
    T@   FNc                   t         |           || _        || _        |
!t        j                  d|
 d       |
| _        n|| _        || _        || _        || _	        || _
        |	| _        t        | j                  t              r(t        | j                  j                               | _        | j                  D cg c](  }t!        |d         dkD  st!        |d         dkD  s'|* c}| _        d|rd|z   ndz   d	| j                   d
z   | _        dddd| j                   d| j                   g| _        || _        d| _        y c c}w )Nz?The `mrr_at_k` parameter has been deprecated; please use `at_k=z
` instead.positiver   negativer   _r   z
_results_@z.csvepochstepsMAPMRR@NDCG@map)super__init__samplesnameloggerwarningat_ksimilarity_fct
batch_sizeshow_progress_baruse_batched_encodingtruncate_dim
isinstancedictlistvalueslencsv_filecsv_headers	write_csvprimary_metric)selfr   r!   r   r.   r"   r#   r$   r%   r&   mrr_at_ksample	__class__s               f/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/evaluation/RerankingEvaluator.pyr   zRerankingEvaluator.__init__.   sO    		NN\]e\ffpqr DIDI,$!2$8!(dllD) 3 3 56DL "&
VJ5G1H11LQTU[\fUgQhklQlF
 -dd
KPZ[_[d[dZeeiNjj499+DII;
 ##
s   ;EE$Ec                $   |dk7  r|dk(  rd| }nd| d| d}nd}| j                   |d| j                    d	z  }t        j                  d
| j                   d| d       | j	                  |      }|d   }|d   }|d   }	| j
                  D 
cg c]  }
t        |
d          }}
| j
                  D 
cg c]  }
t        |
d          }}
t        j                  dt        | j
                         dt        j                  |      ddt        j                  |      ddt        j                  |      ddt        j                  |      ddt        j                  |      ddt        j                  |      d       t        j                  d|dz  d       t        j                  d| j                   d|dz  d       t        j                  d| j                   d|	dz  d       || j                  rt        j                  j                  || j                         }t        j                  j#                  |      }t%        |d|rdndd !      5 }t'        j(                  |      }|s|j+                  | j,                         |j+                  |||||	g       ddd       d|d"| j                   |d#| j                   |	i}| j/                  || j                        }| j1                  ||       |S c c}
w c c}
w # 1 sw Y   exY w)$a  
        Evaluates the model on the dataset and returns the evaluation metrics.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to evaluate.
            output_path (str, optional): The output path to write the results. Defaults to None.
            epoch (int, optional): The current epoch number. Defaults to -1.
            steps (int, optional): The current step number. Defaults to -1.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        z after epoch z
 in epoch z after z stepsr   Nz (truncated to )z0RerankingEvaluator: Evaluating the model on the z dataset:r   mrrndcgr   r   z	Queries: z 	 Positives: Min z.1fz, Mean z, Max z 	 Negatives: Min zMAP: d   z.2fr   z: r   awzutf-8)newlinemodeencodingzmrr@zndcg@)r&   r   infor   compute_metricesr   r+   npminmeanmaxr!   r.   ospathjoinr,   isfileopencsvwriterwriterowr-   prefix_name_to_metrics store_metrics_in_model_card_data)r0   modeloutput_pathr   r   out_txtscoresmean_apmean_mrr	mean_ndcgr2   num_positivesnum_negativescsv_pathoutput_file_existsfrM   metricss                     r4   __call__zRerankingEvaluator.__call__^   s    B;{)%1&ugWUG6BG():):(;1==GFtyykQYZaYbbcde&&u--%=6N	 @D||LVVJ/0LL?C||LVVJ/0LLDLL)**=bff]>STW=XX_`b`g`ghu`vwz_{  |B  CE  CI  CI  JW  CX  Y\  B]  ]p  qs  qw  qw  xE  qF  GJ  pK  KR  SU  SZ  SZ  [h  Si  jm  Rn  nt  uw  u{  u{  |I  uJ  KN  tO  P	
 	eGcM#./0d499+R3s';<=eDII;bS(=>? "t~~ww||K?H!#!9h8JPS^ef NjkA)OOD$4$45w) LMN 7499+DII;

 --gtyyA--eW=9 MLN Ns   K<5LA	LLc                ^    | j                   r| j                  |      S | j                  |      S )a  
        Computes the evaluation metrics for the given model.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        )r%   compute_metrices_batchedcompute_metrices_individual)r0   rQ   s     r4   rB   z#RerankingEvaluator.compute_metrices   s7     (( ))%0	
 11%8	
    c                $   g }g }g }| j                   
t               n|j                  | j                         5  |j                  | j                  D cg c]  }|d   	 c}d| j
                  | j                        }g }| j                  D ]*  }|j                  |d          |j                  |d          , |j                  |d| j
                  | j                        }ddd       d\  }	}
| j                  D ]=  }|	   }|	dz  }	t        |d         }t        |d         }|
|
|z   |z    }|
||z   z  }
|d	k(  s|d	k(  rH| j                  ||      }t        |j                        dkD  r|d	   }t        j                  |       }|j                         j                         }dg|z  d	g|z  z   }d	}t        |d	| j                          D ]  \  }}||   sd|dz   z  } n |j#                  |       |j#                  t%        |g|g| j                   
             |j#                  t'        ||             @ t)        j*                  |      }t)        j*                  |      }t)        j*                  |      }|||dS c c}w # 1 sw Y   xY w)aE  
        Computes the evaluation metrics in a batched way, by batching all queries and all documents together.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        NqueryTconvert_to_tensorr#   r$   r   r   )r   r      r   kr   r9   r:   )r&   r   truncate_sentence_embeddingsencoder   r#   r$   extendr+   r"   shapetorchargsortcputolist	enumerater!   appendr   r   rC   rE   )r0   rQ   all_mrr_scoresall_ndcg_scoresall_ap_scoresr2   all_query_embsall_docsall_docs_embs	query_idxdocs_idxinstance	query_embnum_posnum_negdocs_embpred_scorespred_scores_argsortis_relevant	mrr_scorerankindexrU   rV   rW   s                            r4   r`   z+RerankingEvaluator.compute_metrices_batched   s    "//7[]U=_=_`d`q`q=r 	"\\/3||<V<"&??"&"8"8	 * N H,, 4z 23z 234 "LLDT__`d`v`v ) M	& #	8  	TH&y1INI(:./G(:./G$X70BW0LMH'))H!|w!|--iBK;$$%))!n"'--"=%//+224K #-1#-7KI()<Q)KL eu% !TAXI !!), "":{mk]dii#XY   !8k!RSA 	TD ''-(77>*GGO,	xCCq =	 	s   JJ 
BJ JJc                   g }g }g }t        j                   | j                  | j                   d      D ]  }|d   }t        |d         }t        |d         }t	        |      dk(  st	        |      dk(  rB||z   }	dgt	        |      z  dgt	        |      z  z   }
| j
                  
t               n|j                  | j
                        5  |j                  |gd	| j                  d
      }|j                  |	d	| j                  d
      }ddd       | j                        }t	        |j                        dkD  r|d   }t        j                  |       }|j                         j                         }d}t!        |d| j"                         D ]  \  }}|
|   sd|dz   z  } n |j%                  |       |j%                  t'        |
g|g| j"                               |j%                  t)        |
|              t+        j,                  |      }t+        j,                  |      }t+        j,                  |      }|||dS # 1 sw Y   ;xY w)aO  
        Computes the evaluation metrics individually by embedding every (query, positive, negative) tuple individually.

        Args:
            model (SentenceTransformer): The SentenceTransformer model to compute metrics for.

        Returns:
            Dict[str, float]: A dictionary containing the evaluation metrics.
        Samples)disabledescrd   r   r   r   rg   NTFre   rh   rj   )tqdmr   r$   r)   r+   r&   r   rk   rl   r#   r"   rn   ro   rp   rq   rr   rs   r!   rt   r   r   rC   rE   )r0   rQ   ru   rv   rw   r}   rd   r   r   docsr   r~   r   r   r   r   r   r   rU   rV   rW   s                        r4   ra   z.RerankingEvaluator.compute_metrices_individual   sH    		$,,D<R<R8RYbc &	THW%EHZ01HHZ01H8}!S]a%7h&D#H-c(m0CCK"&"3"3";AcAcdhduduAv !LLGtch ) 	 !<<DT__`e ( 	 --iBK;$$%))!n"'--"=%//+224K I()<Q)KL eu% !TAXI !!), "":{mk]dii#XY   !8k!RSM&	TP ''-(77>*GGO,	xCCC s   A II	)r!   intr   strr.   boolr"   z4Callable[[torch.Tensor, torch.Tensor], torch.Tensor]r#   r   r$   r   r%   r   r&   
int | Noner1   r   )Nr6   r6   )
rQ   r   rR   r   r   r   r   r   returnzdict[str, float])__name__
__module____qualname____doc__r
   r   r^   rB   r`   ra   __classcell__)r3   s   @r4   r   r      s    2 OV"'%)#'#.$ .$ 	.$
 .$ M.$ .$  .$ #.$ !.$ .$b bd=(=7:=JM=[^=	=~
 HDT:Drb   r   )
__future__r   rL   loggingrG   
contextlibr   typingr   r   numpyrC   ro   r   sklearn.metricsr   r   2sentence_transformers.evaluation.SentenceEvaluatorr	   sentence_transformers.utilr
   )sentence_transformers.SentenceTransformerr   	getLoggerr   r   r    rb   r4   <module>r      sR    " 
  	 " *    ? P .M			8	$ZD* ZDrb   