
    sgt                         d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	  e	j                  e      Zd Zd Zd Zd	 Zd
 Zd ZddZd Zd Zd Zd Zd ZddZddZd Zd Zd Zd Zy)a  
Very heavily inspired by the official evaluation script for SQuAD version 2.0 which was modified by XLNet authors to
update `find_best_threshold` scripts for SQuAD V2.0

In addition to basic functionality, we also compute additional statistics and plot precision-recall curves if an
additional na_prob.json file is provided. This file is expected to map question ID's to the model's predicted
probability that a question is unanswerable.
    N   )BasicTokenizer)loggingc           	      N    d }d }d }d } | | | ||                         S )zALower text and remove punctuation, articles and extra whitespace.c                 x    t        j                  dt         j                        }t        j                  |d|       S )Nz\b(a|an|the)\b )recompileUNICODEsub)textregexs     Z/var/www/html/venv/lib/python3.12/site-packages/transformers/data/metrics/squad_metrics.pyremove_articlesz)normalize_answer.<locals>.remove_articles'   s*    

,bjj9vveS$''    c                 @    dj                  | j                               S )Nr   )joinsplitr   s    r   white_space_fixz)normalize_answer.<locals>.white_space_fix+   s    xx

%%r   c                 j    t        t        j                        dj                  fd| D              S )N c              3   ,   K   | ]  }|vs|  y wN ).0chexcludes     r   	<genexpr>z8normalize_answer.<locals>.remove_punc.<locals>.<genexpr>0   s     >bBg,=r>s   	)setstringpunctuationr   )r   r   s    @r   remove_puncz%normalize_answer.<locals>.remove_punc.   s(    f(()ww>D>>>r   c                 "    | j                         S r   )lowerr   s    r   r%   znormalize_answer.<locals>.lower2   s    zz|r   r   )sr   r   r#   r%   s        r   normalize_answerr'   $   s1    (&? ?;uQx+@ABBr   c                 <    | sg S t        |       j                         S r   )r'   r   )r&   s    r   
get_tokensr)   8   s    	A$$&&r   c                 B    t        t        |       t        |      k(        S r   )intr'   )a_golda_preds     r   compute_exactr.   >   s    '+;F+CCDDr   c                 z   t        |       }t        |      }t        j                  |      t        j                  |      z  }t        |j	                               }t        |      dk(  st        |      dk(  rt        ||k(        S |dk(  ryd|z  t        |      z  }d|z  t        |      z  }d|z  |z  ||z   z  }|S )Nr         ?   )r)   collectionsCountersumvalueslenr+   )	r,   r-   	gold_toks	pred_tokscommonnum_same	precisionrecallf1s	            r   
compute_f1r>   B   s    6"I6"I  +k.A.A).LLF6==?#H
9~c)n19	)**1}hY/I8^c)n,F
i-&
 Y%7	8BIr   c                 0   i }i }| D ]  }|j                   }|j                  D cg c]  }t        |d         s|d    }}|sdg}||vrt        d|        R||   t	        fd|D              ||<   t	        fd|D              ||<    ||fS c c}w )zV
    Computes the exact and f1 scores from the examples and the model predictions
    r   r   zMissing prediction for c              3   6   K   | ]  }t        |        y wr   )r.   r   a
predictions     r   r   z!get_raw_scores.<locals>.<genexpr>f   s     "VA=J#?"V   c              3   6   K   | ]  }t        |        y wr   )r>   rA   s     r   r   z!get_raw_scores.<locals>.<genexpr>g   s     Pa
1j 9PrD   )qas_idanswersr'   printmax)	examplespredsexact_scores	f1_scoresexamplerF   answergold_answersrC   s	           @r   get_raw_scoresrQ   R   s     LI Q5<__i6HXY_`fYgHhvii4L+F8456]
""V"VVVP<PP	&Q  "" js
   BBc                 ~    i }| j                         D ]'  \  }}||   |kD  }|rt        ||          ||<   #|||<   ) |S r   )itemsfloat)scoresna_probsqid_to_has_ansna_prob_thresh
new_scoresqidr&   pred_nas           r   apply_no_ans_thresholdr\   l   sX    J,,.  Q3-.0#s(;$;<JsOJsO  r   c           	          |sct               }t        j                  ddt         j	                               z  |z  fddt        j	                               z  |z  fd|fg      S t        |      }t        j                  ddt         fd|D              z  |z  fddt        fd|D              z  |z  fd|fg      S )Nexact      Y@r=   totalc              3   (   K   | ]	  }|     y wr   r   )r   krL   s     r   r   z!make_eval_dict.<locals>.<genexpr>   s     %H!l1o%H   c              3   (   K   | ]	  }|     y wr   r   )r   rb   rM   s     r   r   z!make_eval_dict.<locals>.<genexpr>   s     "BA9Q<"Brc   )r6   r2   OrderedDictr4   r5   )rL   rM   qid_listr`   s   ``  r   make_eval_dictrg   w   s    L!&&%#l&9&9&;"<<uDEus9#3#3#566>?% 
 	
 H&&%#%Hx%H"HH5PQus"B"BBBUJK% 
 	
r   c                 ,    |D ]  }||   | | d| <    y )N_r   )	main_evalnew_evalprefixrb   s       r   
merge_evalrm      s)     1%-a[	VHAaS/"1r   c                 T   t        fdD              }|}|}d}t        fd      }t        |      D ]1  \  }	}
|
|vr|
   r||
   }n
| |
   rd}nd}||z  }||kD  s+|}|
   }3 d\  }}|D ]  }
|
   s	|dz  }|
|vr|||
   z  } d	|z  t        |      z  |d
|z  |z  fS )Nc              3   .   K   | ]  }|   r	d   yw   Nr   r   rb   rW   s     r   r   z&find_best_thresh_v2.<locals>.<genexpr>        H1nQ6GQH   
        c                     |    S r   r   rb   rV   s    r   <lambda>z%find_best_thresh_v2.<locals>.<lambda>       hqk r   keyr   )r   r   rq   r_   r0   r4   sorted	enumerater6   )rK   rU   rV   rW   
num_no_ans	cur_score
best_scorebest_threshrf   irZ   diffhas_ans_scorehas_ans_cnts     ``          r   find_best_thresh_v2r      s   HHHJIJKh$9:HH% (3f##;DSzT	z!"J"3-K( "&M; %c"qf$% :F+[#:MP[:[[[r   c                     t        ||||      \  }}}t        ||||      \  }	}
}|| d<   || d<   |	| d<   |
| d<   || d<   || d<   y )N
best_exactbest_exact_threshbest_f1best_f1_threshhas_ans_exact
has_ans_f1)r   )rj   rK   	exact_rawf1_rawrV   rW   r   exact_threshr   r   	f1_threshr   s               r   find_all_best_thresh_v2r      sq    .A%T\^l.m+Jm%8R`%a"GY
(Il%1I!""Ii"+I!.Io(Ilr   c                     t        fdD              }|}|}d}t        fd      }t        |      D ]1  \  }	}
|
|vr|
   r||
   }n
| |
   rd}nd}||z  }||kD  s+|}|
   }3 d|z  t        |      z  |fS )Nc              3   .   K   | ]  }|   r	d   ywrp   r   rr   s     r   r   z#find_best_thresh.<locals>.<genexpr>   rs   rt   ru   c                     |    S r   r   rw   s    r   rx   z"find_best_thresh.<locals>.<lambda>   ry   r   rz   r|   r   r_   r}   )rK   rU   rV   rW   r   r   r   r   rf   ri   rZ   r   s     ``        r   find_best_threshr      s    HHHJIJKh$9:HH% (3f##;DSzT	z!"J"3-K( :F+[88r   c                 p    t        ||||      \  }}t        ||||      \  }}	|| d<   || d<   || d<   |	| d<   y )Nr   r   r   r   )r   )
rj   rK   r   r   rV   rW   r   r   r   r   s
             r   find_all_best_threshr      sV    /y(N[J)%>RGY(Il%1I!""Ii"+Ir   c                 J   | D ci c]"  }|j                   t        |j                        $ }}|j                         D cg c]
  \  }}|s	| }}}|j                         D cg c]
  \  }}|r	| }	}}||D 
ci c]  }
|
d }}
t	        | |      \  }}t        ||||      }t        ||||      }t        ||      }|rt        |||      }t        ||d       |	rt        |||	      }t        ||d       |rt        ||||||       |S c c}w c c}}w c c}}w c c}
w )Nru   )rf   HasAnsNoAns)	rF   boolrG   rS   rQ   r\   rg   rm   r   )rJ   rK   no_answer_probsno_answer_probability_thresholdrN   qas_id_to_has_answerrF   
has_answerhas_answer_qidsno_answer_qidsrb   r^   r=   exact_thresholdf1_threshold
evaluationhas_ans_evalno_ans_evals                     r   squad_evaluater      s9   QYZgGNND,AAZZ8L8R8R8Tc"4&*XbvcOc7K7Q7Q7Sf!3[effNf+01a1c611x/IE2, 46UO *"o?SUtuL>J%o|o^:|X6$_l^\:{G4Zr?L`a7 [cf 2s#   'D
DD&
D1D>
D c                    d }t        |      }dj                  |j                  |            }|j                  |       }|dk(  r |rt        j                  d|  d| d       |S |t        |       z   dz
  } ||      \  }	}
 ||      \  }}t        |	      t        |      k7  r |rt        j                  d	|	 d
| d       |S i }|j                         D ]
  \  }}|||<    d}||v r||   }||
v r|
|   }||rt        j                  d       |S d}||v r||   }||
v r|
|   }||rt        j                  d       |S |||dz    }|S )z;Project the tokenized prediction back to the original text.c                     g }t        j                         }t        |       D ]*  \  }}|dk(  r||t        |      <   |j	                  |       , dj                  |      }||fS )Nr   r   )r2   re   r   r6   appendr   )r   ns_charsns_to_s_mapr   cns_texts         r   _strip_spacesz%get_final_text.<locals>._strip_spaces  sk    !--/dO 	DAqCx)*KH&OOA		
 ''(#%%r   )do_lower_caser   r|   zUnable to find text: 'z' in ''rq   z*Length not equal after stripping spaces: 'z' vs 'NzCouldn't map start positionzCouldn't map end position)r   r   tokenizefindloggerinfor6   rS   )	pred_text	orig_textr   verbose_loggingr   	tokenizertok_textstart_positionend_positionorig_ns_textorig_ns_to_s_maptok_ns_texttok_ns_to_s_maptok_s_to_ns_mapr   	tok_indexorig_start_positionns_start_positionorig_end_positionns_end_positionoutput_texts                        r   get_final_textr      s   8	& ];Ixx	**956H]]9-NKK06)ANO!C	N2Q6L'4Y'?$\#%28%<"[/
<C,,KKD\NRXYdXeefgh O'--/ '9%&	"' (+N; 00"23D"E"KK56&),7.. 0 A KK34/3Dq3HJKr   c                     t        t        |       d d      }g }t        t        |            D ]!  }||k\  r |S |j	                  ||   d          # |S )z"Get the n-best logits from a list.c                     | d   S )Nrq   r   xs    r   rx   z#_get_best_indexes.<locals>.<lambda>^  s
    ad r   Tr{   reverser   )r~   r   ranger6   r   )logitsn_best_sizeindex_and_scorebest_indexesr   s        r   _get_best_indexesr   \  sf    Yv.NDQOL3'( 3 	OA.q123 r   c                     | sg S d}| D ]  }|||kD  s|} g }d}| D ]0  }t        j                  ||z
        }|j                  |       ||z  }2 g }|D ]  }|j                  ||z          |S )z,Compute softmax probability over raw logits.Nru   )mathexpr   )rU   	max_scorescore
exp_scores	total_sumr   probss          r   _compute_softmaxr   h  s    	I 	 1I JI HHUY&'!Q	
 E (UY&'(Lr   c                    |rt         j                  d|        |rt         j                  d|        |r|
rt         j                  d|        t        j                  t              }|D ]   }||j
                     j                  |       " i }|D ]  }|||j                  <    t        j                  dg d      }t        j                         }t        j                         }t        j                         }t        |       D ]  \  }}||   }g }d}d}d}d}t        |      D ]N  \  }}||j                     }t        |j                  |      }t        |j                  |      }|
rF|j                  d   |j                  d   z   } | |k  r"| }|}|j                  d   }|j                  d   }|D ]  }!|D ]  }"|!t        |j                        k\  r|"t        |j                        k\  r5|!|j                   vrD|"|j                   vrS|j"                  j%                  |!d      sp|"|!k  rv|"|!z
  d	z   }#|#|kD  r|j                   |||!|"|j                  |!   |j                  |"                  Q |
r|j                   ||dd||             t'        |d
 d      }t        j                  dg d      }$i }%g }&|D ]<  }'t        |&      |k\  r n+||'j(                     }|'j*                  dkD  r|j                  |'j*                  |'j,                  d	z    }(|j                   |'j*                     })|j                   |'j,                     }*|j.                  |)|*d	z    }+|j1                  |(      },|,j3                         },dj5                  |,j7                               },dj5                  |+      }-t9        |,|-||	      }.|.|%v rd|%|.<   nd}.d|%|.<   |&j                   |$|.|'j:                  |'j<                               ? |
rGd|%vr|&j                   |$d||             t        |&      d	k(  r|&j?                  d |$ddd             |&s|&j                   |$ddd             t        |&      d	k  rtA        d      g }/d}0|&D ]<  }1|/j                  |1j:                  |1j<                  z          |0r.|1jB                  s;|1}0> tE        |/      }2g }3t        |&      D ]_  \  }4}1t        j                         }5|1jB                  |5d<   |2|4   |5d<   |1j:                  |5d<   |1j<                  |5d<   |3j                  |5       a t        |3      d	k  rtA        d      |
s|3d   d   ||jF                  <   nY||0j:                  z
  |0j<                  z
  }6|6||jF                  <   |6|kD  rd||jF                  <   n|0jB                  ||jF                  <   |3||jF                  <    |r>tI        |d      5 }7|7jK                  tM        jN                  |d      dz          ddd       |r>tI        |d      5 }7|7jK                  tM        jN                  |d      dz          ddd       |rB|
r@tI        |d      5 }7|7jK                  tM        jN                  |d      dz          ddd       |S |S # 1 sw Y   xY w# 1 sw Y   [xY w# 1 sw Y   |S xY w)zHWrite final predictions to the json file and log-odds of null if needed.Writing predictions to: zWriting nbest to: zWriting null_log_odds to: PrelimPrediction)feature_indexstart_index	end_indexstart_logit	end_logit@B r   Frq   c                 4    | j                   | j                  z   S r   )r   r   r   s    r   rx   z,compute_predictions_logits.<locals>.<lambda>  s    q}}WXWbWbGb r   Tr   NbestPrediction)r   r   r   r   r   emptyru   No valid predictionsNr   probabilityr   r   w   indent
)(r   r   r2   defaultdictlistexample_indexr   	unique_id
namedtuplere   r   r   start_logits
end_logitsr6   tokenstoken_to_orig_maptoken_is_max_contextgetr~   r   r   r   
doc_tokensconvert_tokens_to_stringstripr   r   r   r   r   insert
ValueErrorr   r   rF   openwritejsondumps)8all_examplesall_featuresall_resultsr   max_answer_lengthr   output_prediction_fileoutput_nbest_fileoutput_null_log_odds_filer   version_2_with_negativenull_score_diff_thresholdr   example_index_to_featuresfeatureunique_id_to_resultresult_PrelimPredictionall_predictionsall_nbest_jsonscores_diff_jsonr   rN   featuresprelim_predictions
score_nullmin_null_feature_indexnull_start_logitnull_end_logitr   start_indexesend_indexesfeature_null_scorer   r   length_NbestPredictionseen_predictionsnbestpred
tok_tokensorig_doc_startorig_doc_endorig_tokensr   r   
final_texttotal_scoresbest_non_null_entryentryr   
nbest_jsonr   output
score_diffwriters8                                                           r   compute_predictions_logitsr3    s0     ./E.FGH():(;<= %<01J0KLM + 7 7 = I!'"7"78??HI  706F,,-7 $..e "--/O ,,.N"..0"+L"9 W4w,];
!"&/&9 (	"M7():):;F-f.A.A;OM+F,=,={KK&%+%8%8%;f>O>OPQ>R%R"%
2!3J-:*'-':':1'=$%+%6%6q%9N, !, I #c'..&99  C$77 "'*C*CC  (A(AA "77;;KO  ;. &4q8F 11 &--)*7(3&/(.(;(;K(H&,&7&7	&B'(	R #%%!"8 ! 0, $$6<cmqr&11C
 &  	tD5z[(t112G!#$^^D,<,<QR@RT
!(!:!:4;K;K!L&88H%00<RSCSU$==jI $>>+88HNN$45HH[1	+HiP_`
!11/3 ,
/3 ,LL)ztGWGWcgcqcqrsA 	tD #))-2CS_mno 5zQQ 0g3Z] ^_ LL)wCSVWXu:>344" 	0E 1 1EOO CD&::*/'		0 !.
!%( 	&HAu ,,.F"ZZF6N$)!HF=!$)$5$5F=!"'//F;f%	& z?Q344&.8mF.COGNN+ $&9&E&EEI\IfIfgJ/9W^^,5524/2E2J2J/)3w~~&oW4r (#. 	G&LLOA>EF	G #S) 	FVLLN1=DE	F !%<+S1 	HVLL$4Q?$FG	H ?	G 	G	F 	F	H s$   +*Z#+*Z/-*Z;#Z,/Z8;[c                    t        j                  dg d      }t        j                  dg d      }t        j                  d|        t        j                  t
              }|D ]   }||j                     j                  |       " i }|D ]  }|||j                  <    t        j                         }t        j                         }t        j                         }t        |       D ]  \  }}||   }g }d}t        |      D ]  \  }}||j                     }|j                  }t        ||      }t        |      D ]  }t        |	      D ]  }|j                  |   }|j                  |   } ||	z  |z   }!|j                   |!   }"|j"                  |!   }#| |j$                  dz
  k\  rZ|#|j$                  dz
  k\  rm|j&                  j)                  | d      s|#| k  r|#| z
  dz   }$|$|kD  r|j                   ||| |#||"                t+        |d	 d
      }i }%g }&|D ]J  }'t-        |&      |k\  r n9||'j.                     }|j0                  |'j2                  |'j4                  dz    }(|j6                  |'j2                     })|j6                  |'j4                     }*|j8                  |)|*dz    }+|j;                  |(      },|,j=                         },dj?                  |,jA                               },dj?                  |+      }-tC        |d      r|jD                  }.n|jF                  }.tI        |,|-|.|      }/|/|%v rd
|%|/<   |&j                   ||/|'jJ                  |'jL                               M |&s|&j                   |ddd             g }0d}1|&D ]/  }2|0j                  |2jJ                  |2jL                  z          |1r.|2}11 tO        |0      }3g }4t        |&      D ]_  \  }}2t        j                         }5|2jP                  |5d<   |3|   |5d<   |2jJ                  |5d<   |2jL                  |5d<   |4j                  |5       a t-        |4      dk  rtS        d      |1tS        d      |}6|6||jT                  <   |1jP                  ||jT                  <   |4||jT                  <    tW        |d      5 }7|7jY                  t[        j\                  |d      dz          ddd       tW        |d      5 }7|7jY                  t[        j\                  |d      dz          ddd       |
r@tW        |d      5 }7|7jY                  t[        j\                  |d      dz          ddd       |S |S # 1 sw Y   xY w# 1 sw Y   YxY w# 1 sw Y   |S xY w)z
    XLNet write prediction logic (more complex than Bert's). Write final predictions to the json file and log-odds of
    null if needed.

    Requires utils_squad_evaluate.py
    r   )r   r   r   start_log_probend_log_probr   )r   r5  r6  r   r   rq   Fc                 4    | j                   | j                  z   S r   )r5  r6  r   s    r   rx   z/compute_predictions_log_probs.<locals>.<lambda>  s    q/?/?!../P r   Tr   r   r   r   g    .Nr   r   r5  r6  r   r   r   r   r   )/r2   r   r   r   r   r   r   r   r   re   r   
cls_logitsminr   r   start_top_indexr   end_top_indexparagraph_lenr   r   r~   r6   r   r   r   r   r   r   r   r  r   r   hasattrr   do_lowercase_and_remove_accentr   r5  r6  r   r   r  rF   r  r  r  r  )8r  r	  r
  r   r  r  r  r  start_n_top	end_n_topr  r   r   r  r#  r  r  r  r  r  r  r  r   rN   r  r  r  r   cur_null_scorer   jr5  r   j_indexr6  r   r"  r$  r%  r&  r'  r(  r)  r*  r   r   r   r+  r,  r-  r.  r   r/  r0  r1  r2  s8                                                           r   compute_predictions_log_probsrD  N  s   * $..k #--E KK*+A*BCD + 7 7 = I!'"7"78??HI  706F,,-7 "--/O ,,.N"..0"+L"9 F4w,];
&/&9 *	"M7():):;F#..N Z8J;' "y) !A%+%8%8%;N"("8"8";K)ma/G#)#4#4W#=L & 4 4W =I
 #g&;&;a&??  G$9$9A$== "77;;KO  ;. &4q8F 11 &--)*7(3&/+9)53!"*	X $$Q[_
 & )	D5z[(t112G !(8(8DNNQ<NPJ$66t7G7GHN"44T^^DL!,,^|a?OQK 99*EH  ~~'Hxx 01H-Iy/2 ) 7 7 ) H H')]O\J--+/Z(LL jATATcgctctuO)	Z LL)r$UYZ[" 	,E 4 4u7I7I IJ&&+#	,
 !.
!%( 	&HAu ,,.F"ZZF6N$)!HF=!',';';F#$%*%7%7F>"f%	& z?Q344&344
+5( +>*B*B')3w~~&MF4P 
$c	* CfTZZ:TABC 
	% BTZZq9D@AB +S1 	HVLL$4Q?$FG	H ?C CB B	H s$   (*U&*U(&*U4U%(U14U>r   )Nr0   )F)__doc__r2   r  r   r	   r!   models.bertr   utilsr   
get_logger__name__r   r'   r)   r.   r>   rQ   r\   rg   rm   r   r   r   r   r   r   r   r   r3  rD  r   r   r   <module>rJ     s       	  )  
		H	%C('E #4
*1
\D)9.,>[|	.L^}r   