
    +sg                    <   d dl mZ d dlZd dlZd dlZd dlZd dlmZmZ d dl	m	Z	 d dl
mZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ d dlZd dlZd d
lmZmZ d dlmZ d dlmZ  d dl!m"Z"m#Z# d dl$m%Z% d dlm&Z& d dl'm(Z( d dlm)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z3 d dl4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z<  e<       rd dl=m>Z>m?Z?m@Z@mAZA  ej                  eC      ZDerd dlEmFZF d dlGmHZH d dlImJZJ  G d de)      ZKg dZLg d ZMd%d!ZNe G d" d#e             ZOd&d$ZPy)'    )annotationsN)Counterdefaultdict)copy)	dataclassfieldfields)Path)python_versionindent)TYPE_CHECKINGAnyLiteral)CardData	ModelCard)dataset_info)
model_info)
EvalResulteval_results_to_model_index)	yaml_dump)nn)tqdm)TrainerCallback)CodeCarbonCallback)make_markdown_table)TrainerControlTrainerState__version__)StaticEmbeddingTransformer)$SentenceTransformerTrainingArguments)fullnameis_accelerate_availableis_datasets_available)DatasetDatasetDictIterableDatasetValue)SentenceEvaluator)SentenceTransformer)SentenceTransformerTrainerc                       e Zd Zd fdZ	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 	 	 d	dZ xZS )
ModelCardCallbackc                   t         |           || _        || _        | j                  j                  j
                  D cg c]  }t        |t              r| }}|r|d   |j                  j                  _
        ||j                  j                  _        |j                  j                  j                  d       y c c}w )Nr   generated_from_trainer)super__init__trainerdefault_args_dictcallback_handler	callbacks
isinstancer   modelmodel_card_datacode_carbon_callbackadd_tags)selfr4   r5   callbackr7   	__class__s        S/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/model_card.pyr3   zModelCardCallback.__init__.   s    !2 !LL99CC
($67 
	 

 AJ1GMM))>07%%-%%../GH
s    B<c                   ddl m}m}m} | j                  j
                  rT|j                  j                  | j                  j
                  |j                  j                  d      |j                  _        | j                  j                  rT|j                  j                  | j                  j                  |j                  j                  d      |j                  _
        t        | j                  j                  t              r.t        | j                  j                  j                               }	n| j                  j                  g}	d}
|
t!        |	      k  r]|	|
   }t        ||||f      r5t#        |d      r)|j                  |	vr|	j%                  |j                         |
dz  }
|
t!        |	      k  r]|j                  j'                  |	       y )Nr   )AdaptiveLayerLossMatryoshka2dLossMatryoshkaLosstrainevalloss   )sentence_transformers.lossesrB   rC   rD   r4   train_datasetr:   extract_dataset_metadatatrain_datasetseval_dataseteval_datasetsr8   rG   dictlistvalueslenhasattrappend
set_losses)r=   argsstatecontrolr9   kwargsrB   rC   rD   lossesloss_idxrG   s               r@   on_init_endzModelCardCallback.on_init_end>   sk    	ed <<%%383H3H3a3a**E,A,A,P,PRY4E!!0 <<$$272G2G2`2`))5+@+@+N+NPV3E!!/ dll''.$,,++2245Fll''(F V$(#D4.2CEU!VWD&)IIV+dii(MH V$ 	((0    c                b   h d}|j                         }|j                         D 	ci c]  \  }}	||vs||	 c}	}|j                  _        |j                         D 	ci c],  \  }}	||vr#|| j                  v r|	| j                  |   k7  r||	. c}	}|j                  _        y c c}	}w c c}	}w )N>   do_evaldo_testdo_trainrun_name	hub_token	report_to
eval_delay
eval_steps
output_dir
save_stepslogging_dirlogging_stepssave_strategylogging_strategysave_total_limitgreater_is_betterpush_to_hub_tokensamples_per_labelshow_progress_barlogging_first_stepevaluation_strategymetric_for_best_model)to_dictitemsr:   all_hyperparametersr5   non_default_hyperparameters)
r=   rV   rW   rX   r9   rY   ignore_keys	args_dictkeyvalues
             r@   on_train_beginz ModelCardCallback.on_train_beginf   s    
0 LLN	)2):5
%3c>TCJ5
1
 (oo/=
U+%#1G1G*GEUYUkUkloUpLp J=
95
=
s   B%B% 1B+c                   |D ci c];  }|j                  d      sdj                  |j                  d      dd        ||   = }}t        |      dk(  rd|v rd|d   i}|j                  j
                  rR|j                  j
                  d   d   |j                  k(  r)|j                  j
                  d   j                  |       y |j                  j
                  j                  |j                  |j                  d	|       y c c}w )
N_loss _rH   rG   Validation LossStepEpochr   )
endswithjoinsplitrR   r:   training_logsglobal_stepupdaterT   epoch)	r=   rV   rW   rX   r9   metricsrY   r{   	loss_dicts	            r@   on_evaluatezModelCardCallback.on_evaluate   s     LSlCVYVbVbcjVkSXXciinQR0173<?l	ly>Q6Y#6*If,=>I!!//%%33B7?5CTCTT!!//3::9E!!//66"[[!--   ms
   D)Dc                   dht        |      z  }|r|j                  j                  rW|j                  j                  d   d   |j                  k(  r.||j	                            |j                  j                  d   d<   y |j                  j                  j                  |j                  |j                  ||j	                            d       y y )NrG   r   r   Training Loss)r   r   r   )setr:   r   r   poprT   r   )r=   rV   rW   rX   r9   logsrY   keyss           r@   on_logzModelCardCallback.on_log   s     x#d)#%%33))77;FCuGXGXXKOPTPXPXPZK[%%33B7H%%33::!& % 1 1)-dhhj)9 r]   )r4   r-   r5   dict[str, Any]returnNone)
rV   r#   rW   r   rX   r   r9   r,   r   r   )rV   r#   rW   r   rX   r   r9   r,   r   dict[str, float]r   r   )rV   r#   rW   r   rX   r   r9   r,   r   r   r   r   )	__name__
__module____qualname__r3   r\   r}   r   r   __classcell__)r?   s   @r@   r/   r/   -   s    I &12&1 &1  	&1
 #&1 
&1P)
2)
 )
  	)

 #)
 
)
V2   	
 # " 
42   	
 #  
r]   r/   )languagelicenselibrary_nametagsdatasetsr   pipeline_tagwidgetmodel-indexco2_eq_emissions
base_model)r9   r4   eval_results_dictc                     t               t        t        j                  t        j                  d} t               rddlm} || d<   t               rddlm} || d<   ddl	m} || d<   | S )N)pythonsentence_transformerstransformerstorchr   r   
accelerater   
tokenizers)
r   sentence_transformers_versionr   r    r   r%   r   r&   r   r   )versionsaccelerate_versiondatasets_versiontokenizers_versions       r@   get_versionsr      s^     "!>$00""	H  @!3</</H\Or]   c                     e Zd ZU dZ ee      Zded<   dZded<   dZ	ded<   dZ
ded	<    ee      Zd
ed<    ee      Zd
ed<   dZded<    ed       Zded<   dZded<    edd      Zded<    edd      Zded<    eed      Zded<    eed      Zded<    eed      Zded<    eed      Zd ed!<    eed      Zd
ed"<    edd      Zded#<    eed      Zd
ed$<    edd      Zd%ed&<    eed      Zd'ed(<    edd      Zd)ed*<    eddd+      Zd,ed-<    eedd.      Zd/ed0<    ed1d      Zd2ed3<    ed4d      Z d5ed6<    ed7d      Z!ded8<    ed9d      Z"ded:<    ee#d      Z$d'ed;<    eddd+      Z%d<ed=<   dTd>Z&dUdVd?Z'dWd@Z(dXdAZ)dYdBZ*dZdCZ+d[dDZ,d\d]dEZ-	 	 	 	 	 	 	 	 d^dFZ.	 	 	 	 	 	 d_dGZ/d`dHZ0dadIZ1d\dbdJZ2dcdKZ3dddLZ4dedMZ5dTdNZ6dfdOZ7dP Z8dgdQZ9dfdRZ:d\dhdSZ;y)i SentenceTransformerModelCardDataa  A dataclass storing data used in the model card.

    Args:
        language (`Optional[Union[str, List[str]]]`): The model language, either a string or a list,
            e.g. "en" or ["en", "de", "nl"]
        license (`Optional[str]`): The license of the model, e.g. "apache-2.0", "mit",
            or "cc-by-nc-sa-4.0"
        model_name (`Optional[str]`): The pretty name of the model, e.g. "SentenceTransformer based on microsoft/mpnet-base".
        model_id (`Optional[str]`): The model ID when pushing the model to the Hub,
            e.g. "tomaarsen/sbert-mpnet-base-allnli".
        train_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the training datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}, {"name": "STSB"}]
        eval_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the evaluation datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"id": "mteb/stsbenchmark-sts"}]
        task_name (`str`): The human-readable task the model is trained on,
            e.g. "semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more".
        tags (`Optional[List[str]]`): A list of tags for the model,
            e.g. ["sentence-transformers", "sentence-similarity", "feature-extraction"].

    .. tip::

        Install `codecarbon <https://github.com/mlco2/codecarbon>`_ to automatically track carbon emission usage and
        include it in your model cards.

    Example::

        >>> model = SentenceTransformer(
        ...     "microsoft/mpnet-base",
        ...     model_card_data=SentenceTransformerModelCardData(
        ...         model_id="tomaarsen/sbert-mpnet-base-allnli",
        ...         train_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         eval_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         license="apache-2.0",
        ...         language="en",
        ...     ),
        ... )
    )default_factoryzstr | list[str] | Noner   N
str | Noner   
model_namemodel_idlist[dict[str, str]]rL   rN   zjsemantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and morestr	task_namec                 
    g dS )N)sentence-transformerssentence-similarityzfeature-extraction r   r]   r@   <lambda>z)SentenceTransformerModelCardData.<lambda>  s	     !
 r]   zlist[str] | Noner   
deprecatedzLiteral['deprecated']generate_widget_examplesF)defaultinitr   base_model_revision)r   r   r   rx   rw   z.dict[SentenceEvaluator, dict[str, Any]] | Noner   zlist[dict[str, float]]r   r   predict_examplelabel_example_listzCodeCarbonCallback | Noner;   dict[str, str]	citationsz
int | Nonebest_model_step)r   r   reprz!SentenceTransformerTrainer | Noner4   )r   r   r   	list[str]r   Tbool
first_saver   intwidget_stepr   r   r   r   versionzSentenceTransformer | Noner9   c                   | j                    }t        | j                   t              r| j                   g| _         | j                  | j                  |      | _        | j                  | j
                  |      | _        | j                  rJ| j                  j                  d      dk7  r+t        j                  d| j                  d       d | _        y y y )N)infer_languages/rH   zThe provided z} model ID should include the organization or user, such as "tomaarsen/mpnet-base-nli-matryoshka". Setting `model_id` to None.)
r   r8   r   validate_datasetsrL   rN   r   countloggerwarning)r=   r   s     r@   __post_init__z.SentenceTransformerModelCardData.__post_init__B  s    "mm+dmmS)!]]ODM"44T5H5HZi4j!33D4F4FXg3h==T]]005:NN0 1^ ^ !DM ;=r]   c                R   g }|D ]  }d|vrd|v r|d   |d<   d|v r	 t        |d         }|j                  rq|rod|j                  v ra|j                  j                  d      }|Dt        |t              r|g}|D ],  }|| j
                  vs| j
                  j                  |       . |j                  | j                  vr&| j                  j                  |j                         	 |j                  |        |S # t        $ r" t        j                  d|d   d       |d= Y ?w xY w)Nnameidr   zThe dataset `id` z5 does not exist on the Hub. Setting the `id` to None.)get_dataset_infocardDatagetr8   r   r   rT   r   r   	Exceptionr   r   )r=   dataset_listr   output_dataset_listdatasetinfodataset_languager   s           r@   r   z2SentenceTransformerModelCardData.validate_datasetsR  s7    # 	0GW$7?&-dmGFOw6+GDM:D }}Z4===X+/==+<+<Z+H(+7)*:C@4D3E 0,< C#+4==#@$(MM$8$8$BC
 wwdmm3,,TWW5&&w/9	0: #") ! &NN+GDM+<<qr  	&s   C;;(D&%D&c                   ddi}|D ]&  }	 |j                   ||j                  j                  <   ( t	        t
              }|j                         D ]  \  }}||   j                  |        dd}|j                         D ci c]  \  }} ||      | c}}| _        | j                  D ci c]  }|j                  j                  | c}D cg c]  }d| 	 c}       y # t        $ r Y w xY wc c}}w c c}w c c}w )NzSentence Transformersa  
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
c                b    t        |       dkD  rdj                  | d d       dz   | d   z   S | d   S )NrH   z, r   z and r   )rR   r   )rZ   s    r@   	join_listz>SentenceTransformerModelCardData.set_losses.<locals>.join_list  s:    6{Qyy-7&*DD!9r]   zloss:)rZ   r   r   r   )
citationr?   r   r   r   rP   rv   rT   r   r<   )r=   rZ   r   rG   inverted_citationsr   r   s          r@   rU   z+SentenceTransformerModelCardData.set_lossess  s   # 
&
	  	D59]]	$..112	
 ).'oo/ 	6ND(x(//5	6	
 OaNfNfNhi:J(F)F+X5i]c2dUY4>>3J3JD3P2de$tf~ef   j2des#   #C$C3-C9C>$	C0/C0c                    || _         y N)r   )r=   steps     r@   set_best_model_stepz4SentenceTransformerModelCardData.set_best_model_step  s
    #r]   c                   t        |t              ry t        |t              rt        |      }g | _        t        t        j                  t        |j                               d            }d}t        |j                         ddd      D ]g  \  }}||   j                  j                         D cg c]%  \  }}t        |t              r|j                  d	v r|' }}}||   j                  |      }	t!        |	      }
|
d
k(  ryi }t#        |	j%                  t        j&                  t)        |
      t+        ||
                        D ](  \  }}t-        d |j                         D              ||<   * t/        t1        |j                         d        \  }}|d | t        ||d  d d d         }}|D ]<  }|	|   j                         D cg c]  \  }}|dk7  r|j3                  d      s| }}}t!        |      dk  r|r|j5                         }|	|   j                         D cg c]  \  }}|dk7  r|j3                  d      s| }}}t!        |      dk(  r|j7                  |       n|j9                  |d          t!        |      dk  r|rt!        |      dk  r| j                  j9                  |d
   t        j&                  |dd  t!        |      dz
        d       |d d | _        ? j y c c}}w c c}}w c c}}w )N)r      )k  zComputing widget examplesexampleF)descunitleave>   stringlarge_stringr   c              3  d   K   | ](  \  }}|d k7  r|j                  d      st        |       * yw)dataset_name_prompt_lengthN)r   rR   .0r{   r|   s      r@   	<genexpr>zGSentenceTransformerModelCardData.set_widget_examples.<locals>.<genexpr>  s4      #"Un,S\\BR5S J#s   .0c                    | d   S )NrH   r   )xs    r@   r   zFSentenceTransformerModelCardData.set_widget_examples.<locals>.<lambda>  s
    AaD r]   r{   r   r   r      rH   )source_sentence	sentences   )r8   r)   r'   r(   r   r   randomchoicesrP   r   r   rv   featuresr*   dtypeselect_columnsrR   	enumerateselectsamplerangeminsumzipsortedr   r   extendrT   r   )r=   r   dataset_namesnum_samples_to_checkr   num_samplescolumnfeaturecolumnsstr_datasetdataset_sizelengthsidxr  indicesr   target_indicesbackup_indicesr{   sentencer	  
backup_idxbackup_samples                          r@   set_widget_examplesz4SentenceTransformerModelCardData.set_widget_examples  s   g/gw'!'2GtGLLN/Cq IJ#)-!(C)[`*
 8	5%L+ (/|'<'E'E'K'K'M#FGgu-'--C]2] G 
 ",/>>wGK{+Lq G(""6==|1DL`bnHo#pq  V  # #&,lln#   fW]]_.IJJGQ-4\k-BDQ\Q]I^_cac_cIdDeNN & 5 *5S)9)?)?)A%Xn,S\\BR5S 	 
 )nq(^!/!3!3!5J .9-D-J-J-L%)C.0FV9W !%M %
 =)Q.!((7 "((q)9: )nq(^ y>A%""(1!6==QZ[\[]Q^befobpstbtCuv (1!}$75;8	5
4%s   2*K7"K=
("L
c                   ddl m} t        |      | j                  |<   t	        |d      r-|j
                  x}rt        ||      r$|j                  D cg c]  }|j
                   }}nt        |t              r|g}| j                  d}d}n@| j                  j                  j                  }| j                  j                  j                  }|j                         D 	ci c]  \  }}	||v s||	 }
}}	| j                  r4| j                  d   d   |k(  r| j                  d   j                  |
       y | j                  j!                  ||d|
       y y y c c}w c c}	}w )Nr   )SequentialEvaluatorprimary_metricr   r   r   ) sentence_transformers.evaluationr+  r   r   rS   r,  r8   
evaluatorsr   r4   rW   r   r   rv   r   r   rT   )r=   	evaluatorr   r+  primary_metricssub_evaluatorr   r   r{   r|   training_log_metricss              r@   set_evaluation_metricsz7SentenceTransformerModelCardData.set_evaluation_metrics  sP   H,0My) 9./	H`H`5`_5`)%89U^UiUi"jM=#?#?"j"jOS1#2"3||#||))55**00AH#k:3TW[jTjCJ#k #k!!d&8&8&<V&D&L""2&--.BC""))!& $ /# 6a/"j $ls   E"E/Ec           	     (   d}t        t              }t               }|D ]m  }|d   }|d   }||vrC||   j                  dt	        |       d       t        ||         |k\  r|j                  |       t        |      | j                  k(  sm n |j                         D cg c]^  \  }}| j                  j                  r)t        |t              r| j                  j                  |   n|ddj                  |      z   dz   d	` c}}| _        y c c}}w )
Nr
  textlabelz<li>z</li>z<ul> z</ul>)LabelExamples)r   rP   r   rT   r   rR   addnum_classesrv   r9   labelsr8   r   r   r   )	r=   r   num_examples_per_labelexamplesfinished_labelsr  r5  r6  example_sets	            r@   set_label_examplesz3SentenceTransformerModelCardData.set_label_examples  s
   !"t$% 	F&>D7OEO+&&d4j\'?@x'+AA#''.?#t'7'77	 '/nn&6#

 #{ 6:ZZ5F5F:V[]`Ka**51gl"RWW[%99GC#
 #
s   !A#Dc           	     4   t        |t              r=|j                         D cg c]  \  }}| j                  ||      D ]  }| ! c}}}S |rt	        j
                  d|      rd }|xs |j                  j                  t        |j                        d}|j                  j                  rR|j                  |j                  j                  v r0|j                  j                  |j                     j                  |d<   |j                  x}rt        |j                               d   }|j                  d      rUd|v rQ|t!        d      d  j                  d      }|d   |d<   |d	   j                  d
      d   x}rt!        |      dk(  r||d<   |gS c c}}}w )N)r   z_dataset_\d+)r   r   sizer   zhf://datasets/@r   rH   r   (   revision)r8   r(   rv   infer_datasetsrematchr   r   r   r   splitsnum_examplesdownload_checksumsrP   r   
startswithrR   )	r=   r   r   sub_datasetdataset_output	checksumssourcesource_partsrF  s	            r@   rG  z/SentenceTransformerModelCardData.infer_datasets  s   g{+ 29 -L+#22;\2Z    BHH_lCL !=GLL$=$='
 <<7==GLL4G4G#G%,\\%8%8%G%T%TN6"  22292)..*+A.F  !12sf}%c*:&;&=>DDSI'3At$ ,Q 5 5c :1 ==H=3x=TVCV19N:.7s   $Fc                &
   |si S d|vrt        |t              rt        |      |d<   |j                  D cg c]  }d| d
 c}|d<   i |d<   t        |t              r|j                  D ]t  }|dd |   }|d   }t        |t              r| j
                  j                  |      }t        |t              r*d	|v r&|d	   j                  d
      j                         }d}	n|D 
cg c]  }
t        |
       }}
d}	dt        t        |      d       d|	 t        t        |      t        |      z  d       d|	 t        t        |      d       d|	 dd|d   |<   t        |t        t        f      rTt        |      }dt!        |      D ci c])  }|t        |      d
kD  rdnd ||   t        |      z  d+ c}d|d   |<   ^t        |t"              rVdt        t        |      d      t        t        |      t        |      z  d      t        t        |      d      dd|d   |<   t        |t$              rt        |D cg c]  }t        |       c}      }t        |      d
k(  rddt        |       did|d   |<   dt        |       dt        |      t        |      z  ddt        |       ddd|d   |<   at'        |      i d|d   |<   w d5d}ddi|d   j)                         D ci c]  \  }}||d    c}}ddi|d   j)                         D ci c]  \  }}| ||d          c}}g}t+        t-        |      j/                  d d!      d"      |d#<   |dd$ |d%<   t        |d%   t%        |d%         d            }g }t1        |      D ]  }i }|j                  D ]  }|d%   |   |   }t        |t$              r"t        |      d&kD  rt	        |dd&       dd' d(z   }t        |t              rt        |      dkD  r|dd d)z   }t	        |      j/                  d*d+      }d| d||<    |j3                  |        t+        t-        |      j/                  d d!      d"      |d,<   d-t'        |      i|d.<   t5        |d/      r>|j7                         }	 t9        j:                  |d01      }t+        d2| d3d"      |d.   d4<   |S c c}w c c}
w c c}w c c}w c c}}w c c}}w # t<        $ r t	        |      }Y Nw xY w)6a  
        Given a dataset, compute the following:
        * Dataset Size
        * Dataset Columns
        * Dataset Stats
            - Strings: min, mean, max word count/token length
            - Integers: Counter() instance
            - Floats: min, mean, max range
            - List: number of elements or min, mean, max number of elements
        * 3 Example samples
        * Loss function name
            - Loss function config
        rC  z<code>z</code>r  statsNr   r   attention_maskrH   )dimtokens
charactersr      r   )r  meanmax)r  datar   ~r7  z.2%floatrP   z	 elementsz.2fr\  c                Z    ddj                  d | j                         D              z   dz   S )Nz<ul><li>z	</li><li>c              3  0   K   | ]  \  }}| d |   yw)z: Nr   r  s      r@   r  zaSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.to_html_list.<locals>.<genexpr>  s      4f:3PUuBug5F4fs   z
</li></ul>)r   rv   )r\  s    r@   to_html_listzNSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.to_html_list  s.    !K$4$44fY]YcYcYe4f$ffiuuur]   typer  details-:|--|z  stats_tabler
  r>  r   r   z, ...]z...
z<br>examples_tabler$   rG   get_config_dictr  r   z```json
z
```config_code)r\  rO   )r8   r'   rR   column_namesr   r9   tokenizerO   r  tolistroundr  r[  r   r   r   r  r^  rP   r$   rv   r   r   replacer  rT   rS   ri  jsondumps	TypeError)r=   r   r   rG   r  
subsectionfirst	tokenizedr!  suffixr&  counterr{   lstra  r|   stats_linesr  examples_lines
sample_idxr  config
str_configs                          r@   compute_dataset_metricsz8SentenceTransformerModelCardData.compute_dataset_metrics.  s   & I%*Wg*F#&w<L JQJ^J^"_VF87#;"_Y "Wgw'!.. 8[$Ud^F3
"1eS) $

 3 3J ?I!)T27G97T"+,<"="A"Aa"A"H"O"O"Q!)AK"LX3x="L"L!-!)&+CL!&<%=Qvh#G',S\CL-H!'L&MQvh$W&+CL!&<%=Qvh#G!5L)&1  T{3%j1G!& (.g! #  3w<!+;C#DWS\TWXbTcEcdgDh!ii!5L)&1  u-!(#(Z!#<$)#j/C
O*KQ$O#(Z!#<!5L)&1  t,%:&FCs3x&FGG7|q(%+ &3u:,i(@%9W-f5 &,*-g,y'A+.w<#g,+Fs*K9(U*-g,y'A%9W-f5 ?GuoWY4ZL)&1q8[tv VelSZF[FaFaFcd
UU7^ 3deYuVbcjVkVqVqVs"t
U3U6](C#C"tuK +11D[1Q1Y1YZ_af1gim*nL''.r{L$l:6tL<T7UVW7XYZKN#K0 /
%22 
>F(4V<ZHE!%.3u:> #E"1Is 3h >!%-#e*t2C %etu 4J..tV<E(.ugW&=GFO
> %%g./ .44G4W4_4_`egl4mos-tL)*  
V 4*+))+F)!ZZq9
 399ZLPU9VX\2]L /G #` #M! 'G0  e"t<  ) [
)s5   S$S.S#S(
?S-,S3
*S9 9TTc                x   |rW|rft        |t              rt        |      t        |      k7  st        |t              r/t        |      dk7  r!t        j                  d| d| d| d       g }|s| j                  |      }t        |t              rt        |j                         |j                         |      D cg c]j  \  }}}| j                  ||t        | j                  j                  t              r| j                  j                  |   n| j                  j                        l }}}}n+| j                  ||d   | j                  j                        g}|dk(  r?t        |D cg c]  }|j                  dd       c}      }|r| j!                  d	|        | j#                  |      S c c}}}w c c}w )
NrH   zThe number of `z?_datasets` in the model card data does not match the number of z1 datasets in the Trainer. Removing the provided `z$_datasets` from the model card data.r   rE   rC  zdataset_size:)r8   r(   rR   r'   r   r   rG  r  r   rQ   r~  r4   rG   rO   r  r   r<   r   )	r=   r   dataset_metadatadataset_typer   dataset_valuer   metadatanum_training_sampless	            r@   rK   z9SentenceTransformerModelCardData.extract_dataset_metadata  s    G[1c:J6KsSZ|6[w0S9I5Ja5O%l^3rs  sA A..:^;_a $& ##'#6#6w#? ';/ FI(8:JF	$ 	$ Bm\ 00%$;EdllFWFWY];^)),7dhdpdpdudu	$  	$ %)$@$@JZ[\J]_c_k_k_p_p$q#r  7"#&P`'aHVQ(?'a#b #.B-CDE%%&677'	$ (bs   <A/F0)F7c                    || _         y r   )r9   )r=   r9   s     r@   register_modelz/SentenceTransformerModelCardData.register_model  s	    
r]   c                    || _         y r   )r   )r=   r   s     r@   set_model_idz-SentenceTransformerModelCardData.set_model_id  s	     r]   c                    	 t        |      }|j                  | _        ||dk(  r|j                  }|| _        y# t        $ r Y yw xY w)NFmainT)get_model_infor   r   r   shar   )r=   r   rF  r   s       r@   set_base_modelz/SentenceTransformerModelCardData.set_base_model  sS    	'1J %--x61!~~H#+   		s   9 	AAc                8    t        |t              r|g}|| _        y r   )r8   r   r   )r=   r   s     r@   set_languagez-SentenceTransformerModelCardData.set_language  s    h$ zH r]   c                    || _         y r   )r   )r=   r   s     r@   set_licensez,SentenceTransformerModelCardData.set_license  s	    r]   c                    t        |t              r|g}|D ],  }|| j                  vs| j                  j                  |       . y r   )r8   r   r   rT   )r=   r   tags      r@   r<   z)SentenceTransformerModelCardData.add_tags  s@    dC 6D 	&C$))#		  %	&r]   c           
        t        | j                  d   t              r| j                  d   j                  j                  j
                  }t        |      }dj                  |j                  dd        g}|j                  j                  d      }|t        dt        |            D cg c].  }dj                  |d |       dz   dj                  ||d        z   0 c}z  }|D ]  }| j                  |      s y  y t        | j                  d   t              rC| j                  d   j                  r)| j                  | j                  d   j                         y y y c c}w )Nr   r   r   rH   )r8   r9   r"   
auto_modelr|  _name_or_pathr
   r   partsr   r   r  rR   r  r!   r   )r=   r   base_model_pathcandidate_model_idsrJ  r"  r   s          r@   try_to_set_base_modelz6SentenceTransformerModelCardData.try_to_set_base_model  s=   djjm[1A1188FFJ":.O $'88O,A,A"#,F#G"H
 %))//4FQVWXZ]^dZeQf$JM&,sxxst/EE$  0 &&x0 

17zz!}''##DJJqM$<$<= ( 8$s   (3Ec                J   g }i }g }| j                   j                         D ]  \  }}t        |dd      t        |dd      }r{t        fd|j	                         D              rY|j                         D ci c]  \  }}|t              dz   d | }}}|r%|j                  dz         r|t              dz   d }dd}	|j                         D ci c]  \  }}| |	|       }}}|j                         D 
cg c]4  \  }
}|
|k(  rd|
 dn|
|
|k(  rdt        |d	       dnt        |d	      d
6 }}
}|j                  }t        |dd      }|j                  t        |      |||d       |j                  |j                         D 
cg c]  \  }
}t        |t        t        f      rvt        ||j!                         j#                  dd      |xs d|r"|j#                  dd      j#                  dd      nd|
j#                  dd      j%                         |
|       c}}
       |j'                  |       ! g }|D ]  }|d   D ci c]  }|d   |d    }}t)        |      }|D ]  }t)        d |d   D              }|d   |d   k(  s$||k(  s*|d   |d   k7  s6|d   D ]+  }d|v r|j+                  d      ||d   <   ||d      ||d   <   - t        |d   t,              s	|d   g|d<   |d   j                  |d            |j                  |        |D ]/  }t/        |j+                  d            j#                  dd      |d<   1 |t-        |j	                               t1        | j2                  |      dS c c}}w c c}}w c c}}
w c c}}
w c c}w )au  Format the evaluation metrics for the model card.

        The following keys will be returned:
        - eval_metrics: A list of dictionaries containing the class name, description, dataset name, and a markdown table
          This is used to display the evaluation metrics in the model card.
        - metrics: A list of all metric keys. This is used in the model card metadata.
        - model-index: A list of dictionaries containing the task name, task type, dataset type, dataset name, metric name,
          metric type, and metric value. This is used to display the evaluation metrics in the model card metadata.
        r   Nr,  c              3  F   K   | ]  }|j                  d z           yw)r   N)rM  )r  r{   r   s     r@   r  zGSentenceTransformerModelCardData.format_eval_metrics.<locals>.<genexpr>  s     Q3CNN4#:6Qs   !rH   r   c                b    	 t        | d      r| j                         S 	 | S # t        $ r Y | S w xY w)z^Try to convert a value from a Numpy or Torch scalar to pure Python, if not already pure Pythonr  )rS   itemr   )r|   s    r@   try_to_pure_pythonzPSentenceTransformerModelCardData.format_eval_metrics.<locals>.try_to_pure_python  sB    ug.$zz|+ /  ! s   ! 	..**r  )Metricr*   )
class_namedescriptionr   table_linesr   -unknownUnknown)r   	task_typer  r   metric_namemetric_typemetric_valuer  r  r*   c              3  &   K   | ]	  }|d      yw)r  Nr   )r  lines     r@   r  zGSentenceTransformerModelCardData.format_eval_metrics.<locals>.<genexpr>U  s     1pT$x.1ps   r  r   rd  re  table)eval_metricsr   r   )r|   r   r   r   )r   rv   getattrallr   rR   rM  rn  r  rT   r$   r  r8   r   r^  r   lowerro  titler   r   r   rP   r   r   r   )r=   r  all_metricseval_resultsr/  r   r,  r{   r|   r  
metric_keyr  r  r  r   grouped_eval_metricseval_metricr  eval_metric_mappingeval_metric_metricsgrouped_eval_metricgrouped_eval_metric_metricsr   s                         @r@   format_eval_metricsz4SentenceTransformerModelCardData.format_eval_metrics	  s.    "&"8"8">">"@ 7	(Iw9fd3D$Y0@$GNQ',,.QQIPY:33s4y1}/6YY!n&?&?s
&K%3CIMO%DN IPX*#us.u55XGX 18 -J 6@>5Q:,b1Wa!^3  "%a"8!9<|Q/	K  $//K"9fd;L"*9"5#.$0#.	  5<MMO 1
L!,e= "-"-"3"3"5"="=c3"G%1%>YYe\%9%9#s%C%K%KCQT%Ukt$.$6$6sC$@$F$F$H$.%1 w'o7	(t  "' 	9KMXYfMg"hT4>4=#@"h"h"%&9":'; 9#.11pM`anMo1p.p+-1D\1RR+/JJ#N37J>7ZZ !4M B `"d?HLQXHYD!4^!DE<OPTU]P^<_[89	` &&9.&I4P?RSa?b>c+N;'7>>{>?Z[#9& %++K8-	90 $8 	+>?R?V?VWd?e+f+n+nu,(	 1K,,./6tU
 	
i Z Y,& #is   :NN 9NBNN c                   g | j                   D ]-  }|j                         D ]  }|vsj                  |        / dfd}t        |      }| j                   D cg c]N  }|D ci c]@  }||d   | j                  k(  rd||v rt        ||   d      nd dn|j                  |d      B c}P }}}t        |      }|d|v dS c c}w c c}}w )	Nc                    | dk(  ry| dk(  ry| dk(  ry| dk(  ry| j                  d	      ry
j                  |       dz   S )Nr   r   r   rH   r   rY  r   r
  rG   r  r   )r   index)r{   eval_lines_keyss    r@   sort_metricszKSentenceTransformerModelCardData.format_training_logs.<locals>.sort_metrics}  sS    g~f}o%''||F#"((-11r]   r  r   r  r  r  )
eval_linesexplain_bold_in_eval)r{   r   r   r   )r   r   rT   r  r   rn  r   r   )	r=   linesr{   r  sorted_eval_lines_keysr  r   r  r  s	           @r@   format_training_logsz5SentenceTransformerModelCardData.format_training_logst  s
   '' 	0Ezz| 0o-#**3/0	0	2 "(\!J **
  2	  <4#7#77 %S	1-#FbIXXc3'(
 
 )7
$$(J$6
 	

s   "	C+AC
0C
Cc                f   | j                   j                  j                         }dt        |j                        dz  t        |j
                        dd|j                  dk(  |j                  |j                  t        |j                  dz  d      di}|j                  r|j                  |d   d	<   |S )
Nr   r   
codecarbonzfine-tuningYi  r
  )	emissionsenergy_consumedrQ  training_typeon_cloud	cpu_modelram_total_size
hours_usedhardware_used)r;   tracker_prepare_emissions_datar^  r  r  r  r  r  rn  duration	gpu_model)r=   emissions_dataresultss      r@   get_codecarbon_dataz4SentenceTransformerModelCardData.get_codecarbon_data  s    22::RRT">#;#;<tC#()G)G#H&!.*33s:+55"0"?"?#N$;$;d$BAF
!
 ##;I;S;SG&'8r]   c                J   | j                   sM| j                  A| j                  j                  xs | j                  j                  x}r| j	                  |       | j
                  r| j                  s	 | j                          | j                  s(| j                  rd| j                   | _	        nd| _	        t        |       D ci c]#  }|j                  t        | |j                        % }}| j                  r 	 |j                  | j                                | j$                  r 	 |j                  | j'                                t)        | j$                        dkD  |d<   | j*                  rU| j*                  j,                  r?| j*                  j,                  j.                  |j                  | j1                                | j2                  j5                         |d<   | j2                  j7                         |d<   t9        | j2                        |d	<   | j2                  j:                  rad
ddddj=                  | j2                  j:                  | j2                  j:                  j?                  dd      jA                               |d<   nd
|d<   d| _        tB        D ]  }|jE                  |d         |S # t        $ r Y ?w xY wc c}w # t        $ r}t         j#                  d|        |d }~ww xY w# t        $ r#}t         j#                  d|        Y d }~d }~ww xY w)NzSentenceTransformer based on r,   z+Error while formatting evaluation metrics: z&Error while formatting training logs: d   hide_eval_linesmodel_max_lengthoutput_dimensionalitymodel_stringzCosine SimilarityzDot ProductzEuclidean DistancezManhattan Distance)cosinedot	euclidean	manhattanr   r   similarity_fn_nameF)#r   r4   rM   rJ   r)  r   r   r  r   r   r	   r   r  r   r   r  r   r   r   r  rR   r;   r  _start_timer  r9   get_max_seq_length get_sentence_embedding_dimensionr   r  r   ro  r  IGNORED_FIELDSr   )r=   r   r   
super_dictexcr{   s         r@   ru   z(SentenceTransformerModelCardData.to_dict  s    ( LL55S9S9SSS$$W- ??4??**,
 $A$//AR"S"7IOPTVejj'$

";;V
V !!!!$":":"<= O!!$";";"=> ),D,>,>(?#(E
$% %%))11))11==Id6689 *.)F)F)H
%&.2jj.Y.Y.[
*+%(_
>"::((-$11	0
 c$**//1N1N1V1VWZ\_1`1f1f1hi +, 0CJ+,! 	&CNN3%	&o   W  !LSERS	  O!GuMNNOsH   3J6 (K;K 'K6 6	KK	K3K..K36	L"?LL"c           	         t        | j                         j                         D ci c]  \  }}|t        v s|d g fvs|| c}}d|      j	                         S c c}}w )NF)	sort_keys
line_break)r   ru   rv   YAML_FIELDSstrip)r=   r  r{   r|   s       r@   to_yamlz(SentenceTransformerModelCardData.to_yaml  s\    *.,,.*>*>*@sJCC;DV[`imoqhr[rS%Zs!
 %'		ss   A
A
A
)r   r   )T)r   r   r   r   )rZ   zlist[nn.Module]r   r   )r   r   r   r   )r   Dataset | DatasetDictr   r   )r/  r+   r   r   r   r   )r   r'   r   r   r   )r   r  r   r   r   r   )r   z Dataset | IterableDataset | Noner   r   rG   z'dict[str, nn.Module] | nn.Module | Noner   r   )r   r  r  zLiteral['train', 'eval']r   r   )r9   r,   r   r   )r   r   r   r   )r   r   rF  r   r   r   )r   str | list[str]r   r   )r   r   r   r   )r   r  r   r   r   r   )r   z1dict[Literal['co2_eq_emissions'], dict[str, Any]])r   r   )<r   r   r   __doc__r   rP   r   __annotations__r   r   r   rL   rN   r   r   r   r   r   rO   rx   rw   r   r   r   r   r   r;   r   r   r4   r   r   r   r   r   r   r   r9   r   r   rU   r   r)  r3  rA  rG  r~  rK   r  r  r  r  r  r<   r  r  r  r  ru   r  r   r]   r@   r   r      s   $N (-T'BH$BGZ!J
!Hj+0+FN(F*/*EM'Et s  #
D
  7C3B #4e<J
<&+Du&EE27SX2YY*/5*QQHM^binHoEo,1$U,SM)S#(E#JF J"'5"AOZA/4TPU/V,V6;Du6U3U %d GI~G"'5"AOZA16t%V[1\G.\5uMHiM T6J6Re4K4 &;%HL#H&=EJL#J#LuMG^M ).dU(SE%S! #Bg>$D5L>
* >{1{ %{ 6	{
 
{z%8,%8Nf%8	%8N!
!
&>,i
V$
L&DLr]   r   c                    t        t              j                  dz  }t        j                  | j
                  |d      }|j                  S )Nzmodel_card_template.mdu   🤗)	card_datatemplate_pathhf_emoji)r
   __file__parentr   from_templater:   content)r9   r  
model_cards      r@   generate_model_cardr     s?    N)),DDM((53H3HXepvwJr]   r  )r9   r,   r   r   )Q
__future__r   rp  loggingr  rH  collectionsr   r   r   dataclassesr   r   r	   pathlibr
   platformr   textwrapr   typingr   r   r   r   r   huggingface_hubr   r   r   r   r   r  huggingface_hub.repocard_datar   r   huggingface_hub.utilsr   r   tqdm.autonotebookr   r   transformers.integrationsr   transformers.modelcardr   transformers.trainer_callbackr   r   r   r    r   sentence_transformers.modelsr!   r"   #sentence_transformers.training_argsr#   sentence_transformers.utilr$   r%   r&   r   r'   r(   r)   r*   	getLoggerr   r   2sentence_transformers.evaluation.SentenceEvaluatorr+   )sentence_transformers.SentenceTransformerr,   sentence_transformers.trainerr-   r/   r  r  r   r   r   r   r]   r@   <module>r     s    "    	 ,  0 0  #  . .   / < 8 Q +  " ( 8 6 F N E T _ _EE			8	$TMHU Up ;, Lx L L^r]   