
    +sg                    ^    d dl mZ d dlZd dlZd dlZddlmZ  G d d      Z G d de      Zy)	    )annotationsN   )InputExamplec                  D    e Zd ZdZddddej
                  dddfdZdd	Zy
)STSDataReadera1  Reads in the STS dataset. Each line contains two sentences (s1_col_idx, s2_col_idx) and one label (score_col_idx)

    Default values expects a tab separated file with the first & second column the sentence pair and third column the score (0...1). Default config normalizes scores from 0...5 to 0...1
    r   r      	T   c
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y )N)	dataset_folderscore_col_idx
s1_col_idx
s2_col_idx	delimiterquotingnormalize_scores	min_score	max_score)
selfr   r   r   r   r   r   r   r   r   s
             ^/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/readers/STSDataReader.py__init__zSTSDataReader.__init__   sF     -*$$" 0""    c           
        t         j                  j                  | j                  |      }|j	                  d      rt        j                  |dd      nt        |d      5 }t        j                  || j                  | j                        }g }t        |      D ]  \  }}t        || j                           }	| j                  r)|	| j                  z
  | j                   | j                  z
  z  }	|| j"                     }
|| j$                     }|j'                  t)        |t+        |      z   |
|g|	             |dkD  st-        |      |k\  s n d	d	d	       |S # 1 sw Y   S xY w)
zJfilename specified which data split to use (train.csv, dev.csv, test.csv).z.gzrtutf8)encodingzutf-8)r   r   )guidtextslabelr   N)ospathjoinr   endswithgzipopencsvreaderr   r   	enumeratefloatr   r   r   r   r   r   appendr   strlen)r   filenamemax_examplesfilepathfIndataexamplesidrowscores1s2s               r   get_exampleszSTSDataReader.get_examples&   s5   77<< 3 3X>   ' IIhv6h1	 69::cT^^T\\RDH$T? 
Cc$"4"456(("T^^38WXE))(SW2DRQSH\a bc!#H(E
	& '	& s   !CE=EEE#N)r   )__name__
__module____qualname____doc__r&   
QUOTE_NONEr   r8    r   r   r   r   
   s/     #,r   r   c                  H     e Zd ZdZddddej
                  dddf fd	Z xZS )	STSBenchmarkDataReaderzReader especially for the STS benchmark dataset. There, the sentences are in column 5 and 6, the score is in column 4.
    Scores are normalized from 0...5 to 0...1
    r
         r	   Tr   c
                6    t         
|   |||||||||		       y )N)	r   r   r   r   r   r   r   r   r   )superr   )r   r   r   r   r   r   r   r   r   r   	__class__s             r   r   zSTSBenchmarkDataReader.__init__D   s2     	)!!'- 	 
	
r   )r9   r:   r;   r<   r&   r=   r   __classcell__)rE   s   @r   r@   r@   ?   s0     
 
r   r@   )	
__future__r   r&   r$   r     r   r   r@   r>   r   r   <module>rI      s+    " 
  	 2 2j
] 
r   