
    sgZ                        d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZmZ d	d
lmZmZmZ  e       rddlZ ej*                  e      ZdZ	 	 	 	 d/de
ee   df   dede	e   fdZ e       r:edfdej8                  j:                  dede	e   dej8                  j:                  fdZ	 	 	 	 d/dee   dede	e   fdZ G d de      Z  G d de      Z! G d de      Z" G d de"      Z# G d de      Z$ G d de      Z% G d  d!e      Z& G d" d#e      Z' G d$ d%e      Z( G d& d'e      Z) G d( d)e      Z*d*dd*d*d	d*d*d*d*d+	Z+e$e"e#e!e%e&e'e(e)e*d,
Z,d-d-d-d-d-d.d-d-d-d-d,
Z-y)0zGLUE processors and helpers    N)asdict)Enum)ListOptionalUnion   )PreTrainedTokenizer)is_tf_availablelogging   )DataProcessorInputExampleInputFeaturesu  This {0} will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyexamplesztf.data.Dataset	tokenizer
max_lengthc                    t        j                  t        j                  d      t               t               r@t        | t        j                  j                        r|t        d      t        | |||      S t        | |||||      S )a=  
    Loads a data file into a list of `InputFeatures`

    Args:
        examples: List of `InputExamples` or `tf.data.Dataset` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
        output_mode: String indicating the output mode. Either `regression` or `classification`

    Returns:
        If the `examples` input is a `tf.data.Dataset`, will return a `tf.data.Dataset` containing the task-specific
        features. If the input is a list of `InputExamples`, will return a list of task-specific `InputFeatures` which
        can be fed to the model.

    functionzWWhen calling glue_convert_examples_to_features from TF, the task parameter is required.r   task)r   r   
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarningr
   
isinstancetfdataDataset
ValueError%_tf_glue_convert_examples_to_features"_glue_convert_examples_to_features)r   r   r   r   r   r   s         T/var/www/html/venv/lib/python3.12/site-packages/transformers/data/processors/glue.py!glue_convert_examples_to_featuresr&   )   st    2 MM%,,Z8-HZ"''//B<vww4XyU_fjkk-)
*bm     returnc                 $  
 t        |          }| D cg c]"  }|j                  |j                  |            $ } }t        | |||      
|dk(  rt        j
                  nt        j                  }
fd}|j                  }t        j                  j                  j                  ||D 	ci c]  }	|	t        j                   c}	|f|D 	ci c]  }	|	t	        j                  dg       c}	t	        j                  g       f      S c c}w c c}	w c c}	w )zb
        Returns:
            A `tf.data.Dataset` containing the task-specific features.

        r   sts-bc               3      K   D ]H  } t        |       j                         D ci c]  \  }}|	|| }}}|j                  d      }||f J y c c}}w w)Nlabel)r   itemspop)exkvdr,   featuress        r%   genz2_tf_glue_convert_examples_to_features.<locals>.gen^   s\      !&,Rj&6&6&8JdaAMQTJJg%j !Js   "A
AA#AN)glue_processorstfds_mapget_example_from_tensor_dictr&   r   float32int64model_input_namesr    r!   from_generatorint32TensorShape)r   r   r   r   	processorexample
label_typer4   input_namesr0   r3   s             @r%   r#   r#   N   s     $D)+	gop\cI&&y'M'Mg'VWpp4XyU_fjk#'7?RZZ
	!  11ww--#./aak/<1<=Aa''=r~~b?QR
 	
 q 0=s   'D)D
	Dc                    ||j                   }|`t        |          }|+|j                         }t        j	                  d| d|        $t
        |   t        j	                  d d|        t        |      D ci c]  \  }}||
 c}}dt        dt        t        t        d f   ffd}	| D 
cg c]
  }
 |	|
       }}
 || D 
cg c]  }
|
j                  |
j                  f c}
|dd	      }g }t        t        |             D ];  }|D ci c]  }|||   |    }}t        di |d
||   i}|j!                  |       = t        | d d       D ]W  \  }}
t        j	                  d       t        j	                  d|
j"                          t        j	                  d||           Y |S c c}}w c c}
w c c}
w c c}w )NzUsing label list z
 for task zUsing output mode r?   r(   c                     | j                   y dk(  r| j                      S dk(  rt        | j                         S t              )Nclassification
regression)r,   floatKeyError)r?   	label_mapr   s    r%   label_from_examplez>_glue_convert_examples_to_features.<locals>.label_from_example   sJ    == **W]]++L(''{##r'   r   T)r   padding
truncationr,      z*** Example ***zguid: z
features:  )model_max_lengthr5   
get_labelsloggerinfoglue_output_modes	enumerater   r   intrF   text_atext_brangelenr   appendguid)r   r   r   r   r   r   r>   ir,   rI   r?   labelsbatch_encodingr3   r0   inputsfeaturerH   s        `           @r%   r$   r$   m   s    //
#D)+	"--/JKK+J<z$HI+D1KKK,[MD6JK*3J*?@ha@I$L $U3t;K5L $ :BBg )BFB9ABg'..'..	)B	N H3x=! !3ABa!^A&q))BB:&:q	: 	!  !- 0
7%&fW\\N+,j!./0
 OA A C 	C Cs   F67F<GGc                       e Zd ZdZdZy)
OutputModerD   rE   N)__name__
__module____qualname__rD   rE   rM   r'   r%   ra   ra      s    %NJr'   ra   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y Nr>   super__init__r   r   r   r   r   selfargskwargs	__class__s      r%   rk   zMrpcProcessor.__init__   /    $)&))00=}Mr'   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S See base class.idx	sentence1utf-8	sentence2r,   r   numpydecodestrrm   tensor_dicts     r%   r7   z*MrpcProcessor.get_example_from_tensor_dict   n    $$&$**,33G<$**,33G<G$**,-	
 	
r'   c                     t         j                  dt        j                  j	                  |d              | j                  | j                  t        j                  j	                  |d            d      S )rt   zLOOKING AT 	train.tsvtrain)rP   rQ   ospathjoin_create_examples	_read_tsvrm   data_dirs     r%   get_train_examplesz MrpcProcessor.get_train_examples   sQ    k"'',,x"E!FGH$$T^^BGGLL;4W%XZabbr'   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   zdev.tsvdevr   r   r   r   r   r   s     r%   get_dev_exampleszMrpcProcessor.get_dev_examples   .    $$T^^BGGLL94U%VX]^^r'   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   ztest.tsvtestr   r   s     r%   get_test_exampleszMrpcProcessor.get_test_examples   .    $$T^^BGGLL:4V%WY_``r'   c                 
    ddgS rt   01rM   rm   s    r%   rO   zMrpcProcessor.get_labels       Szr'   c           	          g }t        |      D ]F  \  }}|dk(  r| d| }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             H |S )5Creates examples for the training, dev and test sets.r   -r      r   NrZ   rU   rV   r,   rS   rY   r   
rm   linesset_typer   r[   linerZ   rU   rV   r,   s
             r%   r   zMrpcProcessor._create_examples   s~     ' 	`GAtAvZq$D!WF!WF$.DDGEOOLd6&X]^_	` r'   rb   rc   rd   __doc__rk   r7   r   r   r   rO   r   __classcell__rp   s   @r%   rf   rf      s-    9N
c
_ar'   rf   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   zMnliProcessor.__init__   rq   r'   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )rt   ru   premiserw   
hypothesisr,   ry   r}   s     r%   r7   z*MnliProcessor.get_example_from_tensor_dict   sn    $$&	"((*11':%++-44W=G$**,-	
 	
r'   c                     | j                  | j                  t        j                  j	                  |d            d      S rt   r   r   r   r   s     r%   r   z MnliProcessor.get_train_examples   .    $$T^^BGGLL;4W%XZabbr'   c                     | j                  | j                  t        j                  j	                  |d            d      S )rt   zdev_matched.tsvdev_matchedr   r   s     r%   r   zMnliProcessor.get_dev_examples   s/    $$T^^BGGLLK\4]%^`mnnr'   c                     | j                  | j                  t        j                  j	                  |d            d      S )rt   ztest_matched.tsvtest_matchedr   r   s     r%   r   zMnliProcessor.get_test_examples   s/    $$T^^BGGLLK]4^%_aoppr'   c                 
    g dS )rt   )contradiction
entailmentneutralrM   r   s    r%   rO   zMnliProcessor.get_labels   s    99r'   c           	          g }t        |      D ]U  \  }}|dk(  r| d|d    }|d   }|d   }|j                  d      rdn|d   }	|j                  t        ||||	             W |S )	r   r   r      	   r   Nr   )rS   
startswithrY   r   r   s
             r%   r   zMnliProcessor._create_examples   s     ' 	`GAtAvZqa	*D!WF!WF$//7DT"XEOOLd6&X]^_	` r'   r   r   s   @r%   r   r      s-    =N
coq:r'   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   z MnliMismatchedProcessor.__init__	  rq   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S )rt   zdev_mismatched.tsvdev_mismatchedr   r   s     r%   r   z(MnliMismatchedProcessor.get_dev_examples  s/    $$T^^BGGLLK_4`%acsttr'   c                     | j                  | j                  t        j                  j	                  |d            d      S )rt   ztest_mismatched.tsvtest_mismatchedr   r   s     r%   r   z)MnliMismatchedProcessor.get_test_examples  s/    $$T^^BGGLLK`4a%bduvvr'   )rb   rc   rd   r   rk   r   r   r   r   s   @r%   r   r     s    HNuwr'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	ColaProcessorz/Processor for the CoLA data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   zColaProcessor.__init__  rq   r'   c           	          t        |d   j                         |d   j                         j                  d      dt        |d   j                                     S rt   ru   sentencerw   Nr,   ry   r}   s     r%   r7   z*ColaProcessor.get_example_from_tensor_dict  U    $$&
#))+227;G$**,-	
 	
r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   z ColaProcessor.get_train_examples&  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zColaProcessor.get_dev_examples*  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zColaProcessor.get_test_examples.  r   r'   c                 
    ddgS r   rM   r   s    r%   rO   zColaProcessor.get_labels2  r   r'   c           	          |dk(  }|r|dd }|rdnd}g }t        |      D ]8  \  }}| d| }||   }	|rdn|d   }
|j                  t        ||	d|
             : |S )r   r   r   Nr   r   r   r   )rm   r   r   	test_mode
text_indexr   r[   r   rZ   rU   r,   s              r%   r   zColaProcessor._create_examples6  s    &	!"IE#Q
 ' 	^GAtZq$D*%F%D47EOOLd6$V[\]		^
 r'   r   r   s   @r%   r   r     s-    9N
c_ar'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   zSst2Processor.__init__H  rq   r'   c           	          t        |d   j                         |d   j                         j                  d      dt        |d   j                                     S r   ry   r}   s     r%   r7   z*Sst2Processor.get_example_from_tensor_dictL  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   z Sst2Processor.get_train_examplesU  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zSst2Processor.get_dev_examplesY  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zSst2Processor.get_test_examples]  r   r'   c                 
    ddgS r   rM   r   s    r%   rO   zSst2Processor.get_labelsa  r   r'   c           	          g }|dk(  rdnd}t        |      D ]A  \  }}|dk(  r| d| }||   }|dk(  rdn|d   }	|j                  t        ||d|	             C |S )r   r   r   r   r   Nr   r   )
rm   r   r   r   r   r[   r   rZ   rU   r,   s
             r%   r   zSst2Processor._create_examplese  s    "f,Q!
 ' 	^GAtAvZq$D*%F$.DDGEOOLd6$V[\]	^ r'   r   r   s   @r%   r   r   E  s-    :N
c_ar'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	StsbProcessorz0Processor for the STS-B data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   zStsbProcessor.__init__v  rq   r'   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rs   ry   r}   s     r%   r7   z*StsbProcessor.get_example_from_tensor_dictz  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   z StsbProcessor.get_train_examples  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zStsbProcessor.get_dev_examples  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zStsbProcessor.get_test_examples  r   r'   c                     dgS )rt   NrM   r   s    r%   rO   zStsbProcessor.get_labels  s	    vr'   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S )	r   r   r      r   r   Nr   r   r   r   s
             r%   r   zStsbProcessor._create_examples       ' 	`GAtAvZqa	*D!WF!WF$.DDHEOOLd6&X]^_	` r'   r   r   s   @r%   r   r   s  s-    :N
c_ar'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QqpProcessorz.Processor for the QQP data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   zQqpProcessor.__init__  rq   r'   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )rt   ru   	question1rw   	question2r,   ry   r}   s     r%   r7   z)QqpProcessor.get_example_from_tensor_dict  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zQqpProcessor.get_train_examples  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zQqpProcessor.get_dev_examples  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zQqpProcessor.get_test_examples  r   r'   c                 
    ddgS r   rM   r   s    r%   rO   zQqpProcessor.get_labels  r   r'   c           	          |dk(  }|rdnd}|rdnd}g }t        |      D ]G  \  }}|dk(  r| d|d    }		 ||   }
||   }|rdn|d	   }|j                  t        |	|
||
             I |S # t        $ r Y Ww xY w)r   r   r   r      r   r   r   NrL   r   )rS   
IndexErrorrY   r   )rm   r   r   r   q1_indexq2_indexr   r[   r   rZ   rU   rV   r,   s                r%   r   zQqpProcessor._create_examples  s    &	!1q!1q ' 
	`GAtAvZqa	*Dhh )tAw OOLd6&X]^_
	`   s   A++	A76A7r   r   s   @r%   r   r     s-    8N
c_ar'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	QnliProcessorz/Processor for the QNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   zQnliProcessor.__init__  rq   r'   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S )rt   ru   questionrw   r   r,   ry   r}   s     r%   r7   z*QnliProcessor.get_example_from_tensor_dict  sn    $$&
#))+227;
#))+227;G$**,-	
 	
r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   z QnliProcessor.get_train_examples  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zQnliProcessor.get_dev_examples  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zQnliProcessor.get_test_examples  r   r'   c                 
    ddgS rt   r   not_entailmentrM   r   s    r%   rO   zQnliProcessor.get_labels      .//r'   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S 	r   r   r   r   r   r   Nr   r   r   r   s
             r%   r   zQnliProcessor._create_examples  r   r'   r   r   s   @r%   r   r     s-    9N
c_a0r'   r   c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	RteProcessorz.Processor for the RTE data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   zRteProcessor.__init__  rq   r'   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rs   ry   r}   s     r%   r7   z)RteProcessor.get_example_from_tensor_dict
  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zRteProcessor.get_train_examples  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zRteProcessor.get_dev_examples  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zRteProcessor.get_test_examples  r   r'   c                 
    ddgS r  rM   r   s    r%   rO   zRteProcessor.get_labels  r  r'   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S r  r   r   s
             r%   r   zRteProcessor._create_examples#  r   r'   r   r   s   @r%   r  r    s-    8N
c_a0r'   r  c                   F     e Zd ZdZ fdZd Zd Zd Zd Zd Z	d Z
 xZS )	WnliProcessorz/Processor for the WNLI data set (GLUE version).c                 ~    t        |   |i | t        j                  t        j                  d      t               y rh   ri   rl   s      r%   rk   zWnliProcessor.__init__4  rq   r'   c           	          t        |d   j                         |d   j                         j                  d      |d   j                         j                  d      t        |d   j                                     S rs   ry   r}   s     r%   r7   z*WnliProcessor.get_example_from_tensor_dict8  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   z WnliProcessor.get_train_examplesA  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zWnliProcessor.get_dev_examplesE  r   r'   c                     | j                  | j                  t        j                  j	                  |d            d      S r   r   r   s     r%   r   zWnliProcessor.get_test_examplesI  r   r'   c                 
    ddgS r   rM   r   s    r%   rO   zWnliProcessor.get_labelsM  r   r'   c           	          g }t        |      D ]I  \  }}|dk(  r| d|d    }|d   }|d   }|dk(  rdn|d   }	|j                  t        ||||	             K |S r  r   r   s
             r%   r   zWnliProcessor._create_examplesQ  r   r'   r   r   s   @r%   r  r  1  s-    9N
c_ar'   r  r   )	colamnlimrpcsst-2r*   qqpqnlirtewnli)
r  r  zmnli-mmr  r  r*   r  r  r  r  rD   rE   )NNNN).r   r   r   dataclassesr   enumr   typingr   r   r   tokenization_utilsr	   utilsr
   r   r   r   r   
tensorflowr   
get_loggerrb   rP   r   rT   r&   r|   r    r!   r#   r$   ra   rf   r   r   r   r   r   r   r   r  r  glue_tasks_num_labelsr5   rR   rM   r'   r%   <module>r(     s    " 	    ( ( 5 - = = 			H	%m  !%	 D&(99: "   F 
 $(	
''//
&
 SM	

 

D !%	4< 4"4 4n 
,M ,^+M +\wm w ,M ,^+M +\+M +\1= 1h+M +\+= +\+M +^ 
  &  r'   