
import inspect
from typing import List, Union

import numpy as np

from ..tokenization_utils import TruncationStrategy
from ..utils import add_end_docstrings, logging
from .base import ArgumentHandler, ChunkPipeline, build_pipeline_init_args


logger = logging.get_logger(__name__)


class ZeroShotClassificationArgumentHandler(ArgumentHandler):
    """
    Handles arguments for zero-shot for text classification by turning each possible label into an NLI
    premise/hypothesis pair.
    """

    def _parse_labels(self, labels):
        if isinstance(labels, str):
            labels = [label.strip() for label in labels.split(",") if label.strip()]
        return labels

    def __call__(self, sequences, labels, hypothesis_template):
        if len(labels) == 0 or len(sequences) == 0:
            raise ValueError("You must include at least one label and at least one sequence.")
        if hypothesis_template.format(labels[0]) == hypothesis_template:
            raise ValueError(
                (
                    'The provided hypothesis_template "{}" was not able to be formatted with the target labels. '
                    "Make sure the passed template includes formatting syntax such as {{}} where the label should go."
                ).format(hypothesis_template)
            )

        if isinstance(sequences, str):
            sequences = [sequences]

        sequence_pairs = []
        for sequence in sequences:
            # Pair every sequence with a hypothesis built from each candidate label.
            sequence_pairs.extend([[sequence, hypothesis_template.format(label)] for label in labels])

        return sequence_pairs, sequences
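
# For example, sequences="I like pizza" with labels=["food", "politics"] and the pipeline's default
# hypothesis_template "This example is {}." yields sequence_pairs ==
# [["I like pizza", "This example is food."], ["I like pizza", "This example is politics."]]
# together with sequences == ["I like pizza"].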


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True))
class ZeroShotClassificationPipeline(ChunkPipeline):
    """
    NLI-based zero-shot classification pipeline using a `ModelForSequenceClassification` trained on NLI (natural
    language inference) tasks. Equivalent of `text-classification` pipelines, but these models don't require a
    hardcoded number of potential classes, they can be chosen at runtime. It usually means it's slower but it is
    **much** more flexible.

    Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
    pair and passed to the pretrained model. Then, the logit for *entailment* is taken as the logit for the candidate
    label being valid. Any NLI model can be used, but the id of the *entailment* label must be included in the model
    config's :attr:*~transformers.PretrainedConfig.label2id*.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> oracle = pipeline(model="facebook/bart-large-mnli")
    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["urgent", "not urgent", "phone", "tablet", "computer"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['urgent', 'phone', 'computer', 'not urgent', 'tablet'], 'scores': [0.504, 0.479, 0.013, 0.003, 0.002]}

    >>> oracle(
    ...     "I have a problem with my iphone that needs to be resolved asap!!",
    ...     candidate_labels=["english", "german"],
    ... )
    {'sequence': 'I have a problem with my iphone that needs to be resolved asap!!', 'labels': ['english', 'german'], 'scores': [0.814, 0.186]}
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This NLI pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-classification"`.

    The models that this pipeline can use are models that have been fine-tuned on an NLI task. See the up-to-date list
    of available models on [huggingface.co/models](https://huggingface.co/models?search=nli).
    """

    def __init__(self, args_parser=ZeroShotClassificationArgumentHandler(), *args, **kwargs):
        self._args_parser = args_parser
        super().__init__(*args, **kwargs)
        if self.entailment_id == -1:
            logger.warning(
                "Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to "
                "-1. Define a descriptive label2id mapping in the model config to ensure correct outputs."
            )

    @property
    def entailment_id(self):
        for label, ind in self.model.config.label2id.items():
            if label.lower().startswith("entail"):
                return ind
        return -1

    def _parse_and_tokenize(
        self, sequence_pairs, padding=True, add_special_tokens=True, truncation=TruncationStrategy.ONLY_FIRST, **kwargs
    ):
        """
        Parse arguments and tokenize only_first so that hypothesis (label) is not truncated
        """
        return_tensors = self.framework
        if self.tokenizer.pad_token is None:
            # Override for tokenizers not supporting padding
            logger.error(
                "Tokenizer was not supporting padding necessary for zero-shot, attempting to use "
                " `pad_token=eos_token`"
            )
            self.tokenizer.pad_token = self.tokenizer.eos_token
        try:
            inputs = self.tokenizer(
                sequence_pairs,
                add_special_tokens=add_special_tokens,
                return_tensors=return_tensors,
                padding=padding,
                truncation=truncation,
            )
        except Exception as e:
            if "too short" in str(e):
                # Some tokenizers raise when asked to truncate to a length the input never reaches;
                # in that case retry without truncation.
                inputs = self.tokenizer(
                    sequence_pairs,
                    add_special_tokens=add_special_tokens,
                    return_tensors=return_tensors,
                    padding=padding,
                    truncation=TruncationStrategy.DO_NOT_TRUNCATE,
                )
            else:
                raise e

        return inputs

    def _sanitize_parameters(self, **kwargs):
        if kwargs.get("multi_class", None) is not None:
            kwargs["multi_label"] = kwargs["multi_class"]
            logger.warning(
                "The `multi_class` argument has been deprecated and renamed to `multi_label`. `multi_class` will be "
                "removed in a future version of Transformers."
            )
        preprocess_params = {}
        if "candidate_labels" in kwargs:
            preprocess_params["candidate_labels"] = self._args_parser._parse_labels(kwargs["candidate_labels"])
        if "hypothesis_template" in kwargs:
            preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"]

        postprocess_params = {}
        if "multi_label" in kwargs:
            postprocess_params["multi_label"] = kwargs["multi_label"]
        return preprocess_params, {}, postprocess_params

    def __call__(
        self,
        sequences: Union[str, List[str]],
        *args,
        **kwargs,
    ):
        """
        Classify the sequence(s) given as inputs. See the [`ZeroShotClassificationPipeline`] documentation for more
        information.

        Args:
            sequences (`str` or `List[str]`):
                The sequence(s) to classify, will be truncated if the model input is too large.
            candidate_labels (`str` or `List[str]`):
                The set of possible class labels to classify each sequence into. Can be a single label, a string of
                comma-separated labels, or a list of labels.
            hypothesis_template (`str`, *optional*, defaults to `"This example is {}."`):
                The template used to turn each label into an NLI-style hypothesis. This template must include a {} or
                similar syntax for the candidate label to be inserted into the template. For example, the default
                template is `"This example is {}."` With the candidate label `"sports"`, this would be fed into the
                model like `"<cls> sequence to classify <sep> This example is sports . <sep>"`. The default template
                works well in many cases, but it may be worthwhile to experiment with different templates depending on
                the task setting.
            multi_label (`bool`, *optional*, defaults to `False`):
                Whether or not multiple candidate labels can be true. If `False`, the scores are normalized such that
                the sum of the label likelihoods for each sequence is 1. If `True`, the labels are considered
                independent and probabilities are normalized for each candidate by doing a softmax of the entailment
                score vs. the contradiction score.

        Return:
            A `dict` or a list of `dict`: Each result comes as a dictionary with the following keys:

            - **sequence** (`str`) -- The sequence for which this is the output.
            - **labels** (`List[str]`) -- The labels sorted by order of likelihood.
            - **scores** (`List[float]`) -- The probabilities for each of the labels.
        """
        if len(args) == 0:
            pass
        elif len(args) == 1 and "candidate_labels" not in kwargs:
            kwargs["candidate_labels"] = args[0]
        else:
            raise ValueError(f"Unable to understand extra arguments {args}")

        return super().__call__(sequences, **kwargs)

    def preprocess(self, inputs, candidate_labels=None, hypothesis_template="This example is {}."):
        sequence_pairs, sequences = self._args_parser(inputs, candidate_labels, hypothesis_template)

        for i, (candidate_label, sequence_pair) in enumerate(zip(candidate_labels, sequence_pairs)):
            model_input = self._parse_and_tokenize([sequence_pair])

            yield {
                "candidate_label": candidate_label,
                "sequence": sequences[0],
                "is_last": i == len(candidate_labels) - 1,
                **model_input,
            }

    def _forward(self, inputs):
        candidate_label = inputs["candidate_label"]
        sequence = inputs["sequence"]
        model_inputs = {k: inputs[k] for k in self.tokenizer.model_input_names}
        # `XXXForSequenceClassification` models should not use `use_cache=True` even if it's supported
        model_forward = self.model.forward if self.framework == "pt" else self.model.call
        if "use_cache" in inspect.signature(model_forward).parameters.keys():
            model_inputs["use_cache"] = False
        outputs = self.model(**model_inputs)

        model_outputs = {
            "candidate_label": candidate_label,
            "sequence": sequence,
            "is_last": inputs["is_last"],
            **outputs,
        }
        return model_outputs

    def postprocess(self, model_outputs, multi_label=False):
        candidate_labels = [outputs["candidate_label"] for outputs in model_outputs]
        sequences = [outputs["sequence"] for outputs in model_outputs]
        if self.framework == "pt":
            logits = np.concatenate([output["logits"].float().numpy() for output in model_outputs])
        else:
            logits = np.concatenate([output["logits"].numpy() for output in model_outputs])
        N = logits.shape[0]
        n = len(candidate_labels)
        num_sequences = N // n
        reshaped_outputs = logits.reshape((num_sequences, n, -1))

        if multi_label or len(candidate_labels) == 1:
            # Softmax over the entailment vs. contradiction dim for each label independently.
            entailment_id = self.entailment_id
            contradiction_id = -1 if entailment_id == 0 else 0
            entail_contr_logits = reshaped_outputs[..., [contradiction_id, entailment_id]]
            scores = np.exp(entail_contr_logits) / np.exp(entail_contr_logits).sum(-1, keepdims=True)
            scores = scores[..., 1]
        else:
            # Softmax the "entailment" logits over all candidate labels.
            entail_logits = reshaped_outputs[..., self.entailment_id]
            scores = np.exp(entail_logits) / np.exp(entail_logits).sum(-1, keepdims=True)

        top_inds = list(reversed(scores[0].argsort()))
        return {
            "sequence": sequences[0],
            "labels": [candidate_labels[i] for i in top_inds],
            "scores": scores[0, top_inds].tolist(),
        }
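

# Minimal usage sketch (illustration only): exercises the pipeline defined above, assuming the
# "facebook/bart-large-mnli" checkpoint used in the class docstring can be downloaded; any NLI
# sequence-classification checkpoint works. Run it as a module, e.g.
# `python -m transformers.pipelines.zero_shot_classification`, so the relative imports resolve.
if __name__ == "__main__":
    from transformers import pipeline

    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    sequence = "I have a problem with my iphone that needs to be resolved asap!!"
    labels = ["urgent", "not urgent", "phone", "tablet", "computer"]

    # Default (multi_label=False): scores are a softmax of the entailment logits over all labels,
    # so they sum to 1 for the sequence.
    print(classifier(sequence, candidate_labels=labels))

    # multi_label=True: each label gets an independent entailment-vs-contradiction softmax,
    # so several labels can score close to 1 at the same time.
    print(classifier(sequence, candidate_labels=labels, multi_label=True))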