
    sg D                        d dl Z d dlZddlmZ ddlmZmZmZmZ ddl	m
Z
mZ  e       r
d dlZddlmZ  e       rddlmZ  ej$                  e      Z G d	 d
e j*                        Z e ed             G d de
             Z e ed             G d de             Z e ed             G d de             Zy)    N   )TruncationStrategy)add_end_docstringsis_tf_availableis_torch_availablelogging   )Pipelinebuild_pipeline_init_args)/TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES),MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMESc                       e Zd ZdZdZy)
ReturnTyper   r	   N)__name__
__module____qualname__TENSORSTEXT     ^/var/www/html/venv/lib/python3.12/site-packages/transformers/pipelines/text2text_generation.pyr   r      s    GDr   r   T)has_tokenizerc                        e Zd ZdZdZ fdZ	 	 	 	 	 	 ddZdededefdZd	 Z	 fd
Z
ej                  fdZd Zej                   dfdZ xZS )Text2TextGenerationPipelineaW  
    Pipeline for text to text generation using seq2seq models.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> generator = pipeline(model="mrm8488/t5-base-finetuned-question-generation-ap")
    >>> generator(
    ...     "answer: Manuel context: Manuel has created RuPERTa-base with the support of HF-Transformers and Google"
    ... )
    [{'generated_text': 'question: Who created the RuPERTa-base?'}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial). You can pass text
    generation parameters to this pipeline to control stopping criteria, decoding strategy, and more. Learn more about
    text generation parameters in [Text generation strategies](../generation_strategies) and [Text
    generation](text_generation).

    This Text2TextGenerationPipeline pipeline can currently be loaded from [`pipeline`] using the following task
    identifier: `"text2text-generation"`.

    The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
    up-to-date list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=text2text-generation). For a list of available
    parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Usage:

    ```python
    text2text_generator = pipeline("text2text-generation")
    text2text_generator("question: What is 42 ? context: 42 is the answer to life, the universe and everything")
    ```	generatedc                     t        |   |i | | j                  | j                  dk(  rt               y t
               y )Ntf)super__init__check_model_type	frameworkr   r   selfargskwargs	__class__s      r   r   z$Text2TextGenerationPipeline.__init__B   s>    $)&)~~% <	
 >	
r   c                 "   i }|||d<   |}	i }
|$|"|rt         j                  nt         j                  }|||
d<   |||
d<   |H| j                  j	                  |d      }t        |      dkD  rt        j                  d       |d   |d	<   ||	|
fS )
N
truncationreturn_typeclean_up_tokenization_spacesF)add_special_tokensr	   zStopping on a multiple token sequence is not yet supported on transformers. The first token of the stop sequence will be used as the stop sequence string in the interim.r   eos_token_id)r   r   r   	tokenizerencodelenwarningswarn)r#   return_tensorsreturn_textr)   r*   r(   stop_sequencegenerate_kwargspreprocess_paramsforward_paramspostprocess_paramsstop_sequence_idss               r   _sanitize_parametersz0Text2TextGenerationPipeline._sanitize_parametersK   s     !.8l+(%+*=0>*,,JOOK"0;}-'3A]=>$ $ 5 5mX] 5 ^$%)b /@.BON+ .2DDDr   input_length
min_length
max_lengthc                      y)j
        Checks whether there might be something wrong with given input with regard to the model.
        Tr   r#   r;   r<   r=   s       r   check_inputsz(Text2TextGenerationPipeline.check_inputso   s     r   c                   | j                   | j                   nd}t        |d   t              r;| j                  j                  t        d      |d   D cg c]  }||z   	 c}f}d}n1t        |d   t              r||d   z   f}d}nt        d|d    d       | j                  |||| j                  d}d	|v r|d	= |S c c}w )
N r   zOPlease make sure that the tokenizer has a pad_token_id when using a batch inputTFz `args[0]`: zI have the wrong format. The should be either of type `str` or type `list`)paddingr(   r2   token_type_ids)prefix
isinstancelistr-   pad_token_id
ValueErrorstrr!   )r#   r(   r$   rF   argrD   inputss          r   _parse_and_tokenizez/Text2TextGenerationPipeline._parse_and_tokenizeu   s     $ 7Rd1gt$~~**2 !rss-1!W5cVc\57DGQ%T!W$&DGtAwi'pq   w:^b^l^lmv%'( 6s   Cc                     t        |   |i |}t        |d   t              r:t	        d |d   D              r%t	        d |D              r|D cg c]  }|d   	 c}S |S c c}w )a  
        Generate the output text(s) using text(s) given as inputs.

        Args:
            args (`str` or `List[str]`):
                Input text for the encoder.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            truncation (`TruncationStrategy`, *optional*, defaults to `TruncationStrategy.DO_NOT_TRUNCATE`):
                The truncation strategy for the tokenization within the pipeline. `TruncationStrategy.DO_NOT_TRUNCATE`
                (default) will never truncate, but it is sometimes desirable to truncate the input to fit the model's
                max_length instead of throwing an error down the line.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **generated_text** (`str`, present when `return_text=True`) -- The generated text.
            - **generated_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
              ids of the generated text.
        r   c              3   <   K   | ]  }t        |t                y w)N)rG   rK   ).0els     r   	<genexpr>z7Text2TextGenerationPipeline.__call__.<locals>.<genexpr>   s     :BJr3':s   c              3   8   K   | ]  }t        |      d k(    yw)r	   N)r/   )rQ   ress     r   rS   z7Text2TextGenerationPipeline.__call__.<locals>.<genexpr>   s     4cCHM4s   )r   __call__rG   rH   all)r#   r$   r%   resultrU   r&   s        r   rV   z$Text2TextGenerationPipeline.__call__   sf    : !4262tAw%:$q'::4V44&,-sCF-- .s   A c                 0     | j                   |fd|i|}|S )Nr(   )rN   )r#   rM   r(   r%   s       r   
preprocessz&Text2TextGenerationPipeline.preprocess   s#    )))&RZR6Rr   c                    | j                   dk(  r|d   j                  \  }}n8| j                   dk(  r)t        j                  |d         j                         \  }}| j	                  |j                  d| j                  j                        |j                  d| j                  j                               d|vr| j                  |d<    | j                  j                  d
i ||}|j                  d   }| j                   dk(  r( |j                  ||z  g|j                  dd   }d	|iS | j                   dk(  r+t        j                  |||z  g|j                  dd        }d	|iS )Npt	input_idsr   r<   r=   generation_configr   r	   
output_idsr   )r!   shaper   numpyrA   getr^   r<   r=   modelgeneratereshape)r#   model_inputsr5   in_br;   r_   out_bs          r   _forwardz$Text2TextGenerationPipeline._forward   sm   >>T!!-k!:!@!@D,^^t#!#,{*C!D!J!J!LD,d.D.D.O.OPd.D.D.O.OP	
 o5373I3IO/0(TZZ((K<K?K
  #>>T!+++D%4-W*BRBRSTSUBVWJ j)) ^^t#Ju}0\zGWGWXYXZG[0\]Jj))r   Fc                    g }|d   d   D ]x  }|t         j                  k(  r| j                   d|i}n@|t         j                  k(  r-| j                   d| j                  j                  |d|      i}|j                         z |S )Nr_   r   
_token_ids_textT)skip_special_tokensr*   )r   r   return_namer   r-   decodeappend)r#   model_outputsr)   r*   recordsr_   records          r   postprocessz'Text2TextGenerationPipeline.postprocess   s    '5a8 	#Jj000!--.j9:F
/''(.0E0E",05Q 1F 1 NN6"	# r   )NNNNNN)r   r   r   __doc__rn   r   r:   intrA   rN   rV   r   DO_NOT_TRUNCATErZ   ri   r   r   rt   __classcell__r&   s   @r   r   r      s|    "J K
 %)"EH # 3 *$L -?,N,N *0 6@__ch r   r   c                   >     e Zd ZdZdZ fdZdedededefdZ xZ	S )	SummarizationPipelinea  
    Summarize news articles and other documents.

    This summarizing pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"summarization"`.

    The models that this pipeline can use are models that have been fine-tuned on a summarization task, which is
    currently, '*bart-large-cnn*', '*google-t5/t5-small*', '*google-t5/t5-base*', '*google-t5/t5-large*', '*google-t5/t5-3b*', '*google-t5/t5-11b*'. See the up-to-date
    list of available models on [huggingface.co/models](https://huggingface.co/models?filter=summarization). For a list
    of available parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Usage:

    ```python
    # use bart in pytorch
    summarizer = pipeline("summarization")
    summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)

    # use t5 in tf
    summarizer = pipeline("summarization", model="google-t5/t5-base", tokenizer="google-t5/t5-base", framework="tf")
    summarizer("An apple a day, keeps the doctor away", min_length=5, max_length=20)
    ```summaryc                 "    t        |   |i |S )a  
        Summarize the text(s) given as inputs.

        Args:
            documents (*str* or `List[str]`):
                One or several articles (or one list of articles) to summarize.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **summary_text** (`str`, present when `return_text=True`) -- The summary of the corresponding input.
            - **summary_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The token
              ids of the summary.
        r   rV   r"   s      r   rV   zSummarizationPipeline.__call__   s    0 w000r   r;   r<   r=   returnc           	          ||k  rt         j                  d| d| d       ||k  r#t         j                  d| d| d|dz   d       y	y	)
r?   zYour min_length=z' must be inferior than your max_length=.zYour max_length is set to z , but your input_length is only z. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=r   )Nloggerwarningr@   s       r   rA   z"SummarizationPipeline.check_inputs  sq     
"NN-j\9`ak`llmno*$NN,ZL8XYeXf g^^jlm^m]nnoq %r   )
r   r   r   ru   rn   rV   rv   boolrA   rx   ry   s   @r   r{   r{      s6    2 K14 # 3 SW r   r{   c                   n     e Zd ZdZdZdededefdZej                  ddd fd	
Z	d fd
	Z
 fdZ xZS )TranslationPipelinea  
    Translates from one language to another.

    This translation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"translation_xx_to_yy"`.

    The models that this pipeline can use are models that have been fine-tuned on a translation task. See the
    up-to-date list of available models on [huggingface.co/models](https://huggingface.co/models?filter=translation).
    For a list of available parameters, see the [following
    documentation](https://huggingface.co/docs/transformers/en/main_classes/text_generation#transformers.generation.GenerationMixin.generate)

    Usage:

    ```python
    en_fr_translator = pipeline("translation_en_to_fr")
    en_fr_translator("How old are you?")
    ```translationr;   r<   r=   c                 L    |d|z  kD  rt         j                  d| d| d       y)Ng?zYour input_length: z" is bigger than 0.9 * max_length: z`. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)Tr   r@   s       r   rA   z TranslationPipeline.check_inputs:  s=    #
**NN%l^3UV`Ua b^ ^ r   N)r(   src_langtgt_langc                    t        | j                  dd       r) | j                  j                  || j                  |||dS t	        |   |d|iS )N_build_translation_inputs)r2   r(   r   r   r(   )getattrr-   r   r!   r   rN   )r#   r(   r   r   r$   r&   s        r   rZ   zTranslationPipeline.preprocessB  sU    4>>#>E;4>>;;dnnV^iq  7.LLLr   c                     t        	|   di |\  }}}|||d<   |||d<   |O|M|j                  d| j                        }|j	                  d      }|rt        |      dk(  r|d   |d<   |d   |d<   |||fS )	Nr   r   task_   r	      r   )r   r:   rb   r   splitr/   )
r#   r   r   r%   r6   r7   r8   r   itemsr&   s
            r   r:   z(TranslationPipeline._sanitize_parametersJ  s    @E@\@f_e@f=>+=,4j),4j) 0::fdii0DJJsOEE
a05a!*-05a!*- .2DDDr   c                 "    t        |   |i |S )a  
        Translate the text(s) given as inputs.

        Args:
            args (`str` or `List[str]`):
                Texts to be translated.
            return_tensors (`bool`, *optional*, defaults to `False`):
                Whether or not to include the tensors of predictions (as token indices) in the outputs.
            return_text (`bool`, *optional*, defaults to `True`):
                Whether or not to include the decoded texts in the outputs.
            clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
                Whether or not to clean up the potential extra spaces in the text output.
            src_lang (`str`, *optional*):
                The language of the input. Might be required for multilingual models. Will not have any effect for
                single pair translation models
            tgt_lang (`str`, *optional*):
                The language of the desired output. Might be required for multilingual models. Will not have any effect
                for single pair translation models
            generate_kwargs:
                Additional keyword arguments to pass along to the generate method of the model (see the generate method
                corresponding to your framework [here](./text_generation)).

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following keys:

            - **translation_text** (`str`, present when `return_text=True`) -- The translation.
            - **translation_token_ids** (`torch.Tensor` or `tf.Tensor`, present when `return_tensors=True`) -- The
              token ids of the translation.
        r~   r"   s      r   rV   zTranslationPipeline.__call__Z  s    < w000r   )NN)r   r   r   ru   rn   rv   rA   r   rw   rZ   r:   rV   rx   ry   s   @r   r   r   #  sP    &  K # 3  ,>+M+MX\gk ME 1 1r   r   )enumr0   tokenization_utilsr   utilsr   r   r   r   baser
   r   
tensorflowr   models.auto.modeling_tf_autor   models.auto.modeling_autor   
get_loggerr   r   Enumr   r   r{   r   r   r   r   <module>r      s      3 T T 4 ^X			H	% 
 ,4@A@( @ B@F ,4@AB7 B BBJ ,4@AT15 T1 BT1r   