
import os
from typing import Dict, List, Union

import tensorflow as tf
from keras_nlp.tokenizers import BytePairTokenizer
from tensorflow_text import pad_model_inputs

from ...modeling_tf_utils import keras
from .tokenization_gpt2 import GPT2Tokenizer


class TFGPT2Tokenizer(keras.layers.Layer):
    """
    This is an in-graph tokenizer for GPT2. It should be initialized similarly to other tokenizers, using the
    `from_pretrained()` method. It can also be initialized with the `from_tokenizer()` method, which imports settings
    from an existing standard tokenizer object.

    In-graph tokenizers, unlike other Hugging Face tokenizers, are actually Keras layers and are designed to be run
    when the model is called, rather than during preprocessing. As a result, they have somewhat more limited options
    than standard tokenizer classes. They are most useful when you want to create an end-to-end model that goes
    straight from `tf.string` inputs to outputs.

    Args:
        vocab (Dict[str, int]): Vocabulary dict for Byte Pair Tokenizer
        merges (List[str]): Merges list for Byte Pair Tokenizer
    """

    def __init__(self, vocab: Dict[str, int], merges: List[str], max_length: int = None, pad_token_id: int = None):
        super().__init__()
        self.pad_token_id = pad_token_id
        self.max_length = max_length
        self.vocab = vocab
        self.merges = merges
        # The in-graph BPE tokenizer; `sequence_length` makes it emit fixed-length
        # (truncated/ragged-resolved) outputs when `max_length` is set.
        self.tf_tokenizer = BytePairTokenizer(vocab, merges, sequence_length=max_length)

    @classmethod
    def from_tokenizer(cls, tokenizer: GPT2Tokenizer, *args, **kwargs):
        """Creates TFGPT2Tokenizer from GPT2Tokenizer

        Args:
            tokenizer (GPT2Tokenizer)

        Examples:

        ```python
        from transformers import AutoTokenizer, TFGPT2Tokenizer

        tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")
        tf_tokenizer = TFGPT2Tokenizer.from_tokenizer(tokenizer)
        ```
        """
        # `bpe_ranks` keys are (token_a, token_b) pairs; keras-nlp expects each
        # merge rule as a single space-separated string.
        merges = [" ".join(m) for m in tokenizer.bpe_ranks.keys()]
        vocab = tokenizer.get_vocab()
        return cls(vocab, merges, *args, **kwargs)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path: Union[str, os.PathLike], *init_inputs, **kwargs):
        """Creates TFGPT2Tokenizer from pretrained GPT2Tokenizer

        Args:
            pretrained_model_name_or_path (Union[str, os.PathLike]): Path to pretrained model

        Examples:

        ```python
        from transformers import TFGPT2Tokenizer

        tf_tokenizer = TFGPT2Tokenizer.from_pretrained("openai-community/gpt2")
        ```
        """
        tokenizer = GPT2Tokenizer.from_pretrained(pretrained_model_name_or_path, *init_inputs, **kwargs)
        return cls.from_tokenizer(tokenizer, *init_inputs, **kwargs)

    @classmethod
    def from_config(cls, config):
        """Creates TFGPT2Tokenizer from configurations

        Args:
            config (Dict): Dictionary with keys such as stated in `get_config`.
        """
        return cls(**config)

    def get_config(self):
        # Everything needed to rebuild this layer via `from_config`.
        return {
            "vocab": self.vocab,
            "merges": self.merges,
            "max_length": self.max_length,
            "pad_token_id": self.pad_token_id,
        }

    def call(self, x, max_length: int = None):
        input_ids = self.tf_tokenizer(x)
        attention_mask = tf.ones_like(input_ids)

        if self.pad_token_id is not None:
            # pad the tokens up to max length
            max_length = max_length if max_length is not None else self.max_length

            if max_length is not None:
                input_ids, attention_mask = pad_model_inputs(
                    input_ids, max_seq_length=max_length, pad_value=self.pad_token_id
                )

        return {"attention_mask": attention_mask, "input_ids": input_ids}