
    sgt                     H    d dl mZ d dlmZ d dlmZ d dlmZmZ dede	fdZ
y)	    )Path)Any)TikTokenConverter)TIKTOKEN_VOCAB_FILETOKENIZER_FILEencoding
output_dirc                    t        |      }|j                  d       |dz  t        z  }|t        z  }t	        |j                               }t	        |j                               }	 ddlm} ddlm	} t        | t              r ||       }  || j                  |       t        || j                  | j                         j#                         }|j%                  |       y	# t        $ r t        d      w xY w)
a  
    Converts given `tiktoken` encoding to `PretrainedTokenizerFast` and saves the configuration of converted tokenizer
    on disk.

    Args:
        encoding (`str` or `tiktoken.Encoding`):
            Tokenizer from `tiktoken` library. If `encoding` is `str`, the tokenizer will be loaded with
            `tiktoken.get_encoding(encoding)`.
        output_dir (`str`):
            Save path for converted tokenizer configuration file.
    T)exist_oktiktokenr   )get_encoding)dump_tiktoken_bpezY`tiktoken` is required to save a `tiktoken` file. Install it with `pip install tiktoken`.)
vocab_filepatternadditional_special_tokensN)r   mkdirr   r   strabsoluter   r   tiktoken.loadr   
isinstance_mergeable_ranksImportError
ValueErrorr   _pat_str_special_tokens	tokenizersave)	r   r	   	save_filetokenizer_filesave_file_absoluteoutput_file_absoluter   r   r   s	            U/var/www/html/venv/lib/python3.12/site-packages/transformers/integrations/tiktoken.pyconvert_tiktoken_to_fastr#      s     j!Jd#Z'*==I.0NY//12~6689
)3h$#H-H(335GH "%x/@/@\d\t\tik  NN'(  
j
 	

s   &7C C4N)pathlibr   typingr   #transformers.convert_slow_tokenizerr   $transformers.tokenization_utils_fastr   r   r   r#        r"   <module>r*      s$      A T%)s %) %)r)   