
     sg$                        d dl mZ d dlZd dlmZmZmZ d dlZd dlmZ  G d dej                        Z
 G d de
      Z G d	 d
e
      Zy)    )annotationsN)ListOptionalAny)r   c                  ~    e Zd Zej                  	 d	 	 	 	 	 	 	 dd       Zej                  	 	 d	 	 	 	 	 	 	 dd       Zy)BaseLlamaTokenizerc                    t         )zTokenize the text into tokens.

        Args:
            text: The utf-8 encoded string to tokenize.
            add_bos: Whether to add a beginning of sequence token.
            special: Whether to tokenize special tokens.
        NotImplementedErrorselftextadd_bosspecials       L/var/www/html/venv/lib/python3.12/site-packages/llama_cpp/llama_tokenizer.pytokenizezBaseLlamaTokenizer.tokenize   s
     "!    Nc                    t         )a  Detokenize the tokens into text.

        Args:
            tokens: The list of tokens to detokenize.
            prev_tokens: The list of previous tokens. Offset mapping will be performed if provided.
            special: Whether to detokenize special tokens.
        r
   r   tokensprev_tokensr   s       r   
detokenizezBaseLlamaTokenizer.detokenize   s
     "!r   TTr   bytesr   boolr   r   return	List[int]NFr   r   r   zOptional[List[int]]r   r   r   r   )__name__
__module____qualname__abcabstractmethodr   r    r   r   r   r      s    AE
"
"$(
":>
"	
" 
" 	 ,0	"" )" 	"
 
" "r   r   c                  ~    e Zd ZddZ	 d		 	 	 	 	 	 	 d
dZ	 	 d	 	 	 	 	 	 	 ddZ	 d		 	 	 	 	 	 	 ddZddZedd       Z	y)LlamaTokenizerc                &    |j                   | _         y N)_model)r   llamas     r   __init__zLlamaTokenizer.__init__.   s    llr   c                >    | j                   j                  |||      S )Nr   r   )r+   r   r   s       r   r   zLlamaTokenizer.tokenize1   s      {{##D'7#KKr   Nc                <    | j                   j                  ||      S )N)r   )r+   r   r   s       r   r   zLlamaTokenizer.detokenize6   s     {{%%fg%>>r   c                L    | j                  |j                  dd      ||      S )Nutf-8ignoreerrorsr/   )r   encoder   s       r   r6   zLlamaTokenizer.encode>   s.     }}KKK17G  
 	
r   c                F    | j                  |      j                  dd      S )Nr2   r3   r4   )r   decode)r   r   s     r   r8   zLlamaTokenizer.decodeE   s!    v&--gh-GGr   c                <     | t        j                  |d            S )NT)
model_path
vocab_only)	llama_cppLlama)clspaths     r   from_ggml_filezLlamaTokenizer.from_ggml_fileH   s    9??dtDEEr   )r,   zllama_cpp.Llamar   r   r   r    )r   strr   r   r   r   r   r   )r   r   r   rA   )r?   rA   r   z'LlamaTokenizer')
r!   r"   r#   r-   r   r   r6   r8   classmethodr@   r&   r   r   r(   r(   -   s    # BFLL$(L:>L	L ,0	?? )? 	?
 
? @D

"&
8<
	
H F Fr   r(   c                  \    e Zd ZddZ	 d	 	 	 	 	 	 	 ddZ	 	 d		 	 	 	 	 	 	 d
dZedd       Zy)LlamaHFTokenizerc                    || _         y r*   )hf_tokenizer)r   rF   s     r   r-   zLlamaHFTokenizer.__init__N   s
    (r   c                ^    | j                   j                  |j                  dd      |      S )Nr2   r3   r4   )add_special_tokens)rF   r6   r8   r   s       r   r   zLlamaHFTokenizer.tokenizeQ   s4       ''KKK1g ( 
 	
r   Nc                B   | }|m| j                   j                  ||z   |      j                  dd      }| j                   j                  ||      j                  dd      }|t        |      d  S | j                   j                  ||      j                  dd      S )N)skip_special_tokensr2   r3   r4   )rF   r8   r6   len)r   r   r   r   rJ   r   	prev_texts          r   r   zLlamaHFTokenizer.detokenizeX   s     #*k"$$++f$:M , fWXf.  ))001D 1 fWXf.  I())$$++,? , fWXf./r   c                t    	 ddl m} |j                  |      } | |      S # t        $ r t        d      w xY w)Nr   )AutoTokenizerzsThe `transformers` library is required to use the `HFTokenizer`.You can install it with `pip install transformers`.)pretrained_model_name_or_path)transformersrN   ImportErrorfrom_pretrained)r>   rO   rN   rF   s       r   rR   z LlamaHFTokenizer.from_pretrainedl   sU    	2 %44*G 5 
 <    	F 	s   " 7)rF   r   r   r   r   r    )rO   rA   r   z'LlamaHFTokenizer')r!   r"   r#   r-   r   r   rB   rR   r&   r   r   rD   rD   M   sz    ) BF

$(
:>
	
 ,0	// )/ 	/
 
/( ! !r   rD   )
__future__r   r$   typingr   r   r   r<   llama_cpp.llama_typesABCr   r(   rD   r&   r   r   <module>rW      sJ    " 
   &" ">F' F@+!) +!r   