
     sgm                    |   d dl mZ d dlZd dlZd dlmZmZmZmZm	Z	 d dl
mZmZ d dlmZ d dlZd dlmZ ddl ddlmZ dd	lmZ d dlmZ  G d
 d      Z G d d      Z G d d      Z G d d      Zd Ze G d d             Ze G d d             Z d dlmZm!Z!mZm"Z" d dlZd dlZ G d d      Z# G d d      Z$y)    )annotationsN)DictListTupleOptionalSequence)	dataclassfield)	ExitStack   )*)LlamaGrammar)suppress_stdout_stderrc                      e Zd ZdZdd	 	 	 	 	 d$dZd Zd Zd%dZd%dZd%d	Z	d%d
Z
d&dZd'dZd%dZd%dZd(dZd)dZd*dZd+dZd%dZd%dZd%dZd%dZd%dZd%dZd%dZd%dZd%dZd,dZd,dZd-dZd.d/dZd.d0d Z d1d!Z!e"d"        Z#y#)2
LlamaModelzIntermediate Python wrapper for a llama.cpp llama_model.
    NOTE: For stability it's recommended you use the Llama class instead.Tverbosec                   | _         | _        | _        t                _        d }t
        j                  j                  |      st        d|       t        |      5  t        j                   j                   j                  d       j                        }d d d        |t        d|       | _         fd} j                  j                  |       y # 1 sw Y   AxY w)NzModel path does not exist: )disableutf-8z Failed to load model from file: c                 l     j                   y t        j                   j                          d  _         y N)model	llama_cppllama_free_modelselfs   G/var/www/html/venv/lib/python3.12/site-packages/llama_cpp/_internals.py
free_modelz'LlamaModel.__init__.<locals>.free_model<   )    zz!&&tzz2DJ    )
path_modelparamsr   r   _exit_stackospathexists
ValueErrorr   r   llama_load_model_from_fileencoder   callback)r   r"   r#   r   r   r   s   `     r   __init__zLlamaModel.__init__!   s     %$;ww~~j)::,GHH#G4 	88&&w/E	
 =?
|LMM
	 	!!*-!	 	s   !:CC$c                8    | j                   j                          y r   r$   closer   s    r   r/   zLlamaModel.closeD        r!   c                $    | j                          y r   r/   r   s    r   __del__zLlamaModel.__del__G       

r!   c                @    t        j                  | j                        S r   )r   llama_vocab_typer   r   s    r   
vocab_typezLlamaModel.vocab_typeJ       ))$**55r!   c                @    t        j                  | j                        S r   )r   llama_n_vocabr   r   s    r   n_vocabzLlamaModel.n_vocabM   s    &&tzz22r!   c                @    t        j                  | j                        S r   )r   llama_n_ctx_trainr   r   s    r   n_ctx_trainzLlamaModel.n_ctx_trainP   s    **4::66r!   c                @    t        j                  | j                        S r   )r   llama_n_embdr   r   s    r   n_embdzLlamaModel.n_embdS   s    %%djj11r!   c                @    t        j                  | j                        S r   )r   llama_rope_freq_scale_trainr   r   s    r   rope_freq_scale_trainz LlamaModel.rope_freq_scale_trainV   s    44TZZ@@r!   c                    t        j                  d      }t        j                  | j                  |d       |j
                  j                  d      S )N   r   )ctypescreate_string_bufferr   llama_model_descr   valuedecode)r   bufs     r   desczLlamaModel.descY   s=    ))$/""4::sD9yy((r!   c                @    t        j                  | j                        S r   )r   llama_model_sizer   r   s    r   sizezLlamaModel.size^   r8   r!   c                @    t        j                  | j                        S r   )r   llama_model_n_paramsr   r   s    r   n_paramszLlamaModel.n_paramsa   s    --djj99r!   c                `    t        j                  | j                  |j                  d            S Nr   )r   llama_get_model_tensorr   r*   )r   names     r   
get_tensorzLlamaModel.get_tensord   s"    //

DKK<PQQr!   c                `    t        j                  | j                  |      j                  d      S rU   )r   llama_token_get_textr   rK   r   tokens     r   token_get_textzLlamaModel.token_get_texti   s$    --djj%@GGPPr!   c                B    t        j                  | j                  |      S r   )r   llama_token_get_scorer   r[   s     r   token_get_scorezLlamaModel.token_get_scorel   s    ..tzz5AAr!   c                B    t        j                  | j                  |      S r   )r   llama_token_get_attrr   r[   s     r   token_get_attrzLlamaModel.token_get_attro   s    --djj%@@r!   c                @    t        j                  | j                        S r   )r   llama_token_bosr   r   s    r   	token_boszLlamaModel.token_bost       ((44r!   c                @    t        j                  | j                        S r   )r   llama_token_eosr   r   s    r   	token_eoszLlamaModel.token_eosw   rg   r!   c                @    t        j                  | j                        S r   )r   llama_token_clsr   r   s    r   	token_clszLlamaModel.token_clsz   rg   r!   c                @    t        j                  | j                        S r   )r   llama_token_sepr   r   s    r   	token_sepzLlamaModel.token_sep}   rg   r!   c                @    t        j                  | j                        S r   )r   llama_token_nlr   r   s    r   token_nlzLlamaModel.token_nl   s    ''

33r!   c                @    t        j                  | j                        S r   )r   llama_token_prefixr   r   s    r   token_prefixzLlamaModel.token_prefix       ++DJJ77r!   c                @    t        j                  | j                        S r   )r   llama_token_middler   r   s    r   token_middlezLlamaModel.token_middle   rw   r!   c                @    t        j                  | j                        S r   )r   llama_token_suffixr   r   s    r   token_suffixzLlamaModel.token_suffix   rw   r!   c                @    t        j                  | j                        S r   )r   llama_token_eotr   r   s    r   	token_eotzLlamaModel.token_eot   rg   r!   c                @    t        j                  | j                        S r   )r   llama_add_bos_tokenr   r   s    r   add_bos_tokenzLlamaModel.add_bos_token       ,,TZZ88r!   c                @    t        j                  | j                        S r   )r   llama_add_eos_tokenr   r   s    r   add_eos_tokenzLlamaModel.add_eos_token   r   r!   c           	        | j                         }t        j                  |z         }t        j                  | j                  |t        |      ||||      }|dk  rft        |      }t        j                  |z         }t        j                  | j                  |t        |      ||||      }|dk  rt        d| d|       t        |d |       S )Nr   zFailed to tokenize: text="z" n_tokens=)	r>   r   llama_tokenllama_tokenizer   lenabsRuntimeErrorlist)r   textadd_bosspecialn_ctxtokensn_tokenss          r   tokenizezLlamaModel.tokenize   s      "''%/2++JJc$i
 a<8}H++h69F //

D#d)VXwH !|"0k(L  F9H%&&r!   c                    t        j                  d      }t        j                  | j                  ||dd|       t        |      S )N    r   )rG   rH   r   llama_token_to_piecer   bytes)r   r\   r   rL   s       r   token_to_piecezLlamaModel.token_to_piece   s8    ))"-&&tzz5#r1gNSzr!   c           	     J   d}d}t        j                  |z         }|D ]Q  }t        j                  | j                  t        j
                  |      ||d|      }||k  sJ |t        |d |       z  }S t        |      dkD  r#|d   | j                         k(  r|dd dk(  r|dd  S |S )Nr!   r   r   r       )	rG   c_charr   r   r   r   r   r   rf   )r   r   r   outputrP   bufferr\   ns           r   
detokenizezLlamaModel.detokenize   s    --$&) 	(E..

I11%8&$7A 99eF2AJ''F	( 6{Q6!90@#@VAa[TXEX 12J	
 	
r!   c                   i }d}t        j                  |      }d|z  |_        t        t	        j
                  | j                              D ]  }t	        j                  | j                  |||      }||kD  r<|dz   }t        j                  |      }t	        j                  | j                  |||      }|j                  j                  d      }t	        j                  | j                  |||      }||kD  r<|dz   }t        j                  |      }t	        j                  | j                  |||      }|j                  j                  d      }|||<    |S )NrF       r   r   )
rG   rH   rJ   ranger   llama_model_meta_countr   llama_model_meta_key_by_indexrK   !llama_model_meta_val_str_by_index)r   metadatabuffer_sizer   inbyteskeyrJ   s           r   r   zLlamaModel.metadata   sA   #%,,[9{*y77

CD 	"A<<

Av{F #$qj44[A"@@JJ6; ,,%%g.C@@

Av{F #$qj44[A"DDJJ6; LL''0E!HSM+	", r!   c                 *    t        j                         S )z#Get the default llama_model_params.)r   llama_model_default_params r!   r   default_paramszLlamaModel.default_params   s     3355r!   N)r"   strr#   zllama_cpp.llama_model_paramsr   boolreturnint)r   float)r   r   )rW   r   r   zctypes.c_void_p)r\   r   r   r   )r\   r   r   r   )r\   r   r   r   )r   r   )r   r   r   r   r   r   )F)r\   r   r   r   r   r   )r   z	List[int]r   r   r   r   )r   zDict[str, str])$__name__
__module____qualname____doc__r,   r/   r3   r7   r;   r>   rA   rD   rM   rP   rS   rX   r]   r`   rc   rf   rj   rm   rp   rs   rv   rz   r}   r   r   r   r   r   r   r   staticmethodr   r   r!   r   r   r      s    M !. !. -	!.
 !.F!6372A)
6:R
QBA
55554888599
'$

&> 6 6r!   r   c                  r   e Zd ZdZdd	 	 	 	 	 d&dZd Zd Zd'dZd'dZd	 Z	d(d
Z
d)dZd*dZd+dZd'dZd,dZd-dZd Zd.dZd Zd/dZ	 	 	 	 	 	 	 	 	 	 	 	 d0dZd1dZd2dZd3dZd3dZ	 	 	 	 	 	 d3dZd4dZd5dZ	 	 	 	 	 	 	 	 	 	 	 	 d6dZ	 	 	 	 	 	 	 	 	 	 d7dZd8dZd8d Z d9d!Z!d" Z"d# Z#e$d$        Z%y%):LlamaContextzIntermediate Python wrapper for a llama.cpp llama_context.
    NOTE: For stability it's recommended you use the Llama class instead.Tr   c                   | _         | _        | _        t                _        t        j                   j                   j                    j                        }|t        d      | _         fd} j                  j                  |       y )NzFailed to create llama_contextc                 l     j                   y t        j                   j                          d  _         y r   )ctxr   
llama_freer   s   r   free_ctxz'LlamaContext.__init__.<locals>.free_ctx   s)    xx  *DHr!   )
r   r#   r   r   r$   r   llama_new_context_with_modelr(   r   r+   )r   r   r#   r   r   r   s   `     r   r,   zLlamaContext.__init__   sw     
$;44TZZ5E5Et{{S;=>>	 	!!(+r!   c                8    | j                   j                          y r   r.   r   s    r   r/   zLlamaContext.close  r0   r!   c                $    | j                          y r   r2   r   s    r   r3   zLlamaContext.__del__  r4   r!   c                @    t        j                  | j                        S r   )r   llama_n_ctxr   r   s    r   r   zLlamaContext.n_ctx  s    $$TXX..r!   c                @    t        j                  | j                        S r   )r   llama_pooling_typer   r   s    r   pooling_typezLlamaContext.pooling_type  s    ++DHH55r!   c                B    t        j                  | j                         y r   )r   llama_kv_cache_clearr   r   s    r   kv_cache_clearzLlamaContext.kv_cache_clear  s    &&txx0r!   c                H    t        j                  | j                  |||       y r   )r   llama_kv_cache_seq_rmr   )r   seq_idp0p1s       r   kv_cache_seq_rmzLlamaContext.kv_cache_seq_rm  s    ''&"bAr!   c                J    t        j                  | j                  ||||       y r   )r   llama_kv_cache_seq_cpr   )r   
seq_id_src
seq_id_dstr   r   s        r   kv_cache_seq_cpzLlamaContext.kv_cache_seq_cp  s    ''*j"bQr!   c                D    t        j                  | j                  |       y r   )r   llama_kv_cache_seq_keepr   )r   r   s     r   kv_cache_seq_keepzLlamaContext.kv_cache_seq_keep  s    ))$((F;r!   c                J    t        j                  | j                  ||||       y r   )r   llama_kv_cache_seq_addr   )r   r   r   r   shifts        r   kv_cache_seq_shiftzLlamaContext.kv_cache_seq_shift  s    ((62r5Ir!   c                @    t        j                  | j                        S r   )r   llama_get_state_sizer   r   s    r   get_state_sizezLlamaContext.get_state_size       --dhh77r!   c                ~    t        j                  | j                  |j                        }|dk7  rt	        d|       y )Nr   zllama_decode returned )r   llama_decoder   batchr   )r   r   return_codes      r   rK   zLlamaContext.decode+  sB    ,,HHKK
 !!7}EFF r!   c                F    t        j                  | j                  ||       y r   )r   llama_set_n_threadsr   )r   	n_threadsn_threads_batchs      r   set_n_threadszLlamaContext.set_n_threads3  s    %%dhh	?Kr!   c                @    t        j                  | j                        S r   )r   llama_get_logitsr   r   s    r   
get_logitszLlamaContext.get_logits6  s    ))$((33r!   c                B    t        j                  | j                  |      S r   )r   llama_get_logits_ithr   )r   r   s     r   get_logits_ithzLlamaContext.get_logits_ith9  s    --dhh::r!   c                @    t        j                  | j                        S r   )r   llama_get_embeddingsr   r   s    r   get_embeddingszLlamaContext.get_embeddings<  r   r!   c                D    t        j                  | j                  |       y r   )r   llama_set_rng_seedr   )r   seeds     r   set_rng_seedzLlamaContext.set_rng_seedA  s    $$TXXt4r!   c           	         t        j                  | j                  t        j                  |j                        |||||       y r   )r   !llama_sample_repetition_penaltiesr   byref
candidates)r   r   last_tokens_datapenalty_last_npenalty_repeatpenalty_freqpenalty_presents          r   sample_repetition_penaltiesz(LlamaContext.sample_repetition_penaltiesE  s:     	33HHOOJ112	
r!   c                ~    t        j                  | j                  t        j                  |j                               y r   )r   llama_sample_softmaxr   r   r   r   r   s     r   sample_softmaxzLlamaContext.sample_softmaxX  s)    &&HHOOJ112	
r!   c                    t        j                  | j                  t        j                  |j                        ||       y r   )r   llama_sample_top_kr   r   r   )r   r   kmin_keeps       r   sample_top_kzLlamaContext.sample_top_k^  ,    $$HHiooj&;&;<a	
r!   c                    t        j                  | j                  t        j                  |j                        ||       y r   )r   llama_sample_top_pr   r   r   r   r   pr  s       r   sample_top_pzLlamaContext.sample_top_pc  r  r!   c                    t        j                  | j                  t        j                  |j                        ||       y r   )r   llama_sample_min_pr   r   r   r  s       r   sample_min_pzLlamaContext.sample_min_ph  r  r!   c                    t        j                  | j                  t        j                  |j                        ||       y r   )r   llama_sample_typicalr   r   r   r  s       r   sample_typicalzLlamaContext.sample_typicalm  s.     	&&HHiooj&;&;<a	
r!   c                    t        j                  | j                  t        j                  |j                        |       y r   )r   llama_sample_tempr   r   r   )r   r   temps      r   sample_tempzLlamaContext.sample_tempt  s*    ##HHiooj&;&;<d	
r!   c                    t        j                  | j                  t        j                  |j                        |j
                         y r   )r   llama_sample_grammarr   r   r   grammar)r   r   r  s      r   sample_grammarzLlamaContext.sample_grammary  s0    &&HHOOJ112OO	
r!   c                    t        j                  | j                  t        j                  |j                        ||||      S r   )r   llama_sample_token_mirostatr   r   r   )r   r   tauetammus         r   sample_token_mirostatz"LlamaContext.sample_token_mirostat  s<     44HHOOJ112
 	
r!   c                    t        j                  | j                  t        j                  |j                        |||      S r   )r   llama_sample_token_mirostat_v2r   r   r   )r   r   r#  r$  r&  s        r   sample_token_mirostat_v2z%LlamaContext.sample_token_mirostat_v2  s9     77HHOOJ112
 	
r!   c                |    t        j                  | j                  t        j                  |j                              S r   )r   llama_sample_token_greedyr   r   r   r  s     r   sample_token_greedyz LlamaContext.sample_token_greedy  s.    22HHOOJ112
 	
r!   c                |    t        j                  | j                  t        j                  |j                              S r   )r   llama_sample_tokenr   r   r   r  s     r   sample_tokenzLlamaContext.sample_token  s.    ++HHOOJ112
 	
r!   c                Z    t        j                  |j                  | j                  |       y r   )r   llama_grammar_accept_tokenr  r   )r   r  r\   s      r   grammar_accept_tokenz!LlamaContext.grammar_accept_token  s    ,,W__dhhNr!   c                B    t        j                  | j                         y r   )r   llama_perf_context_resetr   r   s    r   reset_timingszLlamaContext.reset_timings      **4884r!   c                B    t        j                  | j                         y r   )r   llama_perf_context_printr   r   s    r   print_timingszLlamaContext.print_timings  r7  r!   c                 *    t        j                         S )z%Get the default llama_context_params.)r   llama_context_default_paramsr   r!   r   r   zLlamaContext.default_params  s     5577r!   N)r   r   r#   zllama_cpp.llama_context_paramsr   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   )r   r   r   r   r   r   r   r   )r   
LlamaBatch)r   r   r   r   )r   r   r   r   )r   '_LlamaTokenDataArray'r   z('llama_cpp.Array[llama_cpp.llama_token]'r   r   r   r   r  r   r  r   )r   r?  )r   r?  r
  r   r  r   )r   r?  r  r   r  r   )r   r?  r  r   )r   r?  r  r   )r   r?  r#  r   r$  r   r%  r   r&  ,llama_cpp.CtypesPointerOrRef[ctypes.c_float]r   r   )
r   r?  r#  r   r$  r   r&  r@  r   r   )r   r?  r   r   )r  r   r\   r   )&r   r   r   r   r,   r/   r3   r   r   r   r   r   r   r   r   rK   r   r   r   r   r   r  r  r  r  r  r  r  r   r'  r*  r-  r0  r3  r6  r:  r   r   r   r!   r   r   r      s   M , , /	,
 ,6!/61BR<J8GL4;8
5
*
 C
 	

 
 
 
&







0
5:
FI




*
 
 	

 
 9
 

"
*
 
 	

 9
 



O55 8 8r!   r   c                  R    e Zd Zdd	 	 	 	 	 	 	 ddZd Zd ZddZd ZddZdd	Z	y
)r=  Tr   c               .    | _         | _        | _        | _        t	                _        t        j                   j                    j                   j                        }|t        d      | _	         fd} j
                  j                  |       y )NzFailed to create llama_batchc                 l     j                   y t        j                   j                          d  _         y r   )r   r   llama_batch_freer   s   r   
free_batchz'LlamaBatch.__init__.<locals>.free_batch  r    r!   )	_n_tokensembd	n_seq_maxr   r   r$   r   llama_batch_initr(   r   r+   )r   r   rG  rH  r   r   rE  s   `      r   r,   zLlamaBatch.__init__  s~     "	"$;**4>>499dnnU=;<<
	 	!!*-r!   c                8    | j                   j                          y r   r.   r   s    r   r/   zLlamaBatch.close  r0   r!   c                $    | j                          y r   r2   r   s    r   r3   zLlamaBatch.__del__  r4   r!   c                .    | j                   j                  S r   r   r   r   s    r   r   zLlamaBatch.n_tokens  s    zz"""r!   c                &    d| j                   _        y )Nr   rM  r   s    r   resetzLlamaBatch.reset  s    

r!   c                   t        |      }|| j                  _        t        |      D ]  }||   | j                  j                  |<   ||z   | j                  j
                  |<   d| j                  j                  |   d<   d| j                  j                  |<   || j                  j                  |<    d| j                  j                  |dz
  <   y Nr   r   T	r   r   r   r   r\   posr   n_seq_idlogits)r   r   n_past
logits_allr   r   s         r   	set_batchzLlamaBatch.set_batch  s    u:&

x 	.A"'(DJJQ &
DJJNN1&'DJJa #%&DJJ"#-DJJa 	. +/

(Q,'r!   c                   t        |      }| j                  j                  }| j                  xj                  |z  c_        t        |      D ]  }||z   }||   | j                  j                  |<   || j                  j
                  |<   || j                  j                  |   d<   d| j                  j                  |<   || j                  j                  |<    d| j                  j                  |dz
  <   y rQ  rR  )r   r   r   rW  r   	n_tokens0r   js           r   add_sequencezLlamaBatch.add_sequence  s    u:JJ''	

x'x 	.AAA"'(DJJQ !DJJNN1&,DJJa #%&DJJ"#-DJJa 	. +/

(Q,'r!   N)r   r   rG  r   rH  r   r   r   r   )r   Sequence[int]rV  r   rW  r   )r   r]  r   r   rW  r   )
r   r   r   r,   r/   r3   r   rO  rX  r\  r   r!   r   r=  r=    sH    KO..&).69.DH.0!# 	//r!   r=  c                      e Zd ZddZddZy)LlamaTokenDataArrayc          	     p   || _         t        j                  | j                   ft        j                  dt        j                  fdt        j
                  fdt        j
                  fgd            | _        t        j                  | j                  j                  j                  t        j                        | j                   d      | _        t        j                  | j                   t        j                        | _        t        j                  | j                   t        j
                        | _        y )	Nidlogitr  T)aligndtypeF)datarP   sorted)r;   nprecarrayre  intcsinglecandidates_datar   llama_token_data_arrayrG   data_asllama_token_data_pr   arangedefault_candidates_data_idzerosdefault_candidates_data_p)r   r;   s     r   r,   zLlamaTokenDataArray.__init__  s    !{{\\O((7BII"6bii8HIQU 
 $::%%,,44Y5Q5QR

 +-))DLL*P')+$,,bii)P&r!   c                   | j                   | j                  j                  d d  || j                  j                  d d  | j                  | j                  j
                  d d  d| j                  _        | j                  | j                  _	        y )NF)
rq  rl  ra  rb  rs  r  r   rg  r;   rP   )r   rU  s     r   copy_logitszLlamaTokenDataArray.copy_logits  sl    %)%D%D"(.""1%$($B$Bq!!&#||r!   N)r;   r   )rU  znpt.NDArray[np.single])r   r   r   r,   ru  r   r!   r   r_  r_    s    Q ,r!   r_  c                    t        t        j                  j                  |             }|dk(  r| S | D cg c]  }||z  	 c}S c c}w )N        )r   rh  linalgnorm)	embeddingry  vs      r   normalize_embeddingr|    s>    	*+Ds{'(AH(((s   Ac                  4   e Zd ZU dZded<   dZded<   dZded<   dZd	ed
<   dZd	ed<   dZ	d	ed<   dZ
d	ed<   dZd	ed<   dZded<   dZd	ed<   dZd	ed<   dZd	ed<   dZded<   dZd	ed<   dZd	ed<   dZded<   dZd ed!<   dZd ed"<   dZd	ed#<    ee$      Zd%ed&<   y')(LlamaSamplingParams@   r   n_prevr   n_probs(   top_kgffffff?r   top_pg?min_pg      ?tfs_z	typical_pg?r  r   r   rw  r  r  mirostatg      @mirostat_taug?mirostat_etaTr   penalize_nl r   r  cfg_negative_prompt	cfg_scaledefault_factoryzdict[int, float]
logit_biasN)r   r   r   r  __annotations__r  r  r  r  r  r  r  r   r   r  r  r  r  r  r  r  r  r  r
   dictr  r   r!   r   r~  r~  !  s    FCGSE3OE5E5E5IuD%NCNEL%!OU!HcL%L%KGS!!Iu#(#>J >r!   r~  c                      e Zd ZU  ee      Zded<    eej                        Z	ded<   dZ
ded<    ee      Zd	ed
<    ee      Zded<   d Zd ZddZddZ	 	 d	 	 	 	 	 ddZddZy)LlamaSamplingContextr  r~  r#   zctypes.c_floatmirostat_muNzOptional[LlamaGrammar]r  z	list[int]prevz list[llama_cpp.llama_token_data]curc                n    g | _         g | _        | j                  | j                  j                          y y r   )r  r  r  rO  r   s    r   rO  zLlamaSamplingContext.resetE  s/    	<<#LL  $r!   c                    t        | j                  | j                  | j                  | j                  j                         | j                  j                               S )N)r#   r  r  r  r  )r  r#   r  r  r  copyr  r   s    r   cpzLlamaSamplingContext.cpK  sA    #;;((LL!
 	
r!   c                R    t        | j                        dkD  r| j                  d   S y )Nr   )r   r  r   s    r   lastzLlamaSamplingContext.lastT  s#    tyy>A99R= r!   c                r    |j                   j                  | j                  | d        j                  d      S rU   )r   r   r  rK   )r   ctx_mainr   s      r   prev_strzLlamaSamplingContext.prev_strZ  s.    ~~((A238??HHr!   c           	     	   |j                   j                         }d}|x|j                  |      }t        j                  t        j                  |t        j                  t
        j                  |z              j                  t        j                        }| j                  j                  j                         D ]  \  }}||xx   |z  cc<    t        |      }	|	j                  |       t!        | j"                        dkD  r|j                   j%                         }
||
   }| j"                  | j                  j&                   d  }t)        t!        |      | j                  j&                        }|dkD  rpt+        j,                  t!        |      z  | }|j/                  |	||| j                  j0                  | j                  j2                  | j                  j4                         | j                  j6                  s||	j8                  j:                  |
<   | j<                  |j?                  |	| j<                         | j                  j@                  dk  r,|jC                  |	       |	j8                  jD                  d   }|S | j                  j@                  dk(  r|jG                  |	      }|S | j                  jH                  dk(  rd}|jK                  |	| j                  j@                         |jM                  |	| j                  jN                  | j                  jP                  |t        jR                  | jT                              }|S | j                  jH                  dk(  r|jK                  |	| j                  j@                         |jW                  |	| j                  jN                  | j                  jP                  t        jR                  | jT                              }|S tY        d| j                  jZ                        }|j]                  |	| j                  j^                  |       |ja                  |	| j                  jb                  |       |je                  |	| j                  jf                  |       |ji                  |	| j                  jj                  |       |jK                  |	| j                  j@                         |jm                  |	      }|S )Nr   rd  )r;   r   d      )r  )7r   r;   r   rh  arrayrG   castPOINTERc_floatcontentsrk  r#   r  itemsr_  ru  r   r  rs   r   minr   r   r  r   r  r  r  rl  rb  r  r   r  r  ra  r-  r  r  r'  r  r  pointerr  r*  maxr  r  r  r  r  r  r  r  r  r0  )r   r  idxlogits_arrayr;   ra  rU  r\   r  token_data_arraynl_tokennl_logitlast_tokenslast_tokens_sizelast_tokens_p
mirostat_mr  s                    r   samplezLlamaSamplingContext.sample]  s    ..((*,,S1F88FFNN6>>G3K$LMVViiL "&!7!7!=!=!? 	.E::-	. /
 	$$\2 tyy>A~~..0H#H-H))T[[%?%?$?$ABK"3{#3T[[5O5OP!#!*!6!6[9I!IK X44$!$KK..KK,,KK// ;;**CK 0066x@<<###$4dllC;;a##$45!1144Q7BL 	K [["--.>?BH 	E {{##q( 
$$%5t{{7G7GH33$KK,,KK,,NN4#3#34> 	1 %%*$$%5t{{7G7GH66$KK,,KK,,NN4#3#34	, 	 q$++"5"56%%$dkk&7&7( &  ''$dkk&;&;h (  %%$dkk&7&7( &  %%$dkk&7&7( &  $$%5t{{7G7GH**+;<	r!   c                    |r(| j                   |j                  | j                   |       | j                  j                  |       y r   )r  r3  r  append)r   r  ra  apply_grammars       r   acceptzLlamaSamplingContext.accept  s4    T\\5))$,,;		r!   )r   zOptional[int])r  r   r   r   r   r   )r   N)r  r   r  r   r  z Optional[npt.NDArray[np.single]])r  r   ra  r   r  r   )r   r   r   r
   r~  r#   r  rG   r  r  r  r   r  r  rO  r  r  r  r  r  r   r!   r   r  r  <  s    "'8K"LFL"'"GKG&*G#*D1D)1,1$,GC	)G!
I 9=	WW W 7	Wrr!   r  )r   Callabler   Unionc                       e Zd Z	 	 ddZddZy)CustomSamplerc                :    | _         	 	 	 	 d fd}dd}t        j                         }t        j                  |      |_        | _        t        j                  d      |_        t        j                  d      |_	        t        j                  d      |_        t        j                  d      |_        t        j                  d      |_        t        j                           _        t%        j&                  |       j"                  _        d  j"                  _        y )Nc                (    j                  |       y r   )
apply_func)samplercur_pr   s     r   apply_wrapperz-CustomSampler.__init__.<locals>.apply_wrapper  s     OOE"r!   c                     y r   r   )r  s    r   free_wrapperz,CustomSampler.__init__.<locals>.free_wrapper  s    r!   r   )r  llama_cpp.llama_sampler_pr  z"llama_cpp.llama_token_data_array_pr  r  )r  r   llama_sampler_illama_sampler_i_applyapply_apply_wrapper_refllama_sampler_i_namerW   llama_sampler_i_acceptr  llama_sampler_i_resetrO  llama_sampler_i_cloneclonellama_sampler_i_freefreellama_samplerr  rG   r  ifacer   )r   r  r  r  	sampler_is   `    r   r,   zCustomSampler.__init__  s     %	#.	#5	#	 --/	#99-H	"/"77:	$;;A>	#99!<	#99!<	"77:	 ..0#^^I6r!   c                @    t        j                  | j                        S r   )rG   r  r  r   s    r   get_samplerzCustomSampler.get_sampler  s    ~~dll++r!   N)r  z9typing.Callable[[llama_cpp.llama_token_data_array], None])r   r  )r   r   r   r,   r  r   r!   r   r  r    s     S 8,r!   r  c                      e Zd Zd Zd ZddZd ZddZddZddZ	ddZ
dd	Zdd
ZddZddZddZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 d dZ	 	 d!dZd"dZd#dZd$dZd Zd Zy)%LlamaSamplerc                |    t        j                         }t        j                  |      | _        g | _        g | _        y r   )r   llama_sampler_chain_paramsllama_sampler_chain_initr  samplerscustom_samplers)r   r#   s     r   r,   zLlamaSampler.__init__  s1    557 99&A9;@Br!   c                N    t        j                         }| j                  |       y r   )r   llama_sampler_init_greedy_add_samplerr   r  s     r   
add_greedyzLlamaSampler.add_greedy  s    557'"r!   c                P    t        j                  |      }| j                  |       y r   )r   llama_sampler_init_distr  )r   r   r  s      r   add_distzLlamaSampler.add_dist       33D9'"r!   c                N    t        j                         }| j                  |       y r   )r   llama_sampler_init_softmaxr  r  s     r   add_softmaxzLlamaSampler.add_softmax  s    668'"r!   c                P    t        j                  |      }| j                  |       y r   )r   llama_sampler_init_top_kr  )r   r
  r  s      r   	add_top_kzLlamaSampler.add_top_k  s     44Q7'"r!   c                R    t        j                  ||      }| j                  |       y r   )r   llama_sampler_init_top_pr  r   r  r  r  s       r   	add_top_pzLlamaSampler.add_top_p  "    44QA'"r!   c                R    t        j                  ||      }| j                  |       y r   )r   llama_sampler_init_min_pr  r  s       r   	add_min_pzLlamaSampler.add_min_p  r  r!   c                R    t        j                  ||      }| j                  |       y r   )r   llama_sampler_init_typicalr  r  s       r   add_typicalzLlamaSampler.add_typical  s"    66q(C'"r!   c                P    t        j                  |      }| j                  |       y r   )r   llama_sampler_init_tempr  )r   r  r  s      r   add_tempzLlamaSampler.add_temp  r  r!   c                T    t        j                  |||      }| j                  |       y r   )r   llama_sampler_init_temp_extr  )r   tdeltaexponentr  s        r   add_temp_extzLlamaSampler.add_temp_ext	  s$    775(K'"r!   c                X    t        j                  |||||      }| j                  |       y r   )r   llama_sampler_init_mirostatr  )r   r;   r   r#  r$  r%  r  s          r   add_mirostatzLlamaSampler.add_mirostat  s)    77sCQRS'"r!   c                T    t        j                  |||      }| j                  |       y r   )r   llama_sampler_init_mirostat_v2r  )r   r   r#  r$  r  s        r   add_mirostat_v2zLlamaSampler.add_mirostat_v2  s$    ::4cJ'"r!   c                    t        j                  |j                  |j                  j	                  d      |j
                  j	                  d            }| j                  |       y rU   )r   llama_sampler_init_grammarr   _grammarr*   _rootr  )r   r   r  r  s       r   add_grammarzLlamaSampler.add_grammar  sN    66KK))0097==;O;OPW;X
 	'"r!   c
                `    t        j                  |||||||||		      }
| j                  |
       y r   )r   llama_sampler_init_penaltiesr  )r   r;   special_eos_idlinefeed_idr   r   r  r  r  
ignore_eosr  s              r   add_penaltieszLlamaSampler.add_penalties  s?     88

 	'"r!   c                T    t        j                  |||      }| j                  |       y r   )r   llama_sampler_init_logit_biasr  )r   r;   n_logit_biasr  r  s        r   init_logit_biaszLlamaSampler.init_logit_bias4  s+     99\:
 	'"r!   c                    t        |      }|j                         }| j                  |       | j                  j	                  t        j                  | j                        dz
  |f       y )Nr   )r  r  r  r  r  r   llama_sampler_chain_nr  )r   r  custom_samplerr  s       r   
add_customzLlamaSampler.add_custom<  sZ     'z2 ,,.'"##,,T\\:Q>O	
r!   c                    | j                   J t        j                  | j                   |       | j                  j	                  |       y r   )r  r   llama_sampler_chain_addr  r  r  s     r   r  zLlamaSampler._add_samplerG  s8    ||'''))$,,@W%r!   c                \    | j                   J t        j                  | j                         S r   )r  r   llama_sampler_get_seedr   s    r   get_seedzLlamaSampler.get_seedL  s'    ||'''//==r!   c                t    | j                   J t        j                  | j                   |j                  |      S r   )r  r   llama_sampler_sampler   )r   r   r  s      r   r  zLlamaSampler.sampleP  s/    ||'''--dllCGGSIIr!   c                J   | j                   rct        | j                        D ]%  \  }}t        j                  | j                   |       ' t        j
                  | j                          d | _         | j                  j                          | j                  j                          y r   )r  reversedr  r   llama_sampler_chain_removellama_sampler_freer  clear)r   r   _s      r   r/   zLlamaSampler.closeT  sx    << !5!56 F144T\\1EF((6DL""$r!   c                $    | j                          y r   r2   r   s    r   r3   zLlamaSampler.__del__^  r4   r!   Nr>  )r
  r   )r  r   r  r   )r  r   )r  r   r  r   r  r   )
r;   r   r   r   r#  r   r$  r   r%  r   )r   r   r#  r   r$  r   )r   r   r  r   )r;   r   r  r   r  r   r   r   r   r   r  r   r  r   r  r   r  r   )r;   r   r  zllama_cpp.llama_logit_bias_p)r  z2Callable[[llama_cpp.llama_token_data_array], None]r  r   )r   r   r  r   r   r   )r   r   r   r,   r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r  r"  r  r/   r3   r   r!   r   r  r    s    C############## # 	#
 # # # # # #2##6R#	
L	
&
>J%r!   r  )%
__future__r   r%   rG   typingr   r   r   r   r   dataclassesr	   r
   
contextlibr   numpyrh  numpy.typingnptllama_typesllama_grammarr   _utilsr   llama_cpp.llama_cppr   r   r   r=  r_  r|  r~  r  r  r  r  r  r   r!   r   <module>r7     s    " 	   )      ' * 'F6 F6RT8 T8n;/ ;/|, ,8) ? ? ?4 | | |~ 3 2  , ,B} }r!   