
     sg                   @   d dl mZ d dlZd dlZd dlZd dlmZmZmZm	Z	m
Z
 d dlmZmZmZ e
rd dlmZmZmZmZmZmZmZ dZej.                  j1                  d      ZeG ej4                  ej6                  j9                  ej6                  j;                  e                  dz  n ej4                  e      Z eee      Z  ee       Z!d Z"d	Z#d
Z$dZ%dZ&dZ'dZ(dZ)dZ*dZ+dZ,dZ-dZ.dZ/dZ0dZ1dZ2dZ3dZ4dZ5dZ6dZ7dZ8dZ9d Z:d!Z;d"Z<d#Z=d$Z> ej~                  ej                  ej                  ej                  ej                        ZB ej~                  ej                  ej                        ZCg e j                  _E        ej                  e j                  _G        e j                         ZHd%ZId&ZJd'ZKd(ZLd)ZMeLZNdZOeMZPd
ZQ ed*eR      ZSej                  ZT ed+eR      ZUej                  ZVej                  ZXej                  ZY ej                  eY      Z[ej                  Z\d Z]	 d	Z^	 d
Z_	 dZ`	 d,Za	 d-Zb	 d Zcd	Zdd
ZedZfd,Zgd-ZhdZidZjdZkdZldZmdZndZodZpdZqdZrdZsdZtdZudZvdZwdZxdZydZzdZ{dZ|d Z}d!Z~d&Zd Zd
xZZd Zd	Zd
ZdZd,Zd-ZdZd Zd	Zd
Zd,ZdZdZd.Zd/Zd0Zd1Zd2Zd Zd	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZd Zd!Zd"Zd#Zd$Zd3Zd.Zd4Zd5Zd6Zd&Zd Zd	Zd
ZdZeZd&Zd Zd	Zd
ZdZd,Zd&Zd Zd	Zd Zd	Zd
Z G d7 d8ej                        Z ej                  eɫ      Z G d9 d:ej                        Z ej                  e˫      Z ej~                  ej                  ej                  ej                        Z G d; d<ej                        Zd Zd	Zd
ZdZ G d= d>ej                        Z G d? d@ej                        Z G dA dBej                        Z G dC dDej                        Z ej~                  dej                  ej                  ej                        Z	  G dE dFej                        Z G dG dHej                        Z ej                  eܫ      Z G dI dJej                        Z G dK dLej                        Zej                  Z ej                  ej                        Z e!dMg e֫      ddN       Z e!dOg e׫      ddP       Z e!dQg eޫ      ddR       Z e!dSg e۫      ddT       Z e!dUg d      dV        Zd Zd	Zd
ZdZd,Zd-Z e!dWej                  gd      ddX       Z e!dYg d      dZ        Z e!d[ej                  egeT      	 	 	 	 	 	 dd\       Z e!d]eTgd      dd^       Z e!d_eTegeV      	 	 	 	 	 	 dd`       Z e!daeVgd      ddb       Z e!dcg ej                        ddd       Z e!deg ej                        ddf       ZD e!dgg ej                        ddh       Z e!dig ej                        ddj       Z e!dkg ej                        ddl       Z e!dmg ej                        ddn       Z e!doeVgej                        ddp       Z e!dqeVgej                        ddr       Z e!dseVgej                        ddt       Z e!dueVgej                        ddv       Z e!dweTgej                        ddx       Z e!dyeTgej                        ddz       Z e!d{eTgej                        dd|       Z  e!d}eTgej                        dd~       Z e!deTgej                        dd       Z e!deVgeT      dd       Z e!deVgej                        dd       Z e!deTgej                        dd       Z e!deTgej                        dd       Z e!deTgej                        dd       Z e!deTej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 dd       Z e!deTgej                        dd       Z	 e!deTej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 dd       Z
 e!deTej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 dd       Z e!deTej                  ej                  gej                        	 	 	 	 	 	 	 	 dd       Z e!deTgej                        dd       Z e!deTgej                        dd       Z e!deTej                  gej                        	 	 	 	 	 	 dd       Z e!deTgej                        dd       Z e!deTgej                        dd       Z e!deTgej                        dd       Z e!deTgej                        dd       Z e!dej                  ej                   ej                  e۫      gej                        	 	 	 	 	 	 	 	 dd       Z e!deTej                  ge      	 	 	 	 	 	 dd       Z e!deVeej                  gej                        	 	 	 	 	 	 	 	 dd       Z e!deVegej                        	 	 	 	 	 	 dd       Z e!deVgd      dd       Z e!degd      dd       Z e!deV ej                  ej                        ej                  ej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z G d dej                        Z G d dej                        Z ej                  e      Z e!deVej                  ge      	 	 	 	 	 	 dd       Z e!degd      dd       Z  e!deVegd      dd       Z! e!deVgej                        dd       Z" e!deVgej                        dd       Z# e!deVgd      dd       Z$ e!deVe\eXeXgej                        	 	 	 	 	 	 	 	 	 	 ddÄ       Z% e!deVe\e\eXeXgd      	 	 	 	 	 	 	 	 	 	 ddń       Z& e!deVe\gd      ddǄ       Z' e!deVe\eXeXeXgd      	 	 	 	 	 	 	 	 	 	 ddɄ       Z( e!deVe\eXeXej                  gd      	 	 	 	 	 	 	 	 	 	 dd˄       Z) e!deVgd      dd̈́       Z* e!deVgd      ddτ       Z+ e!deVgej                        ddф       Z, e!deVgej                        ddӄ       Z- e!deVgej                        ddՄ       Z. e!deV ej                  ej^                        ej                  gej                        	 	 	 	 	 	 	 	 ddׄ       Z0 e!deV ej                  ej^                        gej                        	 	 	 	 	 	 ddل       Z1 e!deV ej                  ej^                        ej                  gej                        	 	 	 	 	 	 	 	 ddۄ       Z2 e!deV ej                  ej^                        gej                        	 	 	 	 	 	 dd݄       Z3 e!deVej                  e[ej                   ej                  ej                        gej                        	 	 	 	 	 	 	 	 	 	 	 	 dd߄       Z4 e!deVej                  e[ej                   ej                  ej                        gej                        	 	 	 	 	 	 	 	 	 	 	 	 dd       Z5 e!deVej                  e[ej                  gej                        	 	 	 	 	 	 	 	 	 	 dd       Z6 e!deVej                  e[ej                  gej                        	 	 	 	 	 	 	 	 	 	 dd       Z7 e!deVe\gej                        dd       Z8 e!deV ej                  ej^                        ej                  e\gej                        	 	 	 	 	 	 	 	 	 	 dd       Z9 e!deV ej                  ej^                        ej                  e\gej                        	 	 	 	 	 	 	 	 	 	 dd       Z: e!deVej                  e\e[ej                  gej                        	 	 	 	 	 	 	 	 	 	 	 	 dd       Z; e!deVej                  e\e[ej                   ej                  ej                        gej                        	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z< e!de[ej                  geϫ      	 	 	 	 	 	 dd       Z= e!dej                  ej                  ej                  geϫ      	 	 	 	 	 	 	 	 dd       Z> e!degd      dd       Z? e!deVegej                        dd       Z@ e!deVegej                        dd       ZA e!deVej                  ej                  gd      	 	 	 	 	 	 dd       ZB e!deVgej                        dd       ZC e!deVgej                        dd       ZD e!d eVej                  gd      dڐd       ZE e!deVej                  gd      dېd       ZF e!deVeCej                  gd      	 	 	 	 	 	 dܐd       ZG e!deVgd      dd       ZH e!deVg ej                  ej                              dݐd	       ZI e!d
eVej                  g ej                  ej                              	 	 	 	 	 	 dސd       ZJ e!deVg ej                  ej                              dݐd       ZK e!deVej                  g ej                  ej                              	 	 	 	 	 	 dސd       ZL e!deVe\g ej                  ej                              	 	 	 	 	 	 dߐd       ZM e!deTeYgej                        	 	 	 	 	 	 dd       ZN e!deTeYgej                        	 	 	 	 	 	 dd       ZO e!deTeYgej                        	 	 	 	 	 	 dd       ZP e!deTeYgej                        dd       ZQ e!deTeYgej                        	 	 	 	 	 	 dd       ZR e!deTgeY      dd       ZS e!deTgeY      dd       ZT e!d eTgeY      dd!       ZU e!d"eTgeY      dd#       ZV e!d$eTgeY      dd%       ZW e!d&eTgeY      dd'       ZX e!d(eTgej                        dd)       ZY e!d*eTgej                        dd+       ZZ e!d,eTgeY      dd-       Z[ e!d.eTgeY      dd/       Z\ e!d0eTgeY      dd1       Z] e!d2eTgeY      dd3       Z^ e!d4eTgeY      dd5       Z_ e!d6eTgeY      dd7       Z` e!d8eTgeY      dd9       Za e!d:eTgeY      dd;       Zb e!d<eTgeY      dd=       Zc e!d>eTej                  ej                  e[ej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd?       Zd e!d@eTeYej                  ej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddA       Ze e!dBeT ej                  eY      ej                  ej                  ej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddC       Zf e!dDej                  ej                   ej                  e߫      ej                  gej                        	 	 	 	 	 	 	 	 	 	 ddE       Zg e!dF ej                  ej                        ej                  gej                        	 	 	 	 	 	 ddG       Zhej                  Zi G dH dIej                        Zj G dJ dKej                        Zke
reek   Zl ej                  ek      Zm ej~                  ej                  em      Zn ej~                  demeY      Zo ej~                  deme̫      Zp ej~                  dem      Zq ej~                  emem      Zr ej~                  dem      ZsdenfdLeofdMepfdNeqfdOerfdPesfgej_t         e!dQemgej                        ddR       Zu e!dSemeYgd      ddT       Zv e!dUemegd      	 	 	 	 ddV       Zw e!dWemgd      ddX       Zx e!dYemgem      ddZ       Zy e!d[emgd      dd\       Zz e!d]egem      dd^       Z{ e!d_ememgd      dd`       Z| e!daemej                  gem      	 	 	 	 	 	 ddb       Z} e!dcemgej                        ddd       Z~ e!deemej                  gem      	 	 	 	 	 	 ddf       Z e!dgg em      ddh       Z e!diej                  gem      ddj       Z e!dkg em      ddl       Z e!dmej                  gem      ddn       Z e!doej                  ej                  gem      ddp       Z e!dqej                  ej                  gem      ddr       Z e!dsej                  ej                  gem      ddt       Z e!duej                  gem      ddv       Z e!dwej                  ej                  ej                  gem      	 	 	 	 	 	 	 	 ddx       Z e!dyej                  ej                  ej                  ej                  gem      	 	 	 	 	 	 	 	 	 	 ddz       Z e!d{ej                  ej                  ej                  ej                  ej                  gem      	 	 	 	 	 	 	 	 	 	 	 	 dd|       Z e!d}ej                  ej                  ej                  gem      	 	 	 	 	 	 	 	 dd~       Z e!deTej                  ej                  gem      	 	 	 	 	 	 	 	 dd       Z e!dej                  eYeYej                  ej                  ej                  ej                  ej                  ej                  g	em      	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z e!deTej                  ej                  ej                  ej                   ej                  ej                        ej                  gem      	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z e!dej                  ej                  egem      	 	 	 	 	 	 	 	 dd       Z e!deTgem      dd       Z e!demgej                        d d       Z e!demeVej                  geY      	 	 	 	 	 	 	 	 dd       Z e!dej                  ej                  ej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 	 	 dd       Z e!dej                  ej                  ej                  ej                  ej                  gej                        	 	 	 	 	 	 	 	 	 	 	 	 dd       Z e!dg ej                        dd       Z e!dej                  ej                  gd      	 	 	 	 dd       Z G d dej                        Z G d dej                        Z e!deVge      dd       Z e!deVgd      dd       Z e!deVgd      dd       Z e!demge      dd       Z e!demgd      dd       Z e!demgd      dd       Zy(	      )annotationsN)CallableUnionNewTypeOptionalTYPE_CHECKING)load_shared_librarybyref"ctypes_function_for_shared_library)CtypesCDataCtypesArrayCtypesPointerCtypesVoidPointer	CtypesRefCtypesPointerOrRefCtypesFuncPointerllamaLLAMA_CPP_LIB_PATHlib                  	   
                                                               l    ialgginsggiqsggllama_model_pllama_context_p          @         i      $   %   i   c                  r    e Zd ZU dZerded<   ded<   ded<   defdej                  fdej                  fgZ	y)llama_token_datazUsed to store token data

    Attributes:
        id (llama_token): token id
        logit (float): log-odds of the token
        p (float): probability of the tokenllama_tokenidfloatlogitpN
__name__
__module____qualname____doc__r   __annotations__r@   ctypesc_float_fields_     F/var/www/html/venv/lib/python3.12/site-packages/llama_cpp/llama_cpp.pyr?   r?     sD    /  
{	&..!	fnnHrO   r?   c                      e Zd ZU dZerded<   ded<   ded<   ded<   defdej                  fdej                  fdej                  fgZy	)
llama_token_data_arraya  Used to sample tokens given logits

    Attributes:
        data (ctypes.Array[llama_token_data]): token data
        size (int): size of the array
        selected (int): index in the data array (i.e. not the token id)
        sorted (bool): whether the array is sortedzCtypesArray[llama_token_data]dataintsizeselectedboolsortedN)rF   rG   rH   rI   r   rJ   llama_token_data_prK   c_size_tc_int64c_boolrM   rN   rO   rP   rR   rR     sW    6 ++	 
#$	!	V^^$	6==!	HrO   rR   c                     e Zd ZU dZer#ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   dej                  fd ej                  e	      fd ej                  ej                        fd	 ej                  e      fd ej                  ej                        fd ej                   ej                  e            fd ej                  ej                        fgZy)llama_batcha#  Input data for llama_decode

    A llama_batch object can contain input about one or many sequences

    The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens

    Attributes:
        n_tokens (int): number of tokens
        token (ctypes.Array[llama_token]): the token ids of the input (used when embd is NULL)
        embd (ctypes.Array[ctypes.ctypes.c_float]): token embeddings (i.e. float vector of size n_embd) (used when token is NULL)
        pos (ctypes.Array[ctypes.Array[llama_pos]]): the positions of the respective token in the sequence
        seq_id (ctypes.Array[ctypes.Array[llama_seq_id]]): the sequence to which the respective token belongs
        logits (ctypes.Array[ctypes.ctypes.c_int8]): if zero, the logits for the respective token will not be output
    rT   n_tokensCtypesArray[llama_token]tokenCtypesArray[ctypes.c_float]embdz#CtypesArray[CtypesArray[llama_pos]]poszCtypesArray[ctypes.c_int]n_seq_idz&CtypesArray[CtypesArray[llama_seq_id]]seq_idzCtypesArray[ctypes.c_int8]logitsN)rF   rG   rH   rI   r   rJ   rK   c_int32POINTERr@   rL   	llama_posllama_seq_idc_int8rM   rN   rO   rP   r^   r^     s     ''))00++66** 
V^^$	.&..-.	/0	y)*	^V^^FNN34	>6>>.&..">?@	>6>>&--01HrO   r^   c                      e Zd ZU dej                  fdej
                  fdej                  fdej                  dz  fgZe	rde
d<   de
d<   de
d<   d	e
d<   y
y
)llama_model_kv_override_valueval_i64val_f64val_boolval_strr9   rT   rB   rW   bytesN)rF   rG   rH   rK   r[   c_doubler\   c_charrM   r   rJ   rN   rO   rP   rn   rn   Q  s\    	FNN#	FOO$	V]]#	FMMC'(	H 	 rO   rn   c                  v    e Zd ZU dej                  fdej
                  dz  fdefgZerde	d<   de	d<   de	d<   yy)	llama_model_kv_overridetagkeyr9   valuerT   rs   zUnion[int, float, bool, bytes]N)
rF   rG   rH   rK   c_intru   rn   rM   r   rJ   rN   rO   rP   rw   rw   `  sI    		#$	/0H 
-- rO   rw   c                     e Zd ZU dZer<ded<   ded<   ded<   ded<   ded	<   d
ed<   ded<   ded<   ded<   ded<   ded<   ded<   dej                  fdej                  fdej                  fdej                  fd ej                  ej                        fd	ej                  fdefdej                  fd ej                  e      fdej                  fdej                  fdej                  fdej                  fgZy)llama_model_paramsa  Parameters for llama_model

    Attributes:
        n_gpu_layers (int): number of layers to store in VRAM
        split_mode (int): how to split the model across multiple GPUs
        main_gpu (int): the GPU that is used for the entire model. main_gpu interpretation depends on split_mode: LLAMA_SPLIT_NONE: the GPU that is used for the entire model LLAMA_SPLIT_ROW: the GPU that is used for small tensors and intermediate results LLAMA_SPLIT_LAYER: ignored
        tensor_split (ctypes.Array[ctypes.ctypes.c_float]): proportion of the model (layers or rows) to offload to each GPU, size: llama_max_devices()
        rpc_servers (ctypes.c_char_p): comma separated list of RPC servers to use for offloading
        progress_callback (llama_progress_callback): called with a progress value between 0.0 and 1.0. Pass NULL to disable. If the provided progress_callback returns true, model loading continues. If it returns false, model loading is immediately aborted.
        progress_callback_user_data (ctypes.ctypes.c_void_p): context pointer passed to the progress callback
        kv_overrides (ctypes.Array[llama_model_kv_override]): override key-value pairs of the model meta data
        vocab_only (bool): only load the vocabulary, no weights
        use_mmap (bool): use mmap if possible
        use_mlock (bool): force system to keep model in RAM
        check_tensors (bool): validate model tensor datarT   n_gpu_layers
split_modemain_gpurb   tensor_splitzctypes.c_char_prpc_serversz(Callable[[float, ctypes.c_void_p], bool]progress_callbackctypes.c_void_pprogress_callback_user_dataz$CtypesArray[llama_model_kv_override]kv_overridesrW   
vocab_onlyuse_mmap	use_mlockcheck_tensorsdevicesN)rF   rG   rH   rI   r   rJ   rK   c_void_prh   r{   ri   rL   c_char_pllama_progress_callbackrw   r\   rM   rN   rO   rP   r}   r}     s   <  11$$CC%44:: 
FOO$	(	v||$	V^^$	78	(	56	&8	(?@A	v}}%	V]]#	fmm$	&--(HrO   r}   c                     e Zd ZU dZerded<   ded<   ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   ded<   d ed!<   ded"<   dej                  fdej                  fdej                  fdej                  fdej                  fdej                  fd	ej                  fd
ej                  fdej                  fdej                  fdej                  fdej                  fdej                  fdej                  fdej                  fdej                  fdej                  fdefdej                  fdej                  fdej                  fdej                  fdej                  fdej                  fdej                  fd!efd"ej                  fgZy#)$llama_context_paramsaJ  Parameters for llama_context

    Attributes:
        n_ctx (int): text context, 0 = from model
        n_batch (int): logical maximum batch size that can be submitted to llama_decode
        n_ubatch (int): physical maximum batch size
        n_seq_max (int): max number of sequences (i.e. distinct states for recurrent models)
        n_threads (int): number of threads to use for generation
        n_threads_batch (int): number of threads to use for batch processing
        rope_scaling_type (int): RoPE scaling type, from `enum llama_rope_scaling_type`
        pooling_type (int): whether to pool (sum) embedding results by sequence id (ignored if no pooling layer)
        attention_type (int): attention type to use for embeddings
        rope_freq_base (float): RoPE base frequency, 0 = from model
        rope_freq_scale (float): RoPE frequency scaling factor, 0 = from model
        yarn_ext_factor (float): YaRN extrapolation mix factor, negative = from model
        yarn_attn_factor (float): YaRN magnitude scaling factor
        yarn_beta_fast (float): YaRN low correction dim
        yarn_beta_slow (float): YaRN high correction dim
        yarn_orig_ctx (int): YaRN original context size
        defrag_thold (float): defragment the KV cache if holes/size > thold, < 0 disabled (default)
        cb_eval (ggml_backend_sched_eval_callback): callback for scheduling eval
        cb_eval_user_data (ctypes.ctypes.c_void_p): user data for cb_eval
        type_k (int): data type for K cache
        type_v (int): data type for V cache
        logits_all (bool): the llama_decode() call computes all logits, not just the last one (DEPRECATED - set llama_batch.logits instead)
        embeddings (bool): if true, extract embeddings (together with logits)
        offload_kqv (bool): whether to offload the KQV ops (including the KV cache) to GPU
        flash_attn (bool): whether to use flash attention
        abort_callback (ggml_abort_callback): abort callback if it returns true, execution of llama_decode() will be aborted
        abort_callback_data (ctypes.ctypes.c_void_p): data for abort_callback
    rT   n_ctxn_batchn_ubatch	n_seq_max	n_threadsn_threads_batchrope_scaling_typepooling_typeattention_typerB   rope_freq_baserope_freq_scaleyarn_ext_factoryarn_attn_factoryarn_beta_fastyarn_beta_slowyarn_orig_ctxdefrag_tholdz'Callable[[ctypes.c_void_p, bool], bool]cb_evalr   cb_eval_user_datatype_ktype_vrW   
logits_all
embeddingsoffload_kqv
flash_attnz!Callable[[ctypes.c_void_p], bool]abort_callbackabort_callback_dataN)rF   rG   rH   rI   r   rJ   rK   c_uint32rh   r{   rL    ggml_backend_sched_eval_callbackr   r\   ggml_abort_callbackrM   rN   rO   rP   r   r     s   @ 
88**99,, 
&//"	FOO$	V__%	foo&	fnn%	FNN+	fll+	&	6<<(	6>>*	FNN+	FNN+	V^^,	6>>*	6>>*	&//*	(	45	foo.	6<< 	6<< 	v}}%	v}}%	&	v}}%	./	07HrO   r   c                     e Zd ZU dZer7ded<   ded<   ded<   ded<   ded<   ded	<   ded
<   ded<   ded<   ded<   ded<   dej                  fdej                  fdej                  fdej                  fdej                  fd	ej                  fd
ej                  fdej                  fdej                  fdej                  fdej                  fgZy)llama_model_quantize_paramsaY  Parameters for llama_model_quantize

    Attributes:
        nthread (int): number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
        ftype (int): quantize to this llama_ftype
        output_tensor_type (int): output tensor type
        token_embedding_type (int): token embeddings tensor type
        allow_requantize (bool): allow quantizing non-f32/f16 tensors
        quantize_output_tensor (bool): quantize output.weight
        only_copy (bool): only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
        pure (bool): quantize all tensors to the default type
        keep_split (bool): quantize to the same number of shards
        imatrix (ctypes.c_void_p): pointer to importance matrix data
        kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
    rT   nthreadftypeoutput_tensor_typetoken_embedding_typerW   allow_requantizequantize_output_tensor	only_copypure
keep_splitr   imatrixr   N)rF   rG   rH   rI   r   rJ   rK   rh   r{   r\   r   rM   rN   rO   rP   r   r   i  s      
!! $$
  %% 
FNN#	&,,	v||,	.	V]]+	!6==1	fmm$		v}}%	FOO$	)HrO   r   c                  N    e Zd ZU dZer
ded<   ded<   defdej                  fgZ	y)llama_logit_biaszjUsed to store logit bias

    Attributes:
        token (llama_token): token id
        bias (float): biasr@   ra   rB   biasNrE   rN   rO   rP   r   r     s4      
+	 HrO   r   c                  >    e Zd ZU dZerded<   dej                  fgZy)llama_sampler_chain_paramszrParameters for llama_sampler_chain

    Attributes:
        no_perf (bool): whether to measure performance timingsrW   no_perfN)	rF   rG   rH   rI   r   rJ   rK   r\   rM   rN   rO   rP   r   r     s'    B
  
FMM"HrO   r   c                  D    e Zd Zdej                  fdej                  fgZy)llama_chat_messagerolecontentN)rF   rG   rH   rK   r   rM   rN   rO   rP   r   r     s!    	!	FOO$HrO   r   llama_model_default_paramsc                      y)z&Get default parameters for llama_modelNrN   rN   rO   rP   r   r          rO   llama_context_default_paramsc                      y)z(Get default parameters for llama_contextNrN   rN   rO   rP   r   r     r   rO   "llama_sampler_chain_default_paramsc                      y)z.Get default parameters for llama_sampler_chainNrN   rN   rO   rP   r   r     r   rO   #llama_model_quantize_default_paramsc                      y)z/Get default parameters for llama_model_quantizeNrN   rN   rO   rP   r   r     r   rO   llama_backend_initc                      y)zyInitialize the llama + ggml backend
    If numa is true, use NUMA optimizations
    Call once at the start of the programNrN   rN   rO   rP   r   r          rO   llama_numa_initc                    y NrN   )numas    rP   r   r   "       rO   llama_backend_freec                      y)zACall once at the end of the program - currently only used for MPINrN   rN   rO   rP   r   r   7  r   rO   llama_load_model_from_filec                    y r   rN   )
path_modelparamss     rP   r   r   D       rO   llama_free_modelc                    y r   rN   models    rP   r   r   P  r   rO   llama_new_context_with_modelc                    y r   rN   )r   r   s     rP   r   r   \  r   rO   
llama_freec                    y)zFrees all allocated memoryNrN   ctxs    rP   r   r   i  r   rO   llama_time_usc                      y r   rN   rN   rO   rP   r   r   t  r   rO   llama_max_devicesc                      y r   rN   rN   rO   rP   r   r   ~      rO   llama_supports_mmapc                      y r   rN   rN   rO   rP   r   r     r   rO   llama_supports_mlockc                      y r   rN   rN   rO   rP   r   r     r   rO   llama_supports_gpu_offloadc                      y r   rN   rN   rO   rP   r   r     r   rO   llama_supports_rpcc                      y r   rN   rN   rO   rP   r   r     r   rO   llama_n_ctxc                    y r   rN   r   s    rP   r   r     r   rO   llama_n_batchc                    y r   rN   r   s    rP   r   r     r   rO   llama_n_ubatchc                    y r   rN   r   s    rP   r   r     r   rO   llama_n_seq_maxc                    y r   rN   r   s    rP   r   r     r   rO   llama_n_vocabc                    y r   rN   r   s    rP   r   r     r   rO   llama_n_ctx_trainc                    y r   rN   r   s    rP   r   r     r   rO   llama_n_embdc                    y r   rN   r   s    rP   r   r     r   rO   llama_n_layerc                    y r   rN   r   s    rP   r  r    r   rO   llama_n_headc                    y r   rN   r   s    rP   r  r    r   rO   llama_get_modelc                    y r   rN   r   s    rP   r  r    r   rO   llama_pooling_typec                    y r   rN   r   s    rP   r  r    r   rO   llama_vocab_typec                    y r   rN   r   s    rP   r
  r
    r   rO   llama_rope_typec                    y r   rN   r   s    rP   r  r    r   rO   llama_rope_freq_scale_trainc                    y)z-Get the model's RoPE frequency scaling factorNrN   r   s    rP   r  r         rO   llama_model_meta_val_strc                    y)z*Get metadata value as a string by key nameNrN   )r   ry   bufbuf_sizes       rP   r  r        $ rO   llama_model_meta_countc                    y)z*Get the number of metadata key/value pairsNrN   r   s    rP   r  r    r  rO   llama_model_meta_key_by_indexc                    y)zGet metadata key name by indexNrN   r   ir  r  s       rP   r  r    r  rO   !llama_model_meta_val_str_by_indexc                    y)z'Get metadata value as a string by indexNrN   r  s       rP   r  r  /  r  rO   llama_model_descc                    y)z&Get a string describing the model typeNrN   )r   r  r  s      rP   r  r  F       rO   llama_model_sizec                    y)z?Returns the total size of all the tensors in the model in bytesNrN   r   s    rP   r!  r!  W  r  rO   llama_model_n_paramsc                    y)z3Returns the total number of parameters in the modelNrN   r   s    rP   r#  r#  _  r  rO   llama_get_model_tensornamec                    y)zGet a llama model tensorNrN   )r   r&  s     rP   r%  r%  g  r   rO   llama_model_has_encoderc                    y)zOReturns true if the model contains an encoder that requires llama_encode() callNrN   r   s    rP   r(  r(  s  r  rO   llama_model_has_decoderc                    y)zNReturns true if the model contains a decoder that requires llama_decode() callNrN   r   s    rP   r*  r*  {  r  rO   llama_model_decoder_start_tokenc                    y)zFor encoder-decoder models, this function returns id of the token that must be provided
    to the decoder to start generating output sequence. For other models, it returns -1.
    NrN   r   s    rP   r,  r,    r   rO   llama_model_is_recurrentc                    y)z?Returns true if the model is recurrent (like Mamba, RWKV, etc.)NrN   r   s    rP   r.  r.    r  rO   llama_model_quantizec                    y)zReturns 0 on successNrN   )	fname_inp	fname_outr   s      rP   r0  r0          rO   llama_lora_adapter_initc                    y)zLoad a LoRA adapter from file
    The loaded adapter will be associated to the given model, and will be free when the model is deleted
    NrN   )r   	path_loras     rP   r5  r5         rO   llama_lora_adapter_setc                    y)zRAdd a loaded LoRA adapter to given context
    This will not modify model's weightNrN   )r   adapterscales      rP   r9  r9         rO   llama_lora_adapter_removec                    y)zcRemove a LoRA adapter from given context
    Return -1 if the adapter is not present in the contextNrN   )r   r;  s     rP   r>  r>    r=  rO   llama_lora_adapter_clearc                    y)z+Remove all LoRA adapters from given contextNrN   r   s    rP   r@  r@    r   rO   llama_lora_adapter_freec                    y)zhManually free a LoRA adapter
    Note: loaded adapters will be free when the associated model is deletedNrN   )r;  s    rP   rB  rB    r   rO   llama_control_vector_applyc                    y)a  Apply a loaded control vector to a llama_context, or if data is NULL, clear
    the currently loaded vector.
    n_embd should be the size of a single layer's control, and data should point
    to an n_embd x n_layers buffer starting from layer 1.
    il_start and il_end are the layer range the vector should apply to (both inclusive)
    See llama_control_vector_load in common to load a control vector.NrN   )lctxrS   lenn_embdil_startil_ends         rP   rD  rD        6 rO   c                  *    e Zd ZU dZerded<   defgZy)llama_kv_cache_view_cellzInformation associated with an individual cell in the KV cache view.

    Attributes:
        pos (llama_pos): The position for this cell. Takes KV cache shifts into account.
            May be negative if the cell is not populated.rj   rd   N)rF   rG   rH   rI   r   rJ   rj   rM   rN   rO   rP   rM  rM  7  s    = 	"#HrO   rM  c                  N   e Zd ZU er(ded<   ded<   ded<   ded<   ded<   ded<   ded	<   d
ed<   dej                  fdej                  fdej                  fdej                  fdej                  fdej                  fd	 ej                  e      fd ej                  e	      fgZ
y)llama_kv_cache_viewrT   n_cells	n_max_seqtoken_count
used_cellsmax_contiguousmax_contiguous_idxz%CtypesArray[llama_kv_cache_view_cell]cellszCtypesArray[llama_seq_id]cells_sequencesN)rF   rG   rH   r   rJ   rK   rh   ri   rM  rk   rM   rN   rO   rP   rO  rO  d  s    4422 
FNN#	fnn%	'	v~~&	6>>*	v~~.	.&..!9:;	NFNN<89	HrO   rO  llama_kv_cache_view_initc                    y)z@Create an empty KV cache view. (use only for debugging purposes)NrN   )r   r   s     rP   rX  rX    r   rO   llama_kv_cache_view_freec                    y)z7Free a KV cache view. (use only for debugging purposes)NrN   )views    rP   rZ  rZ    r  rO   llama_kv_cache_view_updatec                    y)zlUpdate the KV cache view structure with the current state of the KV cache. (use only for debugging purposes)NrN   )r   r\  s     rP   r]  r]        
 rO   llama_get_kv_cache_token_countc                    y)zReturns the number of tokens in the KV cache (slow, use only for debug)
    If a KV cell has multiple sequences assigned to it, it will be counted multiple times
    NrN   r   s    rP   r`  r`    r   rO   llama_get_kv_cache_used_cellsc                    y)zVReturns the number of used KV cells (i.e. have at least one sequence assigned to them)NrN   r   s    rP   rb  rb    r_  rO   llama_kv_cache_clearc                    y)zClear the KV cacheNrN   r   s    rP   rd  rd    r  rO   llama_kv_cache_seq_rmc                    y)a  Removes all tokens that belong to the specified sequence and have positions in [p0, p1)

    Returns false if a partial sequence cannot be removed. Removing a whole sequence never fails

    seq_id < 0 : match any sequence
    p0 < 0     : [0,  p1]
    p1 < 0     : [p0, inf)NrN   )r   rf   p0p1s       rP   rf  rf    s    0 rO   llama_kv_cache_seq_cpc                    y)zCopy all tokens that belong to the specified sequence to another sequence
    Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
    p0 < 0 : [0,  p1]
    p1 < 0 : [p0, inf)NrN   )r   
seq_id_src
seq_id_dstrh  ri  s        rP   rj  rj        . rO   llama_kv_cache_seq_keepc                    y)z?Removes all tokens that do not belong to the specified sequenceNrN   r   rf   s     rP   ro  ro    r_  rO   llama_kv_cache_seq_addc                    y)a2  Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
    If the KV cache is RoPEd, the KV data is updated accordingly:
    - lazily on next llama_decode()
    - explicitly with llama_kv_cache_update()
    p0 < 0 : [0,  p1]
    p1 < 0 : [p0, inf)NrN   )r   rf   rh  ri  deltas        rP   rr  rr    s    2 rO   llama_kv_cache_seq_divc                    y)zInteger division of the positions by factor of `d > 1`
    If the KV cache is RoPEd, the KV data is updated accordingly
    p0 < 0 : [0,  p1]
    p1 < 0 : [p0, inf)NrN   )r   rf   rh  ri  ds        rP   ru  ru  E  rn  rO   llama_kv_cache_defragc                    y)zDefragment the KV cache
    This will be applied:
    - lazily on next llama_decode()
    - explicitly with llama_kv_cache_update()NrN   r   s    rP   rx  rx  d  r   rO   llama_kv_cache_updatec                    y)zDApply the KV cache updates (such as K-shifts, defragmentation, etc.)NrN   r   s    rP   rz  rz  o  r  rO   llama_kv_cache_can_shiftc                    y)z/Check if the context supports KV cache shiftingNrN   r   s    rP   r|  r|  w  r  rO   llama_state_get_sizec                    y)zReturns the *actual* size in bytes of the state (rng, logits, embedding and kv_cache) - will often be smaller after compacting tokensNrN   r   s    rP   r~  r~    r  rO   llama_get_state_sizec                    y)zReturns the maximum size in bytes of the state (rng, logits, embedding
    and kv_cache) - will often be smaller after compacting tokensNrN   r   s    rP   r  r         rO   llama_state_get_datac                    yzCopies the state to the specified destination address.
    Destination needs to have allocated enough memory.
    Returns the number of bytes copiedNrN   )r   dstrU   s      rP   r  r    r  rO   llama_copy_state_datac                    yr  rN   )r   r  s     rP   r  r         rO   llama_state_set_datac                    y)zUSet the state reading from the specified address
    Returns the number of bytes readNrN   )r   srcrU   s      rP   r  r         rO   llama_set_state_datac                    y)z0Set the state reading from the specified addressNrN   )r   r  s     rP   r  r    r   rO   llama_state_load_filec                    y r   rN   r   path_session
tokens_outn_token_capacityn_token_count_outs        rP   r  r        & rO   llama_load_session_filec                    y r   rN   r  s        rP   r  r    r  rO   llama_state_save_filec                    y r   rN   r   r  tokensn_token_counts       rP   r  r  +      " rO   llama_save_session_filec                    y r   rN   r  s       rP   r  r  E  r  rO   llama_state_seq_get_sizec                    y)zCGet the exact size needed to copy the KV cache of a single sequenceNrN   rq  s     rP   r  r  ]  r   rO   llama_state_seq_get_datac                    y)z@Copy the KV cache of a single sequence into the specified bufferNrN   )r   r  rU   rf   s       rP   r  r  m  r  rO   llama_state_seq_set_datac                    y)zfCopy the sequence data (originally copied with `llama_state_seq_get_data`) into the specified sequenceNrN   )r   r  rU   dest_seq_ids       rP   r  r    r  rO   llama_state_seq_save_filec                    y r   rN   )r   filepathrf   r  r  s        rP   r  r    r  rO   llama_state_seq_load_filec                    y r   rN   )r   r  r  r  r  r  s         rP   r  r    s    * rO   llama_batch_get_onec                    y)zReturn batch for single sequence of tokens starting at pos_0

    NOTE: this is a helper function to facilitate transition to the new batch API - avoid using it
    NrN   )r  r_   s     rP   r  r    r  rO   llama_batch_initc                    y)a  Allocates a batch of tokens on the heap that can hold a maximum of n_tokens
    Each token can be assigned up to n_seq_max sequence ids
    The batch has to be freed with llama_batch_free()
    If embd != 0, llama_batch.embd will be allocated with size of n_tokens * embd * sizeof(float)
    Otherwise, llama_batch.token will be allocated to store n_tokens llama_token
    The rest of the llama_batch members are allocated with size n_tokens
    All members are left uninitializedNrN   )r_   rc   r   s      rP   r  r  	  r4  rO   llama_batch_freec                    y)z9Frees a batch of tokens allocated with llama_batch_init()NrN   )batchs    rP   r  r  	  r  rO   llama_encodec                    y)zProcesses a batch of tokens with the ecoder part of the encoder-decoder model.
    Stores the encoder output internally for later use by the decoder cross-attention layers.
    0 - success
    < 0 - errorNrN   r   r  s     rP   r  r  *	  r   rO   llama_decodec                    y)zPositive return values does not mean a fatal error, but rather a warning.
    0 - success
    1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
    < 0 - errorNrN   r  s     rP   r  r  :	  r   rO   llama_set_n_threadsc                    y)zSet the number of threads used for decoding
    n_threads is the number of threads used for generation (single token)
    n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)
    NrN   )r   r   r   s      rP   r  r  G	  r  rO   llama_n_threadsc                    y)z?Get the number of threads used for generation of a single tokenNrN   r   s    rP   r  r  _	  r  rO   llama_n_threads_batchc                    y)zOGet the number of threads used for prompt and batch processing (multiple token)NrN   r   s    rP   r  r  g	  r  rO   llama_set_embeddingsc                    y)zpSet whether the model is in embeddings model or not
    If true, embeddings will be returned but logits will notNrN   )r   r   s     rP   r  r  p	  r  rO   llama_set_causal_attnc                    y)zlSet whether to use causal attention or not
    If set to true, the model will only attend to the past tokensNrN   )r   causal_attns     rP   r  r  z	  r  rO   llama_set_abort_callbackc                    y)zSet abort callbackNrN   )r   r   r   s      rP   r  r  	  r   rO   llama_synchronizec                    y)zWait until all computations are finished
    This is automatically done when using one of the functions below to obtain the computation results
    and is not necessary to call it explicitly in most casesNrN   r   s    rP   r  r  	  r_  rO   llama_get_logitsc                    y)aZ  Token logits obtained from the last call to llama_decode()
    The logits for which llama_batch.logits[i] != 0 are stored contiguously
    in the order they have appeared in the batch.
    Rows: number of tokens for which llama_batch.logits[i] != 0
    Cols: n_vocab

    Returns:
        Pointer to the logits buffer of shape (n_tokens, n_vocab)NrN   r   s    rP   r  r  	  r   rO   llama_get_logits_ithc                    y)zNLogits for the ith token. Equivalent to:
    llama_get_logits(ctx) + i*n_vocabNrN   r   r  s     rP   r  r  	  r=  rO   llama_get_embeddingsc                    y)zDGet the embeddings for the input
    shape: [n_embd] (1-dimensional)NrN   r   s    rP   r  r  	  r   rO   llama_get_embeddings_ithc                    y)zPGet the embeddings for the ith sequence
    llama_get_embeddings(ctx) + i*n_embdNrN   r  s     rP   r  r  	  r=  rO   llama_get_embeddings_seqc                    y)zGet the embeddings for a sequence id
    Returns NULL if pooling_type is LLAMA_POOLING_TYPE_NONE
    shape: [n_embd] (1-dimensional)NrN   rq  s     rP   r  r  	  r8  rO   llama_token_get_textc                    y r   rN   r   ra   s     rP   r  r  
  r   rO   llama_token_get_scorec                    y r   rN   r  s     rP   r  r  
  r   rO   llama_token_get_attrc                    y r   rN   r  s     rP   r  r  
  r   rO   llama_token_is_eogc                    y)zXCheck if the token is supposed to end generation (end-of-generation, eg. EOS, EOT, etc.)NrN   r  s     rP   r  r   
  r_  rO   llama_token_is_controlc                    y)z>Identify if Token Id is a control token or a render-able tokenNrN   r  s     rP   r  r  *
  r   rO   llama_token_bosc                    y)zbeginning-of-sentenceNrN   r   s    rP   r  r  8
  r  rO   llama_token_eosc                    y)zend-of-sentenceNrN   r   s    rP   r  r  ?
  r  rO   llama_token_eotc                    y)zend-of-turnNrN   r   s    rP   r  r  F
  r  rO   llama_token_clsc                    y)classificationNrN   r   s    rP   r  r  M
  r  rO   llama_token_sepc                    y)zsentence separatorNrN   r   s    rP   r  r  T
  r  rO   llama_token_nlc                    y)z	next-lineNrN   r   s    rP   r  r  [
  r  rO   llama_add_bos_tokenc                    y r   rN   r   s    rP   r  r  b
  r   rO   llama_add_eos_tokenc                    y r   rN   r   s    rP   r  r  h
  r   rO   llama_token_prefixc                     y)zcodellama infill tokensNrN   r   s    rP   r  r  o
  r  rO   llama_token_middlec                    y r   rN   r   s    rP   r  r  v
  r   rO   llama_token_suffixc                    y r   rN   r   s    rP   r  r  |
  r   rO   llama_token_fim_prec                    y r   rN   r   s    rP   r  r  
  r   rO   llama_token_fim_sufc                    y r   rN   r   s    rP   r  r  
  r   rO   llama_token_fim_midc                    y r   rN   r   s    rP   r  r  
  r   rO   llama_token_fim_padc                    y r   rN   r   s    rP   r  r  
  r   rO   llama_token_fim_repc                    y r   rN   r   s    rP   r  r  
  r   rO   llama_token_fim_sepc                    y r   rN   r   s    rP   r   r   
  r   rO   llama_tokenizec                    y)a  Convert the provided text into tokens.

    Args:
        model: The model to use for tokenization.
        text: The text to tokenize.
        text_len: The length of the text.
        tokens: The tokens pointer must be large enough to hold the resulting tokens.
        n_max_tokens: The maximum number of tokens to return.
        add_special: Allow adding special tokenns if the model is configured to do so.
        parse_special: Allow parsing special tokens.

    Returns:
        Returns the number of tokens on success, no more than n_tokens_max
        Returns a negative number on failure - the number of tokens that would have been returned
    NrN   )r   texttext_lenr  n_tokens_maxadd_specialparse_specials          rP   r  r  
  s    L rO   llama_token_to_piecec                    y)a#  Token Id -> Piece.
    Uses the vocabulary in the provided context.
    Does not write null terminator to the buffer.
    User code is responsible to remove the leading whitespace of the first non-BOS token when decoding multiple tokens.

    Args:
        model: The model to use for tokenization.
        token: The token to convert.
        buf: The buffer to write the token to.
        length: The length of the buffer.
        lstrip: The number of leading spaces to skip.
        special: If true, special tokens are rendered in the output.NrN   )r   ra   r  lengthlstripspecials         rP   r	  r	  
      B rO   llama_detokenizec                    y)a  Convert the provided tokens into text (inverse of llama_tokenize()).

    Args:
        model: The model to use for tokenization.
        tokens: The tokens to convert.
        n_tokens: The number of tokens.
        text: The buffer to write the text to.
        text_len_max: The length of the buffer.
        remove_special: Allow to remove BOS and EOS tokens if model is configured to do so.
        unparse_special: If true, special tokens are rendered in the output.NrN   )r   r  r_   r  text_len_maxremove_specialunparse_specials          rP   r  r  -  r  rO   llama_chat_apply_templatec                    y r   rN   )r   tmplchatn_msgs       rP   r  r  h  r  rO   llama_chat_builtin_templatesc                    y)zGet list of built-in chat templates.

    Args:
        output: Output buffer to store template names.
        len: Length of the output buffer.

    Returns:
        Number of templates available.
        Returns a negative number on error.
    NrN   )outputrG  s     rP   r  r  ~  rn  rO   c                      e Zd Zy)llama_sampler_iN)rF   rG   rH   rN   rO   rP   r  r    s    rO   r  c                  <    e Zd Zd ej                  e      fdefgZy)llama_samplerifacer   N)rF   rG   rH   rK   ri   r  llama_sampler_context_trM   rN   rO   rP   r  r    s%    	.&..12	'(HrO   r  acceptapplyresetclonefreellama_sampler_namec                    y r   rN   smpls    rP   r'  r'    r   rO   llama_sampler_acceptc                    y r   rN   )r*  ra   s     rP   r+  r+    r   rO   llama_sampler_applyc                    y r   rN   )r*  cur_ps     rP   r-  r-    r   rO   llama_sampler_resetc                    y r   rN   r)  s    rP   r0  r0    r   rO   llama_sampler_clonec                    y r   rN   r)  s    rP   r2  r2  #  r   rO   llama_sampler_freec                    y r   rN   r)  s    rP   r4  r4  .  r   rO   llama_sampler_chain_initc                    y r   rN   )r   s    rP   r6  r6  ;  r   rO   llama_sampler_chain_addc                    y r   rN   )chainr*  s     rP   r8  r8  F  r   rO   llama_sampler_chain_getc                    y r   rN   r:  r  s     rP   r;  r;  P  r   rO   llama_sampler_chain_nc                    y r   rN   r:  s    rP   r>  r>  \  r   rO   llama_sampler_chain_removec                    y r   rN   r=  s     rP   rA  rA  g  r   rO   llama_sampler_init_greedyc                      y r   rN   rN   rO   rP   rC  rC  u  r   rO   llama_sampler_init_distc                     y r   rN   )seeds    rP   rE  rE  {  r   rO   llama_sampler_init_softmaxc                      y r   rN   rN   rO   rP   rH  rH    r   rO   llama_sampler_init_top_kc                     y r   rN   )ks    rP   rJ  rJ    r   rO   llama_sampler_init_top_pc                     y r   rN   rD   min_keeps     rP   rM  rM    r   rO   llama_sampler_init_min_pc                     y r   rN   rO  s     rP   rQ  rQ    r   rO   llama_sampler_init_typicalc                     y r   rN   rO  s     rP   rS  rS    r   rO   llama_sampler_init_tempc                     y r   rN   )ts    rP   rU  rU    r   rO   llama_sampler_init_temp_extc                     y r   rN   )rW  rt  exponents      rP   rX  rX    r   rO   llama_sampler_init_xtcc                    y r   rN   )rD   rW  rP  rG  s       rP   r[  r[    r   rO   llama_sampler_init_mirostatc                    y r   rN   )n_vocabrG  tauetams        rP   r]  r]    r   rO   llama_sampler_init_mirostat_v2c                    y r   rN   )rG  r`  ra  s      rP   rc  rc    r   rO   llama_sampler_init_grammarc                    y r   rN   )r   grammar_strgrammar_roots      rP   re  re     r   rO   llama_sampler_init_penaltiesc	   	                 y r   rN   )	r_  special_eos_idlinefeed_idpenalty_last_npenalty_repeatpenalty_freqpenalty_presentpenalize_nl
ignore_eoss	            rP   ri  ri    rK  rO   llama_sampler_init_dryc                    y r   rN   )r   dry_multiplierdry_basedry_allowed_lengthdry_penalty_last_nseq_breakersnum_breakerss          rP   rs  rs  <  rn  rO   llama_sampler_init_logit_biasc                    y r   rN   )r_  n_logit_bias
logit_biass      rP   r{  r{  Z  r   rO   llama_sampler_init_infillc                    y)zGThis sampler is meant to be used for fill-in-the-middle infilling.
    NrN   r   s    rP   r  r  {  r   rO   llama_sampler_get_seedc                    y r   rN   r)  s    rP   r  r    r   rO   llama_sampler_samplec                    y r   rN   )r*  r   idxs      rP   r  r    r   rO   llama_split_pathc                    y)z-Build a split GGUF final path for this chunk.NrN   )
split_pathmaxlenpath_prefixsplit_nosplit_counts        rP   r  r    r  rO   llama_split_prefixc                    y)z^Extract the path prefix from the split_path if and only if the split_no and split_count match.NrN   )split_prefixr  r  r  r  s        rP   r  r    r  rO   llama_print_system_infoc                      y r   rN   rN   rO   rP   r  r    r   rO   llama_log_setc                    y)z|Set callback for all future logging events.

    If this is not called, or NULL is supplied, everything is output on stderr.NrN   )log_callback	user_datas     rP   r  r    r  rO   c                      e Zd Zdej                  fdej                  fdej                  fdej                  fdej
                  fdej
                  fgZy)llama_perf_context_data
t_start_ms	t_load_mst_p_eval_ms	t_eval_msn_p_evaln_evalNrF   rG   rH   rK   rt   rh   rM   rN   rO   rP   r  r     sQ    	v'	foo&	(	foo&	V^^$	6>>"HrO   r  c                  D    e Zd Zdej                  fdej
                  fgZy)llama_perf_sampler_datat_sample_msn_sampleNr  rN   rO   rP   r  r    s!    	(	V^^$HrO   r  llama_perf_contextc                    y r   rN   r   s    rP   r  r    r   rO   llama_perf_context_printc                    y r   rN   r   s    rP   r  r  "  r   rO   llama_perf_context_resetc                    y r   rN   r   s    rP   r  r  ,  r   rO   llama_perf_samplerc                    y r   rN   r@  s    rP   r  r  7  r   rO   llama_perf_sampler_printc                    y r   rN   r@  s    rP   r  r  A  r   rO   llama_perf_sampler_resetc                    y r   rN   r@  s    rP   r  r  K  r   rO   )returnr}   )r  r   )r  r   )r  r   )r   rT   )r   rs   r   r}   r  Optional[llama_model_p])r   r3   )r   r3   r   r   r  zOptional[llama_context_p])r   r4   )r  rT   )r  rW   )r   r4   r  rT   )r   r3   r  rT   )r   r4   r  r  )r   r3   r  rB   )
r   r3   ry   Union[ctypes.c_char_p, bytes]r  rs   r  rT   r  rT   )
r   r3   r  Union[ctypes.c_int, int]r  (Union[bytes, CtypesArray[ctypes.c_char]]r  rT   r  rT   )r   r3   r  r  r  Union[ctypes.c_size_t, int]r  rT   )r   r3   r&  r  r  r   )r   r3   r  rW   )r2  rs   r3  rs   r   z/CtypesPointerOrRef[llama_model_quantize_params]r  rT   )r   r3   r7  rs   r  zOptional[llama_lora_adapter_p])r   r4   r;  llama_lora_adapter_pr<  rB   r  rT   )r   r4   r;  r  r  rT   )r;  r  )rF  r4   rS   z"CtypesPointerOrRef[ctypes.c_float]rG  rT   rH  rT   rI  rT   rJ  rT   r  rT   )r   r4   r   Union[ctypes.c_int32, int]r  rO  )r\  z%'ctypes.pointer[llama_kv_cache_view]')r   r4   r\  z'CtypesPointerOrRef[llama_kv_cache_view])
r   r4   rf   Union[llama_seq_id, int]rh  Union[llama_pos, int]ri  r  r  rW   )
r   r4   rl  r  rm  r  rh  r  ri  r  )r   r4   rf   r  )
r   r4   rf   r  rh  r  ri  r  rt  r  )
r   r4   rf   r  rh  r  ri  r  rw  r  )r   r4   r  rW   )r   r4   r  CtypesArray[ctypes.c_uint8]rU   r  r  rT   )r   r4   r  r  r  rT   )r   r4   r  r  rU   r  r  rT   )r   r4   r  r  r  rT   )r   r4   r  rs   r  r`   r  r  r  #CtypesPointerOrRef[ctypes.c_size_t]r  rW   )r   r4   r  rs   r  r`   r  r  r  r  r  rT   )
r   r4   r  rs   r  r`   r  r  r  rW   )
r   r4   r  rs   r  r`   r  r  r  rT   )r   r4   rf   rk   r  rT   )
r   r4   r  r  rU   r  rf   rk   r  rT   )
r   r4   r  r  rU   r  r  rk   r  rT   )r   r4   r  rs   rf   rk   r  r`   r  r  r  rT   )r   r4   r  rs   r  rk   r  r`   r  r  r  r  r  rT   )r  r`   r_   r  r  r^   )r_   r  rc   r  r   r  r  r^   )r  r^   )r   r4   r  r^   r  rT   )r   r4   r   r  r   r  )r   r4   r   rW   )r   r4   r  rW   )r   r4   r   z!Callable[[ctypes.c_void_p], None]r   r   )r   r4   r  rb   )r   r4   r  r  r  rb   )r   r4   rf   r  r  rb   )r   r3   ra   Union[llama_token, int]r  rs   )r   r3   ra   r  r  rB   )r   r3   ra   r  r  rT   )r   r3   ra   r  r  rW   )r   r3   r  rs   r  r  r  r`   r  r  r  Union[ctypes.c_bool, bool]r  r  r  rT   )r   r3   ra   r  r  z9Union[ctypes.c_char_p, bytes, CtypesArray[ctypes.c_char]]r  r  r  r  r  r  r  rT   )r   r3   r  r`   r_   r  r  rs   r  r  r  r  r  r  r  rT   )
r   r3   r  rs   r  zCtypesArray[llama_chat_message]r  rT   r  rT   )r  CtypesArray[bytes]rG  r  r  rT   )r*  llama_sampler_pr  rs   )r*  r  ra   r  )r*  r  r/  z#CtypesArray[llama_token_data_array])r*  r  )r*  r  r  r  )r   r   r  r  )r:  r  r*  r  )r:  r  r  r  r  r  )r:  r  r  rT   )r  r  )rG  rT   r  r  )rL  rT   r  r  )rD   rB   rP  rT   r  r  )rW  rB   r  r  )rW  rB   rt  rB   rZ  rB   r  r  )
rD   rB   rW  rB   rP  rT   rG  rT   r  r  )r_  rT   rG  rT   r`  rB   ra  rB   rb  rT   r  r  )rG  rT   r`  rB   ra  rB   r  r  )r   r3   rg  rs   rh  rs   r  r  )r_  rT   rk  rT   rl  rT   rm  rT   rn  rB   ro  rB   rp  rB   rq  rW   rr  rW   r  r  )r   r3   ru  rB   rv  rB   rw  rT   rx  rT   ry  r  rz  rT   r  r  )r_  rT   r}  rT   r~  zCtypesArray[llama_logit_bias]r  r  )r   r3   r  r  )r*  r  r  rT   )r*  r  r   r4   r  rT   r  rT   )r  rs   r  r  r  rs   r  r  r  r  r  rT   )r  rs   r  r  r  rs   r  r  r  r  r  rT   )r  rs   )r  zOptional[CtypesFuncPointer]r  r   )r   r4   r  r  )r:  r  r  r  )r:  r  (  
__future__r   osrK   pathlibtypingr   r   r   r   r   llama_cpp._ctypes_extensionsr	   r
   r   r   r   r   r   r   r   r   _lib_base_nameenvironget_override_base_pathPathpathabspathdirname__file__
_base_path_libctypes_functionGGML_TYPE_F32GGML_TYPE_F16GGML_TYPE_Q4_0GGML_TYPE_Q4_1GGML_TYPE_Q5_0GGML_TYPE_Q5_1GGML_TYPE_Q8_0GGML_TYPE_Q8_1GGML_TYPE_Q2_KGGML_TYPE_Q3_KGGML_TYPE_Q4_KGGML_TYPE_Q5_KGGML_TYPE_Q6_KGGML_TYPE_Q8_KGGML_TYPE_IQ2_XXSGGML_TYPE_IQ2_XSGGML_TYPE_IQ3_XXSGGML_TYPE_IQ1_SGGML_TYPE_IQ4_NLGGML_TYPE_IQ3_SGGML_TYPE_IQ2_SGGML_TYPE_IQ4_XSGGML_TYPE_I8GGML_TYPE_I16GGML_TYPE_I32GGML_TYPE_I64GGML_TYPE_F64GGML_TYPE_IQ1_MGGML_TYPE_COUNT	CFUNCTYPEr\   r   r   r   r   argtypesrZ   restypeLLAMA_MAX_DEVICESLLAMA_DEFAULT_SEEDLLAMA_TOKEN_NULLLLAMA_FILE_MAGIC_GGLALLAMA_FILE_MAGIC_GGSNLLAMA_FILE_MAGIC_GGSQLLAMA_SESSION_MAGICLLAMA_SESSION_VERSIONLLAMA_STATE_SEQ_MAGICLLAMA_STATE_SEQ_VERSIONrT   r3   llama_model_p_ctypesr4   llama_context_p_ctypesrh   rj   r@   ri   llama_token_prk   LLAMA_VOCAB_TYPE_NONELLAMA_VOCAB_TYPE_SPMLLAMA_VOCAB_TYPE_BPELLAMA_VOCAB_TYPE_WPMLLAMA_VOCAB_TYPE_UGMLLAMA_VOCAB_TYPE_RWKVLLAMA_VOCAB_PRE_TYPE_DEFAULTLLAMA_VOCAB_PRE_TYPE_LLAMA3!LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM#LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODERLLAMA_VOCAB_PRE_TYPE_FALCONLLAMA_VOCAB_PRE_TYPE_MPTLLAMA_VOCAB_PRE_TYPE_STARCODERLLAMA_VOCAB_PRE_TYPE_GPT2LLAMA_VOCAB_PRE_TYPE_REFACTLLAMA_VOCAB_PRE_TYPE_COMMAND_RLLAMA_VOCAB_PRE_TYPE_STABLELM2LLAMA_VOCAB_PRE_TYPE_QWEN2LLAMA_VOCAB_PRE_TYPE_OLMOLLAMA_VOCAB_PRE_TYPE_DBRXLLAMA_VOCAB_PRE_TYPE_SMAUGLLAMA_VOCAB_PRE_TYPE_POROLLAMA_VOCAV_PRE_TYPE_CHATGLM3LLAMA_VOCAB_PRE_TYPE_CHATGLM4LLAMA_VOCAB_PRE_TYPE_VIKINGLLAMA_VOCAB_PRE_TYPE_JAISLLAMA_VOCAB_PRE_TYPE_TEKKENLLAMA_VOCAB_PRE_TYPE_SMOLLMLLAMA_VOCAB_PRE_TYPE_CODESHELLLLAMA_VOCAB_PRE_TYPE_BLOOM!LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISHLLAMA_VOCAB_PRE_TYPE_EXAONELLAMA_VOCAB_PRE_TYPE_CHAMELEONLLAMA_VOCAB_PRE_TYPE_MINERVALLAMA_ROPE_TYPE_NONELLAMA_ROPE_TYPE_NORMLLAMA_ROPE_TYPE_NEOXGGML_ROPE_TYPE_NEOXLLAMA_TOKEN_TYPE_UNDEFINEDLLAMA_TOKEN_TYPE_NORMALLLAMA_TOKEN_TYPE_UNKNOWNLLAMA_TOKEN_TYPE_CONTROLLLAMA_TOKEN_TYPE_USER_DEFINEDLLAMA_TOKEN_TYPE_UNUSEDLLAMA_TOKEN_TYPE_BYTELLAMA_TOKEN_ATTR_UNDEFINEDLLAMA_TOKEN_ATTR_UNKNOWNLLAMA_TOKEN_ATTR_UNUSEDLLAMA_TOKEN_ATTR_NORMALLLAMA_TOKEN_ATTR_CONTROLLLAMA_TOKEN_ATTR_USER_DEFINEDLLAMA_TOKEN_ATTR_BYTELLAMA_TOKEN_ATTR_NORMALIZEDLLAMA_TOKEN_ATTR_LSTRIPLLAMA_TOKEN_ATTR_RSTRIPLLAMA_TOKEN_ATTR_SINGLE_WORDLLAMA_FTYPE_ALL_F32LLAMA_FTYPE_MOSTLY_F16LLAMA_FTYPE_MOSTLY_Q4_0LLAMA_FTYPE_MOSTLY_Q4_1LLAMA_FTYPE_MOSTLY_Q8_0LLAMA_FTYPE_MOSTLY_Q5_0LLAMA_FTYPE_MOSTLY_Q5_1LLAMA_FTYPE_MOSTLY_Q2_KLLAMA_FTYPE_MOSTLY_Q3_K_SLLAMA_FTYPE_MOSTLY_Q3_K_MLLAMA_FTYPE_MOSTLY_Q3_K_LLLAMA_FTYPE_MOSTLY_Q4_K_SLLAMA_FTYPE_MOSTLY_Q4_K_MLLAMA_FTYPE_MOSTLY_Q5_K_SLLAMA_FTYPE_MOSTLY_Q5_K_MLLAMA_FTYPE_MOSTLY_Q6_KLLAMA_FTYPE_MOSTLY_IQ2_XXSLLAMA_FTYPE_MOSTLY_IQ2_XSLLAMA_FTYPE_MOSTLY_Q2_K_SLLAMA_FTYPE_MOSTLY_IQ3_XSLLAMA_FTYPE_MOSTLY_IQ3_XXSLLAMA_FTYPE_MOSTLY_IQ1_SLLAMA_FTYPE_MOSTLY_IQ4_NLLLAMA_FTYPE_MOSTLY_IQ3_SLLAMA_FTYPE_MOSTLY_IQ3_MLLAMA_FTYPE_MOSTLY_IQ2_SLLAMA_FTYPE_MOSTLY_IQ2_MLLAMA_FTYPE_MOSTLY_IQ4_XSLLAMA_FTYPE_MOSTLY_IQ1_MLLAMA_FTYPE_MOSTLY_BF16LLAMA_FTYPE_MOSTLY_TQ1_0LLAMA_FTYPE_MOSTLY_TQ2_0LLAMA_FTYPE_GUESSED#LLAMA_ROPE_SCALING_TYPE_UNSPECIFIEDLLAMA_ROPE_SCALING_TYPE_NONELLAMA_ROPE_SCALING_TYPE_LINEARLLAMA_ROPE_SCALING_TYPE_YARN LLAMA_ROPE_SCALING_TYPE_LONGROPE!LLAMA_ROPE_SCALING_TYPE_MAX_VALUELLAMA_POOLING_TYPE_UNSPECIFIEDLLAMA_POOLING_TYPE_NONELLAMA_POOLING_TYPE_MEANLLAMA_POOLING_TYPE_CLSLLAMA_POOLING_TYPE_LASTLLAMA_POOLING_TYPE_RANK LLAMA_ATTENTION_TYPE_UNSPECIFIEDLLAMA_ATTENTION_TYPE_CAUSALLLAMA_ATTENTION_TYPE_NON_CAUSALLLAMA_SPLIT_MODE_NONELLAMA_SPLIT_MODE_LAYERLLAMA_SPLIT_MODE_ROW	Structurer?   rY   rR   llama_token_data_array_prL   r   r^   LLAMA_KV_OVERRIDE_TYPE_INTLLAMA_KV_OVERRIDE_TYPE_FLOATLLAMA_KV_OVERRIDE_TYPE_BOOLLLAMA_KV_OVERRIDE_TYPE_STRrn   rw   r}   r   r{   r   llama_log_callbackr   r   llama_logit_bias_pr   r   r  llama_lora_adapter_p_ctypesr   r   r   r   r   GGML_NUMA_STRATEGY_DISABLEDGGML_NUMA_STRATEGY_DISTRIBUTEGGML_NUMA_STRATEGY_ISOLATEGGML_NUMA_STRATEGY_NUMACTLGGML_NUMA_STRATEGY_MIRRORGGML_NUMA_STRATEGY_COUNTr   r   r   r   r   r   r[   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r
  r  r  r  r  r  r  r  c_uint64r!  r#  r%  r(  r*  r,  r.  r0  r5  r9  r>  r@  rB  rD  rM  rO  llama_kv_cache_view_prX  rZ  r]  r`  rb  rd  rf  rj  ro  rr  ru  rx  rz  r|  r~  r  c_uint8r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r	  r  r  r  r!  r  r  r  llama_sampler_p_ctypesllama_sampler_i_namellama_sampler_i_acceptllama_sampler_i_applyllama_sampler_i_resetllama_sampler_i_clonellama_sampler_i_freerM   r'  r+  r-  r0  r2  r4  r6  r8  r;  r>  rA  rC  rE  rH  rJ  rM  rQ  rS  rU  rX  r[  r]  rc  re  ri  rs  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rN   rO   rP   <module>r     s.   " 	        jjnn%9: QdQl\W\\"''//"''//(*CDEMr~ryr~r~  @S  sT
>:64T:L       $46#3#3
MM6??FMM6??$   'f&&v}}fooF  #%   !'   **,       #  #  #  ,   .   -  +S1  NN	nn{+~~     @  -  '  #  1D  !  $% !&' #  !"   !" !#       "  "        !#  $& !  !# !    -. . *      !      !     !  &  $     % \                                    ') #  !"   #$  $@ ! "$       $&   "#     v'' ( $V^^$45 V-- . *6>>*@A  +&**
MM6>>6?? 8!&"" !T       FLL 
.f.. 
.d-)) -xZ6++ ZF &V%%&,, J**&"2"2 *bv'' " $V^^$45 !1!1 &))   ,fnnV__= 
  

 "

 (

 )

 

     !     
 \\N

  

  __()
1
 

 "/0
"6
 

 
NN


 $b&//: ;
 &FMM: ;
 'V]]; <
 -r6==A B
 %r6==9 :
 !7 8&//J K
 #9":FOOL M
 !$:#;V__M N
 "%;$<fooN O
 #7"8&..I J
 $';&<fnnM N
 "6!7H I
 #7"8&..I J
 "6!7H I
 "%;$<>RS T
 %(>'?N O
 #&:%;V\\J K
 "%9$:FLLI J .1E0FW X 	 NN		& 
 	 		 ),@+A6>>R S #	 NN	 
2 	 		 '	 NN	 
2 	 		 6??FOO<
NN
	1 *
 	
 #&:%;V__M N '*>)?Q R 3V__Ev = *-A,BFMMR S *-A,BFMMR S %(<'=v~~ +.B-CV]]S T 23
 OO <
 	 6??+
%*#
 8&..I
NN
	#7@E
 89
NN
	#7
 

  !

&  v~~& NN

, 
 	
   	:
$v// 
$Z&** . '':; 
 V^^,
	%?
 +.C-DdK L  #9;P"QSW $'=&> #&<%=v~~ '*@)A4H I 	 MM		$ 	 		 
	6  	
	( ) 		
 	
&  6Et"  	
	$ 	 		
 !
6  	
	$ 	 		
  
( (+A*BDI J (+A*BDI J +.D-Ev}}U V '*@)A6??S T '*@)A6??S T v~~&
 OO				$	 &	
 			  v~~& OO	: ^V^^FNN;V__M
OO
		$ &
 	
 ^V^^FNN;<
OO
	:
 v' MM
	 ) 2	
 ; 

$ v' OO
	 ) 2	
 ; 	
  	 MM		 % /	 
	  	 OO		 % /	 		 \*
OO

 v~~&	 OO			$ & 	 		( v~~&	 OO			$ & 	 		"  OO
	  %	
 / 	
$ v' OO				 	 )		
 2	 ;	 			4  	$	&	 			. H+(
$ *
 $ #k]D9 : "8+!FW X "8+!FW X 
 	
	
)
 0

 "%;$<fnnM N (+A*BFNNS T '*@&--)PRVW X (+A6==*QSWX Y 0&//B
	5 )
 $'=&>E F /0.&..2P		" V^^,FNN6>>"
	7 
 34nfnnV^^6T V^^,FNN6>>"
	7 
 \*FNN6>>"
	": 
 1;?!8
 2K@&..!8
 1;?!8 /=v}} 3[A6==!8	 "%9$:KH I "%9$:KH I "%9$:KH I "%9$:KH I "%9$:KH I !$8#9;G H &)=(>N O
 &)=(>N O %(<'={K L %(<'={K L
 %(<'={K L
 &)=(>L M &)=(>L M &)=(>L M &)=(>L M &)=(>L M &)=(>L M2  NN
 ' %	
 + , . 	P  NN" 
C %	
 % ( 	n {# NN$ ' 	
 + / 0 	\ )*	 NN	
 * 	 		 "v' NN	$ 		v !// f&& F$$  #M2O'6 'v''9OP )))$0FT (((
 ":  )((/EF ((()?AWX 'v''.DE  !"%&#$#$#$!"  
OO


 [)


 56

"E
 


 

 

  

 34


 V^^,
9
 
LL

  V^^,
9
 ,b2HI J
 *V__,=?UV W -r3IJ K +fnn-=?UV W ^^V__%

 ^^V__%

  ^^V__%


 *V^^,<>TU V !^^V^^V^^4
&+
 ^^V^^V__fooF
"%-0
$ !^^V__fnnfnnfnnU
"'.38;
 $__fnnfnn5

 %
  6??FOO<
',<A
  "
   	
      0 v' 


 
 	

 
 %
 
 

" #^^V^^%78
 #1N
8 

 
OO

 3V^^D

 /69
 __foovfllS
LL
		'	 	 '		
 *	 		
	  __foovfllS
LL
		'	 	 '		
 *	 		
	 *B@ A __foo&
-
6f..  f..  


 


 

 


 


 

rO   