
     sgv!                        d dl mZ d dlZd dlmZmZmZmZmZm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlZdej$                  d<    G d	 d
e      Z G d de      Z G d dee      Z G d de      Zy)    )annotationsN)OptionalListLiteralUnionDictcast)Self)Fieldmodel_validator)BaseSettings protected_namespacesc                     e Zd ZU dZ ed      Zded<    edd      Zd	ed
<    eddd      Zded<    ee	j                  d      Zded<    eddd      Zded<    edd      Zded<    edd      Zded<    e e	j                         d      Zded<    e e	j"                         d      Zded<    edd       Zd!ed"<    edd#      Zd	ed$<    ee	j*                  d%      Zded&<    ed'dd(      Zded)<    ed*d+d,      Zded-<    ed*d+d.      Zded/<    e e ej8                         d0z  d+      d+d1      Zded2<    e e ej8                         d+      dd3      Zded4<    ee	j>                  5      Z ded6<    ed7d8      Z!d9ed:<    ed7d;      Z"d9ed<<    ed=5      Z#d9ed><    ed?5      Z$d9ed@<    edA5      Z%d9edB<    ed?5      Z&d9edC<    ed5      Z'dedD<    edEdF      Z(dedG<    edEdH      Z)dedI<    eddJ      Z*dedK<    edEdL      Z+dedM<    eddN      Z,dedO<    edPddQ      Z-dedR<    eddS      Z.d	edT<    eddU      Z/d	edV<    eddW      Z0dXedY<    eddZ      Z1d	ed[<    edd\      Z2d	ed]<    edd^      Z3ded_<    ed`da      Z4dbedc<    eddde      Z5dedf<    eddg      Z6d	edh<    eddi      Z7d	edj<    eddk      Z8d	edl<    eddm      Z9d	edn<    edodp      Z:dedq<    eddr      Z;dsedt<    eddu      Z<dsedv<    edEdw      Z=dedx<    e>dyz      d|d{       Z?y)}ModelSettingsz*Model settings used to load a Llama model.z8The path to the model to use for generating completions.)descriptionstrmodelNz9The alias of the model to use for generating completions.defaultr   Optional[str]model_aliasr   z_The number of layers to put on the GPU. The rest will be on the CPU. Set -1 to move all to GPU.)r   ger   intn_gpu_layerszThe split mode to use.
split_modezMain GPU to use.main_gpuz0Split layers across multiple GPUs in proportion.zOptional[List[float]]tensor_splitFz&Whether to only return the vocabulary.bool
vocab_onlyz	Use mmap.use_mmapz
Use mlock.	use_mlockzList of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.zOptional[List[str]]kv_overridesz2comma seperated list of rpc servers for offloadingrpc_serverszRandom seed. -1 for random.seedi   zThe context size.n_ctxi      zThe batch size to use per eval.n_batchz)The physical batch size used by llama.cppn_ubatch   z8The number of threads to use. Use -1 for max cpu threads	n_threadszNThe number of threads to use when batch processing. Use -1 for max cpu threadsn_threads_batch)r   rope_scaling_typeg        zRoPE base frequencyfloatrope_freq_basezRoPE frequency scaling factorrope_freq_scaleg      yarn_ext_factorg      ?yarn_attn_factorg      @@yarn_beta_fastyarn_beta_slowyarn_orig_ctxTz+if true, use experimental mul_mat_q kernels	mul_mat_qzWhether to return logits.
logits_allzWhether to use embeddings.	embeddingz"Whether to offload kqv to the GPU.offload_kqvzWhether to use flash attention.
flash_attn@   z5Last n tokens to keep for repeat penalty calculation.last_n_tokens_sizezoOptional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.	lora_basez*Path to a LoRA file to apply to the model.	lora_pathzEnable NUMA support.zUnion[bool, int]numazChat format to use.chat_formatz<Path to a CLIP model to use for multi-modal chat completion.clip_model_pathz=Use a cache to reduce processing times for evaluated prompts.cacheramz5The type of cache to use. Only used if cache is True.zLiteral['ram', 'disk']
cache_typel        z;The size of the cache in bytes. Only used if cache is True.
cache_sizez5The path to a HuggingFace tokenizer_config.json file.hf_tokenizer_config_pathz~The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained(). hf_pretrained_model_name_or_pathz=The model repo id to use for the HuggingFace tokenizer model.hf_model_repo_idzHMethod to use for speculative decoding. One of (prompt-lookup-decoding).draft_model
   z2Number of tokens to predict using the draft model.draft_model_num_pred_tokensz#Type of the key cache quantization.zOptional[int]type_kz%Type of the value cache quantization.type_vz#Whether to print debug information.verbosebefore)modec                    t        j                         }t        t        t        t
        f   |       }|j                  dd      dk(  r||d<   |j                  dd      dk(  r||d<   | S )Nr,   r   r   r-   )multiprocessing	cpu_countr	   r   r   r   get)selfrT   valuess      L/var/www/html/venv/lib/python3.12/site-packages/llama_cpp/server/settings.pyset_dynamic_defaultsz"ModelSettings.set_dynamic_defaults   sg    
 $--/	d38nd+::k1%+"+F;::'+r1(1F$%    )returnr
   )@__name__
__module____qualname____doc__r   r   __annotations__r   r   	llama_cppLLAMA_SPLIT_MODE_LAYERr   r   r   r!   llama_supports_mmapr"   llama_supports_mlockr#   r$   r%   LLAMA_DEFAULT_SEEDr&   r'   r)   r*   maxrS   rT   r,   r-   #LLAMA_ROPE_SCALING_TYPE_UNSPECIFIEDr.   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r=   r>   r?   r@   rA   rB   rC   rE   rF   rG   rH   rI   rJ   rL   rM   rN   rO   r   rY   r   rZ   rX   r   r      s   4NE3  "'O"K 
 uL# 
 00,J  &Hc 
 +0F+L'  #KJ  -	--/Hd  .	..0 It  ). o)L%  "'H"K 
 ,,:WD#  t7JKE3K'HGS  'RHc  -O--/14a8NIs 
 !-O--/3dOS 
 #==s  "#;PQNEQ"!@OU  #40OU0#C0e0!$/NE/!#.NE.q)M3)"OIt  T7RSJSE7STItT"FK  #DJ  $K   % F I}   %@ I} 
 #*D
 
 "')"K  &+R&O] 
 SE4  */K*J&  QJ 
 /4K/m  7< U7$m 
 ',S'm 
 "'^"K  (-H( 
 "9FM  ";FM 
 "GGT  rZ   r   c                     e Zd ZU dZ edd      Zded<    edd      Zd	ed
<    edd      Zded<    edd      Z	ded<    edd      Z
ded<    edd      Zded<    edd      Zded<    edd      Zded<   y)ServerSettingszAServer settings used to configure the FastAPI and Uvicorn server.	localhostzListen addressr   r   hosti@  zListen portr   portNzSSL key file for HTTPSr   ssl_keyfilezSSL certificate file for HTTPSssl_certfilezIAPI key for authentication. If set all requests need to be authenticated.api_keyTz=Whether to interrupt requests when a new request is received.r    interrupt_requestsFz;Disable EventSource pings (may be needed for some clients).disable_ping_events zIThe root path for the server. Useful when running behind a reverse proxy.	root_path)r\   r]   r^   r_   r   rk   r`   rl   rm   rn   ro   rp   rq   rs   r   rZ   rX   ri   ri      s    K k7GHD#Hd>D#>!&":"K  #("B#L-  #_G]   %S   !&Q!  _Is rZ   ri   c                      e Zd Zy)SettingsN)r\   r]   r^   r   rZ   rX   ru   ru      s    rZ   ru   c                  0    e Zd ZU dZ eg d      Zded<   y)ConfigFileSettingsz#Configuration file format settings.zModel configsr   zList[ModelSettings]modelsN)r\   r]   r^   r_   r   rx   r`   r   rZ   rX   rw   rw      s    -"'"PFPrZ   rw   )
__future__r   rS   typingr   r   r   r   r   r	   typing_extensionsr
   pydanticr   r   pydantic_settingsr   ra   model_configr   ri   ru   rw   r   rZ   rX   <module>r      sg    "  = = " + *  57  0 1vL vr\ >	~} 	Q QrZ   