Ë
     ©sgv!  ã                  óà   — d dl mZ d dlZd dlmZmZmZmZmZm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlZdej$                  d<    G d	„ d
e«      Z G d„ de«      Z G d„ dee«      Z G d„ de«      Zy)é    )ÚannotationsN)ÚOptionalÚListÚLiteralÚUnionÚDictÚcast)ÚSelf)ÚFieldÚmodel_validator)ÚBaseSettings© Úprotected_namespacesc                  óŒ  — e Zd ZU dZ ed¬«      Zded<    edd¬«      Zd	ed
<    eddd¬«      Zded<    ee	j                  d¬«      Zded<    eddd¬«      Zded<    edd¬«      Zded<    edd¬«      Zded<    e e	j                  «       d¬«      Zded<    e e	j"                  «       d¬«      Zded<    edd ¬«      Zd!ed"<    edd#¬«      Zd	ed$<    ee	j*                  d%¬«      Zded&<    ed'dd(¬«      Zded)<    ed*d+d,¬«      Zded-<    ed*d+d.¬«      Zded/<    e e ej8                  «       d0z  d+«      d+d1¬«      Zded2<    e e ej8                  «       d+«      dd3¬«      Zded4<    ee	j>                  ¬5«      Z ded6<    ed7d8¬«      Z!d9ed:<    ed7d;¬«      Z"d9ed<<    ed=¬5«      Z#d9ed><    ed?¬5«      Z$d9ed@<    edA¬5«      Z%d9edB<    ed?¬5«      Z&d9edC<    ed¬5«      Z'dedD<    edEdF¬«      Z(dedG<    edEdH¬«      Z)dedI<    eddJ¬«      Z*dedK<    edEdL¬«      Z+dedM<    eddN¬«      Z,dedO<    edPddQ¬«      Z-dedR<    eddS¬«      Z.d	edT<    eddU¬«      Z/d	edV<    eddW¬«      Z0dXedY<    eddZ¬«      Z1d	ed[<    edd\¬«      Z2d	ed]<    edd^¬«      Z3ded_<    ed`da¬«      Z4dbedc<    eddde¬«      Z5dedf<    eddg¬«      Z6d	edh<    eddi¬«      Z7d	edj<    eddk¬«      Z8d	edl<    eddm¬«      Z9d	edn<    edodp¬«      Z:dedq<    eddr¬«      Z;dsedt<    eddu¬«      Z<dsedv<    edEdw¬«      Z=dedx<    e>dy¬z«      d|d{„«       Z?y)}ÚModelSettingsz*Model settings used to load a Llama model.z8The path to the model to use for generating completions.)ÚdescriptionÚstrÚmodelNz9The alias of the model to use for generating completions.©Údefaultr   úOptional[str]Úmodel_aliasr   éÿÿÿÿz_The number of layers to put on the GPU. The rest will be on the CPU. Set -1 to move all to GPU.)r   Úger   ÚintÚn_gpu_layerszThe split mode to use.Ú
split_modezMain GPU to use.Úmain_gpuz0Split layers across multiple GPUs in proportion.zOptional[List[float]]Útensor_splitFz&Whether to only return the vocabulary.ÚboolÚ
vocab_onlyz	Use mmap.Úuse_mmapz
Use mlock.Ú	use_mlockz˜List of model kv overrides in the format key=type:value where type is one of (bool, int, float). Valid true values are (true, TRUE, 1), otherwise false.zOptional[List[str]]Úkv_overridesz2comma seperated list of rpc servers for offloadingÚrpc_serverszRandom seed. -1 for random.Úseedi   zThe context size.Ún_ctxi   é   zThe batch size to use per eval.Ún_batchz)The physical batch size used by llama.cppÚn_ubatché   z8The number of threads to use. Use -1 for max cpu threadsÚ	n_threadszNThe number of threads to use when batch processing. Use -1 for max cpu threadsÚn_threads_batch)r   Úrope_scaling_typeg        zRoPE base frequencyÚfloatÚrope_freq_basezRoPE frequency scaling factorÚrope_freq_scaleg      ð¿Úyarn_ext_factorg      ð?Úyarn_attn_factorg      @@Úyarn_beta_fastÚyarn_beta_slowÚyarn_orig_ctxTz+if true, use experimental mul_mat_q kernelsÚ	mul_mat_qzWhether to return logits.Ú
logits_allzWhether to use embeddings.Ú	embeddingz"Whether to offload kqv to the GPU.Úoffload_kqvzWhether to use flash attention.Ú
flash_attné@   z5Last n tokens to keep for repeat penalty calculation.Úlast_n_tokens_sizezoOptional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.Ú	lora_basez*Path to a LoRA file to apply to the model.Ú	lora_pathzEnable NUMA support.zUnion[bool, int]ÚnumazChat format to use.Úchat_formatz<Path to a CLIP model to use for multi-modal chat completion.Úclip_model_pathz=Use a cache to reduce processing times for evaluated prompts.ÚcacheÚramz5The type of cache to use. Only used if cache is True.zLiteral['ram', 'disk']Ú
cache_typel        z;The size of the cache in bytes. Only used if cache is True.Ú
cache_sizez5The path to a HuggingFace tokenizer_config.json file.Úhf_tokenizer_config_pathz~The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained().Ú hf_pretrained_model_name_or_pathz=The model repo id to use for the HuggingFace tokenizer model.Úhf_model_repo_idzHMethod to use for speculative decoding. One of (prompt-lookup-decoding).Údraft_modelé
   z2Number of tokens to predict using the draft model.Údraft_model_num_pred_tokensz#Type of the key cache quantization.zOptional[int]Útype_kz%Type of the value cache quantization.Útype_vz#Whether to print debug information.ÚverboseÚbefore)Úmodec                óÐ   — t        j                  «       }t        t        t        t
        f   | «      }|j                  dd«      dk(  r||d<   |j                  dd«      dk(  r||d<   | S )Nr,   r   r   r-   )ÚmultiprocessingÚ	cpu_countr	   r   r   r   Úget)ÚselfrT   Úvaluess      úL/var/www/html/venv/lib/python3.12/site-packages/llama_cpp/server/settings.pyÚset_dynamic_defaultsz"ModelSettings.set_dynamic_defaults¼   sg   € ô
 $×-Ñ-Ó/ˆ	Ü”dœ3¤˜8‘n dÓ+ˆØ:‰:k 1Ó%¨Ò+Ø"+ˆF;ÑØ:‰:Ð'¨Ó+¨rÒ1Ø(1ˆFÐ$Ñ%Øˆó    )Úreturnr
   )@Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   Ú__annotations__r   r   Ú	llama_cppÚLLAMA_SPLIT_MODE_LAYERr   r   r   r!   Úllama_supports_mmapr"   Úllama_supports_mlockr#   r$   r%   ÚLLAMA_DEFAULT_SEEDr&   r'   r)   r*   ÚmaxrS   rT   r,   r-   Ú#LLAMA_ROPE_SCALING_TYPE_UNSPECIFIEDr.   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r=   r>   r?   r@   rA   rB   rC   rE   rF   rG   rH   rI   rJ   rL   rM   rN   rO   r   rY   r   rZ   rX   r   r      sÞ  … Ù4áØNô€Eˆ3ó ñ "'ØØOô"€Kó ñ
 ØØØuô€L#ó ñ
 Ø×0Ñ0Ø,ô€Jó ñ ØØØ&ô€Hˆcó ñ
 +0ØØFô+€LÐ'ó ñ ØÐ#Kô€Jó ñ Ø-	×-Ñ-Ó/Øô€Hˆdó ñ Ø.	×.Ñ.Ó0Ø ô€Iˆtó ñ ).Øð oô)€LÐ%ó ñ "'ØØHô"€Kó ñ
 Ø×,Ñ,Ð:Wô€Dˆ#ó ñ ˜t¨Ð7JÔK€Eˆ3ÓKÙØ˜Ð'Hô€GˆSó ñ Ø˜Ð'Rô€Hˆcó ñ ÙÐ-O×-Ñ-Ó/°1Ñ4°aÓ8ØØNô€Iˆsó ñ
 !ÙÐ-O×-Ñ-Ó/°Ó3ØØdô€OSó ñ
 #Ø×=Ñ=ôÐsó ñ "¨#Ð;PÔQ€NEÓQÙ"ØÐ!@ô€OUó ñ #¨4Ô0€OUÓ0Ù#¨CÔ0ÐeÓ0Ù!¨$Ô/€NEÓ/Ù!¨#Ô.€NEÓ.Ù qÔ)€M3Ó)ÙØÐ"Oô€Iˆtó ñ  TÐ7RÔS€JÓSÙ EÐ7SÔT€IˆtÓTÙØÐ"Fô€Kó ñ ØÐ#Dô€Jó ñ $ØØØKôÐ˜ó ñ  %Øð Fô €Iˆ}ó ñ  %ØØ@ô €Iˆ}ó ñ
 #ØØ*ô€DÐ
ó ñ
 "'ØØ)ô"€Kó ñ &+ØØRô&€O]ó ñ
 ØØSô€Eˆ4ó ñ */ØØKô*€JÐ&ó ñ ØØQô€Jó ñ
 /4ØØKô/Ð˜mó ñ 7<Øð Uô7Ð$ mó ñ
 ',ØØSô'Ðmó ñ
 "'ØØ^ô"€Kó ñ (-ØØHô(Ð ó ñ
 "ØØ9ô€FˆMó ñ "ØØ;ô€FˆMó ñ
 ØÐ"Gô€GˆTó ñ ØôòóñrZ   r   c                  ó  — e Zd ZU dZ edd¬«      Zded<    edd¬«      Zd	ed
<    edd¬«      Zded<    edd¬«      Z	ded<    edd¬«      Z
ded<    edd¬«      Zded<    edd¬«      Zded<    edd¬«      Zded<   y)ÚServerSettingszAServer settings used to configure the FastAPI and Uvicorn server.Ú	localhostzListen addressr   r   Úhosti@  zListen portr   ÚportNzSSL key file for HTTPSr   Ússl_keyfilezSSL certificate file for HTTPSÚssl_certfilezIAPI key for authentication. If set all requests need to be authenticated.Úapi_keyTz=Whether to interrupt requests when a new request is received.r    Úinterrupt_requestsFz;Disable EventSource pings (may be needed for some clients).Údisable_ping_eventsÚ zIThe root path for the server. Useful when running behind a reverse proxy.Ú	root_path)r\   r]   r^   r_   r   rk   r`   rl   rm   rn   ro   rp   rq   rs   r   rZ   rX   ri   ri   Ê   sÀ   … ÙKñ ˜kÐ7GÔH€Dˆ#ÓHÙ˜d°Ô>€Dˆ#Ó>Ù!&ØÐ":ô"€Kó ñ #(ØÐ"Bô#€L-ó ñ #ØØ_ô€Gˆ]ó ñ  %ØØSô Ð˜ó ñ !&ØØQô!Ð˜ó ñ ØØ_ô€Iˆsô rZ   ri   c                  ó   — e Zd Zy)ÚSettingsN)r\   r]   r^   r   rZ   rX   ru   ru   é   s   „ ØrZ   ru   c                  ó0   — e Zd ZU dZ eg d¬«      Zded<   y)ÚConfigFileSettingsz#Configuration file format settings.zModel configsr   zList[ModelSettings]ÚmodelsN)r\   r]   r^   r_   r   rx   r`   r   rZ   rX   rw   rw   í   s   … Ù-á"'°ÀÔ"P€FÐÔPrZ   rw   )Ú
__future__r   rS   Útypingr   r   r   r   r   r	   Útyping_extensionsr
   Úpydanticr   r   Úpydantic_settingsr   ra   Úmodel_configr   ri   ru   rw   r   rZ   rX   ú<module>r      sg   ðÝ "ã ç =× =Ý "ç +Ý *ã ð 57€× Ñ Ð0Ñ 1ôvLô vôr\ô ô>	ˆ~˜}ô 	ôQ˜õ QrZ   