
     sg*2                    p    d dl mZ d dlZd dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ  G d d      Zy)    )annotationsN)DictOptionalUnionList)ModelSettingsc                  L    e Zd Zd	dZd
ddZddZddZd Zd Ze	dd       Z
y)
LlamaProxyc                ~   t        |      dkD  sJ d       i | _        |D ]8  }|j                  s|j                  |_        || j                  |j                  <   : d | _        d | _        |d   | _        | j                  j                  | _        | j                  | j                        | _        | j                  | _        y )Nr   zNo models provided!)	len_model_settings_dictmodel_aliasmodel_current_model_current_model_alias_default_model_settings_default_model_aliasload_llama_from_model_settings)selfmodelsr   s      I/var/www/html/venv/lib/python3.12/site-packages/llama_cpp/server/model.py__init__zLlamaProxy.__init__   s    6{Q5 55>@! 	AE$$$)KK!;@D%%e&7&78	A
 :>37!6<Qi$)-)E)E)Q)Q! #AA((
 %)$=$=!    Nc                j   || j                   }|| j                  vr| j                   }|| j                  k(  r| j                  | j                  S | j                  r| j                  j	                          d | _        | j                  |   }| j                  |      | _        || _        | j                  S N)r   r   r   r   closer   r   r   settingss      r   __call__zLlamaProxy.__call__$   s    =--E111--ED---"".***%%'",,U3"AA(K$)!"""r   c                <    | j                   |   j                         S r   )r   
model_dumpr   r   s     r   __getitem__zLlamaProxy.__getitem__8   s    ((/::<<r   c                x    t        |t        t        f      rt        j                  |      }|| j
                  |<   y r   )
isinstancebytesstrr   model_validate_jsonr   r   s      r   __setitem__zLlamaProxy.__setitem__;   s0    h-$88BH+3!!%(r   c              #  6   K   | j                   D ]  }|  y wr   )r   r"   s     r   __iter__zLlamaProxy.__iter__@   s      .. 	EK	s   c                V    | j                   r| j                   j                          | ` y y r   )r   r   )r   s    r   freezLlamaProxy.freeD   s(    %%'# r   c                v   d }| j                   dk(  r| j                  J d       | j                  Lt        j                  j
                  j                  | j                  | j                  | j                        }nt        j                  j                  | j                  | j                        }n| j                   dk(  r| j                  J d       | j                  Lt        j                  j                  j                  | j                  | j                  | j                        }n]t        j                  j                  | j                  | j                        }n&| j                   dk(  r| j                  J d       | j                  Lt        j                  j                  j                  | j                  | j                  | j                        }nt        j                  j                  | j                  | j                        }nu| j                   dk(  r| j                  J d       | j                  Lt        j                  j                  j                  | j                  | j                  | j                        }nt        j                  j                  | j                  | j                        }n| j                   dk(  r| j                  J d       | j                  Lt        j                  j                  j                  | j                  | j                  | j                        }nJt        j                  j                  | j                  | j                        }n| j                   d	k(  r| j                  J d       | j                  Lt        j                  j                  j                  | j                  | j                  | j                        }nt        j                  j                  | j                  | j                        }nb| j                   d
k(  r| j                  J d       | j                  Kt        j                  j                  j                  | j                  | j                  | j                        }nt        j                  j                  | j                  | j                        }n| j                   dk(  r=| j                  J d       t        j                  j                  | j                        }ng| j                   dk(  rX| j                   J d       t        j                  j#                  t%        j&                  t)        | j                                     }d }| j                  )t*        j,                  j                  | j                        }d }| j.                   t1        j2                  | j4                        }d }| j6                  t9        | j6                  t:              sJ i }| j6                  D ]  }|j=                  d      \  }}d|v s|j=                  d      \  }}|dk(  r|j?                         dv ||<   K|dk(  rtA        |      ||<   _|dk(  rtC        |      ||<   s|dk(  r|||<   ~tE        d|        dd l#}	i }
| j                  A|	jI                  t        jJ                  j                  | j                  | jL                        }nt        jJ                  }| jL                  |
d<    |dFi |
i d| jN                  d| jP                  d| jR                  d| jT                  d| jV                  d | jX                  d!| jZ                  d"|d#| j\                  d$| j^                  d%| j`                  d&| jb                  d'| jd                  d(| jf                  d)| jh                  d*| jj                  d+| jl                  d,| jn                  d-| jp                  d.| jr                  d/| jt                  d0| jv                  d1| jx                  d2| jz                  d3| j|                  d4| j~                  d5| j                  d6| j                  d7| j                  d8| j                  d9| j                  d:| j                  d;| j                   d<|d=|d>| j                  d?| j                  d@|dA| j                  }| j                  r| j                  dBk(  rE| j                  rt        dC| j                          t        j                  | j                  D      }nD| j                  rt        dE| j                          t        j                  | j                  D      }|j                  |       |S )GNz	llava-1-5zclip model not found)repo_idfilenameverbose)clip_model_pathr1   obsidianz	llava-1-6	moondream	nanollavazllama-3-vision-alphazminicpm-v-2.6zhf-autotokenizerzAhf_pretrained_model_name_or_path must be set for hf-autotokenizerzhf-tokenizer-configz<hf_tokenizer_config_path must be set for hf-tokenizer-config)num_pred_tokens=:bool)true1intfloatr'   zUnknown value type r   )r/   r0   
model_pathn_gpu_layers
split_modemain_gputensor_split
vocab_onlyuse_mmap	use_mlockkv_overridesrpc_serversseedn_ctxn_batchn_ubatch	n_threadsn_threads_batchrope_scaling_typerope_freq_baserope_freq_scaleyarn_ext_factoryarn_attn_factoryarn_beta_fastyarn_beta_slowyarn_orig_ctx	mul_mat_q
logits_all	embeddingoffload_kqv
flash_attnlast_n_tokens_size	lora_base	lora_pathnumachat_formatchat_handlerdraft_modeltype_ktype_v	tokenizerr1   diskzUsing disk cache with size )capacity_byteszUsing ram cache with size  )Or_   r2   hf_model_repo_id	llama_cppllama_chat_formatLlava15ChatHandlerfrom_pretrainedr1   ObsidianChatHandlerLlava16ChatHandlerMoondreamChatHandlerNanoLlavaChatHandlerLlama3VisionAlphaMiniCPMv26ChatHandler hf_pretrained_model_name_or_path+hf_autotokenizer_to_chat_completion_handlerhf_tokenizer_config_path.hf_tokenizer_config_to_chat_completion_handlerjsonloadopenllama_tokenizerLlamaHFTokenizerra   llama_speculativeLlamaPromptLookupDecodingdraft_model_num_pred_tokensrF   r%   listsplitlowerr<   r=   
ValueError	functoolspartialLlamar   r?   r@   rA   rB   rC   rD   rE   rG   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   rb   rc   cache
cache_typeprint
cache_sizeLlamaDiskCacheLlamaRAMCache	set_cache)r   r`   rd   ra   rF   kvkeyvalue
value_typer   kwargs	create_fn_modelr   s                 r   r   z)LlamaProxy.load_llama_from_model_settingsI   s	   ;.++7O9OO7((4//BBRR ( 9 9!)!9!9 ( 0 0 S    )::MM$,$<$<hFVFV  N   !!Z/++7O9OO7((4//CCSS ( 9 9!)!9!9 ( 0 0 T    )::NN$,$<$<hFVFV  O   !![0++7O9OO7((4//BBRR ( 9 9!)!9!9 ( 0 0 S    )::MM$,$<$<hFVFV  N   !![0++7O9OO7((4//DDTT ( 9 9!)!9!9 ( 0 0 U    )::OO$,$<$<hFVFV  P   !![0++7O9OO7((4//DDTT ( 9 9!)!9!9 ( 0 0 U    )::OO$,$<$<hFVFV  P   !!%;;++7O9OO7((4//AAQQ ( 9 9!)!9!9 ( 0 0 R    )::LL$,$<$<hFVFV  M   !!_4++7O9OO7((4//EEUU ( 9 9!)!9!9 ( 0 0 V    )::PP$,$<$<hFVFV  Q   !!%7799ESRSE ++WW== 
 !!%::11=NMN=$66ee		$x@@ABL =A	44@'88HH99I ++EE ( D DK KO  ,h33T:::L++ MXXc]
U%<(-C(8%J!V+,1KKM],JS)#u,,/JS)#w.,1%LS)#u,,1S)(+>zl)KLLM 	$$0!))// 11! * I "I#+>>F<  3
3
 "..3
  **	3

 &&3
 "..3
  **3
 &&3
 ((3
 &3
 !,,3
 3
 ..3
 $$3
  &&!3
" ((#3
$ %44%3
& '88'3
( $22)3
* %44+3
, %44-3
. &66/3
0 $2213
2 $2233
4 #0053
6 ((73
8  **93
: ((;3
< !,,=3
>  **?3
B  (::C3
F ((G3
H ((I3
L M3
P !,,Q3
R &S3
V $W3
Z ??[3
\ ??]3
`  a3
d $$e3
h >>""f,##78K8K7LMN!00@S@ST##6x7J7J6KLM!//x?R?RSU#r   )r   zList[ModelSettings]returnNoner   )r   zOptional[str]r   llama_cpp.Llama)r   r'   )r   r'   r   z Union[ModelSettings, str, bytes])r   r   r   r   )__name__
__module____qualname__r   r   r#   r)   r+   r-   staticmethodr   rg   r   r   r
   r
      s6    >*#(=4
$
 ` `r   r
   )
__future__r   rw   typingr   r   r   r   ri   llama_cpp.llama_speculativer|   llama_cpp.llama_tokenizerrz   llama_cpp.server.settingsr   r
   rg   r   r   <module>r      s)    "  . .  7 3 3\ \r   