
     sgpP                       U d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dlm
Z
mZmZmZmZ d dlZd dlZd dlmZ d dlmZmZ d dlmZmZmZmZmZmZmZ d d	lmZ d d
lm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/ d dl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9 d dl:m;Z;  ee;      Z<da=de>d<   drdZ?d Z@daAde>d<    e       ZB e       ZCdsdZDd ZEdaFde>d<   dtdZG	 	 	 du	 	 	 	 	 dvdZH	 dw	 	 	 	 	 	 	 dxd ZI	 	 	 	 	 	 dyd!ZJ e"d"#      ZK ee@       eeK      f	 	 	 dzd$ZLd%ZMe<j                  d&d' eeL      geej                  ePf   d(d)d*d+d,igd-d.id*d/d0d1d2id3d4ieMg5      e<j                  d6d" eeL      geMg7      	 	 	 	 	 	 d{d8              ZQe<j                  d9d: eeL      geMg;       eeE      f	 	 	 d|d<       ZRe<j                  d=d> eeL      geej                  ePf   d(d)d*d+d?igd-d.id*d/d@dAd2id3d4ieMg5       edBdCdDdEdFdGdHdFgdIdJdKdCdDdEdFdGdLdFgdMdNidOdJdPdCdDdEdFdGdQdFgdRdSdTdUdMd/idMdVidWdXdYgdZd[d\gdRdXdSid\d]dJd^dCdDdEdFdGdHdFgd_d`dadJdbc      f	 	 	 	 	 d}dd       ZTe<j                  dedf eeL      geMg;       eeE      f	 	 	 d~dg       ZVdhZWe<j                  didj eeL      geWg;       eeE      f	 	 	 	 	 ddk       ZXe<j                  dldm eeL      geWg;       eeE      f	 	 	 	 	 ddn       ZYe<j                  dodp eeL      geWg;       eeE      f	 	 	 	 	 ddq       ZZy)    )annotationsN)Lock)partial)IteratorListOptionalUnionDict)MemoryObjectSendStream)run_in_threadpooliterate_in_threadpool)DependsFastAPI	APIRouterRequestHTTPExceptionstatusBody)
Middleware)CORSMiddleware)
HTTPBearer)EventSourceResponse)RequestIdPlugin)RawContextMiddleware)
LlamaProxy)ConfigFileSettingsSettingsModelSettingsServerSettings)	CreateCompletionRequestCreateEmbeddingRequestCreateChatCompletionRequest	ModelListTokenizeInputRequestTokenizeInputResponseTokenizeInputCountResponseDetokenizeInputRequestDetokenizeInputResponse)RouteErrorHandler)route_classzOptional[ServerSettings]_server_settingsc                    | a y Nr+   )server_settingss    G/var/www/html/venv/lib/python3.12/site-packages/llama_cpp/server/app.pyset_server_settingsr1   5   s    &    c               #     K   t          y wr-   r.    r2   r0   get_server_settingsr5   :   s     
s   
zOptional[LlamaProxy]_llama_proxyc                    t        |       ay )N)models)r   r6   model_settingss    r0   set_llama_proxyr;   D   s    ^4Lr2   c                  K   t         j                          d {    d} 	 t        j                          d {    	 t         j                          d} t         t        j                          	 | rt         j                          y y 7 p7 S# t        j                          w xY w# | rt         j                          w w xY ww)NTF)llama_outer_lockacquirellama_inner_lockreleaser6   )release_outer_locks    r0   get_llama_proxyrB   I   s      
"
"
$$$
'&&(((	'$$&!&$$&$$&  % 	) $$&$$& sM   CBCB( BB( B B( 3CB( B%%B( (CCz+typing.Optional[typing.Callable[[], bytes]]_ping_message_factoryc                    | a y r-   )rC   )factorys    r0   set_ping_message_factoryrF   _   s    #r2   c                   t         j                  j                  dd       }|t         j                  j	                  |      st        d| d      t        |d      5 }|j                  d      s|j                  d      r<dd l}t        j                  t        j                  |j                  |                  }n#t        j                  |j                               }t        j                   |      }|j"                  }d d d        |9|7| 
t%               } t        j                   |       }t'        j                   |       g}||J d       t)        |       t+        t,        t/               f	      g}t1        |d
t2        j4                  |j6                        }|j9                  t:        dgddgdg       |j=                  t>               |J tA        |       |jB                  rtE        d        |S # 1 sw Y   xY w)NCONFIG_FILEzConfig file z not found!rbz.yamlz.ymlr   z<server_settings and model_settings must be provided together)pluginsu   🦙 llama.cpp Python API)
middlewaretitleversion	root_path*T)allow_originsallow_credentialsallow_methodsallow_headersr9   c                     t               S r-   )bytesr4   r2   r0   <lambda>zcreate_app.<locals>.<lambda>   s     r2   )#osenvirongetpathexists
ValueErroropenendswithyamlr   model_validate_jsonjsondumps	safe_loadreadr   model_validater8   r   r   r1   r   r   r   r   	llama_cpp__version__rN   add_middlewarer   include_routerrouterr;   disable_ping_eventsrF   )	settingsr/   r:   config_filefr_   config_file_settingsrK   apps	            r0   
create_apprq   d   s   
 **..5Kww~~k*|K=DEE+t$ 	9##G,0D0DV0L'9'M'MJJt~~a01($ (:'M'Maffh'W$,;;<PQO188N	9 >#9zH(77A'66x@A 	#(BFEFB (1O<M;OPQJ
)%%!++	C eee   v%%%>2** 1J]	9 	9s   B#G77H c           	       K   t        t                     }|r|j                  nd}|4 d {    	 t        |      2 3 d {   }|j	                  t        t        j                  |                   d {    | j                          d {   r t        j                                |sst        j                         s|j	                  t        d             d {     t        j                                7 7 7 7 w7 %6 |j	                  t        d             d {  7   nm# t        j                         $ rS}t        d       t        j                  dd      5  t        d| j                          |# 1 sw Y   nxY wY d }~nd }~ww xY w|r) |        d {  7   n# |r |        d {  7   w w xY wd d d       d {  7   y # 1 d {  7  sw Y   y xY ww)	NF)dataz[DONE]disconnected   T)shieldz-Disconnected from client (via refresh/close) )nextr5   interrupt_requestsr   senddictra   rb   is_disconnectedanyioget_cancelled_exc_classr=   lockedprintmove_on_afterclient)requestinner_send_chaniteratoron_completer/   rx   chunkes           r0   get_event_publisherr      s     .01O.=**5   $ $	$4X> < <e%**4TZZ5F+GHHH 002229%779;;%*:*A*A*C)..t/BCCC9%779;;$<H2 D  ? "&&t':;;;,,. 	.!$$Qt4 EgnnEUVW  	 !m## !m## !$ $ $ $ $s  *G0DG0GD1DDD2D17D8D1DD10D1D1#D	$D1G0DD1D1	D1D1*D-+D10F11F"F*FF
FF1FF1G*F-+G1G>G
?GG	G0GG0G-!G$"G-)G0c                    i }|j                         D ]=  \  }}|j                  d      }| j                  |dd      D ]  }||t        |      <    ? |S )Nutf-8FT)add_bosspecial)itemsencodetokenizestr)llama
logit_biasto_biastokenscoreinput_ids         r0   _logit_bias_tokens_to_input_idsr      sf     !#G"((* +uW%ueTJ 	+H%*GCM"	++ Nr2   F)
auto_errorc                   K   | j                   y|r%|j                  | j                   k(  r|j                  S t        t        j                  d      w)NTzInvalid API keystatus_codedetail)api_keycredentialsr   r   HTTP_401_UNAUTHORIZED)rl   authorizations     r0   authenticater      sV     
  22h6F6FF((( 00  s   AAz	OpenAI V1z/v1/completions
Completion200zSuccessful Responseschemaz$refz-#/components/schemas/CreateCompletionResponsez&Completion response, when stream=False)anyOfrL   stringzServer Side Streaming response, when stream=True. See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_formatzQdata: {... see CreateCompletionResponse ...} \n\n data: ... \n\n ... data: [DONE])typerL   example)zapplication/jsonztext/event-stream)descriptioncontent)summarydependenciesresponse_model	responsestags%/v1/engines/copilot-codex/completions)include_in_schemar   r   c           
       K   t        j                         }|j                   t        j                  t                            d {   }|t        t        j                  d      t        |j                  t              rHt        |j                        dk  sJ t        |j                        dkD  r|j                  d   nd|_	         || j                  j                  dk7  r|j                  nd      }h d}|j                  |	      }|j                   4|j"                  d
k(  rt%        ||j                         n|j                   |d<   |j&                  ,t(        j*                  j-                  |j&                        |d<   |j.                  dkD  r`t)        j0                  t)        j2                  |j.                  |j5                               g      }d|vr||d<   n|d   j7                  |       	 t9        |fi | d {   t        t>              rkt9        t@               d {   dfd}	tC        jD                  d      \  }
}tG        |tI        tJ        | |
 |	       |j<                        dtL              S |j=                          d {    S 7 X7 # t:        $ r }|j=                          d {  7   |d }~ww xY w7 7 9w)NService is not availabler   ru   r    r   zcopilot-codex>   nuserbest_of
min_tokenslogit_bias_typeexcludetokensr   grammarlogits_processorc               3  .   K     E d {    y 7 wr-   r4   first_responseiterator_or_completions   r0   r   z#create_completion.<locals>.iteratorJ         ---   

   r   r   r   r   
data_sender_callablesepping_message_factory)returnz2Iterator[llama_cpp.CreateCompletionStreamResponse])'
contextlibAsyncExitStackenter_async_contextasynccontextmanagerrB   r   r   HTTP_503_SERVICE_UNAVAILABLE
isinstancepromptlistlenurlrZ   model
model_dumpr   r   r   r   rf   LlamaGrammarfrom_stringr   LogitsProcessorListMinTokensLogitsProcessor	token_eosextendr   	Exceptionacloser   rw   r|   create_memory_object_streamr   r   r   rC   )r   body
exit_stackllama_proxyr   r   kwargs_min_tokens_logits_processorerrr   	send_chan	recv_chanr   r   s               @@r0   create_completionr      s    V **,J"667fz7U7UVe7f7hiiK;;-
 	
 $++t$4;;1$$$(+DKK(81(<dkk!n";;FF 	

EG __W_-F" ##x/ ,E4??C 	| ||%22>>t||Ly'0'D'D//ARST(
$ V+)EF%&%&--.JK $E4V44 	 ((307MNN	.  %@@D	9"!(# )!&--" !6
 	
 !!!%%] j` 5 !!!	 O, 	"st   AK	J!
F-K8J& J$J& #K/K0A*KKK$J& &	K/K
KK

KKKz/v1/embeddings	Embedding)r   r   r   c                   K   t         || j                        j                  fi | j                  dh       d {   S 7 w)Nr   r   )r   r   create_embeddingr   )r   r   s     r0   r   r   `  sI      #GMM"33


fX

.   s   ;AAAz/v1/chat/completionsChatz1#/components/schemas/CreateChatCompletionResponsezServer Side Streaming response, when stream=TrueSee SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_formatzUdata: {... see CreateChatCompletionResponse ...} \n\n data: ... \n\n ... data: [DONE]zChat Completionzgpt-3.5-turbosystemzYou are a helpful assistant.)roler   r   zWhat is the capital of France?)r   messages)r   valuez	JSON Modez Who won the world series in 2020r   json_object)r   r   response_formatzTool CallingzExtract Jason is 30 years old.functionUserzUser recordobjectnumber)nameager   r   )r   
propertiesrequired)r   r   
parameters)r   r   )r   r   toolstool_choiceLogprobsTr   )r   r   logprobstop_logprobs)normal	json_modetool_callingr  )openapi_examplesc           
       K   t        j                         }|j                   t        j                  t                            d {   }|t        t        j                  d      h d}|j                  |      } ||j                        }|j                  4|j                  dk(  rt        ||j                        n|j                  |d<   |j                  ,t        j                  j!                  |j                        |d<   |j"                  dkD  r`t        j$                  t        j&                  |j"                  |j)                               g      }d	|vr||d	<   n|d	   j+                  |       	 t-        |j.                  fi | d {   t5        t6              rkt-        t8               d {   dfd
}	t;        j<                  d      \  }
}t?        |tA        tB        | |
 |	       |j2                        dtD              S |j3                          d {    S 7 7 # t0        $ r }|j3                          d {  7   |d }~ww xY w7 7 9w)Nr   r   >   r   r   r   r   r   r   r   r   r   r   c               3  .   K     E d {    y 7 wr-   r4   r   s   r0   r   z(create_chat_completion.<locals>.iterator  r   r   r   r   r   r   )r   z'Iterator[llama_cpp.ChatCompletionChunk])#r   r   r   r   rB   r   r   r   r   r   r   r   r   r   rf   r   r   r   r   r   r   r   r   create_chat_completionr   r   r   r   rw   r|   r   r   r   r   rC   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   s               @@r0   r	  r	  p  s<    Z **,J"667fz7U7UVe7f7hiiK;;-
 	
G __W_-F

#E" ##x/ ,E4??C 	| ||%22>>t||Ly'0'D'D//ARST(
$ V+)EF%&%&--.JK $E$@$@KFKK 	 ((307MNN	.  %@@D	9"!(# )!&--" !6
 	
 !!!%%E jH L !!!	 O, 	"st   AI#	H.
D0I#;H3 H1H3 #I#<I=A*I#'I!(I#1H3 3	I<IIIII#!I#z
/v1/modelsModelsc           	     D   K   d| D cg c]	  }|ddg d c}dS c c}w w)Nr   r   me)idr   owned_bypermissions)r   rs   r4   )r   model_aliass     r0   
get_modelsr  #  sB        +
  "! !	
 
s    
 Extrasz/extras/tokenizeTokenizec                   K    || j                         j                  | j                  j                  d      d      }t	        |      S w)Nr   Tr   )r   )r   r   inputr   r%   r   r   r   s      r0   r   r   =  sA      $--djj.?.?.HRV-WF //s   A	Az/extras/tokenize/countzTokenize Countc                   K    || j                         j                  | j                  j                  d      d      }t	        t        |            S w)Nr   Tr  )count)r   r   r  r   r&   r   r  s      r0   count_query_tokensr  L  sE      $--djj.?.?.HRV-WF%CK88s   AAz/extras/detokenize
Detokenizec                   K    || j                         j                  | j                        j                  d      }t	        |      S w)Nr   )text)r   
detokenizer   decoder(   )r   r   r  s      r0   r  r  [  s;      tzz"--dkk:AA'JD"--s   AA	)r/   r   )r:   zList[ModelSettings])rE   ztyping.Callable[[], bytes])NNN)rl   zSettings | Noner/   zServerSettings | Noner:   zList[ModelSettings] | Noner-   )r   r   r   z"MemoryObjectSendStream[typing.Any]r   zIterator[typing.Any]r   z<typing.Optional[typing.Callable[[], typing.Awaitable[None]]])r   zllama_cpp.Llamar   Dict[str, float]r   r   )rl   r   r   zOptional[str])r   r   r   r    r   zllama_cpp.Completion)r   r!   r   r   )r   r   r   r"   r   zllama_cpp.ChatCompletion)r   r   r   r#   )r   r$   r   r   r   r%   )r   r$   r   r   r   r&   )r   r'   r   r   r   r(   )[
__future__r   rW   ra   typingr   r|   r   	functoolsr   r   r   r   r	   r
   rf   anyio.streams.memoryr   starlette.concurrencyr   r   fastapir   r   r   r   r   r   r   fastapi.middlewarer   fastapi.middleware.corsr   fastapi.securityr   sse_starlette.sser   starlette_context.pluginsr   starlette_context.middlewarer   llama_cpp.server.modelr   llama_cpp.server.settingsr   r   r   r   llama_cpp.server.typesr    r!   r"   r#   r$   r%   r&   r'   r(   llama_cpp.server.errorsr)   rj   r+   __annotations__r1   r5   r6   r=   r?   r;   rB   rC   rF   rq   r   r   bearer_schemer   openai_v1_tagpostCreateCompletionResponser   r   r   ChatCompletionr	  rY   r  
extras_tagr   r  r  r4   r2   r0   <module>r8     s    " 	      8 8   7 J U U U ) 2 ' 1 5 = 
 
 
 6 
0	1-1 * 1'
 &*" )6 6 5
'& FJ B I$ !%-11577*7 /7| QU	$$7$ #$ N	$<		 	 	 e, !!45#*=#9 (  ,'(**	
 	0 #%TU" "J	%  ("]#~	&
. ?   B +,'(
	  S&S&
!S& S&C NS&l ,'(
	   &o6# ,'(113670  !'(["
 "J	%  ("] $C	&
2 =  D )- -,!)6TU!'4TU!	 ',!)6TU!'4VW! )/'>
 *,!)6TU!'4TU! %/(./<,4170B06/A3& 28/")$ !+"F%$/ D &,!)6TU!'4TU! !%$&oC
E)P&P&
%P&P QP&A@P&f ,'(
	   &o6" 
 ,'(
	   &o60
00 00 ,'(
	   &o69
99  99 ,'(
	   &o6.
 .. ..r2   