
    +sgx\                        d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZmZmZmZmZ d dlmZ d d	lmZ  ej0                  e      Zdd
Z G d dej8                        Zy)    )annotationsN)fnmatch)Path)AnyCallable)nn)
AutoConfig	AutoModelAutoTokenizer	MT5ConfigT5Config)is_peft_available)find_adapter_config_filec                     d fd}|S )Nc                t    t        j                  t        |       z  d        t        |       z  fi |S )NT)exist_ok)osmakedirsr   )save_directorykwargs_save_pretrained_fn	subfolders     [/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/models/Transformer.pywrapperz)_save_pretrained_wrapper.<locals>.wrapper   s5    
D(94tD"4#7)#CNvNN    )r   z
str | PathreturnNone )r   r   r   s   `` r   _save_pretrained_wrapperr      s    O Nr   c                  "    e Zd ZU dZdZded<   	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZddZddZddZ	dd	Z
dd
Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddZddZddZddZddZ	 d	 	 	 	 	 d dZd!dZdd"dZed#d       Z xZS )$Transformera  Hugging Face AutoModel to generate token embeddings.
    Loads the correct class, e.g. BERT / RoBERTa etc.

    Args:
        model_name_or_path: Hugging Face models name
            (https://huggingface.co/models)
        max_seq_length: Truncate any inputs longer than max_seq_length
        model_args: Keyword arguments passed to the Hugging Face
            Transformers model
        tokenizer_args: Keyword arguments passed to the Hugging Face
            Transformers tokenizer
        config_args: Keyword arguments passed to the Hugging Face
            Transformers config
        cache_dir: Cache dir for Hugging Face Transformers to store/load
            models
        do_lower_case: If true, lowercases the input (independent if the
            model is cased or not)
        tokenizer_name_or_path: Name or path of the tokenizer. When
            None, then model_name_or_path is used
        backend: Backend used for model inference. Can be `torch`, `onnx`,
            or `openvino`. Default is `torch`.
    Tboolsave_in_rootc
                   t         |           ddg| _        || _        |	| _        |i }|i }|i }| j                  |||	|      }
 | j                  ||
||	fi | |	d|vr||d<   t        j                  ||n|fd|i|| _	        |t        | j                  d      rtt        | j                  j                  d      rTt        | j                  d      r>t        | j                  j                  j                  | j                  j                        }|| _        |:| j                  j"                  j$                  | j                  j                  _        y y )Nmax_seq_lengthdo_lower_casemodel_max_length	cache_dirconfigmax_position_embeddings)super__init__config_keysr&   backend_load_config_load_modelr   from_pretrained	tokenizerhasattr
auto_modelr)   minr*   r'   r%   	__class____name__tokenizer_class)selfmodel_name_or_pathr%   
model_argstokenizer_argsconfig_argsr(   r&   tokenizer_name_or_pathr.   r)   r6   s              r   r,   zTransformer.__init__6   sa    	,o>*J!NK""#5y';W+VYV:V%*<N*R1?N-.&66&<&H"N`

 
 !2DOO224MNDNN,>?!$T__%;%;%S%SUYUcUcUtUt!u,!-59^^5M5M5V5VDOO""2 .r   c           	     6   t        ||j                  d      |j                  d      |j                  dd            	 Bt               st        d      |dk7  rt	        d      d	d
lm}  |j                  |fi |d|iS t        j                  |fi |d|iS )z"Loads the configuration of a modeltokenrevisionlocal_files_onlyF)r@   rA   rB   zgLoading a PEFT model requires installing the `peft` package. You can install it via `pip install peft`.torcha  PEFT models can currently only be loaded with the `torch` backend. To use other backends, load the model with `backend="torch"`, call `model[0].auto_model.merge_and_unload()`, save that model with `model.save_pretrained()` and then load the model with the desired backend.r   )
PeftConfigr(   )	r   getr   	Exception
ValueErrorpeftrD   r1   r	   )r9   r:   r(   r.   r=   rD   s         r   r/   zTransformer._load_configf   s     %"!oog.$4!,1CU!K	  %&}  '! w 
 (-:--.@eKe[dee))*<aaW`aar   c                   |dk(  rt        |t              r | j                  |||fi | nDt        |t              r | j                  |||fi | nt        j                  |f||d|| _         | j                  |||fi | y|dk(  r | j                  |||fi | y|dk(  r | j                  |||fi | yt        d| d      )zLoads the transformer modelrC   r)   r(   onnxopenvinozUnsupported backend 'z6'. `backend` should be `torch`, `onnx`, or `openvino`.N)
isinstancer   _load_t5_modelr   _load_mt5_modelr
   r1   r4   _load_peft_model_load_onnx_model_load_openvino_modelrG   )r9   r:   r)   r(   r.   r;   s         r   r0   zTransformer._load_model   s    g&(+###$6	XZXFI.$$$%7YjY"+";";&#/5#NX# "D!!"4fiV:V!D!!"4fiV:V
"%D%%&8&)ZzZ4WI=stuur   c                    t               r<ddlm}m} t	        ||      r' |j
                  | j                  |f||d|| _        y y y )Nr   )rD   	PeftModelrJ   )r   rH   rD   rT   rM   r1   r4   )r9   r:   r)   r(   r;   rD   rT   s          r   rP   zTransformer._load_peft_model   sP    2&*-";)";";OO%7#@FR[#_i# . r   c                   t        |t              st        |t              rt        d      	 ddlm} ddlm} t        |      }|j                         }d}	d}
| j                  |||||
|	      \  }}|r|j                  dd        d	|v rh|d	   }t        |t              sXt        |      j                         st        d
      t        |d      5 }t!        j"                  |      |d	<   d d d        ni |d	<    |j$                  |f|||d|| _        t)        | j&                  j*                  | j,                        | j&                  _        |r| j/                  |||	       y y # t        $ r t        d      w xY w# 1 sw Y   xY w)Nz8T5 models are not yet supported by the OpenVINO backend.r   )OVModelForFeatureExtraction)OV_XML_FILE_NAMEzUsing the OpenVINO backend requires installing Optimum and OpenVINO. You can install them with pip: `pip install optimum[openvino]`.OpenVINOzopenvino*.xml	file_name	ov_configzXov_config should be a dictionary or a path to a .json file containing an OpenVINO configzutf-8)encodingr)   r(   export)rM   r   r   rG   optimum.intelrV   optimum.intel.openvinorW   ModuleNotFoundErrorrF   r   exists_backend_should_exportpopdictopenjsonloadr1   r4   r   _save_pretrainedr.   _backend_warn_to_save)r9   r:   r)   r(   r;   rV   rW   	load_pathis_localbackend_nametarget_file_globr]   rZ   fs                 r   rR   z Transformer._load_openvino_model   s   fh':fi+HWXX	A? +,	##%!* "88x-=?OQ]


 NN;- *$";/Ii.I--/$r  )g6 ;!.2iilJ{+; ; ')J{# 8c7R7b7b8
	8

 8
 ,DDOODdDdfjfrfr+s( &&'98\R [ # 	R 	8; ;s   E" E:"E7:Fc                   	 dd l }ddlm}m} |j                  d|j                         d         |d<   t        |      }|j                         }	d}
d}| j                  ||	||||
      \  }}|r|j                  dd         |j                  |f|||d|| _        t        | j                  j                  | j                        | j                  _        |r| j!                  ||	|
       y y # t        $ r t        d      w xY w)	Nr   )ONNX_WEIGHTS_NAMEORTModelForFeatureExtractionzUsing the ONNX backend requires installing Optimum and ONNX Runtime. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`providerONNXz*.onnxrY   r\   )onnxruntimeoptimum.onnxruntimerp   rq   r`   rF   rc   get_available_providersr   ra   rb   r1   r4   r   rh   r.   ri   )r9   r:   r)   r(   r;   ortrp   rq   rj   rk   rl   rm   r]   s                r   rQ   zTransformer._load_onnx_model   s,   	%[ ",
C<W<W<YZ[<\!]
:+,	##%# "88x->@PR^


 NN;- 9e8T8d8d9
	9

 9
 ,DDOODdDdfjfrfr+s( &&'98\R K # 	< 	s   C, ,Dc                &   |j                  dd      }|r||fS |j                  d|      }|j                  dd      }	|	rt        |	|      j                         nt        |      j                         }
|	r%t        |	| j                  |      j                         n#t        | j                  |      j                         }|	r|	 d| nd| }|r<|j                  |      D cg c]!  }|j                  |      j                         # }}nct        j                  |j                         d|j                  dd      |j                  d	d      
      }|D cg c]  }t        ||      s| }}|
|v }|s`d|vr\||v }|rVt        |      dkD  r4d|vr0t        j                  d| d|j                         d| d|d	       | j                  |d<   ||d<   || }t        |      j                  }t        |      dkD  r8|d   |d<   t        |j                  dd      g|dd  j                         |d<   |rQt        j                  d|d|j                         d| d       |r"t        j                  d| d| d|d    d       ||fS c c}w c c}w )a  
        Determines whether the model should be exported to the backend, or if it can be loaded directly.
        Also update the `file_name` and `subfolder` model_args if necessary.

        These are the cases:

        1. If export is set in model_args, just return export
        2. If `<subfolder>/<file_name>` exists; set export to False
        3. If `<backend>/<file_name>` exists; set export to False and set subfolder to the backend (e.g. "onnx")
        4. If `<file_name>` contains a folder, add those folders to the subfolder and set the file_name to the last part

        We will warn if:

        1. The expected file does not exist in the model directory given the optional file_name and subfolder.
           If there are valid files for this backend, but they're don't align with file_name, then we give a useful warning.
        2. Multiple files are found in the model directory that match the target file name and the user did not
           specify the desired file name via `model_kwargs={"file_name": "<file_name>"}`

        Args:
            load_path: The model repository or directory, as a Path instance
            is_local: Whether the model is local or remote, i.e. whether load_path is a local directory
            model_args: The model_args dictionary. Notable keys are "export", "file_name", and "subfolder"
            target_file_name: The expected file name in the model directory, e.g. "model.onnx" or "openvino_model.xml"
            target_file_glob: The glob pattern to match the target file name, e.g. "*.onnx" or "openvino*.xml"
            backend_name: The human-readable name of the backend for use in warnings, e.g. "ONNX" or "OpenVINO"

        Returns:
            Tuple[bool, dict[str, Any]]: A tuple of the export boolean and the updated model_args dictionary.
        r]   NrY   r   z/**/z**/modelrA   r@   )	repo_typerA   r@      z	Multiple z files found in z: z, defaulting to zW. Please specify the desired file name via `model_kwargs={"file_name": "<file_name>"}`. zNo z
 found in z. Exporting the model to .z#If you intended to load one of the  zN files, please specify the desired file name via `model_kwargs={"file_name": "r   z"}`.)rc   rE   r   as_posixr.   globrelative_tohuggingface_hublist_repo_filesr   lenloggerwarningparts)r9   rj   rk   r;   target_file_namerm   rl   r]   rY   r   primary_full_pathsecondary_full_pathglob_patternpathmodel_file_names	all_filesfnamemodel_foundfile_name_partss                      r   rb   z"Transformer._backend_should_export  s   N $/:%%NN;0@A	NN;5	ENDI6??ATXYbTcTlTlTn  DLL)4==?dllI.779 	
 @I)D)9(:;PSTdSeNf S\SaSabnSop4 0 0 ; D D Fpp'77""$!#
D9 nnWd3	I 4=]%|@\]]
 (+;;{*<-1AAK'(1,J1NNN#L>1A)BTBTBVAYY[\l[mm}  R  ~U Ur s +/,,
;'*3
;'>$_F y///!#&5b&9J{#&*:>>+r+J&b_]`^`Ma&b&k&k&mJ{#NNi]*Y-?-?-A,DD]^j]kklm  9:J9K1\N [^^nop^q]rrwy
 z!!Y  q  ^s    &J	J&Jc                d    d| d}|r
|d|dz  }n	|d|dz  }t         j                  |       y )NzSaving the exported zA model is heavily recommended to avoid having to export it again.z# Do so with `model.save_pretrained(z)`.z Do so with `model.push_to_hub(z, create_pr=True)`.)r   r   )r9   r:   rk   rl   to_logs        r   ri   z!Transformer._backend_warn_to_savef  sN    '~5vw;<N;QQTUUF78J7MM`aaFvr   c                V    ddl m} dg|_         |j                  |f||d|| _        y)Loads the encoder model from T5r   )T5EncoderModel	decoder.*rJ   N)transformersr   "_keys_to_ignore_on_load_unexpectedr1   r4   )r9   r:   r)   r(   r;   r   s         r   rN   zTransformer._load_t5_modeln  s8    /=HM98.88
'-
FP
r   c                V    ddl m} dg|_         |j                  |f||d|| _        y)r   r   )MT5EncoderModelr   rJ   N)r   r   r   r1   r4   )r9   r:   r)   r(   r;   r   s         r   rO   zTransformer._load_mt5_modelw  s8    0>I]:9/99
'-
FP
r   c                l    d| j                          d| j                  j                  j                   dS )NzTransformer(z) with Transformer model: r   )get_config_dictr4   r6   r7   r9   s    r   __repr__zTransformer.__repr__  s7    d22455OPTP_P_PiPiPrPrOsstuur   c                |   |d   |d   d}d|v r|d   |d<    | j                   di ||ddi}|d   }t               rddlm} t	        | j                   |      r| j                   j
                  j                  rr|j                  d      }|d   }t        j                  || j                   j
                  j                  |j                  	      }	t        j                  |	|fd
      |d<   ||d<   | j                   j                  j                  r*t        |      dkD  rd}
t        |      dk  rd
}
||
   }||d<   |S )z#Returns token_embeddings, cls_token	input_idsattention_mask)r   r   token_type_idsreturn_dictFr   )PeftModelForFeatureExtraction)devicer{   )dimtoken_embeddings      all_layer_embeddingsr   )r4   r   rH   r   rM   active_peft_configis_prompt_learningsizerC   onesnum_virtual_tokensr   catr)   output_hidden_statesr   )r9   featuresr   trans_featuresoutput_statesoutput_tokensr   
batch_sizer   prefix_attention_maskall_layer_idxhidden_statess               r   forwardzTransformer.forward  sK   '/'<PXYiPjkx'/78H/IN+,'V.VFVPUV%a( : 4??,IJOO66II*//2
!)*:!;(-

 B B U U^l^s^s)% .3YY8M~7^de-f)*'4#$??!!663};MPQ;QM=!A% !)-8M/<H+,r   c                B    | j                   j                  j                  S N)r4   r)   hidden_sizer   s    r   get_word_embedding_dimensionz(Transformer.get_word_embedding_dimension  s    %%111r   c           
        i }t        |d   t              r|g}nt        |d   t              r\g }g |d<   |D ]L  }t        t	        |j                                     \  }}|j                  |       |d   j                  |       N |g}n7g g }	}|D ]*  }
|j                  |
d          |	j                  |
d          , ||	g}|D cg c])  }|D cg c]  }t        |      j                          c}+ }}}| j                  r-|D cg c]   }|D cg c]  }|j                          c}" }}}|j                   | j                  ||dd| j                  d       |S c c}w c c}}w c c}w c c}}w )z-Tokenizes a text and maps tokens to token-idsr   	text_keysr{   longest_firstpt)padding
truncationreturn_tensors
max_length)rM   strrd   nextiteritemsappendstripr&   lowerupdater2   r%   )r9   textsr   outputto_tokenizelookuptext_keytextbatch1batch2
text_tuplecolss                r   tokenizezTransformer.tokenize  sr    eAh$ 'Ka$'K"$F; 5!%d6<<>&:!;$""4({#**845 '-KFF# -
jm,jm,- "6*K ALL41A4LL ?JKs3!AGGI3KKKDNN*#..	
  5L 4Ks0   	E$ E.E$	E/E*'E/E$*E/c                \    | j                   D ci c]  }|| j                  |    c}S c c}w r   )r-   __dict__)r9   keys     r   r   zTransformer.get_config_dict  s*    373C3CDCT]]3''DDDs   )c                >   | j                   j                  ||       | j                  j                  |       t        t        j
                  j                  |d      d      5 }t        j                  | j                         |d       d d d        y # 1 sw Y   y xY w)N)safe_serializationsentence_bert_config.jsonwr   )indent)
r4   save_pretrainedr2   re   r   r   joinrf   dumpr   )r9   output_pathr   fOuts       r   savezTransformer.save  sw    ''HZ'[&&{3"'',,{,GH#N 	>RVIId**,d1=	> 	> 	>s   #'BBc                   dD ]C  }t         j                  j                  ||      }t         j                  j                  |      sC n t	              5 }t        j                  |      }d d d        dv rd|d   v r|d   j                  d       d|v rd|d   v r|d   j                  d       d|v rd|d   v r|d   j                  d        | dd|i|S # 1 sw Y   pxY w)N)r   zsentence_roberta_config.jsonzsentence_distilbert_config.jsonzsentence_camembert_config.jsonzsentence_albert_config.jsonz sentence_xlm-roberta_config.jsonzsentence_xlnet_config.jsonr;   trust_remote_coder<   r=   r:   r   )r   r   r   ra   re   rf   rg   rc   )cls
input_pathconfig_namesbert_config_pathfInr)   s         r   rg   zTransformer.load  s    
 	K !#Z Eww~~/0	 #$ 	$YYs^F	$ 6!&9VL=Q&Q< $$%89v%*=HXAY*Y#$(()<=F"':f]>S'S=!%%&9:;j;F;;	$ 	$s   CC")NNNNNFNrC   )r:   r   r%   z
int | Noner;   dict[str, Any] | Noner<   r   r=   r   r(   
str | Noner&   r"   r>   r   r.   r   r   r   )r:   r   r(   r   r.   r   r=   dict[str, Any])r   r   )rj   r   rk   r"   r;   r   r   r   rm   r   rl   r   r   ztuple[bool, dict[str, Any]])r:   r   rk   r   rl   r   r   r   )r   r   )r   dict[str, torch.Tensor]r   r   )r   int)T)r   z.list[str] | list[dict] | list[tuple[str, str]]r   z
str | boolr   r   )r   r   )r   r   r   r"   r   r   )r   r   r   r!   )r7   
__module____qualname____doc__r#   __annotations__r,   r/   r0   rP   rR   rQ   rb   ri   rN   rO   r   r   r   r   r   r   classmethodrg   __classcell__)r6   s   @r   r!   r!      sv   . L$
 &*,004-1 $#&*.W.W #.W *	.W
 ..W +.W .W .W !$.W .W 
.W`b8v&5Sn*SXc"c" c" #	c"
 c" c" c" 
%c"J

v#J2 \`&C&NX&	 &PE> < <r   r!   )r   r   r   r   r   zCallable[..., None])
__future__r   rf   loggingr   r   pathlibr   typingr   r   r   rC   r   r   r	   r
   r   r   r   transformers.utils.import_utilsr   transformers.utils.peft_utilsr   	getLoggerr7   r   r   Moduler!   r   r   r   <module>r     sX    "   	        R R = B			8	$Z<")) Z<r   