
    +sgF                       d dl mZ d dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlZd dlmZmZ  ej                  e      Zerd dlmZ 	 d dlmZ 	 d dlmZmZ 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd	Z	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 dd
Z	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZy# e$ r Y rw xY w# e$ r Y tw xY w)    )annotationsN)Path)TYPE_CHECKINGCallableLiteral)disable_datasets_cachingis_datasets_availableSentenceTransformer)OVQuantizationConfig)OptimizationConfigQuantizationConfigc           
        ddl m} ddlm} 	 ddlm}m}	 ddlm}
 t        | |      r3t        |       r(t        | d   |      rt        | d   j                  |      st        d      | d   j                  }|	j                  |      t        t              r0|
j                   vrt        d      xs  t#        |
             d
t%        fdd|||d       y	# t        $ r t        d      w xY w)a  
    Export an optimized ONNX model from a SentenceTransformer model.

    The O1-O4 optimization levels are defined by Optimum and are documented here:
    https://huggingface.co/docs/optimum/main/en/onnxruntime/usage_guides/optimization

    The optimization levels are:

    - O1: basic general optimizations.
    - O2: basic and extended general optimizations, transformers-specific fusions.
    - O3: same as O2 with GELU approximation.
    - O4: same as O3 with mixed precision (fp16, GPU-only)

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be optimized. Must be loaded with `backend="onnx"`.
        optimization_config (OptimizationConfig | Literal["O1", "O2", "O3", "O4"]): The optimization configuration or level.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the optimized model will be saved.
        push_to_hub (bool, optional): Whether to push the optimized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the optimized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="onnx"`.
        ValueError: If the provided optimization_config is not valid.

    Returns:
        None
    r   r
   Transformer)ORTModelForFeatureExtractionORTOptimizer)AutoOptimizationConfigPlease install Optimum and ONNX Runtime to use this function. You can install them with pip: `pip install optimum[onnxruntime]` or `pip install optimum[onnxruntime-gpu]`]The model must be a Transformer-based SentenceTransformer model loaded with `backend="onnx"`.z\optimization_config must be an OptimizationConfig instance or one of 'O1', 'O2', 'O3', 'O4'.N	optimizedc                ,    j                  |       S N)file_suffix)optimize)save_dirr   optimization_config	optimizers    P/var/www/html/venv/lib/python3.12/site-packages/sentence_transformers/backend.py<lambda>z-export_optimized_onnx_model.<locals>.<lambda>j       ););<OQYgr);)s     export_optimized_onnx_modelonnxexport_functionexport_function_nameconfigmodel_name_or_pathpush_to_hub	create_prr   backend)sentence_transformersr   (sentence_transformers.models.Transformerr   optimum.onnxruntimer   r   !optimum.onnxruntime.configurationr   ImportError
isinstancelen
auto_model
ValueErrorfrom_pretrainedstr_LEVELSgetattrsave_or_push_to_hub_model)modelr   r)   r*   r+   r   r   r   r   r   r   	ort_modelr   s    `   `      @r   r#   r#      s   N :D
RL u125z%(K0%(--/KLk
 	
 /4Ah.A.AI,,Y7I%s+&<&D&DDn  "8%8Rg&<>QRT!s:"-	A  
8
 	

s   C& &C;c           
        ddl m} ddlm} 	 ddlm}m}	 ddlm}
 t        | |      r3t        |       r(t        | d   |      rt        | d   j                  |      st        d      | d   j                  }|	j                  |      t        t              rTdvrt        d	      d
d
 } t!        |
      d      xs) j"                  j$                  j'                          d| 'j"                  j$                  j'                          dt)        fdd|||d       y
# t        $ r t        d      w xY w)a  
    Export a quantized ONNX model from a SentenceTransformer model.

    This function applies dynamic quantization, i.e. without a calibration dataset.
    Each of the default quantization configurations quantize the model to int8, allowing
    for faster inference on CPUs, but are likely slower on GPUs.

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be quantized. Must be loaded with `backend="onnx"`.
        quantization_config (QuantizationConfig): The quantization configuration.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str | None, optional): The suffix to add to the quantized model file name. Defaults to None.

    Raises:
        ImportError: If the required packages `optimum` and `onnxruntime` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="onnx"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r
   r   )r   ORTQuantizer)AutoQuantizationConfigr   r   )arm64avx2avx512avx512_vnnizqquantization_config must be an QuantizationConfig instance or one of 'arm64', 'avx2', 'avx512', or 'avx512_vnni'.NF)	is_static_
_quantizedc                ,    j                  |       S r   quantize)r   r   quantization_config	quantizers    r   r    z5export_dynamic_quantized_onnx_model.<locals>.<lambda>   r!   r"   #export_dynamic_quantized_onnx_modelr$   r%   )r-   r   r.   r   r/   r   r>   r0   r?   r1   r2   r3   r4   r5   r6   r7   r9   weights_dtypenamelowerr:   )r;   rJ   r)   r*   r+   r   r   r   r   r>   r?   r<   quantization_config_namerK   s    `   `       @r   rL   rL   u   se   B :D
RL u125z%(K0%(--/KLk
 	
 /4Ah.A.AI,,Y7I%s+&PP D  $7q#9 Rg&<>QR]bc!s(;(I(I(N(N(T(T(V'WWXYqXr%s,::??EEGH
SsB"-	C  
8
 	

s   D/ /Ec
           
         ddl m}
 ddlm} 	 ddlm}m}m}m} t               st        d      t         |
      r3t               r(t         d   |      rt         d   j                  |      st        d      | |       } d   j                  } ||	      |j                  |      t!        d
 |||fD              r!t#        d |||fD              st        d       fd||nd}||nd}||nd}ndt%               5  j'                  ||fd||j(                  nd|      ddd       t+        fdd|||||	d       y# t        $ r t        d      w xY w# 1 sw Y   :xY w)a  
    Export a quantized OpenVINO model from a SentenceTransformer model.

    This function applies Post-Training Static Quantization (PTQ) using a calibration dataset, which calibrates
    quantization constants without requiring model retraining. Each default quantization configuration converts
    the model to int8 precision, enabling faster inference while maintaining accuracy.

    See https://sbert.net/docs/sentence_transformer/usage/efficiency.html for more information & benchmarks.

    Args:
        model (SentenceTransformer): The SentenceTransformer model to be quantized. Must be loaded with `backend="openvino"`.
        quantization_config (OVQuantizationConfig | dict | None): The quantization configuration. If None, default values are used.
        model_name_or_path (str): The path or Hugging Face Hub repository name where the quantized model will be saved.
        dataset_name(str, optional): The name of the dataset to load for calibration.
            If not specified, the `sst2` subset of the `glue` dataset will be used by default.
        dataset_config_name (str, optional): The specific configuration of the dataset to load.
        dataset_split (str, optional): The split of the dataset to load (e.g., 'train', 'test'). Defaults to None.
        column_name (str, optional): The column name in the dataset to use for calibration. Defaults to None.
        push_to_hub (bool, optional): Whether to push the quantized model to the Hugging Face Hub. Defaults to False.
        create_pr (bool, optional): Whether to create a pull request when pushing to the Hugging Face Hub. Defaults to False.
        file_suffix (str, optional): The suffix to add to the quantized model file name. Defaults to `qint8_quantized`.

    Raises:
        ImportError: If the required packages `optimum` and `openvino` are not installed.
        ValueError: If the provided model is not a valid SentenceTransformer model loaded with `backend="openvino"`.
        ValueError: If the provided quantization_config is not valid.

    Returns:
        None
    r   r
   r   )OVConfigOVModelForFeatureExtractionr   OVQuantizerzPlease install datasets, optimum-intel and openvino to use this function. You can install them with pip: `pip install datasets optimum[openvino]`zaPlease install datasets to use this function. You can install it with pip: `pip install datasets`zaThe model must be a Transformer-based SentenceTransformer model loaded with `backend="openvino"`.N)rJ   c              3  $   K   | ]  }|d u 
 y wN .0params     r   	<genexpr>z9export_static_quantized_openvino_model.<locals>.<genexpr>  s     
j5
j   c              3  $   K   | ]  }|d u 
 y wrV   rW   rX   s     r   r[   z9export_static_quantized_openvino_model.<locals>.<genexpr>  s      w#Twr\   zEither specify all of `dataset_name`, `dataset_config_name`, `dataset_split`, and `column_name`, or leave them all unspecified.c                .    j                  | ddd      S )N
max_lengthi  T)paddingr_   
truncation)	tokenizer)examplesr;   s    r   preprocess_functionzCexport_static_quantized_openvino_model.<locals>.preprocess_function  s    x#Z^__r"   gluesst2trainsentencec                     |          S rV   rW   )rc   column_namerd   s    r   r    z8export_static_quantized_openvino_model.<locals>.<lambda>%  s    1DXkEZ1[ r"   i,  )dataset_namedataset_config_namerd   num_samplesdataset_splitc                ,    j                  |       S )N)save_directory	ov_configrH   )r   calibration_datasetrq   rK   s    r   r    z8export_static_quantized_openvino_model.<locals>.<lambda>+  s    ););I *< *
 r"   &export_static_quantized_openvino_modelopenvinor%   )r-   r   r.   r   optimum.intelrR   rS   r   rT   r1   r	   r2   r3   r4   r5   r6   anyallr   get_calibration_datasetrm   r:   )r;   rJ   r)   rk   rl   rn   rj   r*   r+   r   r   r   rR   rS   r   rT   ov_modelrr   rq   rd   rK   s   `     `          @@@@r   rs   rs      s   T :D
jj !"o
 	

 u125z%(K0%(--/JKo
 	
 "24,1!H,?,?H-@AI++H5I

j<9Lm]h*i
jjsv w(46I=Ze'fw t  N
 	
` $0#;<L1D1P-V\%2%>MGM!,!8+jK	!	# 
'??% 3 [;N;Z+77`c' @ 

 
 F"-e  
V
 	

R
 
s   E )E1E.1E:c                V   |dk(  rd| d}n|dk(  rd| d}t        j                         5 }	 | |	       |dk(  ret        |	      |z  }	t        j                  |	dz  |	z         t        j                  |	dz  |	|z  j                  d	             |	j                         }	|dk(  rZt        |	      |z  }
|
j                  d
d
       t        |	      z  }|
|z  }t        j                  ||       |
j                         }	|rwd}|rPt        |      j                  dd      j                  dd      j                  dd      }d| d| d| d| d| d d}t        j                  |	||dd| d||       nt        |      |z  }
|
j                  d
d
       t        |	      z  }|
|z  }t        j                  ||       |dk(  rPt        |	      |z  j                  d	      }t        |
      |z  j                  d	      }t        j                  ||       d d d        y # 1 sw Y   y xY w)Nr$   model_z.onnxrt   openvino_model_z.xmlzopenvino_model.xmlzopenvino_model.binz.binT)parentsexist_ok (z(
	z, z,
	)z
)zGHello!

*This pull request has been automatically generated from the [`zT`](https://sbert.net/docs/package_reference/util.html#sentence_transformers.backend.zI) function from the Sentence Transformers library.*

## Config
```python
a  
```

## Tip:
Consider testing this pull request before merging by loading the model from this PR with the `revision` argument:
```python
from sentence_transformers import SentenceTransformer

# TODO: Fill in the PR number
pr_number = 2
model = SentenceTransformer(
    "z5",
    revision=f"refs/pr/{pr_number}",
    backend="z#",
    model_kwargs={"file_name": "a  "},
)

# Verify that everything works as expected
embeddings = model.encode(["The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium."])
print(embeddings.shape)

similarities = model.similarity(embeddings, embeddings)
print(similarities)
```
r;   zAdd exported z model )folder_pathpath_in_reporepo_id	repo_typecommit_messagecommit_descriptionr+   )tempfileTemporaryDirectoryr   shutilmovewith_suffixas_posixmkdirreprreplacehuggingface_hubupload_foldercopy)r&   r'   r(   r)   r*   r+   r   r,   	file_namer   dst_dirsourcedestinationr   opt_config_string
bin_sourcebin_destinations                    r   r:   r:   8  s    &[M/		J	%k]$7			$	$	& Q9(! j H~/HKK#77I9MNKK#77(Y:N9[9[\b9cd((*H f8nw.GMM$M6(^i/F!I-KKK,'')H!#$(L$8$8g$F$N$NtU\$]$e$efikp$q!*@ AU?U  Vj  k  j@ @     Y ""+ 
-+&"B ))$$*!!.wiwymL#5# -.8GMM$M6(^i/F!I-KKK, *$"8ny8EEfM
#'=9#<"I"I&"QJ8cQ9 Q9 Q9s   G*HH()FFN)r;   r   r   z4OptimizationConfig | Literal['O1', 'O2', 'O3', 'O4']r)   r7   r*   boolr+   r   r   
str | NonereturnNone)r;   r   rJ   zFQuantizationConfig | Literal['arm64', 'avx2', 'avx512', 'avx512_vnni']r)   r7   r*   r   r+   r   r   r   r   r   )NNNNFFqint8_quantized)r;   r   rJ   z"OVQuantizationConfig | dict | Noner)   r7   rk   r   rl   r   rn   r   rj   r   r*   r   r+   r   r   r7   r   r   )FFNr$   )r&   r   r'   r7   r)   r7   r*   r   r+   r   r   r   r,   r7   )
__future__r   loggingr   r   pathlibr   typingr   r   r   r   sentence_transformers.utilr   r	   	getLogger__name__logger)sentence_transformers.SentenceTransformerr   ru   r   r1   r0   r   r   r#   rL   rs   r:   rW   r"   r   <module>r      s   "     3 3  V			8	$M6\ "VVMV V 	V
 V V 
Vz "QQ_Q Q 	Q
 Q Q 
Qp  $&* $"(ll;l l 	l
 $l l l l l l 
lh "`9`9`9 	`9
 `9 `9 `9 `9I	    s$   
B; C ;CCCC