
    sg                         d dl mZmZmZ d dlmZ ddlmZ  e       rddlZ ej                  e
      Z G d de      Zy)	   )is_compressed_tensors_availableis_torch_availablelogging)QuantizationConfigMixin   )HfQuantizer    Nc                        e Zd ZdZdZdgZdef fdZd ZddZ	d	 Z
dd
Zedefd       Zedefd       ZddefdZ xZS )CompressedTensorsHfQuantizerz
    Quantizer for the compressed_tensors package.  Loads and restores models to
    quantized state with compressed_tensors
    Tcompressed_tensorsquantization_configc                 ^    t        |   |fi | ddlm} |j	                  |      | _        y )Nr	   )ModelCompressor)super__init__compressed_tensors.compressorsr   from_compression_config
compressor)selfr   kwargsr   	__class__s       g/var/www/html/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_compressed_tensors.pyr   z%CompressedTensorsHfQuantizer.__init__#   s,    ,77B)AABUV    c                 X    t               st        d      t               st        d      y )NzuUsing `compressed_tensors` quantized models requires the compressed-tensors library: `pip install compressed-tensors`z;torch is required for using compressed-tensors quantization)r   ImportErrorr   )r   argsr   s      r   validate_environmentz1CompressedTensorsHfQuantizer.validate_environment*   s3    .03  "#[\\ $r   returnc                     |'t         j                  d       t        j                  }|S |t        j                  k7  rt         j                  d       |S )NzELoading model using torch.float16 for compressed-tensors quantizationz`We suggest you to set `torch_dtype=torch.float16` for better efficiency with compressed_tensors.)loggerinfotorchfloat16)r   torch_dtypes     r   update_torch_dtypez/CompressedTensorsHfQuantizer.update_torch_dtype4   sK    KK_`--K
 	 EMM)KKr r   c                 R    ddl m} | j                  j                  } |||d       y )Nr	   )apply_quantization_configT)run_compressed)compressed_tensors.quantizationr'   r   r   )r   modelr   r'   ct_quantization_configs        r   $_process_model_before_weight_loadingzACompressedTensorsHfQuantizer._process_model_before_weight_loading>   s#    M!%!D!D!%)?PTUr   c                      y N )r   r*   r   s      r   #_process_model_after_weight_loadingz@CompressedTensorsHfQuantizer._process_model_after_weight_loadingD   s    r   c                      y)z:Models quantized using compressed tensors can be finetunedTr/   r   s    r   is_trainablez)CompressedTensorsHfQuantizer.is_trainableG        r   c                      y)z7Loaded Models can carry out quantization aware trainingTr/   r2   s    r   is_qat_trainablez-CompressedTensorsHfQuantizer.is_qat_trainableL   r4   r   c                      y)z>Models quantized using compressed tensors can be saved to diskTr/   )r   safe_serializations     r   is_serializablez,CompressedTensorsHfQuantizer.is_serializableQ   s    r   )r$   torch.dtyper   r:   )r   Nr.   )__name__
__module____qualname____doc__requires_calibrationrequired_packagesr   r   r   r%   r,   r0   propertyboolr3   r6   r9   __classcell__)r   s   @r   r   r      s    
  -.W,C W]V d   $  $ r   r   )utilsr   r   r   utils.quantization_configr   baser   r"   
get_loggerr;   r    r   r/   r   r   <module>rH      s=    Q P ?  			H	%9; 9r   