
    sgl                         d dl Z d dlmZmZ d dlmZ ddlmZ erddlm	Z	 ddl
mZ dd	lmZmZmZmZ dd
lmZ  e       rd dlZ ej(                  e      Z G d de      Zy)    N)TYPE_CHECKINGOptional)version   )HfQuantizer   )PreTrainedModel)replace_with_aqlm_linear)is_accelerate_availableis_aqlm_availableis_torch_availablelogging)QuantizationConfigMixinc                   ~     e Zd ZdZdZdgZdZdef fdZd Z	ddZ
	 	 ddZddZedd	ed
   fd       ZddZ xZS )AqlmHfQuantizerzS
    Quantizer of the AQLM method. Enables the loading of prequantized models.
    TaqlmNquantization_configc                 4    t        |   |fi | || _        y N)super__init__r   )selfr   kwargs	__class__s      Y/var/www/html/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_aqlm.pyr   zAqlmHfQuantizer.__init__-   s    ,77#6     c                 X    t               st        d      t               st        d      y )NzGUsing `aqlm` quantization requires Accelerate: `pip install accelerate`zDUsing `aqlm` quantization requires AQLM: `pip install aqlm[gpu,cpu]`)r   ImportErrorr   )r   argsr   s      r   validate_environmentz$AqlmHfQuantizer.validate_environment1   s+    &(ghh "dee #r   c                     |jt         j                  j                         r't         j                  }t        j                  d       |S t         j                  }t        j                  d       |S )NzCUDA available. Assuming AQLM inference on GPU and loading the model in `torch.float16`. To overwrite it, set `torch_dtype` manually.zCUDA is unavailable. Assuming AQLM inference on CPU and loading the model in `torch.float32`. To overwrite it, set `torch_dtype` manually.)torchcudais_availablefloat16loggerinfofloat32)r   torch_dtypes     r   update_torch_dtypez"AqlmHfQuantizer.update_torch_dtype8   s`    zz&&(#mm \ 	 $mm a r   modelr	   c                     t        || j                  | j                  j                         | j                  |j                  _        y )N)r   linear_weights_not_to_quantize)r
   r   r-   configr   r+   r   s      r   $_process_model_before_weight_loadingz4AqlmHfQuantizer._process_model_before_weight_loadingF   s;    
 	! $ 8 8+/+C+C+b+b	

 ,0+C+C(r   c                     |S r    r/   s      r   #_process_model_after_weight_loadingz3AqlmHfQuantizer._process_model_after_weight_loadingR   s    r   c                    t        j                  t        j                  j                  d            t        j                  d      k\  }|ryt        j                  dt        j                  j                  d       d       y)Nr   z1.0.2Tz$Currently installed `aqlm` version (zw) doesn't support training. If you wish to train a quantized model, please update `aqlm` with `pip install aqlm>=1.0.2`F)r   parse	importlibmetadatar&   warning)r   r+   aqlm_supports_trainings      r   is_trainablezAqlmHfQuantizer.is_trainableU   sr    !(y/A/A/I/I&/Q!RV]VcVcdkVl!l!NN6y7I7I7Q7QRX7Y6Z  [R  S r   c                      y)NTr2   )r   safe_serializations     r   is_serializablezAqlmHfQuantizer.is_serializable`   s    r   )r)   torch.dtypereturnr>   )r+   r	   r   )__name__
__module____qualname____doc__requires_calibrationrequired_packagesoptimum_quantizerr   r   r    r*   r0   r3   propertyr   r:   r=   __classcell__)r   s   @r   r   r   $   sp      7,C 7f
D 
D (+<"=  r   r   )r6   typingr   r   	packagingr   baser   modeling_utilsr	   integrationsr
   utilsr   r   r   r   utils.quantization_configr   r"   
get_loggerr@   r&   r   r2   r   r   <module>rQ      sP     *   0 3 [ [ ? 			H	%=k =r   