
import importlib
from typing import TYPE_CHECKING, Optional

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_auto_gptq_available, is_optimum_available, is_torch_available, logging
from ..utils.quantization_config import GPTQConfig, QuantizationConfigMixin


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class GptqHfQuantizer(HfQuantizer):
    """
    Quantizer of the GPTQ method - for GPTQ the quantizer supports calibration of the model through the
    `auto_gptq` package. Quantization is done under the hood for users if they load a non-prequantized model.
    """

    requires_calibration = False
    required_packages = ["optimum", "auto_gptq"]
    optimum_quantizer = None

    def __init__(self, quantization_config: QuantizationConfigMixin, **kwargs):
        super().__init__(quantization_config, **kwargs)
        from optimum.gptq import GPTQQuantizer

        self.optimum_quantizer = GPTQQuantizer.from_dict(self.quantization_config.to_dict_optimum())

    def validate_environment(self, *args, **kwargs):
        # CPU execution is only supported by auto-gptq versions newer than 0.4.2;
        # with older versions a GPU is mandatory.
        gptq_supports_cpu = version.parse(importlib.metadata.version("auto-gptq")) > version.parse("0.4.2")
        if not gptq_supports_cpu and not torch.cuda.is_available():
            raise RuntimeError("GPU is required to quantize or run a quantized model.")
        elif not (is_optimum_available() and is_auto_gptq_available()):
            raise ImportError(
                "Loading a GPTQ quantized model requires optimum (`pip install optimum`) and the auto-gptq library"
                " (`pip install auto-gptq`)"
            )
        elif version.parse(importlib.metadata.version("auto_gptq")) < version.parse("0.4.2"):
            raise ImportError(
                "You need a version of auto_gptq >= 0.4.2 to use GPTQ: `pip install --upgrade auto-gptq`"
            )

    def update_torch_dtype(self, torch_dtype: "torch.dtype") -> "torch.dtype":
        if torch_dtype is None:
            torch_dtype = torch.float16
        elif torch_dtype != torch.float16:
            logger.info("We suggest setting `torch_dtype=torch.float16` for better efficiency with GPTQ.")
        return torch_dtype

    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if model.__class__.main_input_name != "input_ids":
            raise RuntimeError("We can only quantize pure text models.")

        if self.pre_quantized:
            # Swap the model's linear layers for their quantized counterparts so the
            # pre-quantized checkpoint weights can be loaded into them.
            model = self.optimum_quantizer.convert_model(model)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        if self.pre_quantized:
            model = self.optimum_quantizer.post_init_model(model)
        else:
            # Calibrate and quantize a full-precision model, defaulting the
            # calibration tokenizer to the model's own checkpoint.
            if self.quantization_config.tokenizer is None:
                self.quantization_config.tokenizer = model.name_or_path

            self.optimum_quantizer.quantize_model(model, self.quantization_config.tokenizer)
            model.config.quantization_config = GPTQConfig.from_dict(self.optimum_quantizer.to_dict())

    @property
    def is_trainable(self, model: Optional["PreTrainedModel"] = None):
        return True

    def is_serializable(self, safe_serialization=None):
        return True