
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_eetq_available, is_torch_available, logging
from .quantizers_utils import get_module_from_name


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class EetqHfQuantizer(HfQuantizer):
    """
    8-bit quantization with the EETQ quantization method:
        before loading: converts transformer Linear layers into W8A16Linear layers
        during loading: loads the 16-bit weights and passes them to the layer object
        after loading: quantizes the individual W8A16Linear weights into 8-bit at the first .cuda() call
    TFeetq
acceleratec                 4    t        |   |fi | || _        y N)super__init__quantization_config)selfr   kwargs	__class__s      Y/var/www/html/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_eetq.pyr   zEetqHfQuantizer.__init__-   s    ,77#6     c                 B   t               st        d      	 dd l}t	               st        d      |j                  dd      s|j                  dd      rt        d	      t        j                  j                         st        d
      |j                  dd       }|t        j                  d       y |At        |t              r0d|j                         v sd|j                         v rt        d      y y y # t        $ r}dt        |      v rt        d      | d }~ww xY w)NzUsing `eetq` 8-bit quantization requires eetq.Please install the latest version of eetq from : https://github.com/NetEase-FuXi/EETQr   shard_checkpointzYou are using a version of EETQ that is incompatible with the current transformers version. Either downgrade transformers to <= v4.46.3 or, if available, upgrade EETQ to > v1.0.0.zNLoading an EETQ quantized model requires accelerate (`pip install accelerate`)from_tfF	from_flaxzConverting into 8-bit weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.z/No GPU found. A GPU is needed for quantization.
device_mapzYou have loaded an EETQ model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.cpudiskzYou are attempting to load an EETQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)r   ImportErrorr   strr   get
ValueErrortorchcudais_availableRuntimeErrorloggerwarning_once
isinstancedictvalues)r   argsr   r   excr#   s         r   validate_environmentz$EetqHfQuantizer.validate_environment1   s?    "h 
	 '(noo::i'6::k5+I; 
 zz&&(PQQZZd3
I #*d+*:K:K:M1MQW[e[l[l[nQn h  Ro+ $=  
	!SX- "n 
 
	s   C6 6	D?DDreturnc                     |(t         j                  }t        j                  d|       |S |t         j                  k7  rt        j                  d       |S )Na  Overriding torch_dtype=%s with `torch_dtype=torch.float16` due to requirements of `eetq` to enable model loading in 8-bit. Pass your own torch_dtype to specify the dtype of the remaining non-linear layers or pass torch_dtype=torch.float16 to remove this warning.zRWe suggest you to set `torch_dtype=torch.float16` for better efficiency with EETQ.)r*   float16r.   info)r   torch_dtypes     r   update_torch_dtypez"EetqHfQuantizer.update_torch_dtype_   sQ    --KKKE   EMM)KKlmr   modelr   param_valueztorch.Tensor
param_name
state_dictc                     ddl m} t        ||      \  }}t        ||      rP| j                  s|dk(  r.|dk(  r(|j
                  t        j                  k7  rt        d      y|dk(  rt        d      y	y)
Nr   )
EetqLinearbiasweightz6Expect quantized weights but got an unquantized weightFweight_scalez;Expect unquantized weights but got a quantized weight_scaleT)	r   rA   r   r0   pre_quantizeddtyper*   int8r)   )	r   r<   r=   r>   r?   r   rA   moduletensor_names	            r   check_quantized_paramz%EetqHfQuantizer.check_quantized_paramm   st     	$25*Efj)!![F%:(*{/@/@EJJ/N$%]^^.0$%bccr   target_deviceztorch.deviceunexpected_keysc                     ddl m} t        ||      \  }}	 ||      \  }
}|
j                  |      |j                  |	<   |j                  d|j                  |             y)zB
        quantizes weights into qweight and weight_scales
        r   )quantize_and_preprocess_weightsweight_scalesN)r   rN   r   to_buffersregister)r   r<   r=   r>   rK   r?   rL   rN   rH   rI   	new_valuerD   s               r   create_quantized_paramz&EetqHfQuantizer.create_quantized_param   sU     	925*E"A+"N	<'0||M'B$)GHr   c                     |S r    )r   r<   r   s      r   #_process_model_after_weight_loadingz3EetqHfQuantizer._process_model_after_weight_loading   s    r   keep_in_fp32_modulesc                 B   ddl m}m}  ||      | _        | j                  j                  /| j                  j                  | j                  j                          ||| j                  | j                  | j                        }| j                  |j                  _        y )Nr
   )get_keys_to_not_convertreplace_with_eetq_linear)modules_to_not_convertr   rE   )integrationsrZ   r[   r\   r   extendrE   config)r   r<   r#   rX   r   rZ   r[   s          r   $_process_model_before_weight_loadingz4EetqHfQuantizer._process_model_before_weight_loading   s     	U&=e&D###::F''..t/G/G/^/^_(#'#>#> $ 8 8,,	
 ,0+C+C(r   c                      yNTrV   )r   safe_serializations     r   is_serializablezEetqHfQuantizer.is_serializable   s    r   c                      yrb   rV   )r   s    r   is_trainablezEetqHfQuantizer.is_trainable   s    r   )r:   torch.dtyper6   rg   r   )r<   r   )__name__
__module____qualname____doc__ requires_parameters_quantizationrequires_calibrationrequired_packagesr   r5   r;   r'   r   r   rJ   r   r   rT   rW   r`   rd   propertyboolrf   __classcell__)r   s   @r   r   r   !   s    (,$ .7,\  $ 	
 cN< 04I I $I 	I
 &I cNI "$s),I( +-	D D #3i	D. d  r   r   )typingr   r   r   r   r   baser	   modeling_utilsr   utilsr   r   r   r   quantizers_utilsr   r*   
get_loggerrh   r.   r   rV   r   r   <module>rx      sN    < ;  0 [ [ 2  
		H	%Vk Vr   
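

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of this module): this quantizer is not
# instantiated directly. It is selected by `from_pretrained` when an
# `EetqConfig` is passed, at which point the hooks above replace the model's
# Linear layers and quantize weights as they are loaded. A minimal example,
# assuming the `eetq` package, a CUDA GPU, and a transformers version that
# exposes `EetqConfig`; the checkpoint name is only a placeholder:
#
#     from transformers import AutoModelForCausalLM, EetqConfig
#
#     quantization_config = EetqConfig("int8")
#     model = AutoModelForCausalLM.from_pretrained(
#         "facebook/opt-350m",              # any PyTorch causal-LM checkpoint
#         device_map="auto",
#         quantization_config=quantization_config,
#     )
# ---------------------------------------------------------------------------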