
    sg                         d dl Z d dlmZmZmZmZmZmZ d dlm	Z	 ddl
mZ ddlmZ erddlmZ dd	lmZmZmZmZ dd
lmZ  e       rd dlZ ej0                  e      Z G d de      Zy)    N)TYPE_CHECKINGAnyDictListOptionalUnion)version   )HfQuantizer)get_module_from_name   )PreTrainedModel)is_accelerate_availableis_optimum_quanto_availableis_torch_availablelogging)QuantoConfigc            
       6    e Zd ZdZddgZdZdZdef fdZd Z	d	 Z
d
 Zd"dZdee   dedee   fdZdddddedeeef   def
dZdeeeeef   f   deeeeef   f   fdZdddddeddfdZd#dZg fdddee   fdZd Zed$ded   fd        Zd$d!Z xZS )%QuantoHfQuantizerz*
    Quantizer for the quanto library
    quanto
accelerateTFquantization_configc                 F    t        |   |fi | | j                          y N)super__init__	post_init)selfr   kwargs	__class__s      [/var/www/html/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_quanto.pyr   zQuantoHfQuantizer.__init__2   s     ,77    c                 `    | j                   j                  | j                  st        d      yy)z 
        Safety checker
        NzWe don't support quantizing the activations with transformers library.Use quanto library for more complex use cases such as activations quantization, calibration and quantization aware training.)r   activationspre_quantized
ValueError)r   s    r!   r   zQuantoHfQuantizer.post_init6   s;     ##//;DDVDVO  EW;r"   c                 X    t               st        d      t               st        d      y )NzhLoading an optimum-quanto quantized model requires optimum-quanto library (`pip install optimum-quanto`)z`Loading an optimum-quanto quantized model requires accelerate library (`pip install accelerate`))r   ImportErrorr   )r   argsr   s      r!   validate_environmentz&QuantoHfQuantizer.validate_environment@   s5    *,z  '(r  )r"   c                 <    |ddi}t         j                  d       |S )N cpuzThe device_map was not initialized. Setting device_map to {'':'cpu'}. If you want to use the model for inference, please set device_map ='auto')loggerinfo)r   
device_maps     r!   update_device_mapz#QuantoHfQuantizer.update_device_mapJ   s+    eJKK\
 r"   returnc                 T    |%t         j                  d       t        j                  }|S )NzVYou did not specify `torch_dtype` in `from_pretrained`. Setting it to `torch.float32`.)r.   r/   torchfloat32)r   torch_dtypes     r!   update_torch_dtypez$QuantoHfQuantizer.update_torch_dtypeT   s$    KKpq--Kr"   missing_keysprefixc                 8   t               rddlm} g }|j                         D ]\  \  }}t	        |      s|D ]E  }||v s
|| d| v s|j                  d      r#|j                  d      r5|j                  |       G ^ |D 	cg c]	  }	|	|vs|	 c}	S c c}	w )Nr   QModuleMixin.z.weightz.bias)r   optimum.quantor<   named_modules
isinstanceendswithappend)
r   modelr8   r9   r<   not_missing_keysnamemodulemissingks
             r!   update_missing_keysz%QuantoHfQuantizer.update_missing_keysZ   s    &(3!//1 	9LD&&,/+ 9GDvhay4I,I ' 0 0 ; ' 0 0 9(//89	9 (Ea14D+DEEEs   	BBrC   r   param_valueztorch.Tensor
param_name
state_dictc                 @   t               rddlm} |j                  dd      }|j                  dd      }|<|:t	        |j                               }	|dk(  rt        |	      dkD  r|	dhk(  s|	ddhk(  sy	t        ||      \  }
}t        |
      rd
|v r|
j                   S y	)z=
        Check if a parameter needs to be quantized.
        r   r;   r0   Nparam_devicer-   r
   diskFweight)
r   r>   r<   getsetvalueslenr   r@   frozen)r   rC   rJ   rK   rL   r   r<   r0   rN   device_map_valuesrF   tensor_names               r!   check_quantized_paramz'QuantoHfQuantizer.check_quantized_paramj   s     '(3ZZd3
zz.$7!l&> #J$5$5$7 8u$->)?!)C)eW48IeU[_8\ 25*Efl+K0G}}$$r"   
max_memoryc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r   rY   keyvals       r!   adjust_max_memoryz#QuantoHfQuantizer.adjust_max_memory   s6    6@6F6F6HI(#sc3:oI
I Js   )target_deviceztorch.devicec                     ddl m}  |||||       t        ||      \  }}	|j                          d|j                  _        y)ze
        Create the quantized parameter by calling .freeze() after setting it to the module.
        r   )set_module_tensor_to_deviceFN)accelerate.utilsra   r   freezerP   requires_grad)
r   rC   rJ   rK   r_   r)   r   ra   rF   _s
             r!   create_quantized_paramz(QuantoHfQuantizer.create_quantized_param   s;     	A#E:}kR(
;	&+#r"   c                 P   t        j                  t        j                  j                  d            t        j                  d      kD  rTddlm} t        j                  |j                  |j                  |j                  d}|| j                  j                     }|S t        d      )Nr   z0.27.0r   )CustomDtype)int8float8int4int2zYou are using `device_map='auto'` on an optimum-quanto quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library,`pip install --upgrade accelerate` or install it from source.)r	   parse	importlibmetadatarb   rh   r4   ri   FP8INT4INT2r   weightsr&   )r   target_dtyperh   mappings       r!   adjust_target_dtypez%QuantoHfQuantizer.adjust_target_dtype   s    ==++33LABW]]S[E\\4 

%//#((#((	G #4#;#;#C#CDLP r"   keep_in_fp32_modulesc                    ddl m}m} | j                  j                   ||      | _        n| j                  j                  | _        t        | j                  t              s| j                  g| _        | j                  j                  |        ||| j                  | j                        \  }}| j                  |j                  _        y )Nr   )get_keys_to_not_convertreplace_with_quanto_layers)modules_to_not_convertr   )	integrationsry   rz   r   r{   r@   listextendconfig)r   rC   rw   r   ry   rz   re   s          r!   $_process_model_before_weight_loadingz6QuantoHfQuantizer._process_model_before_weight_loading   s     	W ##::B*A%*HD'*.*B*B*Y*YD'$55t<+/+F+F*GD'##**+?@-$*E*E[_[s[s
q ,0+C+C(r"   c                     |S r    r   rC   s     r!   #_process_model_after_weight_loadingz5QuantoHfQuantizer._process_model_after_weight_loading   s    r"   c                      y)NTr   r   s     r!   is_trainablezQuantoHfQuantizer.is_trainable   s    r"   c                      y)NFr   )r   safe_serializations     r!   is_serializablez!QuantoHfQuantizer.is_serializable   s    r"   )r6   torch.dtyper2   r   )rt   r   r2   r   r   ) __name__
__module____qualname____doc__required_packages requires_parameters_quantizationrequires_calibrationr   r   r   r*   r1   r7   r   strrI   r   r   boolrX   r   intr^   rf   rv   r   r   propertyr   r   r   __classcell__)r    s   @r!   r   r   )   sb    "<0'+$ L FtCy F# FRVWZR[ F   $ 	
 cN 
>DeCHo1E,F 4PSUZ[^`c[cUdPdKe , , $, 	,
 &,&( KMD&D>B3iD* (+<"=  r"   r   )rn   typingr   r   r   r   r   r   	packagingr	   baser   quantizers_utilsr   modeling_utilsr   utilsr   r   r   r   utils.quantization_configr   r4   
get_loggerr   r.   r   r   r"   r!   <module>r      s[     B B   2 0  5 			H	%g gr"   