
import importlib
import types
from typing import TYPE_CHECKING, Union

from packaging import version

from .base import HfQuantizer
from .quantizers_utils import get_module_from_name


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from typing import Any, Dict, List

from ..utils import is_torch_available, is_torchao_available, logging


if is_torch_available():
    import torch
    import torch.nn as nn

logger = logging.get_logger(__name__)


# Finds the parent module of the module referred to by a dotted `name`.
def find_parent(model, name):
    module_tree = name.split(".")[:-1]
    parent = model
    for m in module_tree:
        parent = parent._modules[m]
    return parent


def _quantization_type(weight):
    from torchao.dtypes import AffineQuantizedTensor
    from torchao.quantization.linear_activation_quantized_tensor import LinearActivationQuantizedTensor

    if isinstance(weight, AffineQuantizedTensor):
        return f"{weight.__class__.__name__}({weight._quantization_type()})"

    if isinstance(weight, LinearActivationQuantizedTensor):
        return f"{weight.__class__.__name__}(activation={weight.input_quant_func}, weight={_quantization_type(weight.original_weight_tensor)})"


def _linear_extra_repr(self):
    weight = _quantization_type(self.weight)
    if weight is None:
        return f"in_features={self.weight.shape[1]}, out_features={self.weight.shape[0]}, weight=None"
    else:
        return f"in_features={self.weight.shape[1]}, out_features={self.weight.shape[0]}, weight={weight}"


class TorchAoHfQuantizer(HfQuantizer):
    """
    Quantizer for torchao: https://github.com/pytorch/ao/
    TFtorchaoc                 &    t        |   |fi | y N)super__init__)r/   quantization_configkwargsr'   s      r   r7   zTorchAoHfQuantizer.__init__L   s    ,77r   c                    t               st        d      d| _        |j                  dd       }t	        |t
              rBd|j                         v sd|j                         v r| j                  rt        d      d| _        | j                  ro|j                  dd       }|rZt        j                  t        j                  j                  d	            }|t        j                  d
      k  rt        d| d      y y y )NzSLoading an torchao quantized model requires torchao library (`pip install torchao`)F
device_mapcpudiskzYou are attempting to perform cpu/disk offload with a pre-quantized torchao model This is not supported yet . Please remove the CPU or disk device from the device_map.Tweights_onlytorchz2.5.0zlIn order to use torchao pre-quantized model, you need to have torch>=2.5.0. However, the current version is zc. You can also set with `weights_only=False` in `from_pretrained` if you don't want to update torch)r   ImportErroroffloadgetr&   dictvaluespre_quantized
ValueErrorr   parse	importlibmetadataRuntimeError)r/   argsr9   r;   r>   torch_versions         r   validate_environmentz'TorchAoHfQuantizer.validate_environmentO   s   #%sttZZd3
j$'
))++v9J9J9L/L%%$p 
 $(DL!::nd;L 'i.@.@.H.H.Q R 7==#99& G  HU  GV V} ~  :  r   c                 8   | j                   j                  dk(  rU|,|t        j                  k7  rt        j                  d| d       |%t        j                  d       t        j                  }| j                   j                  dk(  r|t        j                  }|S )Nint4_weight_onlyzSetting torch_dtype to zu for int4_weight_only quantization, but only bfloat16 is supported right now. Please set the torch_dtype to bfloat16.zSetting torch_dtype to torch.bfloat16 for int4_weight_only quantization since only bfloat16 is supported right now. Please set torch_dtype=torch.bfloat16 to remove this warning.#int8_dynamic_activation_int8_weight)r8   
quant_typer?   bfloat16loggerwarning_oncefloat32)r/   torch_dtypes     r   update_torch_dtypez%TorchAoHfQuantizer.update_torch_dtypeh   s    ##..2DD&;%..+H##-k]  ;p  q "## H $nn##..2WW"#mmr   returnc                 >   t        j                  t        j                  j                  d            t        j                  d      kD  rKddlm} |j                  t        j                  t        j                  d}|| j                  j                     S t        d      )N
acceleratez0.19.0r   )CustomDtype)rO   int8_weight_onlyrP   zYou are using `device_map='auto'` on a torchao quantized model. To automatically compute the appropriate device map, you should upgrade your `accelerate` library with `pip install --upgrade accelerate`)r   rG   rH   rI   accelerate.utilsr[   INT4r?   int8r8   rQ   rF   )r/   target_dtyper[   map_to_target_dtypes       r   adjust_target_dtypez&TorchAoHfQuantizer.adjust_target_dtypey   s}    ==++33LABW]]S[E\\4 %0$4$4$)JJ7<zz#
 't'?'?'J'JKK5 r   
max_memoryc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r/   rc   keyvals       r   adjust_max_memoryz$TorchAoHfQuantizer.adjust_max_memory   s6    5?5E5E5GHcc39nH
H Is   )r   r
   c                     ddl m}  ||      | _        | j                  j                  /| j                  j	                  | j                  j                         y )Nr	   )get_keys_to_not_convert)integrationsrj   modules_to_not_convertr8   extend)r/   r   r9   rj   s       r   $_process_model_before_weight_loadingz7TorchAoHfQuantizer._process_model_before_weight_loading   sG    :&=e&D###::F''..t/G/G/^/^_r   param_valueztorch.Tensor
param_name
state_dictc                     |j                  dd       }t        fd| j                  D              ry|dk(  r| j                  ryt	        |      \  }}t        |t        j                  j                        xr |dk(  S )Nparam_devicec              3   :   K   | ]  }|d z   v xs |k(    yw)r   N ).0rf   rp   s     r   	<genexpr>z;TorchAoHfQuantizer.check_quantized_param.<locals>.<genexpr>   s'     gCc	Z'?SJ->?gs   Fr<   r,   )	popanyrl   rA   r   r&   r?   nnLinear)	r/   r   ro   rp   rq   r9   rs   moduletensor_names	      `     r   check_quantized_paramz(TorchAoHfQuantizer.check_quantized_param   so     zz.$7g4KfKfggU"t|| #7uj"IFKfehhoo6TK8<STr   target_deviceztorch.deviceunexpected_keysc                    ddl m} t        ||      \  }}	| j                  rwt        j
                  j                  |j                  |            |j                  |	<   t        |t
        j                        r t        j                  t        |      |_        yyt        j
                  j                  |      j                  |      |j                  |	<    ||| j                  j!                                y)z
        Each nn.Linear layer that needs to be quantized is processed here.
        First, we set the value of the weight tensor, then we move it to the target device. Finally, we quantize the module.
        """
        from torchao.quantization import quantize_

        module, tensor_name = get_module_from_name(model, param_name)

        if self.pre_quantized:
            module._parameters[tensor_name] = torch.nn.Parameter(param_value.to(device=target_device))
            if isinstance(module, nn.Linear):
                module.extra_repr = types.MethodType(_linear_extra_repr, module)
        else:
            module._parameters[tensor_name] = torch.nn.Parameter(param_value).to(device=target_device)
            quantize_(module, self.quantization_config.get_apply_tensor_subclass())

    def _process_model_after_weight_loading(self, model):
        """No process required for torchao quantized model"""
        return

    def is_serializable(self, safe_serialization=None):
        if safe_serialization:
            logger.warning(
                "torchao quantized model does not support safe serialization, please set `safe_serialization` to False"
            )
            return False
        _is_torchao_serializable = version.parse(importlib.metadata.version("huggingface_hub")) >= version.parse(
            "0.25.0"
        )
        if not _is_torchao_serializable:
            logger.warning("torchao quantized model is only serializable after huggingface_hub >= 0.25.0")
        if self.offload and self.quantization_config.modules_to_not_convert is None:
            logger.warning(
                "The model contains offloaded modules and these modules are not quantized. We don't recommend saving the model as we won't be able to reload them. "
                "If you want to specify modules to not quantize, please specify modules_to_not_convert in the quantization_config."
            )
            return False
        return _is_torchao_serializable

    @property
    def is_trainable(self):
        supported_quant_types_for_training = [
            "int8_weight_only",
            "int8_dynamic_activation_int8_weight",
        ]
        return self.quantization_config.quant_type in supported_quant_types_for_training
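
# Usage sketch: this quantizer is not instantiated by hand; the quantizer dispatch
# selects it when a `TorchAoConfig` is passed to `from_pretrained`. A minimal example,
# assuming torchao is installed and using a placeholder checkpoint id:
#
#     import torch
#     from transformers import AutoModelForCausalLM, TorchAoConfig
#
#     quantization_config = TorchAoConfig("int4_weight_only", group_size=128)
#     model = AutoModelForCausalLM.from_pretrained(
#         "meta-llama/Meta-Llama-3-8B",   # placeholder model id
#         torch_dtype=torch.bfloat16,     # int4_weight_only currently supports bfloat16 only
#         device_map="auto",
#         quantization_config=quantization_config,
#     )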