
    sg,                         d dl mZmZmZmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZ ddlmZ erddlmZ  e       rd d	lmZ  e
       rd d
lZ ej*                  e      Zd Z G d de      Zy
)    )TYPE_CHECKINGAnyDictList   )prepare_for_hqq_linear)is_accelerate_availableis_hqq_availableis_torch_availablelogging   )HfQuantizer)get_module_from_name)PreTrainedModel)remove_hook_from_moduleNc                 ^    |j                  d      d d }| }|D ]  }|j                  |   } |S )N.)split_modules)modelnamemodule_treeparentms        X/var/www/html/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_hqq.pyfind_parentr   %   s=    **S/#2&KF $#$M    c                   $    e Zd ZdZdZdZdZdgZ fdZd Z	ddd	e
e   d
ede
e   fdZddde
e   de
e   de
e   fdZdddddedeeef   def
dZdddddedddeeef   de
e   fdZd Z	 dddde
e   fdZd dZddZedefd       Z xZS )!HqqHfQuantizerz
    HQQ quantizer base HF class.
    nn.Linear modules are first tagged with quant_config in _process_model_before_weight_loading().
    The actual quantization and offloading to the GPU is done in check_quantized_param().
    FThqqc                 B    t        |   |fi | d | _        d| _        y )NF)super__init__torch_dtypeusing_multi_gpu)selfquantization_configkwargs	__class__s      r   r$   zHqqHfQuantizer.__init__9   s&    ,77$r   c                 `   t               st        d      |j                  dd      s|j                  dd      rt        d      t        j
                  j                         st        d      | j                  9d|v r|d   | _        n*t        j                  | _        t        j                  d       |j                  d	d       }t        |t              rZd
|j                         v sd|j                         v rt        d      t        t!        |j                                     dkD  | _        y y )NzA valid HQQ version (>=0.2.1) is not available. Please follow the instructions to install it: `https://github.com/mobiusml/hqq/`.from_tfF	from_flaxzwConverting weights from tf/flax weights is currently not supported, please make sure the weights are in PyTorch format.z/No GPU found. A GPU is needed for quantization.r%   zUSetting torch_dtype to torch.float32 as the default value since it was not specified.
device_mapcpudiskzYou are attempting to use an HQQ model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.r   )r
   ImportErrorget
ValueErrortorchcudais_availableRuntimeErrorr%   float32loggerinfo
isinstancedictvalueslensetr&   )r'   argsr)   r.   s       r   validate_environmentz#HqqHfQuantizer.validate_environment>   s    " T  ::i'6::k5+I; 
 zz&&(PQQ#&#)-#8 #(== stZZd3
j$'
))++v9J9J9L/L h 
 (+3z/@/@/B+C'Dq'H$ (r   r   r   missing_keysprefixreturnc                 R    | j                   r|D cg c]	  }d|vs| c}S |S c c}w )Nweight)pre_quantized)r'   r   rB   rC   r)   keys         r   update_missing_keysz"HqqHfQuantizer.update_missing_keys^   s1     #/ICHC4GCII Js   	$$expected_keysloaded_keysc                    | j                   s|S fdt        |      }t               r)ddlm} |j                         D ]  \  }}||_         t               } ||       |t        |j                  j                  d         z  } |d d t        j                  d      j                         dhz
  }	t               }
|D ](  t        fd|D              s|
j                         * ||
z  }|D ]_  }|d	z   |v r|j                  |d	z          n%|j                  |	D ch c]
  }|d
z   |z    c}       |dz   |v sL|j                  |dz          a t        |      S c c}w )Nc                     | j                         D ]M  \  }}t        |t        j                  j                        r|j                  |j                          ||       O y N)named_childrenr;   r4   nnLinearaddr   )r   layersr   module_find_hqq_quantizable_layerss       r   rU   zIHqqHfQuantizer.update_expected_keys.<locals>._find_hqq_quantizable_layersn   sK     % 4 4 6 =ffuxx8JJv{{+,VV<=r   r   	HQQLinearskip_modulesr/   linear_layerquant_configcompute_dtypedevicebiasc              3   &   K   | ]  }|v  
 y wrN    ).0_modulerH   s     r   	<genexpr>z6HqqHfQuantizer.update_expected_keys.<locals>.<genexpr>   s     D'w#~Ds   z.weightr   z.bias)rG   r?   r
   hqq.core.quantizerW   named_modulesr   configr(   r4   float16state_dict_keysanyrR   updatelist)r'   r   rJ   rK   new_keysrW   r   rT   _valid_modules	_ref_keys_rm_keysrb   _ref_keyrU   rH   s                @@r   update_expected_keysz#HqqHfQuantizer.update_expected_keysg   sl    !!  	= }%3 !& 3 3 5 #f"# !UN(?c%,,"B"B>"RSSN "!EMMZ_o6(+I
 uH &D^DDLL%&  H * 4Y&+5LL9!45OOi$X(Ws]X%=$XYW$3LL7!234 H~	 %Ys   !E 
param_valueztorch.Tensor
param_name
state_dictc                 .   t               rddlm} t        ||      \  }}| j                  r@t        |t        j                  j                        xs t        |      xr |dk7  xr |dk7  S t        |t        j                  j                        xr |dk(  S )Nr   rV   rF   r^   )	r
   rd   rW   r   rG   r;   r4   rP   rQ   )	r'   r   rr   rs   rt   r)   rW   rT   tensor_names	            r   check_quantized_paramz$HqqHfQuantizer.check_quantized_param   s     325*EFEHHOO4U
698U *8+*6) fehhoo6R;(;RRr   target_deviceztorch.deviceunexpected_keysc           	         t               rddlm} t        ||      \  }}	dj	                  |j                  d      dd       }
t        ||
      }|
j                  d      d   }i }|j                         D ]=  \  }}|
dz   |v s|||j                  d      d   <   |(||v s-|j                  |       ? | j                  rt        |      ry |dd| j                  |      }|j                  |       |j                  Rt        |j                  t        j                        r.t        j                   j#                  |j                        |_        | j$                  r| j'                  |      }t)        |||       |`~t        j,                  j/                          y|D ]/  }t)        ||t        j                   j#                  ||                1 t1        |d      r ||j2                  | j                  |d	      }|j                  Rt        |j                  t        j                        r.t        j                   j#                  |j                        |_        | j$                  r| j'                  |      }t)        |||       n*|j5                  | j                  |
      }t)        |||       t        j,                  j/                          y)a  
        Each nn.Linear layer is processsed here.
        We first check if the corresponding module state_dict contains already HQQ quantized parameters.
        If not, we create a temp linear layer with the module state_dict params and use it for quantization
        r   rV   r   Nr   rY   r[   T)r\   r]   del_orig)dtyper]   )r
   rd   rW   r   joinr   r   itemsremoverG   r;   r%   load_state_dictr^   r4   TensorrP   	Parameterr&   _patch_layer_for_multigpusetattr__dict__r5   empty_cachehasattrr[   to)r'   r   rr   rs   rx   rt   ry   rW   rT   rv   
layer_nameparent_modulenodemodule_state_dictkv	hqq_layerrH   s                     r   create_quantized_paramz%HqqHfQuantizer.create_quantized_param   su    325*EXXj..s3CR89
#E:6$R( $$& 	.DAqC1$67!!''#,r"23".13G#**1-		. &),%!%!%"&"2"2(		 %%&78~~)j.V!&!3!3INN!C	## ::9E	M43 JJ""$ % 	MCFC!3!34Ec4J!KL	M
 6>*!##"..$I ~~)j.V!&!3!3INN!C	## ::9E	M43 YYT%5%5mYLFM40

 r   c                 <    t              d fd_        S )Nc                     t        j                  |j                  | j                        | j	                         j                               }| j                  || j                  z  }|S rN   )r4   matmulr   r]   
dequantizetr^   )r'   xouts      r   forward_with_devicezEHqqHfQuantizer._patch_layer_for_multigpu.<locals>.forward_with_device  sL    ,,qttDKK0$//2C2E2E2GHCyy$tyy Jr   c                      |       S rN   r`   )r   r   r   s    r   <lambda>z:HqqHfQuantizer._patch_layer_for_multigpu.<locals>.<lambda>  s    &9)Q&G r   )r   forward)r'   r   r   s    `@r   r   z(HqqHfQuantizer._patch_layer_for_multigpu  s#    +I6		 H	r   keep_in_fp32_modulesc                 >    ||ng }t        || j                        }y )N)r(   )r   r(   )r'   r   r.   r   r)   s        r   $_process_model_before_weight_loadingz3HqqHfQuantizer._process_model_before_weight_loading  s(     8L7W3]_ 'u$BZBZ[r   c                 >    d|_         | j                         |_        |S NT)is_hqq_quantizedis_serializableis_hqq_serializable)r'   r   r)   s      r   #_process_model_after_weight_loadingz2HqqHfQuantizer._process_model_after_weight_loading  s     !%$($8$8$:!r   c                      yr   r`   )r'   safe_serializations     r   r   zHqqHfQuantizer.is_serializable#  s    r   c                      yr   r`   )r'   s    r   is_trainablezHqqHfQuantizer.is_trainable&  s    r   rN   )r   r   )__name__
__module____qualname____doc__use_keep_in_fp32_modules requires_parameters_quantizationrequires_calibrationrequired_packagesr$   rA   r   strrI   rq   r   r   boolrw   r   r   r   r   r   propertyr   __classcell__)r*   s   @r   r    r    -   ss     %'+$ %
I@ & 6:3i IL 	c 0&07;Cy0OSTWy0	c0dS S $S 	S
 cNS 
S*T! T! $T! 	T!
 &T! cNT! cT!n
  +/	\ \ #3i	\
 d  r   r    )typingr   r   r   r   integrationsr   utilsr	   r
   r   r   baser   quantizers_utilsr   modeling_utilsr   accelerate.hooksr   r4   
get_loggerr   r9   r   r    r`   r   r   <module>r      s]    2 1 1 Z Z  2 0 8			H	%{[ {r   