
    sg                         d dl mZmZmZmZ ddlmZ erddlmZ ddl	m
Z
mZmZ  e       rd dlZ ej                  e      Z G d d	e      Zy)
    )TYPE_CHECKINGDictListUnion   )HfQuantizer   )PreTrainedModel)is_accelerate_availableis_torch_availableloggingNc                        e Zd ZdZdZdZdgZ fdZd Zdd	Z	g fddd
e
e   fdZdeeeeef   f   deeeeef   f   fdZddZddZedefd       Z xZS )BitNetHfQuantizerz
    1.58-bit quantization from BitNet quantization method:
    Before loading: it converts the linear layers into BitLinear layers during loading.

    Checkout the paper introducing this method : https://arxiv.org/pdf/2402.17764
    FT
acceleratec                 4    t        |   |fi | || _        y N)super__init__quantization_config)selfr   kwargs	__class__s      [/var/www/html/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_bitnet.pyr   zBitNetHfQuantizer.__init__-   s    ,77#6     c                    t               st        d      |j                  dd      s|j                  dd      rt        d      t        j
                  j                         st        j                  d       y |j                  dd       }|t        j                  d       y |At        |t              r0d	|j                         v sd
|j                         v rt        d      y y y )NzOLoading a BitNet quantized model requires accelerate (`pip install accelerate`)from_tfF	from_flaxztLoading ternary weights from tf/flax is currently not supported, please make sure the weights are in PyTorch format.zhYou don't have a GPU available to load the model, the inference will be slow because of weight unpacking
device_mapzYou have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.cpudiskzYou are attempting to load a BitNet model with a device_map that contains a CPU or disk device.This is not supported. Please remove the CPU or disk device from the device_map.)r   ImportErrorget
ValueErrortorchcudais_availableloggerwarning_once
isinstancedictvalues)r   argsr   r   s       r   validate_environmentz&BitNetHfQuantizer.validate_environment1   s    &(opp::i'6::k5+I; 
 zz&&(z ZZd3
I #*d+*:K:K:M1MQW[e[l[l[nQn g  Ro+ $r   modelr
   c                     |S r    )r   r.   r   s      r   #_process_model_after_weight_loadingz5BitNetHfQuantizer._process_model_after_weight_loadingN   s    r   keep_in_fp32_modulesc                    ddl m}m}  ||      | _        | j                  j                  /| j                  j                  | j                  j                          ||| j                  | j                  | j                        }y )Nr	   )get_keys_to_not_convertreplace_with_bitnet_linear)modules_to_not_convertr   pre_quantized)integrationsr4   r5   r6   r   extendr7   )r   r.   r   r2   r   r4   r5   s          r   $_process_model_before_weight_loadingz6BitNetHfQuantizer._process_model_before_weight_loadingQ   so     	W&=e&D###::F''..t/G/G/^/^_*#'#>#> $ 8 8,,	
r   
max_memoryreturnc                 ^    |j                         D ci c]  \  }}||dz   }}}|S c c}}w )Ng?)items)r   r;   keyvals       r   adjust_max_memoryz#BitNetHfQuantizer.adjust_max_memoryf   s6    6@6F6F6HI(#sc3:oI
I Js   )c                 &    t         j                  }|S r   )r$   int8)r   target_dtypes     r   adjust_target_dtypez%BitNetHfQuantizer.adjust_target_dtypej   s    zzr   c                      y)NTr0   )r   safe_serializations     r   is_serializablez!BitNetHfQuantizer.is_serializablen   s    r   c                      y)NFr0   )r   s    r   is_trainablezBitNetHfQuantizer.is_trainableq   s    r   )r.   r
   )rD   torch.dtyper<   rK   r   )__name__
__module____qualname____doc__ requires_parameters_quantizationrequires_calibrationrequired_packagesr   r-   r1   r   strr:   r   r   intrA   rE   rH   propertyboolrJ   __classcell__)r   s   @r   r   r       s     (-$%7: +-	
 
 #3i	
*DeCHo1E,F 4PSUZ[^`c[cUdPdKe  d  r   r   )typingr   r   r   r   baser   modeling_utilsr
   utilsr   r   r   r$   
get_loggerrL   r'   r   r0   r   r   <module>r]      sK    4 3  0 H H  
		H	%S Sr   