
    sg                     b    d dl mZmZmZ  e       rddlZ ej
                  e      Z	 	 	 	 ddZy)   )is_optimum_quanto_availableis_torch_availablelogging    Nc                 "   ddl m} t               rddlm}m}m}m}	m}
m	} 
	d}d||d}|g }| j                         D ]  \  }}g j                  |       t        fd|D              sA |       5  t        |t        j                  j                         r |j"                  |j$                  |j&                  du|j(                  j*                  ||j,                     ||j.                           | j0                  |<   | j0                  |   j3                  d	       d
}nt        |t        j                  j4                        r_|j.                  S |j6                  |j8                  |j:                  |j&                  du||j.                           | j0                  |<   d
}ddd       t=        t?        |jA                                     dkD  rtC        ||||      \  }}jE                  d        | |fS # 1 sw Y   YxY w)a[  
    Public method that recursively replaces the Linear layers of the given model with Quanto quantized layers.
    Returns the converted model and a boolean that indicates if the conversion has been successfull or not.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        quantization_config (`AqlmConfig`, defaults to `None`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list`, *optional*, defaults to `None`):
            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
            converted.
        current_key_name (`list`, *optional*, defaults to `None`):
            A list that contains the current key name. This is used for recursion and should not be passed by the user.
        has_been_replaced (`bool`, *optional*, defaults to `None`):
            A boolean that indicates if the conversion has been successful or not. This is used for recursion and
            should not be passed by the user.
    r   )init_empty_weights)
QLayerNormQLinearqfloat8qint2qint4qint8)float8int8int4int2N)Nr   r   c              3   D   K   | ]  }|d j                        v   yw).N)join).0keycurrent_key_names     S/var/www/html/venv/lib/python3.12/site-packages/transformers/integrations/quanto.py	<genexpr>z-replace_with_quanto_layers.<locals>.<genexpr>A   s      W3#((#344Ws    )in_featuresout_featuresbiasdtypeweightsactivationsFT)r    )quantization_configmodules_to_not_convertr   has_been_replaced)#
accelerater   r   optimum.quantor	   r
   r   r   r   r   named_childrenappendany
isinstancetorchnnLinearr   r   r   weightr   r   r    _modulesrequires_grad_	LayerNormnormalized_shapeepselementwise_affinelenlistchildrenreplace_with_quanto_layerspop)modelr!   r"   r   r#   r   r	   r
   r   r   r   r   	w_mapping	a_mappingnamemodule_s      `             r   r8   r8      s   2 ."$TT"E5%PIw>I%!#,,. %!f#!%W@VWW#% 1fehhoo6+2$*$6$6%+%8%8#[[4$mm11 )*=*E*E F$-.A.M.M$N,ENN4( NN4(77>(,%(:(:;*66B/9"33"JJ"55"KKt3(12E2Q2Q(R0t, -1)+1, tFOO%&'!+#=$7'=!1"3$ A  	R K%!L ###A1 1s   ;D2HH	)NNNF)	utilsr   r   r   r+   
get_logger__name__loggerr8        r   <module>rF      s?    M L 			H	%
 J$rE   