
import decimal

import numpy as np
import torch
from torch import nn
from torch.autograd import Function

from ...utils import logging


logger = logging.get_logger(__name__)


class QuantEmbedding(nn.Module):
    """
    Quantized version of `torch.nn.Embedding`. Adds quantization-specific arguments on top of `torch.nn.Embedding`.

    Args:
        weight_bit (`int`, *optional*, defaults to `8`):
            Bitwidth for the quantized weight.
        momentum (`float`, *optional*, defaults to `0.95`):
            Momentum for updating the activation quantization range.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
    """

    def __init__(
        self,
        num_embeddings,
        embedding_dim,
        padding_idx=None,
        max_norm=None,
        norm_type=2.0,
        scale_grad_by_freq=False,
        sparse=False,
        _weight=None,
        weight_bit=8,
        momentum=0.95,
        quant_mode=False,
    ):
        super().__init__()
        self.num_ = num_embeddings
        self.dim = embedding_dim
        self.padding_idx = padding_idx
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        self.sparse = sparse

        self.weight = nn.Parameter(torch.zeros([num_embeddings, embedding_dim]))
        self.register_buffer("weight_scaling_factor", torch.zeros(1))
        self.register_buffer("weight_integer", torch.zeros_like(self.weight))

        self.weight_bit = weight_bit
        self.momentum = momentum
        self.quant_mode = quant_mode
        self.percentile_mode = False
        self.weight_function = SymmetricQuantFunction.apply

    def forward(self, x, positions=None, incremental_state=None):
        if not self.quant_mode:
            return (
                nn.functional.embedding(
                    x,
                    self.weight,
                    self.padding_idx,
                    self.max_norm,
                    self.norm_type,
                    self.scale_grad_by_freq,
                    self.sparse,
                ),
                None,
            )

        w = self.weight
        w_transform = w.data.detach()
        w_min = w_transform.min().expand(1)
        w_max = w_transform.max().expand(1)

        self.weight_scaling_factor = symmetric_linear_quantization_params(self.weight_bit, w_min, w_max, False)
        self.weight_integer = self.weight_function(
            self.weight, self.weight_bit, self.percentile_mode, self.weight_scaling_factor
        )

        emb_int = nn.functional.embedding(
            x,
            self.weight_integer,
            self.padding_idx,
            self.max_norm,
            self.norm_type,
            self.scale_grad_by_freq,
            self.sparse,
        )
        return emb_int * self.weight_scaling_factor, self.weight_scaling_factor


class QuantAct(nn.Module):
    """
    Quantizes the given activation.

    Args:
        activation_bit (`int`):
            Bitwidth for the quantized activation.
        act_range_momentum (`float`, *optional*, defaults to `0.95`):
            Momentum for updating the activation quantization range.
        per_channel (`bool`, *optional*, defaults to `False`):
            Whether or not to use channel-wise quantization.
        channel_len (`int`, *optional*):
            Specify the channel length when *per_channel* is set to `True`.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
    """

    def __init__(self, activation_bit, act_range_momentum=0.95, per_channel=False, channel_len=None, quant_mode=False):
        super().__init__()

        self.activation_bit = activation_bit
        self.act_range_momentum = act_range_momentum
        self.quant_mode = quant_mode
        self.per_channel = per_channel
        self.percentile = False
        self.act_function = SymmetricQuantFunction.apply

        if not self.per_channel:
            self.register_buffer("x_min", torch.zeros(1))
            self.register_buffer("x_max", torch.zeros(1))
            self.register_buffer("act_scaling_factor", torch.zeros(1))
            self.x_min -= 1e-5
            self.x_max += 1e-5
        else:
            raise NotImplementedError("per-channel mode is not currently supported for activation.")

    def __repr__(self):
        return (
            f"{self.__class__.__name__}(activation_bit={self.activation_bit}, "
            f"quant_mode: {self.quant_mode}, Act_min: {self.x_min.item():.2f}, "
            f"Act_max: {self.x_max.item():.2f})"
        )

    def forward(
        self,
        x,
        pre_act_scaling_factor=None,
        identity=None,
        identity_scaling_factor=None,
        specified_min=None,
        specified_max=None,
    ):
        x_act = x if identity is None else identity + x

        # collect running stats of the activation range during training
        if self.training:
            assert not self.percentile, "percentile mode is not currently supported for activation."
            assert not self.per_channel, "per-channel mode is not currently supported for activation."
            x_min = x_act.data.min()
            x_max = x_act.data.max()

            assert (
                x_max.isnan().sum() == 0 and x_min.isnan().sum() == 0
            ), "NaN detected when computing min/max of the activation"

            # Initialization
            if self.x_min.min() > -1.1e-5 and self.x_max.max() < 1.1e-5:
                self.x_min = self.x_min + x_min
                self.x_max = self.x_max + x_max

            # exponential moving average (EMA)
            # use momentum to prevent the quantized values from changing greatly at every iteration
            elif self.act_range_momentum == -1:
                self.x_min = torch.min(self.x_min, x_min)
                self.x_max = torch.max(self.x_max, x_max)
            else:
                self.x_min = self.x_min * self.act_range_momentum + x_min * (1 - self.act_range_momentum)
                self.x_max = self.x_max * self.act_range_momentum + x_max * (1 - self.act_range_momentum)

        if not self.quant_mode:
            return x_act, None

        x_min = self.x_min if specified_min is None else specified_min
        x_max = self.x_max if specified_max is None else specified_max

        self.act_scaling_factor = symmetric_linear_quantization_params(
            self.activation_bit, x_min, x_max, per_channel=self.per_channel
        )

        if pre_act_scaling_factor is None:
            # this is for the input quantization
            quant_act_int = self.act_function(x, self.activation_bit, self.percentile, self.act_scaling_factor)
        else:
            # requantize the already-quantized input (and optional identity branch) to the new scale
            quant_act_int = FixedPointMul.apply(
                x,
                pre_act_scaling_factor,
                self.activation_bit,
                self.act_scaling_factor,
                identity,
                identity_scaling_factor,
            )

        correct_output_scale = self.act_scaling_factor.view(-1)

        return quant_act_int * correct_output_scale, self.act_scaling_factor


class QuantLinear(nn.Module):
    """
    Quantized version of `torch.nn.Linear`. Adds quantization-specific arguments on top of `torch.nn.Linear`.

    Args:
        weight_bit (`int`, *optional*, defaults to `8`):
            Bitwidth for the quantized weight.
        bias_bit (`int`, *optional*, defaults to `32`):
            Bitwidth for the quantized bias.
        per_channel (`bool`, *optional*, defaults to `False`):
            Whether or not to use channel-wise quantization.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
    """

    def __init__(
        self, in_features, out_features, bias=True, weight_bit=8, bias_bit=32, per_channel=False, quant_mode=False
    ):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features

        self.weight = nn.Parameter(torch.zeros([out_features, in_features]))
        self.register_buffer("weight_integer", torch.zeros_like(self.weight))
        self.register_buffer("fc_scaling_factor", torch.zeros(self.out_features))
        if bias:
            self.bias = nn.Parameter(torch.zeros(out_features))
            self.register_buffer("bias_integer", torch.zeros_like(self.bias))

        self.weight_bit = weight_bit
        self.quant_mode = quant_mode
        self.per_channel = per_channel
        self.bias_bit = bias_bit
        self.percentile_mode = False
        self.weight_function = SymmetricQuantFunction.apply

    def __repr__(self):
        s = super().__repr__()
        s = f"({s} weight_bit={self.weight_bit}, quant_mode={self.quant_mode})"
        return s

    def forward(self, x, prev_act_scaling_factor=None):
        if not self.quant_mode:
            return nn.functional.linear(x, weight=self.weight, bias=self.bias), None

        # assert that prev_act_scaling_factor is a scalar tensor,
        # i.e. the input activation shares a single (global) quantization range
        assert prev_act_scaling_factor is not None and prev_act_scaling_factor.shape == (1,), (
            "Input activation to the QuantLinear layer should be globally (non-channel-wise) quantized. "
            "Please add a QuantAct layer with `per_channel = True` before this QuantAct layer"
        )

        w = self.weight
        w_transform = w.data.detach()
        if self.per_channel:
            w_min, _ = torch.min(w_transform, dim=1, out=None)
            w_max, _ = torch.max(w_transform, dim=1, out=None)
        else:
            w_min = w_transform.min().expand(1)
            w_max = w_transform.max().expand(1)

        self.fc_scaling_factor = symmetric_linear_quantization_params(self.weight_bit, w_min, w_max, self.per_channel)
        self.weight_integer = self.weight_function(
            self.weight, self.weight_bit, self.percentile_mode, self.fc_scaling_factor
        )

        bias_scaling_factor = self.fc_scaling_factor * prev_act_scaling_factor

        if self.bias is not None:
            self.bias_integer = self.weight_function(self.bias, self.bias_bit, False, bias_scaling_factor)

        prev_act_scaling_factor = prev_act_scaling_factor.view(1, -1)
        x_int = x / prev_act_scaling_factor

        return (
            nn.functional.linear(x_int, weight=self.weight_integer, bias=self.bias_integer) * bias_scaling_factor,
            bias_scaling_factor,
        )


class IntGELU(nn.Module):
    """
    Quantized version of `torch.nn.GELU`. Adds quantization-specific arguments on top of `torch.nn.GELU`.

    Args:
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
        force_dequant (`str`, *optional*, defaults to `"none"`):
            Force dequantize the layer if either "gelu" or "nonlinear" is given.
    """

    def __init__(self, quant_mode=True, force_dequant="none"):
        super().__init__()
        self.quant_mode = quant_mode

        if force_dequant in ["nonlinear", "gelu"]:
            logger.info("Force dequantize gelu")
            self.quant_mode = False

        if not self.quant_mode:
            self.activation_fn = nn.GELU()

        self.k = 1.4142
        self.const = 14  # dummy integer constant
        self.coeff = [-0.2888, -1.769, 1]  # a(x+b)**2 + c
        self.coeff[2] /= self.coeff[0]

    def int_erf(self, x_int, scaling_factor):
        b_int = torch.floor(self.coeff[1] / scaling_factor)
        c_int = torch.floor(self.coeff[2] / scaling_factor**2)
        sign = torch.sign(x_int)

        abs_int = torch.min(torch.abs(x_int), -b_int)
        y_int = sign * ((abs_int + b_int) ** 2 + c_int)
        scaling_factor = scaling_factor**2 * self.coeff[0]

        # avoid overflow
        y_int = floor_ste.apply(y_int / 2**self.const)
        scaling_factor = scaling_factor * 2**self.const

        return y_int, scaling_factor

    def forward(self, x, scaling_factor=None):
        if not self.quant_mode:
            return self.activation_fn(x), None

        x_int = x / scaling_factor
        sigmoid_int, sigmoid_scaling_factor = self.int_erf(x_int, scaling_factor / self.k)

        shift_int = 1.0 // sigmoid_scaling_factor

        x_int = x_int * (sigmoid_int + shift_int)
        scaling_factor = scaling_factor * sigmoid_scaling_factor / 2

        return x_int * scaling_factor, scaling_factor


class IntSoftmax(nn.Module):
    """
    Quantized version of `torch.nn.Softmax`. Adds quantization-specific arguments on top of `torch.nn.Softmax`.

    Args:
        output_bit (`int`):
            Bitwidth for the layer output activation.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
        force_dequant (`str`, *optional*, defaults to `"none"`):
            Force dequantize the layer if either "softmax" or "nonlinear" is given.
    """

    def __init__(self, output_bit, quant_mode=False, force_dequant="none"):
        super().__init__()
        self.output_bit = output_bit
        self.max_bit = 32
        self.quant_mode = quant_mode

        if force_dequant in ["nonlinear", "softmax"]:
            logger.info("Force dequantize softmax")
            self.quant_mode = False

        self.act = QuantAct(16, quant_mode=self.quant_mode)
        self.x0 = -0.6931  # -ln2
        self.const = 30  # dummy integer constant
        self.coef = [0.35815147, 0.96963238, 1.0]  # ax**2 + bx + c
        self.coef[1] /= self.coef[0]
        self.coef[2] /= self.coef[0]

    def int_polynomial(self, x_int, scaling_factor):
        with torch.no_grad():
            b_int = torch.floor(self.coef[1] / scaling_factor)
            c_int = torch.floor(self.coef[2] / scaling_factor**2)
        z = (x_int + b_int) * x_int + c_int
        scaling_factor = self.coef[0] * scaling_factor**2
        return z, scaling_factor

    def int_exp(self, x_int, scaling_factor):
        with torch.no_grad():
            x0_int = torch.floor(self.x0 / scaling_factor)
        x_int = torch.max(x_int, self.const * x0_int)

        q = floor_ste.apply(x_int / x0_int)
        r = x_int - x0_int * q
        exp_int, exp_scaling_factor = self.int_polynomial(r, scaling_factor)
        exp_int = torch.clamp(floor_ste.apply(exp_int * 2 ** (self.const - q)), min=0)
        scaling_factor = exp_scaling_factor / 2**self.const
        return exp_int, scaling_factor

    def forward(self, x, scaling_factor):
        if not self.quant_mode:
            return nn.functional.softmax(x, dim=-1), None

        x_int = x / scaling_factor

        x_int_max, _ = x_int.max(dim=-1, keepdim=True)
        x_int = x_int - x_int_max
        exp_int, exp_scaling_factor = self.int_exp(x_int, scaling_factor)

        # Avoid overflow
        exp, exp_scaling_factor = self.act(exp_int, exp_scaling_factor)
        exp_int = exp / exp_scaling_factor

        exp_int_sum = exp_int.sum(dim=-1, keepdim=True)
        factor = floor_ste.apply(2**self.max_bit / exp_int_sum)
        exp_int = floor_ste.apply(exp_int * factor / 2 ** (self.max_bit - self.output_bit))
        scaling_factor = 1 / 2**self.output_bit
        return exp_int * scaling_factor, scaling_factor


class IntLayerNorm(nn.Module):
    """
    Quantized version of `torch.nn.LayerNorm`. Adds quantization-specific arguments on top of `torch.nn.LayerNorm`.

    Args:
        output_bit (`int`, *optional*, defaults to `8`):
            Bitwidth for the layer output activation.
        quant_mode (`bool`, *optional*, defaults to `False`):
            Whether or not the layer is quantized.
        force_dequant (`str`, *optional*, defaults to `"none"`):
            Force dequantize the layer if either "layernorm" or "nonlinear" is given.
    """

    def __init__(self, normalized_shape, eps, output_bit=8, quant_mode=False, force_dequant="none"):
        super().__init__()
        self.normalized_shape = normalized_shape
        self.eps = eps

        self.weight = nn.Parameter(torch.zeros(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))

        self.quant_mode = quant_mode
        if force_dequant in ["nonlinear", "layernorm"]:
            logger.info("Force dequantize layernorm")
            self.quant_mode = False

        self.register_buffer("shift", torch.zeros(1))
        self.output_bit = output_bit
        self.max_bit = 32
        self.dim_sqrt = None
        self.activation = QuantAct(self.output_bit, quant_mode=self.quant_mode)

    def set_shift(self, y_int):
        with torch.no_grad():
            y_sq_int = y_int**2
            var_int = torch.sum(y_sq_int, axis=2, keepdim=True)
            shift = (torch.log2(torch.sqrt(var_int / 2**self.max_bit)).ceil()).max()
            shift_old = self.shift
            self.shift = torch.max(self.shift, shift)
            logger.info(f"Dynamic shift adjustment: {int(shift_old)} -> {int(self.shift)}")

    def overflow_fallback(self, y_int):
        """
        This fallback function is called when overflow is detected during training time, and adjusts the `self.shift`
        to avoid overflow in the subsequent runs.
        """
        self.set_shift(y_int)  # adjusts `self.shift`
        y_int_shifted = floor_ste.apply(y_int / 2**self.shift)
        y_sq_int = y_int_shifted**2
        var_int = torch.sum(y_sq_int, axis=2, keepdim=True)
        return var_int

    def forward(self, x, scaling_factor=None):
        if not self.quant_mode:
            mean = x.mean(axis=2, keepdim=True)
            y = x - mean
            var = torch.mean(y**2, axis=2, keepdim=True)
            x = y / torch.sqrt(self.eps + var)
            x = x * self.weight + self.bias
            return x, None

        # compute sqrt of the feature dimension if it is the first run
        if self.dim_sqrt is None:
            n = torch.tensor(x.shape[2], dtype=torch.float)
            self.dim_sqrt = torch.sqrt(n).to(x.device)

        # Normalization: computes mean and variance (std)
        x_int = x / scaling_factor
        mean_int = round_ste.apply(x_int.mean(axis=2, keepdim=True))
        y_int = x_int - mean_int
        y_int_shifted = floor_ste.apply(y_int / 2**self.shift)
        y_sq_int = y_int_shifted**2
        var_int = torch.sum(y_sq_int, axis=2, keepdim=True)

        # overflow handling in training time
        if self.training:
            # if overflow is detected
            if var_int.max() >= 2**self.max_bit:
                var_int = self.overflow_fallback(y_int)
                assert var_int.max() < 2**self.max_bit + 0.1, (
                    "Error detected in overflow handling: "
                    "`var_int` exceeds `self.max_bit` (the maximum possible bit width)"
                )

        # To be replaced with an integer-sqrt kernel that produces the same output
        std_int = floor_ste.apply(torch.sqrt(var_int)) * 2**self.shift
        factor = floor_ste.apply(2**31 / std_int)
        y_int = floor_ste.apply(y_int * factor / 2)
        scaling_factor = self.dim_sqrt / 2**30

        # scaling and shifting
        bias = self.bias.data.detach() / (self.weight.data.detach())
        bias_int = floor_ste.apply(bias / scaling_factor)

        y_int = y_int + bias_int
        scaling_factor = scaling_factor * self.weight
        x = y_int * scaling_factor

        return x, scaling_factor


def get_percentile_min_max(input, lower_percentile, upper_percentile, output_tensor=False):
    """
    Calculate the percentile max and min values in a given tensor

    Args:
        input (`torch.Tensor`):
            The target tensor to calculate percentile max and min.
        lower_percentile (`float`):
            If 0.1, the value of the smallest 0.1% of values in the tensor is returned as the percentile min.
        upper_percentile (`float`):
            If 99.9, the value of the largest 0.1% of values in the tensor is returned as the percentile max.
        output_tensor (`bool`, *optional*, defaults to `False`):
            If True, this function returns tensors, otherwise it returns values.

    Returns:
        `Tuple(torch.Tensor, torch.Tensor)`: Percentile min and max value of *input*
    """
    input_length = input.shape[0]

    lower_index = round(input_length * (1 - lower_percentile * 0.01))
    upper_index = round(input_length * upper_percentile * 0.01)

    upper_bound = torch.kthvalue(input, k=upper_index).values

    if lower_percentile == 0:
        lower_bound = upper_bound * 0
    else:
        lower_bound = -torch.kthvalue(-input, k=lower_index).values

    if not output_tensor:
        lower_bound = lower_bound.item()
        upper_bound = upper_bound.item()
    return lower_bound, upper_bound


def linear_quantize(input, scale, zero_point, inplace=False):
    """
    Quantize single-precision input tensor to integers with the given scaling factor and zeropoint.

    Args:
        input (`torch.Tensor`):
            Single-precision input tensor to be quantized.
        scale (`torch.Tensor`):
            Scaling factor for quantization.
        zero_point (`torch.Tensor`):
            Shift for quantization.
        inplace (`bool`, *optional*, defaults to `False`):
            Whether to compute inplace or not.

    Returns:
        `torch.Tensor`: Linearly quantized value of *input* according to *scale* and *zero_point*.
    """
    # reshape scale and zeropoint for convolutional weights and activation
    if len(input.shape) == 4:
        scale = scale.view(-1, 1, 1, 1)
        zero_point = zero_point.view(-1, 1, 1, 1)
    # reshape scale and zeropoint for linear weights
    elif len(input.shape) == 2:
        scale = scale.view(-1, 1)
        zero_point = zero_point.view(-1, 1)
    else:
        scale = scale.view(-1)
        zero_point = zero_point.view(-1)
    # quantized = float / scale + zero_point
    if inplace:
        input.mul_(1.0 / scale).add_(zero_point).round_()
        return input
    return torch.round(1.0 / scale * input + zero_point)


def symmetric_linear_quantization_params(num_bits, saturation_min, saturation_max, per_channel=False):
    """
    Compute the scaling factor with the given quantization range for symmetric quantization.

    Args:
        saturation_min (`torch.Tensor`):
            Lower bound for quantization range.
        saturation_max (`torch.Tensor`):
            Upper bound for quantization range.
        per_channel (`bool`, *optional*, defaults to `False`):
            Whether or not to use channel-wise quantization.

    Returns:
        `torch.Tensor`: Scaling factor that linearly quantizes the given range between *saturation_min* and
        *saturation_max*.
    """
    # in this part, we do not need any gradient computation,
    # in order to enforce this, we put torch.no_grad()
    with torch.no_grad():
        n = 2 ** (num_bits - 1) - 1

        if per_channel:
            scale, _ = torch.max(torch.stack([saturation_min.abs(), saturation_max.abs()], dim=1), dim=1)
            scale = torch.clamp(scale, min=1e-8) / n
        else:
            scale = max(saturation_min.abs(), saturation_max.abs())
            scale = torch.clamp(scale, min=1e-8) / n

    return scale


class SymmetricQuantFunction(Function):
    """
    Class to quantize the given floating-point values using symmetric quantization with given range and bitwidth.
    """

    @staticmethod
    def forward(ctx, x, k, percentile_mode, scale):
        """
        Args:
            x (`torch.Tensor`):
                Floating point tensor to be quantized.
            k (`int`):
                Quantization bitwidth.
            percentile_mode (`bool`):
                Whether or not to use percentile calibration.
            scale (`torch.Tensor`):
                Pre-calculated scaling factor for *x*. Note that the current implementation of SymmetricQuantFunction
                requires pre-calculated scaling factor.

        Returns:
            `torch.Tensor`: Symmetric-quantized value of *input*.
        """
        zero_point = torch.tensor(0.0).to(scale.device)

        n = 2 ** (k - 1) - 1
        new_quant_x = linear_quantize(x, scale, zero_point, inplace=False)
        new_quant_x = torch.clamp(new_quant_x, -n, n - 1)

        ctx.scale = scale
        return new_quant_x

    @staticmethod
    def backward(ctx, grad_output):
        scale = ctx.scale
        if len(grad_output.shape) == 4:
            scale = scale.view(-1, 1, 1, 1)
        # reshape scale and zeropoint for linear weights
        elif len(grad_output.shape) == 2:
            scale = scale.view(-1, 1)
        else:
            scale = scale.view(-1)

        return grad_output.clone() / scale, None, None, None, None


class floor_ste(Function):
    """
    Straight-through Estimator (STE) for `torch.floor()`.
    """

    @staticmethod
    def forward(ctx, x):
        return torch.floor(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.clone()


class round_ste(Function):
    """
    Straight-through Estimator (STE) for `torch.round()`.
    """

    @staticmethod
    def forward(ctx, x):
        return torch.round(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.clone()


def batch_frexp(inputs, max_bit=31):
    """
    Decompose the scaling factor into mantissa and twos exponent.

    Args:
        inputs (`torch.Tensor`):
            Target scaling factor to decompose.

    Returns:
        `Tuple(torch.Tensor, torch.Tensor)`: mantissa and exponent
    """

    shape_of_input = inputs.size()

    # transform the input into a 1-d tensor
    inputs = inputs.view(-1)

    output_m, output_e = np.frexp(inputs.cpu().numpy())
    tmp_m = []
    for m in output_m:
        int_m_shifted = int(
            decimal.Decimal(m * (2**max_bit)).quantize(decimal.Decimal("1"), rounding=decimal.ROUND_HALF_UP)
        )
        tmp_m.append(int_m_shifted)
    output_m = np.array(tmp_m)

    output_e = float(max_bit) - output_e

    return (
        torch.from_numpy(output_m).to(inputs.device).view(shape_of_input),
        torch.from_numpy(output_e).to(inputs.device).view(shape_of_input),
    )


class FixedPointMul(Function):
    """
    Function to perform fixed-point arithmetic that can match integer arithmetic on hardware.

    Args:
        pre_act (`torch.Tensor`):
            Input tensor.
        pre_act_scaling_factor (`torch.Tensor`):
            Scaling factor of the input tensor *pre_act*.
        bit_num (`int`):
            Quantization bitwidth.
        z_scaling_factor (`torch.Tensor`):
            Scaling factor of the output tensor.
        identity (`torch.Tensor`, *optional*):
            Identity tensor, if exists.
        identity_scaling_factor (`torch.Tensor`, *optional*):
            Scaling factor of the identity tensor *identity*, if exists.

    Returns:
        `torch.Tensor`: Output tensor (*pre_act* if *identity* is not given, otherwise the addition of *pre_act* and
        *identity*), whose scale is rescaled to *z_scaling_factor*.
    """

    @staticmethod
    def forward(
        ctx,
        pre_act,
        pre_act_scaling_factor,
        bit_num,
        z_scaling_factor,
        identity=None,
        identity_scaling_factor=None,
    ):
        if len(pre_act_scaling_factor.shape) == 3:
            reshape = lambda x: x  # noqa: E731
        else:
            reshape = lambda x: x.view(1, 1, -1)  # noqa: E731
        ctx.identity = identity

        n = 2 ** (bit_num - 1) - 1

        with torch.no_grad():
            pre_act_scaling_factor = reshape(pre_act_scaling_factor)
            if identity is not None:
                identity_scaling_factor = reshape(identity_scaling_factor)

            ctx.z_scaling_factor = z_scaling_factor

            z_int = round_ste.apply(pre_act / pre_act_scaling_factor)
            _A = pre_act_scaling_factor.type(torch.double)
            _B = (z_scaling_factor.type(torch.float)).type(torch.double)
            new_scale = _A / _B
            new_scale = reshape(new_scale)

            m, e = batch_frexp(new_scale)

            output = z_int.type(torch.double) * m.type(torch.double)
            output = torch.round(output / (2.0**e))

            if identity is not None:
                # needs addition of the identity activation
                wx_int = round_ste.apply(identity / identity_scaling_factor)

                _A = identity_scaling_factor.type(torch.double)
                _B = (z_scaling_factor.type(torch.float)).type(torch.double)
                new_scale = _A / _B
                new_scale = reshape(new_scale)

                m1, e1 = batch_frexp(new_scale)
                output1 = wx_int.type(torch.double) * m1.type(torch.double)
                output1 = torch.round(output1 / (2.0**e1))

                output = output1 + output

            return torch.clamp(output.type(torch.float), -n - 1, n)

    @staticmethod
    def backward(ctx, grad_output):
        identity_grad = None
        if ctx.identity is not None:
            identity_grad = grad_output.clone() / ctx.z_scaling_factor
        return grad_output.clone() / ctx.z_scaling_factor, None, None, None, None, identity_grad, None