
import math
from collections import OrderedDict

import torch
from packaging import version
from torch import Tensor, nn

from .utils import logging


logger = logging.get_logger(__name__)


class PytorchGELUTanh(nn.Module):
    """
    A fast C implementation of the tanh approximation of the GeLU activation function. See
    https://arxiv.org/abs/1606.08415.

    This implementation is equivalent to NewGELU and FastGELU but much faster. However, it is not an exact numerical
    match due to rounding errors.
    """

    def __init__(self):
        super().__init__()
        if version.parse(torch.__version__) < version.parse("1.12.0"):
            raise ImportError(
                f"You are using torch=={torch.__version__}, but torch>=1.12.0 is required to use "
                "PytorchGELUTanh. Please upgrade torch."
            )

    def forward(self, input: Tensor) -> Tensor:
        return nn.functional.gelu(input, approximate="tanh")
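

# Note: the GELU variants below all compute or approximate GELU(x) = x * Phi(x), where Phi is the
# cumulative distribution function of the standard normal distribution, i.e. x/2 * (1 + erf(x / sqrt(2))).
# The tanh-based versions substitute tanh(sqrt(2/pi) * (x + 0.044715 * x^3)) for the erf term,
# trading a small amount of accuracy for speed.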


class NewGELUActivation(nn.Module):
    """
    Implementation of the GELU activation function currently in Google BERT repo (identical to OpenAI GPT). Also see
    the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (input + 0.044715 * torch.pow(input, 3.0))))


class GELUActivation(nn.Module):
    """
    Original Implementation of the GELU activation function in Google BERT repo when initially created. For
    information: OpenAI GPT's GELU is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))). This is now written in C in nn.functional.
    Also see the Gaussian Error Linear Units paper: https://arxiv.org/abs/1606.08415
    """

    def __init__(self, use_gelu_python: bool = False):
        super().__init__()
        if use_gelu_python:
            self.act = self._gelu_python
        else:
            self.act = nn.functional.gelu

    def _gelu_python(self, input: Tensor) -> Tensor:
        return input * 0.5 * (1.0 + torch.erf(input / math.sqrt(2.0)))

    def forward(self, input: Tensor) -> Tensor:
        return self.act(input)


class FastGELUActivation(nn.Module):
    """
    Applies GELU approximation that is slower than QuickGELU but more accurate. See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1.0 + torch.tanh(input * 0.7978845608 * (1.0 + 0.044715 * input * input)))


class QuickGELUActivation(nn.Module):
    """
    Applies GELU approximation that is fast but somewhat inaccurate. See: https://github.com/hendrycks/GELUs
    """

    def forward(self, input: Tensor) -> Tensor:
        return input * torch.sigmoid(1.702 * input)


class ClippedGELUActivation(nn.Module):
    """
    Clip the range of possible GeLU outputs between [min, max]. This is especially useful for quantization purposes, as
    it allows mapping negative values in the GeLU spectrum. For more information on this trick, please refer to
    https://arxiv.org/abs/2004.09602.

    Gaussian Error Linear Unit. Original Implementation of the gelu activation function in Google Bert repo when
    initially created.

    For information: OpenAI GPT's gelu is slightly different (and gives slightly different results): 0.5 * x * (1 +
    torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3)))). See https://arxiv.org/abs/1606.08415
    """

    def __init__(self, min: float, max: float):
        if min > max:
            raise ValueError(f"min should be < max (got min: {min}, max: {max})")

        super().__init__()
        self.min = min
        self.max = max

    def forward(self, x: Tensor) -> Tensor:
        # `gelu` is the module-level shortcut created at the bottom of this file.
        return torch.clip(gelu(x), self.min, self.max)


class AccurateGELUActivation(nn.Module):
    """
    Applies GELU approximation that is faster than default and more accurate than QuickGELU. See:
    https://github.com/hendrycks/GELUs

    Implemented along with MEGA (Moving Average Equipped Gated Attention)
    """

    def __init__(self):
        super().__init__()
        self.precomputed_constant = math.sqrt(2 / math.pi)

    def forward(self, input: Tensor) -> Tensor:
        return 0.5 * input * (1 + torch.tanh(self.precomputed_constant * (input + 0.044715 * torch.pow(input, 3))))


class MishActivation(nn.Module):
    """
    See Mish: A Self-Regularized Non-Monotonic Activation Function (Misra, https://arxiv.org/abs/1908.08681). Also
    visit the official repository for the paper: https://github.com/digantamisra98/Mish
    """

    def __init__(self):
        super().__init__()
        if version.parse(torch.__version__) < version.parse("1.9.0"):
            self.act = self._mish_python
        else:
            self.act = nn.functional.mish

    def _mish_python(self, input: Tensor) -> Tensor:
        return input * torch.tanh(nn.functional.softplus(input))

    def forward(self, input: Tensor) -> Tensor:
        return self.act(input)


class LinearActivation(nn.Module):
    """
    Applies the linear activation function, i.e. forwarding input directly to output.
    """

    def forward(self, input: Tensor) -> Tensor:
        return input


class LaplaceActivation(nn.Module):
    """
    Applies an elementwise activation based on the Laplace function, introduced in MEGA as an attention activation. See
    https://arxiv.org/abs/2209.10655

    Inspired by squared relu, but with bounded range and gradient for better stability.
    """

    def forward(self, input, mu=0.707107, sigma=0.282095):
        # Defaults correspond to mu = 1/sqrt(2) and sigma = 1/sqrt(4 * pi), the values used in MEGA.
        input = (input - mu).div(sigma * math.sqrt(2.0))
        return 0.5 * (1.0 + torch.erf(input))


class ReLUSquaredActivation(nn.Module):
    """
    Applies the relu^2 activation introduced in https://arxiv.org/abs/2109.08668v2
    """

    def forward(self, input):
        relu_applied = nn.functional.relu(input)
        squared = torch.square(relu_applied)
        return squared


class ClassInstantier(OrderedDict):
    def __getitem__(self, key):
        # Entries are either a class or a (class, default_kwargs) tuple; indexing
        # returns a freshly constructed instance either way.
        content = super().__getitem__(key)
        cls, kwargs = content if isinstance(content, tuple) else (content, {})
        return cls(**kwargs)


ACT2CLS = {
    "gelu": GELUActivation,
    "gelu_10": (ClippedGELUActivation, {"min": -10, "max": 10}),
    "gelu_fast": FastGELUActivation,
    "gelu_new": NewGELUActivation,
    "gelu_python": (GELUActivation, {"use_gelu_python": True}),
    "gelu_pytorch_tanh": PytorchGELUTanh,
    "gelu_accurate": AccurateGELUActivation,
    "laplace": LaplaceActivation,
    "leaky_relu": nn.LeakyReLU,
    "linear": LinearActivation,
    "mish": MishActivation,
    "quick_gelu": QuickGELUActivation,
    "relu": nn.ReLU,
    "relu2": ReLUSquaredActivation,
    "relu6": nn.ReLU6,
    "sigmoid": nn.Sigmoid,
    "silu": nn.SiLU,
    "swish": nn.SiLU,
    "tanh": nn.Tanh,
}
ACT2FN = ClassInstantier(ACT2CLS)


def get_activation(activation_string):
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    else:
        raise KeyError(f"function {activation_string} not found in ACT2FN mapping {list(ACT2FN.keys())}")


# Module-level shortcuts, kept so common activations can be imported directly.
gelu_python = get_activation("gelu_python")
gelu_new = get_activation("gelu_new")
gelu = get_activation("gelu")
gelu_fast = get_activation("gelu_fast")
quick_gelu = get_activation("quick_gelu")
silu = get_activation("silu")
mish = get_activation("mish")
linear_act = get_activation("linear")
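

# Minimal usage sketch (illustrative only, not part of the upstream API surface): model code
# typically resolves an activation from a config string via `get_activation(...)` or by indexing
# `ACT2FN`, which instantiates the mapped class (with its default kwargs) on every lookup.
# Because this module uses relative imports, the demo below only runs in package context,
# e.g. `python -m transformers.activations`.
if __name__ == "__main__":
    example_input = torch.randn(2, 3)
    for name in ("gelu", "gelu_new", "quick_gelu", "relu2", "silu"):
        activation = get_activation(name)  # returns a freshly constructed nn.Module per call
        print(name, activation(example_input).shape)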