
    sgp                        d dl Z d dlmZmZ ddlmZ ddlmZmZ  ej                  e
      Z e       rd dlZ	 	 	 d dee   ded   d	ee   d
edef   fdZ	 	 	 d dee   ded   d	ee   d
edef   fdZ	 	 	 d dee   ded   d	ee   d
edef   fdZ	 d!deddd	ee   d
edef   fdZ	 d!deddd	ee   d
edef   fdZ	 d!deddd	ee   d
edef   fdZeeeeeedZ	 	 d"dedededee   dee   f
dZd!dedee   fdZd!dedee   fdZd!dedee   fdZd!dedee   fdZd!dedee   fdZd!dedee   fdZeeeeeedZd!dedee   fdZ y)#    N)OptionalTuple   )PretrainedConfig)is_torch_availableloggingconfigdeviceztorch.deviceseq_lenreturnztorch.Tensorc                    | t        |      dkD  rt        d| d|        t        |      dkD  r|d   }|d   }nZ| X| j                  }t        | d      r| j                  nd}t        | d| j                  | j                  z        }t        ||z        }d}dt        j                  dd	t        j                  
      j                         j                  |      |z  z  z  }	|	|fS )a  
    Computes the inverse frequencies according to the original RoPE implementation
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    r   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_default_rope_parameters`, got `rope_kwargs`= and `config`=basedimpartial_rotary_factor      ?head_dim   dtype)len
ValueError
rope_thetahasattrr   getattrhidden_sizenum_attention_headsinttorcharangeint64floatto)
r	   r
   r   rope_kwargsr   r   r   r   attention_factorinv_freqs
             S/var/www/html/venv/lib/python3.12/site-packages/transformers/modeling_rope_utils.py _compute_default_rope_parametersr(      s
   * c+.2EEPMQ_`f_gi
 	
 ;!6"% 		  @GPg@h < <nq6:v/A/AVE_E_/_`(223 du||AsAU[[IOOQTTU[\_bbcdH%%%    c                     | t        |      dkD  rt        d| d|        t        |      dkD  r|d   }n| | j                  d   }t        | ||fi |\  }}|z  }||fS )a  
    Computes the inverse frequencies with linear scaling. Credits to the Reddit user /u/kaiokendev
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    r   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_linear_scaling_rope_parameters`, got `rope_kwargs`=r   factor)r   r   rope_scalingr(   )r	   r
   r   r$   r+   r&   r%   s          r'   '_compute_linear_scaling_rope_parametersr-   G   s    * c+.2LLW=Xfgmfnp
 	
 ;!X&		$$X. "B&&RY!i]h!iH
 H%%%r)   c                 d   | t        |      dkD  rt        d| d|        t        |      dkD  r|d   }|d   }|d   }|d   }nu| s| j                  }t        | d      r| j                  nd	}t        | d
| j                  | j                  z        }	t        |	|z        }| j                  }| j                  d   }d	}
||kD  r|n}|z  |z  |dz
  z
  |dz
  z  z  z  }d	|t        j                  d|dt        j                        j                         j                  |      |z  z  z  }||
fS )a4  
    Computes the inverse frequencies with NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length, used to update the dynamic RoPE at inference time.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
    r   zUnexpected arguments: `**rope_kwargs` and `config` are mutually exclusive in `_compute_dynamic_ntk_parameters`, got `rope_kwargs`=r   r   r   max_position_embeddingsr+   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r/   r,   r   r    r!   r"   r#   )r	   r
   r   r$   r   r   r/   r+   r   r   r%   r&   s               r'   _compute_dynamic_ntk_parametersr0   p   s   , c+.2DDO=P^_e^fh
 	
 ;!6"% "-.G"HX&		  @GPg@h < <nq6:v/A/AVE_E_/_`(223"("@"@$$X. !,;R1RgXoG FW$'>>6A:NTW[^ab[bTcddDdu||AsAU[[IOOQTTU[\_bbcdH%%%r)   c                 N   t        |      dkD  rt        d|       | j                  }t        | d      r| j                  nd}t        | d| j                  | j                  z        }t        ||z        }| j                  }| j                  d   }	| j                  j                  d      }
|
dt        j                  |	      z  dz   }
| j                  j                  d	      xs d
}| j                  j                  d      xs d}d fd}d }|t        j                  d|d      j!                         j#                  |      |z  z  }d|z  }d|	|z  z  } ||||||      \  }}d ||||dz        j!                         j#                  |      z
  }|d|z
  z  ||z  z   }||
fS )a  
    Computes the inverse frequencies with NTK scaling. Please refer to the
    [original paper](https://arxiv.org/abs/2309.00071)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r   zYUnexpected arguments: `**rope_kwargs` should be unset in `_compute_yarn_parameters`, got r   r   r   r+   r%   g?	beta_fast    	beta_slowr   c                     |t        j                  || dz  t         j                  z  z        z  dt        j                  |      z  z  S )zPInverse dimension formula to find the dimension based on the number of rotationsr   )mathlogpi)num_rotationsr   r   r/   s       r'   find_correction_dimz5_compute_yarn_parameters.<locals>.find_correction_dim   sB    dhh6-!:Kdgg:UVWW\]`d`h`him`n\noor)   c                     t        j                   | |||            }t        j                   ||||            }t        |d      t	        ||dz
        fS )z.Find dimension range bounds based on rotationsr   r   )r6   floorceilmaxmin)low_rothigh_rotr   r   r/   lowhighr:   s          r'   find_correction_rangez7_compute_yarn_parameters.<locals>.find_correction_range   sU    jj,Wc4AXYZyy,XsDBYZ[3{CcAg...r)   c                     | |k(  r|dz  }t        j                  |t         j                        | z
  || z
  z  }t        j                  |dd      }|S )NgMbP?r   r   r   )r   r    float32clamp)r?   r>   r   linear_func	ramp_funcs        r'   linear_ramp_factorz4_compute_yarn_parameters.<locals>.linear_ramp_factor   sL    #:5LC||Cu}}=Cc	RKKQ2	r)   r   )r   r   r   r   r   r   r   r   r   r/   r,   getr6   r7   r   r    r"   r#   )r	   r
   r   r$   r   r   r   r   r/   r+   r%   r2   r4   rD   rJ   	pos_freqsinv_freq_extrapolationinv_freq_interpolationrB   rC   inv_freq_extrapolation_factorr&   r:   s                         @r'   _compute_yarn_parametersrP      s   ( ;!ghsgtu
 	
 D<CFLc<dF88jmvz6+=+=A[A[+[\H
h..
/C$<<  *F **../AB&!11C7 ##''4:I##''49Ip/ aa0668;;FCcIJI 9_ FY$67%iCG^_IC %&(:3cQh(O(U(U(W(Z(Z[a(b$b!!&C"CD
 #@
@	A 
 %%%r)   c                    t        |      dkD  rt        d|       | j                  }t        | d      r| j                  nd}t        | d| j                  | j                  z        }t        ||z        }| j                  d   }| j                  d   }	| j                  j                  d      }
| j                  j                  d	      }t        | d
      r<|r|| j                  k  r| j                  }n| j                  }| j                  }||z  }
n| j                  }||
z  }|I|
dk  rd}nAt        j                  dt        j                  |
      t        j                  |      z  z         }||kD  r't!        j"                  |t         j$                  |      }n&t!        j"                  |	t         j$                  |      }t!        j&                  d|dt         j(                  |      j+                         |z  }d|||z  z  z  }||fS )a  
    Computes the inverse frequencies with LongRoPE scaling. Please refer to the
    [original implementation](https://github.com/microsoft/LongRoPE)
    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r   z]Unexpected arguments: `**rope_kwargs` should be unset in `_compute_longrope_parameters`, got r   r   r   long_factorshort_factorr+   r%    original_max_position_embeddingsr   )r   r
   r   )r   r   r   r   r   r   r   r   r   r,   rK   rT   r/   r6   sqrtr7   r   tensorrF   r    r!   r"   )r	   r
   r   r$   r   r   r   r   rR   rS   r+   r%    expanded_max_position_embeddingsr/   ext_factorsinv_freq_shaper&   s                    r'   _compute_longrope_parametersrZ      s   * ;!km
 	

 D<CFLc<dF88jmvz6+=+=A[A[+[\H
h..
/C%%m4K&&~6L  $$X.F**../AB
 v9:w!H!HH/5/V/V,/5/M/M,"("I"I14KK"("@"@+BV+K( S="#yyTXXf-=I`@a-a)ab (*AAll;emmFSll<u}}VT\\!S!5;;vNTTVY\\NkD.$889H%%%r)   c                    t        | ||fi |\  }}| j                  d   }| j                  d   }| j                  d   }| j                  d   }	|	|z  }
|	|z  }dt        j                  z  |z  }t	        j
                  ||
kD  ||z  |      }|	|z  |z
  ||z
  z  }d|z
  |z  |z  ||z  z   }||k   ||
kD   z  }t	        j
                  |||      }||fS )a  
    Computes the inverse frequencies for llama 3.1.

    Args:
        config ([`~transformers.PretrainedConfig`]):
            The model configuration.
        device (`torch.device`):
            The device to use for initialization of the inverse frequencies.
        seq_len (`int`, *optional*):
            The current sequence length. Unused for this type of RoPE.
        rope_kwargs (`Dict`, *optional*):
            BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
    Returns:
        Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
    r+   low_freq_factorhigh_freq_factorrT   r   r   )r(   r,   r6   r8   r   where)r	   r
   r   r$   r&   r%   r+   r\   r]   old_context_lenlow_freq_wavelenhigh_freq_wavelenwaveleninv_freq_llamasmooth_factorsmoothed_inv_freqis_medium_freqs                    r'   _compute_llama3_parametersrg   6  s#   ( "B&&RY!i]h!iH  *F))*;<O**+=>))*LMO&8'*::$''kH$G [[+;!;X=NPXYN$w.@EUXgEghM]*n<vEXfHff!223BR8R6SSN[[1BNSN+++r)   )defaultlineardynamicyarnlongropellama3	rope_typereceived_keysrequired_keysoptional_keysignore_keysc                     d|v r|dhz  }|j                  d       |||z  }||z
  }|rt        d|  d|       |	||z
  |z
  }n||z
  }|rt        j                  d|  d|        yy)zYCompare the received keys in `config.rope_scaling` against the expected and optional keystypern   Nz9Missing required keys in `rope_scaling` for 'rope_type'='z': z5Unrecognized keys in `rope_scaling` for 'rope_type'=')addKeyErrorloggerwarning)rn   ro   rp   rq   rr   missing_keysunused_keyss          r'   _check_received_keysr{   n  s     &!+& $ =0LRS\R]]`am`nopp #m3mC#m3NykY\]h\ijk r)   c                     | j                   }|j                  d|j                  dd             }dh}t        |j                               }t	        ||||       y )Nrn   rt   rr   )r,   rK   setkeysr{   )r	   rr   r,   rn   rp   ro   s         r'   !_validate_default_rope_parametersr     sT    &&L  l.>.>vt.LMI MM))+,MM=kZr)   c                 "   | j                   }|j                  d|j                  dd             }ddh}t        |j                               }t	        ||||       |d   }|t        |t              r|dk  rt        j                  d|        y y )Nrn   rt   r+   r}   r   8`rope_scaling`'s factor field must be a float >= 1, got 	r,   rK   r~   r   r{   
isinstancer"   rw   rx   )r	   rr   r,   rn   rp   ro   r+   s          r'   (_validate_linear_scaling_rope_parametersr     s    &&L  l.>.>vt.LMI (+M))+,MM=kZ(#F~Z6&3,QRXQYZ[ ;Gr)   c                 *   | j                   }|j                  d|j                  dd             }ddh}dh}t        |j                               }t	        |||||       |d   }|t        |t              r|dk  rt        j                  d|        y y )Nrn   rt   r+   rT   r}   r   r   r   )r	   rr   r,   rn   rp   rq   ro   r+   s           r'   )_validate_dynamic_scaling_rope_parametersr     s    &&L  l.>.>vt.LMI (+M78M))+,MM=-]hi(#F~Z6&3,QRXQYZ[ ;Gr)   c                    | j                   }|j                  d|j                  dd             }ddh}h d}t        |j                               }t	        |||||       |d   }|t        |t              r|dk  rt        j                  d|        |j                  d      }|-t        |t              r|d	k  rt        j                  d
|        |j                  d      }	|	(t        |	t              st        j                  d|	        |j                  d      }
|
(t        |
t              st        j                  d|
        |	xs d|
xs dk  rt        j                  d|	 d|
 d       y y )Nrn   rt   r+   >   r2   r4   r%   r}   r   r   r%   r   L`rope_scaling`'s attention_factor field must be a float greater than 0, got r2   z6`rope_scaling`'s beta_fast field must be a float, got r4   z6`rope_scaling`'s beta_slow field must be a float, got r3   r   zO`rope_scaling`'s beta_fast field must be greater than beta_slow, got beta_fast=z( (defaults to 32 if None) and beta_slow=z (defaults to 1 if None)r   )r	   rr   r,   rn   rp   rq   ro   r+   r%   r2   r4   s              r'   _validate_yarn_parametersr     s~   &&L  l.>.>vt.LMI (+MBM))+,MM=-]hi(#F~Z6&3,QRXQYZ[#''(:;#Z8H%-PTdghThZ[kZlm	
   -IZ	5%AOPY{[\  -IZ	5%AOPY{[\RIN+]^g]h i66?[@XZ	
 ,r)   c                    | j                   }|j                  d|j                  dd             }h d}h d}t        |j                               }t	        |||||       t        | d      r| j                  nd}t        | d| j                  | j                  z        }t        ||z        }	|j                  d	      }
t        |
t              s*t        d
 |
D              rt        j                  d|
        t!        |
      |	dz  k(  s't        j                  d|	dz   dt!        |
              |j                  d      }t        |t              s*t        d |D              rt        j                  d|        t!        |      |	dz  k(  s't        j                  d|	dz   dt!        |              t        | d      rt        j#                  d       y |j                  d      }|t        j                  d       n-t        |t$              r|dk  rt        j                  d|        |j                  d      }|/t        |t$              r|dk  rt        j                  d|        y y y )Nrn   rt   >   rn   rR   rS   >   r+   r%   rT   r}   r   r   r   rS   c              3   H   K   | ]  }t        |t        t        f        y wNr   r   r"   .0xs     r'   	<genexpr>z0_validate_longrope_parameters.<locals>.<genexpr>  s     1dRS*Qe2M1d    "zC`rope_scaling`'s short_factor field must be a list of numbers, got r   z5`rope_scaling`'s short_factor field must have length z, got rR   c              3   H   K   | ]  }t        |t        t        f        y wr   r   r   s     r'   r   z0_validate_longrope_parameters.<locals>.<genexpr>  s     0bQRAU|1L0br   zB`rope_scaling`'s long_factor field must be a list of numbers, got z4`rope_scaling`'s long_factor field must have length rT   aY  This model has set a `original_max_position_embeddings` field, to be used together with `max_position_embeddings` to determine a scaling factor. Please set the `factor` field of `rope_scaling`with this ratio instead -- we recommend the use of this field over `original_max_position_embeddings`, as it is compatible with most model architectures.r+   z1Missing required keys in `rope_scaling`: 'factor'r   r%   g        r   )r,   rK   r~   r   r{   r   r   r   r   r   r   r   listallrw   rx   r   warning_oncer"   )r	   rr   r,   rn   rp   rq   ro   r   r   r   rS   rR   r+   r%   s                 r'   _validate_longrope_parametersr     sI   &&L  l.>.>vt.LMI@MVM))+,MM=-]hi<CFLc<dF88jmvz6+=+=A[A[+[\H
h..
/C##N3LlD)c1dWc1d.d\]i\jkl|q(NsVWxjX^_bco_p^qrs""=1Kk4(S0bVa0b-b[\g[hij{sax'McUVhZW]^abm^n]opq
 v9:A	
 !!(+>NNNOFE*fslNNUV\U]^_'++,>?'.6:JS:Pbcsbtu ;Q (r)   c                    | j                   }|j                  d|j                  dd             }h d}t        |j                               }t	        ||||       |d   }|t        |t              r|dk  rt        j                  d|        |d   }|d	   }|t        |t              st        j                  d
|        |t        |t              st        j                  d|        ||k  rt        j                  d| d|        |d   }	|	t        |	t              st        j                  d|	        |	| j                  k\  r&t        j                  d|	 d| j                          y y )Nrn   rt   >   r+   rn   r\   r]   rT   r}   r+   r   r   r\   r]   z<`rope_scaling`'s low_freq_factor field must be a float, got z=`rope_scaling`'s high_freq_factor field must be a float, got zc`rope_scaling`'s high_freq_factor field must be greater than low_freq_factor, got high_freq_factor=z and low_freq_factor=rT   zP`rope_scaling`'s original_max_position_embeddings field must be an integer, got zg`rope_scaling`'s original_max_position_embeddings field must be less than max_position_embeddings, got z and max_position_embeddings=)r,   rK   r~   r   r{   r   r"   rw   rx   r   r/   )
r	   rr   r,   rn   rp   ro   r+   r\   r]   rT   s
             r'   _validate_llama3_parametersr     s   &&L  l.>.>vt.LMIvM))+,MM=kZ(#F~Z6&3,QRXQYZ["#45O#$67j%&HUVeUfghz2BE'JVWgVhij?*q  5o5FH	

 (44V'W$'/zBbdg7h^/02	
 (6+I+IIu/00MfNlNlMmo	
 Jr)   c                     t        | dd      }|y|j                  d|j                  dd            }t        j                  |      }| || |       yt        j	                  d| d       y)	zO
    Validate the RoPE config arguments, given a `PretrainedConfig` object
    r,   Nrn   rt   rh   r}   zTMissing validation function mapping in `ROPE_VALIDATION_FUNCTIONS` for 'rope_type'='')r   rK   ROPE_VALIDATION_FUNCTIONSrw   rx   )r	   rr   r,   rn   validation_fns        r'   rope_config_validationr   -  sw     6>48L   l.>.>vy.QRI-11)<M f+6bclbmmno	
r)   )NNNr   )NN)!r6   typingr   r   configuration_utilsr   utilsr   r   
get_logger__name__rw   r   r   r"   r(   r-   r0   rP   rZ   rg   ROPE_INIT_FUNCTIONSstrr~   r{   r   r   r   r   r   r   r   r    r)   r'   <module>r      s    " 1 . 
		H	%  *.'+!'&%&'&^$'& c]'&
 >5 !'&V *.'+!&&%&&&^$&& c]&&
 >5 !&&T *.'+!0&%&0&^$0& c]0&
 >5 !0&h PTL&L&&4L&?G}L&
>5 !L&` PTA&A&&4A&?G}A&
>5 !A&J PT(,(,&4(,?G}(,
>5 !(,^ 05.$,(  $(!%lll l C=	l
 #l:[.> [XVY] [	\5E 	\T\]`Ta 	\\6F \U]^aUb \
&6 
Xc] 
>/*: /RU /d!
(8 !
xPS} !
L 168%-) 
#3 
(3- 
r)   