
"""Defines utilities for interacting with scaled_dot_product_attention"""
import math
from typing import List, Optional, Union

import torch

__all__: List[str] = []


def _input_requires_grad(*tensors: torch.Tensor) -> bool:
    """Returns True if any of the tensors requires grad"""
    return any(t.requires_grad for t in tensors)


def _postprocess_flash_output(inpt_tensor: torch.Tensor, og_size: int) -> torch.Tensor:
    """Handles the unpad of the last dimension"""
    if inpt_tensor.size(-1) != og_size:
        return inpt_tensor[..., :og_size]
    return inpt_tensor


def _calculate_scale(head_dim_size: int, scale: Optional[float]) -> float:
    """
    For FlashAttention we pad the head dimension to be a multiple of 8, so we need to scale the output
    by the original head size and not the padded one.
    """
    if scale is not None:
        return scale
    return 1.0 / math.sqrt(head_dim_size)


_SUPPORTED_HEAD_DIMS = [8, 16, 32, 64, 128, 256]


def _supported_head_dim(n: Union[int, torch.SymInt]) -> bool:
    """Returns True if the head dim is supported by FlexAttention"""
    return n in _SUPPORTED_HEAD_DIMS


def _validate_sdpa_input(
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attn_mask: Optional[torch.Tensor] = None,
    dropout_p=0.0,
    is_causal=False,
    scale=None,
):
    # dropout_p, is_causal, and scale are accepted for signature parity with
    # scaled_dot_product_attention but are not validated here.
    if query.dtype != key.dtype or query.dtype != value.dtype:
        raise ValueError(
            f"Expected query, key, and value to have the same dtype, "
            f"but got query.dtype: {query.dtype}, key.dtype: {key.dtype}, "
            f"and value.dtype: {value.dtype} instead."
        )
    if query.device != key.device or query.device != value.device:
        raise ValueError(
            f"Expected query, key, and value to have the same device type, "
            f"but got query.device: {query.device}, key.device: {key.device}, "
            f"and value.device: {value.device} instead."
        )
    if query.dim() < 2 or key.dim() < 2 or value.dim() < 2:
        raise ValueError(
            f"Expected query, key, and value to all be at least 2 dimensional, "
            f"but got query.dim: {query.dim()}, key.dim: {key.dim()} "
            f"and value.dim: {value.dim()} instead."
        )
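

# --- Editorial example (not part of the original module) ---------------------
# A minimal sketch of how the helpers above fit together when preparing inputs
# for scaled_dot_product_attention. The tensor shapes below are arbitrary
# assumptions chosen for illustration; run this file directly to execute it.
if __name__ == "__main__":
    query = torch.randn(2, 8, 128, 64)  # (batch, heads, seq_len, head_dim)
    key = torch.randn(2, 8, 128, 64)
    value = torch.randn(2, 8, 128, 64)

    # Raises ValueError if the dtypes, devices, or dimensionalities disagree.
    _validate_sdpa_input(query, key, value)

    # With no override, the softmax scale defaults to 1 / sqrt(head_dim).
    assert _calculate_scale(query.size(-1), None) == 1.0 / math.sqrt(64)

    # A head dim of 64 is listed in _SUPPORTED_HEAD_DIMS, so FlexAttention accepts it.
    assert _supported_head_dim(query.size(-1))

    print("sdpa utility checks passed")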