
import warnings
from typing import Optional, Tuple

import torch
import torch.jit
import torch.nn.functional as F
from torch import nn, Tensor

__all__ = ["MultiheadAttention"]


class MultiheadAttention(nn.MultiheadAttention):
    _FLOAT_MODULE = nn.MultiheadAttention

    __constants__ = ["batch_first"]

    def __init__(
        self,
        embed_dim: int,
        num_heads: int,
        dropout: float = 0.0,
        bias: bool = True,
        add_bias_kv: bool = False,
        add_zero_attn: bool = False,
        kdim: Optional[int] = None,
        vdim: Optional[int] = None,
        batch_first: bool = False,
        device=None,
        dtype=None,
    ) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        super().__init__(
            embed_dim,
            num_heads,
            dropout,
            bias,
            add_bias_kv,
            add_zero_attn,
            kdim,
            vdim,
            batch_first,
            **factory_kwargs,
        )
        # Separate Q/K/V projections so that each one can be observed and
        # quantized independently (the float module fuses them into
        # `in_proj_weight` when the embedding dimensions match).
        self.linear_Q = nn.Linear(self.embed_dim, self.embed_dim, bias=bias, **factory_kwargs)
        self.linear_K = nn.Linear(self.kdim, self.embed_dim, bias=bias, **factory_kwargs)
        self.linear_V = nn.Linear(self.vdim, self.embed_dim, bias=bias, **factory_kwargs)
        self.out_proj = nn.Linear(self.embed_dim, self.embed_dim, bias=bias, **factory_kwargs)  # type: ignore[assignment]

        # Functionals
        self.q_scaling_product = torch.ao.nn.quantized.FloatFunctional()

        # Quant/Dequant stubs mark the boundaries of the quantized region.
        self.quant_attn_output = torch.ao.quantization.QuantStub()
        self.quant_attn_output_weights = torch.ao.quantization.QuantStub()
        self.dequant_q = torch.ao.quantization.DeQuantStub()
        self.dequant_k = torch.ao.quantization.DeQuantStub()
        self.dequant_v = torch.ao.quantization.DeQuantStub()

    def _get_name(self):
        return "QuantizableMultiheadAttention"

    @classmethod
    def from_float(cls, other):
        assert type(other) == cls._FLOAT_MODULE
        assert hasattr(other, "qconfig"), "The float module must have 'qconfig'"
        observed = cls(
            other.embed_dim,
            other.num_heads,
            other.dropout,
            (other.in_proj_bias is not None),
            (other.bias_k is not None),
            other.add_zero_attn,
            other.kdim,
            other.vdim,
            other.batch_first,
        )
        observed.bias_k = other.bias_k
        observed.bias_v = other.bias_v
        observed.qconfig = other.qconfig

        # Set the linear weights
        observed.out_proj.weight = other.out_proj.weight
        observed.out_proj.bias = other.out_proj.bias
        if other._qkv_same_embed_dim:
            # The float module keeps Q/K/V fused in `in_proj_weight`; split it
            # into the three separate projections.
            bias = other.in_proj_bias
            _start = 0
            _end = _start + other.embed_dim
            weight = other.in_proj_weight[_start:_end, :]
            if bias is not None:
                bias = torch.nn.Parameter(bias[_start:_end], bias.requires_grad)
            observed.linear_Q.weight = torch.nn.Parameter(weight, weight.requires_grad)
            observed.linear_Q.bias = bias

            bias = other.in_proj_bias
            _start = _end
            _end = _start + other.embed_dim
            weight = other.in_proj_weight[_start:_end, :]
            if bias is not None:
                bias = torch.nn.Parameter(bias[_start:_end], bias.requires_grad)
            observed.linear_K.weight = torch.nn.Parameter(weight, weight.requires_grad)
            observed.linear_K.bias = bias

            bias = other.in_proj_bias
            _start = _end
            weight = other.in_proj_weight[_start:, :]
            if bias is not None:
                bias = torch.nn.Parameter(bias[_start:], bias.requires_grad)
            observed.linear_V.weight = torch.nn.Parameter(weight, weight.requires_grad)
            observed.linear_V.bias = bias
        else:
            observed.linear_Q.weight = nn.Parameter(other.q_proj_weight)
            observed.linear_K.weight = nn.Parameter(other.k_proj_weight)
            observed.linear_V.weight = nn.Parameter(other.v_proj_weight)
            if other.in_proj_bias is None:
                observed.linear_Q.bias = None
                observed.linear_K.bias = None
                observed.linear_V.bias = None
            else:
                observed.linear_Q.bias = nn.Parameter(
                    other.in_proj_bias[0 : other.embed_dim]
                )
                observed.linear_K.bias = nn.Parameter(
                    other.in_proj_bias[other.embed_dim : (other.embed_dim * 2)]
                )
                observed.linear_V.bias = nn.Parameter(
                    other.in_proj_bias[(other.embed_dim * 2) :]
                )
        observed.eval()
        # Explicit prepare
        observed = torch.ao.quantization.prepare(observed, inplace=True)
        return observed

    @torch.jit.unused
    def dequantize(self):
        r"""Utility to convert the quantized MHA back to float.

        The motivation for this is that it is not trivial to convert the weights
        from the format that is used in the quantized version back to the
        float.
        """
        fp = self._FLOAT_MODULE(
            self.embed_dim,
            self.num_heads,
            self.dropout,
            (self.linear_Q._weight_bias()[1] is not None),
            (self.bias_k is not None),
            self.add_zero_attn,
            self.kdim,
            self.vdim,
            self.batch_first,
        )
        assert fp._qkv_same_embed_dim == self._qkv_same_embed_dim
        if self.bias_k is not None:
            fp.bias_k = nn.Parameter(self.bias_k.dequantize())
        if self.bias_v is not None:
            fp.bias_v = nn.Parameter(self.bias_v.dequantize())

        # Set the linear weights
        w, b = self.out_proj._weight_bias()
        fp.out_proj.weight = nn.Parameter(w.dequantize())
        if b is not None:
            fp.out_proj.bias = nn.Parameter(b)

        wQ, bQ = self.linear_Q._weight_bias()
        wQ = wQ.dequantize()
        wK, bK = self.linear_K._weight_bias()
        wK = wK.dequantize()
        wV, bV = self.linear_V._weight_bias()
        wV = wV.dequantize()
        if fp._qkv_same_embed_dim:
            # Reassemble the fused `in_proj_weight` from the separate projections.
            _start = 0
            _end = _start + fp.embed_dim
            fp.in_proj_weight[_start:_end, :] = wQ
            if fp.in_proj_bias is not None:
                assert all(bQ == 0)
                fp.in_proj_bias[_start:_end] = bQ

            _start = _end
            _end = _start + fp.embed_dim
            fp.in_proj_weight[_start:_end, :] = wK
            if fp.in_proj_bias is not None:
                assert all(bK == 0)
                fp.in_proj_bias[_start:_end] = bK

            _start = _end
            fp.in_proj_weight[_start:, :] = wV
            if fp.in_proj_bias is not None:
                assert all(bV == 0)
                fp.in_proj_bias[_start:] = bV
        else:
            fp.q_proj_weight = nn.Parameter(wQ)
            fp.k_proj_weight = nn.Parameter(wK)
            fp.v_proj_weight = nn.Parameter(wV)
            if fp.in_proj_bias is None:
                self.linear_Q.bias = None
                self.linear_K.bias = None
                self.linear_V.bias = None
            else:
                fp.in_proj_bias[0 : fp.embed_dim] = bQ
                fp.in_proj_bias[fp.embed_dim : (fp.embed_dim * 2)] = bK
                fp.in_proj_bias[(fp.embed_dim * 2) :] = bV

        return fp

    @classmethod
    def from_observed(cls, other):
        # Conversion to the quantized flavor is handled elsewhere; this class
        # only covers the float -> observed step.
        raise NotImplementedError(
            "It looks like you are trying to prepare an MHA module. "
            "Please, see the examples on quantizable MHAs."
        )

    def forward(
        self,
        query: Tensor,
        key: Tensor,
        value: Tensor,
        key_padding_mask: Optional[Tensor] = None,
        need_weights: bool = True,
        attn_mask: Optional[Tensor] = None,
        average_attn_weights: bool = True,
        is_causal: bool = False,
    ) -> Tuple[Tensor, Optional[Tensor]]:
        r"""
        Note::
            Please, refer to :func:`~torch.nn.MultiheadAttention.forward` for more
            information

        Args:
            query, key, value: map a query and a set of key-value pairs to an output.
                See "Attention Is All You Need" for more details.
            key_padding_mask: if provided, specified padding elements in the key will
                be ignored by the attention. When given a binary mask and a value is True,
                the corresponding value on the attention layer will be ignored.
            need_weights: output attn_output_weights.
            attn_mask: 2D or 3D mask that prevents attention to certain positions. A 2D mask will be broadcasted for all
                the batches while a 3D mask allows to specify a different mask for the entries of each batch.

        Shape:
            - Inputs:
            - query: :math:`(L, N, E)` where L is the target sequence length, N is the batch size, E is
              the embedding dimension. :math:`(N, L, E)` if ``batch_first`` is ``True``.
            - key: :math:`(S, N, E)`, where S is the source sequence length, N is the batch size, E is
              the embedding dimension. :math:`(N, S, E)` if ``batch_first`` is ``True``.
            - value: :math:`(S, N, E)` where S is the source sequence length, N is the batch size, E is
              the embedding dimension. :math:`(N, S, E)` if ``batch_first`` is ``True``.
            - key_padding_mask: :math:`(N, S)` where N is the batch size, S is the source sequence length.
              If a BoolTensor is provided, the positions with the
              value of ``True`` will be ignored while the positions with the value of ``False`` will be unchanged.
            - attn_mask: 2D mask :math:`(L, S)` where L is the target sequence length, S is the source sequence length.
              3D mask :math:`(N*num_heads, L, S)` where N is the batch size, L is the target sequence length,
              S is the source sequence length. attn_mask ensures that position i is allowed to attend the unmasked
              positions. If a BoolTensor is provided, positions with ``True``
              are not allowed to attend while ``False`` values will be unchanged. If a FloatTensor
              is provided, it will be added to the attention weight.
            - is_causal: If specified, applies a causal mask as attention mask. Mutually exclusive with providing attn_mask.
              Default: ``False``.
            - average_attn_weights: If true, indicates that the returned ``attn_weights`` should be averaged across
              heads. Otherwise, ``attn_weights`` are provided separately per head. Note that this flag only has an
              effect when ``need_weights=True``. Default: True (i.e. average weights across heads)

            - Outputs:
            - attn_output: :math:`(L, N, E)` where L is the target sequence length, N is the batch size,
              E is the embedding dimension. :math:`(N, L, E)` if ``batch_first`` is ``True``.
            - attn_output_weights: If ``average_attn_weights=True``, returns attention weights averaged
              across heads of shape :math:`(N, L, S)`, where N is the batch size, L is the target sequence length,
              S is the source sequence length. If ``average_attn_weights=False``, returns attention weights per
              head of shape :math:`(N, num_heads, L, S)`.
        """
        return self._forward_impl(
            query,
            key,
            value,
            key_padding_mask,
            need_weights,
            attn_mask,
            average_attn_weights,
            is_causal,
        )

    def _forward_impl(
        self,
        query: Tensor,
        key: Tensor,
        value: Tensor,
        key_padding_mask: Optional[Tensor] = None,
        need_weights: bool = True,
        attn_mask: Optional[Tensor] = None,
        average_attn_weights: bool = True,
        is_causal: bool = False,
    ) -> Tuple[Tensor, Optional[Tensor]]:
        # Static key/value pairs are not supported; the variables are kept so
        # the shape checks below mirror the float implementation.
        static_k = None
        static_v = None

        if attn_mask is not None and is_causal:
            raise AssertionError("Only allow causal mask or attn_mask")

        if is_causal:
            raise AssertionError("causal mask not supported by AO MHA module")

        if self.batch_first:
            query, key, value = (x.transpose(0, 1) for x in (query, key, value))

        tgt_len, bsz, embed_dim_to_check = query.size()
        assert self.embed_dim == embed_dim_to_check
        # Allow MHA to have different sizes for the feature dimension.
        assert key.size(0) == value.size(0) and key.size(1) == value.size(1)

        head_dim = self.embed_dim // self.num_heads
        assert (
            head_dim * self.num_heads == self.embed_dim
        ), "embed_dim must be divisible by num_heads"
        scaling = float(head_dim) ** -0.5

        q = self.linear_Q(query)
        k = self.linear_K(key)
        v = self.linear_V(value)

        # The scaling runs through a quantized functional so the scale of `q`
        # is tracked by an observer.
        q = self.q_scaling_product.mul_scalar(q, scaling)

        if attn_mask is not None:
            if attn_mask.dtype == torch.uint8:
                warnings.warn(
                    "Byte tensor for `attn_mask` in `nn.MultiheadAttention` is "
                    "deprecated. Use bool tensor instead.",
                    stacklevel=3,
                )
                attn_mask = attn_mask.to(torch.bool)
            assert (
                attn_mask.is_floating_point() or attn_mask.dtype == torch.bool
            ), f"Only float and bool types are supported for attn_mask, not {attn_mask.dtype}"

            if attn_mask.dim() == 2:
                attn_mask = attn_mask.unsqueeze(0)
                if list(attn_mask.size()) != [1, query.size(0), key.size(0)]:
                    raise RuntimeError("The size of the 2D attn_mask is not correct.")
            elif attn_mask.dim() == 3:
                if list(attn_mask.size()) != [
                    bsz * self.num_heads,
                    query.size(0),
                    key.size(0),
                ]:
                    raise RuntimeError("The size of the 3D attn_mask is not correct.")
            else:
                raise RuntimeError(
                    f"attn_mask's dimension {attn_mask.dim()} is not supported"
                )
            # attn_mask is 3D from here on.

        # Convert a ByteTensor key_padding_mask to bool.
        if key_padding_mask is not None and key_padding_mask.dtype == torch.uint8:
            warnings.warn(
                "Byte tensor for `key_padding_mask` in `nn.MultiheadAttention` is "
                "deprecated. Use bool tensor instead.",
                stacklevel=3,
            )
            key_padding_mask = key_padding_mask.to(torch.bool)

        if self.bias_k is not None and self.bias_v is not None:
            if static_k is None and static_v is None:
                k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
                v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
                if attn_mask is not None:
                    attn_mask = F.pad(attn_mask, (0, 1))
                if key_padding_mask is not None:
                    key_padding_mask = F.pad(key_padding_mask, (0, 1))
            else:
                assert static_k is None, "bias cannot be added to static key."
                assert static_v is None, "bias cannot be added to static value."
        else:
            assert self.bias_k is None
            assert self.bias_v is None

        q = q.contiguous().view(tgt_len, bsz * self.num_heads, head_dim).transpose(0, 1)
        if k is not None:
            k = k.contiguous().view(-1, bsz * self.num_heads, head_dim).transpose(0, 1)
        if v is not None:
            v = v.contiguous().view(-1, bsz * self.num_heads, head_dim).transpose(0, 1)

        if static_k is not None:
            assert static_k.size(0) == bsz * self.num_heads
            assert static_k.size(2) == head_dim
            k = static_k

        if static_v is not None:
            assert static_v.size(0) == bsz * self.num_heads
            assert static_v.size(2) == head_dim
            v = static_v

        src_len = k.size(1)

        if key_padding_mask is not None:
            assert key_padding_mask.size(0) == bsz
            assert key_padding_mask.size(1) == src_len

        if self.add_zero_attn:
            src_len += 1
            k_zeros = torch.zeros((k.size(0), 1) + k.size()[2:])
            if k.is_quantized:
                k_zeros = torch.quantize_per_tensor(
                    k_zeros, k.q_scale(), k.q_zero_point(), k.dtype
                )
            k = torch.cat([k, k_zeros], dim=1)
            v_zeros = torch.zeros((v.size(0), 1) + v.size()[2:])
            if v.is_quantized:
                v_zeros = torch.quantize_per_tensor(
                    v_zeros, v.q_scale(), v.q_zero_point(), v.dtype
                )
            v = torch.cat([v, v_zeros], dim=1)

            if attn_mask is not None:
                attn_mask = F.pad(attn_mask, (0, 1))
            if key_padding_mask is not None:
                key_padding_mask = F.pad(key_padding_mask, (0, 1))

        # Leaving the quantized zone here: the attention itself runs in float.
        q = self.dequant_q(q)
        k = self.dequant_k(k)
        v = self.dequant_v(v)
        attn_output_weights = torch.bmm(q, k.transpose(1, 2))
        assert list(attn_output_weights.size()) == [
            bsz * self.num_heads,
            tgt_len,
            src_len,
        ]

        if attn_mask is not None:
            if attn_mask.dtype == torch.bool:
                attn_output_weights.masked_fill_(attn_mask, float("-inf"))
            else:
                attn_output_weights += attn_mask

        if key_padding_mask is not None:
            attn_output_weights = attn_output_weights.view(
                bsz, self.num_heads, tgt_len, src_len
            )
            attn_output_weights = attn_output_weights.masked_fill(
                key_padding_mask.unsqueeze(1).unsqueeze(2),
                float("-inf"),
            )
            attn_output_weights = attn_output_weights.view(
                bsz * self.num_heads, tgt_len, src_len
            )

        attn_output_weights = F.softmax(attn_output_weights, dim=-1)
        attn_output_weights = F.dropout(
            attn_output_weights, p=self.dropout, training=self.training
        )

        attn_output = torch.bmm(attn_output_weights, v)
        assert list(attn_output.size()) == [bsz * self.num_heads, tgt_len, head_dim]
        if self.batch_first:
            attn_output = attn_output.view(bsz, tgt_len, self.embed_dim)
        else:
            attn_output = (
                attn_output.transpose(0, 1)
                .contiguous()
                .view(tgt_len, bsz, self.embed_dim)
            )

        # Reentering the quantized zone.
        attn_output = self.quant_attn_output(attn_output)
        attn_output = self.out_proj(attn_output)
        attn_output_weights = self.quant_attn_output_weights(attn_output_weights)

        if need_weights:
            # Optionally average the attention weights over the heads.
            attn_output_weights = attn_output_weights.view(
                bsz, self.num_heads, tgt_len, src_len
            )
            if average_attn_weights:
                attn_output_weights = attn_output_weights.mean(dim=1)
            return attn_output, attn_output_weights
        else:
            return attn_output, None