
    sg                     (   d dl mZmZmZmZ d dlZd dlmZ d dl	Z	d dl
mZ d dlZd dlmZmZmZ d dlmZ d dlmZmZ ddlmZmZmZmZ ddlmZmZmZm Z  dd	l!m"Z"m#Z# d
dl$m%Z% ejL                  jN                   G d de             Z(dZ)dZ*dee+e+f   dejX                  fdZ-ej\                  fdZ/ G d dej`                        Z1 G d dej`                        Z2 G d dej`                        Z3 G d dej`                        Z4 G d dej`                        Z5 G d dej`                        Z6 G d  d!ej`                        Z7 G d" d#ej`                        Z8 G d$ d%ej`                        Z9 G d& d'ej`                        Z: G d( d)ej`                        Z; G d* d+ej`                        Z< G d, d-e      Z= G d. d/ej`                        Z> G d0 d1ej`                        Z? e"d2e)       G d3 d4e=             Z@d5ZA e e@eA        ee@e(e%6        G d7 d8ej`                        ZB e"d9e)       G d: d;e=             ZCd<ZD e eCeD        eeCee%6        G d= d>ej`                        ZE e"d?e)       G d@ dAe=             ZFdBZG e eFeG        eeFee%6       y)C    )CallableListOptionalTupleN)
FrozenDictfreezeunfreeze)dot_product_attention_weights)flatten_dictunflatten_dict   )FlaxBaseModelOutputFlaxBaseModelOutputWithPoolingFlaxMaskedLMOutputFlaxSequenceClassifierOutput)ACT2FNFlaxPreTrainedModel append_replace_return_docstringsoverwrite_call_docstring)add_start_docstrings%add_start_docstrings_to_model_forward   )
BeitConfigc                       e Zd ZdZy)FlaxBeitModelOutputWithPoolinga  
    Class for outputs of [`FlaxBeitModel`].

    Args:
        last_hidden_state (`jnp.ndarray` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (`jnp.ndarray` of shape `(batch_size, hidden_size)`):
            Average of the last layer hidden states of the patch tokens (excluding the *[CLS]* token) if
            *config.use_mean_pooling* is set to True. If set to False, then the final hidden state of the *[CLS]* token
            will be returned.
        hidden_states (`tuple(jnp.ndarray)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `jnp.ndarray` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer plus
            the initial embedding outputs.
        attentions (`tuple(jnp.ndarray)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `jnp.ndarray` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    N)__name__
__module____qualname____doc__     ^/var/www/html/venv/lib/python3.12/site-packages/transformers/models/beit/modeling_flax_beit.pyr   r   ,   s    r!   r   a  

    This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading, saving and converting weights from PyTorch models)

    This model is also a
    [flax.linen.Module](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) subclass. Use it as
    a regular Flax linen Module and refer to the Flax documentation for all matter related to general usage and
    behavior.

    Finally, this model supports inherent JAX features such as:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        config ([`BeitConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights.
        dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`):
            The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
            `jax.numpy.bfloat16` (on TPUs).

            This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
            specified all the computation will be performed with the given `dtype`.

            **Note that this only specifies the dtype of the computation and does not influence the dtype of model
            parameters.**

            If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and
            [`~FlaxPreTrainedModel.to_bf16`].
a  
    Args:
        pixel_values (`numpy.ndarray` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`AutoImageProcessor.__call__`] for details.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
window_sizereturnc                    d| d   z  dz
  d| d   z  dz
  z  dz   }t        j                  | d         }t        j                  | d         }t        j                  t        j                  ||d            }t        j                  |d      }|dddddf   |dddddf   z
  }t        j
                  |d	      }|dddddfxx   | d   dz
  z  cc<   |dddddfxx   | d   dz
  z  cc<   |dddddfxx   d| d   z  dz
  z  cc<   t        j                  | d   | d   z  dz   fdz  |j                  
      }|j                  d      |ddddf<   |dz
  |dddf<   |dz
  |dddf<   |dz
  |d<   t        j                  |      S )zP
    get pair-wise relative position index for each token inside the window
       r   r   r   ij)indexing)r&   N)r   r&   r   shapedtyper)   )r   r   )nparangestackmeshgridreshape	transposezerosr,   sumjnparray)r#   num_relative_distancecoords_hcoords_wcoordscoords_flattenrelative_coordsrelative_position_indexs           r"   relative_position_index_initr>   w   s    Q/!3KN8JQ8NORSSyyQ(HyyQ(HXXbkk(HtDEFZZ0N$Q4Z0>!T1*3MMOll?I>OAq!GA 22Aq!GA 22Aq!GKN 2Q 66 hhk!n{1~.MPQ.Q-SVW-W_n_t_tu&5&9&9"&=ABF#%:Q%>AqrE"%:Q%>ABE"$9A$=D!99,--r!   c                 4    t        j                  ||      |z  S N)r5   ones)keyr+   scaler,   s       r"   ones_with_scalerD      s    88E5!E))r!   c                   b    e Zd ZU dZeed<   ej                  j                  dde	e
   fd       Zy)FlaxBeitDropPathzXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).ratedeterministicc                 N   | j                   dk(  r|S d| j                   z
  }|r|S |j                  d   fd|j                  dz
  z  z   }| j                  d      }|t        j
                  j                  |||j                        z   }t        j                  |      }||z  |z  }|S )N        g      ?r   )r   r   droppathr*   )
rG   r+   ndimmake_rngjaxrandomuniformr,   r5   floor)	selfinputsrH   	keep_probr+   rngrandom_tensorbinary_tensoroutputs	            r"   __call__zFlaxBeitDropPath.__call__   s    99M$))O	M\\!_&q)AAE--
+C%

(:(:3eSYS_S_(:(``MIIm4Mi'-7FMr!   NT)r   r   r   r   float__annotations__nnmodulecompactr   boolrY   r    r!   r"   rF   rF      s1    b
KYYhtn  r!   rF   c                   Z    e Zd ZU eed<   ej                  Zej                  ed<   d Zd Z	y)FlaxBeitPatchEmbeddingsconfigr,   c           
         | j                   j                  | _        | j                   j                  }| j                   j                  }||z  ||z  z  }||z  ||z  f}|| _        || _        t        j                  | j                   j                  ||f||fd| j                  t        j                  j                  j                  | j                   j                              | _        y )NVALID)kernel_sizestridespaddingr,   kernel_init)rc   num_channels
image_size
patch_sizenum_patchespatch_shaper]   Convhidden_sizer,   rN   initializersnormalinitializer_range
projection)rR   rk   rl   rm   rn   s        r"   setupzFlaxBeitPatchEmbeddings.setup   s     KK44[[++
[[++
!Z/J*4LM!Z/z1IJ&&''KK###Z0,**++224;;3P3PQ
r!   c                     |j                   d   }|| j                  k7  rt        d      | j                  |      }|j                   \  }}}}t	        j
                  ||d|f      S )Nr)   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r+   rj   
ValueErrorrt   r5   r1   )rR   pixel_valuesrj   
embeddings
batch_size_channelss          r"   rY   z FlaxBeitPatchEmbeddings.__call__   sl    #))"-4,,,w  __\2
%/%5%5"
Aq({{:
B'ABBr!   N
r   r   r   r   r\   r5   float32r,   ru   rY   r    r!   r"   rb   rb      s%    {{E399"
"Cr!   rb   c                   `    e Zd ZU dZeed<   ej                  Zej                  ed<   d Z	ddZ
y)FlaxBeitEmbeddingsz7Construct the CLS token, position and patch embeddings.rc   r,   c                    | j                  dt        j                  j                  dd| j                  j
                  f      | _        | j                  j                  rG| j                  dt        j                  j                  dd| j                  j
                  f      | _        t        | j                  | j                        | _        | j                  j                  }| j                  j                  rJ| j                  dt        j                  j                  d|dz   | j                  j
                  f      | _        t        j                  | j                  j                         | _        y )N	cls_tokenr   
mask_tokenr,   position_embeddingsrG   )paramr]   rq   r3   rc   rp   r   use_mask_tokenr   rb   r,   patch_embeddingsrm    use_absolute_position_embeddingsr   Dropouthidden_dropout_probdropout)rR   rm   s     r"   ru   zFlaxBeitEmbeddings.setup   s    K1F1FAt{{OfOfHgh;;%%"jjr7L7LqRSUYU`U`UlUlNmnDO 74:: V++77;;77'+zz%r'<'<q+PQ/SWS^S^SjSj>k(D$ zzt{{'F'FGr!   Nc                    | j                  |      }|j                  \  }}}t        j                  | j                  |d| j
                  j                  f      }|j                  |j                        }|wt        j                  | j                  ||| j
                  j                  f      }	|	j                  |j                        }	t        j                  |d      }
|d|
z
  z  |	|
z  z   }t        j                  ||fd      }| j
                  j                  r(|| j                  j                  |j                        z   }| j                  ||      }|S )Nr   r)   axisrH   )r   r+   r5   broadcast_tor   rc   rp   astyper,   r   expand_dimsconcatenater   r   r   )rR   rx   bool_masked_posrH   ry   rz   seq_lenr{   
cls_tokensmask_tokensws              r"   rY   zFlaxBeitEmbeddings.__call__   s)   **<8
!+!1!1
GQ%%dnnz1dkkF]F]6^_
&&z'7'78
&**4??ZRVR]R]RiRi<jkK%,,Z-=-=>Kb9A#q1u-a?J__j*%=AF
;;77#d&>&>&E&EjFVFV&WWJ\\*M\J
r!   )NT)r   r   r   r   r   r\   r5   r~   r,   ru   rY   r    r!   r"   r   r      s(    A{{E399"
Hr!   r   c                   n    e Zd ZU eed<   eeef   ed<   ej                  Z	ej                  ed<   d Z
d Zy)FlaxBeitRelativePositionBiasrc   r#   r,   c                     d| j                   d   z  dz
  d| j                   d   z  dz
  z  dz   }| j                  dt        j                  j                  || j
                  j                  f      | _        t        | j                         | _	        y )Nr&   r   r   r   relative_position_bias_table)
r#   r   r]   rq   r3   rc   num_attention_headsr   r>   r=   )rR   r7   s     r"   ru   z"FlaxBeitRelativePositionBias.setup   s    !"T%5%5a%8!81!<TEUEUVWEXAX[\A\ ]`a a,0JJ*OO!!"DKK$C$CD-
) (DDDTDT'U$r!   c                 *   | j                   j                  d      }| j                  d   | j                  d   z  dz   | j                  d   | j                  d   z  dz   df}| j                  |   j                  |      }t	        j
                  |d      S )Nr)   r   r   )r&   r   r   )r=   r1   r#   r   r5   r2   )rR   indexr+   relative_position_biass       r"   rY   z%FlaxBeitRelativePositionBias.__call__  s    ,,44R8!!!$t'7'7'::Q>@P@PQR@SVZVfVfghVi@ilm@moqr!%!B!B5!I!Q!QRW!X}}3Y??r!   N)r   r   r   r   r\   r   intr5   r~   r,   ru   rY   r    r!   r"   r   r      s4    sCx {{E399"	V@r!   r   c                   |    e Zd ZU eed<   eeef   ed<   ej                  Z	ej                  ed<   d Z
	 d	dedefdZy)
FlaxBeitSelfAttentionrc   r#   r,   c                 V   | j                   j                  | j                   j                  z  dk7  rQt        | j                   d      s;t	        d| j                   j                  f d| j                   j                   d      t        j                  | j                   j                  | j                  t        j
                  j                  j                  | j                   j                              | _        t        j                  | j                   j                  | j                  t        j
                  j                  j                  | j                   j                        d      | _        t        j                  | j                   j                  | j                  t        j
                  j                  j                  | j                   j                              | _        | j                  r2t!        | j                   | j                  | j                  	      | _        y d | _        y )
Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads .)r,   ri   F)r,   ri   use_biasr#   r,   )rc   rp   r   hasattrrw   r]   Denser,   rN   rq   rr   rs   queryrB   valuer#   r   r   rR   s    r"   ru   zFlaxBeitSelfAttention.setup  s   ;;""T[[%D%DDIRYKK)S
 "4;;#:#:#;"< =889< 
 XXKK##**++224;;3P3PQ


 88KK##**++224;;3P3PQ	
 XXKK##**++224;;3P3PQ

  )$BRBRZ^ZdZde 	#  	#r!   NrH   output_attentionsc                 b   | j                   j                  | j                   j                  z  }| j                  |      j	                  |j
                  d d | j                   j                  |fz         }| j                  |      j	                  |j
                  d d | j                   j                  |fz         }| j                  |      j	                  |j
                  d d | j                   j                  |fz         }d }	|s*| j                   j                  dkD  r| j                  d      }	t        j                  d| j                        }
| j                  ?t        j                  | j                         d      }
|
j                  |j                        }
||
|j                  |
j                        z   }
t!        |||
|	| j                   j                  d|| j                  d 	      }t        j"                  d||      }|j	                  |j
                  d d d	z         }|r||f}|S |f}|S )
Nr&   rJ   r   r   r   T)biasdropout_rngdropout_ratebroadcast_dropoutrH   r,   	precisionz...hqk,...khd->...qhd)r)   )rc   rp   r   r   r1   r+   r   rB   attention_probs_dropout_probrM   r5   r6   r,   r   r   r   r
   einsum)rR   hidden_statesr   rH   r   head_dimquery_statesvalue_states
key_statesr   attention_biasattn_weightsattn_outputoutputss                 r"   rY   zFlaxBeitSelfAttention.__call__-  s    ;;**dkk.M.MMzz-088#t{{'F'F&QQ
 zz-088#t{{'F'F&QQ
 XXm,44#t{{'F'F&QQ

 !I!IC!O--	2K3djj9&&2 __T-H-H-JANN+22<3E3EFN "-+.D.K.KNL`L`.aaN4#AA"'**

 jj!8,U!))+*;*;BQ*?%*GH1B;- JUr!   NTFr   r   r   r   r\   r   r   r5   r~   r,   ru   r`   rY   r    r!   r"   r   r     sJ    sCx {{E399"
B qv-IM-im-r!   r   c                   b    e Zd ZU eed<   ej                  Zej                  ed<   d Zdde	fdZ
y)FlaxBeitSelfOutputrc   r,   c                 N   t        j                  | j                  j                  t        j                   j
                  j                  | j                  j                        | j                        | _	        t        j                  | j                  j                        | _        y Nri   r,   r   r]   r   rc   rp   rN   rq   rr   rs   r,   denser   r   r   r   s    r"   ru   zFlaxBeitSelfOutput.setupa  d    XXKK##++224;;3P3PQ**


 zzt{{'F'FGr!   rH   c                 N    | j                  |      }| j                  ||      }|S Nr   r   r   rR   r   rH   s      r"   rY   zFlaxBeitSelfOutput.__call__i  s(    

=1]-Pr!   NrZ   r   r   r   r   r\   r5   r~   r,   ru   r`   rY   r    r!   r"   r   r   ]  s,    {{E399"HT r!   r   c                   x    e Zd ZU eed<   eeef   ed<   ej                  Z	ej                  ed<   d Z
	 ddefdZy)	FlaxBeitAttentionrc   r#   r,   c                     t        | j                  | j                  | j                        | _        t        | j                  | j                        | _        y )Nr   )r   rc   r#   r,   	attentionr   rX   r   s    r"   ru   zFlaxBeitAttention.setupt  s9    .t{{D<L<LTXT^T^_(DJJGr!   Nr   c                 |    | j                  ||||      }|d   }| j                  ||      }|f}|r	||d   fz  }|S NrH   r   r   r   r   )r   rX   )rR   r   r   rH   r   attn_outputsr   r   s           r"   rY   zFlaxBeitAttention.__call__x  s_     ~~1bs & 
 #1okk+]kK.Q))Gr!   r   r   r    r!   r"   r   r   o  sB    sCx {{E399"H
 inaer!   r   c                   Z    e Zd ZU eed<   ej                  Zej                  ed<   d Zd Z	y)FlaxBeitIntermediaterc   r,   c                 4   t        j                  | j                  j                  t        j                   j
                  j                  | j                  j                        | j                        | _	        t        | j                  j                     | _        y )Nr   )r]   r   rc   intermediate_sizerN   rq   rr   rs   r,   r   r   
hidden_act
activationr   s    r"   ru   zFlaxBeitIntermediate.setup  s`    XXKK))++224;;3P3PQ**


 !!7!78r!   c                 J    | j                  |      }| j                  |      }|S r@   )r   r   )rR   r   s     r"   rY   zFlaxBeitIntermediate.__call__  s$    

=16r!   Nr}   r    r!   r"   r   r     s$    {{E399"9r!   r   c                   b    e Zd ZU eed<   ej                  Zej                  ed<   d Zdde	fdZ
y)FlaxBeitOutputrc   r,   c                 N   t        j                  | j                  j                  t        j                   j
                  j                  | j                  j                        | j                        | _	        t        j                  | j                  j                        | _        y r   r   r   s    r"   ru   zFlaxBeitOutput.setup  r   r!   rH   c                 N    | j                  |      }| j                  ||      }|S r   r   r   s      r"   rY   zFlaxBeitOutput.__call__  s(    

=1]-Pr!   NrZ   r   r    r!   r"   r   r     s,    {{E399"HT r!   r   c                       e Zd ZU eed<   eeef   ed<   eed<   ej                  Z
ej                  ed<   d Z	 d
dedefd	Zy)FlaxBeitLayerrc   r#   drop_path_rater,   c                 z   t        | j                  | j                  | j                        | _        t        | j                  | j                        | _        t        | j                  | j                        | _        t        j                  | j                  j                  | j                        | _        t        | j                        | _        t        j                  | j                  j                  | j                        | _        | j                  j"                  | _        | j$                  dkD  rw| j'                  dt(        | j                  j*                  | j$                        | _        | j'                  dt(        | j                  j*                  | j$                        | _        y d | _        d | _        y )Nr   epsilonr,   r   r   lambda_1lambda_2)r   rc   r#   r,   r   r   intermediater   rX   r]   	LayerNormlayer_norm_epslayernorm_beforerF   r   	drop_pathlayernorm_afterlayer_scale_init_valueinit_valuesr   rD   rp   r   r   r   s    r"   ru   zFlaxBeitLayer.setup  s&   *4;;8H8HPTPZPZ[0DJJO$T[[

C "T[[5O5OW[WaWa b)t/B/BC!||DKK4N4NVZV`V`a;;==a JJz?T[[E\E\_c_o_opDM JJz?T[[E\E\_c_o_opDM DM DMr!   NrH   r   c                    | j                  | j                  |      |||      }|d   }| j                  (| j                  j                  |j                        |z  }| j                  ||      |z   }| j                  |      }| j                  |      }| j                  ||      }| j                  (| j                  j                  |j                        |z  }| j                  ||      |z   }|f}|r	||d   fz  }|S r   )
r   r   r   r   r,   r   r   r   rX   r   )	rR   r   r   rH   r   self_attention_outputsattention_outputlayer_outputr   s	            r"   rY   zFlaxBeitLayer.__call__  s     "&!!-0"'/	 "0 "
 2!4 ==$#}}334D4J4JKN^^ '7}UXee ++M:((6{{<}{M ==$==//0B0BClRL ~~l-~PS``/.q133Gr!   r   )r   r   r   r   r\   r   r   r[   r5   r~   r,   ru   r`   rY   r    r!   r"   r   r     sO    sCx {{E399"!" qv$IM$im$r!   r   c            	           e Zd ZU eed<   eeef   ed<   ee   ed<   e	g e
j                  f   ed<   e
j                  Ze
j                  ed<   d Z	 	 	 	 ddeded	ed
efdZy)FlaxBeitLayerCollectionrc   r#   drop_path_ratesr   r,   c                 &   t        | j                  j                        D cg c]^  }t        | j                  | j                  j                  r| j
                  nd | j                  |   t        |      | j                        ` c}| _	        y c c}w )N)r#   r   namer,   )
rangerc   num_hidden_layersr   use_relative_position_biasr#   r   strr,   layers)rR   is     r"   ru   zFlaxBeitLayerCollection.setup  st     4;;889	
  040V0VD,,\`#33A6Vjj	
 	
s   A#BrH   r   output_hidden_statesreturn_dictc                 4   |rdnd }|rdnd }t        | j                        D ]H  \  }}	|r||fz  }| j                  | j                         nd }
 |	||
||      }|d   }|s@||d   fz  }J |r||fz  }|f}|st        d |D              S t	        |||      S )Nr    r   r   r   c              3   &   K   | ]	  }||  y wr@   r    ).0vs     r"   	<genexpr>z3FlaxBeitLayerCollection.__call__.<locals>.<genexpr>  s     =qq}=s   )last_hidden_stater   
attentions)	enumerater  r   tupler   )rR   r   rH   r   r  r  all_attentionsall_hidden_statesr  layerr   layer_outputsr   s                r"   rY   z FlaxBeitLayerCollection.__call__  s      1d"6BD!$++. 	6HAu#!m%55!FJFaFaFmT%@%@%Bsw"!5]fwM *!,M =#3"55	6  -!11 "=G==="+;LYg
 	
r!   NTFFT)r   r   r   r   r\   r   r   r   r[   r   r5   ndarrayr~   r,   ru   r`   rY   r    r!   r"   r   r     s    sCx %[ $R_55{{E399"

 #"'%* !
 !
  	!

 #!
 !
r!   r   c            	           e Zd ZU eed<   eeef   ed<   ej                  Z	ej                  ed<   d Z
	 	 	 	 ddedededefd	Zy
)FlaxBeitEncoderrc   r#   r,   c                    | j                   j                  r1t        | j                   | j                  | j                        | _        t        t        j                  d| j                   j                  | j                   j                              }t        | j                   | j                  || j                   j                  r| j
                  nd | j                        | _        y )N)rc   r#   r,   r   )r#   r   r   r,   )rc   !use_shared_relative_position_biasr   r#   r,   r   listr-   linspacer   r   r   r  )rR   r   s     r"   ru   zFlaxBeitEncoder.setup(  s    ;;88*F{{0@0@

+D'
 r{{1dkk.H.H$++JgJghi,KK((+{{<< $(#>#>**

r!   rH   r   r  r  c                 .    | j                  |||||      S )NrH   r   r  r  )r  )rR   r   rH   r   r  r  s         r"   rY   zFlaxBeitEncoder.__call__:  s)     zz'/!5#  
 	
r!   Nr  r   r    r!   r"   r  r  #  sh    sCx {{E399"
* #"'%* 
 
  	

 #
 
r!   r  c                   v    e Zd ZU dZeZdZdZdZe	j                  ed<   ddej                  dfded	ed
ej                  def fdZddej&                  j(                  dededefdZ eej5                  d            	 	 	 	 	 	 	 ddedej&                  j(                  dedee   dee   dee   fd       Z xZS )FlaxBeitPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    beitrx   Nmodule_classr   Trc   seedr,   _do_initc                      | j                   d||d|}|$d|j                  |j                  |j                  f}t        |   ||||||       y )N)rc   r,   r   )input_shaper   r,   r!  r    )r  rk   rj   super__init__)	rR   rc   r#  r   r,   r!  kwargsr^   	__class__s	           r"   r%  z FlaxBeitPreTrainedModel.__init__V  sc     #""H&HHf//1B1BFDWDWXK[tSXcklr!   rU   r#  paramsr$   c                    t        j                  || j                        }t        j                  j                  |      \  }}t        j                  j                  |      \  }}|||d}| j                  j                  ||d      d   }	|dt        t        |	            }	t        t        |            }| j                  D ]
  }
|	|
   ||
<    t               | _
        t        t        |            S |	S )Nr   )r(  r   rK   F)r  r(  )r5   r3   r,   rN   rO   splitr^   initr   r	   _missing_keyssetr   r   )rR   rU   r#  r(  rx   
params_rngr   droppath_rngrngsrandom_paramsmissing_keys              r"   init_weightsz$FlaxBeitPreTrainedModel.init_weightsd  s    yyDJJ?"%**"2"23"7
K$'JJ$4$4[$A!\$,W((|(OPXY(-)@AM!(6"23F#11 A&3K&@{#A!$D.011  r!   zbatch_size, sequence_lengthr   trainr   r  r  c	           
         ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }t	        j
                  |d      }i }	|,t        j                  j                  |      \  }}
||	d<   |
|	d<   | j                  j                  d|xs | j                  it	        j                  |t        j                        || ||||	      S )N)r   r&   r   r   r   rK   r(  r   )r0  )rc   r   r  r  r5   r2   rN   rO   r*  r^   applyr(  r6   r~   )rR   rx   r   r(  r   r4  r   r  r  r0  r/  s              r"   rY   z FlaxBeitPreTrainedModel.__call__x  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY}}\<@"(+

(8(8(E%K)DO+D{{  v,-IIl#++6I  ! 	
 		
r!   r@   )NNNFNNN)r   r   r   r   r   config_classbase_model_prefixmain_input_namer  r]   Moduler\   r5   r~   r   r,   r`   r%  rN   rO   PRNGKeyr   r   r3  r   BEIT_INPUTS_DOCSTRINGformatdictr   rY   __classcell__)r'  s   @r"   r  r  K  s+   
 L$O"L"))"
 ;;mm 	m
 yym m!

 2 2 ! !PZ !fp !( ++@+G+GHe+fg *.,0/3&*"
 	"

 ZZ''"
 "
 $D>"
 'tn"
 d^"
 h"
r!   r  c                   Z    e Zd ZU eed<   ej                  Zej                  ed<   d Zd Z	y)FlaxBeitPoolerrc   r,   c                     | j                   j                  r;t        j                  | j                   j                  | j
                        | _        y y )Nr   )rc   use_mean_poolingr]   r   r   r,   	layernormr   s    r"   ru   zFlaxBeitPooler.setup  s7    ;;''\\$++2L2LTXT^T^_DN (r!   c                     | j                   j                  r6|d d dd d d f   }| j                  t        j                  |d            }|S |d d df   }|S )Nr   r   r   )rc   rC  rD  r5   mean)rR   r   patch_tokenspooled_outputs       r"   rY   zFlaxBeitPooler.__call__  sX    ;;''(AB2L NN388Lq+IJM
  *!Q$/Mr!   Nr}   r    r!   r"   rA  rA    s%    {{E399"`	r!   rA  c            	           e Zd ZU eed<   ej                  Zej                  ed<   dZe	ed<   d Z
	 	 	 	 	 dde	de	d	e	d
e	fdZy)FlaxBeitModulerc   r,   Tadd_pooling_layerc                    t        | j                  | j                        | _        t	        | j                  | j                  j
                  j                  | j                        | _        | j                  j                  s:t        j                  | j                  j                  | j                        | _        | j                  r't        | j                  | j                        | _        y d | _        y )Nr   r   r   )r   rc   r,   ry   r  r   rn   encoderrC  r]   r   r   rD  rK  rA  poolerr   s    r"   ru   zFlaxBeitModule.setup  s    ,T[[

K&KKT__%E%E%Q%QY]YcYc
 {{++\\$++2L2LTXT^T^_DNGKG]G]nT[[

Ccgr!   NrH   r   r  r  c                 `   | j                  |||      }| j                  |||||      }|d   }| j                  j                  s| j	                  |      }| j
                  r| j                  |      nd }	|s|		|f|dd  z   S ||	f|dd  z   S t        ||	|j                  |j                        S )Nr   r  r   r   )r
  pooler_outputr   r  )
ry   rM  rc   rC  rD  rK  rN  r   r   r  )
rR   rx   r   rH   r   r  r  r   r   pooleds
             r"   rY   zFlaxBeitModule.__call__  s     oUbc,,'/!5#  
  
{{++ NN=9M/3/E/E]+4~%''!"+55!6*WQR[88-+ !//))	
 	
r!   )NTFFT)r   r   r   r   r\   r5   r~   r,   rK  r`   ru   rY   r    r!   r"   rJ  rJ    si    {{E399""t"h ""'%* "
 	"

  "
 #"
 "
r!   rJ  z^The bare Beit Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd ZeZy)FlaxBeitModelN)r   r   r   rJ  r  r    r!   r"   rS  rS    s	    
 "Lr!   rS  a  
    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxBeitModel
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k-ft22k")
    >>> model = FlaxBeitModel.from_pretrained("microsoft/beit-base-patch16-224-pt22k-ft22k")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> last_hidden_states = outputs.last_hidden_state
    ```
)output_typer7  c                   n    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 	 	 dde	fdZ
y)$FlaxBeitForMaskedImageModelingModulerc   r,   c                    t        | j                  d| j                        | _        t	        j
                  | j                  j                  | j                        | _        t	        j                  | j                  j                  t        j                  j                  j                  | j                  j                        | j                        | _        y )NF)rK  r,   r   r   )rJ  rc   r,   r  r]   r   r   rD  r   
vocab_sizerN   rq   rr   rs   lm_headr   s    r"   ru   z*FlaxBeitForMaskedImageModelingModule.setup  s    "4;;%tzzZ	 dkk.H.HPTPZPZ[xxKK""++224;;3P3PQ**
r!   NrH   c                 "   ||n| j                   j                  }| j                  ||||||      }|d   }| j                  |      }| j	                  |d d dd f         }	|s|	f|dd  z   }
|
S t        |	|j                  |j                        S )Nr  r   r   r&   logitsr   r  )rc   use_return_dictr  rD  rY  r   r   r  )rR   rx   r   rH   r   r  r  r   sequence_outputprediction_scoresrX   s              r"   rY   z-FlaxBeitForMaskedImageModelingModule.__call__  s     &1%<k$++B]B]))'/!5#  
 "!*..9 LLAB)?@')GABK7FM!$!//))
 	
r!   NNTNNNr   r    r!   r"   rV  rV    sB    {{E399"	
 "! 
 	 
r!   rV  zYBeit Model transformer with a 'language' modeling head on top (to predict visual tokens).c                       e Zd ZeZy)FlaxBeitForMaskedImageModelingN)r   r   r   rV  r  r    r!   r"   rb  rb  9  s	    
 8Lr!   rb  a?  
    bool_masked_pos (`numpy.ndarray` of shape `(batch_size, num_patches)`):
        Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).

    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, BeitForMaskedImageModeling
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224-pt22k")
    >>> model = BeitForMaskedImageModeling.from_pretrained("microsoft/beit-base-patch16-224-pt22k")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    ```
c                   n    e Zd ZU eed<   ej                  Zej                  ed<   d Z	 	 	 	 	 	 dde	fdZ
y)$FlaxBeitForImageClassificationModulerc   r,   c                 >   t        | j                  | j                  d      | _        t	        j
                  | j                  j                  t        j                  j                  j                  | j                  j                        | j                        | _        y )NT)rc   r,   rK  r   )rJ  rc   r,   r  r]   r   
num_labelsrN   rq   rr   rs   
classifierr   s    r"   ru   z*FlaxBeitForImageClassificationModule.setupd  sa    "$++TZZ[_`	((KK""++224;;3P3PQ**
r!   NrH   c                     ||n| j                   j                  }| j                  |||||      }|d   }| j                  |      }	|s|	f|dd  z   }
|
S t	        |	|j
                  |j                        S )Nr  r   r&   r[  )rc   r]  r  rg  r   r   r  )rR   rx   r   rH   r   r  r  r   rH  r\  rX   s              r"   rY   z-FlaxBeitForImageClassificationModule.__call__l  s     &1%<k$++B]B]))'/!5#  
  
/Y,FM+!//))
 	
r!   r`  r   r    r!   r"   rd  rd  `  sB    {{E399"
 "!
 	
r!   rd  z
    Beit Model transformer with an image classification head on top (a linear layer on top of the average of the final
    hidden states of the patch tokens) e.g. for ImageNet.
    c                       e Zd ZeZy)FlaxBeitForImageClassificationN)r   r   r   rd  r  r    r!   r"   rj  rj    s	     8Lr!   rj  aM  
    Returns:

    Example:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxBeitForImageClassification
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/beit-base-patch16-224")
    >>> model = FlaxBeitForImageClassification.from_pretrained("microsoft/beit-base-patch16-224")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits
    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_class_idx = logits.argmax(-1).item()
    >>> print("Predicted class:", model.config.id2label[predicted_class_idx])
    ```
)Htypingr   r   r   r   flax
flax.linenlinenr]   rN   	jax.numpynumpyr5   r-   flax.core.frozen_dictr   r   r	   flax.linen.attentionr
   flax.traverse_utilr   r   modeling_flax_outputsr   r   r   r   modeling_flax_utilsr   r   r   r   utilsr   r   configuration_beitr   struct	dataclassr   BEIT_START_DOCSTRINGr<  r   r  r>   r~   rD   r:  rF   rb   r   r   r   r   r   r   r   r   r   r  r  rA  rJ  rS  FLAX_BEIT_MODEL_DOCSTRINGrV  rb  FLAX_BEIT_MLM_DOCSTRINGrd  rj  FLAX_BEIT_CLASSIF_DOCSTRINGr    r!   r"   <module>r~     s  " 3 2   
   > > > ;   Q * %C  ,! F ".eCHo .#++ .0 .1[[ *ryy *Cbii C@& &R@299 @.RBII Rj $		 4299 &RYY &:BII :z4
bii 4
n%
bii %
PP
1 P
fRYY (0
RYY 0
f d"+ "	" , (A B  <Zis t/
299 /
d _8%< 8	8 2 79P Q  "0BQ[
*
299 *
Z  8%< 88 2 79T U  "0L[er!   