
    sgT                         d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ  ej                  e      Z G d	 d
e      Z G d de
      Zy)zLeViT model configuration    OrderedDict)Mapping)version   )PretrainedConfig)
OnnxConfig)loggingc                   \     e Zd ZdZdZddddddg dg d	g d
g ddg dg ddf fd	Z xZS )LevitConfiga  
    This is the configuration class to store the configuration of a [`LevitModel`]. It is used to instantiate a LeViT
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the LeViT
    [facebook/levit-128S](https://huggingface.co/facebook/levit-128S) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        image_size (`int`, *optional*, defaults to 224):
            The size of the input image.
        num_channels (`int`, *optional*, defaults to 3):
            Number of channels in the input image.
        kernel_size (`int`, *optional*, defaults to 3):
            The kernel size for the initial convolution layers of patch embedding.
        stride (`int`, *optional*, defaults to 2):
            The stride size for the initial convolution layers of patch embedding.
        padding (`int`, *optional*, defaults to 1):
            The padding size for the initial convolution layers of patch embedding.
        patch_size (`int`, *optional*, defaults to 16):
            The patch size for embeddings.
        hidden_sizes (`List[int]`, *optional*, defaults to `[128, 256, 384]`):
            Dimension of each of the encoder blocks.
        num_attention_heads (`List[int]`, *optional*, defaults to `[4, 8, 12]`):
            Number of attention heads for each attention layer in each block of the Transformer encoder.
        depths (`List[int]`, *optional*, defaults to `[4, 4, 4]`):
            The number of layers in each encoder block.
        key_dim (`List[int]`, *optional*, defaults to `[16, 16, 16]`):
            The size of key in each of the encoder blocks.
        drop_path_rate (`int`, *optional*, defaults to 0):
            The dropout probability for stochastic depths, used in the blocks of the Transformer encoder.
        mlp_ratios (`List[int]`, *optional*, defaults to `[2, 2, 2]`):
            Ratio of the size of the hidden layer compared to the size of the input layer of the Mix FFNs in the
            encoder blocks.
        attention_ratios (`List[int]`, *optional*, defaults to `[2, 2, 2]`):
            Ratio of the size of the output dimension compared to input dimension of attention layers.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.

    Example:

    ```python
    >>> from transformers import LevitConfig, LevitModel

    >>> # Initializing a LeViT levit-128S style configuration
    >>> configuration = LevitConfig()

    >>> # Initializing a model (with random weights) from the levit-128S style configuration
    >>> model = LevitModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```levit   r            )      i  )         )r   r   r   )r   r   r   r   )r   r   r   g{Gz?c                 B   t        |   di | || _        || _        || _        || _        || _        || _        || _        |	| _	        |
| _
        || _        || _        || _        || _        || _        d|
d   |d   |
d   z  dddgd|
d   |d   |
d   z  dddgg| _        y )N	Subsampler   r   r   r    )super__init__
image_sizenum_channelskernel_sizestridepaddinghidden_sizesnum_attention_headsdepthskey_dimdrop_path_rate
patch_sizeattention_ratio	mlp_ratioinitializer_rangedown_ops)selfr   r   r   r   r    r&   r!   r"   r#   r$   r%   r(   r'   r)   kwargs	__class__s                   `/var/www/html/venv/lib/python3.12/site-packages/transformers/models/levit/configuration_levit.pyr   zLevitConfig.__init__X   s    $ 	"6"$(&(#6 ,$."!2'!*l1o&CQ1M'!*l1o&CQ1M
    )__name__
__module____qualname____doc__
model_typer   __classcell__)r-   s   @r.   r   r      sF    5n J $&!$
 $
r/   r   c                   p    e Zd Z ej                  d      Zedeeee	ef   f   fd       Z
edefd       Zy)LevitOnnxConfigz1.11returnc                 (    t        ddddddfg      S )Npixel_valuesbatchr   heightwidth)r   r   r   r   r   r+   s    r.   inputszLevitOnnxConfig.inputs   s&    WHQX!YZ
 	
r/   c                      y)Ng-C6?r   r>   s    r.   atol_for_validationz#LevitOnnxConfig.atol_for_validation   s    r/   N)r0   r1   r2   r   parsetorch_onnx_minimum_versionpropertyr   strintr?   floatrA   r   r/   r.   r7   r7      sZ    !.v!6
WS#X%6 67 
 
 U  r/   r7   N)r3   collectionsr   typingr   	packagingr   configuration_utilsr   onnxr	   utilsr
   
get_loggerr0   loggerr   r7   r   r/   r.   <module>rP      sL      #   3   
		H	%^
" ^
Dj r/   