
    sgr                     z   d Z ddlZddlmZ ddlmZmZmZ ddlZddl	Zddlm
Z
 ddlmZmZmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZmZmZ ddlmZ  ej:                  e      ZdZ dZ!g dZ"dZ#dZ$e G d de             Z% G d de
jL                        Z' G d de
jL                        Z( G d de
jL                        Z) G d de
jL                        Z* G d de
jL                        Z+ G d de
jL                        Z, G d d e
jL                        Z- G d! d"e
jL                        Z. G d# d$e
jL                        Z/ G d% d&e
jL                        Z0 G d' d(e
jL                        Z1 G d) d*e      Z2d+Z3d,Z4 ed-e3       G d. d/e2             Z5 ed0e3       G d1 d2e2             Z6 ed3e3       G d4 d5e2             Z7y)6zPyTorch LeViT model.    N)	dataclass)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardlogging   )LevitConfigr   zfacebook/levit-128S)r      i  ztabby, tabby catc                       e Zd ZU dZdZej                  ed<   dZej                  ed<   dZ	ej                  ed<   dZ
eeej                        ed<   y),LevitForImageClassificationWithTeacherOutputa  
    Output type of [`LevitForImageClassificationWithTeacher`].

    Args:
        logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores as the average of the `cls_logits` and `distillation_logits`.
        cls_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores of the classification head (i.e. the linear layer on top of the final hidden state of the
            class token).
        distillation_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
            Prediction scores of the distillation head (i.e. the linear layer on top of the final hidden state of the
            distillation token).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer
            plus the initial embedding outputs.
    Nlogits
cls_logitsdistillation_logitshidden_states)__name__
__module____qualname____doc__r   torchFloatTensor__annotations__r   r   r   r   r        [/var/www/html/venv/lib/python3.12/site-packages/transformers/models/levit/modeling_levit.pyr   r   3   sT    $ !%FE$$(J!!(-1**18<M8E%"3"345<r&   r   c                   ,     e Zd ZdZ	 d fd	Zd Z xZS )LevitConvEmbeddingsz[
    LeViT Conv Embeddings with Batch Norm, used in the initial patch embedding layer.
    c	           
          t         	|           t        j                  |||||||d      | _        t        j
                  |      | _        y )NF)dilationgroupsbias)super__init__r   Conv2dconvolutionBatchNorm2d
batch_norm)
selfin_channelsout_channelskernel_sizestridepaddingr+   r,   bn_weight_init	__class__s
            r'   r/   zLevitConvEmbeddings.__init__R   sF     	99{FGh_elq
 ..6r&   c                 J    | j                  |      }| j                  |      }|S N)r1   r3   )r4   
embeddingss     r'   forwardzLevitConvEmbeddings.forward[   s&    %%j1
__Z0
r&   )r   r   r   r   r   r    r!   r/   r?   __classcell__r;   s   @r'   r)   r)   M   s    
 mn7r&   r)   c                   (     e Zd ZdZ fdZd Z xZS )LevitPatchEmbeddingsz
    LeViT patch embeddings, for final embeddings to be passed to transformer blocks. It consists of multiple
    `LevitConvEmbeddings`.
    c                 X   t         |           t        |j                  |j                  d   dz  |j
                  |j                  |j                        | _        t        j                         | _        t        |j                  d   dz  |j                  d   dz  |j
                  |j                  |j                        | _        t        j                         | _        t        |j                  d   dz  |j                  d   dz  |j
                  |j                  |j                        | _        t        j                         | _        t        |j                  d   dz  |j                  d   |j
                  |j                  |j                        | _        |j                  | _        y )Nr            )r.   r/   r)   num_channelshidden_sizesr7   r8   r9   embedding_layer_1r   	Hardswishactivation_layer_1embedding_layer_2activation_layer_2embedding_layer_3activation_layer_3embedding_layer_4r4   configr;   s     r'   r/   zLevitPatchEmbeddings.__init__g   so   !4!4!4Q!71!<f>P>PRXR_R_agaoao"
 #%,,.!4"a')<)<Q)?1)DfFXFXZ`ZgZgioiwiw"
 #%,,.!4"a')<)<Q)?1)DfFXFXZ`ZgZgioiwiw"
 #%,,.!4"a')<)<Q)?ASASU[UbUbdjdrdr"
 #//r&   c                    |j                   d   }|| j                  k7  rt        d      | j                  |      }| j	                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  |      }| j                  |      }|j                  d      j                  dd      S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rH   )shaperI   
ValueErrorrK   rM   rN   rO   rP   rQ   rR   flatten	transpose)r4   pixel_valuesrI   r>   s       r'   r?   zLevitPatchEmbeddings.forward}   s    #))!,4,,,w  ++L9
,,Z8
++J7
,,Z8
++J7
,,Z8
++J7
!!!$..q!44r&   r@   rB   s   @r'   rD   rD   a   s    
0,5r&   rD   c                   &     e Zd Zd fd	Zd Z xZS )MLPLayerWithBNc                     t         |           t        j                  ||d      | _        t        j
                  |      | _        y )NF)in_featuresout_featuresr-   )r.   r/   r   LinearlinearBatchNorm1dr3   )r4   	input_dim
output_dimr:   r;   s       r'   r/   zMLPLayerWithBN.__init__   s3    iiIJUZ[..4r&   c                     | j                  |      }| j                  |j                  dd            j                  |      }|S )Nr   r   )ra   r3   rX   
reshape_asr4   hidden_states     r'   r?   zMLPLayerWithBN.forward   s<    {{<0|';';Aq'ABMMl[r&   )r   r   r   r    r/   r?   rA   rB   s   @r'   r\   r\      s    5
r&   r\   c                   $     e Zd Z fdZd Z xZS )LevitSubsamplec                 >    t         |           || _        || _        y r=   )r.   r/   r8   
resolution)r4   r8   rm   r;   s      r'   r/   zLevitSubsample.__init__   s    $r&   c                     |j                   \  }}}|j                  || j                  | j                  |      d d d d | j                  d d | j                  f   j	                  |d|      }|S )N)rV   viewrm   r8   reshape)r4   rh   
batch_size_channelss        r'   r?   zLevitSubsample.forward   sk    "."4"4
Ax#((T__dooW_`~$++~~$++~-

'*b(
+ 	 r&   ri   rB   s   @r'   rk   rk      s    %
r&   rk   c                   ^     e Zd Z fdZ ej
                         d fd	       Zd Zd Z xZ	S )LevitAttentionc                 ~   t         |           || _        |dz  | _        || _        || _        ||z  |z  ||z  dz  z   | _        ||z  |z  | _        t        || j                        | _	        t        j                         | _        t        | j                  |d      | _        t        t        j                   t#        |      t#        |                  }t%        |      }i g }	}|D ]W  }
|D ]P  }t'        |
d   |d   z
        t'        |
d   |d   z
        f}||vrt%        |      ||<   |	j)                  ||          R Y i | _        t,        j                  j/                  t-        j0                  |t%        |                  | _        | j5                  dt-        j6                  |	      j9                  ||      d       y )	N      rH   r   )r:   r   attention_bias_idxsF
persistent)r.   r/   num_attention_headsscalekey_dimattention_ratioout_dim_keys_valuesout_dim_projectionr\   queries_keys_valuesr   rL   
activation
projectionlist	itertoolsproductrangelenabsappendattention_bias_cacher"   	Parameterzerosattention_biasesregister_buffer
LongTensorrp   )r4   rJ   r~   r|   r   rm   points
len_pointsattention_offsetsindicesp1p2offsetr;   s                r'   r/   zLevitAttention.__init__   s   #6 d]
.#2W#<?R#RU\_rUruvUv#v "1G";>Q"Q#1,@X@X#Y ,,.()@)@,_`ai''j(95;LMN[
%'7 	:B :bebem,c"Q%"Q%-.@A!22034E0F%f-089	:	: %'! % 2 25;;?RTWXiTj3k l!5#3#3G#<#A#A*j#Yfk 	 	
r&   c                 R    t         |   |       |r| j                  ri | _        y y y r=   r.   trainr   r4   moder;   s     r'   r   zLevitAttention.train   )    dD--(*D% .4r&   c                     | j                   r| j                  d d | j                  f   S t        |      }|| j                  vr*| j                  d d | j                  f   | j                  |<   | j                  |   S r=   trainingr   ry   strr   r4   device
device_keys      r'   get_attention_biasesz#LevitAttention.get_attention_biases   t    ==((D,D,D)DEEVJ!:!::8<8M8MaQUQiQiNi8j))*5,,Z88r&   c                    |j                   \  }}}| j                  |      }|j                  ||| j                  d      j	                  | j
                  | j
                  | j                  | j
                  z  gd      \  }}}|j                  dddd      }|j                  dddd      }|j                  dddd      }||j                  dd      z  | j                  z  | j                  |j                        z   }	|	j                  d      }	|	|z  j                  dd      j                  ||| j                        }| j                  | j!                  |            }|S Nro   r   dimr   rH   r   )rV   r   rp   r|   splitr~   r   permuterY   r}   r   r   softmaxrq   r   r   r   )
r4   rh   rr   
seq_lengthrs   r   querykeyvalue	attentions
             r'   r?   zLevitAttention.forward   sN   $0$6$6!
J"66|D/44ZTMeMegijpp\\4<<)=)=)LMST q 
sE aAq)kk!Q1%aAq)CMM"b11DJJ>AZAZ[g[n[nAoo	%%"%-	!E)44Q:BB:z[_[r[rst|'DEr&   T
r   r   r    r/   r"   no_gradr   r   r?   rA   rB   s   @r'   rv   rv      s.    
: U]]_+ +
9r&   rv   c                   ^     e Zd Z fdZ ej
                         d fd	       Zd Zd Z xZ	S )LevitAttentionSubsamplec	                 x   t         |           || _        |dz  | _        || _        || _        ||z  |z  ||z  z   | _        ||z  |z  | _        || _        t        || j                        | _
        t        ||      | _        t        |||z        | _        t        j                         | _        t        | j                  |      | _        i | _        t'        t)        j*                  t-        |      t-        |                  }	t'        t)        j*                  t-        |      t-        |                  }
t/        |	      t/        |
      }}i g }}|
D ]q  }|	D ]j  }d}t1        |d   |z  |d   z
  |dz
  dz  z         t1        |d   |z  |d   z
  |dz
  dz  z         f}||vrt/        |      ||<   |j3                  ||          l s t4        j                  j7                  t5        j8                  |t/        |                  | _        | j=                  dt5        j>                  |      jA                  ||      d       y )Nrx   r   r   rH   ry   Frz   )!r.   r/   r|   r}   r~   r   r   r   resolution_outr\   keys_valuesrk   queries_subsamplequeriesr   rL   r   r   r   r   r   r   r   r   r   r   r"   r   r   r   r   r   rp   )r4   rc   rd   r~   r|   r   r8   resolution_inr   r   points_r   len_points_r   r   r   r   sizer   r;   s                      r'   r/   z LevitAttentionSubsample.__init__   s1    	#6 d]
.#2W#<?R#RU\_rUr#r "1G";>Q"Q,))T5M5MN!/!F%i;N1NO,,.()@)@*M$&!i''m(<eM>RSTy((~)>n@UVW"%f+s7|K
%'7 	:B :befnr!u4qA~EFBqETZN]_`a]bLbfjmnfnrsesLsHtu!22034E0F%f-089:	: !& 2 25;;?RTWXiTj3k l!5#3#3G#<#A#A+z#Zgl 	 	
r&   c                 R    t         |   |       |r| j                  ri | _        y y y r=   r   r   s     r'   r   zLevitAttentionSubsample.train  r   r&   c                     | j                   r| j                  d d | j                  f   S t        |      }|| j                  vr*| j                  d d | j                  f   | j                  |<   | j                  |   S r=   r   r   s      r'   r   z,LevitAttentionSubsample.get_attention_biases  r   r&   c                 L   |j                   \  }}}| j                  |      j                  ||| j                  d      j	                  | j
                  | j                  | j
                  z  gd      \  }}|j                  dddd      }|j                  dddd      }| j                  | j                  |            }|j                  || j                  dz  | j                  | j
                        j                  dddd      }||j                  dd      z  | j                  z  | j                  |j                        z   }|j                  d      }||z  j                  dd      j!                  |d| j"                        }| j%                  | j'                  |            }|S r   )rV   r   rp   r|   r   r~   r   r   r   r   r   rY   r}   r   r   r   rq   r   r   r   )	r4   rh   rr   r   rs   r   r   r   r   s	            r'   r?   zLevitAttentionSubsample.forward"  s~   $0$6$6!
J\*T*j$*B*BBGUDLL$"6"6"EFAUN 	U
 kk!Q1%aAq)T33LAB

:t':':A'=t?W?WY]YeYefnnq!Q
 CMM"b11DJJ>AZAZ[g[n[nAoo	%%"%-	!E)44Q:BB:rSWSjSjkt|'DEr&   r   r   rB   s   @r'   r   r      s/    +
Z U]]_+ +
9r&   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitMLPLayerzE
    MLP Layer with `2X` expansion in contrast to ViT with `4X`.
    c                     t         |           t        ||      | _        t	        j
                         | _        t        ||      | _        y r=   )r.   r/   r\   	linear_upr   rL   r   linear_down)r4   rc   
hidden_dimr;   s      r'   r/   zLevitMLPLayer.__init__=  s8    '	:>,,.)*i@r&   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r=   )r   r   r   rg   s     r'   r?   zLevitMLPLayer.forwardC  s4    ~~l3|4''5r&   r@   rB   s   @r'   r   r   8  s    Ar&   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitResidualLayerz"
    Residual Block for LeViT
    c                 >    t         |           || _        || _        y r=   )r.   r/   module	drop_rate)r4   r   r   r;   s      r'   r/   zLevitResidualLayer.__init__O  s    "r&   c                    | j                   r| j                  dkD  rt        j                  |j	                  d      dd|j
                        }|j                  | j                        j                  d| j                  z
        j                         }|| j                  |      |z  z   }|S || j                  |      z   }|S )Nr   r   )r   )
r   r   r"   randr   r   ge_divdetachr   )r4   rh   rnds      r'   r?   zLevitResidualLayer.forwardT  s    ==T^^a/**\..q11a@S@STC''$..)--a$...@AHHJC'$++l*Cc*IIL'$++l*CCLr&   r@   rB   s   @r'   r   r   J  s    #
 r&   r   c                   .     e Zd ZdZ fdZd Zd Z xZS )
LevitStagezP
    LeViT Stage consisting of `LevitMLPLayer` and `LevitAttention` layers.
    c                 
   t         |           g | _        || _        |
| _        t        |      D ]  }| j                  j                  t        t        |||||
      | j                  j                               |dkD  sO||z  }| j                  j                  t        t        ||      | j                  j                                |	d   dk(  r| j                  dz
  |	d   z  dz   | _        | j                  j                  t        | j                  j                  ||dz    |	d   |	d   |	d   |	d   |
| j                  d       | j                  | _        |	d   dkD  r| j                  j                  |dz      |	d   z  }| j                  j                  t        t        | j                  j                  |dz      |      | j                  j                               t        j                  | j                        | _        y )	Nr   	Subsampler      rH   r   )r~   r|   r   r8   r   r   rG   )r.   r/   layersrT   r   r   r   r   rv   drop_path_rater   r   r   rJ   r   
ModuleList)r4   rT   idxrJ   r~   depthsr|   r   	mlp_ratiodown_opsr   rs   r   r;   s                r'   r/   zLevitStage.__init__d  s    	*v 	AKK""<:M`mnKK.. 1})I5
""&}\:'NPTP[P[PjPjk	 A;+%#'#5#5#9hqk"IA"MDKK'[[--cC!G<$QK(0$,QK#A;"/#'#6#6
 "&!4!4D{Q![[55cAg>!L
""&%dkk&>&>sQw&GTVZVaVaVpVp mmDKK0r&   c                     | j                   S r=   )r   )r4   s    r'   get_resolutionzLevitStage.get_resolution  s    !!!r&   c                 8    | j                   D ]
  } ||      } |S r=   )r   )r4   rh   layers      r'   r?   zLevitStage.forward  s%    [[ 	/E .L	/r&   )r   r   r    r!   r/   r   r?   rA   rB   s   @r'   r   r   _  s    51n"r&   r   c                   *     e Zd ZdZ fdZddZ xZS )LevitEncoderzC
    LeViT Encoder consisting of multiple `LevitStage` stages.
    c                    t         |           || _        | j                  j                  | j                  j                  z  }g | _        | j                  j                  j                  dg       t        t        |j                              D ]  }t        |||j                  |   |j                  |   |j                  |   |j                  |   |j                  |   |j                   |   |j                  |   |
      }|j#                         }| j
                  j                  |        t%        j&                  | j
                        | _        y )N )r.   r/   rT   
image_size
patch_sizestagesr   r   r   r   r   r   rJ   r~   r|   r   r   r   r   r   )r4   rT   rm   	stage_idxstager;   s        r'   r/   zLevitEncoder.__init__  s   [[++t{{/E/EE
##RD)s6==12 	&I##I.y)i(**95&&y1  +	*E --/JKKu%	&  mmDKK0r&   c                     |rdnd }| j                   D ]  }|r||fz   } ||      } |r||fz   }|st        d ||fD              S t        ||      S )Nr%   c              3   &   K   | ]	  }||  y wr=   r%   ).0vs     r'   	<genexpr>z'LevitEncoder.forward.<locals>.<genexpr>  s     WqWs   )last_hidden_stater   )r   tupler   )r4   rh   output_hidden_statesreturn_dictall_hidden_statesr   s         r'   r?   zLevitEncoder.forward  ss    "6BD[[ 	/E#$5$G! .L	/
   1\O CW\3D$EWWW-\mnnr&   )FTr@   rB   s   @r'   r   r     s    12or&   r   c                   (     e Zd ZdZ fdZd Z xZS )LevitClassificationLayerz$
    LeViT Classification Layer
    c                     t         |           t        j                  |      | _        t        j
                  ||      | _        y r=   )r.   r/   r   rb   r3   r`   ra   )r4   rc   rd   r;   s      r'   r/   z!LevitClassificationLayer.__init__  s0    ..3ii	:6r&   c                 J    | j                  |      }| j                  |      }|S r=   )r3   ra   )r4   rh   r   s      r'   r?   z LevitClassificationLayer.forward  s#    |4\*r&   r@   rB   s   @r'   r   r     s    7
r&   r   c                   (    e Zd ZdZeZdZdZdgZd Z	y)LevitPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    levitrZ   r   c                    t        |t        j                  t        j                  f      rm|j                  j
                  j                  d| j                  j                         |j                  %|j                  j
                  j                          yyt        |t        j                  t        j                  f      rJ|j                  j
                  j                          |j                  j
                  j                  d       yy)zInitialize the weightsg        )meanstdNg      ?)
isinstancer   r`   r0   weightdatanormal_rT   initializer_ranger-   zero_rb   r2   fill_)r4   r   s     r'   _init_weightsz"LevitPreTrainedModel._init_weights  s    fryy"))45 MM&&CT[[5R5R&S{{&  &&( ' @AKK""$MM$$S) Br&   N)
r   r   r    r!   r   config_classbase_model_prefixmain_input_name_no_split_modulesr  r%   r&   r'   r   r     s'    
 L$O-.
*r&   r   aG  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. Use it
    as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`LevitConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
aC  
    Args:
        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`LevitImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zNThe bare Levit model outputting raw features without any specific head on top.c                        e Zd Z fdZ ee       eeee	de
      	 	 	 d	dej                  dee   dee   deeef   fd              Z xZS )

LevitModelc                     t         |   |       || _        t        |      | _        t        |      | _        | j                          y r=   )r.   r/   rT   rD   patch_embeddingsr   encoder	post_initrS   s     r'   r/   zLevitModel.__init__  s:      4V <#F+r&   vision)
checkpointoutput_typer  modalityexpected_outputrZ   r   r   returnc                 D   ||n| j                   j                  }||n| j                   j                  }|t        d      | j	                  |      }| j                  |||      }|d   }|j                  d      }|s
||f|dd  z   S t        |||j                        S )Nz You have to specify pixel_valuesr   r   r   r   r   )r   pooler_outputr   )	rT   r   use_return_dictrW   r  r  r  r   r   )r4   rZ   r   r   r>   encoder_outputsr   pooled_outputs           r'   r?   zLevitModel.forward   s     %9$D $++JjJj 	 &1%<k$++B]B]?@@**<8
,,!5# ' 
 ,A. *..1.5%}58KKK7/')77
 	
r&   NNN)r   r   r    r/   r   LEVIT_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr"   r#   r   boolr   r   r?   rA   rB   s   @r'   r  r    s    
 ++AB&<$. +//3&*	!
''!
 'tn!
 d^	!

 
u>>	?!
 C!
r&   r  z
    Levit Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                        e Zd Z fdZ ee       eeee	e
      	 	 	 	 d	dej                  deej                     dee   dee   deeef   f
d              Z xZS )
LevitForImageClassificationc                 >   t         |   |       || _        |j                  | _        t	        |      | _        |j                  dkD  r#t        |j                  d   |j                        nt        j                  j                         | _        | j                          y Nr   ro   )r.   r/   rT   
num_labelsr  r  r   rJ   r"   r   Identity
classifierr  rS   s     r'   r/   z$LevitForImageClassification.__init__T  s      ++'

   1$ %V%8%8%<f>O>OP""$ 	 	r&   r  r  r  r  rZ   labelsr   r   r  c                    ||n| j                   j                  }| j                  |||      }|d   }|j                  d      }| j	                  |      }d}|| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }	| j                  dk(  r& |	|j                         |j                               }n |	||      }n| j                   j
                  dk(  r=t               }	 |	|j                  d| j                        |j                  d            }n,| j                   j
                  dk(  rt               }	 |	||      }|s|f|d	d z   }
||f|
z   S |
S t!        |||j"                  
      S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationro   rH   )lossr   r   )rT   r   r  r  r/  problem_typer-  dtyper"   longintr
   squeezer	   rp   r   r   r   )r4   rZ   r1  r   r   outputssequence_outputr   r6  loss_fctoutputs              r'   r?   z#LevitForImageClassification.forwardd  s   ( &1%<k$++B]B]**\@Tbm*n!!*)..q11{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE3!//
 	
r&   )NNNN)r   r   r    r/   r   r$  r   _IMAGE_CLASS_CHECKPOINTr   r&  _IMAGE_CLASS_EXPECTED_OUTPUTr"   r#   r   r   r(  r   r   r?   rA   rB   s   @r'   r*  r*  L  s      ++AB*8$4	 +/-1/3&*3
''3
 ))*3
 'tn	3

 d^3
 
u::	;3
 C3
r&   r*  ap  
    LeViT Model transformer with image classification heads on top (a linear layer on top of the final hidden state and
    a linear layer on top of the final hidden state of the distillation token) e.g. for ImageNet. .. warning::
           This model supports inference-only. Fine-tuning with distillation (i.e. with a teacher) is not yet
           supported.
    c                        e Zd Z fdZ ee       eeee	e
      	 	 	 ddej                  dee   dee   deeef   fd              Z xZS )	&LevitForImageClassificationWithTeacherc                    t         |   |       || _        |j                  | _        t	        |      | _        |j                  dkD  r#t        |j                  d   |j                        nt        j                  j                         | _        |j                  dkD  r#t        |j                  d   |j                        nt        j                  j                         | _        | j                          y r,  )r.   r/   rT   r-  r  r  r   rJ   r"   r   r.  r/  classifier_distillr  rS   s     r'   r/   z/LevitForImageClassificationWithTeacher.__init__  s      ++'

   1$ %V%8%8%<f>O>OP""$ 	   1$ %V%8%8%<f>O>OP""$ 	 	r&   r0  rZ   r   r   r  c                 .   ||n| j                   j                  }| j                  |||      }|d   }|j                  d      }| j	                  |      | j                  |      }}||z   dz  }|s|||f|dd  z   }	|	S t        ||||j                        S )Nr  r   r   rH   )r   r   r   r   )rT   r   r  r  r/  rE  r   r   )
r4   rZ   r   r   r<  r=  r   distill_logitsr   r?  s
             r'   r?   z.LevitForImageClassificationWithTeacher.forward  s     &1%<k$++B]B]**\@Tbm*n!!*)..q1%)___%EtG^G^_nGoN
~-2j.9GABKGFM;! .!//	
 	
r&   r#  )r   r   r    r/   r   r$  r   r@  r   r&  rA  r"   r#   r   r(  r   r   r?   rA   rB   s   @r'   rC  rC    s    * ++AB*@$4	 +//3&*	
''
 'tn
 d^	

 
uBB	C
 C
r&   rC  )8r!   r   dataclassesr   typingr   r   r   r"   torch.utils.checkpointr   torch.nnr   r	   r
   modeling_outputsr   r   r   r   modeling_utilsr   utilsr   r   r   r   configuration_levitr   
get_loggerr   loggerr&  r%  r'  r@  rA  r   Moduler)   rD   r\   rk   rv   r   r   r   r   r   r   r   LEVIT_START_DOCSTRINGr$  r  r*  rC  r%   r&   r'   <module>rT     s     ! ) )    A A  . u u , 
		H	%   , %  0 1  =; = =2")) ()5299 )5X	RYY 	RYY ;RYY ;|Pbii PfBII $   *B BJ+o299 +o\ryy  *? *0	   T2
% 2
	2
j  K
"6 K
K
\  5
-A 5
5
r&   