
    sg                       d Z ddlmZ ddlmZmZmZmZ ddlZ	ddl
mZ ddlmZmZmZmZ ddlmZmZmZmZ dd	lmZmZmZmZmZ dd
lmZmZ ddlm Z  ddl!m"Z"  e jF                  e$      Z%dZ&dZ'g dZ(dZ)dZ*d@dAdZ+ G d dejX                  jZ                        Z. G d dejX                  jZ                        Z/ G d dejX                  jZ                        Z0 G d dejX                  jZ                        Z1 G d dejX                  jZ                        Z2 G d dejX                  jZ                        Z3 G d d ejX                  jZ                        Z4 G d! d"ejX                  jZ                        Z5 G d# d$ejX                  jZ                        Z6 G d% d&ejX                  jZ                        Z7 G d' d(ejX                  jZ                        Z8 G d) d*ejX                  jZ                        Z9e G d+ d,ejX                  jZ                               Z: G d- d.e      Z;d/Z<d0Z= ed1e<       G d2 d3e;             Z> ed4e<       G d5 d6e;e             Z? G d7 d8ejX                  jZ                        Z@ G d9 d:ejX                  jZ                        ZA G d; d<ejX                  jZ                        ZB ed=e<       G d> d?e;             ZCy)BzTensorFlow 2.0 MobileViT model.    )annotations)DictOptionalTupleUnionN   )get_tf_activation)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardreplace_return_docstrings)TFBaseModelOutputTFBaseModelOutputWithPooling&TFImageClassifierOutputWithNoAttention(TFSemanticSegmenterOutputWithNoAttention)TFPreTrainedModelTFSequenceClassificationLosskeraskeras_serializableunpack_inputs)
shape_liststable_softmax)logging   )MobileViTConfigr   zapple/mobilevit-small)r   i     r   ztabby, tabby catc                |    ||}t        |t        | |dz  z         |z  |z        }|d| z  k  r||z  }t        |      S )a  
    Ensure that all layers have a channel count that is divisible by `divisor`. This function is taken from the
    original TensorFlow repo. It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
       g?)maxint)valuedivisor	min_value	new_values       f/var/www/html/venv/lib/python3.12/site-packages/transformers/models/mobilevit/modeling_tf_mobilevit.pymake_divisibler&   @   sS     	Is57Q;#677BWLMI3;W	y>    c                  j     e Zd Z	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZdddZddZ xZS )	TFMobileViTConvLayerc           
        t        |   di | t        j                  d| j                  j
                   d       t        |dz
  dz        |z  }t        j                  j                  |      | _
        ||z  dk7  rt        d| d| d      t        j                  j                  |||d	|||d
      | _        |	r(t        j                  j                  ddd      | _        nd | _        |
rht!        |
t"              rt%        |
      | _        nNt!        |j(                  t"              rt%        |j(                        | _        n|j(                  | _        nd | _        || _        || _        y )N
z has backpropagation operations that are NOT supported on CPU. If you wish to train/fine-tune this model, you need a GPU or a TPUr   r   r   zOutput channels (z) are not divisible by z groups.VALIDconvolution)filterskernel_sizestridespaddingdilation_rategroupsuse_biasnamegh㈵>g?normalization)epsilonmomentumr5    )super__init__loggerwarning	__class____name__r    r   layersZeroPadding2Dr1   
ValueErrorConv2Dr-   BatchNormalizationr6   
isinstancestrr	   
activation
hidden_actin_channelsout_channels)selfconfigrI   rJ   r/   strider3   biasdilationuse_normalizationuse_activationkwargsr1   r>   s                r%   r;   zTFMobileViTConvLayer.__init__P   s[    	"6"(() *E E	

 {Q!+,x7||11':& A%0>UV\U]]efgg <<.. #" / 	
 !&!@!@X[bq!@!rD!%D.#."3N"CF--s3"3F4E4E"F"("3"3"DO&(r'   c                    | j                  |      }| j                  |      }| j                  | j                  ||      }| j                  | j                  |      }|S Ntraining)r1   r-   r6   rG   )rK   featuresrV   padded_featuress       r%   callzTFMobileViTConvLayer.call   s^    ,,x0##O4)))(X)FH??&x0Hr'   c                ,   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d d | j                  g       d d d        t        | dd       st        | j                  d      r\t        j                  | j                  j
                        5  | j                  j                  d d d | j                  g       d d d        y y y # 1 sw Y   xY w# 1 sw Y   y xY w)NTr-   r6   r5   )builtgetattrtf
name_scoper-   r5   buildrI   hasattrr6   rJ   rK   input_shapes     r%   r_   zTFMobileViTConvLayer.build   s    ::
4-9t//445 M  &&dD$:J:J'KLM4$/;t))62]]4#5#5#:#:; T&&,,dD$@Q@Q-RST T 3 <M MT Ts   *C>	*D
>D
D)r   r   Fr   TT)rL   r   rI   r    rJ   r    r/   r    rM   r    r3   r    rN   boolrO   r    rP   rc   rQ   zUnion[bool, str]returnNoneFrW   	tf.TensorrV   rc   rd   rh   Nr?   
__module____qualname__r;   rY   r_   __classcell__r>   s   @r%   r)   r)   O   s     "&+/4)4) 4) 	4)
 4) 4) 4) 4) 4)  4) )4) 
4)l
Tr'   r)   c                  P     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 	 	 d fdZdddZd	dZ xZS )
TFMobileViTInvertedResidualzQ
    Inverted residual block (MobileNetv2): https://arxiv.org/abs/1801.04381
    c           
     H   t        |   di | t        t        t	        ||j
                  z              d      }|dvrt        d| d      |dk(  xr ||k(  | _        t        |||dd      | _	        t        |||d|||d	
      | _
        t        |||ddd      | _        y )Nr   )r   r   zInvalid stride .r   
expand_1x1rI   rJ   r/   r5   r   conv_3x3)rI   rJ   r/   rM   r3   rO   r5   F
reduce_1x1rI   rJ   r/   rQ   r5   r9   )r:   r;   r&   r    roundexpand_ratiorB   use_residualr)   rs   ru   rv   )	rK   rL   rI   rJ   rM   rO   rR   expanded_channelsr>   s	           r%   r;   z$TFMobileViTInvertedResidual.__init__   s     	"6"*3u[6CVCV5V/W+XZ[\vha899#q[K{l/J.:KYZam
 -)*$	
 /)% 
r'   c                    |}| j                  ||      }| j                  ||      }| j                  ||      }| j                  r||z   S |S rT   )rs   ru   rv   rz   )rK   rW   rV   residuals       r%   rY   z TFMobileViTInvertedResidual.call   sU    ??8h??==H==??8h??&*&7&7x("EXEr'   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTrs   ru   rv   )	r[   r\   r]   r^   rs   r5   r_   ru   rv   ra   s     r%   r_   z!TFMobileViTInvertedResidual.build   s   ::
4t,8t334 ,%%d+,4T*6t}}112 *##D)*4t,8t334 ,%%d+, , 9, ,* *, ,s$   D%%D1?D=%D.1D:=Er   )rL   r   rI   r    rJ   r    rM   r    rO   r    rd   re   rf   rg   ri   r?   rk   rl   __doc__r;   rY   r_   rm   rn   s   @r%   rp   rp      sP    
 jk!
%!
47!
GJ!
TW!
cf!
	!
FF,r'   rp   c                  N     e Zd Z	 	 d	 	 	 	 	 	 	 	 	 	 	 d fdZdddZddZ xZS )	TFMobileViTMobileNetLayerc           	         t        	|   di | g | _        t        |      D ]9  }t	        ||||dk(  r|ndd|       }| j                  j                  |       |}; y )Nr   r   layer.)rI   rJ   rM   r5   r9   )r:   r;   r@   rangerp   append)
rK   rL   rI   rJ   rM   
num_stagesrR   ilayerr>   s
            r%   r;   z"TFMobileViTMobileNetLayer.__init__   sp     	"6"z" 		'A/')!"avQaS\E KKu%&K		'r'   c                <    | j                   D ]  } |||      } |S rT   r@   )rK   rW   rV   layer_modules       r%   rY   zTFMobileViTMobileNetLayer.call   s(     KK 	AL#Hx@H	Ar'   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY wNTr@   r[   r\   r@   r]   r^   r5   r_   rK   rb   r   s      r%   r_   zTFMobileViTMobileNetLayer.build   t    ::
44(4 $ -]]<#4#45 - &&t,- -- 5- -   A..A7	)r   r   )rL   r   rI   r    rJ   r    rM   r    r   r    rd   re   rf   rg   ri   rj   rn   s   @r%   r   r      sT     '' ' 	'
 ' ' 
'.
-r'   r   c                  :     e Zd Zd fdZddZdddZd	dZ xZS )
TFMobileViTSelfAttentionc                   t        |   d
i | ||j                  z  dk7  rt        d|f d|j                   d      |j                  | _        t	        ||j                  z        | _        | j                  | j
                  z  | _        t        j                  | j
                  t        j                        }t        j                  j                  |      | _        t        j                  j                  | j                  |j                   d      | _        t        j                  j                  | j                  |j                   d      | _        t        j                  j                  | j                  |j                   d	      | _        t        j                  j)                  |j*                        | _        || _        y )Nr   zThe hidden size z4 is not a multiple of the number of attention heads rr   dtypequery)r4   r5   keyr!   r9   )r:   r;   num_attention_headsrB   r    attention_head_sizeall_head_sizer]   castfloat32mathsqrtscaler   r@   Denseqkv_biasr   r   r!   Dropoutattention_probs_dropout_probdropouthidden_size)rK   rL   r   rR   r   r>   s        r%   r;   z!TFMobileViTSelfAttention.__init__  s_   "6"333q8";<. 1334A7 
 $*#=#= #&{V5O5O'O#P !558P8PP00

CWW\\%(
\\''(:(:V__[b'c
<<%%d&8&86??Y^%_\\''(:(:V__[b'c
||++F,O,OP&r'   c                    t        j                  |      d   }t        j                  ||d| j                  | j                  f      }t        j
                  |g d      S )Nr   shaper   r   r   r   perm)r]   r   reshaper   r   	transpose)rK   x
batch_sizes      r%   transpose_for_scoresz-TFMobileViTSelfAttention.transpose_for_scores  sI    XXa[^
JJqR1I1I4KcKc de||AL11r'   c                *   t        j                  |      d   }| j                  | j                  |            }| j                  | j	                  |            }| j                  | j                  |            }t        j                  ||d      }|| j                  z  }t        |d      }| j                  ||      }t        j                  ||      }	t        j                  |	g d      }	t        j                  |	|d| j                  f	      }	|	S )
Nr   T)transpose_br   axisrU   r   r   r   )r]   r   r   r   r!   r   matmulr   r   r   r   r   r   )
rK   hidden_statesrV   r   	key_layervalue_layerquery_layerattention_scoresattention_probscontext_layers
             r%   rY   zTFMobileViTSelfAttention.call  s    XXm,Q/
--dhh}.EF	//

=0IJ//

=0IJ 99[)N+djj8 ))9C ,,,J		/;?]F

=RI[I[8\]r'   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTr   r   r!   )
r[   r\   r]   r^   r   r5   r_   r   r   r!   ra   s     r%   r_   zTFMobileViTSelfAttention.build7  s)   ::
4$'3tzz/ A

  $d.>.>!?@A4%1txx}}- ?dD,<,<=>?4$'3tzz/ A

  $d.>.>!?@A A 4A A? ?A As$   )E2)E)E$EE!$E-rL   r   r   r    rd   re   )r   rh   rd   rh   rf   r   rh   rV   rc   rd   rh   ri   )r?   rk   rl   r;   r   rY   r_   rm   rn   s   @r%   r   r     s    ',2
0Ar'   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTSelfOutputc                    t        |   di | t        j                  j	                  |d      | _        t        j                  j                  |j                        | _        || _	        y Ndenser5   r9   )
r:   r;   r   r@   r   r   r   hidden_dropout_probr   r   rK   rL   r   rR   r>   s       r%   r;   zTFMobileViTSelfOutput.__init__G  sR    "6"\\''''B
||++F,F,FG&r'   c                N    | j                  |      }| j                  ||      }|S rT   r   r   )rK   r   rV   s      r%   rY   zTFMobileViTSelfOutput.callM  s(    

=1]XFr'   c                   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   y xY wNTr   r[   r\   r]   r^   r   r5   r_   r   ra   s     r%   r_   zTFMobileViTSelfOutput.buildR  y    ::
4$'3tzz/ A

  $d.>.>!?@A A 4A A   )A>>Br   rf   r   ri   rj   rn   s   @r%   r   r   F  s    '
Ar'   r   c                  8     e Zd Zd fdZd ZdddZddZ xZS )	TFMobileViTAttentionc                p    t        |   di | t        ||d      | _        t	        ||d      | _        y )N	attentionr   outputr9   )r:   r;   r   r   r   dense_outputr   s       r%   r;   zTFMobileViTAttention.__init__\  s4    "6"1&+KX1&+HUr'   c                    t         ri   NotImplementedError)rK   headss     r%   prune_headsz TFMobileViTAttention.prune_headsa  s    !!r'   c                R    | j                  ||      }| j                  ||      }|S rT   )r   r   )rK   r   rV   self_outputsattention_outputs        r%   rY   zTFMobileViTAttention.calld  s0    ~~mh~G,,\H,Mr'   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr   r   )r[   r\   r]   r^   r   r5   r_   r   ra   s     r%   r_   zTFMobileViTAttention.buildi  s    ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-. . ;+ +. .   C%CCC r   rf   r   ri   )r?   rk   rl   r;   r   rY   r_   rm   rn   s   @r%   r   r   [  s    V
" 
	.r'   r   c                  0     e Zd Zd fdZddZddZ xZS )TFMobileViTIntermediatec                   t        |   di | t        j                  j	                  |d      | _        t        |j                  t              r"t        |j                        | _
        || _        y |j                  | _
        || _        y r   )r:   r;   r   r@   r   r   rE   rH   rF   r	   intermediate_act_fnr   rK   rL   r   intermediate_sizerR   r>   s        r%   r;   z TFMobileViTIntermediate.__init__v  st    "6"\\''(9'H
f''-'89J9J'KD$ ' (.'8'8D$&r'   c                J    | j                  |      }| j                  |      }|S ri   )r   r   )rK   r   s     r%   rY   zTFMobileViTIntermediate.call  s&    

=100?r'   c                   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   y xY wr   r   ra   s     r%   r_   zTFMobileViTIntermediate.build  r   r   rL   r   r   r    r   r    rd   re   )r   rh   rd   rh   ri   rj   rn   s   @r%   r   r   u  s    '
Ar'   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTOutputc                    t        |   di | t        j                  j	                  |d      | _        t        j                  j                  |j                        | _        || _	        y r   )
r:   r;   r   r@   r   r   r   r   r   r   r   s        r%   r;   zTFMobileViTOutput.__init__  sR    "6"\\''''B
||++F,F,FG!2r'   c                X    | j                  |      }| j                  ||      }||z   }|S rT   r   )rK   r   input_tensorrV   s       r%   rY   zTFMobileViTOutput.call  s2    

=1]XF%4r'   c                   | j                   ry d| _         t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   y xY wr   )r[   r\   r]   r^   r   r5   r_   r   ra   s     r%   r_   zTFMobileViTOutput.build  sy    ::
4$'3tzz/ G

  $d.D.D!EFG G 4G Gr   r   rf   )r   rh   r   rh   rV   rc   rd   rh   ri   rj   rn   s   @r%   r   r     s    3Gr'   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTTransformerLayerc                h   t        |   di | t        ||d      | _        t	        |||d      | _        t        |||d      | _        t        j                  j                  |j                  d      | _        t        j                  j                  |j                  d      | _        || _        y )	Nr   r   intermediater   layernorm_beforer7   r5   layernorm_afterr9   )r:   r;   r   r   r   r   r   mobilevit_outputr   r@   LayerNormalizationlayer_norm_epsr   r   r   r   s        r%   r;   z$TFMobileViTTransformerLayer.__init__  s    "6"-fkT3FKIZaop 1&+GX_g h % ? ?H]H]dv ? w$||>>vG\G\ct>u&r'   c                    | j                  | j                  |      |      }||z   }| j                  |      }| j                  |      }| j	                  |||      }|S rT   )r   r   r   r   r   )rK   r   rV   r   layer_outputs        r%   rY   z TFMobileViTTransformerLayer.call  si    >>$*?*?*NYa>b(=8++M:((6,,\=S[,\r'   c                b   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       [t        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   AxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr   r   r   r   r   )r[   r\   r]   r^   r   r5   r_   r   r   r   r   r   ra   s     r%   r_   z!TFMobileViTTransformerLayer.build  s   ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-.4+T2>t4499: 2%%++D124+T2>t4499: L%%++T49I9I,JKL4*D1=t33889 K$$**D$8H8H+IJK K >+ +. .2 2L LK Ks<   G3%H ?H)H )H%3G= H
HH"%H.r   rf   r   ri   rj   rn   s   @r%   r   r     s    'Kr'   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTTransformerc           	         t        |   di | g | _        t        |      D ]E  }t	        ||t        ||j                  z        d|       }| j                  j                  |       G y )Nr   )r   r   r5   r9   )r:   r;   r@   r   r   r    	mlp_ratior   )rK   rL   r   r   rR   r   transformer_layerr>   s          r%   r;   zTFMobileViTTransformer.__init__  sp    "6"z" 	2A ;'"%kF4D4D&D"EaS\	! KK01	2r'   c                <    | j                   D ]  } |||      } |S rT   r   )rK   r   rV   r   s       r%   rY   zTFMobileViTTransformer.call  s)     KK 	KL(JM	Kr'   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY wr   r   r   s      r%   r_   zTFMobileViTTransformer.build  r   r   )rL   r   r   r    r   r    rd   re   rf   r   ri   rj   rn   s   @r%   r   r     s    2
-r'   r   c                  h     e Zd ZdZ	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d fdZd	dZd
dZdddZddZ xZ	S )TFMobileViTLayerz;
    MobileViT block: https://arxiv.org/abs/2110.02178
    c           	     D   t        	|   di | |j                  | _        |j                  | _        |dk(  r*t        ||||dk(  r|nd|dkD  r|dz  ndd      | _        |}nd | _        t        ||||j                  d      | _	        t        |||dddd	      | _
        t        |||d
      | _        t        j                  j                  |j                   d      | _        t        |||dd      | _        t        |d|z  ||j                  d      | _        || _        y )Nr   r   downsampling_layer)rI   rJ   rM   rO   r5   conv_kxkrt   Fconv_1x1)rI   rJ   r/   rP   rQ   r5   transformer)r   r   r5   	layernormr   conv_projectionfusionr9   )r:   r;   
patch_sizepatch_widthpatch_heightrp   r	  r)   conv_kernel_sizer
  r  r   r  r   r@   r   r   r  r  r  r   )
rK   rL   rI   rJ   rM   r   r   rO   rR   r>   s
            r%   r;   zTFMobileViTLayer.__init__  sE    	"6"!,,"--Q;&A')!)QvA*2Q,QA)'D# 'K&*D#,#$//
 -#$# 
 2

 88AVAV]h8i3+ST[l 
 +K$//
 'r'   c                   | j                   | j                  }}t        j                  ||z  d      }t        j                  |      d   }t        j                  |      d   }t        j                  |      d   }t        j                  |      d   }t        j                  t        j
                  j                  ||z        |z  d      }	t        j                  t        j
                  j                  ||z        |z  d      }
|
|k7  xs |	|k7  }|r$t        j                  j                  ||	|
fd      }|
|z  }|	|z  }||z  }t        j                  |g d      }t        j                  |||z  |z  |||f      }t        j                  |g d	      }t        j                  |||||f      }t        j                  |g d
      }t        j                  |||z  ||f      }||f||||||d}||fS )Nint32r   r   r   r   bilinearsizemethodr   r   r   r   r   r   r   r   r   )	orig_sizer   channelsinterpolatenum_patchesnum_patches_widthnum_patches_height)r  r  r]   r   r   r   ceilimageresizer   r   )rK   rW   r  r  
patch_arear   orig_height
orig_widthr  
new_height	new_widthr  num_patch_widthnum_patch_heightr  patches	info_dicts                    r%   	unfoldingzTFMobileViTLayer.unfolding.  s   $($4$4d6G6G\WW[<7A
XXh'*
hhx(+XXh'*
88H%a(WWRWW\\+*DETV]^
GGBGGLLk)AB[PRYZ	:-J{1Jxxxz96MV`aH ${2%5&8 <<,7**zH,/??`kl
 ,,w5**Wz8[*&UV,,w5**WzJ'>X&VW &z2$ &&!0"2
	 	!!r'   c                   | j                   | j                  }}t        ||z        }|d   }|d   }|d   }|d   }	|d   }
t        j                  ||||df      }t        j
                  |d      }t        j                  |||z  |	z  |
||f      }t        j
                  |d	      }t        j                  ||||	|z  |
|z  f      }t        j
                  |d
      }|d   r%t        j                  j                  ||d   d      }|S )Nr   r  r  r!  r   r   r  r   r   r   r   r   r   r  r  r  r  )r  r  r    r]   r   r   r#  r$  )rK   r,  r-  r  r  r%  r   r  r  r+  r*  rW   s               r%   foldingzTFMobileViTLayer.foldingZ  s&   $($4$4d6G6G\|34
|,
Z(.$%9:#$78 ::g
JR'PQ<<|<::zH,/??R^`kl
 <<|<::z8-=-Lo`kNkl
 <<|<]#xxxi6LU_`Hr'   c                   | j                   r| j                  ||      }|}| j                  ||      }| j                  ||      }| j                  |      \  }}| j	                  ||      }| j                  |      }| j                  ||      }| j                  ||      }| j                  t        j                  ||gd      |      }|S )NrU   r   r   )r	  r
  r  r.  r  r  r1  r  r  r]   concat)rK   rW   rV   r}   r,  r-  s         r%   rY   zTFMobileViTLayer.callv  s    ""..x(.KH ==H====H== "^^H5 ""7X">..) <<3''8'D;;ryy(H)=BGRZ;[r'   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   5xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   AxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)	NTr
  r  r  r  r  r  r	  )r[   r\   r]   r^   r
  r5   r_   r  r  r  r   r  r  r	  ra   s     r%   r_   zTFMobileViTLayer.build  sQ   ::
4T*6t}}112 *##D)*4T*6t}}112 *##D)*4-9t//445 -  &&t,-4d+7t~~223 E$$dD$2B2B%CDE4*D1=t33889 1$$**40144(4t{{//0 (!!$'(4-t4@t66;;< 4''--d34 4 A#* ** *- -E E1 1( (4 4sT   J%J'?J4)K KK4K&J$'J14J>KKK#&K/r   )rL   r   rI   r    rJ   r    rM   r    r   r    r   r    rO   r    rd   re   )rW   rh   rd   zTuple[tf.Tensor, Dict])r,  rh   r-  r   rd   rh   rf   rg   ri   )
r?   rk   rl   r   r;   r.  r1  rY   r_   rm   rn   s   @r%   r  r    sv     ?'?' ?' 	?'
 ?' ?' ?' ?' 
?'B*"X824r'   r  c                  J     e Zd Zd fdZ	 	 	 d	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFMobileViTEncoderc           
        t        |   di | || _        g | _        dx}}|j                  dk(  rd}d}n|j                  dk(  rd}d}t        ||j                  d   |j                  d   ddd      }| j                  j                  |       t        ||j                  d   |j                  d	   d	d
d      }| j                  j                  |       t        ||j                  d	   |j                  d
   d	|j                  d   d	d      }| j                  j                  |       |r|d	z  }t        ||j                  d
   |j                  d   d	|j                  d   d|d      }	| j                  j                  |	       |r|d	z  }t        ||j                  d   |j                  d   d	|j                  d	   d
|d      }
| j                  j                  |
       y )NFr   T   r   r   zlayer.0)rI   rJ   rM   r   r5   r   r   zlayer.1zlayer.2)rI   rJ   rM   r   r   r5      zlayer.3)rI   rJ   rM   r   r   rO   r5      zlayer.4r9   )
r:   r;   rL   r@   output_strider   neck_hidden_sizesr   r  hidden_sizes)rK   rL   rR   dilate_layer_4dilate_layer_5rO   layer_1layer_2layer_3layer_4layer_5r>   s              r%   r;   zTFMobileViTEncoder.__init__  s   "6" +0/1$!N!N!!R'!N+00311!4
 	7#+00311!4
 	7#"00311!4++A.
 	7#MH"00311!4++A.	
 	7#MH"00311!4++A.	
 	7#r'   c                    |rdnd }t        | j                        D ]  \  }} |||      }|s||fz   } |st        d ||fD              S t        ||      S )Nr9   rU   c              3  &   K   | ]	  }||  y wri   r9   ).0vs     r%   	<genexpr>z*TFMobileViTEncoder.call.<locals>.<genexpr>	  s     Xq!-Xs   )last_hidden_stater   )	enumerater@   tupler   )rK   r   output_hidden_statesreturn_dictrV   all_hidden_statesr   r   s           r%   rY   zTFMobileViTEncoder.call  ss     #7BD(5 	IOA|(JM#$58H$H!		I X]4E$FXXX =Pabbr'   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY wr   r   r   s      r%   r_   zTFMobileViTEncoder.build  r   r   rL   r   rd   re   )FTF)
r   rh   rM  rc   rN  rc   rV   rc   rd   zUnion[tuple, TFBaseModelOutput]ri   rj   rn   s   @r%   r6  r6    sU    L$b &+ c c #c 	c
 c 
)c(-r'   r6  c                  b     e Zd ZeZdd fdZd Ze	 	 	 	 d	 	 	 	 	 	 	 	 	 dd       Zd	dZ	 xZ
S )
TFMobileViTMainLayerc                   t        |   di | || _        || _        t	        ||j
                  |j                  d   ddd      | _        t        |d      | _	        | j                  r/t	        ||j                  d   |j                  d	   d
d      | _
        t        j                  j                  dd      | _        y )Nr   r   r   	conv_stem)rI   rJ   r/   rM   r5   encoderr   r:     r   conv_1x1_exprt   channels_firstpooler)data_formatr5   r9   )r:   r;   rL   expand_outputr)   num_channelsr<  rU  r6  rV  rX  r   r@   GlobalAveragePooling2DrZ  )rK   rL   r\  rR   r>   s       r%   r;   zTFMobileViTMainLayer.__init__  s    "6"*-++11!4
 *&yA 4"44Q7#55a8#!D ll99FV]e9fr'   c                    t         )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        r   )rK   heads_to_prunes     r%   _prune_headsz!TFMobileViTMainLayer._prune_heads6  s
    
 "!r'   c           	     4   ||n| j                   j                  }||n| j                   j                  }t        j                  |d      }| j                  ||      }| j                  ||||      }| j                  r?| j                  |d         }t        j                  |g d      }| j                  |      }n |d   }t        j                  |g d      }d }|s[|||fn|f}	| j                  s>|dd  }
t        |
d   D cg c]  }t        j                  |d       c}      }
|
f}
|	|
z   S |	|dd  z   S |r1t        |d   D cg c]  }t        j                  |d       c}      }t        |||r      S |j                        S c c}w c c}w )	Nr0  r   rU   rM  rN  rV   r   r  r   )rJ  pooler_outputr   )rL   rM  use_return_dictr]   r   rU  rV  r\  rX  rZ  rL  r   r   )rK   pixel_valuesrM  rN  rV   embedding_outputencoder_outputsrJ  pooled_outputr   remaining_encoder_outputshr   s                r%   rY   zTFMobileViTMainLayer.call=  s    %9$D $++JjJj 	 &1%<k$++B]B]
 ||L|D>>,>J,,3GU`ks ' 
  $ 1 1/!2D E !#->\ R !KK(9:M / 2 "->\ R M;H;T'7[lZnF %%,;AB,?),1AZ[\A]^AR\\!,7^-) .G,H) 999 333  !_`Oa"b!2<<#E"bcM+/'+?-
 	
 FUEbEb
 	
 _ #cs   F
Fc                d   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Ot        j                  | j                  j
                        5  | j                  j                  g d       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   ~xY w# 1 sw Y   y xY w)NTrU  rV  rZ  NNNNrX  )
r[   r\   r]   r^   rU  r5   r_   rV  rZ  rX  ra   s     r%   r_   zTFMobileViTMainLayer.build{  sR   ::
4d+7t~~223 +$$T*+4D)5t||001 )""4()44(4t{{//0 <!!":;<4.:t00556 .!!''-. . ;+ +) )< <. .s0   F%F?FF&FFF#&F/TrL   r   r\  rc   NNNF
rf  tf.Tensor | NonerM  Optional[bool]rN  rs  rV   rc   rd   z5Union[Tuple[tf.Tensor], TFBaseModelOutputWithPooling]ri   )r?   rk   rl   r   config_classr;   ra  r   rY   r_   rm   rn   s   @r%   rS  rS    sk    "Lg6"  *./3&*;
&;
 -;
 $	;

 ;
 
?;
 ;
z.r'   rS  c                      e Zd ZdZeZdZdZy)TFMobileViTPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    	mobilevitrf  N)r?   rk   rl   r   r   rt  base_model_prefixmain_input_namer9   r'   r%   rv  rv    s    
 #L#$Or'   rv  a	  
    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `pixel_values` only and nothing else: `model(pixel_values)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([pixel_values, attention_mask])` or `model([pixel_values, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"pixel_values": pixel_values, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`MobileViTConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        pixel_values (`np.ndarray`, `tf.Tensor`, `List[tf.Tensor]`, `Dict[str, tf.Tensor]` or `Dict[str, np.ndarray]` and each example must have the shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`MobileViTImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
zWThe bare MobileViT model outputting raw hidden-states without any specific head on top.c            	           e Zd Zdd fdZe ee       eee	e
de      	 	 	 	 d	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )TFMobileViTModelc                n    t        |   |g|i | || _        || _        t	        ||d      | _        y )Nrw  r\  r5   )r:   r;   rL   r\  rS  rw  )rK   rL   r\  inputsrR   r>   s        r%   r;   zTFMobileViTModel.__init__  s:    3&3F3*-fMXcdr'   vision)
checkpointoutput_typert  modalityexpected_outputc                0    | j                  ||||      }|S rT   )rw  )rK   rf  rM  rN  rV   r   s         r%   rY   zTFMobileViTModel.call  s!      .BKZbcr'   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTrw  )r[   r\   r]   r^   rw  r5   r_   ra   s     r%   r_   zTFMobileViTModel.build  si    ::
4d+7t~~223 +$$T*+ + 8+ +s   A11A:rn  ro  rp  rq  ri   )r?   rk   rl   r;   r   r   MOBILEVIT_INPUTS_DOCSTRINGr
   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPErY   r_   rm   rn   s   @r%   r{  r{    s    
e *+EF&0$. *./3&*& - $	
  
? G +r'   r{  z
    MobileViT model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                       e Zd Zd fdZe ee       eee	e
e      	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 dd                     ZddZ xZS )	!TFMobileViTForImageClassificationc                z   t        |   |g|i | |j                  | _        t        |d      | _        t
        j                  j                  |j                        | _	        |j                  dkD  r+t
        j                  j                  |j                  d      nt        j                  | _        || _        y )Nrw  r   r   
classifier)r:   r;   
num_labelsrS  rw  r   r@   r   classifier_dropout_probr   r   r]   identityr  rL   )rK   rL   r~  rR   r>   s       r%   r;   z*TFMobileViTForImageClassification.__init__  s    3&3F3 ++-f;G ||++F,J,JKHNHYHY\]H]ELLv00|Dcecncn 	 r'   )r  r  rt  r  c                R   ||n| j                   j                  }| j                  ||||      }|r|j                  n|d   }| j	                  | j                  ||            }|dn| j                  ||      }	|s|f|dd z   }
|	|	f|
z   S |
S t        |	||j                        S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nrc  r   rU   )labelslogitsr   lossr  r   )	rL   re  rw  rd  r  r   hf_compute_lossr   r   )rK   rf  rM  r  rN  rV   outputsri  r  r  r   s              r%   rY   z&TFMobileViTForImageClassification.call  s    , &1%<k$++B]B]../CQ\go ! 
 2=--'!*mh!OP~t4+?+?vV\+?+]Y,F)-)9TGf$EvE54^e^s^sttr'   c                (   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       t        | j                  d      rht        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  d   g       d d d        y y y # 1 sw Y   xY w# 1 sw Y   y xY w)NTrw  r  r5   r   )r[   r\   r]   r^   rw  r5   r_   r`   r  rL   r<  ra   s     r%   r_   z'TFMobileViTForImageClassification.build5  s    ::
4d+7t~~223 +$$T*+4t,8t/]]4??#7#78 [OO))4t{{7T7TUW7X*YZ[ [ 0 9+ +[ [s   C<;6D<DDrQ  NNNNF)rf  rr  rM  rs  r  rr  rN  rs  rV   rs  rd   z4Union[tuple, TFImageClassifierOutputWithNoAttention]ri   )r?   rk   rl   r;   r   r   r  r
   _IMAGE_CLASS_CHECKPOINTr   r  _IMAGE_CLASS_EXPECTED_OUTPUTrY   r_   rm   rn   s   @r%   r  r    s     *+EF*:$4	 *./3#'&*#(u&u -u !	u
 $u !u 
>u G u>
[r'   r  c                  2     e Zd Zd fdZdddZddZ xZS )TFMobileViTASPPPoolingc           
         t        |   di | t        j                  j	                  dd      | _        t        |||ddddd      | _        y )	NTglobal_pool)keepdimsr5   r   relur  )rI   rJ   r/   rM   rP   rQ   r5   r9   )r:   r;   r   r@   r^  r  r)   r  )rK   rL   rI   rJ   rR   r>   s        r%   r;   zTFMobileViTASPPPooling.__init__C  sT    "6" <<>>S`>a,#%"!	
r'   c                    t        |      dd }| j                  |      }| j                  ||      }t        j                  j                  ||d      }|S )Nr   r   rU   r  r  )r   r  r  r]   r#  r$  )rK   rW   rV   spatial_sizes       r%   rY   zTFMobileViTASPPPooling.callS  sR    !(+Ab1##H-==H==88??8,z?Rr'   c                   | j                   ry d| _         t        | dd       Ot        j                  | j                  j
                        5  | j                  j                  g d       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  rm  r  )r[   r\   r]   r^   r  r5   r_   r  ra   s     r%   r_   zTFMobileViTASPPPooling.buildZ  s    ::
4-9t//445 A  &&'?@A4T*6t}}112 *##D)* * 7A A* *s   C'CCC")rL   r   rI   r    rJ   r    rd   re   rf   rg   ri   rj   rn   s   @r%   r  r  B  s    
 	*r'   r  c                  6     e Zd ZdZd fdZdddZddZ xZS )	TFMobileViTASPPzs
    ASPP module defined in DeepLab papers: https://arxiv.org/abs/1606.00915, https://arxiv.org/abs/1706.05587
    c                   t        	|   di | |j                  d   }|j                  }t	        |j
                        dk7  rt        d      g | _        t        |||ddd      }| j                  j                  |       | j                  j                  t        |j
                        D cg c]  \  }}t        |||d|dd|dz    	       c}}       t        |||dt	        |j
                        dz    
      }| j                  j                  |       t        |d|z  |ddd      | _        t        j                  j!                  |j"                        | _        y c c}}w )Nr   z"Expected 3 values for atrous_ratesr   r  zconvs.0rw   zconvs.)rI   rJ   r/   rO   rQ   r5   r   r:  projectr9   )r:   r;   r<  aspp_out_channelslenatrous_ratesrB   convsr)   r   extendrK  r  r  r   r@   r   aspp_dropout_probr   )
rK   rL   rR   rI   rJ   in_projectionr   rate
pool_layerr>   s
            r%   r;   zTFMobileViTASPP.__init__k  sl   "6"..r2//v""#q(ABB
,#%!
 	

-(

  ))<)<= At % +!- !!#)!!a%)	
 ,KfSATAT=UXY=Y<Z4[

 	

*%+L(%!
 ||++F,D,DE9s   /"E$
c                
   t        j                  |g d      }g }| j                  D ]  }|j                   |||              t        j                  |d      }| j                  ||      }| j                  ||      }|S )Nr0  r   rU   r   r   )r]   r   r  r   r3  r  r   )rK   rW   rV   pyramidconvpooled_featuress         r%   rY   zTFMobileViTASPP.call  sy     <<|<JJ 	>DNN48<=	>))G"-,,w,B,,,Jr'   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   bxY w# 1 sw Y   UxY w)NTr  r  )r[   r\   r]   r^   r  r5   r_   r  )rK   rb   r  s      r%   r_   zTFMobileViTASPP.build  s    ::
4D)5t||001 )""4()4$'3

 %]]499- %JJt$% %% 4) )% %s   C*CCC	rQ  rf   rg   ri   r   rn   s   @r%   r  r  f  s    2Fh
%r'   r  c                  6     e Zd ZdZd fdZdddZddZ xZS )	TFMobileViTDeepLabV3zB
    DeepLabv3 architecture: https://arxiv.org/abs/1706.05587
    c           
         t        |   di | t        |d      | _        t        j
                  j                  |j                        | _        t        ||j                  |j                  ddddd      | _        y )	Nasppr   r   FTr  )rI   rJ   r/   rP   rQ   rN   r5   r9   )r:   r;   r  r  r   r@   r   r  r   r)   r  r  r  rK   rL   rR   r>   s      r%   r;   zTFMobileViTDeepLabV3.__init__  sm    "6"#F8	||++F,J,JK.00**# 	
r'   c                ~    | j                  |d   |      }| j                  ||      }| j                  ||      }|S )Nr   rU   )r  r   r  )rK   r   rV   rW   s       r%   rY   zTFMobileViTDeepLabV3.call  sB    99]2.9B<<8<<??8h??r'   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )r[   r\   r]   r^   r  r5   r_   r  ra   s     r%   r_   zTFMobileViTDeepLabV3.build  s    ::
4&2tyy~~. &		%&4t,8t334 ,%%d+, , 9& &, ,r   rQ  rf   r   ri   r   rn   s   @r%   r  r    s    
"	,r'   r  zX
    MobileViT model with a semantic segmentation head on top, e.g. for Pascal VOC.
    c                       e Zd Zd fdZd Ze ee       ee	e
      	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
"TFMobileViTForSemanticSegmentationc                    t        |   |fi | |j                  | _        t        |dd      | _        t        |d      | _        y )NFrw  r}  segmentation_headr   )r:   r;   r  rS  rw  r  r  r  s      r%   r;   z+TFMobileViTForSemanticSegmentation.__init__  sC    *6* ++-fEP[\!5fCV!Wr'   c                     t        |      dd  }t        j                  j                  ||d      }t        j
                  j                  dd       fd} |||      S )Nr   r  r  Tnone)from_logits	reductionc                    | |      }t        j                  | j                  j                  k7  |j                        }||z  }t        j
                  |      t        j
                  |      z  }t        j                  |d      S )Nr   r   )r]   r   rL   semantic_loss_ignore_indexr   
reduce_sumr   )realpredunmasked_lossmaskmasked_lossreduced_masked_lossloss_fctrK   s         r%   r  zGTFMobileViTForSemanticSegmentation.hf_compute_loss.<locals>.masked_loss  sp    $T40M7744;;#I#IIQ^QdQdeD'$.K #%--"<r}}T?R"R::1488r'   )r   r]   r#  r$  r   lossesSparseCategoricalCrossentropy)rK   r  r  label_interp_shapeupsampled_logitsr  r  s   `     @r%   r  z2TFMobileViTForSemanticSegmentation.hf_compute_loss  sa     (/388??68JS]?^<<==$Z`=a	9 6#344r'   )r  rt  c                   ||n| j                   j                  }||n| j                   j                  }|$| j                   j                  dkD  st	        d      | j                  |d||      }|r|j                  n|d   }| j                  ||      }d}	|| j                  ||      }	t        j                  |g d	      }|s|r
|f|dd z   }
n	|f|d
d z   }
|	|	f|
z   S |
S t        |	||r|j                        S d      S )aK  
        labels (`tf.Tensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, TFMobileViTForSemanticSegmentation
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
        >>> model = TFMobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")

        >>> inputs = image_processor(images=image, return_tensors="tf")

        >>> outputs = model(**inputs)

        >>> # logits are of shape (batch_size, num_labels, height, width)
        >>> logits = outputs.logits
        ```Nr   z/The number of labels should be greater than oneTrc  rU   )r  r  r  r   r   r  )rL   rM  re  r  rB   rw  r   r  r  r]   r   r   )rK   rf  r  rM  rN  rV   r  encoder_hidden_statesr  r  r   s              r%   rY   z'TFMobileViTForSemanticSegmentation.call  sI   N %9$D $++JjJj 	 &1%<k$++B]B]dkk&<&<q&@NOO..!%#	 ! 
 :E 5 5'RS*''(='Q''vf'ED f<8# WQR[0 WQR[0)-)9TGf$EvE73G'//
 	
 NR
 	
r'   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTrw  r  )r[   r\   r]   r^   rw  r5   r_   r  ra   s     r%   r_   z(TFMobileViTForSemanticSegmentation.buildQ  s    ::
4d+7t~~223 +$$T*+4,d3?t55::; 3&&,,T23 3 @+ +3 3r   rQ  r  )rf  rr  r  rr  rM  rs  rN  rs  rV   rc   rd   z6Union[tuple, TFSemanticSegmenterOutputWithNoAttention]ri   )r?   rk   rl   r;   r  r   r   r  r   r   r  rY   r_   rm   rn   s   @r%   r  r    s    X5( *+EF+Sbqr *.#'/3&*I
&I
 !I
 -	I

 $I
 I
 
@I
 s G I
V	3r'   r  )r   N)r!   r    r"   r    r#   zOptional[int]rd   r    )Dr   
__future__r   typingr   r   r   r   
tensorflowr]   activations_tfr	   
file_utilsr
   r   r   r   modeling_tf_outputsr   r   r   r   modeling_tf_utilsr   r   r   r   r   tf_utilsr   r   utilsr   configuration_mobilevitr   
get_loggerr?   r<   r  r  r  r  r  r&   r@   Layerr)   rp   r   r   r   r   r   r   r   r   r  r6  rS  rv  MOBILEVIT_START_DOCSTRINGr  r{  r  r  r  r  r  r9   r'   r%   <module>r     s  " & " / /  /    3  4 
		H	% $ . '  2 1 JT5<<-- JTZ=,%,,"4"4 =,@$- 2 2 $-N@Au||11 @AFAELL.. A*.5<<-- .4Aell00 A0G** G,%K%,,"4"4 %KP-U\\// -:4u||)) 4Dj-++ j-Z r.5<<-- r. r.j%!2 %' R   ]!+1 !+	!+H  ?[(BD` ?[?[D!*U\\// !*HP%ell(( P%f%,5<<-- %,P  	s3)C s3s3r'   