
    sgG                   J   d Z ddlmZ ddlZddlmZ ddlmZmZm	Z	 ddl
ZddlZddlmZ ddlmZmZmZmZmZmZmZmZ dd	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% dd
l&m'Z'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2  e/jf                  e4      Z5dZ6dZ7dZ8dZ9dZ:dZ;dZ<dZ=dZ>dZ?dZ@dZAdZB G d d      ZC G d de#j                  j                        ZF G d d e#j                  j                        ZH G d! d"e#j                  j                        ZIeHeId#ZJ G d$ d%e#j                  j                        ZK G d& d'e#j                  j                        ZL G d( d)e#j                  j                        ZM G d* d+e#j                  j                        ZN G d, d-e#j                  j                        ZO G d. d/e#j                  j                        ZP G d0 d1e#j                  j                        ZQ G d2 d3e#j                  j                        ZR G d4 d5e#j                  j                        ZS G d6 d7e#j                  j                        ZT G d8 d9e#j                  j                        ZU G d: d;e#j                  j                        ZV G d< d=e#j                  j                        ZW G d> d?e#j                  j                        ZX G d@ dAe#j                  j                        ZY G dB dCe#j                  j                        ZZe$ G dD dEe#j                  j                               Z[ G dF dGe      Z\e G dH dIe+             Z]dJZ^dKZ_ e-dLe^       G dM dNe\             Z` e-dOe^       G dP dQe\eC             Za e-dRe^       G dS dTe\e             Zb G dU dVe#j                  j                        Zc e-dWe^       G dX dYe\e             Zd e-dZe^       G d[ d\e\e              Ze e-d]e^       G d^ d_e\e             Zf e-d`e^       G da dbe\e             Zg e-dce^       G dd dee\e!             Zhy)fzTF 2.0 MobileBERT model.    )annotationsN)	dataclass)OptionalTupleUnion   )get_tf_activation)TFBaseModelOutputTFBaseModelOutputWithPoolingTFMaskedLMOutputTFMultipleChoiceModelOutputTFNextSentencePredictorOutputTFQuestionAnsweringModelOutputTFSequenceClassifierOutputTFTokenClassifierOutput)TFMaskedLanguageModelingLossTFModelInputTypeTFMultipleChoiceLossTFNextSentencePredictionLossTFPreTrainedModelTFQuestionAnsweringLossTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeraskeras_serializableunpack_inputs)check_embeddings_within_bounds
shape_liststable_softmax)ModelOutputadd_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )MobileBertConfigzgoogle/mobilebert-uncasedr(   z"vumichien/mobilebert-finetuned-nerzK['I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'I-LOC', 'O', 'I-LOC', 'I-LOC']gQ?z%vumichien/mobilebert-uncased-squad-v2z'a nice puppet'gףp=
@      zvumichien/emo-mobilebertz'others'z4.72c                      e Zd ZdZddZy)TFMobileBertPreTrainingLossz
    Loss function suitable for BERT-like pretraining, that is, the task of pretraining a language model by combining
    NSP + MLM. .. note:: Any label of -100 will be ignored (along with the corresponding logits) in the loss
    computation.
    c                   t         j                  j                  dt         j                  j                  j                        } |t
        j                  j                  |d         |d         }t        j                  |d   dk7  |j                        }||z  }t        j                  |      t        j                  |      z  } |t
        j                  j                  |d         |d	         }t        j                  |d   dk7  |j                        }	||	z  }
t        j                  |
      t        j                  |	      z  }t        j                  ||z   d
      S )NT)from_logits	reductionlabelsr   )y_truey_predidtypenext_sentence_labelr'   )r'   )r   lossesSparseCategoricalCrossentropy	ReductionNONEtfnnrelucastr4   
reduce_sumreshape)selfr0   logitsloss_fnunmasked_lm_losseslm_loss_maskmasked_lm_lossesreduced_masked_lm_lossunmasked_ns_lossns_loss_maskmasked_ns_lossreduced_masked_ns_losss               h/var/www/html/venv/lib/python3.12/site-packages/transformers/models/mobilebert/modeling_tf_mobilebert.pyhf_compute_lossz+TFMobileBertPreTrainingLoss.hf_compute_loss_   s-   ,,<<Y^YeYeYoYoYtYt<u %BEEJJvh7G,HQWXYQZ[ wwvh/47?Q?W?WX-<!#/?!@2==Q]C^!^ #"%%**V<Q5R*S\bcd\efwwv&;<DL\LbLbc)L8!#~!>|A\!\zz03II4PP    N)r0   	tf.TensorrA   rN   returnrN   )__name__
__module____qualname____doc__rL    rM   rK   r,   r,   X   s    QrM   r,   c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertIntermediatec                ,   t        |   di | t        j                  j	                  |j
                  d      | _        t        |j                  t              r"t        |j                        | _        || _        y |j                  | _        || _        y )NdensenamerT   )super__init__r   layersDenseintermediate_sizerX   
isinstance
hidden_actstrr	   intermediate_act_fnconfigr@   rd   kwargs	__class__s      rK   r\   z!TFMobileBertIntermediate.__init__u   sw    "6"\\''(@(@w'O
f''-'89J9J'KD$  (.'8'8D$rM   c                J    | j                  |      }| j                  |      }|S N)rX   rc   r@   hidden_statess     rK   callzTFMobileBertIntermediate.call   s&    

=100?rM   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wNTrX   )	builtgetattrr:   
name_scoperX   rZ   buildrd   true_hidden_sizer@   input_shapes     rK   rr   zTFMobileBertIntermediate.build   s}    ::
4$'3tzz/ M

  $dkk.J.J!KLM M 4M M   3BBri   rP   rQ   rR   r\   rl   rr   __classcell__rg   s   @rK   rV   rV   t   s    	MrM   rV   c                  *     e Zd Z fdZd fd	Z xZS )TFLayerNormc                2    || _         t        |   |i | y ri   )	feat_sizer[   r\   )r@   r}   argsrf   rg   s       rK   r\   zTFLayerNorm.__init__   s    "$)&)rM   c                >    t         |   d d | j                  g       y ri   )r[   rr   r}   r@   ru   rg   s     rK   rr   zTFLayerNorm.build   s    tT4>>23rM   ri   )rP   rQ   rR   r\   rr   rx   ry   s   @rK   r{   r{      s    *4 4rM   r{   c                  2     e Zd Zd fd	Z fdZddZ xZS )TFNoNormc                2    t        |   di | || _        y )NrT   )r[   r\   r}   )r@   r}   epsilonrf   rg   s       rK   r\   zTFNoNorm.__init__   s    "6""rM   c                    | j                  d| j                  gd      | _        | j                  d| j                  gd      | _        t        |   |       y )Nbiaszeros)shapeinitializerweightones)
add_weightr}   r   r   r[   rr   r   s     rK   rr   zTFNoNorm.build   sK    OOF4>>2BPWOX	ooht~~6FTZo[k"rM   c                :    || j                   z  | j                  z   S ri   )r   r   )r@   inputss     rK   rl   zTFNoNorm.call   s    #dii//rM   ri   )r   rN   )rP   rQ   rR   r\   rr   rl   rx   ry   s   @rK   r   r      s    ##
0rM   r   )
layer_normno_normc                  2     e Zd ZdZ fdZddZddZ xZS )TFMobileBertEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                V   t        |   di | |j                  | _        |j                  | _        || _        |j
                  | _        |j                  | _        |j                  | _        t        j                  j                  |j
                  d      | _        t        |j                     |j
                  |j                  d      | _        t        j                  j!                  |j"                        | _        | j                  | j                  r
dz  | _        y dz  | _        y )	Nembedding_transformationrY   	LayerNormr   rZ   )rater   r'   rT   )r[   r\   trigram_inputembedding_sizerd   hidden_sizemax_position_embeddingsinitializer_ranger   r]   r^   r   NORM2FNnormalization_typelayer_norm_epsr   Dropouthidden_dropout_probdropoutembedded_input_sizere   s      rK   r\   zTFMobileBertEmbeddings.__init__   s    "6"#11$33!--'-'E'E$!'!9!9(-(:(:6;M;MTn(:(o% !!:!:;(=(=K
 ||++1K1K+L#'#6#6t?Q?Q!#Y WX#Y rM   c                   t        j                  d      5  | j                  d| j                  j                  | j
                  gt        | j                              | _        d d d        t        j                  d      5  | j                  d| j                  j                  | j                  gt        | j                              | _        d d d        t        j                  d      5  | j                  d| j                  | j                  gt        | j                              | _        d d d        | j                  ry d| _        t        | d	d       Zt        j                  | j                   j"                        5  | j                   j%                  d d | j&                  g       d d d        t        | d
d       Nt        j                  | j(                  j"                        5  | j(                  j%                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   OxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nword_embeddingsr   )r   )rZ   r   r   token_type_embeddings
embeddingsposition_embeddingsTr   r   )r:   rq   r   rd   
vocab_sizer   r   r   r   type_vocab_sizer   r   r   r   ro   rp   r   rZ   rr   r   r   rt   s     rK   rr   zTFMobileBertEmbeddings.build   s   ]],- 	//{{--t/B/BC+d>T>TU * DK	 ]]23 	)-!{{22D4D4DE+d>T>TU *9 *D&	 ]]01 	'+!33T5E5EF+d>T>TU (7 (D$	 ::
43T:Ft<<AAB \--33T4AYAY4Z[\4d+7t~~223 +$$T*+ + 87	 		 		 	\ \+ +s>   AHAH,AH))H5)IHH&)H25H>I
c           
     R   ||J |At        || j                  j                         t        j                  | j
                  |      }t        |      dd }|t        j                  |d      }| j                  rTt        j                  t        j                  |ddddf   d      |t        j                  |ddddf   d      gd	
      }| j                  s| j                  | j                  k7  r| j                  |      }|/t        j                  t        j                  d|d         d
      }t        j                  | j                   |      }t        j                  | j"                  |      }||z   |z   }	| j%                  |	      }	| j'                  |	|      }	|	S )z
        Applies embedding based on inputs tensor.

        Returns:
            final_embeddings (`tf.Tensor`): output embedding tensor.
        N)paramsindicesr   )dimsvaluer'   )r   r   )r   r'   r   )r   )r'   r   r      axis)startlimit)r   )r   training)r   rd   r   r:   gatherr   r   fillr   concatpadr   r   r   expand_dimsranger   r   r   r   )
r@   	input_idsposition_idstoken_type_idsinputs_embedsr   ru   position_embedstoken_type_embedsfinal_embeddingss
             rK   rl   zTFMobileBertEmbeddings.call   s    %-*?@@ *9dkk6L6LMIIT[[)LM /4!WW+Q?N IIFF=AB/1IJ!FF=CRC02JK
 M !4!48H8H!H 99-HM>>"((+b/*RYZ[L))4+C+C\ZIIT-G-GQ_`(?:=NN>>1A>B<</?(<SrM   ri   )NNNNF)rP   rQ   rR   rS   r\   rr   rl   rx   ry   s   @rK   r   r      s    QZ&+@/ rM   r   c                  6     e Zd Z fdZd Z	 ddZddZ xZS )TFMobileBertSelfAttentionc                   t        |   di | |j                  |j                  z  dk7  r%t	        d|j                   d|j                         |j                  | _        |j
                  | _        |j                  |j                  z  dk(  sJ t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j%                  |j&                        | _        || _        y )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads (querykernel_initializerrZ   keyr   rT   )r[   r\   r   num_attention_heads
ValueErroroutput_attentionsintrs   attention_head_sizeall_head_sizer   r]   r^   r   r   r   r   r   r   attention_probs_dropout_probr   rd   re   s      rK   r\   z"TFMobileBertSelfAttention.__init__  s   "6" : ::a?#F$6$6#7 8 4457 
 $*#=#= !'!9!9!!F$>$>>!CCC#&v'>'>A[A['[#\ !558P8PP\\''?6C[C[3\cj ( 

 <<%%?6C[C[3\ch & 
 \\''?6C[C[3\cj ( 

 ||++F,O,OPrM   c                    t        j                  ||d| j                  | j                  f      }t        j                  |g d      S )Nr   r   r   r'   r   perm)r:   r?   r   r   	transpose)r@   x
batch_sizes      rK   transpose_for_scoresz.TFMobileBertSelfAttention.transpose_for_scores-  s8    JJq:r4+C+CTE]E]^_||AL11rM   c                   t        |      d   }| j                  |      }	| j                  |      }
| j                  |      }| j	                  |	|      }| j	                  |
|      }| j	                  ||      }t        j                  ||d      }t        j                  t        |      d   |j                        }|t
        j                  j                  |      z  }|&t        j                  ||j                        }||z   }t        |d      }| j                  ||      }|||z  }t        j                  ||      }t        j                  |g d	      }t        j                  ||d| j                  f      }|r||f}|S |f}|S )
Nr   T)transpose_br   r3   r   r   r   r   )r   r   r   r   r   r:   matmulr=   r4   mathsqrtr    r   r   r?   r   )r@   query_tensor
key_tensorvalue_tensorattention_mask	head_maskr   r   r   mixed_query_layermixed_key_layermixed_value_layerquery_layer	key_layervalue_layerattention_scoresdkattention_probscontext_layeroutputss                       rK   rl   zTFMobileBertSelfAttention.call2  s     /2
 JJ|4((:. JJ|4//0A:N--ozJ	//0A:N 99
 WWZ	*2.6F6L6LM+bggll2.>>%WW^;K;Q;QRN/.@ ))9C ,,,J  -	9O		/;?]F

JD,>,>?
 7H=/2 O\M]rM   c                v   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       t        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  r| j                  j                  n| j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr   r   r   )ro   rp   r:   rq   r   rZ   rr   rd   rs   r   r   use_bottleneck_attentionr   rt   s     rK   rr   zTFMobileBertSelfAttention.build_  sW   ::
4$'3tzz/ M

  $dkk.J.J!KLM4%1txx}}- KdDKK,H,HIJK4$'3tzz/ 	

  ;;?? 44![[44	 	 4M MK K	 	s%   3F<3F#-AF/F #F,/F8Fri   )rP   rQ   rR   r\   r   rl   rr   rx   ry   s   @rK   r   r     s    62 ns+ZrM   r   c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertSelfOutputc                   t        |   di | |j                  | _        t        j                  j                  |j                  t        |j                        d      | _	        t        |j                     |j                  |j                  d      | _        | j                  s.t        j                  j                  |j                        | _        || _        y )NrX   r   r   r   rT   )r[   r\   use_bottleneckr   r]   r^   rs   r   r   rX   r   r   r   r   r   r   r   rd   re   s      rK   r\   zTFMobileBertSelfOutput.__init__w  s    "6"$33\\''##H`H`8aho ( 

 !!:!:;##V-B-B
 "" <<//0J0JKDLrM   c                    | j                  |      }| j                  s| j                  ||      }| j                  ||z         }|S Nr   )rX   r   r   r   )r@   rk   residual_tensorr   s       rK   rl   zTFMobileBertSelfOutput.call  sD    

=1"" LLLJM}'FGrM   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wNTrX   r   
ro   rp   r:   rq   rX   rZ   rr   rd   rs   r   rt   s     rK   rr   zTFMobileBertSelfOutput.build      ::
4$'3tzz/ M

  $dkk.J.J!KLM4d+7t~~223 +$$T*+ + 8M M+ +   3C"<C."C+.C7r   ri   rw   ry   s   @rK   r   r   v  s    	+rM   r   c                  6     e Zd Z fdZd Z	 ddZddZ xZS )TFMobileBertAttentionc                l    t        |   di | t        |d      | _        t	        |d      | _        y )Nr@   rY   outputrT   )r[   r\   r   r@   r   mobilebert_outputre   s      rK   r\   zTFMobileBertAttention.__init__  s0    "6"-f6B	!7X!NrM   c                    t         ri   NotImplementedError)r@   headss     rK   prune_headsz!TFMobileBertAttention.prune_heads  s    !!rM   c	           	     v    | j                  |||||||      }	| j                  |	d   ||      }
|
f|	dd  z   }|S )Nr   r   r'   )r@   r  )r@   r   r   r   layer_inputr   r   r   r   self_outputsattention_outputr   s               rK   rl   zTFMobileBertAttention.call  s`     yy*lNIO`ks ! 
  11,q/;Ya1b#%QR(88rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr@   r  )ro   rp   r:   rq   r@   rZ   rr   r  rt   s     rK   rr   zTFMobileBertAttention.build  s    ::
4&2tyy~~. &		%&4,d3?t55::; 3&&,,T23 3 @& &3 3   C%CCC r   ri   )rP   rQ   rR   r\   r	  rl   rr   rx   ry   s   @rK   r  r    s    O
" &	3rM   r  c                  .     e Zd Z fdZddZddZ xZS )TFOutputBottleneckc                V   t        |   di | t        j                  j	                  |j
                  d      | _        t        |j                     |j
                  |j                  d      | _
        t        j                  j                  |j                        | _        || _        y NrX   rY   r   r   rT   )r[   r\   r   r]   r^   r   rX   r   r   r   r   r   r   r   rd   re   s      rK   r\   zTFOutputBottleneck.__init__  s    "6"\\''(:(:'I
 !:!:;(=(=K
 ||++F,F,FGrM   c                v    | j                  |      }| j                  ||      }| j                  ||z         }|S r   )rX   r   r   )r@   rk   r   r   layer_outputss        rK   rl   zTFOutputBottleneck.call  s;    

=1]XF}'FGrM   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wr   r   rt   s     rK   rr   zTFOutputBottleneck.build  r   r   r   ri   rw   ry   s   @rK   r  r    s    	+rM   r  c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertOutputc                   t        |   di | |j                  | _        t        j                  j                  |j                  t        |j                        d      | _	        t        |j                     |j                  |j                  d      | _        | j                  s6t        j                  j                  |j                        | _        || _        y t#        |d      | _        || _        y )NrX   r   r   r   
bottleneckrY   rT   )r[   r\   r   r   r]   r^   rs   r   r   rX   r   r   r   r   r   r   r   r  r  rd   re   s      rK   r\   zTFMobileBertOutput.__init__  s    "6"$33\\''##H`H`8aho ( 

 !!:!:;##V-B-B
 "" <<//0J0JKDL  1lKDOrM   c                    | j                  |      }| j                  s)| j                  ||      }| j                  ||z         }|S | j                  ||z         }| j	                  ||      }|S r   )rX   r   r   r   r  )r@   rk   residual_tensor_1residual_tensor_2r   s        rK   rl   zTFMobileBertOutput.call  st    

=1"" LLLJM NN=;L+LMM  !NN=;L+LMM OOM;LMMrM   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTrX   r   r  )ro   rp   r:   rq   rX   rZ   rr   rd   r_   r   r  rt   s     rK   rr   zTFMobileBertOutput.build  s   ::
4$'3tzz/ N

  $dkk.K.K!LMN4d+7t~~223 +$$T*+4t,8t334 ,%%d+, , 9N N+ +, ,s$   3D<<EE<EEEr   ri   rw   ry   s   @rK   r  r    s    ,rM   r  c                  ,     e Zd Z fdZd ZddZ xZS )TFBottleneckLayerc                    t        |   di | t        j                  j	                  |j
                  d      | _        t        |j                     |j
                  |j                  d      | _
        || _        y r  )r[   r\   r   r]   r^   intra_bottleneck_sizerX   r   r   r   r   rd   re   s      rK   r\   zTFBottleneckLayer.__init__  sg    "6"\\''(D(D7'S
 !:!:;((&2G2Gk
 rM   c                J    | j                  |      }| j                  |      }|S ri   rX   r   )r@   r   rk   s      rK   rl   zTFBottleneckLayer.call  s$    

6*}5rM   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wr   
ro   rp   r:   rq   rX   rZ   rr   rd   r   r   rt   s     rK   rr   zTFBottleneckLayer.build      ::
4$'3tzz/ H

  $dkk.E.E!FGH4d+7t~~223 +$$T*+ + 8H H+ +r   ri   rw   ry   s   @rK   r   r         
	+rM   r   c                  ,     e Zd Z fdZd ZddZ xZS )TFBottleneckc                    t        |   di | |j                  | _        |j                  | _        t	        |d      | _        | j                  rt	        |d      | _        y y )NinputrY   	attentionrT   )r[   r\   key_query_shared_bottleneckr   r   bottleneck_inputr-  re   s      rK   r\   zTFBottleneck.__init__  sZ    "6"+1+M+M((.(G(G% 1&w G++.vKHDN ,rM   c                    | j                  |      }| j                  r|fdz  S | j                  r| j                  |      }||||fS ||||fS )N   )r/  r   r.  r-  )r@   rk   bottlenecked_hidden_statesshared_attention_inputs       rK   rl   zTFBottleneck.call'  se    " &*%:%:=%I"((.0144--%)^^M%B"*,BMSmnn!=-A[\\rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr/  r-  )ro   rp   r:   rq   r/  rZ   rr   r-  rt   s     rK   rr   zTFBottleneck.buildA  s    ::
4+T2>t4499: 2%%++D124d+7t~~223 +$$T*+ + 82 2+ +r  ri   rw   ry   s   @rK   r*  r*    s    I]4	+rM   r*  c                  ,     e Zd Z fdZd ZddZ xZS )TFFFNOutputc                    t        |   di | t        j                  j	                  |j
                  d      | _        t        |j                     |j
                  |j                  d      | _
        || _        y r  )r[   r\   r   r]   r^   rs   rX   r   r   r   r   rd   re   s      rK   r\   zTFFFNOutput.__init__N  sg    "6"\\''(?(?g'N
 !:!:;##V-B-B
 rM   c                P    | j                  |      }| j                  ||z         }|S ri   r$  )r@   rk   r   s      rK   rl   zTFFFNOutput.callV  s)    

=1}'FGrM   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wr   )
ro   rp   r:   rq   rX   rZ   rr   rd   r_   r   rt   s     rK   rr   zTFFFNOutput.build[  s    ::
4$'3tzz/ N

  $dkk.K.K!LMN4d+7t~~223 +$$T*+ + 8N N+ +r   ri   rw   ry   s   @rK   r6  r6  M  r(  rM   r6  c                  ,     e Zd Z fdZd ZddZ xZS )
TFFFNLayerc                l    t        |   di | t        |d      | _        t	        |d      | _        y )NintermediaterY   r  rT   )r[   r\   rV   r=  r6  r  re   s      rK   r\   zTFFFNLayer.__init__h  s1    "6"4V.Q!,V(!CrM   c                L    | j                  |      }| j                  ||      }|S ri   )r=  r  )r@   rk   intermediate_outputr  s       rK   rl   zTFFFNLayer.callm  s,    "//>../BMRrM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr=  r  )ro   rp   r:   rq   r=  rZ   rr   r  rt   s     rK   rr   zTFFFNLayer.buildr  s    ::
4.:t00556 .!!''-.4,d3?t55::; 3&&,,T23 3 @. .3 3r  ri   rw   ry   s   @rK   r;  r;  g  s    D

	3rM   r;  c                  .     e Zd Z fdZddZddZ xZS )TFMobileBertLayerc                   t        |   di | |j                  | _        |j                  | _        t	        |d      | _        t        |d      | _        t        |d      | _	        | j                  rt        |d      | _        |j                  dkD  r:t        |j                  dz
        D cg c]  }t        |d|        c}| _        y y c c}w )	Nr-  rY   r=  r  r  r'   zffn.rT   )r[   r\   r   num_feedforward_networksr  r-  rV   r=  r  r  r*  r  r   r;  ffnr@   rd   rf   irg   s       rK   r\   zTFMobileBertLayer.__init__  s    "6"$33(.(G(G%.vKH4V.Q!3F!J*6EDO**Q.EJ6KjKjmnKnEop
6$qc
;pDH /ps   0Cc           
        | j                   r| j                  |      \  }}}}	n|gdz  \  }}}}	| j                  ||||	||||      }
|
d   }|f}| j                  dk7  r+t	        | j
                        D ]  \  }} ||      }||fz  } | j                  |      }| j                  ||||      }|f|
dd  z   t        j                  d      ||||	||fz   |z   }|S )Nr1  r   r   r'   )
r   r  r-  rD  	enumeraterE  r=  r  r:   constant)r@   rk   r   r   r   r   r   r   r   r  attention_outputsr  srG  
ffn_moduler?  layer_outputr   s                     rK   rl   zTFMobileBertLayer.call  sD   BF//R_B`?L*lKCP/TUBU?L*lK NN + 	
 -Q/((A-!*488!4 ):#-.>#? &(() #//0@A--.ACSUbmu-v O#$ A #
  	 rM   c                (   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   qxY w# 1 sw Y   $xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   {xY w)NTr-  r=  r  r  rE  )ro   rp   r:   rq   r-  rZ   rr   r=  r  r  rE  r@   ru   layers      rK   rr   zTFMobileBertLayer.build  s   ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-.4,d3?t55::; 3&&,,T234t,8t334 ,%%d+,4%1 &]]5::. &KK%& && 2+ +. .3 3, ,& &s<   G%G#?G0G<8HG #G-0G9<HH	r   ri   rw   ry   s   @rK   rB  rB  ~  s    q+Z&rM   rB  c                  0     e Zd Z fdZ	 ddZddZ xZS )TFMobileBertEncoderc                    t        |   di | |j                  | _        |j                  | _        t	        |j
                        D cg c]  }t        |d|        c}| _        y c c}w )Nzlayer_._rY   rT   )r[   r\   r   output_hidden_statesr   num_hidden_layersrB  rQ  rF  s       rK   r\   zTFMobileBertEncoder.__init__  s^    "6"!'!9!9$*$?$?!NSTZTlTlNmn'xs^Dn
ns   
A*c                    |rdnd }|rdnd }	t        | j                        D ].  \  }
}|r||fz   } |||||
   ||      }|d   }|s&|	|d   fz   }	0 |r||fz   }|st        d |||	fD              S t        |||	      S )NrT   r   r   r'   c              3  &   K   | ]	  }||  y wri   rT   ).0vs     rK   	<genexpr>z+TFMobileBertEncoder.call.<locals>.<genexpr>  s     hqZ[Zghs   )last_hidden_staterk   
attentions)rI  rQ  tupler
   )r@   rk   r   r   r   rU  return_dictr   all_hidden_statesall_attentionsrG  layer_moduler  s                rK   rl   zTFMobileBertEncoder.call  s     #7BD0d(4 	FOA|#$58H$H!(~y|=NYaM *!,M !/=3C2E!E	F   1]4D Dh]4E~$Vhhh +;LYg
 	
rM   c                    | j                   ry d| _         t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   IxY w)NTrQ  )ro   rp   rQ  r:   rq   rZ   rr   rP  s      rK   rr   zTFMobileBertEncoder.build  sp    ::
4$'3 &]]5::. &KK%& && 4& &s   A..A7	r   ri   rw   ry   s   @rK   rS  rS    s    o !
F&rM   rS  c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertPoolerc                    t        |   di | |j                  | _        | j                  rEt        j
                  j                  |j                  t        |j                        dd      | _
        || _        y )NtanhrX   )r   
activationrZ   rT   )r[   r\   classifier_activationdo_activater   r]   r^   r   r   r   rX   rd   re   s      rK   r\   zTFMobileBertPooler.__init__  sk    "6"!77++""#263K3K#L!	 , DJ rM   c                V    |d d df   }| j                   s|S | j                  |      }|S Nr   )rj  rX   )r@   rk   first_token_tensorpooled_outputs       rK   rl   zTFMobileBertPooler.call  s7     +1a40%% JJ'9:M  rM   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY wrn   )	ro   rp   r:   rq   rX   rZ   rr   rd   r   rt   s     rK   rr   zTFMobileBertPooler.build  s}    ::
4$'3tzz/ H

  $dkk.E.E!FGH H 4H Hrv   ri   rw   ry   s   @rK   re  re    s    
!HrM   re  c                  ,     e Zd Z fdZd ZddZ xZS )#TFMobileBertPredictionHeadTransformc                   t        |   di | t        j                  j	                  |j
                  t        |j                        d      | _        t        |j                  t              rt        |j                        | _        n|j                  | _        t        d   |j
                  |j                  d      | _        || _        y )NrX   r   r   r   r   rT   )r[   r\   r   r]   r^   r   r   r   rX   r`   ra   rb   r	   transform_act_fnr   r   r   rd   re   s      rK   r\   z,TFMobileBertPredictionHeadTransform.__init__$  s    "6"\\''?6C[C[3\cj ( 

 f''-$5f6G6G$HD!$*$5$5D! .v/A/A6K`K`grsrM   c                l    | j                  |      }| j                  |      }| j                  |      }|S ri   )rX   rs  r   rj   s     rK   rl   z(TFMobileBertPredictionHeadTransform.call0  s4    

=1--m<}5rM   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY wr   r&  rt   s     rK   rr   z)TFMobileBertPredictionHeadTransform.build6  r'  r   ri   rw   ry   s   @rK   rq  rq  #  s    
	+rM   rq  c                  D     e Zd Z fdZddZd Zd Zd Zd Zd Z	 xZ
S )	TFMobileBertLMPredictionHeadc                V    t        |   di | t        |d      | _        || _        y )N	transformrY   rT   )r[   r\   rq  ry  rd   re   s      rK   r\   z%TFMobileBertLMPredictionHead.__init__C  s(    "6"<V+VrM   c                   | j                  | j                  j                  fddd      | _        | j                  | j                  j                  | j                  j
                  z
  | j                  j                  fddd      | _        | j                  | j                  j                  | j                  j
                  fddd      | _        | j                  ry d| _        t        | dd       Nt        j                  | j                  j                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)Nr   Tr   )r   r   	trainablerZ   zdense/weightzdecoder/weightry  )r   rd   r   r   r   r   rX   decoderro   rp   r:   rq   ry  rZ   rr   rt   s     rK   rr   z"TFMobileBertLMPredictionHead.buildH  s   OO4;;+A+A*CQXdhouOv	__;;**T[[-G-GGI_I_`	 % 

 ;;))4;;+E+EF!	 ' 
 ::
4d+7t~~223 +$$T*+ + 8+ +s   D??Ec                    | S ri   rT   r@   s    rK   get_output_embeddingsz2TFMobileBertLMPredictionHead.get_output_embeddings^  s    rM   c                L    || _         t        |      d   | j                  _        y rl  )r|  r   rd   r   r@   r   s     rK   set_output_embeddingsz2TFMobileBertLMPredictionHead.set_output_embeddingsa  s    !+E!21!5rM   c                    d| j                   iS )Nr   )r   r~  s    rK   get_biasz%TFMobileBertLMPredictionHead.get_biase  s    		""rM   c                X    |d   | _         t        |d         d   | j                  _        y )Nr   r   )r   r   rd   r   r  s     rK   set_biasz%TFMobileBertLMPredictionHead.set_biash  s'    &M	!+E&M!:1!=rM   c                    | j                  |      }t        j                  |t        j                  t        j                  | j
                        | j                  gd            }|| j                  z   }|S )Nr   r   )ry  r:   r   r   r   r|  rX   r   rj   s     rK   rl   z!TFMobileBertLMPredictionHead.calll  sY    }5		-BLL<VX\XbXb;cjk1lm%		1rM   ri   )rP   rQ   rR   r\   rr   r  r  r  r  rl   rx   ry   s   @rK   rw  rw  B  s&    
+,6#>rM   rw  c                  ,     e Zd Z fdZd ZddZ xZS )TFMobileBertMLMHeadc                H    t        |   di | t        |d      | _        y )NpredictionsrY   rT   )r[   r\   rw  r  re   s      rK   r\   zTFMobileBertMLMHead.__init__t  s"    "6"7]SrM   c                (    | j                  |      }|S ri   r  )r@   sequence_outputprediction_scoress      rK   rl   zTFMobileBertMLMHead.callx  s     ,,_=  rM   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTr  )ro   rp   r:   rq   r  rZ   rr   rt   s     rK   rr   zTFMobileBertMLMHead.build|  sm    ::
4-9t//445 -  &&t,- - :- -   A11A:ri   rw   ry   s   @rK   r  r  s  s    T!-rM   r  c                  d     e Zd ZeZd fd	Zd Zd Zd Ze		 	 	 	 	 	 	 	 	 	 dd       Z
d	dZ xZS )
TFMobileBertMainLayerc                :   t        |   di | || _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        t        |d      | _	        t        |d      | _        |rt        |d      | _        y d | _        y )Nr   rY   encoderpoolerrT   )r[   r\   rd   rV  r   rU  use_return_dictr_  r   r   rS  r  re  r  )r@   rd   add_pooling_layerrf   rg   s       rK   r\   zTFMobileBertMainLayer.__init__  s    "6"!'!9!9!'!9!9$*$?$?!!110lK*6	BCT(h?Z^rM   c                    | j                   S ri   )r   r~  s    rK   get_input_embeddingsz*TFMobileBertMainLayer.get_input_embeddings  s    rM   c                `    || j                   _        t        |      d   | j                   _        y rl  )r   r   r   r   r  s     rK   set_input_embeddingsz*TFMobileBertMainLayer.set_input_embeddings  s$    !&%/%6q%9"rM   c                    t         )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        r  )r@   heads_to_prunes     rK   _prune_headsz"TFMobileBertMainLayer._prune_heads  s
    
 "!rM   c           	     `   ||t        d      |t        |      }n|t        |      d d }nt        d      |t        j                  |d      }|t        j                  |d      }| j	                  |||||
      }t        j
                  ||d   dd|d   f      }t        j                  ||j                        }t        j                  d|j                        }t        j                  d	|j                        }t        j                  t        j                  ||      |      }|t        d g| j                  z  }| j                  ||||||	|
      }|d   }| j                  | j                  |      nd }|	s
||f|dd  z   S t        |||j                   |j"                  
      S )NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embedsr'   r   r   r3   g      ?g     )r\  pooler_outputrk   r]  )r   r   r:   r   r   r?   r=   r4   rJ  multiplysubtractr  rV  r  r  r   rk   r]  )r@   r   r   r   r   r   r   r   rU  r_  r   ru   embedding_outputextended_attention_maskone_cstten_thousand_cstencoder_outputsr  rn  s                      rK   rl   zTFMobileBertMainLayer.call  s     ]%>cdd"$Y/K&$]3CR8KTUU!WW[!4N!WW[!4N??9lNTalt?u #%**^k!naQRT_`aTb=c"d #%''*AIYI_I_"`++c)9)?)?@;;x7G7M7MN"$++bkk'CZ.[]m"n  %%!7!77I,,#  ' 
 *!,8<8OO4UY  #$ $
 ,-')77&11	
 	
rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTr   r  r  )	ro   rp   r:   rq   r   rZ   rr   r  r  rt   s     rK   rr   zTFMobileBertMainLayer.build  s   ::
4t,8t334 ,%%d+,4D)5t||001 )""4()44(4t{{//0 (!!$'( ( 5, ,) )( ($   D%%D1?D=%D.1D:=E)T
NNNNNNNNNFri   )rP   rQ   rR   r(   config_classr\   r  r  r  r   rl   rr   rx   ry   s   @rK   r  r    sY    #L_:"  !Q
 Q
f(rM   r  c                      e Zd ZdZeZdZy)TFMobileBertPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    
mobilebertN)rP   rQ   rR   rS   r(   r  base_model_prefixrT   rM   rK   r  r    s    
 $L$rM   r  c                  X    e Zd ZU dZdZded<   dZded<   dZded<   dZded	<   dZ	ded
<   y) TFMobileBertForPreTrainingOutputaE  
    Output type of [`TFMobileBertForPreTraining`].

    Args:
        prediction_logits (`tf.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        seq_relationship_logits (`tf.Tensor` of shape `(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False continuation
            before SoftMax).
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings + one for the output of each layer) of shape
            `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nztf.Tensor | NonelossrN   prediction_logitsseq_relationship_logitszTuple[tf.Tensor] | Nonerk   r]  )
rP   rQ   rR   rS   r  __annotations__r  r  rk   r]  rT   rM   rK   r  r    s@    , "D
!#'y')-Y--1M*1*.J'.rM   r  a	  

    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_ids` only and nothing else: `model(input_ids)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`MobileBertConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`Numpy array` or `tf.Tensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.__call__`] and
            [`PreTrainedTokenizer.encode`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`Numpy array` or `tf.Tensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`Numpy array` or `tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail. This argument can be used only in eager mode, in graph mode the value in the
            config will be used instead.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail. This argument can be used only in eager mode, in graph mode the value in the config will be
            used instead.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. This argument can be used in
            eager mode, in graph mode the value will always be set to True.
        training (`bool`, *optional*, defaults to `False`):
            Whether or not to use the model in training mode (some modules like dropout modules have different
            behaviors between training and evaluation).
zdThe bare MobileBert Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd Z fdZe eej                  d             ee	e
e      	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     ZddZ xZS )	TFMobileBertModelc                P    t        |   |g|i | t        |d      | _        y )Nr  rY   )r[   r\   r  r  r@   rd   r   rf   rg   s       rK   r\   zTFMobileBertModel.__init__  s(    3&3F3/\JrM   batch_size, sequence_length
checkpointoutput_typer  c                <    | j                  |||||||||	|

      }|S )N)
r   r   r   r   r   r   r   rU  r_  r   )r  )r@   r   r   r   r   r   r   r   rU  r_  r   r   s               rK   rl   zTFMobileBertModel.call  s<    ( //))%'/!5# " 
 rM   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTr  )ro   rp   r:   rq   r  rZ   rr   rt   s     rK   rr   zTFMobileBertModel.build  si    ::
4t,8t334 ,%%d+, , 9, ,r  r  )r   TFModelInputType | Noner   np.ndarray | tf.Tensor | Noner   r  r   r  r   r  r   r  r   Optional[bool]rU  r  r_  r  r   r  rO   z*Union[Tuple, TFBaseModelOutputWithPooling]ri   )rP   rQ   rR   r\   r   r$   MOBILEBERT_INPUTS_DOCSTRINGformatr"   _CHECKPOINT_FOR_DOCr   _CONFIG_FOR_DOCrl   rr   rx   ry   s   @rK   r  r    s    
K *+F+M+MNk+lm&0$ .28<8<6:377;,0/3&*#(* 6 6	
 4 1 5 * - $ ! 
4 n 8,rM   r  z
    MobileBert Model with two heads on top as done during the pretraining: a `masked language modeling` head and a
    `next sentence prediction (classification)` head.
    c                       e Zd Z fdZd Zd Ze eej                  d             e
ee      	 	 	 	 	 	 	 	 	 	 	 	 d		 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
d                     ZddZd Z xZS )TFMobileBertForPreTrainingc                    t        |   |g|i | t        |d      | _        t	        |d      | _        t        |d      | _        y )Nr  rY   predictions___clsseq_relationship___cls)r[   r\   r  r  r  r  TFMobileBertOnlyNSPHeadseq_relationshipr  s       rK   r\   z#TFMobileBertForPreTraining.__init__  sH    3&3F3/\J.v<OP 7E] ^rM   c                .    | j                   j                   S ri   r  r~  s    rK   get_lm_headz&TFMobileBertForPreTraining.get_lm_head      +++rM   c                    t        j                  dt               | j                  dz   | j                  j                  z   dz   | j                  j                  j                  z   S NzMThe method get_prefix_bias_name is deprecated. Please use `get_bias` instead./)warningswarnFutureWarningrZ   r  r~  s    rK   get_prefix_bias_namez/TFMobileBertForPreTraining.get_prefix_bias_name  sM    egtuyy3!1!1!6!66<t?O?O?[?[?`?```rM   r  r  r  c                H   | j                  |||||||||	|
      }|dd \  }}| j                  |      }| j                  |      }d}|
 |d|
i}||d<   | j                  |||f      }|	s||f|dd z   }||f|z   S |S t	        ||||j
                  |j                        S )a9  
        Return:

        Examples:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoTokenizer, TFMobileBertForPreTraining

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = TFMobileBertForPreTraining.from_pretrained("google/mobilebert-uncased")
        >>> input_ids = tf.constant(tokenizer.encode("Hello, my dog is cute"))[None, :]  # Batch size 1
        >>> outputs = model(input_ids)
        >>> prediction_scores, seq_relationship_scores = outputs[:2]
        ```	r   r   r   r   r   r   rU  r_  r   Nr   r0   r5   r0   rA   )r  r  r  rk   r]  )r  r  r  rL   r  rk   r]  )r@   r   r   r   r   r   r   r   rU  r_  r0   r5   r   r   r  rn  r  seq_relationship_score
total_lossd_labelsr  s                        rK   rl   zTFMobileBertForPreTraining.call  s   B //))%'/!5# " 
 *1!& ,,_=!%!6!6}!E
"5"A &)H.AH*+--XGXZpFq-rJ')?@712;NF/9/EZMF*Q6Q//$:!//))
 	
rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTr  r  r  )	ro   rp   r:   rq   r  rZ   rr   r  r  rt   s     rK   rr   z TFMobileBertForPreTraining.build   s
   ::
4t,8t334 ,%%d+,4-9t//445 -  &&t,-4+T2>t4499: 2%%++D12 2 ?, ,- -2 2r  c                    |dk(  r|dfS |fS Nzcls.predictions.decoder.weightz,mobilebert.embeddings.word_embeddings.weightrT   r@   	tf_weights     rK   tf_to_pt_weight_renamez1TFMobileBertForPreTraining.tf_to_pt_weight_rename.      88LLL<rM   NNNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  rU  r  r_  r  r0   r  r5   r  r   r  rO   z.Union[Tuple, TFMobileBertForPreTrainingOutput]ri   )rP   rQ   rR   r\   r  r  r   r$   r  r  r&   r  r  rl   rr   r  rx   ry   s   @rK   r  r    s   _,a *+F+M+MNk+lm+KZij .28<8<6:377;,0/3&*04=A#(?
*?
 6?
 6	?

 4?
 1?
 5?
 *?
 -?
 $?
 .?
 ;?
 !?
 
8?
 k n ?
B2 rM   r  z8MobileBert Model with a `language modeling` head on top.c            	           e Zd Zg dZ fdZd Zd Ze ee	j                  d             eeeedd      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd	                     Zdd
Zd Z xZS )TFMobileBertForMaskedLM)r  r  cls.seq_relationshipc                v    t        |   |g|i | t        |dd      | _        t	        |d      | _        y )NFr  r  rZ   r  rY   )r[   r\   r  r  r  r  r  s       rK   r\   z TFMobileBertForMaskedLM.__init__>  s;    3&3F3/%Vbc.v<OPrM   c                .    | j                   j                   S ri   r  r~  s    rK   r  z#TFMobileBertForMaskedLM.get_lm_headD  r  rM   c                    t        j                  dt               | j                  dz   | j                  j                  z   dz   | j                  j
                  j                  z   S r  )r  r  r  rZ   mlmr  r~  s    rK   r  z,TFMobileBertForMaskedLM.get_prefix_bias_nameG  sG    egtuyy3.4txx7K7K7P7PPPrM   r  z'paris'g=
ףp=?r  r  r  expected_outputexpected_lossc                   | j                  |||||||||	|
      }|d   }| j                  ||      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j
                        S )az  
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels
        r  r   r   Nr   r  rA   rk   r]  )r  r  rL   r   rk   r]  )r@   r   r   r   r   r   r   r   rU  r_  r0   r   r   r  r  r  r  s                    rK   rl   zTFMobileBertForMaskedLM.callK  s    : //))%'/!5# " 
 "!* ,,_x,P~t4+?+?HY+Z')GABK7F)-)9TGf$EvE$!//))	
 	
rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )ro   rp   r:   rq   r  rZ   rr   r  rt   s     rK   rr   zTFMobileBertForMaskedLM.build  s    ::
4t,8t334 ,%%d+,4-9t//445 -  &&t,- - :, ,- -r  c                    |dk(  r|dfS |fS r  rT   r  s     rK   r  z.TFMobileBertForMaskedLM.tf_to_pt_weight_rename  r  rM   NNNNNNNNNNF)r   r  r   r  r   r  r   r  r   r  r   r  r   r  rU  r  r_  r  r0   r  r   r  rO   zUnion[Tuple, TFMaskedLMOutput]ri   )rP   rQ   rR   "_keys_to_ignore_on_load_unexpectedr\   r  r  r   r$   r  r  r"   r  r   r  rl   rr   r  rx   ry   s   @rK   r  r  5  s   *&Q,Q *+F+M+MNk+lm&$$! .28<8<6:377;,0/3&*04#(.
*.
 6.
 6	.

 4.
 1.
 5.
 *.
 -.
 $.
 ..
 !.
 
(.
 n .
`	- rM   r  c                  ,     e Zd Z fdZd ZddZ xZS )r  c                ~    t        |   di | t        j                  j	                  dd      | _        || _        y )Nr   r  rY   rT   )r[   r\   r   r]   r^   r  rd   re   s      rK   r\   z TFMobileBertOnlyNSPHead.__init__  s7    "6" % 2 21;M 2 NrM   c                (    | j                  |      }|S ri   )r  )r@   rn  r  s      rK   rl   zTFMobileBertOnlyNSPHead.call  s    !%!6!6}!E%%rM   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY w)NTr  )	ro   rp   r:   rq   r  rZ   rr   rd   r   rt   s     rK   rr   zTFMobileBertOnlyNSPHead.build  s    ::
4+T2>t4499: S%%++T49P9P,QRS S ?S Srv   ri   rw   ry   s   @rK   r  r    s    
&SrM   r  zPMobileBert Model with a `next sentence prediction (classification)` head on top.c                       e Zd ZddgZ fdZe eej                  d             e	e
e      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )%TFMobileBertForNextSentencePredictionr  cls.predictionsc                t    t        |   |g|i | t        |d      | _        t	        |d      | _        y )Nr  rY   r  )r[   r\   r  r  r  clsr  s       rK   r\   z.TFMobileBertForNextSentencePrediction.__init__  s7    3&3F3/\J*68PQrM   r  r  c                   | j                  |||||||||	|
      }|d   }| j                  |      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j
                        S )a  
        Return:

        Examples:

        ```python
        >>> import tensorflow as tf
        >>> from transformers import AutoTokenizer, TFMobileBertForNextSentencePrediction

        >>> tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")
        >>> model = TFMobileBertForNextSentencePrediction.from_pretrained("google/mobilebert-uncased")

        >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
        >>> next_sentence = "The sky is blue due to the shorter wavelength of blue light."
        >>> encoding = tokenizer(prompt, next_sentence, return_tensors="tf")

        >>> logits = model(encoding["input_ids"], token_type_ids=encoding["token_type_ids"])[0]
        ```r  r'   Nr  r   r  )r  r  rL   r   rk   r]  )r@   r   r   r   r   r   r   r   rU  r_  r5   r   r   rn  seq_relationship_scoresnext_sentence_lossr  s                    rK   rl   z*TFMobileBertForNextSentencePrediction.call  s    F //))%'/!5# " 
  
"&((="9 #* %%-@I`%a 	 -/'!"+=F7I7U')F2a[aa,#*!//))	
 	
rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )ro   rp   r:   rq   r  rZ   rr   r  rt   s     rK   rr   z+TFMobileBertForNextSentencePrediction.build  s    ::
4t,8t334 ,%%d+,4%1txx}}- %t$% % 2, ,% %r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rU  r  r_  r  r5   r  r   r  rO   z+Union[Tuple, TFNextSentencePredictorOutput]ri   )rP   rQ   rR   r  r\   r   r$   r  r  r&   r   r  rl   rr   rx   ry   s   @rK   r  r    s     +?@R)S&R *+F+M+MNk+lm+HWfg .28<8<6:377;,0/3&*=A#(>
*>
 6>
 6	>

 4>
 1>
 5>
 *>
 ->
 $>
 ;>
 !>
 
5>
 h n >
@	%rM   r  z
    MobileBert Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c            	           e Zd Zg dZdgZ fdZe eej                  d             e
eeeee      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )%TFMobileBertForSequenceClassificationr  r  r  r  r   c                   t        |   |g|i | |j                  | _        t        |d      | _        |j
                  |j
                  n|j                  }t        j                  j                  |      | _
        t        j                  j                  |j                  t        |j                        d      | _        || _        y )Nr  rY   
classifierr   r[   r\   
num_labelsr  r  classifier_dropoutr   r   r]   r   r   r^   r   r   r
  rd   r@   rd   r   rf   r  rg   s        rK   r\   z.TFMobileBertForSequenceClassification.__init__  s    3&3F3 ++/\J)/)B)B)NF%%TZTnTn 	 ||++,>?,,,,/&BZBZ2[bn - 
 rM   r  r  c                &   | j                  |||||||||	|
      }|d   }| j                  ||      }| j                  |      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t	        |||j
                  |j                        S )a  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        r  r'   r   Nr   r  )r  r   r
  rL   r   rk   r]  )r@   r   r   r   r   r   r   r   rU  r_  r0   r   r   rn  rA   r  r  s                    rK   rl   z*TFMobileBertForSequenceClassification.call%  s    : //))%'/!5# " 
  
]XF/~t4+?+?+OY,F)-)9TGf$EvE)!//))	
 	
rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wNTr  r
  
ro   rp   r:   rq   r  rZ   rr   r
  rd   r   rt   s     rK   rr   z+TFMobileBertForSequenceClassification.build`      ::
4t,8t334 ,%%d+,4t,8t334 M%%tT4;;3J3J&KLM M 9, ,M M   C"%3C."C+.C7r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rU  r  r_  r  r0   r  r   r  rO   z(Union[Tuple, TFSequenceClassifierOutput]ri   )rP   rQ   rR   r  _keys_to_ignore_on_load_missingr\   r   r$   r  r  r"   '_CHECKPOINT_FOR_SEQUENCE_CLASSIFICATIONr   r  _SEQ_CLASS_EXPECTED_OUTPUT_SEQ_CLASS_EXPECTED_LOSSrl   rr   rx   ry   s   @rK   r  r    s   *& (2l# *+F+M+MNk+lm:.$2. .28<8<6:377;,0/3&*04#(0
*0
 60
 6	0

 40
 10
 50
 *0
 -0
 $0
 .0
 !0
 
20
 n 0
d	MrM   r  z
    MobileBert Model with a span classification head on top for extractive question-answering tasks like SQuAD (a
    linear layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c                       e Zd Zg dZ fdZe eej                  d             e	e
eeeeee      	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
 TFMobileBertForQuestionAnsweringr  r  r  r  r  c                
   t        |   |g|i | |j                  | _        t        |dd      | _        t
        j                  j                  |j                  t        |j                        d      | _
        || _        y )NFr  r  
qa_outputsr   )r[   r\   r  r  r  r   r]   r^   r   r   r  rd   r  s       rK   r\   z)TFMobileBertForQuestionAnswering.__init__}  su    3&3F3 ++/%Vbc,,,,/&BZBZ2[bn - 
 rM   r  )r  r  r  qa_target_start_indexqa_target_end_indexr  r  c                   | j                  |||||||||	|
      }|d   }| j                  |      }t        j                  |dd      \  }}t        j                  |d      }t        j                  |d      }d}|
||
|d}| j                  |||f      }|	s||f|dd z   }||f|z   S |S t        ||||j                  |j                        S )	a  
        start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        r  r   r   r   r   N)start_positionend_position)r  start_logits
end_logitsrk   r]  )	r  r  r:   splitsqueezerL   r   rk   r]  )r@   r   r   r   r   r   r   r   rU  r_  start_positionsend_positionsr   r   r  rA   r#  r$  r  r0   r  s                        rK   rl   z%TFMobileBertForQuestionAnswering.call  s   H //))%'/!5# " 
 "!*1#%88FAB#? jzz,R8ZZ
4
&=+D(7WF''z0JKD"J/'!"+=F)-)9TGf$EvE-%!!//))
 	
rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY w)NTr  r  )
ro   rp   r:   rq   r  rZ   rr   r  rd   r   rt   s     rK   rr   z&TFMobileBertForQuestionAnswering.build  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rU  r  r_  r  r'  r  r(  r  r   r  rO   z,Union[Tuple, TFQuestionAnsweringModelOutput]ri   )rP   rQ   rR   r  r\   r   r$   r  r  r"   _CHECKPOINT_FOR_QAr   r  _QA_TARGET_START_INDEX_QA_TARGET_END_INDEX_QA_EXPECTED_OUTPUT_QA_EXPECTED_LOSSrl   rr   rx   ry   s   @rK   r  r  l  s   *& *+F+M+MNk+lm%2$40+' .28<8<6:377;,0/3&*9=7;#(;
*;
 6;
 6	;

 4;
 1;
 5;
 *;
 -;
 $;
 7;
 5;
 !;
 
6;
 n ;
z	MrM   r  z
    MobileBert Model with a multiple choice classification head on top (a linear layer on top of the pooled output and
    a softmax) e.g. for RocStories/SWAG tasks.
    c                       e Zd Zg dZdgZ fdZe eej                  d             e
eee      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )TFMobileBertForMultipleChoicer  r   c                .   t        |   |g|i | t        |d      | _        t        j
                  j                  |j                        | _        t        j
                  j                  dt        |j                        d      | _        || _        y )Nr  rY   r'   r
  r   )r[   r\   r  r  r   r]   r   r   r   r^   r   r   r
  rd   r  s       rK   r\   z&TFMobileBertForMultipleChoice.__init__  s{    3&3F3/\J||++F,F,FG,,,,/&2J2J"KR^ - 
 rM   z(batch_size, num_choices, sequence_lengthr  c                   |t        |      d   }t        |      d   }nt        |      d   }t        |      d   }|t        j                  |d|f      nd}|t        j                  |d|f      nd}|t        j                  |d|f      nd}|t        j                  |d|f      nd}|%t        j                  |d|t        |      d   f      nd}| j                  |||||||||	|
      }|d   }| j	                  ||      }| j                  |      }t        j                  |d|f      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )	a5  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ..., num_choices]`
            where `num_choices` is the size of the second dimension of the input tensors. (See `input_ids` above)
        Nr'   r   r   r   )r_  r   r   r  )
r   r:   r?   r  r   r
  rL   r   rk   r]  )r@   r   r   r   r   r   r   r   rU  r_  r0   r   num_choices
seq_lengthflat_input_idsflat_attention_maskflat_token_type_idsflat_position_idsflat_inputs_embedsr   rn  rA   reshaped_logitsr  r  s                            rK   rl   z"TFMobileBertForMultipleChoice.call  s   8  $Y/2K#I.q1J$]3A6K#M215JDMDYIJ/?@_cN\Nhbjj"j9IJnrN\Nhbjj"j9IJnrJVJbBJJ|b*5EFhl ( JJ}r:z-7PQR7S&TU 	
 // # " 
  
]XF/**Vb+->?~t4+?+?+X%''!"+5F)-)9TGf$EvE*"!//))	
 	
rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wr  r  rt   s     rK   rr   z#TFMobileBertForMultipleChoice.build@  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rU  r  r_  r  r0   r  r   r  rO   z)Union[Tuple, TFMultipleChoiceModelOutput]ri   )rP   rQ   rR   r  r  r\   r   r$   r  r  r"   r  r   r  rl   rr   rx   ry   s   @rK   r0  r0    s   *& (2l# *#**+UV  &/$ .28<8<6:377;,0/3&*04#(?
*?
 6?
 6	?

 4?
 1?
 5?
 *?
 -?
 $?
 .?
 !?
 
3?
 ?
B	MrM   r0  z
    MobileBert Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g.
    for Named-Entity-Recognition (NER) tasks.
    c            	           e Zd Zg dZdgZ fdZe eej                  d             e
eeeee      	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	d                     Zd
dZ xZS )"TFMobileBertForTokenClassificationr  r   c                   t        |   |g|i | |j                  | _        t        |dd      | _        |j
                  |j
                  n|j                  }t        j                  j                  |      | _
        t        j                  j                  |j                  t        |j                        d      | _        || _        y )NFr  r  r
  r   r  r  s        rK   r\   z+TFMobileBertForTokenClassification.__init__^  s    3&3F3 ++/%Vbc)/)B)B)NF%%TZTnTn 	 ||++,>?,,,,/&BZBZ2[bn - 
 rM   r  r  c                &   | j                  |||||||||	|
      }|d   }| j                  ||      }| j                  |      }|
dn| j                  |
|      }|	s|f|dd z   }||f|z   S |S t	        |||j
                  |j                        S )z
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        r  r   r   Nr   r  )r  r   r
  rL   r   rk   r]  )r@   r   r   r   r   r   r   r   rU  r_  r0   r   r   r  rA   r  r  s                    rK   rl   z'TFMobileBertForTokenClassification.calll  s    6 //))%'/!5# " 
 "!*,,,J1~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
rM   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wr  r  rt   s     rK   rr   z(TFMobileBertForTokenClassification.build  r  r  r  )r   r  r   r  r   r  r   r  r   r  r   r  r   r  rU  r  r_  r  r0   r  r   r  rO   z%Union[Tuple, TFTokenClassifierOutput]ri   )rP   rQ   rR   r  r  r\   r   r$   r  r  r"   $_CHECKPOINT_FOR_TOKEN_CLASSIFICATIONr   r  _TOKEN_CLASS_EXPECTED_OUTPUT_TOKEN_CLASS_EXPECTED_LOSSrl   rr   rx   ry   s   @rK   r=  r=  L  s   *& (2l# *+F+M+MNk+lm7+$40 .28<8<6:377;,0/3&*04#(.
*.
 6.
 6	.

 4.
 1.
 5.
 *.
 -.
 $.
 ..
 !.
 
/.
 n .
`	MrM   r=  )irS   
__future__r   r  dataclassesr   typingr   r   r   numpynp
tensorflowr:   activations_tfr	   modeling_tf_outputsr
   r   r   r   r   r   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   r   r   r   r   tf_utilsr   r   r    utilsr!   r"   r#   r$   r%   r&   configuration_mobilebertr(   
get_loggerrP   loggerr  r  rA  rB  rC  r*  r-  r.  r+  r,  r  r  r  r,   r]   LayerrV   LayerNormalizationr{   r   r   r   r   r   r  r  r  r   r*  r6  r;  rB  rS  re  rq  rw  r  r  r  r  MOBILEBERT_START_DOCSTRINGr  r  r  r  r  r  r  r  r0  r=  rT   rM   rK   <module>rU     s     "  ! ) )   /	 	 	    S R  7 
		H	%1 $ (L $l !  = '     +E '' ! Q Q8Mu||11 M64%,,11 40u||!! 0 %
:e U\\// e Pb 2 2 bJ+U\\// +B%3ELL.. %3P+++ +8&,++ &,R+** +4,+5<<%% ,+^+%,,$$ +43## 3.N&** N&b1&%,,,, 1&hH++ H@+%,,*<*< +>.5<<#5#5 .b-%,,,, -$ ~(ELL.. ~( ~(B%"3 % /{ / /<( T5 p j.,3 .,	.,b  d !<>Y d d N TVpq] 9;W ]  r] @Sell00 S& ZV%,GIe V%	V%r  \M,GIe \M\M~  eM'BD[ eMeMP  gM$?AU gMgMT  [M)DF_ [M[MrM   