
    sg+                      d Z ddlmZ ddlZddlZddlmZmZmZm	Z	 ddl
ZddlmZ ddlmZmZmZmZ ddlmZmZmZmZmZmZmZmZ dd	lmZ dd
lmZm Z m!Z! ddl"m#Z# dZ$g dg dgZ%g dg dg dgg dg dg dggZ&dZ' G d dejP                  jR                        Z* G d dejP                  jR                        Z+ G d dejP                  jR                        Z, G d dejP                  jR                        Z- G d d ejP                  jR                        Z. G d! d"ejP                  jR                        Z/ G d# d$ejP                  jR                        Z0 G d% d&ejP                  jR                        Z1 G d' d(ejP                  jR                        Z2e G d) d*ejP                  jR                               Z3 G d+ d,e      Z4d-Z5d.Z6 ed/e5       G d0 d1e4             Z7 G d2 d3ejP                  jR                        Z8 ed4e5       G d5 d6e4e             Z9 ed7e5       G d8 d9e4e             Z: ed:e5       G d; d<e4e             Z;y)=zTF 2.0 LayoutLMv3 model.    )annotationsN)ListOptionalTupleUnion   )get_tf_activation)TFBaseModelOutputTFQuestionAnsweringModelOutputTFSequenceClassifierOutputTFTokenClassifierOutput)TFPreTrainedModelTFQuestionAnsweringLossTFSequenceClassificationLossTFTokenClassificationLossget_initializerkeraskeras_serializableunpack_inputs)check_embeddings_within_bounds)add_start_docstrings%add_start_docstrings_to_model_forwardreplace_return_docstrings   )LayoutLMv3Configr   )      r   )r      r   )r   r   r      )   r   r      )	   
         )            )            )            g    חc                  4     e Zd ZdZd fdZddZddZ xZS )TFLayoutLMv3PatchEmbeddingsz$LayoutLMv3 image (patch) embeddings.c                   t        |   d	i | t        |j                  t        j
                  j                        r|j                  n|j                  |j                  f}t        j                  j                  |j                  ||dddt        |j                        d      | _        |j                  | _
        |j                  dz  |d   |d   z  z  | _        || _        y )
Nvalidchannels_lastTproj)filterskernel_sizestridespaddingdata_formatuse_biaskernel_initializernamer   r   r    )super__init__
isinstance
patch_sizecollectionsabcIterabler   layersConv2Dhidden_sizer   initializer_ranger7   
input_sizenum_patchesconfig)selfrN   kwargspatch_sizes	__class__s       h/var/www/html/venv/lib/python3.12/site-packages/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.pyrB   z$TFLayoutLMv3PatchEmbeddings.__init__B   s    "6" &++[__-E-EF ##V%6%67 	
 LL''&&#'.v/G/GH ( 	
	 "--"--q0k!n{ST~6UV    c                    t        j                  |g d      }| j                  |      }t        j                  |d| j                  | j
                  f      }|S )N)r   r   r   r   perm)tf	transposer7   reshaperM   rJ   )rO   pixel_values
embeddingss      rS   callz TFLayoutLMv3PatchEmbeddings.callW   sK     ||L|DYY|,
ZZ
R1A1A4CSCS,TU
rT   c                *   | j                   ry d| _         t        | dd       ft        j                  | j                  j
                        5  | j                  j                  d d d | j                  j                  g       d d d        y y # 1 sw Y   y xY w)NTr7   )	builtgetattrrY   
name_scoper7   r?   buildrN   num_channelsrO   input_shapes     rS   rc   z!TFLayoutLMv3PatchEmbeddings.build`   s}    ::
4&2tyy~~. N		tT4;;3K3K LMN N 3N Ns   4B		BrN   r   r\   	tf.Tensorreturnri   N__name__
__module____qualname____doc__rB   r^   rc   __classcell__rR   s   @rS   r3   r3   ?   s    .*NrT   r3   c                  |     e Zd ZdZd	 fdZd
dZddZddZddZ	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	ddZ
 xZS )TFLayoutLMv3TextEmbeddingszm
    LayoutLMv3 text embeddings. Same as `RobertaEmbeddings` but with added spatial (layout) embeddings.
    c                   t        |   di | t        j                  j	                  |j
                  |j                  t        |j                        d      | _	        t        j                  j	                  |j                  |j                  t        |j                        d      | _        t        j                  j                  |j                  d      | _        t        j                  j                  |j                         | _        |j$                  | _        t        j                  j	                  |j(                  |j                  t        |j                        d      | _        t        j                  j	                  |j,                  |j.                  t        |j                        d      | _        t        j                  j	                  |j,                  |j.                  t        |j                        d      | _        t        j                  j	                  |j,                  |j4                  t        |j                        d	      | _        t        j                  j	                  |j,                  |j4                  t        |j                        d
      | _        |j,                  | _        || _        y )Nword_embeddings)embeddings_initializerr?   token_type_embeddings	LayerNormepsilonr?   position_embeddingsx_position_embeddingsy_position_embeddingsh_position_embeddingsw_position_embeddingsr@   )rA   rB   r   rH   	Embedding
vocab_sizerJ   r   rK   rv   type_vocab_sizerx   LayerNormalizationlayer_norm_epsry   Dropouthidden_dropout_probdropoutpad_token_idpadding_token_indexmax_position_embeddingsr|   max_2d_position_embeddingscoordinate_sizer}   r~   
shape_sizer   r   max_2d_positionsrN   rO   rN   rP   rR   s      rS   rB   z#TFLayoutLMv3TextEmbeddings.__init__n   s;   "6"$||55#263K3K#L"	  6  
 &+\\%;%;""#263K3K#L(	 &< &
" 88AVAV]h8i||++F,F,FG#)#6#6 #(<<#9#9**#263K3K#L&	 $: $
  &+\\%;%;--""#263K3K#L(	 &< &
" &+\\%;%;--""#263K3K#L(	 &< &
" &+\\%;%;--#263K3K#L(	 &< &
" &+\\%;%;--#263K3K#L(	 &< &
" !' A ArT   c           	        	 |d d d d df   }|d d d d df   }|d d d d df   }|d d d d df   }	 | j                  |      }| j                  |      }| j                  |      }	| j                  |      }
| j                  dz
  }| j	                  t        j                  |d d d d df   |d d d d df   z
  d|            }| j                  t        j                  |d d d d df   |d d d d df   z
  d|            }t        j                  |||	|
||gd	      }|S # t         $ r}t        d      |d }~ww xY w# t         $ r}t        d| j                   d      |d }~ww xY w)
Nr   r   r   r   z9Bounding box is not of shape (batch_size, seq_length, 4).z0The `bbox` coordinate values should be within 0-z range.rX   axis)	
IndexErrorr}   r~   r   r   rY   clip_by_valuer   concat)rO   bboxleft_position_idsupper_position_idsright_position_idslower_position_ids	exceptionleft_position_embeddingsupper_position_embeddingsright_position_embeddingslower_position_embeddingsmax_position_idr   r   spatial_position_embeddingss                  rS   %calculate_spatial_position_embeddingsz@TFLayoutLMv3TextEmbeddings.calculate_spatial_position_embeddings   s   	i $Q1W!%aAg!%aAg!%aAg	'+'A'ABS'T$(,(B(BCU(V%(,(B(BCU(V%(,(B(BCU(V% //!3 $ : :T!Q']T!Q']:AO!
 !% : :T!Q']T!Q']:AO!

 ')ii()))%% 
'
# +*C  	iXY_hh	i  	B4CXCXBYY`a	s/   0D AD< 	D9(D44D9<	E$EE$c                2   t        j                  |      }|d   }| j                  dz   }| j                  |z   dz   }t        j                  ||t         j                        }|d   }t        j
                  |d|f      }t        j                  ||df      }|S )z
        We are provided embeddings directly. We cannot infer which are padded, so just generate sequential position
        ids.
        r   dtyper   )rY   shaper   rangeint32r[   tile)rO   inputs_embdsrf   sequence_lengthstart_index	end_indexposition_ids
batch_sizes           rS   &create_position_ids_from_inputs_embedszATFLayoutLMv3TextEmbeddings.create_position_ids_from_inputs_embeds   s    
 hh|,%a...2,,>B	xxYbhhG ^
zz,O0DEww|j!_=rT   c                    t        j                  t        j                  || j                        |j                        }t        j
                  |d      |z  }|| j                  z   }|S )z}
        Replace non-padding symbols with their position numbers. Position numbers begin at padding_token_index + 1.
        r   r   )rY   cast	not_equalr   r   cumsum)rO   	input_idsmaskr   s       rS   "create_position_ids_from_input_idsz=TFLayoutLMv3TextEmbeddings.create_position_ids_from_input_ids   sT     wwr||It/G/GH)//ZyyA.5#d&>&>>rT   c                J    || j                  |      S | j                  |      S rk   )r   r   )rO   r   inputs_embedss      rS   create_position_idsz.TFLayoutLMv3TextEmbeddings.create_position_ids   s*    >>}MM::9EErT   c                   || j                  ||      }|t        j                  |      }nt        j                  |      d d }|!t        j                  ||j                        }|1t        || j                  j                         | j                  |      }| j                  |      }||z   }	| j                  |      }
|	|
z  }	| j                  |      }|	|z  }	| j                  |	      }	| j                  |	|      }	|	S )NrX   r   training)r   rY   r   zerosr   r   rv   	input_dimrx   r|   r   ry   r   )rO   r   r   token_type_idsr   r   r   rf   rx   r]   r|   r   s               rS   r^   zTFLayoutLMv3TextEmbeddings.call   s    33I}ML ((9-K((=1#26K!XXk9K9KLN *9d6J6J6T6TU 00;M $ : :> J"%::
"66|D))
&*&P&PQU&V#11
^^J/
\\*x\@
rT   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | d	d       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   LxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   NxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)
NTrv   rx   ry   r|   r}   r~   r   r   )r`   ra   rY   rb   rv   r?   rc   rx   ry   rN   rJ   r|   r}   r~   r   r   re   s     rS   rc   z TFLayoutLMv3TextEmbeddings.build  s   ::
4*D1=t33889 1$$**40140$7Ct99>>? 7**00674d+7t~~223 L$$dD$++2I2I%JKL4.5At77<<= 5((..t4540$7Ct99>>? 7**006740$7Ct99>>? 7**006740$7Ct99>>? 7**006740$7Ct99>>? 7**0067 7 D)1 17 7L L5 57 77 77 77 7s`   K>%L?3L0L%
L2$L?>MM>LLL"%L/2L<?M	MM!rg   )r   ri   rj   ri   )r   ri   rj   ri   )r   ri   rj   ri   )r   ri   r   ri   rj   ri   )NNNNNF)r   tf.Tensor | Noner   ri   r   r   r   r   r   r   r   boolrj   ri   rk   )rm   rn   ro   rp   rB   r   r   r   r   r^   rc   rq   rr   s   @rS   rt   rt   i   s    0d'+RF '++/)-*.### # )	#
 '# (# # 
#J7rT   rt   c                  h     e Zd Zd fdZddZdd	dZ	 	 	 d
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFLayoutLMv3SelfAttentionc                   t        |   d	i | |j                  |j                  z  dk7  r&t	        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  | j                        | _
        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j                  | j                  t        |j                        d      | _        t        j                  j'                  |j(                        | _        |j,                  | _        |j.                  | _        || _        y )
Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()queryr>   r?   keyvaluer@   )rA   rB   rJ   num_attention_heads
ValueErrorintattention_head_sizeall_head_sizemathsqrtattention_score_normaliserr   rH   Denser   rK   r   r   r   r   attention_probs_dropout_probr   has_relative_attention_biashas_spatial_attention_biasrN   r   s      rS   rB   z"TFLayoutLMv3SelfAttention.__init__+  s   "6" : ::a?#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP*.))D4L4L*M'\\''.v/G/GH ( 


 <<%%.v/G/GH & 

 \\''.v/G/GH ( 

 ||++F,O,OP+1+M+M(*0*K*K'rT   c                    t        j                  |      }|d   |d   | j                  | j                  f}t        j                  ||      }t        j
                  |g d      S )Nr   r   r   r   r   r   rV   )rY   r   r   r   r[   rZ   )rO   xr   	new_shapes       rS   transpose_for_scoresz.TFLayoutLMv3SelfAttention.transpose_for_scoresM  sY    !H!H$$$$	
	 JJq)$||AL11rT   c                    ||z  }t        j                  t        j                  |d      d      }||z
  |z  }t         j                  j	                  |d      S )a  
        https://arxiv.org/abs/2105.13290 Section 2.4 Stabilization of training: Precision Bottleneck Relaxation
        (PB-Relax). A replacement of the original keras.layers.Softmax(axis=-1)(attention_scores). Seems the new
        attention_probs will result in a slower speed and a little bias. Can use
        tf.debugging.assert_near(standard_attention_probs, cogview_attention_probs, atol=1e-08) for comparison. The
        smaller atol (e.g., 1e-08), the better.
        rX   r   )rY   expand_dims
reduce_maxr   softmax)rO   attention_scoresalphascaled_attention_scores	max_valuenew_attention_scoress         rS   cogview_attentionz+TFLayoutLMv3SelfAttention.cogview_attentionX  sT     #3U":NN2==1Hr#RY[\	 7) CuLww3"==rT   c                .   | j                  | j                  |            }| j                  | j                  |            }	| j                  | j                  |            }
|
| j                  z  }t        j                  |g d      }t        j                  ||      }| j                  r"| j                  r|||z   | j                  z  z  }n| j                  r||| j                  z  z  }|||z  }| j                  |      }| j                  ||      }|||z  }t        j                  ||	      }t        j                  |g d      }t        j                  |      }t        j                  ||d   |d   | j                  f      }|r||f}|S |f}|S )N)r   r   r   r   rV   r   r   r   r   )r   r   r   r   r   rY   rZ   matmulr   r   r   r   r   r[   r   )rO   hidden_statesattention_mask	head_maskoutput_attentionsrel_pos
rel_2d_posr   	key_layervalue_layerquery_layernormalised_query_layertransposed_key_layerr   attention_probscontext_layerr   outputss                     rS   r^   zTFLayoutLMv3SelfAttention.calle  s    --dhh}.EF	//

=0IJ//

=0IJ "-t/N/N!N!||L 
 99%;=QR++0O0O:!59X9X XX--$*I*I II%. 001AB,,,J  -	9O		/;?
 '

E!HeAh0B0BC
 7H=/2 O\M]rT   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr   r   r   )r`   ra   rY   rb   r   r?   rc   rN   rJ   r   r   re   s     rS   rc   zTFLayoutLMv3SelfAttention.build  s9   ::
4$'3tzz/ H

  $dkk.E.E!FGH4%1txx}}- FdDKK,C,CDEF4$'3tzz/ H

  $dkk.E.E!FGH H 4H HF FH Hs$   3E*<3E6-3F*E36E?Frg   )r   ri   )    )r   ri   r   zUnion[float, int]NNFr   ri   r   r   r   r   r   r   r   r   r   r   r   r   rj   z4Union[Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor]]rk   )	rm   rn   ro   rB   r   r   r^   rc   rq   rr   s   @rS   r   r   *  sx     D	2>& %)'+3 3 )3 $	3
  3 "3 %3 3 
>3jHrT   r   c                  2     e Zd Zd fdZdddZddZ xZS )TFLayoutLMv3SelfOutputc                x   t        |   di | t        j                  j	                  |j
                  t        |j                        d      | _        t        j                  j                  |j                  d      | _        t        j                  j                  |j                        | _        || _        y Ndenseunitsr>   r?   ry   rz   )rater@   rA   rB   r   rH   r   rJ   r   rK   r   r   r   ry   r   r   r   rN   r   s      rS   rB   zTFLayoutLMv3SelfOutput.__init__      "6"\\''$$IaIa9bip ( 

 88AVAV]h8i||++1K1K+LrT   c                z    | j                  |      }| j                  ||      }| j                  ||z         }|S Ninputs)r	  r   r   r   ry   rO   r   input_tensorr   s       rS   r^   zTFLayoutLMv3SelfOutput.call  ?    

-
8MHMml.JKrT   c                "   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wNTr   ry   )
r`   ra   rY   rb   r   r?   rc   rN   rJ   ry   re   s     rS   rc   zTFLayoutLMv3SelfOutput.build  s    ::
4$'3tzz/ H

  $dkk.E.E!FGH4d+7t~~223 L$$dD$++2I2I%JKL L 8H HL L   3C9<3D9DDrg   Fr   ri   r  ri   r   r   rj   ri   rk   rm   rn   ro   rB   r^   rc   rq   rr   s   @rS   r   r         	LrT   r   c                  V     e Zd Zd fdZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFLayoutLMv3Attentionc                l    t        |   di | t        |d      | _        t	        |d      | _        y )NrO   r?   outputr@   )rA   rB   r   self_attentionr   self_outputr   s      rS   rB   zTFLayoutLMv3Attention.__init__  s1    "6"7VL1&xHrT   c           	     v    | j                  |||||||      }| j                  |d   ||      }	|	f|dd  z   }
|
S )Nr   r   r   )r  r  )rO   r   r   r   r   r   r   r   self_outputsattention_outputr   s              rS   r^   zTFLayoutLMv3Attention.call  sf     ** + 
  ++LO]U]+^#%QR(88rT   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )r`   ra   rY   rb   r  r?   rc   r  re   s     rS   rc   zTFLayoutLMv3Attention.build  s    ::
4)40<t22778 0##))$/04-9t//445 -  &&t,- - :0 0- -   C%CCC rg   r   r   rk   r  rr   s   @rS   r  r    sl    I %)'+  ) $	
   " %  
>.	-rT   r  c                  0     e Zd Zd fdZddZddZ xZS )TFLayoutLMv3Intermediatec                T   t        |   di | t        j                  j	                  |j
                  t        |j                        d      | _        t        |j                  t              r"t        |j                        | _        || _        y |j                  | _        || _        y )Nr   r  r@   )rA   rB   r   rH   r   intermediate_sizer   rK   r   rC   
hidden_actstrr	   intermediate_act_fnrN   r   s      rS   rB   z!TFLayoutLMv3Intermediate.__init__  s    "6"\\''**vOgOg?hov ( 

 f''-'89J9J'KD$  (.'8'8D$rT   c                L    | j                  |      }| j                  |      }|S )Nr  )r   r'  )rO   r   s     rS   r^   zTFLayoutLMv3Intermediate.call   s(    

-
800?rT   c                (   | j                   ry d| _         t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   y xY w)NTr   )	r`   ra   rY   rb   r   r?   rc   rN   rJ   re   s     rS   rc   zTFLayoutLMv3Intermediate.build  s}    ::
4$'3tzz/ H

  $dkk.E.E!FGH H 4H Hs   3BBrg   )r   ri   rj   ri   rk   r  rr   s   @rS   r"  r"    s    HrT   r"  c                  2     e Zd Zd fdZdddZddZ xZS )TFLayoutLMv3Outputc                x   t        |   di | t        j                  j	                  |j
                  t        |j                        d      | _        t        j                  j                  |j                  d      | _        t        j                  j                  |j                        | _        || _        y r   r  r   s      rS   rB   zTFLayoutLMv3Output.__init__  r  rT   c                z    | j                  |      }| j                  ||      }| j                  ||z         }|S r  r
  r  s       rS   r^   zTFLayoutLMv3Output.call  r  rT   c                "   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   |xY w# 1 sw Y   y xY wr  )r`   ra   rY   rb   r   r?   rc   rN   r$  ry   rJ   re   s     rS   rc   zTFLayoutLMv3Output.build"  s    ::
4$'3tzz/ N

  $dkk.K.K!LMN4d+7t~~223 L$$dD$++2I2I%JKL L 8N NL Lr  rg   r  r  rk   r  rr   s   @rS   r+  r+    r  rT   r+  c                  V     e Zd Zd fdZ	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ xZS )TFLayoutLMv3Layerc                    t        |   di | t        |d      | _        t	        |d      | _        t        |d      | _        y )N	attentionr  intermediater  r@   )rA   rB   r  r2  r"  r3  r+  bert_outputr   s      rS   rB   zTFLayoutLMv3Layer.__init__/  s?    "6".vKH4V.Q-f8DrT   c           	         | j                  |||||||      }|d   }	|dd  }
| j                  |	      }| j                  ||	|      }|f|
z   }
|
S )N)r   r   r   r   r   r   r   )r2  r3  r4  )rO   r   r   r   r   r   r   r   self_attention_outputsr  r   intermediate_outputlayer_outputs                rS   r^   zTFLayoutLMv3Layer.call5  s     "&/! "0 "
 2!4(,"//0@A''(;=MX`'a/G+rT   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   xY w# 1 sw Y   qxY w# 1 sw Y   y xY w)NTr2  r3  r4  )	r`   ra   rY   rb   r2  r?   rc   r3  r4  re   s     rS   rc   zTFLayoutLMv3Layer.buildO  s	   ::
4d+7t~~223 +$$T*+4.:t00556 .!!''-.4-9t//445 -  &&t,- - :+ +. .- -s$   D%%D1?D=%D.1D:=Erg   r   r   rk   r  rr   s   @rS   r0  r0  .  sl    E %)'+  ) $	
   " %  
>4-rT   r0  c                       e Zd Zd fdZd	dZ	 	 	 	 	 	 	 	 d
dZddZddZ	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZ	 xZ
S )TFLayoutLMv3Encoderc                D   t        |   di | || _        t        |j                        D cg c]  }t        |d|        c}| _        |j                  | _        |j                  | _        | j                  rg|j                  | _	        |j                  | _
        t        j                  j                  |j                  t        |j                         dd      | _        | j                  r|j$                  | _        |j&                  | _        t        j                  j                  |j                  t        |j                         dd      | _        t        j                  j                  |j                  t        |j                         dd      | _        y y c c}w )	Nzlayer.r  Frel_pos_bias)r  r>   r=   r?   rel_pos_x_biasrel_pos_y_biasr@   )rA   rB   rN   r   num_hidden_layersr0  layerr   r   rel_pos_binsmax_rel_posr   rH   r   r   r   rK   r=  max_rel_2d_posrel_2d_pos_binsr>  r?  )rO   rN   rP   irR   s       rS   rB   zTFLayoutLMv3Encoder.__init___  sf   "6"LQRXRjRjLklq'vaS\Bl
+1+M+M(*0*K*K'++ & 3 3D%11D % 2 200#263K3K#L#	 !3 !D **"("7"7D#)#9#9D "',,"4"400#263K3K#L%	 #5 #D #(,,"4"400#263K3K#L%	 #5 #D + ms   Fc                   |dz  }t        j                  |      }|dz  }||k  }t         j                  j                  t        j                  |t         j
                        |z        }t        j                  ||z        }||z  ||z
  z  }	||	z   }
t        j                  |
|j                        }
t        j                  |
|dz
        }
t        j                  |dkD  |j                        |z  t        j                  |||
      z   S )Nr   r   r   )	rY   absr   logr   float32r   minimumwhere)rO   relative_positionsnum_bucketsmax_distancebucketsmax_exact_bucketsis_smallbuckets_log_ratiodistance_log_ratiobuckets_big_offsetbuckets_bigs              rS   relative_position_bucketz,TFLayoutLMv3Encoder.relative_position_bucket  s     "Q&&&+, (1,.. GGKK(DGX(XY!XXl5F&FG 22kDU6UV 	 (*<<ggk7==9jjkAo>*Q.>LPRPXPXg{Q
 
 	
rT   c                P   t        j                  |d      t        j                  |d      z
  }| j                  |||      }t        j                  ||| j                        } ||      }t        j
                  |g d      }t        j                  || j                        }|S )Nr   rX   )depthr   )r   r   r   r   r   )rY   r   rW  one_hotcompute_dtyperZ   r   )	rO   dense_layerr   rN  rO  rel_pos_matrixr   rel_pos_one_hot	embeddings	            rS   _cal_pos_embz TFLayoutLMv3Encoder._cal_pos_emb  s     2>P\ceAff//\Z**WKtGYGYZ0	LLL9	GGIT-?-?@	rT   c                f    | j                  | j                  || j                  | j                        S rk   )ra  r=  rB  rC  )rO   r   s     rS   _cal_1d_pos_embz#TFLayoutLMv3Encoder._cal_1d_pos_emb  s,      !2!2L$BSBSUYUeUeffrT   c                   |d d d d df   }|d d d d df   }| j                  | j                  || j                  | j                        }| j                  | j                  || j                  | j                        }||z   }|S )Nr   r   )ra  r>  rE  rD  r?  )rO   r   position_coord_xposition_coord_y	rel_pos_x	rel_pos_yr   s          rS   _cal_2d_pos_embz#TFLayoutLMv3Encoder._cal_2d_pos_emb  s    1a=1a=%%  	
	 %%  	
	 *
rT   c
           
        |rdnd }
|rdnd }| j                   r| j                  |      nd }| j                  r| j                  |      nd }t	        | j
                        D ]6  \  }}|r|
|fz   }
|||   nd } ||||||||	      }|d   }|s.||d   fz   }8 |r|
|fz   }
|rt        ||
|      S t        d ||
|fD              S )Nr@   )r   r   r   r   r   last_hidden_stater   
attentionsc              3  &   K   | ]	  }||  y wrk   r@   ).0r   s     rS   	<genexpr>z+TFLayoutLMv3Encoder.call.<locals>.<genexpr>  s      ^c^os   )r   rc  r   ri  	enumeraterA  r
   tuple)rO   r   r   r   r   r   output_hidden_statesreturn_dictr   r   all_hidden_statesall_self_attentionsr   r   rF  layer_modulelayer_head_masklayer_outputss                     rS   r^   zTFLayoutLMv3Encoder.call  s   " #7BD$5b48<8X8X$&&|4^b373R3RT))$/X\
(4 	POA|#$58H$H!.7.CilO(!%!M *!,M &9]1=M<O&O#%	P(   1]4D D$"//.   $13DFY#Z  rT   c                   | j                   ry d| _         t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       Zt        j                  | j                  j
                        5  | j                  j                  d d | j                  g       d d d        t        | dd       K| j                  D ];  }t        j                  |j
                        5  |j                  d        d d d        = y y # 1 sw Y   1xY w# 1 sw Y   xY w# 1 sw Y   {xY w# 1 sw Y   nxY w)NTr=  r>  r?  rA  )r`   ra   rY   rb   r=  r?   rc   rB  r>  rE  r?  rA  )rO   rf   rA  s      rS   rc   zTFLayoutLMv3Encoder.build  s   ::
4.:t00556 I!!''tT5F5F(GHI4)40<t22778 N##))4t7K7K*LMN4)40<t22778 N##))4t7K7K*LMN4$'3 &]]5::. &KK%& && 4I IN NN N& &s0   )F#2)F0)F<G#F-0F9<GG	rg   )rM  ri   rN  r   rO  r   )r]  zkeras.layers.Denser   ri   rN  r   rO  r   )r   ri   )r   ri   )NNNFFTNF)r   ri   r   r   r   r   r   r   r   r   rs  r   rt  r   r   r   r   r   rj   oUnion[TFBaseModelOutput, Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor], Tuple[tf.Tensor, tf.Tensor, tf.Tensor]]rk   )rm   rn   ro   rB   rW  ra  rc  ri  r^   rc   rq   rr   s   @rS   r;  r;  ^  s     D
2'   	
  g* "&+/&*"'%* )-7 7 7 )	7
 $7  7 #7 7 '7 7
7r&rT   r;  c                       e Zd ZeZd fdZddZddZddZd Z	dddZ
ddZddZdd	Zdd
Ze	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd       Z xZS )TFLayoutLMv3MainLayerc                d   t        |   di | || _        |j                  rt	        |d      | _        |j                  rt        |d      | _        t        j                  j                  |j                  d      | _        t        j                  j                  |j                  d      | _        |j"                  s|j$                  r-|j&                  |j(                  z  }| j+                  ||f       t        j                  j                  dd	      | _        t/        |d
      | _        y )Nr]   r  patch_embedry   rz   r   )
image_sizegư>normencoderr@   )rA   rB   rN   
text_embedrt   r]   visual_embedr3   r  r   rH   r   r   ry   r   r   r   r   r   rL   rD   init_visual_bboxr  r;  r  )rO   rN   rP   r  rR   s       rS   rB   zTFLayoutLMv3MainLayer.__init__  s    "6"8lSDO:6VD"\\<<VEZEZal<mDN <<//0J0JQZ/[DL11V5V5V#..&2C2CC
%%*j1I%J7767RDI*6	BrT   c                   | j                   j                  r| j                   j                  | j                   j                  z  }| j	                  dd| j                   j
                  fddt        j                  d      | _        | j	                  d||z  dz   | j                   j
                  fddt        j                  d      | _	        | j                  ry d| _
        t        | dd       Mt        j                  | j                  j                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                   j                        5  | j                   j                  d        d d d        t        | d	d       Mt        j                  | j"                  j                        5  | j"                  j                  d        d d d        t        | d
d       dt        j                  | j$                  j                        5  | j$                  j                  d d | j                   j
                  g       d d d        t        | dd       Mt        j                  | j&                  j                        5  | j&                  j                  d        d d d        t        | dd       et        j                  | j(                  j                        5  | j(                  j                  d d | j                   j
                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   bxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)Nr   r   T	cls_token)r   initializer	trainabler   r?   	pos_embedr  r]   r  ry   r   r  )rN   r  rL   rD   
add_weightrJ   rY   rJ  r  r  r`   ra   rb   r  r?   rc   r]   r  ry   r   r  )rO   rf   r  s      rS   rc   zTFLayoutLMv3MainLayer.build$  s   ;;##//4;;3I3IIJ!__!T[[445#jj  - DN "__*z1A5t{{7N7NO#jj  - DN ::
4D)5t||001 )""4()4t,8t334 ,%%d+,4-9t//445 -  &&t,-4d+7t~~223 L$$dD$++2I2I%JKL4D)5t||001 )""4()4&2tyy~~. G		tT[[-D-D EFG G 3) ), ,- -L L) )G GsH   L*.L7M"3MM-3M)*L47MMMM&)M2c                .    | j                   j                  S rk   )r]   rv   )rO   s    rS   get_input_embeddingsz*TFLayoutLMv3MainLayer.get_input_embeddingsL  s    ...rT   c                :    || j                   j                  _        y rk   )r]   rv   weight)rO   r   s     rS   set_input_embeddingsz*TFLayoutLMv3MainLayer.set_input_embeddingsO  s    16''.rT   c                    t         )z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        )NotImplementedError)rO   heads_to_prunes     rS   _prune_headsz"TFLayoutLMv3MainLayer._prune_headsS  s
    
 "!rT   c                |   |\  }}t        j                  d||dz   z  |      |z  }t        j                  |d      }t        j                  ||dg      }t        j                  d||dz   z  |      |z  }t        j                  |d      }t        j                  |d|g      }t        j                  |d d d df   |d d |d d dd f   |dd  gd      }t        j
                  |ddg      }t        j                  dd|dz
  |dz
  ggt         j                        }t        j                  ||gd      | _	        y )Nr   r   r   rX   r   r   )
rY   r   r   r   stackr[   constantr   r   visual_bbox)	rO   r  max_lenheightwidthvisual_bbox_xvisual_bbox_yr  cls_token_boxs	            rS   r  z&TFLayoutLMv3MainLayer.init_visual_bboxZ  s0    #Guqy$97CuL}1=qz:Gvz$:GDN}1=6{;hh1crc6"M#2$6ae8Lm\]\^N_`
 jjr1g6aGaK1%E$FbhhW99m[%AJrT   c                    t        j                  | j                  d      }t        j                  ||ddg      }t        j                  ||      }|S )Nr   r   r   r   )rY   r   r  r   r   )rO   r   r   r  s       rS   calculate_visual_bboxz+TFLayoutLMv3MainLayer.calculate_visual_bboxq  sE    nnT%5%5A>ggkJ1+=>ggk7rT   c                *   | j                  |      }t        j                  |      d   }t        j                  | j                  |ddg      }t        j
                  ||gd      }t        | dd       || j                  z  }| j                  |      }|S )Nr   r   r   r  )	r  rY   r   r   r  r   ra   r  r  )rO   r\   r]   r   
cls_tokenss        rS   embed_imagez!TFLayoutLMv3MainLayer.embed_imagew  s    %%l3
 XXj)!,
WWT^^j!Q-?@
YY
J7a@
 4d+7$..(JYYz*
rT   c                \   t        |j                        }|dk(  rt        j                  |d      }nM|dk(  r/t        j                  |d      }t        j                  |d      }nt	        d|j                   d      t        j
                  || j                        }d|z
  t        z  }|S )Nr   r   r   r   z&Wrong shape for attention_mask (shape ).g      ?)lenr   rY   r   r   r   r\  LARGE_NEGATIVE)rO   r   n_dimsextended_attention_masks       rS   get_extended_attention_maskz1TFLayoutLMv3MainLayer.get_extended_attention_mask  s     ^))* Q;&(nn^!&L#q[ ')nn^!&L#&(nn5LST&U#EnFZFZE[[]^__ #%''*A4CUCU"V#&)@#@N"R&&rT   c                   |d g| j                   j                  z  S t        j                  |      }|dk(  rt        j                  |d      }t        j                  |d      }t        j                  |d      }t        j                  |d      }t        j
                  || j                   j                  ddddg      }ni|dk(  rFt        j                  |d      }t        j                  |d      }t        j                  |d      }n|dk7  rt        d|j                   d      t        j                  |      dk(  sJ d	t        j                  |       d
       t        j                  || j                        }|S )Nr   r   r   rX   r   r    z!Wrong shape for head_mask (shape r  zGot head_mask rank of z, but require 5.)
rN   r@  rY   rankr   r   r   r   r   r\  )rO   r   r  s      rS   get_head_maskz#TFLayoutLMv3MainLayer.get_head_mask  sD   6DKK9999#Q;yq9Iyq9Iyr:Iyr:IDKK991aAFI q[yq9Iyr:Iyr:Iq[@@QQSTUUwwy!Q&e*@AS@TTd(ee&GGIt'9'9:	rT   c           
        |	|	n| j                   j                  }	|
|
n| j                   j                  }
||n| j                   j                  }| t	        j
                  |      }|d   }|d   }nH| t	        j
                  |      }|d   }|d   }n&|t	        j
                  |      d   }nt        d      ||j                  }n=||j                  }n.||j                  }n||j                  }nt        j                  }||i|t	        j                  |f|      }|t	        j                  |f|      }|t	        j                  |df|      }| j                  ||||||      }d }d }|| j                  |      }t	        j                  |t	        j
                  |      d   f|      }||}nt	        j                  ||gd      }| j                   j                  r0| j                  ||      }||}nt	        j                  ||gd      }| j                   j                   s| j                   j                  rt	        j"                  dt	        j
                  |      d   |      }t	        j$                  |d      }t	        j&                  ||dg      }||_t	        j$                  t	        j"                  d|      d      }t	        j&                  ||dg      }t	        j                  ||gd      }n|}|||}nt	        j                  |gd      }| j)                  |      }| j+                  ||      }n| j                   j                   s| j                   j                  ru| j                   j                   rGt	        j$                  t	        j"                  d|      d      }t	        j&                  ||dg      }|}| j                   j                  r|}| j-                  |      }| j/                  |      }| j1                  |||||	|
|	      }|d   }|s	|f|dd  z   S t3        ||j4                  |j6                  
      S )Nr   r   zEYou have to specify either input_ids or inputs_embeds or pixel_valuesr   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   rs  rt  rk  )rN   r   rs  rt  rY   r   r   r   r   onesr   r]   r  r   r   r  r   r   r   r   ry   r   r  r  r  r
   r   rm  )rO   r   r   r   r   r   r   r   r\   r   rs  rt  r   rf   r   
seq_length	int_dtypeembedding_output
final_bboxfinal_position_idsvisual_embeddingsvisual_attention_maskr  visual_position_idsr  encoder_outputssequence_outputs                              rS   r^   zTFLayoutLMv3MainLayer.call  sW   6 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY ((9-K$QJ$QJ&((=1K$QJ$QJ%,/2Jdee  !I

I'&,,I'&,,II M$=%!#*j)A!S%!#:z*B)!T|xxZ ;9M##)-+!  /   
!# $ 0 0 > %'GGZBS9TUV9W,X`i$j!%!6!#N<Q+RYZ![ {{55"88YO<!,J!#D++>Q!GJ {{66$++:`:`&(hhq"((;L2Ma2PXa&b#&(nn5Hq&Q#&(gg.AJPQ?&S#(M,E#%>>"((1jPY2Zab#cL#%77<*a#IL)+LBU3V]^)_&)<&  ]%:#4 #%99.>@Q-RYZ#[ #~~.>?#||,<x|P[[448^8^{{66!~~bhhq*I.V]^_!ww|j!_E%1"{{55!
"&"B"B>"R &&y1	,,+2/!5# ' 	
 *!,#%(;;; -)77&11
 	
rT   rg   rk   )rj   zkeras.layers.Layer)r   ztf.Variable)i  )r  zTuple[int, int]r  r   )r   r   r   ztf.DTyperh   )r   ri   rj   ri   )r   r   rj   z(Union[tf.Tensor, List[tf.Tensor | None]]NNNNNNNNNNNFr   r   r   r   r   r   r   r   r   r   r   r   r   r   r\   r   r   Optional[bool]rs  r  rt  r  r   r   rj   r{  )rm   rn   ro   r   config_classrB   rc   r  r  r  r  r  r  r  r  r   r^   rq   rr   s   @rS   r}  r}    s   #LC*&GP/7"K.'62  '+!%+/+/)-&**.)-,0/3&*`
#`
 `
 )	`

 )`
 '`
 $`
 (`
 '`
 *`
 -`
 $`
 `

`
 `
rT   r}  c                  4     e Zd ZdZeZdZe fd       Z xZ	S )TFLayoutLMv3PreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    
layoutlmv3c                n    t         |   }t        j                  dt        j                  d      |d<   |S )N)NNr   r   r  )rA   input_signaturerY   
TensorSpecr   )rO   sigrR   s     rS   r  z+TFLayoutLMv3PreTrainedModel.input_signatureg  s,    g%mmORXXFKF
rT   )
rm   rn   ro   rp   r   r  base_model_prefixpropertyr  rq   rr   s   @rS   r  r  ^  s'    
 $L$ rT   r  a	  
    This model inherits from [`TFPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
    etc.)

    This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
    as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matter related to general usage and
    behavior.

    <Tip>

    TensorFlow models and layers in `transformers` accept two formats as input:

    - having all inputs as keyword arguments (like PyTorch models), or
    - having all inputs as a list, tuple or dict in the first positional argument.

    The reason the second format is supported is that Keras methods prefer this format when passing inputs to models
    and layers. Because of this support, when using methods like `model.fit()` things should "just work" for you - just
    pass your inputs and labels in any format that `model.fit()` supports! If, however, you want to use the second
    format outside of Keras methods like `fit()` and `predict()`, such as when creating your own layers or models with
    the Keras `Functional` API, there are three possibilities you can use to gather all the input Tensors in the first
    positional argument:

    - a single Tensor with `input_ids` only and nothing else: `model(input_ids)`
    - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
    `model([input_ids, attention_mask])` or `model([input_ids, attention_mask, token_type_ids])`
    - a dictionary with one or several input Tensors associated to the input names given in the docstring:
    `model({"input_ids": input_ids, "token_type_ids": token_type_ids})`

    Note that when creating models and layers with
    [subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) then you don't need to worry
    about any of this, as you can just pass inputs like you would to any other Python function!

    </Tip>

    Parameters:
        config ([`LayoutLMv3Config`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`Numpy array` or `tf.Tensor` of shape `(batch_size, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        bbox (`Numpy array` or `tf.Tensor` of shape `(batch_size, sequence_length, 4)`, *optional*):
            Bounding boxes of each input sequence tokens. Selected in the range `[0,
            config.max_2d_position_embeddings-1]`. Each bounding box should be a normalized version in (x0, y0, x1, y1)
            format, where (x0, y0) corresponds to the position of the upper left corner in the bounding box, and (x1,
            y1) represents the position of the lower right corner.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

        pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Batch of document images. Each image is divided into patches of shape `(num_channels, config.patch_size,
            config.patch_size)` and the total number of patches (=`patch_sequence_length`) equals to `((height /
            config.patch_size) * (width / config.patch_size))`.

        attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`Numpy array` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`Numpy array` or `tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            Note that `sequence_length = token_sequence_length + patch_sequence_length + 1` where `1` is for [CLS]
            token. See `pixel_values` for `patch_sequence_length`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`tf.Tensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zdThe bare LayoutLMv3 Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd ZdgZ fdZe ee       ee	e
      	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     ZddZ xZS )	TFLayoutLMv3Modelr   c                P    t        |   |g|i | t        |d      | _        y )Nr  r  )rA   rB   r}  r  )rO   rN   r	  rP   rR   s       rS   rB   zTFLayoutLMv3Model.__init__  s(    3&3F3/\JrT   output_typer  c                @    | j                  |||||||||	|
||      }|S )a  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, TFAutoModel
        >>> from datasets import load_dataset

        >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
        >>> model = TFAutoModel.from_pretrained("microsoft/layoutlmv3-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
        >>> example = dataset[0]
        >>> image = example["image"]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = processor(image, words, boxes=boxes, return_tensors="tf")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```)r   r   r   r   r   r   r   r\   r   rs  rt  r   )r  )rO   r   r   r   r   r   r   r   r\   r   rs  rt  r   r   s                 rS   r^   zTFLayoutLMv3Model.call  sC    ^ //))%'%/!5# " 
 rT   c                    | j                   ry d| _         t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   y xY w)NTr  )r`   ra   rY   rb   r  r?   rc   re   s     rS   rc   zTFLayoutLMv3Model.build0  si    ::
4t,8t334 ,%%d+, , 9, ,s   A11A:r  r  rk   )rm   rn   ro   "_keys_to_ignore_on_load_unexpectedrB   r   r   LAYOUTLMV3_INPUTS_DOCSTRINGr   r
   _CONFIG_FOR_DOCr^   rc   rq   rr   s   @rS   r  r    s     +:):&K *+FG+<?[ '+!%+/+/)-&**.)-,0/3&*;#; ; )	;
 ); '; $; (; '; *; -; $; ;
; \ H ;z,rT   r  c                  6     e Zd ZdZd fdZdddZddZ xZS )	TFLayoutLMv3ClassificationHeadz\
    Head for sentence-level classification tasks. Reference: RobertaClassificationHead
    c                   t        |   di | t        j                  j	                  |j
                  dt        |j                        d      | _        |j                  |j                  n|j                  }t        j                  j                  |d      | _        t        j                  j	                  |j                  t        |j                        d      | _        || _        y )	Ntanhr   )
activationr>   r?   r   r  out_projr   r@   )rA   rB   r   rH   r   rJ   r   rK   r   classifier_dropoutr   r   r   
num_labelsr  rN   )rO   rN   rP   r  rR   s       rS   rB   z'TFLayoutLMv3ClassificationHead.__init__>  s    "6"\\''.v/G/GH	 ( 

 *0)B)B)NF%%TZTnTn 	 ||++ , 
 **.v/G/GH + 

 rT   c                    | j                  ||      }| j                  |      }| j                  ||      }| j                  |      }|S )Nr   )r   r   r  )rO   r	  r   r   s       rS   r^   z#TFLayoutLMv3ClassificationHead.callT  sG    ,,v,9**W%,,w,:--(rT   c                   | j                   ry d| _         t        | dd       dt        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr   r   r  )r`   ra   rY   rb   r   r?   rc   rN   rJ   r   r  re   s     rS   rc   z$TFLayoutLMv3ClassificationHead.build[  s*   ::
4$'3tzz/ H

  $dkk.E.E!FGH4D)5t||001 )""4()4T*6t}}112 K##T41H1H$IJK K 7H H) )K Ks$   3E<E3E+EE(+E4rg   r  )r	  ri   r   r   rj   ri   rk   rl   rr   s   @rS   r  r  9  s    ,KrT   r  a
  
    LayoutLMv3 Model with a sequence classification head on top (a linear layer on top of the final hidden state of the
    [CLS] token) e.g. for document image classification tasks such as the
    [RVL-CDIP](https://www.cs.cmu.edu/~aharley/rvl-cdip/) dataset.
    c                       e Zd ZdgZd fdZe ee       ee	e
      	 	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
%TFLayoutLMv3ForSequenceClassificationr   c                |    t        |   |fi | || _        t        |d      | _        t        |d      | _        y )Nr  r  
classifier)rA   rB   rN   r}  r  r  r  r   s      rS   rB   z.TFLayoutLMv3ForSequenceClassification.__init__v  s8    *6*/\J8lSrT   r  c                P   |
|
n| j                   j                  }
| j                  ||||||||	|
|||      }|d   dddddf   }| j                  ||      }|dn| j	                  ||      }|
s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, TFAutoModelForSequenceClassification
        >>> from datasets import load_dataset
        >>> import tensorflow as tf

        >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
        >>> model = TFAutoModelForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
        >>> example = dataset[0]
        >>> image = example["image"]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = processor(image, words, boxes=boxes, return_tensors="tf")
        >>> sequence_label = tf.convert_to_tensor([1])

        >>> outputs = model(**encoding, labels=sequence_label)
        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```Nr   r   r   r   r   r   rs  rt  r   r\   r   r   r   r   losslogitsr   rm  )rN   use_return_dictr  r  hf_compute_lossr   r   rm  )rO   r   r   r   r   r   r   labelsr   rs  rt  r   r\   r   r   r  r  r  r  s                      rS   r^   z*TFLayoutLMv3ForSequenceClassification.call|  s    h &1%<k$++B]B]//))%'/!5#% " 
 "!*Q1W-8D~t4+?+?+OY,F)-)9TGf$EvE)!//))	
 	
rT   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )r`   ra   rY   rb   r  r?   rc   r  re   s     rS   rc   z+TFLayoutLMv3ForSequenceClassification.build      ::
4t,8t334 ,%%d+,4t,8t334 ,%%d+, , 9, ,, ,r   rg   NNNNNNNNNNNNF)r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r  rs  r  rt  r  r   r   r\   r   r   r  rj   zUnion[TFSequenceClassifierOutput, Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor], Tuple[tf.Tensor, tf.Tensor, tf.Tensor], Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]]rk   )rm   rn   ro   r  rB   r   r   r  r   r   r  r^   rc   rq   rr   s   @rS   r  r  j  s    +:):&T *+FG+ETcd '++/+/)-&**.#',0/3&*!%)-#(O
#O
 )O
 )	O

 'O
 $O
 (O
 !O
 *O
 -O
 $O
 O
 'O
 !O

O
 e H O
b	,rT   r  a  
    LayoutLMv3 Model with a token classification head on top (a linear layer on top of the final hidden states) e.g.
    for sequence labeling (information extraction) tasks such as [FUNSD](https://guillaumejaume.github.io/FUNSD/),
    [SROIE](https://rrc.cvc.uab.es/?ch=13), [CORD](https://github.com/clovaai/cord) and
    [Kleister-NDA](https://github.com/applicaai/kleister-nda).
    c                       e Zd ZdgZd fdZe ee       ee	e
      	 	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
"TFLayoutLMv3ForTokenClassificationr   c                   t        |   |fi | |j                  | _        t        |d      | _        t
        j                  j                  |j                  d      | _	        |j                  dk  rLt
        j                  j                  |j                  t        |j                        d      | _        || _        y t        |d      | _        || _        y )Nr  r  r   r#   r  r   )rA   rB   r  r}  r  r   rH   r   r   r   r   r   rK   r  r  rN   r   s      rS   rB   z+TFLayoutLMv3ForTokenClassification.__init__  s    *6* ++/\J||++F,F,FY+Wr!#ll00!!#263K3K#L! 1 DO  =V,WDOrT   r  c                   ||n| j                   j                  }| j                  ||||||||	|
|||      }|t        j                  |      }nt        j                  |      dd }|d   }|d   ddd|f   }| j                  ||      }| j                  |      }|dn| j                  ||      }|s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )ag  
        labels (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, TFAutoModelForTokenClassification
        >>> from datasets import load_dataset

        >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
        >>> model = TFAutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base", num_labels=7)

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
        >>> example = dataset[0]
        >>> image = example["image"]
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]
        >>> word_labels = example["ner_tags"]

        >>> encoding = processor(image, words, boxes=boxes, word_labels=word_labels, return_tensors="tf")

        >>> outputs = model(**encoding)
        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```N)r   r   r   r   r   r   r   rs  rt  r\   r   rX   r   r   r   r  )rN   r  r  rY   r   r   r  r  r   r   rm  )rO   r   r   r   r   r   r   r   r  r   rs  rt  r\   r   r   rf   r  r  r  r  r  s                        rS   r^   z'TFLayoutLMv3ForTokenClassification.call  s%   l &1%<k$++B]B]//))%'/!5#% " 
  ((9-K((=1#26K ^
!!*Q^4,,,J1~t4+?+?+OY,F)-)9TGf$EvE&!//))	
 	
rT   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       et        j                  | j                  j
                        5  | j                  j                  d d | j                  j                  g       d d d        y y # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   y xY w)NTr  r   r  )r`   ra   rY   rb   r  r?   rc   r   r  rN   rJ   re   s     rS   rc   z(TFLayoutLMv3ForTokenClassification.buildW  s   ::
4t,8t334 ,%%d+,4D)5t||001 )""4()4t,8t334 M%%tT4;;3J3J&KLM M 9, ,) )M Ms$   D<%E?3E<EEErg   r  )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r  rs  r  rt  r  r\   r   r   r  rj   zUnion[TFTokenClassifierOutput, Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor], Tuple[tf.Tensor, tf.Tensor, tf.Tensor], Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]]rk   )rm   rn   ro   r  rB   r   r   r  r   r   r  r^   rc   rq   rr   s   @rS   r  r    s    +:):&  *+FG+BQ`a '+!%+/+/)-&**.#',0/3&*)-#(Y
#Y
 Y
 )	Y

 )Y
 'Y
 $Y
 (Y
 !Y
 *Y
 -Y
 $Y
 'Y
 !Y

Y
 b H Y
vMrT   r  a  
    LayoutLMv3 Model with a span classification head on top for extractive question-answering tasks such as
    [DocVQA](https://rrc.cvc.uab.es/?ch=17) (a linear layer on top of the text part of the hidden-states output to
    compute `span start logits` and `span end logits`).
    c                       e Zd ZdgZd fdZe ee       ee	e
      	 	 	 	 	 	 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd                     Zd	dZ xZS )
 TFLayoutLMv3ForQuestionAnsweringr   c                    t        |   |fi | |j                  | _        t        |d      | _        t        |d      | _        y )Nr  r  
qa_outputs)rA   rB   r  r}  r  r  r  r   s      rS   rB   z)TFLayoutLMv3ForQuestionAnswering.__init__r  s>    *6* ++/\J8lSrT   r  c                   ||n| j                   j                  }| j                  |||||||	|
||||      }|d   }| j                  ||      }t	        j
                  |dd      \  }}t	        j                  |d      }t	        j                  |d      }d}||||d	}| j                  |||f
      }|s||f|dd z   }||f|z   S |S t        ||||j                  |j                        S )ak  
        start_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, TFAutoModelForQuestionAnswering
        >>> from datasets import load_dataset
        >>> import tensorflow as tf

        >>> processor = AutoProcessor.from_pretrained("microsoft/layoutlmv3-base", apply_ocr=False)
        >>> model = TFAutoModelForQuestionAnswering.from_pretrained("microsoft/layoutlmv3-base")

        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train", trust_remote_code=True)
        >>> example = dataset[0]
        >>> image = example["image"]
        >>> question = "what's his name?"
        >>> words = example["tokens"]
        >>> boxes = example["bboxes"]

        >>> encoding = processor(image, question, words, boxes=boxes, return_tensors="tf")
        >>> start_positions = tf.convert_to_tensor([1])
        >>> end_positions = tf.convert_to_tensor([3])

        >>> outputs = model(**encoding, start_positions=start_positions, end_positions=end_positions)
        >>> loss = outputs.loss
        >>> start_scores = outputs.start_logits
        >>> end_scores = outputs.end_logits
        ```Nr  r   r   r   rX   )r   num_or_size_splitsr   )inputr   )start_positionend_position)r  r   )r  start_logits
end_logitsr   rm  )rN   r  r  r  rY   splitsqueezer  r   r   rm  )rO   r   r   r   r   r   r   start_positionsend_positionsr   rs  r   r\   rt  r   r   r  r  r  r  r  r  r  s                          rS   r^   z%TFLayoutLMv3ForQuestionAnswering.callz  s2   B &1%<k$++B]B]//))%'/!5#% " 
 "!*8D#%88&QUW#X jzz2>ZZjr:
&=+D(7WF''j7Q'RD"J/'!"+=F)-)9TGf$EvE-%!!//))
 	
rT   c                   | j                   ry d| _         t        | dd       Mt        j                  | j                  j
                        5  | j                  j                  d        d d d        t        | dd       Nt        j                  | j                  j
                        5  | j                  j                  d        d d d        y y # 1 sw Y   exY w# 1 sw Y   y xY w)NTr  r  )r`   ra   rY   rb   r  r?   rc   r  re   s     rS   rc   z&TFLayoutLMv3ForQuestionAnswering.build  r  r   rg   )NNNNNNNNNNNNNF)r   r   r   r   r   r   r   r   r   r   r   r   r  r   r  r   r   r  rs  r  r   r   r\   r   rt  r  r   r   rj   zUnion[TFQuestionAnsweringModelOutput, Tuple[tf.Tensor], Tuple[tf.Tensor, tf.Tensor], Tuple[tf.Tensor, tf.Tensor, tf.Tensor], Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]]rk   )rm   rn   ro   r  rB   r   r   r  r   r   r  r^   rc   rq   rr   s   @rS   r  r  f  s     +:):&T *+FG+IXgh '++/+/)-&**.,0*.,0/3!%)-&*f
#f
 )f
 )	f

 'f
 $f
 (f
 *f
 (f
 *f
 -f
 f
 'f
 $f
 f
 
!f
 i H f
P	,rT   r  )<rp   
__future__r   rE   r   typingr   r   r   r   
tensorflowrY   activations_tfr	   modeling_tf_outputsr
   r   r   r   modeling_tf_utilsr   r   r   r   r   r   r   r   tf_utilsr   utilsr   r   r   configuration_layoutlmv3r   r  _DUMMY_INPUT_IDS_DUMMY_BBOXr  rH   Layerr3   rt   r   r   r  r"  r+  r0  r;  r}  r  LAYOUTLMV3_START_DOCSTRINGr  r  r  r  r  r  r@   rT   rS   <module>r     s    "   / /  / 	 	 	 7 k k 6 %   <1')9: 'N%,,"4"4 'NT~7!3!3 ~7B|H 2 2 |H@LU\\// L<&-ELL.. &-THu||11 H<L++ L<--** --`j&%,,,, j&Z O
ELL.. O
 O
d
"3  ' RJ Z jN,3 N,	N,b.KU\\%7%7 .Kb 
 g,,GIe g,g,T  ~M)DF_ ~M~MB 
 @,'BD[ @,@,rT   