
    sg'                         d Z ddlZddlZddlmZmZmZ ddlZddlZddlm	Z	 ddl
mZmZmZ ddlmZ ddlmZmZmZ dd	lmZmZmZmZmZmZ dd
lmZmZmZmZ ddl m!Z! ddl"m#Z#  e!jH                  e%      Z&dZ'dZ( G d de	jR                        Z*d<dZ+ G d de	jR                        Z, G d de	jR                        Z- G d de	jR                        Z. G d de	jR                        Z/ G d de	jR                        Z0 G d de	jR                        Z1 G d d e	jR                        Z2 G d! d"e	jR                        Z3 G d# d$e	jR                        Z4d%e4iZ5 G d& d'e	jR                        Z6 G d( d)e	jR                        Z7 G d* d+e	jR                        Z8 G d, d-e      Z9d.Z:d/Z; ed0e:       G d1 d2e9             Z< ed3e:       G d4 d5e9             Z= ed6e:       G d7 d8e9             Z> ed9e:       G d: d;e9             Z?y)=zPyTorch MarkupLM model.    N)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)add_start_docstrings%add_start_docstrings_to_model_forwardreplace_return_docstrings))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModelapply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)logging   )MarkupLMConfigzmicrosoft/markuplm-baser   c                   *     e Zd ZdZ fdZddZ xZS )XPathEmbeddingszConstruct the embeddings from xpath tags and subscripts.

    We drop tree-id in this version, as its info can be covered by xpath.
    c           	         t         t        |           |j                  | _        t	        j
                  |j                  | j                  z  |j                        | _        t	        j                  |j                        | _        t	        j                         | _        t	        j
                  |j                  | j                  z  d|j                  z        | _        t	        j
                  d|j                  z  |j                        | _        t	        j                   t#        | j                        D cg c],  }t	        j$                  |j&                  |j                        . c}      | _        t	        j                   t#        | j                        D cg c],  }t	        j$                  |j*                  |j                        . c}      | _        y c c}w c c}w )N   )superr   __init__	max_depthr   Linearxpath_unit_hidden_sizehidden_sizexpath_unitseq2_embeddingsDropouthidden_dropout_probdropoutReLU
activationxpath_unitseq2_inner	inner2emb
ModuleListrange	Embeddingmax_xpath_tag_unit_embeddingsxpath_tag_sub_embeddingsmax_xpath_subs_unit_embeddingsxpath_subs_sub_embeddingsselfconfig_	__class__s      a/var/www/html/venv/lib/python3.12/site-packages/transformers/models/markuplm/modeling_markuplm.pyr!   zXPathEmbeddings.__init__>   s\   ot-/)))+63P3PSWSaSa3acicucu)v&zz&"<"<='')$&IIf.K.Kdnn.\^_bhbtbt^t$u!1v'9'9#96;M;MN(* t~~. VAA6C`C`a)
% *, t~~. VBBFDaDab*
&s   51G1Gc           	         g }g }t        | j                        D ]^  }|j                   | j                  |   |d d d d |f                |j                   | j                  |   |d d d d |f                ` t        j                  |d      }t        j                  |d      }||z   }| j                  | j                  | j                  | j                  |                        }|S )Ndim)r/   r"   appendr2   r4   torchcatr-   r)   r+   r,   )r6   xpath_tags_seqxpath_subs_seqxpath_tags_embeddingsxpath_subs_embeddingsixpath_embeddingss          r:   forwardzXPathEmbeddings.forwardX   s     " "t~~& 	eA!(()I)F)Fq)I.YZ\]_`Y`Ja)bc!(()J)G)G)J>Z[]^`aZaKb)cd	e !&		*?R H %		*?R H03HH>>$,,ttG`G`aqGr7s*tu    )NN)__name__
__module____qualname____doc__r!   rH   __classcell__r9   s   @r:   r   r   8   s    

4 rI   r   c                     | j                  |      j                         }t        j                  |d      j	                  |      |z   |z  }|j                         |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r   r=   )neintr@   cumsumtype_aslong)	input_idspadding_idxpast_key_values_lengthmaskincremental_indicess        r:   "create_position_ids_from_input_idsr[   k   sW     <<$((*D <<!4<<TBE[[_cc##%33rI   c                   >     e Zd ZdZ fdZd Z	 	 	 	 	 	 	 ddZ xZS )MarkupLMEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 l   t         t        |           || _        t	        j
                  |j                  |j                  |j                        | _	        t	        j
                  |j                  |j                        | _        |j                  | _        t        |      | _        t	        j
                  |j                  |j                        | _        t	        j"                  |j                  |j$                        | _        t	        j&                  |j(                        | _        | j-                  dt/        j0                  |j                        j3                  d      d       |j                  | _        t	        j
                  |j                  |j                  | j4                        | _        y )N)rW   epsposition_ids)r   r<   F)
persistent)r    r]   r!   r7   r   r0   
vocab_sizer%   pad_token_idword_embeddingsmax_position_embeddingsposition_embeddingsr"   r   rG   type_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsr'   r(   r)   register_bufferr@   arangeexpandrW   r6   r7   r9   s     r:   r!   zMarkupLMEmbeddings.__init__~   s=    $02!||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c )) / 7%'\\&2H2H&J\J\%]"f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
 "..#%<<**F,>,>DL\L\$
 rI   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr<   r   dtypedevicer   )sizer@   rm   rW   rU   rs   	unsqueezern   )r6   inputs_embedsinput_shapesequence_lengthra   s        r:   &create_position_ids_from_inputs_embedsz9MarkupLMEmbeddings.create_position_ids_from_inputs_embeds   s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<rI   c                    ||j                         }n|j                         d d }||j                  n|j                  }	|+|t        || j                  |      }n| j	                  |      }|&t        j                  |t
        j                  |	      }|| j                  |      }|]| j                  j                  t        j                  t        t        |      | j                  gz         t
        j                  |	      z  }|]| j                  j                  t        j                  t        t        |      | j                  gz         t
        j                  |	      z  }|}
| j!                  |      }| j#                  |      }| j%                  ||      }|
|z   |z   |z   }| j'                  |      }| j)                  |      }|S )Nr<   rq   )rt   rs   r[   rW   ry   r@   zerosrU   re   r7   
tag_pad_idonestuplelistr"   subs_pad_idrg   ri   rG   rj   r)   )r6   rV   rB   rC   token_type_idsra   rv   rX   rw   rs   words_embeddingsrg   ri   rG   
embeddingss                  r:   rH   zMarkupLMEmbeddings.forward   s     #..*K',,.s3K%.%:!!@T@T$A)TM]M]_uv#JJ=Y!"[[EJJvVN  00;M !![[33ejjd;'4>>*::;5::V\7 N !![[44uzzd;'4>>*::;5::V\8 N )"66|D $ : :> J00P%(;;>SSVff
^^J/
\\*-
rI   )NNNNNNr   )rJ   rK   rL   rM   r!   ry   rH   rN   rO   s   @r:   r]   r]   {   s,    Q
2=&  2rI   r]   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )MarkupLMSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nr_   )r    r!   r   r#   r%   denserj   rk   r'   r(   r)   ro   s     r:   r!   zMarkupLMSelfOutput.__init__   s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=rI   hidden_statesinput_tensorreturnc                 r    | j                  |      }| j                  |      }| j                  ||z         }|S Nr   r)   rj   r6   r   r   s      r:   rH   zMarkupLMSelfOutput.forward   7    

=1]3}|'CDrI   rJ   rK   rL   r!   r@   TensorrH   rN   rO   s   @r:   r   r      1    >U\\  RWR^R^ rI   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MarkupLMIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r    r!   r   r#   r%   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnro   s     r:   r!   zMarkupLMIntermediate.__init__   s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$rI   r   r   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r6   r   s     r:   rH   zMarkupLMIntermediate.forward   s&    

=100?rI   r   rO   s   @r:   r   r      s#    9U\\ ell rI   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )MarkupLMOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r    r!   r   r#   r   r%   r   rj   rk   r'   r(   r)   ro   s     r:   r!   zMarkupLMOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=rI   r   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   s      r:   rH   zMarkupLMOutput.forward  r   rI   r   rO   s   @r:   r   r      r   rI   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MarkupLMPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )r    r!   r   r#   r%   r   Tanhr+   ro   s     r:   r!   zMarkupLMPooler.__init__  s9    YYv1163E3EF
'')rI   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r+   )r6   r   first_token_tensorpooled_outputs       r:   rH   zMarkupLMPooler.forward  s6     +1a40

#566rI   r   rO   s   @r:   r   r     s#    $
U\\ ell rI   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MarkupLMPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y r   )r    r!   r   r#   r%   r   r   r   r   r   transform_act_fnrj   rk   ro   s     r:   r!   z(MarkupLMPredictionHeadTransform.__init__  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STrI   r   r   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r   r   rj   r   s     r:   rH   z'MarkupLMPredictionHeadTransform.forward&  s4    

=1--m<}5rI   r   rO   s   @r:   r   r     s$    UU\\ ell rI   r   c                   *     e Zd Z fdZd Zd Z xZS )MarkupLMLMPredictionHeadc                 H   t         |           t        |      | _        t	        j
                  |j                  |j                  d      | _        t	        j                  t        j                  |j                              | _        | j                  | j                  _        y )NF)bias)r    r!   r   	transformr   r#   r%   rc   decoder	Parameterr@   r{   r   ro   s     r:   r!   z!MarkupLMLMPredictionHead.__init__/  sm    8@ yy!3!3V5F5FUSLLV->->!?@	 !IIrI   c                 :    | j                   | j                  _         y r   )r   r   r6   s    r:   _tie_weightsz%MarkupLMLMPredictionHead._tie_weights<  s     IIrI   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r   s     r:   rH   z MarkupLMLMPredictionHead.forward?  s$    }5]3rI   )rJ   rK   rL   r!   r   rH   rN   rO   s   @r:   r   r   .  s    &&rI   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MarkupLMOnlyMLMHeadc                 B    t         |           t        |      | _        y r   )r    r!   r   predictionsro   s     r:   r!   zMarkupLMOnlyMLMHead.__init__G  s    3F;rI   sequence_outputr   c                 (    | j                  |      }|S r   )r   )r6   r   prediction_scoress      r:   rH   zMarkupLMOnlyMLMHead.forwardK  s     ,,_=  rI   r   rO   s   @r:   r   r   F  s#    <!u|| ! !rI   r   c                   P    e Zd Zd fd	Zdej
                  dej
                  fdZ	 	 	 	 	 	 ddej
                  deej                     deej                     deej                     d	eej                     d
ee	e	ej                           dee
   de	ej
                     fdZ xZS )MarkupLMSelfAttentionc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                        | _        |xs t#        |dd      | _        | j$                  dk(  s| j$                  d	k(  rF|j&                  | _        t        j(                  d
|j&                  z  dz
  | j                        | _        |j,                  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterelative_keyrelative_key_query   r   )r    r!   r%   num_attention_headshasattr
ValueErrorrR   attention_head_sizeall_head_sizer   r#   querykeyvaluer'   attention_probs_dropout_probr)   getattrr   rf   r0   distance_embedding
is_decoderr6   r7   r   r9   s      r:   r!   zMarkupLMSelfAttention.__init__R  s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'> (
'-zC
$ ''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++rI   xr   c                     |j                         d d | j                  | j                  fz   }|j                  |      }|j	                  dddd      S )Nr<   r   r   r   r
   )rt   r   r   viewpermute)r6   r   new_x_shapes      r:   transpose_for_scoresz*MarkupLMSelfAttention.transpose_for_scoresl  sL    ffhsmt'?'?AYAY&ZZFF;yyAq!$$rI   r   attention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsc                 $   | j                  |      }|d u}	|	r||d   }
|d   }|}n |	rC| j                  | j                  |            }
| j                  | j                  |            }|}n|y| j                  | j                  |            }
| j                  | j                  |            }t	        j
                  |d   |
gd      }
t	        j
                  |d   |gd      }n@| j                  | j                  |            }
| j                  | j                  |            }| j                  |      }|d u}| j                  r|
|f}t	        j                  ||
j                  dd            }| j                  dk(  s| j                  dk(  r|j                  d   |
j                  d   }}|rDt	        j                  |dz
  t        j                  |j                  	      j                  dd      }n@t	        j                  |t        j                  |j                  	      j                  dd      }t	        j                  |t        j                  |j                  	      j                  dd      }||z
  }| j!                  || j"                  z   dz
        }|j%                  |j&                  
      }| j                  dk(  rt	        j(                  d||      }||z   }nE| j                  dk(  r6t	        j(                  d||      }t	        j(                  d|
|      }||z   |z   }|t+        j,                  | j.                        z  }|||z   }t0        j2                  j5                  |d      }| j7                  |      }|||z  }t	        j                  ||      }|j9                  dddd      j;                         }|j=                         d d | j>                  fz   }|j                  |      }|r||fn|f}| j                  r||fz   }|S )Nr   r   r   r=   r<   r   r   rq   rr   zbhld,lrd->bhlrzbhrd,lrd->bhlrr
   ) r   r   r   r   r@   rA   r   matmul	transposer   shapetensorrU   rs   r   rm   r   rf   torr   einsummathsqrtr   r   
functionalsoftmaxr)   r   
contiguousrt   r   )r6   r   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layer	use_cacheattention_scoresquery_length
key_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                               r:   rH   zMarkupLMSelfAttention.forwardq  s    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@aHI))^A%6$D!LK11$((=2IJI33DJJ}4MNK//0AB"$.	?? (5N !<<Y5H5HR5PQ''>9T=Y=Y]q=q'2'8'8';Y__Q=O*L!&j1nEJJWdWkWk!l!q!q" "'l%**UbUiUi!j!o!oprtu!v"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s +dii8P8P.QQ%/.@ --//0@b/I ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2mM]?? 11GrI   r   NNNNNF)rJ   rK   rL   r!   r@   r   r   r   FloatTensorr   boolrH   rN   rO   s   @r:   r   r   Q  s    ,4%ell %u|| % 7;15=A>BDH,1c||c !!2!23c E--.	c
  ((9(9:c !)):): ;c !uU->->'?!@Ac $D>c 
u||	crI   r   eagerc                       e Zd Zd fd	Zd Z	 	 	 	 	 	 ddej                  deej                     deej                     deej                     deej                     dee	e	ej                           d	ee
   d
e	ej                     fdZ xZS )MarkupLMAttentionc                     t         |           t        |j                     ||      | _        t        |      | _        t               | _        y )Nr   )	r    r!   MARKUPLM_SELF_ATTENTION_CLASSES_attn_implementationr6   r   outputsetpruned_headsr   s      r:   r!   zMarkupLMAttention.__init__  sC    3F4O4OP,C
	 )0ErI   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r=   )lenr   r6   r   r   r  r   r   r   r   r  r   r   union)r6   headsindexs      r:   prune_headszMarkupLMAttention.prune_heads  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:rI   r   r   r   r   r   r   r   r   c           	      p    | j                  |||||||      }| j                  |d   |      }	|	f|dd  z   }
|
S )Nr   r   )r6   r  )r6   r   r   r   r   r   r   r   self_outputsattention_outputr  s              r:   rH   zMarkupLMAttention.forward  sW     yy!"
  ;;|AF#%QR(88rI   r   r  )rJ   rK   rL   r!   r  r@   r   r   r  r   r  rH   rN   rO   s   @r:   r
  r
    s    ";* 7;15=A>BDH,1|| !!2!23 E--.	
  ((9(9: !)):): ; !uU->->'?!@A $D> 
u||	rI   r
  c                       e Zd Z fdZ	 	 	 	 	 	 ddej
                  deej                     deej                     deej                     deej                     deeeej                           dee	   d	eej
                     fd
Z
d Z xZS )MarkupLMLayerc                 f   t         |           |j                  | _        d| _        t	        |      | _        |j                  | _        |j                  | _        | j                  r,| j                  st        |  d      t	        |d      | _	        t        |      | _        t        |      | _        y )Nr   z> should be used as a decoder model if cross attention is addedr   r  )r    r!   chunk_size_feed_forwardseq_len_dimr
  	attentionr   add_cross_attentionr   crossattentionr   intermediater   r  ro   s     r:   r!   zMarkupLMLayer.__init__  s    '-'E'E$*62 ++#)#=#= ##?? D6)g!hii"3FT^"_D08$V,rI   r   r   r   r   r   r   r   r   c           	         ||d d nd }| j                  |||||      }	|	d   }
| j                  r|	dd }|	d   }n|	dd  }d }| j                  rT|Rt        | d      st        d|  d      ||d	d  nd }| j	                  |
||||||      }|d   }
||dd z   }|d   }|z   }t        | j                  | j                  | j                  |
      }|f|z   }| j                  r|fz   }|S )
Nr   )r   r   r   r   r<   r"  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r   r   r   r   r"  r   feed_forward_chunkr  r  )r6   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr  r  present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputs                    r:   rH   zMarkupLMLayer.forward   s}    :H9S>"1#5Y] !%/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@4!12 =dV DD D  @N?Yrs(;_c%&*&9&9 %&)!'#  7q9 7" ==G ,C2+F( 14P P0##T%A%A4CSCSUe
  /G+ ??!2 44GrI   c                 L    | j                  |      }| j                  ||      }|S r   )r#  r  )r6   r  intermediate_outputr,  s       r:   r%  z MarkupLMLayer.feed_forward_chunka  s,    "//0@A{{#68HIrI   r  )rJ   rK   rL   r!   r@   r   r   r  r   r  rH   r%  rN   rO   s   @r:   r  r    s    -" 7;15=A>BDH,1?||? !!2!23? E--.	?
  ((9(9:? !)):): ;? !uU->->'?!@A? $D>? 
u||	?BrI   r  c                   D    e Zd Z fdZ	 	 	 	 	 	 	 	 	 ddej
                  deej                     deej                     deej                     deej                     deeeej                           dee	   d	ee	   d
ee	   dee	   de
eej
                     ef   fdZ xZS )MarkupLMEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        d| _	        y c c}w )NF)
r    r!   r7   r   r.   r/   num_hidden_layersr  layergradient_checkpointingr5   s      r:   r!   zMarkupLMEncoder.__init__i  sN    ]]5IaIaCb#caM&$9#cd
&+# $ds   A#r   r   r   r   r   past_key_valuesr   r   output_hidden_statesreturn_dictr   c                    |	rdnd }|rdnd }|r| j                   j                  rdnd }| j                  r%| j                  r|rt        j                  d       d}|rdnd }t        | j                        D ]  \  }}|	r||fz   }|||   nd }|||   nd }| j                  r/| j                  r#| j                  |j                  |||||||      }n ||||||||      }|d   }|r	||d   fz  }|s|||d   fz   }| j                   j                  s||d   fz   } |	r||fz   }|
st        d |||||fD              S t        |||||	      S )
N zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr   r<   r   r   c              3   $   K   | ]  }|| 
 y wr   r9  ).0vs     r:   	<genexpr>z*MarkupLMEncoder.forward.<locals>.<genexpr>  s      
 = 
s   )last_hidden_stater5  r   
attentionscross_attentions)r7   r!  r4  trainingloggerwarning_once	enumerater3  _gradient_checkpointing_func__call__r~   r   )r6   r   r   r   r   r   r5  r   r   r6  r7  all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacherF   layer_modulelayer_head_maskr   layer_outputss                       r:   rH   zMarkupLMEncoder.forwardo  s    #7BD$5b4%64;;;Z;Zr`d&&4==##p "	#,R$(4 #	VOA|#$58H$H!.7.CilO3B3N_Q/TXN**t}} $ A A ))!"#)*"%	! !-!"#)*"%! *!,M"}R'8&::" &9]1=M<O&O#;;22+?=QRCSBU+U(G#	VJ   1]4D D 
 "&%'(
 
 
 9+.+*1
 	
rI   )	NNNNNNFFT)rJ   rK   rL   r!   r@   r   r   r  r   r  r   r   rH   rN   rO   s   @r:   r0  r0  h  s   , 7;15=A>BEI$(,1/4&*S
||S
 !!2!23S
 E--.	S

  ((9(9:S
 !)):): ;S
 "%e.?.?(@"ABS
 D>S
 $D>S
 'tnS
 d^S
 
uU\\"$MM	NS
rI   r0  c                   d     e Zd ZdZeZdZd Zede	e
eej                  f      f fd       Z xZS )MarkupLMPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    markuplmc                    t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yy)zInitialize the weightsg        )meanstdN      ?)r   r   r#   weightdatanormal_r7   initializer_ranger   zero_r0   rW   rj   fill_)r6   modules     r:   _init_weightsz%MarkupLMPreTrainedModel._init_weights  s   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S) .rI   pretrained_model_name_or_pathc                 2    t        t        | 
  |g|i |S r   )r    rO  from_pretrained)clsr]  
model_argskwargsr9   s       r:   r_  z'MarkupLMPreTrainedModel.from_pretrained  s+    ,cB)
,6
:@
 	
rI   )rJ   rK   rL   rM   r   config_classbase_model_prefixr\  classmethodr   r   r   osPathLiker_  rN   rO   s   @r:   rO  rO    sK    
 "L"*  
HU3PRP[P[K[E\<] 
 
rI   rO  aK  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`MarkupLMConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        xpath_tags_seq (`torch.LongTensor` of shape `({0}, config.max_depth)`, *optional*):
            Tag IDs for each token in the input sequence, padded up to config.max_depth.

        xpath_subs_seq (`torch.LongTensor` of shape `({0}, config.max_depth)`, *optional*):
            Subscript IDs for each token in the input sequence, padded up to config.max_depth.

        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: `1` for
            tokens that are NOT MASKED, `0` for MASKED tokens.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`: `0` corresponds to a *sentence A* token, `1` corresponds to a *sentence B* token

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`: `1`
            indicates the head is **not masked**, `0` indicates the head is **masked**.
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            If set to `True`, the attentions tensors of all attention layers are returned. See `attentions` under
            returned tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            If set to `True`, the hidden states of all layers are returned. See `hidden_states` under returned tensors
            for more detail.
        return_dict (`bool`, *optional*):
            If set to `True`, the model will return a [`~file_utils.ModelOutput`] instead of a plain tuple.
zbThe bare MarkupLM Model transformer outputting raw hidden-states without any specific head on top.c                       e Zd Zd fd	Zd Zd Zd Z eej                  d             e
ee      	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     d	eej                     d
eej                      deej                     deej                     deej                      deej                      dee   dee   dee   deeef   fd              Zd Z xZS )MarkupLMModelc                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd | _        | j                          y r   )
r    r!   r7   r]   r   r0  encoderr   pooler	post_init)r6   r7   add_pooling_layerr9   s      r:   r!   zMarkupLMModel.__init__(  sK     ,V4&v.0AnV,t 	rI   c                 .    | j                   j                  S r   r   re   r   s    r:   get_input_embeddingsz"MarkupLMModel.get_input_embeddings4  s    ...rI   c                 &    || j                   _        y r   rp  )r6   r   s     r:   set_input_embeddingsz"MarkupLMModel.set_input_embeddings7  s    */'rI   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsrk  r3  r   r  )r6   heads_to_pruner3  r  s       r:   _prune_headszMarkupLMModel._prune_heads:  sE    
 +002 	CLE5LLu%//;;EB	CrI   batch_size, sequence_lengthoutput_typerc  rV   rB   rC   r   r   ra   r   rv   r   r6  r7  r   c                    |	|	n| j                   j                  }	|
|
n| j                   j                  }
||n| j                   j                  }||t	        d      |#| j                  ||       |j                         }n!||j                         dd }nt	        d      ||j                  n|j                  }|t        j                  ||      }|&t        j                  |t        j                  |      }|j                  d      j                  d      }|j                  | j                  	      }d
|z
  dz  }||j                         dk(  rh|j                  d      j                  d      j                  d      j                  d      }|j!                  | j                   j"                  dddd      }nB|j                         dk(  r/|j                  d      j                  d      j                  d      }|j                  t%        | j'                               j                  	      }ndg| j                   j"                  z  }| j)                  ||||||      }| j+                  ||||	|
|      }|d   }| j,                  | j-                  |      nd}|s
||f|dd z   S t/        |||j0                  |j2                  |j4                        S )a`  
        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMModel

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = MarkupLMModel.from_pretrained("microsoft/markuplm-base")

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"

        >>> encoding = processor(html_string, return_tensors="pt")

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        >>> list(last_hidden_states.shape)
        [1, 4, 768]
        ```NzDYou cannot specify both input_ids and inputs_embeds at the same timer<   z5You have to specify either input_ids or inputs_embeds)rs   rq   r   r   r   rT  g     r   )rV   rB   rC   ra   r   rv   )r   r   r6  r7  )r>  pooler_outputr   r?  r@  )r7   r   r6  use_return_dictr   %warn_if_padding_and_no_attention_maskrt   rs   r@   r}   r{   rU   ru   r   rr   r>   rn   r2  next
parametersr   rk  rl  r   r   r?  r@  )r6   rV   rB   rC   r   r   ra   r   rv   r   r6  r7  rw   rs   extended_attention_maskembedding_outputencoder_outputsr   r   s                      r:   rH   zMarkupLMModel.forwardB  s   H 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN!"[[EJJvVN"0":":1"="G"G"J"9"<"<4::"<"N#&)@#@H"L }}!#%//2<<Q?II"MWWXZ[	%,,T[[-J-JBPRTVXZ[	A%%//2<<R@JJ2N	!40A+B+H+HII!>!>>I??))%)' + 
 ,,#/!5# ' 
 *!,8<8OO4UY#]3oab6III;-')77&11,==
 	
rI   c                 J    d}|D ]  }|t        fd|D              fz  } |S )Nr9  c              3   t   K   | ]/  }|j                  d j                  |j                               1 yw)r   N)index_selectr   rs   )r;  
past_statebeam_idxs     r:   r=  z/MarkupLMModel._reorder_cache.<locals>.<genexpr>  s.     nU_j--aZ=N=N1OPns   58)r~   )r6   r5  r  reordered_past
layer_pasts     `  r:   _reorder_cachezMarkupLMModel._reorder_cache  s=    ) 	Jncmnn N	 rI   )T)NNNNNNNNNNN)rJ   rK   rL   r!   rq  rs  rw  r   MARKUPLM_INPUTS_DOCSTRINGformatr   r   _CONFIG_FOR_DOCr   r@   
LongTensorr  r  r   r   rH   r  rN   rO   s   @r:   ri  ri  "  sm   
/0C ++D+K+KLi+jk+Wfuv 1559596:59371559,0/3&*e
E,,-e
 !!1!12e
 !!1!12	e

 !!2!23e
 !!1!12e
 u//0e
 E--.e
   1 12e
 $D>e
 'tne
 d^e
 
uBB	Ce
 w le
PrI   ri  z
    MarkupLM Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear
    layers on top of the hidden-states output to compute `span start logits` and `span end logits`).
    c            !           e Zd Z fdZ eej                  d             eee	      	 	 	 	 	 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     de
ej                     de
ej                     de
ej                     d	e
ej                     d
e
ej                     de
ej                     de
ej                     de
ej                     de
e   de
e   de
e   deeej                     ef   fd              Z xZS )MarkupLMForQuestionAnsweringc                     t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  |j                        | _        | j                          y NF)rn  )
r    r!   
num_labelsri  rP  r   r#   r%   
qa_outputsrm  ro   s     r:   r!   z%MarkupLMForQuestionAnswering.__init__  sU      ++%fF))F$6$68I8IJ 	rI   rx  ry  rV   rB   rC   r   r   ra   r   rv   start_positionsend_positionsr   r6  r7  r   c                 ,   ||n| j                   j                  }| j                  |||||||||||      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d}|	|
t        |	j                               dkD  r|	j                  d      }	t        |
j                               dkD  r|
j                  d      }
|j                  d      }|	j                  d|       |
j                  d|       t        |      } |||	      } |||
      }||z   dz  }|s||f|dd z   }||f|z   S |S t        ||||j                  |j                  	      S )
a  
        start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, MarkupLMForQuestionAnswering
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base-finetuned-websrc")
        >>> model = MarkupLMForQuestionAnswering.from_pretrained("microsoft/markuplm-base-finetuned-websrc")

        >>> html_string = "<html> <head> <title>My name is Niels</title> </head> </html>"
        >>> question = "What's his name?"

        >>> encoding = processor(html_string, questions=question, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> answer_start_index = outputs.start_logits.argmax()
        >>> answer_end_index = outputs.end_logits.argmax()

        >>> predict_answer_tokens = encoding.input_ids[0, answer_start_index : answer_end_index + 1]
        >>> processor.decode(predict_answer_tokens).strip()
        'Niels'
        ```N
rB   rC   r   r   ra   r   rv   r   r6  r7  r   r   r<   r=   )ignore_indexr   )lossstart_logits
end_logitsr   r?  )r7   r}  rP  r  splitsqueezer   r  rt   clamp_r   r   r   r?  )r6   rV   rB   rC   r   r   ra   r   rv   r  r  r   r6  r7  r  r   logitsr  r  
total_lossignored_indexloss_fct
start_lossend_lossr  s                            r:   rH   z$MarkupLMForQuestionAnswering.forward  s   l &1%<k$++B]B]--))))%'/!5#   
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M""1m4  M2']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
rI   )NNNNNNNNNNNNN)rJ   rK   rL   r!   r   r  r  r   r   r  r   r@   r   r  r   r   rH   rN   rO   s   @r:   r  r    s~    ++D+K+KLi+jk+GVef -115151515/3,0042604,0/3&*f
ELL)f
 !.f
 !.	f

 !.f
 !.f
 u||,f
 ELL)f
  -f
 "%,,/f
  -f
 $D>f
 'tnf
 d^f
 
uU\\"$@@	Af
 g lf
rI   r  z9MarkupLM Model with a `token_classification` head on top.c                       e Zd Z fdZ eej                  d             eee	      	 	 	 	 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     de
ej                     de
ej                     de
ej                     d	e
ej                     d
e
ej                     de
ej                     de
ej                     de
e   de
e   de
e   deeej                     ef   fd              Z xZS )MarkupLMForTokenClassificationc                 d   t         |   |       |j                  | _        t        |d      | _        |j
                  |j
                  n|j                  }t        j                  |      | _	        t        j                  |j                  |j                        | _        | j                          y r  )r    r!   r  ri  rP  classifier_dropoutr(   r   r'   r)   r#   r%   
classifierrm  r6   r7   r  r9   s      r:   r!   z'MarkupLMForTokenClassification.__init__6  s      ++%fF)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rI   rx  ry  rV   rB   rC   r   r   ra   r   rv   labelsr   r6  r7  r   c                    ||n| j                   j                  }| j                  |||||||||
||      }|d   }| j                  |      }d}|	Ft	               } ||j                  d| j                   j                        |	j                  d            }|s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForTokenClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> processor.parse_html = False
        >>> model = AutoModelForTokenClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> nodes = ["hello", "world"]
        >>> xpaths = ["/html/body/div/li[1]/div/span", "/html/body/div/li[1]/div/span"]
        >>> node_labels = [1, 2]
        >>> encoding = processor(nodes=nodes, xpaths=xpaths, node_labels=node_labels, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```Nr  r   r<   r   r  r  r   r?  )
r7   r}  rP  r  r   r   r  r   r   r?  )r6   rV   rB   rC   r   r   ra   r   rv   r  r   r6  r7  r  r   r   r  r  r  s                      r:   rH   z&MarkupLMForTokenClassification.forwardD  s    X &1%<k$++B]B]--))))%'/!5#   
 "!* OOO<')H!&&r4;;+A+ABBD
 ')GABK7F)-)9TGf$EvE$$!//))	
 	
rI   NNNNNNNNNNNN)rJ   rK   rL   r!   r   r  r  r   r   r  r   r@   r   r  r   r   rH   rN   rO   s   @r:   r  r  3  sd    ++D+K+KLi+jk>X -115151515/3,004)-,0/3&*N
ELL)N
 !.N
 !.	N

 !.N
 !.N
 u||,N
 ELL)N
  -N
 &N
 $D>N
 'tnN
 d^N
 
uU\\"N2	3N
 Y lN
rI   r  z
    MarkupLM Model transformer with a sequence classification/regression head on top (a linear layer on top of the
    pooled output) e.g. for GLUE tasks.
    c                       e Zd Z fdZ eej                  d             eee	      	 	 	 	 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     de
ej                     de
ej                     de
ej                     d	e
ej                     d
e
ej                     de
ej                     de
ej                     de
e   de
e   de
e   deeej                     ef   fd              Z xZS )!MarkupLMForSequenceClassificationc                 n   t         |   |       |j                  | _        || _        t	        |      | _        |j                  |j                  n|j                  }t        j                  |      | _
        t        j                  |j                  |j                        | _        | j                          y r   )r    r!   r  r7   ri  rP  r  r(   r   r'   r)   r#   r%   r  rm  r  s      r:   r!   z*MarkupLMForSequenceClassification.__init__  s      ++%f-)/)B)B)NF%%TZTnTn 	 zz"45))F$6$68I8IJ 	rI   rx  ry  rV   rB   rC   r   r   ra   r   rv   r  r   r6  r7  r   c                 D   ||n| j                   j                  }| j                  |||||||||
||      }|d   }| j                  |      }| j	                  |      }d}|	| j                   j
                  | j                  dk(  rd| j                   _        nl| j                  dkD  rL|	j                  t        j                  k(  s|	j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j
                  dk(  rIt               }| j                  dk(  r& ||j                         |	j                               }n |||	      }n| j                   j
                  dk(  r=t               } ||j                  d| j                        |	j                  d            }n,| j                   j
                  dk(  rt               } |||	      }|s|f|dd z   }||f|z   S |S t!        |||j"                  |j$                  	      S )
a&  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

        Returns:

        Examples:

        ```python
        >>> from transformers import AutoProcessor, AutoModelForSequenceClassification
        >>> import torch

        >>> processor = AutoProcessor.from_pretrained("microsoft/markuplm-base")
        >>> model = AutoModelForSequenceClassification.from_pretrained("microsoft/markuplm-base", num_labels=7)

        >>> html_string = "<html> <head> <title>Page Title</title> </head> </html>"
        >>> encoding = processor(html_string, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**encoding)

        >>> loss = outputs.loss
        >>> logits = outputs.logits
        ```Nr  r   
regressionsingle_label_classificationmulti_label_classificationr<   r   r  )r7   r}  rP  r)   r  problem_typer  rr   r@   rU   rR   r	   r  r   r   r   r   r   r?  )r6   rV   rB   rC   r   r   ra   r   rv   r  r   r6  r7  r  r   r  r  r  r  s                      r:   rH   z)MarkupLMForSequenceClassification.forward  s   V &1%<k$++B]B]--))))%'/!5#   
  
]3/{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
rI   r  )rJ   rK   rL   r!   r   r  r  r   r   r  r   r@   r   r  r   r   rH   rN   rO   s   @r:   r  r    sg    ++D+K+KLi+jk+CRab -115151515/3,004)-,0/3&*]
ELL)]
 !.]
 !.	]

 !.]
 !.]
 u||,]
 ELL)]
  -]
 &]
 $D>]
 'tn]
 d^]
 
uU\\"$<<	=]
 c l]
rI   r  )r   )@rM   r   rf  typingr   r   r   r@   torch.utils.checkpointr   torch.nnr   r   r	   activationsr   
file_utilsr   r   r   modeling_outputsr   r   r   r   r   r   modeling_utilsr   r   r   r   utilsr   configuration_markuplmr   
get_loggerrJ   rB  _CHECKPOINT_FOR_DOCr  Moduler   r[   r]   r   r   r   r   r   r   r   r   r  r
  r  r0  rO  MARKUPLM_START_DOCSTRINGr  ri  r  r  r  r9  rI   r:   <module>r     s0     	 ) )    A A ! 
    2 
		H	%/ "/ bii / f4 _ _F 299  RYY RYY  bii $ryy 0!")) !CBII CN "# 0		 0hSBII SnZ
bii Z
z
o 
B	 . b hL+ L	L^  t
#: t
t
n UWop`
%< `
 q`
F  p
(? p
p
rI   