
"""PyTorch Bros model."""

import math
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import CrossEntropyLoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BaseModelOutputWithPastAndCrossAttentions,
    BaseModelOutputWithPoolingAndCrossAttentions,
    TokenClassifierOutput,
)
from ...modeling_utils import PreTrainedModel
from ...pytorch_utils import apply_chunking_to_forward, find_pruneable_heads_and_indices, prune_linear_layer
from ...utils import (
    ModelOutput,
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    logging,
    replace_return_docstrings,
)
from .configuration_bros import BrosConfig


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "jinho8345/bros-base-uncased"
_CONFIG_FOR_DOC = "BrosConfig"


BROS_START_DOCSTRING = r"""
    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
    and behavior.

    Parameters:
        config ([`BrosConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
"""

BROS_INPUTS_DOCSTRING = r"""
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`BrosProcessor`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)

        bbox (`torch.FloatTensor` of shape `(batch_size, num_boxes, 4)`):
            Bounding box coordinates for each token in the input sequence. Each bounding box is a list of four values
            (x1, y1, x2, y2), where (x1, y1) is the top left corner, and (x2, y2) is the bottom right corner of the
            bounding box.

        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)

        bbox_first_token_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to indicate the first token of each bounding box. Mask values selected in `[0, 1]`:

            - 1 for tokens that are the first token of a bounding box,
            - 0 for all other tokens (these positions are ignored when computing the loss).

        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)

        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)

        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.

        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.

        return_dict (`bool`, *optional*):
            Whether or not to return a [`~file_utils.ModelOutput`] instead of a plain tuple.
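
    A minimal sketch of how `bbox` and `bbox_first_token_mask` are typically assembled from word-level boxes is shown
    below. The normalization to the unit square and the word-to-token expansion are illustrative assumptions, not
    behavior enforced by the model itself:

    ```python
    import torch

    # one word split into two sub-word tokens that share the same box
    word_boxes = [(10, 10, 50, 20)]  # (x1, y1, x2, y2) in pixels
    tokens_per_word = [2]
    width, height = 100, 100

    bbox, bbox_first_token_mask = [], []
    for (x1, y1, x2, y2), n in zip(word_boxes, tokens_per_word):
        normalized = [x1 / width, y1 / height, x2 / width, y2 / height]
        bbox.extend([normalized] * n)  # every sub-word token repeats its word box
        bbox_first_token_mask.extend([1] + [0] * (n - 1))  # only the first sub-word token is scored

    bbox = torch.tensor([bbox], dtype=torch.float)  # (batch_size, sequence_length, 4)
    bbox_first_token_mask = torch.tensor([bbox_first_token_mask]).bool()  # (batch_size, sequence_length)
    ```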
"""


@dataclass
class BrosSpadeOutput(ModelOutput):
    """
    Base class for outputs of token classification models.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided) :
            Classification loss.
        initial_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.num_labels)`):
            Classification scores for entity initial tokens (before SoftMax).
        subsequent_token_logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, sequence_length+1)`):
            Classification scores for entity sequence tokens (before SoftMax).
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
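
    A minimal sketch of greedy decoding from these two tensors into entity token spans is shown below; the helper and
    the assumption that label index 0 is the background class are illustrative, not part of the library:

    ```python
    def decode_entities(initial_token_logits, subsequent_token_logits):
        # initial_token_logits: (batch_size, seq_len, num_labels)
        # subsequent_token_logits: (batch_size, seq_len, seq_len + 1); index seq_len means "no next token"
        batch_size, seq_len, _ = initial_token_logits.shape
        initial_labels = initial_token_logits.argmax(-1)  # entity label proposed at each position
        next_token = subsequent_token_logits.argmax(-1)  # pointer from each token to its successor

        entities = []
        for b in range(batch_size):
            for start in range(seq_len):
                label = initial_labels[b, start].item()
                if label == 0:  # assumed background class
                    continue
                span, cur = [start], start
                while next_token[b, cur].item() < seq_len:  # follow pointers until "no next token"
                    cur = next_token[b, cur].item()
                    if cur in span:  # guard against cycles
                        break
                    span.append(cur)
                entities.append((b, label, span))
        return entities
    ```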
    Nlossinitial_token_logitssubsequent_token_logitshidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r   r         Y/var/www/html/venv/lib/python3.12/site-packages/transformers/models/bros/modeling_bros.pyr   r      st    . )-D(5$$
%,.2%++215U..58<M8E%"3"345<59Ju00129r)   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosPositionalEmbedding1Dc                     t         t        |           |j                  | _        ddt	        j
                  d| j                  d      | j                  z  z  z  }| j                  d|       y )Nr   i'          g       @inv_freq)superr,   __init__dim_bbox_sinusoid_emb_1dr%   arangeregister_buffer)selfconfigr/   	__class__s      r*   r1   z"BrosPositionalEmbedding1D.__init__   sa    '79(.(G(G%ell3(E(EsKdNkNkkl
 	Z2r)   pos_seqreturnc                    |j                         }|\  }}}|j                  |||d      | j                  j                  ddd| j                  dz        z  }t	        j
                  |j                         |j                         gd      }|S )Nr      dim)sizeviewr/   r2   r%   catsincos)r5   r8   seq_sizeb1b2b3sinusoid_inppos_embs           r*   forwardz!BrosPositionalEmbedding1D.forward   s    <<>
B||BB2T]]5G5G1aQUQnQnrsQs5tt))\--/1A1A1CD"Mr)   r!   r"   r#   r1   r%   TensorrJ   __classcell__r7   s   @r*   r,   r,      s#    3u||  r)   r,   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosPositionalEmbedding2Dc                     t         t        |           |j                  | _        t	        |      | _        t	        |      | _        y N)r0   rP   r1   dim_bboxr,   	x_pos_emb	y_pos_embr5   r6   r7   s     r*   r1   z"BrosPositionalEmbedding2D.__init__   s4    '7926:26:r)   bboxr9   c                    g }t        | j                        D ]U  }|dz  dk(  r&|j                  | j                  |d|f                1|j                  | j	                  |d|f                W t        j                  |d      }|S )Nr;   r   .r<   r=   )rangerS   appendrT   rU   r%   rA   )r5   rW   stackibbox_pos_embs        r*   rJ   z!BrosPositionalEmbedding2D.forward   s|    t}}% 	;A1uzT^^DaL9:T^^DaL9:		;
 yyB/r)   rK   rN   s   @r*   rP   rP      s#    ;ELL U\\ r)   rP   c                   >     e Zd Z fdZdej
                  fdZ xZS )BrosBboxEmbeddingsc                     t         t        |           t        |      | _        t        j                  |j                  |j                  d      | _	        y )NF)bias)
r0   r_   r1   rP   bbox_sinusoid_embr   Lineardim_bbox_sinusoid_emb_2ddim_bbox_projectionbbox_projectionrV   s     r*   r1   zBrosBboxEmbeddings.__init__   s@     $02!:6!B!yy)H)H&JdJdkpqr)   rW   c                     |j                  dd      }|d d d d d d d f   |d d d d d d d f   z
  }| j                  |      }| j                  |      }|S )Nr   r   )	transposerb   rf   )r5   rW   bbox_tbbox_posr]   s        r*   rJ   zBrosBboxEmbeddings.forward   s\    1%$1a-(6!T1a-+@@--h7++L9r)   rK   rN   s   @r*   r_   r_      s    r
ELL r)   r_   c                        e Zd ZdZ fdZ	 	 	 	 	 d
deej                     deej                     deej                     deej                     dedej                  fd	Z	 xZ
S )BrosTextEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 d   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j'                  dt)        j*                  |j                        j-                  d             | j'                  dt)        j.                  | j0                  j3                         t(        j4                  | j0                  j6                        d	
       y )N)padding_idxepsposition_embedding_typeabsoluteposition_ids)r   r<   token_type_idsdtypedeviceF)
persistent)r0   r1   r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutgetattrrq   r4   r%   r3   expandzerosrs   r?   longrw   rV   s     r*   r1   zBrosTextEmbeddings.__init__   s8   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<='.v7PR\']$^U\\&:X:X-Y-`-`ah-ijKK!!&&(jj((//
  	 	
r)   	input_idsrt   rs   inputs_embedspast_key_values_lengthr9   c                 Z   ||j                         }n|j                         d d }|d   }|| j                  d d |||z   f   }|st        | d      r-| j                  d d d |f   }|j	                  |d   |      }	|	}n:t        j                  |t
        j                  | j                  j                        }|| j                  |      }| j                  |      }
||
z   }| j                  dk(  r| j                  |      }||z  }| j                  |      }| j                  |      }|S )Nr<   r   rt   r   ru   rr   )r?   rs   hasattrrt   r   r%   r   r   rw   r}   r   rq   r   r   r   )r5   r   rt   rs   r   r   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr   
embeddingsr   s                r*   rJ   zBrosTextEmbeddings.forward   sF     #..*K',,.s3K ^
,,Q0FVlIl0l-lmL!t-.*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M $ : :> J"%::
'':5"&":":<"H--J^^J/
\\*-
r)   )NNNNr   )r!   r"   r#   r$   r1   r   r%   rL   intrJ   rM   rN   s   @r*   rl   rl      s    Q
4 -115/304&'$ELL)$ !.$ u||,	$
  -$ !$$ 
$r)   rl   c                   b    e Zd Z fdZdej
                  fdZ	 	 	 	 	 	 ddej
                  dej
                  deej
                     deej
                     deej
                     d	eej
                     d
eeeej                           deej
                     deej
                     fdZ
 xZS )BrosSelfAttentionc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j$                  dk(  s| j$                  d	k(  rF|j&                  | _        t        j(                  d
|j&                  z  dz
  | j                        | _        |j,                  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()rq   rr   relative_keyrelative_key_queryr;   r   )r0   r1   r{   num_attention_headsr   
ValueErrorr   attention_head_sizeall_head_sizer   rc   querykeyvaluer   attention_probs_dropout_probr   r   rq   r~   ry   distance_embedding
is_decoderrV   s     r*   r1   zBrosSelfAttention.__init__  s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'.v7PR\']$''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++r)   xc                     |j                         d d | j                  | j                  fz   } |j                  | }|j	                  dddd      S )Nr<   r   r;   r   r
   )r?   r   r   r@   permute)r5   r   new_x_shapes      r*   transpose_for_scoresz&BrosSelfAttention.transpose_for_scores3  sV    ffhsm$$$$'
 
 AFFK yyAq!$$r)   r   r]   attention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsr9   c	                    | j                  |      }	|d u}
|
r||d   }|d   }|}n |
rC| j                  | j                  |            }| j                  | j                  |            }|}n|y| j                  | j                  |            }| j                  | j                  |            }t	        j
                  |d   |gd      }t	        j
                  |d   |gd      }n@| j                  | j                  |            }| j                  | j                  |            }| j                  |	      }| j                  r||f}t	        j                  ||j                  dd            }| j                  dk(  s| j                  dk(  rF|j                         d   }t	        j                  |t        j                  |j                  	      j                  dd      }t	        j                  |t        j                  |j                  	      j                  dd      }||z
  }| j                  || j                   z   dz
        }|j#                  |j$                  
      }| j                  dk(  rt	        j&                  d||      }||z   }nE| j                  dk(  r6t	        j&                  d||      }t	        j&                  d||      }||z   |z   }|j(                  \  }}}}|j                  ||||      }|j+                  g d      }t	        j&                  d||f      }||z   }|t-        j.                  | j0                        z  }|||z   } t3        j4                  d      |      }| j7                  |      }|||z  }t	        j                  ||      }|j+                  dddd      j9                         }|j                         d d | j:                  fz   } |j                  | }|r||fn|f}| j                  r||fz   }|S )Nr   r   r;   r=   r<   r   r   ru   rv   zbhld,lrd->bhlrzbhrd,lrd->bhlr)r;   r   r   r
   zbnid,bijd->bnijr
   )r   r   r   r   r%   rA   r   matmulrh   rq   r?   r3   r   rw   r@   r   r~   torv   einsumshaper   mathsqrtr   r   Softmaxr   
contiguousr   )r5   r   r]   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layerattention_scoresr   position_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_key
batch_sizen_headd_headbbox_pos_scoresattention_probscontext_layernew_context_layer_shapeoutputss                                  r*   rJ   zBrosSelfAttention.forward;  s    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@aHI))^A%6$D!LK11$((=2IJI33DJJ}4MNK//0AB?? (5N !<<Y5H5HR5PQ''>9T=Y=Y]q=q&++-a0J"\\*EJJ}OcOcdiijlnopN"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s  2=1B1B.
FJ#((ZVT#++L9,,'8;:UV+o=+dii8P8P.QQ%/.@ -"**,-=> ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CD6G=/2mM]?? 11Gr)   NNNNNF)r!   r"   r#   r1   r%   rL   r   r   r   r&   rJ   rM   rN   s   @r*   r   r     s    ,0%ell % 26,08<9=DH49f||f llf !.	f
 ELL)f  (5f !) 6f !uU->->'?!@Af $ELL1f 
u||	fr)   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )BrosSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nro   )r0   r1   r   rc   r{   denser   r   r   r   r   rV   s     r*   r1   zBrosSelfOutput.__init__  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r)   r   input_tensorr9   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rR   r   r   r   r5   r   r   s      r*   rJ   zBrosSelfOutput.forward  7    

=1]3}|'CDr)   rK   rN   s   @r*   r   r     1    >U\\  RWR^R^ r)   r   c                   4    e Zd Z fdZd Z	 	 	 	 	 	 ddej                  dej                  deej                     deej                     deej                     deej                     d	eeeej                           d
ee
   deej                     fdZ xZS )BrosAttentionc                     t         |           t        |      | _        t	        |      | _        t               | _        y rR   )r0   r1   r   r5   r   outputsetpruned_headsrV   s     r*   r1   zBrosAttention.__init__  s0    %f-	$V,Er)   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r=   )lenr   r5   r   r   r   r   r   r   r   r   r   r   union)r5   headsindexs      r*   prune_headszBrosAttention.prune_heads  s   u:?7II))II))	
u -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r)   r   r]   r   r   r   r   r   r   r9   c	           
      t    | j                  ||||||||      }	| j                  |	d   |      }
|
f|	dd  z   }|S )Nr   r]   r   r   r   r   r   r   r   r   )r5   r   )r5   r   r]   r   r   r   r   r   r   self_outputsattention_outputr   s               r*   rJ   zBrosAttention.forward  s_     yy'%)"7#9)/ ! 	
  ;;|AF#%QR(88r)   r   )r!   r"   r#   r1   r   r%   rL   r   r   r&   boolrJ   rM   rN   s   @r*   r   r     s    ";2 26,08<9=DH,1|| ll !.	
 ELL)  (5 !) 6 !uU->->'?!@A $D> 
u||	r)   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y rR   )r0   r1   r   rc   r{   intermediate_sizer   
isinstance
hidden_actstrr   intermediate_act_fnrV   s     r*   r1   zBrosIntermediate.__init__  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r)   r   r9   c                 J    | j                  |      }| j                  |      }|S rR   )r   r   )r5   r   s     r*   rJ   zBrosIntermediate.forward  s&    

=100?r)   rK   rN   s   @r*   r   r     s#    9U\\ ell r)   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )
BrosOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r0   r1   r   rc   r   r{   r   r   r   r   r   r   rV   s     r*   r1   zBrosOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r)   r   r   r9   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S rR   r   r   s      r*   rJ   zBrosOutput.forward   r   r)   rK   rN   s   @r*   r   r     r   r)   r   c                   4    e Zd Z fdZ	 	 	 	 	 	 ddej
                  dej
                  deej                     deej                     deej                     deej                     deeeej                           d	ee	   d
eej
                     fdZ
d Z xZS )	BrosLayerc                 b   t         |           |j                  | _        d| _        t	        |      | _        |j                  | _        |j                  | _        | j                  r*| j                  st        |  d      t	        |      | _	        t        |      | _        t        |      | _        y )Nr   z> should be used as a decoder model if cross attention is added)r0   r1   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attention	Exceptioncrossattentionr   intermediater   r   rV   s     r*   r1   zBrosLayer.__init__  s    '-'E'E$&v. ++#)#=#= ##??4&(f ghh"/"7D,V4 (r)   r   r]   r   r   r   r   r   r   r9   c	           	         ||d d nd }	| j                  ||||||	      }
|
d   }| j                  r|
dd }|
d   }n|
dd  }d }| j                  rT|Rt        | d      rt        d|  d      ||d	d  nd }| j	                  |||||||      }|d   }||dd z   }|d   }|z   }t        | j                  | j                  | j                  |      }|f|z   }| j                  r|fz   }|S )
Nr;   )r]   r   r   r   r   r   r   r<   r  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r  r   r   r  r  r   feed_forward_chunkr   r   )r5   r   r]   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr   r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputs                     r*   rJ   zBrosLayer.forward  s    :H9S>"1#5Y] !%%)/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@t-.=dV  Dd  e 
 @N?Yrs(;_c%&*&9&9 %&)!'#  7q9 7" ==G ,C2+F( 14P P0##((	
  /G+ ??!2 44Gr)   c                 L    | j                  |      }| j                  ||      }|S rR   )r  r   )r5   r   intermediate_outputr  s       r*   r  zBrosLayer.feed_forward_chunk[  s,    "//0@A{{#68HIr)   r   )r!   r"   r#   r1   r%   rL   r   r&   r   r   rJ   r  rM   rN   s   @r*   r   r     s    )$ 7;15=A>BDH,1C||C llC !!2!23	C
 E--.C  ((9(9:C !)):): ;C !uU->->'?!@AC $D>C 
u||	CJr)   r   c                   \    e Zd Z fdZ	 	 	 	 	 	 	 	 	 ddej
                  dej
                  deej                     deej                     deej                     deej                     deeeej                           d	ee	   d
ee	   dee	   dee	   de
eej
                     ef   fdZ xZS )BrosEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        y c c}w rR   )	r0   r1   r6   r   
ModuleListrY   num_hidden_layersr   layer)r5   r6   _r7   s      r*   r1   zBrosEncoder.__init__b  sC    ]]uVE]E]?^#_!If$5#_`
#_s   Ar   r]   r   r   r   r   past_key_values	use_cacher   output_hidden_statesreturn_dictr9   c                    |
rdnd }|	rdnd }|	r| j                   j                  rdnd }|rdnd }t        | j                        D ]  \  }}|
r||fz   }|||   nd }|||   nd }t	        | j                   dd      rH| j
                  r<|rt        j                  d       d}| j                  |j                  |||||||	      }n |||||||||	      }|d   }|r	||d   fz  }|	s||d   fz   }| j                   j                  s||d	   fz   } |
r||fz   }|st        d
 |||||fD              S t        |||||      S )Nr(   gradient_checkpointingFzh`use_cache=True` is incompatible with `config.gradient_checkpointing=True`. Setting `use_cache=False`...r   r   r<   r   r;   c              3   $   K   | ]  }|| 
 y wrR   r(   ).0vs     r*   	<genexpr>z&BrosEncoder.forward.<locals>.<genexpr>  s      
 = 
s   )last_hidden_stater  r   r    cross_attentions)r6   r  	enumerater  r   trainingloggerwarning_gradient_checkpointing_func__call__tupler   )r5   r   r]   r   r   r   r   r  r  r   r  r  all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacher\   layer_modulelayer_head_maskr   layer_outputss                        r*   rJ   zBrosEncoder.forwardg  s    #7BD$5b4%64;;;Z;Zr`d#,R$(4 *	VOA|#$58H$H!.7.CilO3B3N_Q/TXNt{{$<eDNN/ !&I $ A A ))! "#)*%	! !-"/!-#1-*?+A#1&7	! *!,M"}R'8&::" &9]1=M<O&O#;;22+?=QRCSBU+U(U*	VX   1]4D D 
 "&%'(
 
 
 9+.+*1
 	
r)   )	NNNNNNFFT)r!   r"   r#   r1   r%   rL   r   r&   r   r   r   r   rJ   rM   rN   s   @r*   r  r  a  s   a 7;15=A>BEI$(,1/4&*T
||T
 llT
 !!2!23	T

 E--.T
  ((9(9:T
 !)):): ;T
 "%e.?.?(@"ABT
 D>T
 $D>T
 'tnT
 d^T
 
uU\\"$MM	NT
r)   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )
BrosPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y rR   )r0   r1   r   rc   r{   r   Tanh
activationrV   s     r*   r1   zBrosPooler.__init__  s9    YYv1163E3EF
'')r)   r   r9   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r6  )r5   r   first_token_tensorpooled_outputs       r*   rJ   zBrosPooler.forward  s6     +1a40

#566r)   rK   rN   s   @r*   r3  r3    s#    $
U\\ ell r)   r3  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )BrosRelationExtractorc                 R   t         |           |j                  | _        |j                  | _        |j                  | _        |j                  | _        t        j                  | j                        | _	        t        j                  | j                  | j                  | j
                  z        | _        t        j                  | j                  | j                  | j
                  z        | _        t        j                  t        j                  d| j                              | _        y )Nr   )r0   r1   n_relationsr{   backbone_hidden_sizehead_hidden_sizeclassifier_dropout_probr   r   droprc   r   r   	Parameterr%   r   
dummy_noderV   s     r*   r1   zBrosRelationExtractor.__init__  s    !--$*$6$6! & 2 2'-'E'E$JJt;;<	YYt88$:J:JTMbMb:bc
99T668H8H4K`K`8`a,,u{{1d6O6O'PQr)   r   r   c           	         | j                  | j                  |            }| j                  j                  d      j	                  d|j                  d      d      }t        j                  ||gd      }| j                  | j                  |            }|j                  |j                  d      |j                  d      | j                  | j                        }|j                  |j                  d      |j                  d      | j                  | j                        }t        j                  |j                  dddd      |j                  dddd            }|S )Nr   r   axisr;   r
   )r   rA  rC  	unsqueezerepeatr?   r%   rA   r   r@   r=  r?  r   r   )r5   r   r   	dummy_vecrelation_scores        r*   rJ   zBrosRelationExtractor.forward  s   jj;!78OO--a0779>>!;LaP	IIy)41=	HHTYYy12	!&&Q!1!1!!4d6F6FH]H]
 NN9>>!#4innQ6GIYIY[_[p[pq	1a+Y->->q!Q-J
 r)   rK   rN   s   @r*   r;  r;    s$    R5<< ELL r)   r;  c                       e Zd ZdZeZdZd Zy)BrosPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    brosc                    t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yy)zInitialize the weightsr.   )meanstdNg      ?)r   r   rc   weightdatanormal_r6   initializer_rangera   zero_ry   rn   r   fill_)r5   modules     r*   _init_weightsz!BrosPreTrainedModel._init_weights  s   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S) .r)   N)r!   r"   r#   r$   r   config_classbase_model_prefixrX  r(   r)   r*   rL  rL    s    
 L*r)   rL  z^The bare Bros Model transformer outputting raw hidden-states without any specific head on top.c            #       (    e Zd Zd fd	Zd Zd Zd Z eej                  d             e
ee      	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     d	eej                     d
eej                     deej                     deej                     deej                     deej                     deej                     deeej"                        dee   dee   dee   dee   deeej                     ef   fd              Z xZS )	BrosModelc                     t         |   |       || _        t        |      | _        t        |      | _        t        |      | _        |rt        |      nd | _
        | j                          y rR   )r0   r1   r6   rl   r   r_   bbox_embeddingsr  encoderr3  poolerinit_weights)r5   r6   add_pooling_layerr7   s      r*   r1   zBrosModel.__init__  sX     ,V41&9"6*,=j(4r)   c                 .    | j                   j                  S rR   r   r}   )r5   s    r*   get_input_embeddingszBrosModel.get_input_embeddings  s    ...r)   c                 &    || j                   _        y rR   rd  )r5   r   s     r*   set_input_embeddingszBrosModel.set_input_embeddings  s    */'r)   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr_  r  r  r   )r5   heads_to_pruner  r   s       r*   _prune_headszBrosModel._prune_heads!  sE    
 +002 	CLE5LLu%//;;EB	Cr)   batch_size, sequence_lengthoutput_typerY  r   rW   r   rt   rs   r   r   r   r   r  r  r   r  r  r9   c                     ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j                   j                  r||n| j                   j
                  }nd}||t        d      ||j                         }n!||j                         dd }nt        d      |t        d      |\  }}||j                  n|j                  }|
|
d   d   j                  d   nd}|t        j                  ||	      }|pt        | j                  d
      r4| j                  j                  ddd|f   }|j                  ||      }|}n&t        j                   |t        j"                  |      }| j%                  |||      }| j                   j                  rE|C|j                         \  }}}||f}|	t        j                  ||	      }	| j'                  |	      }nd}| j)                  || j                   j*                        }| j                  |||||      }|j                  d   dk(  r|ddddg df   }|| j                   j,                  z  }| j/                  |      }| j1                  |||||||
||||      }|d   } | j2                  | j3                  |       nd}!|s
| |!f|dd z   S t5        | |!|j6                  |j8                  |j:                  |j<                        S )a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosModel

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosModel.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        >>> last_hidden_states = outputs.last_hidden_state
        ```NFzDYou cannot specify both input_ids and inputs_embeds at the same timer<   z5You have to specify either input_ids or inputs_embedszYou have to specify bboxr   r;   )rw   rt   ru   )r   rs   rt   r   r      )r   r   r;   r   r;   r
   r   r
   )
r]   r   r   r   r   r  r  r   r  r  r   )r"  pooler_outputr  r   r    r#  )r6   r   r  use_return_dictr   r  r   r?   rw   r   r%   onesr   r   rt   r   r   r   get_extended_attention_maskinvert_attention_maskget_head_maskr  
bbox_scaler^  r_  r`  r   r  r   r    r#  )"r5   r   rW   r   rt   rs   r   r   r   r   r  r  r   r  r  r   r   r   rw   r   r   r   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthr  encoder_hidden_shapeencoder_extended_attention_maskembedding_outputscaled_bboxbbox_position_embeddingsencoder_outputssequence_outputr9  s"                                     r*   rJ   zBrosModel.forward)  sV   N 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B];;!!%.%:	@U@UII ]%>cdd"#..*K&',,.s3KTUU<788!,
J%.%:!!@T@T DSC^!3A!6!<!<Q!?de!"ZZFCN!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_alnt0u ;;!!&;&G=R=W=W=Y: 7$68O#P %-).4HQW)X&.2.H.HI_.`+.2+ &&y$++2O2OP	??%)'#9 + 
 ::b>Q1667DT[[333#'#7#7#D ,,12"7#B+/!5# ' 
 *!,8<8OO4UY#]3oab6III;-'+;;)77&11,==
 	
r)   )T)NNNNNNNNNNNNNN)r!   r"   r#   r1   re  rg  rk  r   BROS_INPUTS_DOCSTRINGformatr   r   _CONFIG_FOR_DOCr   r%   rL   r   r&   r   r   r   rJ   rM   rN   s   @r*   r\  r\  
  s   

/0C ++@+G+GHe+fg+Wfuv -1'+1515/3,0048<9==A$(,0/3&*K
ELL)K
 u||$K
 !.	K

 !.K
 u||,K
 ELL)K
  -K
  (5K
 !) 6K
 "$u'8'8"9:K
 D>K
 $D>K
 'tnK
 d^K
  
uU\\"$PP	Q!K
 w hK
r)   r\  z
    Bros Model with a token classification head on top (a linear layer on top of the hidden-states output) e.g. for
    Named-Entity-Recognition (NER) tasks.
    c                       e Zd ZdgZ fdZ eej                  d             ee	e
      	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     deej                     deej                     dee   dee   dee   deeej                     e	f   fd              Z xZS )BrosForTokenClassificationr`  c                 `   t         |   |       |j                  | _        t        |      | _        t        |d      r|j                  n|j                  }t        j                  |      | _
        t        j                  |j                  |j                        | _        | j                          y Nclassifier_dropout)r0   r1   
num_labelsr\  rM  r   r  r   r   r   r   rc   r{   
classifierra  r5   r6   r  r7   s      r*   r1   z#BrosForTokenClassification.__init__  s      ++f%	)09M)NF%%TZTnTn 	 zz"45))F$6$68I8IJr)   rl  rm  r   rW   r   bbox_first_token_maskrt   rs   r   r   labelsr   r  r  r9   c                 B   ||n| j                   j                  }| j                  ||||||||
||
      }|d   }| j                  |      }| j	                  |      }d}|	t               }|J|j                  d      } ||j                  d| j                        |   |	j                  d      |         }n2 ||j                  d| j                        |	j                  d            }|s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )ax  

        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```N)	rW   r   rt   rs   r   r   r   r  r  r   r<   r;   r   logitsr   r    )r6   rr  rM  r   r  r	   r@   r  r   r   r    )r5   r   rW   r   r  rt   rs   r   r   r  r   r  r  r   r  r  r   loss_fctr   s                      r*   rJ   z"BrosForTokenClassification.forward  sF   L &1%<k$++B]B]))))%'/!5#  
 "!*,,71')H$0(=(B(B2(F%KKDOO45JKV[[Y[_]rMs  B @&++b/RY,F)-)9TGf$EvE$!//))	
 	
r)   NNNNNNNNNNNNr!   r"   r#   "_keys_to_ignore_on_load_unexpectedr1   r   r  r  r   r   r  r   r%   rL   r   r   r   rJ   rM   rN   s   @r*   r  r    sp    +4& ++@+G+GHe+fg+@_ -1'+158<15/3,004)-,0/3&*L
ELL)L
 u||$L
 !.	L

  (5L
 !.L
 u||,L
 ELL)L
  -L
 &L
 $D>L
 'tnL
 d^L
 
uU\\"$99	:L
 ` hL
r)   r  a  
    Bros Model with a token classification head on top (initial_token_layers and subsequent_token_layer on top of the
    hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. The initial_token_classifier is used to
    predict the first token of each entity, and the subsequent_token_classifier is used to predict the subsequent
    tokens within an entity. Compared to BrosForTokenClassification, this model is more robust to serialization errors
    since it predicts next token from one token.
    c            !           e Zd ZdgZ fdZ eej                  d             ee	e
      	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     deej                     deej                     deej                     dee   dee   dee   deeej                     e	f   fd              Z xZS )!BrosSpadeEEForTokenClassificationr`  c           	      f   t         |   |       || _        |j                  | _        |j                  | _        |j
                  | _        t        |      | _        t        |d      r|j                  n|j                  }t        j                  t        j                  |      t        j                  |j
                  |j
                        t        j                  |      t        j                  |j
                  |j                              | _        t#        |      | _        | j'                          y r  )r0   r1   r6   r  r=  r{   r>  r\  rM  r   r  r   r   
Sequentialr   rc   initial_token_classifierr;  subsequent_token_classifierra  r  s      r*   r1   z*BrosSpadeEEForTokenClassification.__init__.  s      ++!--$*$6$6!f%	)09M)NF%%TZTnTn 	
 )+JJ)*IIf((&*<*<=JJ)*IIf((&*;*;<	)
% ,A+H(r)   rl  rm  r   rW   r   r  rt   rs   r   r   initial_token_labelssubsequent_token_labelsr   r  r  r9   c                    ||n| j                   j                  }| j                  ||||||||||
      }|d   }|j                  dd      j	                         }| j                  |      j                  dd      j	                         }| j                  ||      j                  d      }d|z
  }|j                  \  }}|j                  }t        j                  |t        j                  |dg      j                  |      gd      j                         }|j                  |dddddf   t        j                   |j"                        j$                        }t        j&                  ||dz         j                  |      j                         }|j                  |dddddf   t        j                   |j"                        j$                        }|j)                  d      j                         }d}|	|
t+               }|	j)                  d      }	|;|j)                  d      } ||j)                  d| j,                        |   |	|         }n# ||j)                  d| j,                        |	      }|
j)                  d      }
 ||j)                  d|dz         |   |
|         }||z   }|s||f|dd z   }||f|z   S |S t/        ||||j0                  |j2                        S )	a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeEEForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeEEForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```N
r   rW   r   rt   rs   r   r   r   r  r  r   r   rE  r<   r;   )r   r   r   r   r    )r6   rr  rM  rh   r   r  r  squeezer   rw   r%   rA   r   r   r   masked_fillfinforv   mineyer@   r	   r  r   r   r    )r5   r   rW   r   r  rt   rs   r   r   r  r  r   r  r  r   last_hidden_statesr   r   inv_attention_maskr   max_seq_lengthrw   invalid_token_maskself_token_masksubsequent_token_maskr   r  initial_token_losssubsequent_token_lossr   s                                 r*   rJ   z)BrosSpadeEEForTokenClassification.forwardG  s   L &1%<k$++B]B]))))%'/!5#  
 %QZ/99!Q?JJL#<<=OPZZ[\^_`kkm"&"B"BCUWi"j"r"rst"u /%7%=%="
N#**"YY(:EKKUV<X<[<[\b<c'dklmrrt"9"E"Eq$z*EKK8O8U8U,V,Z,Z#
  ))NNQ4FGJJ6RWWY"9"E"ED!QJ'5L5R5R)S)W)W#
 !/ 3 3B 7 < < >+0G0S')H $8#<#<R#@ $0(=(B(B2(F%%-(--b$//BCXY()>?&"
 &..B.G.GDOO.\^r%s"&=&B&B2&F#$,',,R!1CDEZ['(=>%!
 &(==D*,CDwqr{RF)-)9TGf$EvE!5$;!//))
 	
r)   )NNNNNNNNNNNNN)r!   r"   r#   r  r1   r   r  r  r   r   r  r   r%   rL   r   r   r   rJ   rM   rN   s   @r*   r  r  !  s    +4&2 ++@+G+GHe+fg?Y -1'+158<15/3,0047;:>,0/3&*g
ELL)g
 u||$g
 !.	g

  (5g
 !.g
 u||,g
 ELL)g
  -g
 'u||4g
 "*%,,!7g
 $D>g
 'tng
 d^g
 
uU\\"O3	4g
 Z hg
r)   r  z
    Bros Model with a token classification head on top (a entity_linker layer on top of the hidden-states output) e.g.
    for Entity-Linking. The entity_linker is used to predict intra-entity links (one entity to another entity).
    c                       e Zd ZdgZ fdZ eej                  d             ee	e
      	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     deej                     deej                     dee   dee   dee   deeej                     e	f   fd              Z xZS )!BrosSpadeELForTokenClassificationr`  c                 @   t         |   |       || _        |j                  | _        |j                  | _        |j
                  | _        t        |      | _        t        |d      r|j                  n|j                   t        |      | _        | j                          y r  )r0   r1   r6   r  r=  r{   r>  r\  rM  r   r  r   r;  entity_linkerra  rV   s     r*   r1   z*BrosSpadeELForTokenClassification.__init__  s      ++!--$*$6$6!f%	&-f6J&K	"	"QWQkQk26:r)   rl  rm  r   rW   r   r  rt   rs   r   r   r  r   r  r  r9   c                 T   ||n| j                   j                  }| j                  ||||||||
||
      }|d   }|j                  dd      j	                         }| j                  ||      j                  d      }d}|	qt               }|j                  \  }}|j                  }t        j                  ||dz         j                  |      j                         }|j                  d      }t        j                  | t        j                   |dgt        j                        j                  |      gd      }|j#                  |dddddf   t        j$                  |j&                        j(                        }|j#                  |dddddf   t        j$                  |j&                        j(                        } ||j                  d|dz         |   |	j                  d      |         }|s|f|dd z   }||f|z   S |S t+        |||j,                  |j.                  	      S )
a  
        Returns:

        Examples:

        ```python
        >>> import torch
        >>> from transformers import BrosProcessor, BrosSpadeELForTokenClassification

        >>> processor = BrosProcessor.from_pretrained("jinho8345/bros-base-uncased")

        >>> model = BrosSpadeELForTokenClassification.from_pretrained("jinho8345/bros-base-uncased")

        >>> encoding = processor("Hello, my dog is cute", add_special_tokens=False, return_tensors="pt")
        >>> bbox = torch.tensor([[[0, 0, 1, 1]]]).repeat(1, encoding["input_ids"].shape[-1], 1)
        >>> encoding["bbox"] = bbox

        >>> outputs = model(**encoding)
        ```Nr  r   r   r<   r   rE  r;   r  )r6   rr  rM  rh   r   r  r  r	   r   rw   r%   r  r   r   r@   rA   r   r  r  rv   r  r   r   r    )r5   r   rW   r   r  rt   rs   r   r   r  r   r  r  r   r  r  r   r  r   r  rw   r  maskr   s                           r*   rJ   z)BrosSpadeELForTokenClassification.forward  s%   H &1%<k$++B]B]))))%'/!5#  
 %QZ/99!Q?JJL##$68JKSSTUV')H)7)=)=&J#**F#ii8JKNNvV[[]O(--b1D$)II**KKQuzzBEEfM %! ''(=aqj(I5;;W]WcWcKdKhKhiF''a
(CU[[QWQ]Q]E^EbEbcFFKKNQ,>?Ev{{SUW[G\]DY,F)-)9TGf$EvE$!//))	
 	
r)   r  r  rN   s   @r*   r  r    sp    +4& ++@+G+GHe+fg+@_ -1'+158<15/3,004)-,0/3&*U
ELL)U
 u||$U
 !.	U

  (5U
 !.U
 u||,U
 ELL)U
  -U
 &U
 $D>U
 'tnU
 d^U
 
uU\\"$99	:U
 ` hU
r)   r  )=r$   r   dataclassesr   typingr   r   r   r   r%   torch.utils.checkpointr   torch.nnr	   activationsr   modeling_outputsr   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   r   r   configuration_brosr   
get_loggerr!   r&  _CHECKPOINT_FOR_DOCr  BROS_START_DOCSTRINGr  r   Moduler,   rP   r_   rl   r   r   r   r   r   r   r  r3  r;  rL  r\  r  r  r  r(   r)   r*   <module>r     s4     ! / /    % ! 
 . l l  + 
		H	%3 	 A H :k : :>		 *		 & ? ?DG		 GVRYY 3BII 3nryy  W		 WtZ
")) Z
| BII D*/ *4 dh
# h
	h
V  ^
!4 ^
^
B  	E
(; E
	E
P  h
(; h
h
r)   