
    sgF                       d Z ddlZddlZddlmZ ddlmZmZmZ ddl	Z	ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZmZmZ ddlmZmZmZmZ ddlm Z   ejB                  e"      Z#dZ$dZ%dZ&dZ'd Z( G d de
jR                        Z* G d de
jR                        Z+ G d de
jR                        Z,de+iZ- G d de
jR                        Z. G d de
jR                        Z/ G d d e
jR                        Z0 G d! d"e
jR                        Z1 G d# d$e
jR                        Z2 G d% d&e
jR                        Z3e G d' d(e             Z4e G d) d*e             Z5e G d+ d,e             Z6e G d- d.e             Z7 G d/ d0e
jR                        Z8 G d1 d2e
jR                        Z9 G d3 d4e
jR                        Z: G d5 d6e
jR                        Z; G d7 d8e
jR                        Z<d9Z=d:Z> G d; d<e      Z? G d= d>e?      Z@ ed?e=       G d@ dAe?             ZA edBe=       G dC dDe?             ZB edEe=       G dF dGe?             ZC edHe=       G dI dJe?             ZDdKZE edLe=       G dM dNe?             ZFy)OzPyTorch REALM model.    N)	dataclass)OptionalTupleUnion)nn)CrossEntropyLoss   )ACT2FN))BaseModelOutputWithPastAndCrossAttentions,BaseModelOutputWithPoolingAndCrossAttentionsMaskedLMOutputModelOutput)PreTrainedModel)apply_chunking_to_forward find_pruneable_heads_and_indicesprune_linear_layer)add_start_docstrings%add_start_docstrings_to_model_forwardloggingreplace_return_docstrings   )RealmConfigz(google/realm-cc-news-pretrained-embedderz'google/realm-cc-news-pretrained-encoderz&google/realm-cc-news-pretrained-scorerr   c           	      
   	 ddl }ddl}ddl}t        j                  j                  |      }t        j                  d|        |j                  j                  |      }g }g }	|D ]^  \  }
}t        j                  d|
 d|        |j                  j                  ||
      }|j                  |
       |	j                  |       ` t        ||	      D ]  \  }
}t        | t               r5d|
vr1t        j                  d|
 d	| j"                  j$                   d
       L|
j'                  d      s|
j'                  d      r4t        | t(              r$|
j+                  dd      }
|
j+                  dd      }
|
j'                  d      s|
j'                  d      r"t        | t,              r|
j+                  dd      }
|
j'                  d      r}t        | t               rdnd}|
j+                  d| d      }
|
j+                  d| d      }
|
j+                  d| d      }
|
j+                  d| d      }
|
j+                  d| d      }
|
j'                  d      rt        | t.              rdnd}|
j+                  d| d      }
|
j+                  d| d       }
|
j+                  d!| d"      }
|
j+                  d#| d$      }
|
j+                  d%| d      }
|
j+                  d&| d$      }
nO|
j'                  d'      r>t        | t.              rdnd}|
j+                  d(| d       }
|
j+                  d)| d"      }
|
j1                  d*      }
t3        d+ |
D              r)t        j                  dd*j5                  |
              | }|
D ]  }|j7                  d,|      r|j1                  d-|      }n|g}|d   d.k(  s|d   d/k(  rt9        |d0      }n-|d   d1k(  s|d   d2k(  rt9        |d3      }n	 t9        ||d         }t=        |      d4k\  st?        |d5         }||   } d6d d7k(  rt9        |d0      }n|d.k(  r|jA                  |      }	 |jB                  |jB                  k(  s"J d8|jB                   d9|jB                   d:       	 t        j                  d;|
        tI        jJ                  |      |_&         | S # t        $ r t        j                  d        w xY w# t:        $ r+ t        j                  dd*j5                  |
              Y w xY w# tD        $ r1}|xjF                  |jB                  |jB                  fz  c_#         d}~ww xY w)<z'Load tf checkpoints in a pytorch model.r   NzLoading a TensorFlow model in PyTorch, requires TensorFlow to be installed. Please see https://www.tensorflow.org/install/ for installation instructions.z&Converting TensorFlow checkpoint from zLoading TF weight z with shape readerz	Skipping z as it is not z's parameterbertclszbert/zreader/realm/zcls/zreader/cls/zrealm/ zreader/zreader/module/bert/zreader/module/cls/zreader/dense/zqa_outputs/dense_intermediate/zreader/dense_1/zqa_outputs/dense_output/zreader/layer_normalizationzqa_outputs/layer_normalizationzmodule/module/module/z	embedder/z!module/module/module/module/bert/zmodule/module/module/LayerNorm/zcls/LayerNorm/zmodule/module/module/dense/z
cls/dense/z,module/module/module/module/cls/predictions/zcls/predictions/zmodule/module/module/bert/z%module/module/module/cls/predictions/zmodule/module/zmodule/module/LayerNorm/zmodule/module/dense//c              3   $   K   | ]  }|d v  
 yw))adam_vadam_mAdamWeightDecayOptimizerAdamWeightDecayOptimizer_1global_stepN ).0ns     f/var/www/html/venv/lib/python3.12/site-packages/transformers/models/deprecated/realm/modeling_realm.py	<genexpr>z+load_tf_weights_in_realm.<locals>.<genexpr>p   s      
 nn
   z[A-Za-z]+_\d+z_(\d+)kernelgammaweightoutput_biasbetabias   r   i_embeddingszPointer shape z and array shape z mismatchedzInitialize PyTorch weight )'renumpy
tensorflowImportErrorloggererrorospathabspathinfotrainlist_variablesload_variableappendzip
isinstanceRealmReader	__class____name__
startswithRealmForOpenQAreplaceRealmKnowledgeAugEncoderRealmEmbeddersplitanyjoin	fullmatchgetattrAttributeErrorlenint	transposeshapeAssertionErrorargstorch
from_numpydata)modelconfigtf_checkpoint_pathr3   nptftf_path	init_varsnamesarraysnamerT   arrayreader_prefixembedder_prefixpointerm_namescope_namesnumes                       r(   load_tf_weights_in_realmrl   .   sN   
 ggoo01G
KK8	BC''0IEF  e(l5'BC&&w5Te	 5&) M/ee[)hd.BKK)D68P8P7QQ]^_ OOF#tu'=:eUcCd<<9D<<6D OOF#tu'=:eUmCn<<2D ??8$",UK"@BiM<< 5-7OPD<< 4t6LMD<<M?B`1abD<< 1m_D\3]^D<< <Om>noD ??23$.um$Db+O<< CGXX^E_`D<< AoEVVdCefD<< =/ARR\?]^D<< NSbRccsPtuD<< <@QQW>XYD<< GOK\\lImnD__-.$.um$Db+O<< :>O~<^_D<< 6?:K:8VWDzz#  

 
 KK)CHHTN#345 	'F||,f5 hhy&9%h1~)[^w-F!'84Q=0KNf4L!'62%g{1~>G ;1$+a.)!#,#	'$ #$<=(gx0GxLL'E	,Y.?}KXY,
 	078''.[M/\ LC  Q	
 	\ & KK)CHHTN+; <=  	FFw}}ekk22F	s5   S 0S%;T S"%0TT	U%,UUc                        e Zd ZdZ fdZ	 	 	 	 	 d
deej                     deej                     deej                     deej                     de	dej                  fd	Z xZS )RealmEmbeddingszGConstruct the embeddings from word, position and token_type embeddings.c                 >   t         |           t        j                  |j                  |j
                  |j                        | _        t        j                  |j                  |j
                        | _	        t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        t#        |dd      | _        | j'                  dt)        j*                  |j                        j-                  d      d       | j'                  d	t)        j.                  | j0                  j3                         t(        j4                  
      d       y )N)padding_idxepsposition_embedding_typeabsoluteposition_ids)r   F)
persistenttoken_type_idsdtype)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutrO   rs   register_bufferrW   arangeexpandzerosru   sizelongselfr[   rD   s     r(   r|   zRealmEmbeddings.__init__   s/   !||F,=,=v?Q?Q_e_r_rs#%<<0N0NPVPbPb#c %'\\&2H2H&J\J\%]" f&8&8f>S>STzz&"<"<='.v7PR\']$ELL)G)GHOOPWXej 	 	
 	ekk$*;*;*@*@*B%**Ubg 	 	
    	input_idsrx   ru   inputs_embedspast_key_values_lengthreturnc                 Z   ||j                         }n|j                         d d }|d   }|| j                  d d |||z   f   }|st        | d      r-| j                  d d d |f   }|j	                  |d   |      }	|	}n:t        j                  |t
        j                  | j                  j                        }|| j                  |      }| j                  |      }
||
z   }| j                  dk(  r| j                  |      }||z  }| j                  |      }| j                  |      }|S )Nrv   r   rx   r   rz   devicert   )r   ru   hasattrrx   r   rW   r   r   r   r   r   rs   r   r   r   )r   r   rx   ru   r   r   input_shape
seq_lengthbuffered_token_type_ids buffered_token_type_ids_expandedr   
embeddingsr   s                r(   forwardzRealmEmbeddings.forward   sH     #..*K',,.s3K ^
,,Q0FVlIl0l-lmL
 !t-.*.*=*=a*n*M'3J3Q3QR]^_R`bl3m0!A!&[

SWSdSdSkSk!l  00;M $ : :> J"%::
'':5"&":":<"H--J^^J/
\\*-
r   )NNNNr   )rE   
__module____qualname____doc__r|   r   rW   
LongTensorFloatTensorrR   Tensorr   __classcell__rD   s   @r(   rn   rn      s    Q
* 15593759&''E,,-' !!1!12' u//0	'
   1 12' !$' 
'r   rn   c                   P    e Zd Zd fd	Zdej
                  dej
                  fdZ	 	 	 	 	 	 ddej
                  deej                     deej                     deej                     d	eej                     d
ee	e	ej                           dee
   de	ej
                     fdZ xZS )RealmSelfAttentionc                    t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                        | _        |xs t#        |dd      | _        | j$                  dk(  s| j$                  d	k(  rF|j&                  | _        t        j(                  d
|j&                  z  dz
  | j                        | _        |j,                  | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()rs   rt   relative_keyrelative_key_queryr1   r   )r{   r|   r   num_attention_headsr   
ValueErrorrR   attention_head_sizeall_head_sizer   Linearquerykeyvaluer   attention_probs_dropout_probr   rO   rs   r   r}   distance_embedding
is_decoderr   r[   rs   rD   s      r(   r|   zRealmSelfAttention.__init__   s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
zz&"E"EF'> (
'-zC
$ ''>9T=Y=Y]q=q+1+I+ID(&(ll1v7U7U3UXY3Y[_[s[s&tD# ++r   xr   c                     |j                         d d | j                  | j                  fz   }|j                  |      }|j	                  dddd      S )Nrv   r   r1   r      )r   r   r   viewpermute)r   r   new_x_shapes      r(   transpose_for_scoresz'RealmSelfAttention.transpose_for_scores   sL    ffhsmt'?'?AYAY&ZZFF;yyAq!$$r   hidden_statesattention_mask	head_maskencoder_hidden_statesencoder_attention_maskpast_key_valueoutput_attentionsc                 $   | j                  |      }|d u}	|	r||d   }
|d   }|}n |	rC| j                  | j                  |            }
| j                  | j                  |            }|}n|y| j                  | j                  |            }
| j                  | j                  |            }t	        j
                  |d   |
gd      }
t	        j
                  |d   |gd      }n@| j                  | j                  |            }
| j                  | j                  |            }| j                  |      }|d u}| j                  r|
|f}t	        j                  ||
j                  dd            }| j                  dk(  s| j                  dk(  r|j                  d   |
j                  d   }}|rDt	        j                  |dz
  t        j                  |j                  	      j                  dd      }n@t	        j                  |t        j                  |j                  	      j                  dd      }t	        j                  |t        j                  |j                  	      j                  dd      }||z
  }| j!                  || j"                  z   dz
        }|j%                  |j&                  
      }| j                  dk(  rt	        j(                  d||      }||z   }nE| j                  dk(  r6t	        j(                  d||      }t	        j(                  d|
|      }||z   |z   }|t+        j,                  | j.                        z  }|||z   }t0        j2                  j5                  |d      }| j7                  |      }|||z  }t	        j                  ||      }|j9                  dddd      j;                         }|j=                         d d | j>                  fz   }|j                  |      }|r||fn|f}| j                  r||fz   }|S )Nr   r   r1   dimrv   r   r   r   ry   zbhld,lrd->bhlrzbhrd,lrd->bhlrr   ) r   r   r   r   rW   catr   matmulrS   rs   rT   tensorr   r   r   r   r   r   torz   einsummathsqrtr   r   
functionalsoftmaxr   r   
contiguousr   r   )r   r   r   r   r   r   r   r   mixed_query_layeris_cross_attention	key_layervalue_layerquery_layer	use_cacheattention_scoresquery_length
key_lengthposition_ids_lposition_ids_rdistancepositional_embeddingrelative_position_scoresrelative_position_scores_queryrelative_position_scores_keyattention_probscontext_layernew_context_layer_shapeoutputss                               r(   r   zRealmSelfAttention.forward   s    !JJ}5
 3$>."<&q)I(+K3N11$((;P2QRI33DJJ?T4UVK3N'11$((=2IJI33DJJ}4MNK		>!#4i"@aHI))^A%6$D!LK11$((=2IJI33DJJ}4MNK//0AB"$.	?? (5N !<<Y5H5HR5PQ''>9T=Y=Y]q=q'2'8'8';Y__Q=O*L!&j1nEJJWdWkWk!l!q!q" "'l%**UbUiUi!j!o!oprtu!v"\\*EJJ}OcOcdiijkmopN%6H#'#:#:8dFbFb;bef;f#g #7#:#:ARAR#:#S ++~=+0<<8H+Wk+l(#36N#N --1EE16>NP[]q1r./4||<LiYm/n,#36T#TWs#s +dii8P8P.QQ%/.@ --//0@b/I ,,7  -	9O_kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=/2mM]?? 11Gr   NNNNNNF)rE   r   r   r|   rW   r   r   r   r   r   boolr   r   r   s   @r(   r   r      s    ,4%ell %u|| % 7;15=A>BDH,1c||c !!2!23c E--.	c
  ((9(9:c !)):): ;c !uU->->'?!@Ac $D>c 
u||	cr   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )RealmSelfOutputc                 (   t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                        | _
        y Nrq   )r{   r|   r   r   r   denser   r   r   r   r   r   s     r(   r|   zRealmSelfOutput.__init__`  s`    YYv1163E3EF
f&8&8f>S>STzz&"<"<=r   r   input_tensorr   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   r   r   r   r   s      r(   r   zRealmSelfOutput.forwardf  7    

=1]3}|'CDr   rE   r   r   r|   rW   r   r   r   r   s   @r(   r   r   _  1    >U\\  RWR^R^ r   r   eagerc                       e Zd Zd fd	Zd Z	 	 	 	 	 	 ddej                  deej                     deej                     deej                     deej                     dee	e	ej                           d	ee
   d
e	ej                     fdZ xZS )RealmAttentionc                     t         |           t        |j                     ||      | _        t        |      | _        t               | _        y )Nrs   )	r{   r|   REALM_SELF_ATTENTION_CLASSES_attn_implementationr   r   outputsetpruned_headsr   s      r(   r|   zRealmAttention.__init__s  sC    01L1LM,C
	 &f-Er   c                 >   t        |      dk(  ry t        || j                  j                  | j                  j                  | j
                        \  }}t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _        t        | j                  j                  |      | j                  _	        t        | j                  j                  |d      | j                  _        | j                  j                  t        |      z
  | j                  _        | j                  j                  | j                  j                  z  | j                  _        | j
                  j                  |      | _        y )Nr   r   r   )rQ   r   r   r   r   r  r   r   r   r   r  r   r   union)r   headsindexs      r(   prune_headszRealmAttention.prune_heads{  s   u:?749900$))2O2OQUQbQb
u
 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r   r   r   r   r   r   r   r   r   c           	      p    | j                  |||||||      }| j                  |d   |      }	|	f|dd  z   }
|
S )Nr   r   )r   r  )r   r   r   r   r   r   r   r   self_outputsattention_outputr   s              r(   r   zRealmAttention.forward  sW     yy!"
  ;;|AF#%QR(88r   r   r   )rE   r   r   r|   r
  rW   r   r   r   r   r   r   r   r   s   @r(   r   r   r  s    ";* 7;15=A>BDH,1|| !!2!23 E--.	
  ((9(9: !)):): ; !uU->->'?!@A $D> 
u||	r   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )RealmIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y r   )r{   r|   r   r   r   intermediate_sizer   rB   
hidden_actstrr
   intermediate_act_fnr   s     r(   r|   zRealmIntermediate.__init__  s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r   r   r   c                 J    | j                  |      }| j                  |      }|S r   )r   r  r   r   s     r(   r   zRealmIntermediate.forward  s&    

=100?r   r   r   s   @r(   r  r    s#    9U\\ ell r   r  c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )RealmOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r{   r|   r   r   r  r   r   r   r   r   r   r   r   s     r(   r|   zRealmOutput.__init__  s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r   r   r   r   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   r   r   s      r(   r   zRealmOutput.forward  r   r   r   r   s   @r(   r  r    r   r   r  c                       e Zd Z fdZ	 	 	 	 	 	 ddej
                  deej                     deej                     deej                     deej                     deeeej                           dee	   d	eej
                     fd
Z
d Z xZS )
RealmLayerc                 f   t         |           |j                  | _        d| _        t	        |      | _        |j                  | _        |j                  | _        | j                  r,| j                  st        |  d      t	        |d      | _	        t        |      | _        t        |      | _        y )Nr   z> should be used as a decoder model if cross attention is addedrt   r   )r{   r|   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attentionr   crossattentionr  intermediater  r  r   s     r(   r|   zRealmLayer.__init__  s    '-'E'E$'/ ++#)#=#= ##?? D6)g!hii"0Q["\D-f5!&)r   r   r   r   r   r   r   r   r   c           	         ||d d nd }| j                  |||||      }	|	d   }
| j                  r|	dd }|	d   }n|	dd  }d }| j                  rT|Rt        | d      st        d|  d      ||d	d  nd }| j	                  |
||||||      }|d   }
||dd z   }|d   }|z   }t        | j                  | j                  | j                  |
      }|f|z   }| j                  r|fz   }|S )
Nr1   )r   r   r   r   rv   r"  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )	r   r   r   r   r"  r   feed_forward_chunkr  r  )r   r   r   r   r   r   r   r   self_attn_past_key_valueself_attention_outputsr  r   present_key_valuecross_attn_present_key_valuecross_attn_past_key_valuecross_attention_outputslayer_outputs                    r(   r   zRealmLayer.forward  s}    :H9S>"1#5Y] !%/3 "0 "
 2!4 ??,Qr2G 6r :,QR0G'+$??4@4!12 =dV DD D  @N?Yrs(;_c%&*&9&9 %&)!'#  7q9 7" ==G ,C2+F( 14P P0##T%A%A4CSCSUe
  /G+ ??!2 44Gr   c                 L    | j                  |      }| j                  ||      }|S r   )r#  r  )r   r  intermediate_outputr,  s       r(   r%  zRealmLayer.feed_forward_chunk  s,    "//0@A{{#68HIr   r   )rE   r   r   r|   rW   r   r   r   r   r   r   r%  r   r   s   @r(   r  r    s    *" 7;15=A>BDH,1?||? !!2!23? E--.	?
  ((9(9:? !)):): ;? !uU->->'?!@A? $D>? 
u||	?Br   r  c                   D    e Zd Z fdZ	 	 	 	 	 	 	 	 	 ddej
                  deej                     deej                     deej                     deej                     deeeej                           dee	   d	ee	   d
ee	   dee	   de
eej
                     ef   fdZ xZS )RealmEncoderc                     t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        d| _	        y c c}w )NF)
r{   r|   r[   r   
ModuleListrangenum_hidden_layersr  layergradient_checkpointing)r   r[   _rD   s      r(   r|   zRealmEncoder.__init__  sN    ]]fF^F^@_#`1Jv$6#`a
&+# $as   A#r   r   r   r   r   past_key_valuesr   r   output_hidden_statesreturn_dictr   c                    |	rdnd }|rdnd }|r| j                   j                  rdnd }| j                  r%| j                  r|rt        j                  d       d}|rdnd }t        | j                        D ]  \  }}|	r||fz   }|||   nd }|||   nd }| j                  r/| j                  r#| j                  |j                  |||||||      }n ||||||||      }|d   }|r	||d   fz  }|s|||d   fz   }| j                   j                  s||d   fz   } |	r||fz   }|
st        d |||||fD              S t        |||||	      S )
Nr%   zZ`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...Fr   rv   r   r1   c              3   $   K   | ]  }|| 
 y wr   r%   )r&   vs     r(   r)   z'RealmEncoder.forward.<locals>.<genexpr>a  s      
 = 
r*   )last_hidden_stater8  r   
attentionscross_attentions)r[   r!  r6  trainingr7   warning_once	enumerater5  _gradient_checkpointing_func__call__tupler   )r   r   r   r   r   r   r8  r   r   r9  r:  all_hidden_statesall_self_attentionsall_cross_attentionsnext_decoder_cacheilayer_modulelayer_head_maskr   layer_outputss                       r(   r   zRealmEncoder.forward  s    #7BD$5b4%64;;;Z;Zr`d&&4==##p "	#,R$(4 #	VOA|#$58H$H!.7.CilO3B3N_Q/TXN**t}} $ A A ))!"#)*"%	! !-!"#)*"%! *!,M"}R'8&::" &9]1=M<O&O#;;22+?=QRCSBU+U(G#	VJ   1]4D D 
 "&%'(
 
 
 9+.+*1
 	
r   )	NNNNNNFFT)rE   r   r   r|   rW   r   r   r   r   r   r   r   r   r   r   s   @r(   r0  r0    s   , 7;15=A>BEI$(,1/4&*S
||S
 !!2!23S
 E--.	S

  ((9(9:S
 !)):): ;S
 "%e.?.?(@"ABS
 D>S
 $D>S
 'tnS
 d^S
 
uU\\"$MM	NS
r   r0  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )RealmPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )r{   r|   r   r   r   r   Tanh
activationr   s     r(   r|   zRealmPooler.__init__v  s9    YYv1163E3EF
'')r   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   rS  )r   r   first_token_tensorpooled_outputs       r(   r   zRealmPooler.forward{  s6     +1a40

#566r   r   r   s   @r(   rP  rP  u  s#    $
U\\ ell r   rP  c                       e Zd ZU dZdZej                  ed<   dZe	e
ej                        ed<   dZe	e
ej                        ed<   y)RealmEmbedderOutputa*  
    Outputs of [`RealmEmbedder`] models.

    Args:
        projected_score (`torch.FloatTensor` of shape `(batch_size, config.retriever_proj_size)`):

            Projected score.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nprojected_scorer   r?  )rE   r   r   r   rY  rW   r   __annotations__r   r   r   r?  r%   r   r(   rX  rX    sM    ( *.OU&&-8<M8E%"3"345<59Ju00129r   rX  c                   x    e Zd ZU dZdZej                  ed<   dZej                  ed<   dZ	ej                  ed<   y)RealmScorerOutputa'  
    Outputs of [`RealmScorer`] models.

    Args:
        relevance_score (`torch.FloatTensor` of shape `(batch_size, config.num_candidates)`):
            The relevance score of document candidates (before softmax).
        query_score (`torch.FloatTensor` of shape `(batch_size, config.retriever_proj_size)`):
            Query score derived from the query embedder.
        candidate_score (`torch.FloatTensor` of shape `(batch_size, config.num_candidates, config.retriever_proj_size)`):
            Candidate score derived from the embedder.
    Nrelevance_scorequery_scorecandidate_score)
rE   r   r   r   r]  rW   r   rZ  r^  r_  r%   r   r(   r\  r\    s9    
 *.OU&&-%)K""))-OU&&-r   r\  c                      e Zd ZU dZdZej                  ed<   dZej                  ed<   dZ	ej                  ed<   dZ
ej                  ed<   dZej                  ed<   dZej                  ed<   dZej                  ed	<   dZej"                  ed
<   dZej"                  ed<   dZeeej                        ed<   dZeeej                        ed<   y)RealmReaderOutputa+	  
    Outputs of [`RealmReader`] models.

    Args:
        loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
            Total loss.
        retriever_loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
            Retriever loss.
        reader_loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `start_positions`, `end_positions`, `has_answers` are provided):
            Reader loss.
        retriever_correct (`torch.BoolTensor` of shape `(config.searcher_beam_size,)`, *optional*):
            Whether or not an evidence block contains answer.
        reader_correct (`torch.BoolTensor` of shape `(config.reader_beam_size, num_candidates)`, *optional*):
            Whether or not a span candidate contains answer.
        block_idx (`torch.LongTensor` of shape `()`):
            The index of the retrieved evidence block in which the predicted answer is most likely.
        candidate (`torch.LongTensor` of shape `()`):
            The index of the retrieved span candidates in which the predicted answer is most likely.
        start_pos (`torch.IntTensor` of shape `()`):
            Predicted answer starting position in *RealmReader*'s inputs.
        end_pos (`torch.IntTensor` of shape `()`):
            Predicted answer ending position in *RealmReader*'s inputs.
        hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
            shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nlossretriever_lossreader_lossretriever_correctreader_correct	block_idx	candidate	start_posend_posr   r?  )rE   r   r   r   rb  rW   r   rZ  rc  rd  re  
BoolTensorrf  rg  r   rh  ri  int32rj  r   r   r   r?  r%   r   r(   ra  ra    s    !F #D%

"(,NE%%,%)K"")*.u''.'+NE$$+"&Iu&"&Iu&!Iu{{!GU[[8<M8E%"3"345<59Ju00129r   ra  c                   B    e Zd ZU dZdZeed<   dZej                  ed<   y)RealmForOpenQAOutputz

    Outputs of [`RealmForOpenQA`] models.

    Args:
        reader_output (`dict`):
            Reader output.
        predicted_answer_ids (`torch.LongTensor` of shape `(answer_sequence_length)`):
            Predicted answer ids.
    Nreader_outputpredicted_answer_ids)
rE   r   r   r   ro  dictrZ  rp  rW   r   r%   r   r(   rn  rn    s$    	 M4-1%**1r   rn  c                   $     e Zd Z fdZd Z xZS )RealmPredictionHeadTransformc                 h   t         |           t        j                  |j                  |j                        | _        t        |j                  t              rt        |j                     | _
        n|j                  | _
        t        j                  |j                  |j                        | _        y r   )r{   r|   r   r   r   r   rB   r  r  r
   transform_act_fnr   r   r   s     r(   r|   z%RealmPredictionHeadTransform.__init__  s{    YYv1163E3EF
f''-$*6+<+<$=D!$*$5$5D!f&8&8f>S>STr   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r   ru  r   r  s     r(   r   z$RealmPredictionHeadTransform.forward  s4    

=1--m<}5r   rE   r   r   r|   r   r   r   s   @r(   rs  rs    s    Ur   rs  c                   *     e Zd Z fdZd Zd Z xZS )RealmLMPredictionHeadc                 H   t         |           t        |      | _        t	        j
                  |j                  |j                  d      | _        t	        j                  t        j                  |j                              | _        | j                  | j                  _        y )NF)r0   )r{   r|   rs  	transformr   r   r   r~   decoder	ParameterrW   r   r0   r   s     r(   r|   zRealmLMPredictionHead.__init__  sm    5f= yy!3!3V5F5FUSLLV->->!?@	 !IIr   c                 :    | j                   | j                  _         y r   )r0   r|  r   s    r(   _tie_weightsz"RealmLMPredictionHead._tie_weights  s     IIr   c                 J    | j                  |      }| j                  |      }|S r   )r{  r|  r  s     r(   r   zRealmLMPredictionHead.forward  s$    }5]3r   )rE   r   r   r|   r  r   r   r   s   @r(   ry  ry    s    &&r   ry  c                   $     e Zd Z fdZd Z xZS )RealmOnlyMLMHeadc                 B    t         |           t        |      | _        y r   )r{   r|   ry  predictionsr   s     r(   r|   zRealmOnlyMLMHead.__init__  s    08r   c                 (    | j                  |      }|S r   )r  )r   sequence_outputprediction_scoress      r(   r   zRealmOnlyMLMHead.forward"  s     ,,_=  r   rw  r   s   @r(   r  r    s    9!r   r  c                   $     e Zd Z fdZd Z xZS )RealmScorerProjectionc                     t         |           t        |      | _        t	        j
                  |j                  |j                        | _        t	        j                  |j                  |j                        | _	        y r   )r{   r|   ry  r  r   r   r   retriever_proj_sizer   r   r   r   s     r(   r|   zRealmScorerProjection.__init__(  sW    08YYv1163M3MN
f&@&@fF[F[\r   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r  s     r(   r   zRealmScorerProjection.forward.  s$    

=1}5r   rw  r   s   @r(   r  r  '  s    ]r   r  c                   $     e Zd Z fdZd Z xZS )RealmReaderProjectionc                 p   t         |           || _        t        j                  |j
                  |j                  dz        | _        t        j                  |j                  d      | _        t        j                  |j                  |j                        | _        t        j                         | _        y )Nr1   r   rq   )r{   r|   r[   r   r   r   span_hidden_sizedense_intermediatedense_outputr   reader_layer_norm_epslayer_normalizationReLUrelur   s     r(   r|   zRealmReaderProjection.__init__5  s    "$))F,>,>@W@WZ[@["\IIf&=&=qA#%<<0G0GVMiMi#j GGI	r   c                      fd}t         j                  fd} j                  |      }|j                  dd      \  }} ||      \  }}}	t        j                  |d|      }
t        j                  |d|      }|
|z   } j                  |      } j                  |      } j                  |      j                  d      }| ||	|j                        z  }|||fS )	Nc                 b     j                   \  } fdt        fdt        	j                  j                        D         \  }}t        j                  |d      }t        j                  |d      }t        j                   d|      }t        j                   d|      }||z  }|||fS )aK  
            Generate span candidates.

            Args:
                masks: <bool> [num_retrievals, max_sequence_len]

            Returns:
                starts: <int32> [num_spans] ends: <int32> [num_spans] span_masks: <int32> [num_retrievals, num_spans]
                whether spans locate in evidence block.
            c                     t        j                  | z
  dz   j                        }t        j                  | dz
  j                        }||fS )Nr   r   )rW   r   r   )widthcurrent_startscurrent_endsmasksmax_sequence_lens      r(   _spans_given_widthzRRealmReaderProjection.forward.<locals>.span_candidates.<locals>._spans_given_widthK  sN    !&.>.F.JSXS_S_!`$||EAI7GPUP\P\]%|33r   c              3   4   K   | ]  } |d z           yw)r   Nr%   )r&   wr  s     r(   r)   zIRealmReaderProjection.forward.<locals>.span_candidates.<locals>.<genexpr>P  s      fq!3AE!: fs   r   rv   r   r	  )rT   rA   r3  r[   max_span_widthrW   r   index_select)
r  r7  startsendsstart_masks	end_masks
span_masksr  r  r   s
   `      @@r(   span_candidatesz6RealmReaderProjection.forward.<locals>.span_candidates>  s     #(++A4
  fE$++JdJdDe fgLFD YYvq)F99T1%D  ,,U&IK**5bEI$y0J4++r   c                 j    d| j                  |      z
  t        j                  |      j                  z  S N      ?typerW   finfominmaskrz   s     r(   mask_to_scorez4RealmReaderProjection.forward.<locals>.mask_to_score]  s*    $))E**ekk%.@.D.DDDr   r1   rv   r   r   r  ry   )
rW   float32r  chunkr  r  r  r  squeezerz   )r   r   
block_maskr  r  start_projectionend_projectioncandidate_startscandidate_endscandidate_maskcandidate_start_projectionscandidate_end_projectionscandidate_hiddenreader_logitss   `             r(   r   zRealmReaderProjection.forward=  s    	,> ',mm 	E //>+8+>+>qb+>+I(.;J:;V8..&+&8&89IqXh&i#$)$6$6~1Tb$c!69RR  99%56334DE))*:;CCBG~]=P=PQQ.>>r   rw  r   s   @r(   r  r  4  s    7?r   r  aH  
    This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use
    it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`RealmConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
a5
  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        position_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*):
            Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`:

            - 1 indicates the head is **not masked**,
            - 0 indicates the head is **masked**.

        inputs_embeds (`torch.FloatTensor` of shape `({0}, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        output_attentions (`bool`, *optional*):
            Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned
            tensors for more detail.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                   (    e Zd ZdZeZeZdZd Z	d Z
y)RealmPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    realmc                    t        |t        j                        rm|j                  j                  j                  d| j                  j                         |j                  %|j                  j                  j                          yyt        |t        j                        rz|j                  j                  j                  d| j                  j                         |j                  2|j                  j                  |j                     j                          yyt        |t        j                        rJ|j                  j                  j                          |j                  j                  j                  d       yy)zInitialize the weightsg        )meanstdNr  )rB   r   r   r-   rY   normal_r[   initializer_ranger0   zero_r}   rp   r   fill_)r   modules     r(   _init_weightsz"RealmPreTrainedModel._init_weights  s   fbii( MM&&CT[[5R5R&S{{&  &&( '-MM&&CT[[5R5R&S!!-""6#5#56<<> .-KK""$MM$$S) .r   c                     g }|D ]W  }||j                  d       |j                  }t        |      dkD  r|j                  d|d   f      }|j                  |       Y |S )z.Flatten inputs' shape to (-1, input_shape[-1])Nr1   rv   )r@   rT   rQ   r   )r   inputsflattened_inputsr   r   s        r(   _flatten_inputsz$RealmPreTrainedModel._flatten_inputs  sm     	0F~ ''-$ll{#a'#[["k"o)>?F ''/	0  r   N)rE   r   r   r   r   config_classrl   load_tf_weightsbase_model_prefixr  r  r%   r   r(   r  r    s#    
 L.O*  r   r  c                   X     e Zd ZdZd fd	Zd Zd Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ xZ	S )	RealmBertModelz?
    Same as the original BertModel but remove docstrings.
    c                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd | _        | j                          y r   )
r{   r|   r[   rn   r   r0  encoderrP  pooler	post_init)r   r[   add_pooling_layerrD   s      r(   r|   zRealmBertModel.__init__  sK     )&1#F+->k&)D 	r   c                 .    | j                   j                  S r   r   r   r  s    r(   get_input_embeddingsz#RealmBertModel.get_input_embeddings  s    ...r   c                 &    || j                   _        y r   r  r   r   s     r(   set_input_embeddingsz#RealmBertModel.set_input_embeddings  s    */'r   c                     |j                         D ]7  \  }}| j                  j                  |   j                  j	                  |       9 y)z
        Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
        class PreTrainedModel
        N)itemsr  r5  r   r
  )r   heads_to_pruner5  r  s       r(   _prune_headszRealmBertModel._prune_heads  sE    
 +002 	CLE5LLu%//;;EB	Cr   c                    ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }| j                   j                  r|
|
n| j                   j
                  }
nd}
||t        d      |#| j                  ||       |j                         }n!||j                         d d }nt        d      |\  }}||j                  n|j                  }|	|	d   d   j                  d   nd}|t        j                  |||z   f|      }|pt        | j                  d      r4| j                  j                  d d d |f   }|j!                  ||      }|}n&t        j"                  |t        j$                  |	      }| j'                  ||      }| j                   j                  rE|C|j                         \  }}}||f}|t        j                  ||      }| j)                  |      }nd }| j+                  || j                   j,                        }| j                  |||||
      }| j/                  ||||||	|
|||
      }|d   }| j0                  | j1                  |      nd }|s
||f|dd  z   S t3        |||j4                  |j6                  |j8                  |j:                        S )NFzDYou cannot specify both input_ids and inputs_embeds at the same timerv   z5You have to specify either input_ids or inputs_embedsr   r1   r  rx   r   )r   ru   rx   r   r   )	r   r   r   r   r8  r   r   r9  r:  r   )r>  pooler_outputr8  r   r?  r@  )r[   r   r9  use_return_dictr   r   r   %warn_if_padding_and_no_attention_maskr   r   rT   rW   onesr   r   rx   r   r   r   get_extended_attention_maskinvert_attention_maskget_head_maskr4  r  r  r   r8  r   r?  r@  )r   r   r   rx   ru   r   r   r   r   r8  r   r   r9  r:  r   
batch_sizer   r   r   r   r   extended_attention_maskencoder_batch_sizeencoder_sequence_lengthr7  encoder_hidden_shapeencoder_extended_attention_maskembedding_outputencoder_outputsr  rV  s                                  r(   r   zRealmBertModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B];;!!%.%:	@U@UII ]%>cdd"66y.Q#..*K&',,.s3KTUU!,
J%.%:!!@T@T DSC^!3A!6!<!<Q!?de!"ZZ*jCY6Y)ZdjkN!t(89*.//*H*HKZK*X'3J3Q3QR\^h3i0!A!&[

SY!Z 150P0PQ_al0m ;;!!&;&G=R=W=W=Y: 7$68O#P %-).4HQW)X&.2.H.HI_.`+.2+ &&y$++2O2OP	??%)'#9 + 
 ,,2"7#B+/!5# ' 
 *!,8<8OO4UY#]3oab6III;-'+;;)77&11,==
 	
r   )TNNNNNNNNNNNNN)
rE   r   r   r   r|   r  r  r  r   r   r   s   @r(   r  r    sL    /0C "#!l
r   r  z`The embedder of REALM outputting projected score that will be used to calculate relevance score.c                   z    e Zd ZdgZ fdZd Zd Z eej                  d             e
ee      	 	 	 	 	 	 	 	 	 ddeej                     deej                      d	eej                     d
eej                     deej                      deej                      dee   dee   dee   deeef   fd              Z xZS )rJ   zcls.predictions.decoder.biasc                     t         |   |       t        | j                        | _        t        | j                        | _        | j                          y r   )r{   r|   r  r[   r  r  r   r  r   s     r(   r|   zRealmEmbedder.__init__r  s:     #DKK0
(5r   c                 B    | j                   j                  j                  S r   r  r   r   r  s    r(   r  z"RealmEmbedder.get_input_embeddingsy      zz$$444r   c                 :    || j                   j                  _        y r   r  r  s     r(   r  z"RealmEmbedder.set_input_embeddings|      05

-r   batch_size, sequence_lengthoutput_typer  r   r   rx   ru   r   r   r   r9  r:  r   c
                     |	|	n| j                   j                  }	| j                  |||||||||		      }
|
d   }| j                  |      }|	s	|f|
dd z   S t	        ||
j
                  |
j                        S )a  
        Returns:

        Example:

        ```python
        >>> from transformers import AutoTokenizer, RealmEmbedder
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-embedder")
        >>> model = RealmEmbedder.from_pretrained("google/realm-cc-news-pretrained-embedder")

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> projected_score = outputs.projected_score
        ```
        r   rx   ru   r   r   r   r9  r:  r   r1   r	   )rY  r   r?  )r[   r  r  r   rX  r   r?  )r   r   r   rx   ru   r   r   r   r9  r:  realm_outputsr  rY  s                r(   r   zRealmEmbedder.forward  s    B &1%<k$++B]B]

))%'/!5# # 

 &a(((=1#%a(:::& /+99(33 r   )	NNNNNNNNN)rE   r   r   _tied_weights_keysr|   r  r  r   REALM_INPUTS_DOCSTRINGformatr   rX  _CONFIG_FOR_DOCr   rW   r   r   r   r   r   r   r   r   s   @r(   rJ   rJ   k  s-   
 9956 ++A+H+HIf+gh+>_] 156:59371559,0/3&*9E,,-9 !!2!239 !!1!12	9
 u//09 E--.9   1 129 $D>9 'tn9 d^9 
u))	*9 ^ i9r   rJ   zoThe scorer of REALM outputting relevance scores representing the score of document candidates (before softmax).c            !           e Zd ZdZd fd	Z eej                  d             ee	e
      	 	 	 	 	 	 	 	 	 	 	 	 	 ddeej                     deej                     deej                     deej                     d	eej                     d
eej                     deej                     deej                     deej                     deej                     dee   dee   dee   deee	f   fd              Z xZS )RealmScorerz
    Args:
        query_embedder ([`RealmEmbedder`]):
            Embedder for input sequences. If not specified, it will use the same embedder as candidate sequences.
    c                     t         |   |       t        | j                        | _        ||n| j                  | _        | j                          y r   )r{   r|   rJ   r[   embedderquery_embedderr  )r   r[   r  rD   s      r(   r|   zRealmScorer.__init__  s@     %dkk20>0JnPTP]P]r   r   r  r   r   rx   ru   candidate_input_idscandidate_attention_maskcandidate_token_type_idscandidate_inputs_embedsr   r   r   r9  r:  r   c                    ||n| j                   j                  }||
t        d      ||t        d      | j                  |||||	|
|||	      }| j	                  |||      \  }}}| j                  |||||	||||	      }|d   }|d   }|j                  d| j                   j                  | j                   j                        }t        j                  d||      }|s|||fS t        |||      S )a
  
        candidate_input_ids (`torch.LongTensor` of shape `(batch_size, num_candidates, sequence_length)`):
            Indices of candidate input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        candidate_attention_mask (`torch.FloatTensor` of shape `(batch_size, num_candidates, sequence_length)`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        candidate_token_type_ids (`torch.LongTensor` of shape `(batch_size, num_candidates, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.

            [What are token type IDs?](../glossary#token-type-ids)
        candidate_inputs_embeds (`torch.FloatTensor` of shape `(batch_size * num_candidates, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `candidate_input_ids` you can choose to directly pass an embedded
            representation. This is useful if you want more control over how to convert *candidate_input_ids* indices
            into associated vectors than the model's internal embedding lookup matrix.

        Returns:

        Example:

        ```python
        >>> import torch
        >>> from transformers import AutoTokenizer, RealmScorer

        >>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-scorer")
        >>> model = RealmScorer.from_pretrained("google/realm-cc-news-pretrained-scorer", num_candidates=2)

        >>> # batch_size = 2, num_candidates = 2
        >>> input_texts = ["How are you?", "What is the item in the picture?"]
        >>> candidates_texts = [["Hello world!", "Nice to meet you!"], ["A cute cat.", "An adorable dog."]]

        >>> inputs = tokenizer(input_texts, return_tensors="pt")
        >>> candidates_inputs = tokenizer.batch_encode_candidates(candidates_texts, max_length=10, return_tensors="pt")

        >>> outputs = model(
        ...     **inputs,
        ...     candidate_input_ids=candidates_inputs.input_ids,
        ...     candidate_attention_mask=candidates_inputs.attention_mask,
        ...     candidate_token_type_ids=candidates_inputs.token_type_ids,
        ... )
        >>> relevance_score = outputs.relevance_score
        ```z5You have to specify either input_ids or input_embeds.zJYou have to specify either candidate_input_ids or candidate_inputs_embeds.r  r   rv   z
bd,bnd->bn)r]  r^  r_  )r[   r  r   r  r  r  r   num_candidatesr  rW   r   r\  )r   r   r   rx   ru   r  r  r  r  r   r   r   r9  r:  query_outputsflattened_input_idsflattened_attention_maskflattened_token_type_idscandidate_outputsr^  r_  r]  s                         r(   r   zRealmScorer.forward  sH   R &1%<k$++B]B]!6TUU&+B+Jijj++))%'/!5# , 

 UYThTh!9;SU
Q	68P !MM33%1/!5# * 

 $A&+A.)..r4;;3M3Mt{{OnOno,,|[/R"K@@ +Ve
 	
r   r   r  )rE   r   r   r   r|   r   r  r  r   r\  r	  r   rW   r   r   r   r   r   r   r   r   s   @r(   r  r    s   
 ++A+H+HIf+gh+<?[ 156:5937:>@D?C?C1559,0/3&*z
E,,-z
 !!2!23z
 !!1!12	z

 u//0z
 &e&6&67z
 #+5+<+<"=z
 #+5+;+;"<z
 "*%*;*;!<z
 E--.z
   1 12z
 $D>z
 'tnz
 d^z
 
u''	(z
 \ iz
r   r  zrThe knowledge-augmented encoder of REALM outputting masked language model logits and marginal log-likelihood loss.c                       e Zd ZdgZ fdZd Zd Zd Zd Z e	e
j                  d             eee      	 	 	 	 	 	 	 	 	 	 	 	 dd	eej"                     d
eej$                     deej"                     deej"                     deej$                     deej$                     deej$                     deej"                     deej"                     dee   dee   dee   deeef   fd              Z xZS )rI   zcls.predictions.decoderc                     t         |   |       t        | j                        | _        t        | j                        | _        | j                          y r   )r{   r|   r  r[   r  r  r   r  r   s     r(   r|   z!RealmKnowledgeAugEncoder.__init__X  s:     #DKK0
#DKK0r   c                 B    | j                   j                  j                  S r   r  r  s    r(   r  z-RealmKnowledgeAugEncoder.get_input_embeddings^  r  r   c                 :    || j                   j                  _        y r   r  r  s     r(   r  z-RealmKnowledgeAugEncoder.set_input_embeddingsa  r  r   c                 B    | j                   j                  j                  S r   )r   r  r|  r  s    r(   get_output_embeddingsz.RealmKnowledgeAugEncoder.get_output_embeddingsd  s    xx##+++r   c                     || j                   j                  _        |j                  | j                   j                  _        y r   )r   r  r|  r0   )r   new_embeddingss     r(   set_output_embeddingsz.RealmKnowledgeAugEncoder.set_output_embeddingsg  s,    '5$$2$7$7!r   z+batch_size, num_candidates, sequence_lengthr  r   r   rx   ru   r   r   r]  labelsmlm_maskr   r9  r:  r   c                 0   ||n| j                   j                  }||t        d      | j                  |||      \  }}}| j	                  |||||||
||	      }|d   }| j                  |      }|}d}|h|j                         \  }}|	&t        j                  |t        j                        }	n|	j                  t        j                        }	t        d      }|j                  d| j                   j                        }|j                  d	| j                   j                        j                  d      } |||      j                  || j                   j                  |       }|j!                  d      j#                  d      }||z   }|j%                  d	      }t        j&                  t        j(                  ||	z        t        j(                  |	      z         }|s|f|d
d z   }||f|z   S |S t+        |||j,                  |j.                        S )a  
        relevance_score (`torch.FloatTensor` of shape `(batch_size, num_candidates)`, *optional*):
            Relevance score derived from RealmScorer, must be specified if you want to compute the masked language
            modeling loss.

        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        mlm_mask (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Mask to avoid calculating joint loss on certain positions. If not specified, the loss will not be masked.
            Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

        Returns:

        Example:

        ```python
        >>> import torch
        >>> from transformers import AutoTokenizer, RealmKnowledgeAugEncoder

        >>> tokenizer = AutoTokenizer.from_pretrained("google/realm-cc-news-pretrained-encoder")
        >>> model = RealmKnowledgeAugEncoder.from_pretrained(
        ...     "google/realm-cc-news-pretrained-encoder", num_candidates=2
        ... )

        >>> # batch_size = 2, num_candidates = 2
        >>> text = [["Hello world!", "Nice to meet you!"], ["The cute cat.", "The adorable dog."]]

        >>> inputs = tokenizer.batch_encode_candidates(text, max_length=10, return_tensors="pt")
        >>> outputs = model(**inputs)
        >>> logits = outputs.logits
        ```NzZYou have to specify `relevance_score` when `labels` is specified in order to compute loss.r  r   ry   none)	reductionrv   r   r1   r	   )rb  logitsr   r?  )r[   r  r   r  r  r   r   rW   	ones_liker  r  r   r   r~   tiler  log_softmax	unsqueeze	logsumexpnansumsumr   r   r?  )r   r   r   rx   ru   r   r   r]  r#  r$  r   r9  r:  r  r  r  joint_outputsjoint_outputr  r_  masked_lm_lossr  r   loss_fct
mlm_logitsmlm_targetsmasked_lm_log_probcandidate_log_probjoint_gold_log_probmarginal_gold_log_probsr  s                                  r(   r   z RealmKnowledgeAugEncoder.forwardk  s2   p &1%<k$++B]B]/"9l  UYThTh~~U
Q	68P 

33%'/!5# # 

 %Q' HH\2)%+[[]"J
 ??6G#==7 (&9H +//DKK4J4JKJ ++a)C)CDII"MK"*:{"C"H"HDKK66
# " "1!<!<R!@!J!J2!N"47I"I&9&C&CA&F##ll5995Lx5W+X[`[d[dem[n+nooN')M!A,>>F3A3M^%.YSYY$'55$//	
 	
r   )NNNNNNNNNNNN)rE   r   r   r  r|   r  r  r  r"  r   r  r  r   r   r	  r   rW   r   r   r   r   r   r   r   r   s   @r(   rI   rI   P  s    4456,8 +%%&ST >X 156:593715597;-1/3,0/3&*x
E,,-x
 !!2!23x
 !!1!12	x

 u//0x
 E--.x
   1 12x
 "%"3"34x
 ))*x
 5++,x
 $D>x
 'tnx
 d^x
 
un$	%x
 Yx
r   rI   zThe reader of REALM.c            #           e Zd Z fdZ eej                  d             eee	      	 	 	 	 	 	 	 	 	 	 	 	 	 	 dde
ej                     de
ej                     de
ej                     de
ej                     de
ej                     d	e
ej                     d
e
ej                     de
ej                     de
ej                     de
ej                     de
ej                     de
e   de
e   de
e   deeef   fd              Z xZS )rC   c                     t         |   |       |j                  | _        t        |      | _        t        |      | _        t        |      | _        | j                          y r   )
r{   r|   
num_labelsr  r  r  r   r  
qa_outputsr  r   s     r(   r|   zRealmReader.__init__  sK      ++#F+
#F+/7r   z!reader_beam_size, sequence_lengthr  r   r   rx   ru   r   r   r]  r  start_positionsend_positionshas_answersr   r9  r:  r   c                    ||n| j                   j                  }|t        d      |t        d      |j                  d      | j                   j                  k  rt        d      | j                  |||||||||	      }|d   }| j                  ||d| j                   j                         \  }}}t        j                  |d| j                   j                   d      }||z  }t        j                  t        j                  |d	      j                        }t        j                  t        j                  |d	      j                        }t        j                  |d|
      }t        j                  |d|
      }d}d}d}d}d}|	.|
+|(d }d }|j                  d      } |	j                  d|       }	|
j                  d|       }
|}t        j                  |      }! ||||	d| j                   j                   |
d| j                   j                         }t        j                  |      }" |||      } ||j!                  d      |j!                  d            }||!j#                  t        j$                        z  }||"j#                  t        j$                        z  }||z   j'                         }|s||||f|dd z   }#|
|||||f|#z   S |#S t)        ||||||||||j*                  |j,                        S )ar  
        relevance_score (`torch.FloatTensor` of shape `(searcher_beam_size,)`, *optional*):
            Relevance score, which must be specified if you want to compute the logits and marginal log loss.
        block_mask (`torch.BoolTensor` of shape `(searcher_beam_size, sequence_length)`, *optional*):
            The mask of the evidence block, which must be specified if you want to compute the logits and marginal log
            loss.
        start_positions (`torch.LongTensor` of shape `(searcher_beam_size,)`, *optional*):
            Labels for position (index) of the start of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        end_positions (`torch.LongTensor` of shape `(searcher_beam_size,)`, *optional*):
            Labels for position (index) of the end of the labelled span for computing the token classification loss.
            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
            are not taken into account for computing the loss.
        has_answers (`torch.BoolTensor` of shape `(searcher_beam_size,)`, *optional*):
            Whether or not the evidence block has answer(s).

        Returns:
        NzCYou have to specify `relevance_score` to calculate logits and loss.zOYou have to specify `block_mask` to separate question block and evidence block.r   zQThe input sequence length must be greater than or equal to config.max_span_width.r  r   rv   r   r  c                    t        j                  t        j                  t        j                  | d      d      t        j                  |d            }t        j                  t        j                  t        j                  |d      d      t        j                  |d            }t        j                  t        j                  ||      d      S )zCompute correct span.r   rv   r   )rW   eqr,  rL   logical_and)r  r  gold_starts	gold_endsis_gold_startis_gold_ends         r(   compute_correct_candidatesz7RealmReader.forward.<locals>.compute_correct_candidatesK  s     !&OOEOO4Da$H!Leoo^ikmNn! $hhOOEOONA$FJEOO\egiLj
 yy!2!2=+!NPQRRr   c                     t         j                  fd}t        j                  |  ||| j                        z   d      }t        j                  | d      }||z
  S )z3Loss based on the negative marginal log-likelihood.c                 j    d| j                  |      z
  t        j                  |      j                  z  S r  r  r  s     r(   r  zERealmReader.forward.<locals>.marginal_log_loss.<locals>.mask_to_score[  s*    $))E"22ekk%6H6L6LLLr   ry   rv   r   )rW   r  r-  rz   )r(  
is_correctr  log_numeratorlog_denominators        r(   marginal_log_lossz.RealmReader.forward.<locals>.marginal_log_lossX  sR     /4mm M !&zY_YeYe9f0fln o"'//&b"A&66r   )r  r  rE  rF  r1   )rb  rc  rd  re  rf  rg  rh  ri  rj  r   r?  )r[   r  r   r   r  r  r=  reader_beam_sizerW   r,  argmaxmaxvaluesr  clamprL   r   r  r  r  ra  r   r?  )$r   r   r   rx   ru   r   r   r]  r  r>  r?  r@  r   r9  r:  r   r  r  r  r  retriever_logitspredicted_block_indexpredicted_candidatepredicted_startpredicted_end
total_lossrc  rd  re  rf  rI  rO  ignored_indexany_retriever_correctany_reader_correctr  s$                                       r(   r   zRealmReader.forward  s7   L &1%<k$++B]B]"bccnooq!DKK$>$>>pqq**))%'/!5#  

 "!* ;?//ZDKK,H,HI;
7' !???1t{{?[?[+\^`a)) %UYY}!-L-S-S T#ll599]+J+Q+QR,,-=1L_`**>qH[\
 &=+DI`S	7 ,003M-33BFO)//MBM +$)II.?$@!7!1-+A0L0LM'DKK,H,HI	N "'>!:.@QRN+M,>,>r,BNDWDWXZD[\K388GGN-225==AAK(;6<<>J+-@/S`adklmlndooF ) nk;Ln]`ff  !)#/)+)%!!//))
 	
r   )NNNNNNNNNNNNNN)rE   r   r   r|   r   r  r  r   ra  r	  r   rW   r   r   rk  r   r   r   r   r   r   s   @r(   rC   rC     s    ++A+H+HIl+mn+<?[ 156:593715597;156:4826,0/3&*W
E,,-W
 !!2!23W
 !!1!12	W

 u//0W
 E--.W
   1 12W
 "%"3"34W
 U--.W
 "%"2"23W
   0 01W
 e../W
 $D>W
 'tnW
 d^W
  
u''	(!W
 \ oW
r   rC   ay  
    Args:
        input_ids (`torch.LongTensor` of shape `({0})`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        attention_mask (`torch.FloatTensor` of shape `({0})`, *optional*):
            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

            - 1 for tokens that are **not masked**,
            - 0 for tokens that are **masked**.

            [What are attention masks?](../glossary#attention-mask)
        token_type_ids (`torch.LongTensor` of shape `({0})`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,
            1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token (should not be used in this model by design).

            [What are token type IDs?](../glossary#token-type-ids)
        answer_ids (`list` of shape `(num_answers, answer_length)`, *optional*):
            Answer ids for computing the marginal log-likelihood loss. Indices should be in `[-1, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-1` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
z?`RealmForOpenQA` for end-to-end open domain question answering.c                   &    e Zd Zd fd	Zed        Zd Z eej                  d             e
ee      	 	 	 	 ddeej                     deej                      deej                     d	eej                     d
ee   deeef   fd              Z xZS )rG   c           
      n   t         |   |       t        |      | _        t	        |      | _        | j                  dt        j                  d      j                  |j                  |j                  ft        j                  t        j                  d                   || _        | j                          y )N	block_embr%   cpu)r   rz   r   )r{   r|   rJ   r  rC   r   r   rW   r   	new_emptynum_block_recordsr  r  r   	retrieverr  )r   r[   rd  rD   s      r(   r|   zRealmForOpenQA.__init__  s     %f-!&)KKO%%..0J0JKmm||E* & 	
 #r   c                 r    | j                   r| j                  j                  S | j                  j                  S r   )rA  r[   searcher_beam_sizerP  r  s    r(   rf  z!RealmForOpenQA.searcher_beam_size  s)    ==;;111{{+++r   c                 D    | j                   j                  |      | _         y)zSend `self.block_emb` to a specific device.

        Args:
            device (`str` or `torch.device`):
                The device to which `self.block_emb` will be sent.
        N)r`  r   )r   r   s     r(   block_embedding_toz!RealmForOpenQA.block_embedding_to  s     **62r   z1, sequence_lengthr  r   r   rx   
answer_idsr:  r   c                 @   ||n| j                   j                  }||j                  d   dk7  rt        d      | j	                  |||d      }|d   }t        j                  d| j                  |j                  | j                  j                              }t        j                  || j                  d      \  }	}
|
j                         }
t        j                  | j                  d|
	      }| j                  |
j                         ||| j                   j                   
      \  }}}}|j                  | j"                  j                        }|j$                  j'                  t
        j(                        j                  | j"                  j                        }|j+                         j-                  |j.                  j'                  t
        j(                               |t        j0                  |t
        j(                  | j"                  j                        }t        j0                  |t
        j2                  | j"                  j                        }t        j0                  |t
        j2                  | j"                  j                        }t        j                  d|j                         |j                  | j"                  j                              }| j#                  |j4                  d| j                   j6                   |j8                  d| j                   j6                   |j.                  d| j                   j6                   |||||d	      }|j4                  |j:                     }||j<                  |j>                  dz    }|s||fS tA        ||      S )a  
        Returns:

        Example:

        ```python
        >>> import torch
        >>> from transformers import RealmForOpenQA, RealmRetriever, AutoTokenizer

        >>> retriever = RealmRetriever.from_pretrained("google/realm-orqa-nq-openqa")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/realm-orqa-nq-openqa")
        >>> model = RealmForOpenQA.from_pretrained("google/realm-orqa-nq-openqa", retriever=retriever)

        >>> question = "Who is the pioneer in modern computer science?"
        >>> question_ids = tokenizer([question], return_tensors="pt")
        >>> answer_ids = tokenizer(
        ...     ["alan mathison turing"],
        ...     add_special_tokens=False,
        ...     return_token_type_ids=False,
        ...     return_attention_mask=False,
        ... ).input_ids

        >>> reader_output, predicted_answer_ids = model(**question_ids, answer_ids=answer_ids, return_dict=False)
        >>> predicted_answer = tokenizer.decode(predicted_answer_ids)
        >>> loss = reader_output.loss
        ```r   r   z'The batch_size of the inputs must be 1.T)r   rx   r   r:  z	BD,QD->QBrv   )kr   r  )
max_lengthr  r   zD,BD->B)	r   r   rx   r]  r  r@  r>  r?  r:  )ro  rp  )!r[   r  rT   r   r  rW   r   r`  r   r   topkrf  r  r  rd  ra  reader_seq_lenr   special_tokens_maskr  r   logical_not_logical_and_rx   r   r   r   rP  r   rg  ri  rj  rn  )r   r   r   rx   ri  r:  question_outputsquestion_projectionbatch_scoresr7  retrieved_block_idsretrieved_block_embr@  ri  rj  concat_inputsr  retrieved_logitsro  predicted_blockrp  s                        r(   r   zRealmForOpenQA.forward  s   J &1%<k$++B]B] Y__Q%71%<FGG==~ko ) 
 /q1 ||KATAWAWX\XfXfXmXmAno!&LD<S<SY[!\199;#00QNab :>##%y*IcIc :H :
6Y &((););<"66;;EJJGJJRVR]R]RdRdJe
!..}/K/K/P/PQVQ[Q[/\]",,{%**T[[M_M_`KYejjI[I[\Ill7%**T[[EWEWXG !<<*2246I6L6LT[[M_M_6`
 #--a$++2N2NO(77DKK<X<XY(77DKK<X<XY,!#%! $ 

 (11-2I2IJ.}/F/FI^I^abIbc "666#'!5
 	
r   r   )NNNN)rE   r   r   r|   propertyrf  rh  r   REALM_FOR_OPEN_QA_DOCSTRINGr  r   rn  r	  r   rW   r   r   r   r   r   r   r   r   s   @r(   rG   rG     s    
  , ,
3 ++F+M+MNb+cd+?o^ 7;5915&*a
E,,-a
 !!2!23a
 !!1!12	a

 U--.a
 d^a
 
u**	+a
 _ ea
r   rG   )Gr   r   r9   dataclassesr   typingr   r   r   rW   r   torch.nnr   activationsr
   modeling_outputsr   r   r   r   modeling_utilsr   pytorch_utilsr   r   r   utilsr   r   r   r   configuration_realmr   
get_loggerrE   r7   _EMBEDDER_CHECKPOINT_FOR_DOC_ENCODER_CHECKPOINT_FOR_DOC_SCORER_CHECKPOINT_FOR_DOCr	  rl   Modulern   r   r   r  r   r  r  r  r0  rP  rX  r\  ra  rn  rs  ry  r  r  r  REALM_START_DOCSTRINGr  r  r  rJ   r  rI   rC   r{  rG   r%   r   r(   <module>r     s     	 ! ) )   % "  / m m u u , 
		H	%I G E hV=bii =@C CLbii    
0RYY 0f		 ")) S SlZ
299 Z
z"))  :+ : :4 . . .$ .: .: .:b 2; 2 2 299 "BII .!ryy !
BII 
@?BII @?F	 / d% ? % PL
) L
^ fK( K	K\ uL
& L
	L
^ 
R
3 R

R
j ,.CDd
& d
 Ed
N B ED
) D
	D
r   