
    sg                         d Z ddlZddlmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ dZ ej                  e      Zd	ed
edej&                  fdZ G d d      Z G d d      Zy)z%REALM Retriever model implementation.    N)OptionalUnion)hf_hub_download   )AutoTokenizer)loggingzblock_records.npyblock_records_pathnum_block_recordsreturnc                     dd l mc m} |j                  j	                  | d      }|j                  |d      }t        |j                  d      j                               }|S )Nr   i    )buffer_sizeT)drop_remainder   )	tensorflow.compat.v1compatv1dataTFRecordDatasetbatchnexttakeas_numpy_iterator)r	   r
   tfblocks_dataset	np_records        g/var/www/html/venv/lib/python3.12/site-packages/transformers/models/deprecated/realm/retrieval_realm.pyconvert_tfrecord_to_npr   !   s]    %%WW,,-?M^,_N#))*;D)QN^((+==?@I    c                   &    e Zd ZdZ	 	 	 	 ddZd Zy)ScaNNSearcherztNote that ScaNNSearcher cannot currently be used within the model. In future versions, it might however be included.c                     ddl m}  |||d      }|j                  |||      }|j                  |      }|j	                         | _        y)zBuild scann searcher.r   )builderdot_product)dbnum_neighborsdistance_measure)
num_leavesnum_leaves_to_searchtraining_sample_size)dimensions_per_blockN)#scann.scann_ops.py.scann_ops_pybindr"   treescore_ahbuildsearcher)	selfr$   r%   r*   r'   r(   r)   Builderr"   s	            r   __init__zScaNNSearcher.__init__.   sU     	KR}}],,!8Lcw  
 ""8L"Mr   c                     | j                   j                  |j                         j                               \  }}|j	                  d      S )Nint64)r/   search_batcheddetachcpuastype)r0   question_projectionretrieved_block_ids_s       r   r5   zScaNNSearcher.search_batchedC   s@    !%!=!=>Q>X>X>Z>^>^>`!aQ"))'22r   N)   i  d   i )__name__
__module____qualname____doc__r2   r5    r   r   r    r    +   s    ~  #(*3r   r    c                   p     e Zd ZdZ fdZddZedeee	e
j                  f      fd       Zd Zd Z xZS )	RealmRetrieverah  The retriever of REALM outputting the retrieved evidence block and whether the block has answers as well as answer
    positions."

        Parameters:
            block_records (`np.ndarray`):
                A numpy array which cantains evidence texts.
            tokenizer ([`RealmTokenizer`]):
                The tokenizer to encode retrieved texts.
    c                 >    t         |           || _        || _        y N)superr2   block_records	tokenizer)r0   rH   rI   	__class__s      r   r2   zRealmRetriever.__init__S   s    *"r   c                    t        j                  | j                  |d      }| j                  j	                  |d   d      }g }g }	|D ]2  }
|j                  |       |	j                  |
j	                                4 | j                  ||	ddd|      }|j                  |      }|| j                  ||      |fz   S d d d |fS )Nr   )indicesaxisT)skip_special_tokens)padding
truncationreturn_special_tokens_mask
max_length)npr   rH   rI   decodeappendconvert_to_tensorsblock_has_answer)r0   r:   question_input_ids
answer_idsrR   return_tensorsretrieved_blocksquestiontext	text_pairretrieved_blockconcat_inputsconcat_inputs_tensorss                r   __call__zRealmRetriever.__call__X   s    774#5#5?RYZ[>>(();A)>TX(Y	/ 	7OKK!_3356	7 )TdW[hr ' 
 !. @ @ P!((
CG\F^^^$&;<<r   pretrained_model_name_or_pathc                     t         j                  j                  |      r%t         j                  j                  |t              }nt        d|t        d|}t        j                  |d      }t        j                  |g|i |} | ||      S )N)repo_idfilenameT)allow_picklerB   )
ospathisdirjoin_REALM_BLOCK_RECORDS_FILENAMEr   rS   loadr   from_pretrained)clsrc   init_inputskwargsr	   rH   rI   s          r   rn   zRealmRetriever.from_pretrainedm   s    77==67!#.KMj!k!0 "5@]"ag"  2F!112OhR]hagh	=),,r   c                     t        j                  t        j                  j	                  |t
              | j                         | j                  j                  |       y rF   )	rS   saverh   ri   rk   rl   rH   rI   save_pretrained)r0   save_directorys     r   rt   zRealmRetriever.save_pretrained{   s8    
^-JKTM_M_`&&~6r   c           	      b   g }g }g }d}|j                   D ]T  }|j                         }|j                  | j                  j                        }	|	dz   ||	dz   d j                  | j                  j                        z   }
|j                  g        |j                  g        |D ]n  }t        |	dz   |
      D ]Z  }|d   ||   k(  s|||t        |      z    |k(  s$|d   j                  |       |d   j                  |t        |      z   dz
         \ p t        |d         dk(  r|j                  d       #|j                  d       t        |d         |kD  sGt        |d         }W t        ||      D ]0  \  }}t        |      |k  sdg|t        |      z
  z  }||z  }||z  }2 |||fS )z&check if retrieved_blocks has answers.r   r   NFT)		input_idstolistindexrI   sep_token_idrU   rangelenzip)r0   r`   rY   has_answers	start_posend_posmax_answersinput_idinput_id_listfirst_sep_idxsecond_sep_idxansweridx
start_pos_end_pos_paddeds                   r   rW   zRealmRetriever.block_has_answer   s   	%// 	5H$OO-M)//0K0KLM*Q.}q?P?R1S1Y1YZ^ZhZhZuZu1vvNR NN2$ F !2NC FCayM#$66(sS[/@AVK%bM005#BK..sS[/@1/DE	FF 9R=!Q&""5)""4(y}%3"%im"4K)	5. %(	7$; 	# J:,s:!>?f$
F"		#
 Iw..r   )Npt)r>   r?   r@   rA   r2   rb   classmethodr   r   strrh   PathLikern   rt   rW   __classcell__)rJ   s   @r   rD   rD   H   sM    #
=* -HU3PRP[P[K[E\<] - -7#/r   rD   )rA   rh   typingr   r   numpyrS   huggingface_hubr    r   utilsr   rl   
get_loggerr>   loggerr   intndarrayr   r    rD   rB   r   r   <module>r      sl    , 	 "  +   !4  
		H	%s s rzz 3 3:\/ \/r   