
    sg                         d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	 ddl
mZ  e	j                  e      Z G d	 d
      Zy)zTokenization classes for RAG.    N)ListOptional   )BatchEncoding)logging   )	RagConfigc                       e Zd Zd Zd Zed        Zd Zd Zd Z	d Z
d Z	 	 	 	 	 	 dd
ee   deee      dee   dee   dedededefdZy	)RagTokenizerc                 B    || _         || _        | j                   | _        y N)question_encoder	generatorcurrent_tokenizer)selfr   r   s      [/var/www/html/venv/lib/python3.12/site-packages/transformers/models/rag/tokenization_rag.py__init__zRagTokenizer.__init__   s     0"!%!6!6    c                 z   t         j                  j                  |      rt        d| d      t        j                  |d       t         j                  j                  |d      }t         j                  j                  |d      }| j                  j                  |       | j                  j                  |       y )NzProvided path (z#) should be a directory, not a fileT)exist_okquestion_encoder_tokenizergenerator_tokenizer)	ospathisfile
ValueErrormakedirsjoinr   save_pretrainedr   )r   save_directoryquestion_encoder_pathgenerator_paths       r   r   zRagTokenizer.save_pretrained#   s    77>>.)~.>>abcc
NT2 "^=Y Zn6KL--.CD&&~6r   c                     ddl m} |j                  dd       }|t        j                  |      }|j	                  ||j
                  d      }|j	                  ||j                  d      } | ||      S )N   )AutoTokenizerconfigr   )r&   	subfolderr   )r   r   )auto.tokenization_autor%   popr	   from_pretrainedr   r   )clspretrained_model_name_or_pathkwargsr%   r&   r   r   s          r   r*   zRagTokenizer.from_pretrained,   s     	;Hd+>../LMF(88)&2I2IUq 9 
 "11)&2B2BNc 2 
	 $4	JJr   c                 &     | j                   |i |S r   )r   r   argsr-   s      r   __call__zRagTokenizer.__call__>   s    %t%%t6v66r   c                 :     | j                   j                  |i |S r   )r   batch_decoder/   s      r   r3   zRagTokenizer.batch_decodeA   s    *t~~**D;F;;r   c                 :     | j                   j                  |i |S r   )r   decoder/   s      r   r5   zRagTokenizer.decodeD   s    $t~~$$d5f55r   c                 &    | j                   | _        y r   )r   r   r   s    r   _switch_to_input_modez"RagTokenizer._switch_to_input_modeG   s    !%!6!6r   c                 &    | j                   | _        y r   )r   r   r7   s    r   _switch_to_target_modez#RagTokenizer._switch_to_target_modeJ   s    !%r   N	src_texts	tgt_texts
max_lengthmax_target_lengthpaddingreturn_tensors
truncationreturnc           
          t        j                  dt               || j                  j                  } | |fd||||d|}	||	S || j                  j                  } | d|d||||d|}
|
d   |	d<   |	S )Nu4  `prepare_seq2seq_batch` is deprecated and will be removed in version 5 of 🤗 Transformers. Use the regular `__call__` method to prepare your inputs and the tokenizer under the `with_target_tokenizer` context manager to prepare your targets. See the documentation of your specific tokenizer for more detailsT)add_special_tokensr@   r=   r?   rA   )text_targetrD   r@   r?   r=   rA   	input_idslabels )warningswarnFutureWarningr   model_max_length)r   r;   r<   r=   r>   r?   r@   rA   r-   model_inputsrG   s              r   prepare_seq2seq_batchz"RagTokenizer.prepare_seq2seq_batchM   s     	 	
 //@@J
#)!!
 
 $ $ 6 6 G G 
!#)(!
 
 "(!4Xr   )NNNlongestNT)__name__
__module____qualname__r   r   classmethodr*   r1   r3   r5   r8   r:   r   strr   intboolr   rN   rH   r   r   r   r      s    7
7 K K"7<670 *.$(+/ ",9, DI&, SM	,
 $C=, , , , 
,r   r   )__doc__r   rI   typingr   r   tokenization_utils_baser   utilsr   configuration_ragr	   
get_loggerrP   loggerr   rH   r   r   <module>r^      s<    $ 	  ! 4  ( 
		H	%\ \r   