Ë
    ©sg\
  ã                   ó<   — d Z ddlmZ ddlmZmZmZ  G d„ d«      Zy)z Tokenization utils for RoFormer.é    )ÚList)ÚNormalizedStringÚPreTokenizedStringÚnormalizersc                   ó:   — e Zd Zd	d„Zdededee   fd„Zdefd„Z	y)
ÚJiebaPreTokenizerÚreturnNc                 ó–   — || _         t        j                  dddd¬«      | _        	 dd l}|| _        y # t        $ r t	        d«      ‚w xY w)NFT)Ú
clean_textÚhandle_chinese_charsÚstrip_accentsÚ	lowercaser   zkYou need to install rjieba to use RoFormerTokenizer. See https://pypi.org/project/rjieba/ for installation.)Úvocabr   ÚBertNormalizerÚrjiebaÚImportErrorÚjieba)Úselfr   r   s      úb/var/www/html/venv/lib/python3.12/site-packages/transformers/models/roformer/tokenization_utils.pyÚ__init__zJiebaPreTokenizer.__init__   s]   € ØˆŒ
Ü&×5Ñ5ØØ!%ØØô	
ˆÔð	Ûð ˆ
øô ò 	ÜðIóð ð	ús	   §3 ³AÚiÚnormalized_stringc                 ó\  — g }| j                   j                  t        |«      d¬«      D ]€  \  }}}|| j                  v r|j	                  ||| «       Œ*| j
                  j                  |«      j                  «       }|D ])  }|sŒ|t        |«      z   }|j	                  ||| «       |}Œ+ Œ‚ |S )NF)Úhmm)	r   ÚtokenizeÚstrr   Úappendr   Únormalize_strÚsplitÚlen)r   r   r   ÚsplitsÚtokenÚstartÚendÚ
token_lists           r   Újieba_splitzJiebaPreTokenizer.jieba_split(   s¹   € Øˆð "&§¡×!4Ñ!4´SÐ9JÓ5KÐQVÐ!4Ó!Wò 		$ÑˆE5˜#Ø˜Ÿ
™
Ñ"Ø—‘Ð/°°cÐ:Õ;à!×-Ñ-×;Ñ;¸EÓB×HÑHÓJ
Ø'ò $EÚØ#¤c¨%£jÑ0˜ØŸ™Ð&7¸¸cÐ&BÔCØ #™ñ	$ð		$ð* ˆó    Úpretokc                 ó:   — |j                  | j                  «       y )N)r   r&   )r   r(   s     r   Úpre_tokenizezJiebaPreTokenizer.pre_tokenizeC   s   € Ø‰T×%Ñ%Õ&r'   )r	   N)
Ú__name__Ú
__module__Ú__qualname__r   Úintr   r   r&   r   r*   © r'   r   r   r      s5   „ óð"˜Sð Ð5Eð È$ÐO_ÑJ`ó ð6'Ð#5ô 'r'   r   N)Ú__doc__Útypingr   Ú
tokenizersr   r   r   r   r/   r'   r   ú<module>r3      s   ðñ 'å ç HÑ H÷.'ò .'r'   