
    sg                         d Z ddlZddlZddlmZmZmZmZ ddlZ	ddl
mZ ddlmZmZmZmZmZ ddlmZmZmZmZmZ  e       rddlZ ej2                  e      Zdd	iZd
 Zd Z G d de      Zy)z!Tokenization class for Pop2Piano.    N)ListOptionalTupleUnion   )BatchFeature)
AddedTokenBatchEncodingPaddingStrategyPreTrainedTokenizerTruncationStrategy)
TensorTypeis_pretty_midi_availableloggingrequires_backendsto_numpyvocabz
vocab.jsonc                 ,    || z  }|t        ||      }|S N)minnumbercutoff_time_idxcurrent_idxs      g/var/www/html/venv/lib/python3.12/site-packages/transformers/models/pop2piano/tokenization_pop2piano.pytoken_time_to_noter   '   s$    6K"+7    c                 |    ||    1||    }||k  r%|}|j                  ||| |g       |dk(  rd n|}||| <   |S ||| <   |S )Nr   )append)	r   current_velocitydefault_velocitynote_onsets_readyr   notes	onset_idx
offset_idxonsets_readys	            r   token_note_to_noter'   /   sl     ,%f-	{"$JLL)Z9IJK#3q#84kL(4f% L %0&!Lr   c                       e Zd ZdZddgZeZ	 	 	 	 	 	 d( fd	Zed        Z	d Z
dedefd	Zd)defd
Zdej                   dededefdZ	 	 	 d*dej                   dej                   dededef
dZd+dej                   dedefdZd,dej                   dej                   defdZd+dedee   dee   fdZ	 	 d-deej                   eej:                     f   dee   dee   defdZ 	 	 d-deej                   eej:                     f   dee   dee   defdZ!	 	 	 	 	 	 	 d.deej                   eej:                     eeej:                        f   dee"ee#f   dee"eef   dee   d ee   d!ee"   d"eeee$f      d#e"defd$Z%	 d/d%e&d&e"fd'Z' xZ(S )0Pop2PianoTokenizera  
    Constructs a Pop2Piano tokenizer. This tokenizer does not require training.

    This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to
    this superclass for more information regarding those methods.

    Args:
        vocab (`str`):
            Path to the vocab file which contains the vocabulary.
        default_velocity (`int`, *optional*, defaults to 77):
            Determines the default velocity to be used while creating midi Notes.
        num_bars (`int`, *optional*, defaults to 2):
            Determines cutoff_time_idx in for each token.
        unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"-1"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead.
        eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to 1):
            The end of sequence token.
        pad_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to 0):
             A special token used to make arrays of tokens the same size for batching purpose. Will then be ignored by
            attention mechanisms or loss computation.
        bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to 2):
            The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
    	token_idsattention_maskc                 2   t        |t              rt        |dd      n|}t        |t              rt        |dd      n|}t        |t              rt        |dd      n|}t        |t              rt        |dd      n|}|| _        || _        t        |d      5 }	t        j                  |	      | _        d d d        | j                  j                         D 
ci c]  \  }
}||

 c}}
| _
        t        | 0  d||||d| y # 1 sw Y   QxY wc c}}
w )NF)lstriprstriprb)	unk_token	eos_token	pad_token	bos_token )
isinstancestrr	   r!   num_barsopenjsonloadencoderitemsdecodersuper__init__)selfr   r!   r7   r0   r1   r2   r3   kwargsfilekv	__class__s               r   r?   zPop2PianoTokenizer.__init__[   s    JTT]_bIcJyuEir	IST]_bIcJyuEir	IST]_bIcJyuEir	IST]_bIcJyuEir	 0  % 	+$99T?DL	+ *.););)=>A1> 	
		

 	
	+ 	+ ?s   DDDc                 ,    t        | j                        S )z-Returns the vocabulary size of the tokenizer.)lenr;   r@   s    r   
vocab_sizezPop2PianoTokenizer.vocab_size}   s     4<<  r   c                 B    t        | j                  fi | j                  S )z(Returns the vocabulary of the tokenizer.)dictr;   added_tokens_encoderrH   s    r   	get_vocabzPop2PianoTokenizer.get_vocab   s    DLL>D$=$=>>r   token_idreturnc                     | j                   j                  || j                   d      }|j                  d      }dj	                  |dd       t        |d         }}||gS )a?  
        Decodes the token ids generated by the transformer into notes.

        Args:
            token_id (`int`):
                This denotes the ids generated by the transformers to be converted to Midi tokens.

        Returns:
            `List`: A list consists of token_type (`str`) and value (`int`).
        _TOKEN_TIME_   Nr   )r=   getr0   splitjoinint)r@   rN   token_type_value
token_typevalues        r   _convert_id_to_tokenz'Pop2PianoTokenizer._convert_id_to_token   si      <<++H8H6TU+11#6HH%5ab%9:C@PQR@S<TE
E""r   c                 j    | j                   j                  | d| t        | j                              S )a  
        Encodes the Midi tokens to transformer generated token ids.

        Args:
            token (`int`):
                This denotes the token value.
            token_type (`str`):
                This denotes the type of the token. There are four types of midi tokens such as "TOKEN_TIME",
                "TOKEN_VELOCITY", "TOKEN_NOTE" and "TOKEN_SPECIAL".

        Returns:
            `int`: returns the id of the token.
        rR   )r;   rT   rW   r0   )r@   tokenrY   s      r   _convert_token_to_idz'Pop2PianoTokenizer._convert_token_to_id   s/     ||5':, 7T^^9LMMr   tokensbeat_offset_idxbars_per_batchr   c                     d}t        t        |            D ]X  }||   }|||z  dz  z   }||z   }	| j                  |||	      }
t        |
      dk(  r;||
}@t        j                  ||
fd      }Z |g S |S )a  
        Converts relative tokens to notes which are then used to generate pretty midi object.

        Args:
            tokens (`numpy.ndarray`):
                Tokens to be converted to notes.
            beat_offset_idx (`int`):
                Denotes beat offset index for each note in generated Midi.
            bars_per_batch (`int`):
                A parameter to control the Midi output generation.
            cutoff_time_idx (`int`):
                Denotes the cutoff time index for each note in generated Midi.
        N   )	start_idxr   r   )axis)rangerG   relative_tokens_ids_to_notesnpconcatenate)r@   r_   r`   ra   r   r#   index_tokens
_start_idx_cutoff_time_idx_notess              r   "relative_batch_tokens_ids_to_notesz5Pop2PianoTokenizer.relative_batch_tokens_ids_to_notes   s    * 3v;' 	@EUmG(5>+AA+EEJ.;66$ 0 7 F 6{avQ?	@" =Ir   beatstepc                 j    |dn|}| j                  ||||      }| j                  ||||         }|S )al  
        Converts tokens to Midi. This method calls `relative_batch_tokens_ids_to_notes` method to convert batch tokens
        to notes then uses `notes_to_midi` method to convert them to Midi.

        Args:
            tokens (`numpy.ndarray`):
                Denotes tokens which alongside beatstep will be converted to Midi.
            beatstep (`np.ndarray`):
                We get beatstep from feature extractor which is also used to get Midi.
            beat_offset_idx (`int`, *optional*, defaults to 0):
                Denotes beat offset index for each note in generated Midi.
            bars_per_batch (`int`, *optional*, defaults to 2):
                A parameter to control the Midi output generation.
            cutoff_time_idx (`int`, *optional*, defaults to 12):
                Denotes the cutoff time index for each note in generated Midi.
        r   )r_   r`   ra   r   )
offset_sec)ro   notes_to_midi)r@   r_   rp   r`   ra   r   r#   midis           r   !relative_batch_tokens_ids_to_midiz4Pop2PianoTokenizer.relative_batch_tokens_ids_to_midi   sT    0  /6!O77+)+	 8 
 !!%h>W!Xr   rd   c           
      &   |D cg c]  }| j                  |       }}|}d}t        t        | j                  j	                         D cg c]  }|j                  d       c}      dz         D 	cg c]  }	d }
}	g }|D ]Y  \  }}|dk(  r|dk(  s nI|dk(  rt        |||      }'|dk(  r|}/|d	k(  rt        ||| j                  |
||
      }Pt        d       t        |
      D ]J  \  }}|	||dz   }nt        ||dz         }t        ||      }|j                  |||| j                  g       L t        |      dk(  rg S t        j                  |      }|dddf   dz  |dddf   z   }||j!                            }|S c c}w c c}w c c}	w )a  
        Converts relative tokens to notes which will then be used to create Pretty Midi objects.

        Args:
            tokens (`numpy.ndarray`):
                Relative Tokens which will be converted to notes.
            start_idx (`float`):
                A parameter which denotes the starting index.
            cutoff_time_idx (`float`, *optional*):
                A parameter used while converting tokens to notes.
        r   NOTErS   NTOKEN_SPECIAL
TOKEN_TIMEr   TOKEN_VELOCITY
TOKEN_NOTE)r   r    r!   r"   r   r#   zToken type not understood!   )r[   rf   sumr;   keysendswithr   r'   r!   
ValueError	enumeratemaxr   rG   rh   arrayargsort)r@   r_   rd   r   r]   wordsr   r    rC   ir"   r#   rY   r   pitch
note_onsetcutoffr%   
note_orders                      r   rg   z/Pop2PianoTokenizer.relative_tokens_ids_to_notes   s    @FFe**51FF+0RVR^R^RcRcRe5fQajj6H5f1gjk1k+lmaTmm"' 	?J_,Q;|+0!?P[ //#) |+*!%5%)%:%:&7 + !!=>>+	?. "++<!= 		UE:%"*'!^F *q.AF f5
j*eT=R=RST		U u:?IHHUOEq!ts*U1a4[8J*,,./EL] G 6gms   F	F	1	Fr#   rr   c                 f   t        | dg       t        j                  dd      }t        j                  d      }g }|D ]=  \  }}}	}
t        j                  |
|	||   |z
  ||   |z
        }|j                  |       ? ||_        |j                  j                  |       |j                          |S )a  
        Converts notes to Midi.

        Args:
            notes (`numpy.ndarray`):
                This is used to create Pretty Midi objects.
            beatstep (`numpy.ndarray`):
                This is the extrapolated beatstep that we get from feature extractor.
            offset_sec (`int`, *optional*, defaults to 0.0):
                This represents the offset seconds which is used while creating each Pretty Midi Note.
        pretty_midii  g      ^@)
resolutioninitial_tempor   )program)velocityr   startend)	r   r   
PrettyMIDI
InstrumentNoter   r#   instrumentsremove_invalid_notes)r@   r#   rp   rr   new_pmnew_inst	new_notesr$   r%   r   r   new_notes               r   rs   z Pop2PianoTokenizer.notes_to_midi4  s     	$0''3eL))!4	6; 	'2Iz5("''!y)J6Z(:5	H X&	' #!!(+##%r   save_directoryfilename_prefixc                 ~   t         j                  j                  |      st        j	                  d| d       yt         j                  j                  ||r|dz   ndt        d   z         }t        |d      5 }|j                  t        j                  | j                               ddd       |fS # 1 sw Y   |fS xY w)a}  
        Saves the tokenizer's vocabulary dictionary to the provided save_directory.

        Args:
            save_directory (`str`):
                A path to the directory where to saved. It will be created if it doesn't exist.
            filename_prefix (`Optional[str]`, *optional*):
                A prefix to add to the names of the files saved by the tokenizer.
        zVocabulary path (z) should be a directoryN- r   w)ospathisdirloggererrorrV   VOCAB_FILES_NAMESr8   writer9   dumpsr;   )r@   r   r   out_vocab_filerB   s        r   save_vocabularyz"Pop2PianoTokenizer.save_vocabularyT  s     ww}}^,LL,^,<<STU o_s22QbcjQkk
 .#& 	1$JJtzz$,,/0	1   	1   s   7/B11B<truncation_strategy
max_lengthc           	      R   t        | dg       t        |d   t        j                        r_t	        j
                  |D cg c]0  }|j                  |j                  |j                  |j                  g2 c}      j                  dd      }t	        j                  |      j                  t        j                        }|ddddf   j                         }t        |dz         D cg c]  }g  }}|D ]3  \  }	}
}}||	   j!                  ||g       ||
   j!                  |dg       5 g }d}t#        |      D ]  \  }}t%        |      dk(  r|j!                  | j'                  |d             |D ]\  \  }}t)        |dkD        }||k7  r#|}|j!                  | j'                  |d	             |j!                  | j'                  |d
             ^  t%        |      }|t*        j,                  k7  r$|r"||kD  r | j.                  d|||z
  |d|\  }}}t1        d|i      S c c}w c c}w )a  
        This is the `encode_plus` method for `Pop2PianoTokenizer`. It converts the midi notes to the transformer
        generated token ids. It only works on a single batch, to process multiple batches please use
        `batch_encode_plus` or `__call__` method.

        Args:
            notes (`numpy.ndarray` of shape `[sequence_length, 4]` or `list` of `pretty_midi.Note` objects):
                This represents the midi notes. If `notes` is a `numpy.ndarray`:
                    - Each sequence must have 4 values, they are `onset idx`, `offset idx`, `pitch` and `velocity`.
                If `notes` is a `list` containing `pretty_midi.Note` objects:
                    - Each sequence must have 4 attributes, they are `start`, `end`, `pitch` and `velocity`.
            truncation_strategy ([`~tokenization_utils_base.TruncationStrategy`], *optional*):
                Indicates the truncation strategy that is going to be used during truncation.
            max_length (`int`, *optional*):
                Maximum length of the returned list and optionally padding length (see above).

        Returns:
            `BatchEncoding` containing the tokens ids.
        r   r   rc   N   rS   ry   rz   r{   )idsnum_tokens_to_remover   r*   r4   )r   r5   r   r   rh   r   r   r   r   r   reshaperoundastypeint32r   rf   r   r   rG   r^   rW   r   DO_NOT_TRUNCATEtruncate_sequencesr
   )r@   r#   r   r   rA   	each_notemax_time_idxr   timesonsetoffsetr   r   r_   r    time	total_lenrR   s                     r   encode_pluszPop2PianoTokenizer.encode_plusk  s2   6 	$0 eAh 0 01HHhmn[d)//9==)//9CUCUVngb!n 
 &&rxx0QU|'')"L1$46777.3 	-*E65(%L 12&M  %,	-  ' 		NGAt4yA~MM$33A|DE#' Nxx!|,#x/'/$MM$";";HFV"WXd77|LMN			N K	 "4"D"DDXadnXn2422 %.%;$7 	LFAq k6233I o 8s   5H%	H$c           	          g }t        t        |            D ]-  }|j                   | j                  ||   f||d|d          / t	        d|i      S )a  
        This is the `batch_encode_plus` method for `Pop2PianoTokenizer`. It converts the midi notes to the transformer
        generated token ids. It works on multiple batches by calling `encode_plus` multiple times in a loop.

        Args:
            notes (`numpy.ndarray` of shape `[batch_size, sequence_length, 4]` or `list` of `pretty_midi.Note` objects):
                This represents the midi notes. If `notes` is a `numpy.ndarray`:
                    - Each sequence must have 4 values, they are `onset idx`, `offset idx`, `pitch` and `velocity`.
                If `notes` is a `list` containing `pretty_midi.Note` objects:
                    - Each sequence must have 4 attributes, they are `start`, `end`, `pitch` and `velocity`.
            truncation_strategy ([`~tokenization_utils_base.TruncationStrategy`], *optional*):
                Indicates the truncation strategy that is going to be used during truncation.
            max_length (`int`, *optional*):
                Maximum length of the returned list and optionally padding length (see above).

        Returns:
            `BatchEncoding` containing the tokens ids.
        )r   r   r*   )rf   rG   r   r   r
   )r@   r#   r   r   rA   encoded_batch_token_idsr   s          r   batch_encode_plusz$Pop2PianoTokenizer.batch_encode_plus  sy    4 #%s5z" 	A#**   !H(;) 	
 	 k+BCDDr   padding
truncationpad_to_multiple_ofreturn_attention_maskreturn_tensorsverbosec	           	      R   t        |t        j                        r|j                  dk(  nt        |d   t              }
 | j
                  d|||||d|	\  }}}}	|
r|dn|} | j                  d|||d|	}n | j                  d|||d|	}| j                  |||||||      }|S )a  
        This is the `__call__` method for `Pop2PianoTokenizer`. It converts the midi notes to the transformer generated
        token ids.

        Args:
            notes (`numpy.ndarray` of shape `[batch_size, max_sequence_length, 4]` or `list` of `pretty_midi.Note` objects):
                This represents the midi notes.

                If `notes` is a `numpy.ndarray`:
                    - Each sequence must have 4 values, they are `onset idx`, `offset idx`, `pitch` and `velocity`.
                If `notes` is a `list` containing `pretty_midi.Note` objects:
                    - Each sequence must have 4 attributes, they are `start`, `end`, `pitch` and `velocity`.
            padding (`bool`, `str` or [`~file_utils.PaddingStrategy`], *optional*, defaults to `False`):
                Activates and controls padding. Accepts the following values:

                - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single
                  sequence if provided).
                - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum
                  acceptable input length for the model if that argument is not provided.
                - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different
                  lengths).
            truncation (`bool`, `str` or [`~tokenization_utils_base.TruncationStrategy`], *optional*, defaults to `False`):
                Activates and controls truncation. Accepts the following values:

                - `True` or `'longest_first'`: Truncate to a maximum length specified with the argument `max_length` or
                  to the maximum acceptable input length for the model if that argument is not provided. This will
                  truncate token by token, removing a token from the longest sequence in the pair if a pair of
                  sequences (or a batch of pairs) is provided.
                - `'only_first'`: Truncate to a maximum length specified with the argument `max_length` or to the
                  maximum acceptable input length for the model if that argument is not provided. This will only
                  truncate the first sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
                - `'only_second'`: Truncate to a maximum length specified with the argument `max_length` or to the
                  maximum acceptable input length for the model if that argument is not provided. This will only
                  truncate the second sequence of a pair if a pair of sequences (or a batch of pairs) is provided.
                - `False` or `'do_not_truncate'` (default): No truncation (i.e., can output batch with sequence lengths
                  greater than the model maximum admissible input size).
            max_length (`int`, *optional*):
                Controls the maximum length to use by one of the truncation/padding parameters. If left unset or set to
                `None`, this will use the predefined model maximum length if a maximum length is required by one of the
                truncation/padding parameters. If the model has no specific maximum input length (like XLNet)
                truncation/padding to a maximum length will be deactivated.
            pad_to_multiple_of (`int`, *optional*):
                If set will pad the sequence to a multiple of the provided value. This is especially useful to enable
                the use of Tensor Cores on NVIDIA hardware with compute capability `>= 7.5` (Volta).
            return_attention_mask (`bool`, *optional*):
                Whether to return the attention mask. If left to the default, will return the attention mask according
                to the specific tokenizer's default, defined by the `return_outputs` attribute.

                [What are attention masks?](../glossary#attention-mask)
            return_tensors (`str` or [`~file_utils.TensorType`], *optional*):
                If set, will return tensors instead of list of python integers. Acceptable values are:

                - `'tf'`: Return TensorFlow `tf.constant` objects.
                - `'pt'`: Return PyTorch `torch.Tensor` objects.
                - `'np'`: Return Numpy `np.ndarray` objects.
            verbose (`bool`, *optional*, defaults to `True`):
                Whether or not to print more information and warnings.

        Returns:
            `BatchEncoding` containing the token_ids.
        r   r   )r   r   r   r   r   T)r#   r   r   )r   r   r   r   r   r   r4   )	r5   rh   ndarrayndimlist"_get_padding_truncation_strategiesr   r   pad)r@   r#   r   r   r   r   r   r   r   rA   
is_batchedpadding_strategyr   r*   s                 r   __call__zPop2PianoTokenizer.__call__  s   d )35"**(EUZZ1_:V[\]V^`dKe
 ElDDkDk E
!!1E
 E
A-z6 ,A,IDOd!... $7% 	I )(( $7% 	I HH$!1"7)  
	 r   feature_extractor_outputreturn_midic                 N   t        t        |d      xr t        |d      xr t        |d            }|s |d   j                  d   dkD  rt        d      |rt	        |d   dddf   dk(        |d   j                  d   k7  s%|d   j                  d   |d	   j                  d   k7  rAt        d
|j                  d    d|d   j                  d    d|d	   j                  d          |d   j                  d   |j                  d   k7  rt        d|d   j                  d    d|j                  d          |d   j                  d   dk7  s|d	   j                  d   dk7  r2t        d|d   j                  d    d|d	   j                  d    d      |r&t        j                  |d   dddf   dk(        d   }n|j                  d   g}g }g }d}t        |      D ]  \  }	}
|||
 }|dddt        j                  t        j                  |t        | j                        k(        d         dz   f   }|d   |	   }|d	   |	   }|rx|d   |	   }|d   |	   }|dt        j                  t        j                  |dk(        d         dz    }|dt        j                  t        j                  |dk(        d         dz    }t        |      }t        |      }t        |      }| j                  ||| j                  | j                  dz   dz        }|j                  d   j                  D ]C  }|xj                   |d   z  c_        |xj"                  |d   z  c_        |j%                  |       E |j%                  |       ||
dz   z  } |rt'        ||d      S t'        d|i      S )aF  
        This is the `batch_decode` method for `Pop2PianoTokenizer`. It converts the token_ids generated by the
        transformer to midi_notes and returns them.

        Args:
            token_ids (`Union[np.ndarray, torch.Tensor, tf.Tensor]`):
                Output token_ids of `Pop2PianoConditionalGeneration` model.
            feature_extractor_output (`BatchFeature`):
                Denotes the output of `Pop2PianoFeatureExtractor.__call__`. It must contain `"beatstep"` and
                `"extrapolated_beatstep"`. Also `"attention_mask_beatsteps"` and
                `"attention_mask_extrapolated_beatstep"`
                 should be present if they were returned by the feature extractor.
            return_midi (`bool`, *optional*, defaults to `True`):
                Whether to return midi object or not.
        Returns:
            If `return_midi` is True:
                - `BatchEncoding` containing both `notes` and `pretty_midi.pretty_midi.PrettyMIDI` objects.
            If `return_midi` is False:
                - `BatchEncoding` containing `notes`.
        r+   attention_mask_beatsteps$attention_mask_extrapolated_beatstep	beatstepsr   rS   zattention_mask, attention_mask_beatsteps and attention_mask_extrapolated_beatstep must be present for batched inputs! But one of them were not present.Nextrapolated_beatstepzbLength mistamtch between token_ids, beatsteps and extrapolated_beatstep! Found token_ids length - z, beatsteps shape - z$ and extrapolated_beatsteps shape - z!Found attention_mask of length - z but token_ids of length - zLength mistamtch of beatsteps and extrapolated_beatstep! Since attention_mask is not present the number of examples must be 1, But found beatsteps length - z", extrapolated_beatsteps length - .rc   )r_   rp   ra   r   )r#   pretty_midi_objectsr#   )boolhasattrshaper   r}   rh   wherer   r   rW   r1   r   ru   r7   r   r#   r   r   r   r
   )r@   r*   r   r   attention_masks_present	batch_idx
notes_listpretty_midi_objects_listrd   rj   end_idxeach_tokens_idsr   r   r   r   pretty_midi_objectnotes                     r   batch_decodezPop2PianoTokenizer.batch_decodeU  s   8 #',.>? Z02LMZ02XY#
 '+CK+P+V+VWX+Y\]+]H  # ,-=>q!tDIJ+K8>>qAB+K8>>qA+,CDJJ1MN !**3//!*<)==QRjkvRwR}R}~  SA  RB B::RSj:k:q:qrs:t9uw 
 ((89??BiooVWFXX 78PQa8b8h8hij8k7l  mH  IR  IX  IX  YZ  I[  H\  ]  )5;;A>!C+,CDJJ1MQRR 44L[4Y4_4_`a4b3c  dF  G_  `w  Gx  G~  G~  @  GA  FB  BCD 
 #!9:J!KAqD!QUV!VWXYZI"+,I
#% 	'	2 #	%NE7'	':O-a1r266"((?VYZ^ZhZhViCi:jkl:m3nqr3r1r.rsO0=eDI$<=T$UV[$\! '+CD^+_`e+f(7O:884 &&^rxx8PTU8U/VWX/Y(Z]^(^_	(=XbffRXX&Ja&OPQRSTWXX)% '7O +I$,-B$C!!%!G!G&.#}}!%!2a 7	 "H " +66q9?? (

il*
IaL(!!$'(
 %++,>?1$IG#	%J  :Nf!ghhgz233r   )M   r   z-1102)ry   )r   r      r   )g        )NN)FNNNNNT)T))__name__
__module____qualname____doc__model_input_namesr   vocab_files_namesr?   propertyrI   rM   rW   r   r[   r^   rh   r   ro   ru   floatrg   rs   r6   r   r   r   r   r   r   r   r   r
   r   r   r   r   r   r   r   r   __classcell__)rE   s   @r   r)   r)   >   s   2 %&67)
  
D ! !?#S #T #$Nc N *

* * 	*
 *`  !! 

  **  	 
    H:2:: :% :bg :x2::  QT @!c !HSM !]bcf]g !4 =A$(	E4RZZk&6&6!778E4 &&89E4 SM	E4 
E4T =A$(	%ERZZk&6&6!778%E &&89%E SM	%E 
%E\ 6;;?$(,004;?zJJ!!"k&&'(*
z tS/12z $%778z SMz %SMz  (~z !sJ!78z z 
z@ !	w4 #/w4 	w4r   r)   ) r   r9   r   typingr   r   r   r   numpyrh   feature_extraction_utilsr   tokenization_utilsr	   r
   r   r   r   utilsr   r   r   r   r   r   
get_loggerr   r   r   r   r'   r)   r4   r   r   <module>r      sm    (  	 / /  4 u u _ _ 			H	% \ 
N
4, N
4r   