
    sg+                     >    d Z ddlZddlmZ ddlmZ  G d de      Zy)z$
Speech processor class for M-CTC-T
    N)contextmanager   )ProcessorMixinc                   R     e Zd ZdZdZdZ fdZd Zd Zd Z	d Z
ed	        Z xZS )
MCTCTProcessora[  
    Constructs a MCTCT processor which wraps a MCTCT feature extractor and a MCTCT tokenizer into a single processor.

    [`MCTCTProcessor`] offers all the functionalities of [`MCTCTFeatureExtractor`] and [`AutoTokenizer`]. See the
    [`~MCTCTProcessor.__call__`] and [`~MCTCTProcessor.decode`] for more information.

    Args:
        feature_extractor (`MCTCTFeatureExtractor`):
            An instance of [`MCTCTFeatureExtractor`]. The feature extractor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of [`AutoTokenizer`]. The tokenizer is a required input.
    MCTCTFeatureExtractorAutoTokenizerc                 V    t         |   ||       | j                  | _        d| _        y )NF)super__init__feature_extractorcurrent_processor_in_target_context_manager)selfr   	tokenizer	__class__s      h/var/www/html/venv/lib/python3.12/site-packages/transformers/models/deprecated/mctct/processing_mctct.pyr   zMCTCTProcessor.__init__*   s)    *I6!%!7!7*/'    c                    | j                   r | j                  |i |S d|v r't        j                  d       |j	                  d      }n|j	                  dd      }|j	                  dd      }|j	                  dd      }t        |      dkD  r
|d   }|dd }||t        d	      | | j                  |g|d|i|}| | j                  |fi |}|S |S d
   d<   |S )a  
        When used in normal mode, this method forwards all its arguments to MCTCTFeatureExtractor's
        [`~MCTCTFeatureExtractor.__call__`] and returns its output. If used in the context
        [`~MCTCTProcessor.as_target_processor`] this method forwards all its arguments to AutoTokenizer's
        [`~AutoTokenizer.__call__`]. Please refer to the doctsring of the above two methods for more information.
        
raw_speechzLUsing `raw_speech` as a keyword argument is deprecated. Use `audio` instead.audioNsampling_ratetextr      zAYou need to specify either an `audio` or `text` input to process.	input_idslabels)	r   r   warningswarnpoplen
ValueErrorr   r   )r   argskwargsr   r   r   inputs	encodingss           r   __call__zMCTCTProcessor.__call__/   s    **)4))4:6::6!MMhiJJ|,EJJw-E

?D9zz&$'t9q=GE8D=T\`aa+T++E`D``Y_`F&t6v6I<M](5F8Mr   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to AutoTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please refer
        to the docstring of this method for more information.
        )r   batch_decoder   r"   r#   s      r   r(   zMCTCTProcessor.batch_decodeU   s     
 +t~~**D;F;;r   c                 p   | j                   r | j                  j                  |i |S |j                  dd      }|j                  dd      }t	        |      dkD  r
|d   }|dd }|  | j
                  j                  |g|i |}| | j                  j                  |fi |}||S ||S |d   |d<   |S )a  
        When used in normal mode, this method forwards all its arguments to MCTCTFeatureExtractor's
        [`~MCTCTFeatureExtractor.pad`] and returns its output. If used in the context
        [`~MCTCTProcessor.as_target_processor`] this method forwards all its arguments to PreTrainedTokenizer's
        [`~PreTrainedTokenizer.pad`]. Please refer to the docstring of the above two methods for more information.
        input_featuresNr   r   r   r   )r   r   padr   r    r   r   )r   r"   r#   r+   r   s        r   r,   zMCTCTProcessor.pad\   s     **-4))--t>v>>$4d;Hd+t9q=!!WN8D%7T3377XXQWXN'T^^''9&9F>!!#M'-k':N8$!!r   c                 :     | j                   j                  |i |S )z
        This method forwards all its arguments to AutoTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to the
        docstring of this method for more information.
        )r   decoder)   s      r   r.   zMCTCTProcessor.decodez   s     
 %t~~$$d5f55r   c              #      K   t        j                  d       d| _        | j                  | _        d | j
                  | _        d| _        yw)z
        Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning MCTCT.
        z`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your labels by using the argument `text` of the regular `__call__` method (either in the same call as your audio inputs, or in a separate call.TNF)r   r   r   r   r   r   )r   s    r   as_target_processorz"MCTCTProcessor.as_target_processor   sH     
 	8	

 +/'!%!%!7!7*/'s   AA)__name__
__module____qualname____doc__feature_extractor_classtokenizer_classr   r&   r(   r,   r.   r   r0   __classcell__)r   s   @r   r   r      sC     6%O0
$L<"<6 0 0r   r   )r4   r   
contextlibr   processing_utilsr   r    r   r   <module>r;      s#     % /v0^ v0r   