
    sg	                     h    d dl Z ddlmZmZmZ ddlmZ ddlmZ  e       rd dl	m
Z
  G d d	e      Zy)
    N   )SpeechT5ForTextToSpeechSpeechT5HifiGanSpeechT5Processor)is_datasets_available   )PipelineTool)load_datasetc                   \     e Zd ZdZdZdZeZeZ	e
ZddddiZdZ fd	Zdd
Zd Zd Z xZS )TextToSpeechToolzmicrosoft/speecht5_ttszfThis is a tool that reads an English text out loud. It returns a waveform object containing the sound.text_to_speechtextstringz&The text to read out loud (in English))typedescriptionaudioc                 H    | j                   d| _         t        | 	          y )Nzmicrosoft/speecht5_hifigan)post_processorsupersetup)self	__class__s    U/var/www/html/venv/lib/python3.12/site-packages/transformers/agents/text_to_speech.pyr   zTextToSpeechTool.setup*   s     &">D    c                     | j                  |dd      }|Mt               st        d      t        ddd      }t	        j
                  |d   d	         j                  d
      }|d   |dS )NptT)r   return_tensors
truncationzADatasets needs to be installed if not passing speaker embeddings.zMatthijs/cmu-arctic-xvectors
validation)splittrust_remote_codei  xvectorr   	input_ids)r#   speaker_embeddings)pre_processorr   ImportErrorr
   torchtensor	unsqueeze)r   r   r$   inputsembeddings_datasets        r   encodezTextToSpeechTool.encode/   s}    ##dt#T%(*!"eff!-.lVZ" "'.@.Fy.Q!R!\!\]^!_#K0HZ[[r   c                     t        j                         5   | j                  j                  di |cd d d        S # 1 sw Y   y xY w)N )r'   no_gradmodelgenerate_speech)r   r*   s     r   forwardzTextToSpeechTool.forward=   s7    ]]_ 	8-4::--77	8 	8 	8s	   ;Ac                     t        j                         5  | j                  |      j                         j	                         cd d d        S # 1 sw Y   y xY wN)r'   r/   r   cpudetach)r   outputss     r   decodezTextToSpeechTool.decodeA   s@    ]]_ 	?&&w/335<<>	? 	? 	?s   -AAr4   )__name__
__module____qualname__default_checkpointr   namer   pre_processor_classr   model_classr   post_processor_classr*   output_typer   r,   r2   r8   __classcell__)r   s   @r   r   r      sS    1p  D+)K*x8`abFK
\8?r   r   )r'   models.speecht5r   r   r   utilsr   toolsr	   datasetsr
   r   r.   r   r   <module>rG      s-   $  Y Y )  %&?| &?r   