
    sg"                     $    d Z ddlZ G d d      Zy)z"English Normalizer class for CLVP.    Nc                       e Zd Zd ZdedefdZdedefdZdedefdZdedefd	Z	dedefd
Z
dedefdZdedefdZdedefdZdedefdZdedefdZd Zy)EnglishNormalizerc                     dD cg c]1  }t        j                  d|d   z  t         j                        |d   f3 c}| _        g d| _        g d| _        g d| _        y c c}w )N))mrsmisess)mrmister)drdoctor)stsaint)cocompany)jrjunior)majmajor)gengeneral)drsdoctors)revreverend)lt
lieutenant)hon	honorable)sgtsergeant)captcaptain)esqesquire)ltdlimited)colcolonel)ftfortz\b%s\.r      )
 onetwothreefourfivesixseveneightnine)
teneleventwelvethirteenfourteenfifteensixteen	seventeeneighteennineteen)
r+   r+   twentythirtyfortyfiftysixtyseventyeightyninety)recompile
IGNORECASE_abbreviationsonesteenstens)selfxs     ]/var/www/html/venv/lib/python3.12/site-packages/transformers/models/clvp/number_normalizer.py__init__zEnglishNormalizer.__init__   sc    
 ZZ
QqT)2==91Q4@
0 a	

 k	K
s   6Anumreturnc                    |dk(  ry|dk  rd| j                  t        |            z   S |dk  r| j                  |   S |dk  r| j                  |dz
     S |dk  r6| j                  |dz     |dz  dk7  rd| j                  |dz        z   z   S dz   S |d	k  r9| j                  |dz     d
z   |dz  dk7  rd| j                  |dz        z   z   S dz   S |dk  r;| j                  |d	z        dz   |d	z  dk7  rd| j                  |d	z        z   z   S dz   S |dk  r;| j                  |dz        dz   |dz  dk7  rd| j                  |dz        z   z   S dz   S |dk  r;| j                  |dz        dz   |dz  dk7  rd| j                  |dz        z   z   S dz   S |dk  r;| j                  |dz        dz   |dz  dk7  rd| j                  |dz        z   z   S dz   S |dk  r;| j                  |dz        dz   |dz  dk7  rd| j                  |dz        z   z   S dz   S y)ax  
        Converts numbers(`int`) to words(`str`).

        Please note that it only supports upto - "'nine hundred ninety-nine quadrillion, nine hundred ninety-nine
        trillion, nine hundred ninety-nine billion, nine hundred ninety-nine million, nine hundred ninety-nine
        thousand, nine hundred ninety-nine'" or `number_to_words(999_999_999_999_999_999)`.
        r   zerozminus 
      d   -r+      hundred i@B z	 thousand, i ʚ;z millionl    J)z billionl     I5 z	 trillionl     NZoz quadrillionznumber out of range)number_to_wordsabsrK   rL   rM   )rN   rR   s     rP   r^   z!EnglishNormalizer.number_to_words?   s	    !81Wd223s8<<<2X99S>!2X::cBh''3Y99SBY'SVY[S[_`S`31E1EcBh1O+Oiifhii4Z		#*%
2_beh_hlm_mcD<P<PQTWZQZ<[6[vsuv 9_$$SD[1>ADjAo4$..sTz::W TVW
 = $$SI%56CF?VWCW4$..sY??a ^`a
 $$$$SM%9:GJ]GZ^_G_4$..s]/BCCi fhi
 (($$S,=%=>KNQbKbfgKg4$..s5F/FGGq npq
 ,,$$S,A%AB ! 22a7 4//6K0KLL  )    textc                 D    |j                  dd      j                  d      S )z+
        Converts unicode to ascii
        asciiignorezutf-8)encodedecoderN   ra   s     rP   convert_to_asciiz"EnglishNormalizer.convert_to_asciiz   s      {{7H-44W==r`   mc                 l   |j                  d      }|j                  d      }t        |      dkD  r|dz   S |d   rt        |d         nd}t        |      dkD  r|d   rt        |d         nd}|r!|r|dk(  rdnd}|dk(  rdnd	}|d
|d|d
|S |r|dk(  rdnd}|d
|S |r|dk(  rdnd	}|d
|S y)zZ
        This method is used to expand numerical dollar values into spoken words.
        r*   .   z dollarsr   dollardollarscentcentsr\   r]   zzero dollars)groupsplitlenint)rN   ri   matchpartsrn   rp   dollar_unit	cent_units           rP   _expand_dollarsz!EnglishNormalizer._expand_dollars   s     
C u:>:%%#(8#eAh-!$UaE!HE!H!u&-l(	K"'1*'I%,k5)LL&-l(	K%{33"'1*'I#Y//!r`   c                 D    |j                  d      j                  dd      S )zF
        This method is used to remove commas from sentences.
        r*   ,r+   rq   replacerN   ri   s     rP   _remove_commasz EnglishNormalizer._remove_commas   s     wwqz!!#r**r`   c                 D    |j                  d      j                  dd      S )zO
        This method is used to expand '.' into spoken word ' point '.
        r*   rk   z point r|   r~   s     rP   _expand_decimal_pointz'EnglishNormalizer._expand_decimal_point   s     wwqz!!#y11r`   c                     dddd}t        |j                  d      dd       }d|d	z  k  r|d	z  d
k  rd}n|j                  |dz  d      }| j                  |      |z   S )z`
        This method is used to expand ordinals such as '1st', '2nd' into spoken words.
        r   ndrd)r*   rl      r   NrV   rX   rW   th)rt   rq   getr^   )rN   rR   ordinal_suffixessuffixs       rP   _expand_ordinalz!EnglishNormalizer._expand_ordinal   sp      $6#))A,s#$s?sSyBF%))#(D9F##C(611r`   c                    t        |j                  d            }|dkD  r\|dk  rW|dk(  ry|dkD  r|dk  rd| j                  |dz        z   S |dz  dk(  r| j                  |dz        d	z   S | j                  |      S | j                  |      S )
a  
        This method acts as a preprocessing step for numbers between 1000 and 3000 (same as the original repository,
        link :
        https://github.com/neonbjb/tortoise-tts/blob/4003544b6ff4b68c09856e04d3eff9da26d023c2/tortoise/utils/tokenizer.py#L86)
        r   rZ   i  i  ztwo thousandi  ztwo thousand rX   r[   )rt   rq   r^   )rN   ri   rR   s      rP   _expand_numberz EnglishNormalizer._expand_number   s     !''!*o:#*d{%td
&)=)=cCi)HHHsa++C3J7*DD++C00'',,r`   c                 b   t        j                  t        j                  d      | j                  |      }t        j                  t        j                  d      d|      }t        j                  t        j                  d      | j                  |      }t        j                  t        j                  d      | j
                  |      }t        j                  t        j                  d      | j                  |      }t        j                  t        j                  d      | j                  |      }|S )z
        This method is used to normalize numbers within a text such as converting the numbers to words, removing
        commas, etc.
        z([0-9][0-9\,]+[0-9])u   £([0-9\,]*[0-9]+)z	\1 poundsz\$([0-9\.\,]*[0-9]+)z([0-9]+\.[0-9]+)z[0-9]+(st|nd|rd|th)z[0-9]+)rG   subrH   r   ry   r   r   r   rg   s     rP   normalize_numbersz#EnglishNormalizer.normalize_numbers   s    
 vvbjj!894;N;NPTUvvbjj!67tLvvbjj!894;O;OQUVvvbjj!45t7Q7QSWXvvbjj!78$:N:NPTUvvbjj+T-@-@$Gr`   c                 \    | j                   D ]  \  }}t        j                  |||      } |S )z/
        Expands the abbreviate words.
        )rJ   rG   r   )rN   ra   regexreplacements       rP   expand_abbreviationsz&EnglishNormalizer.expand_abbreviations   s5     #'"5"5 	4E;66%d3D	4r`   c                 V    t        j                  t        j                  d      d|      S )z.
        Removes multiple whitespaces
        z\s+r\   )rG   r   rH   rg   s     rP   collapse_whitespacez%EnglishNormalizer.collapse_whitespace   s      vvbjj(#t44r`   c                     | j                  |      }|j                         }| j                  |      }| j                  |      }| j	                  |      }|j                  dd      }|S )z
        Converts text to ascii, numbers / number-like quantities to their spelt-out counterparts and expands
        abbreviations
        "r+   )rh   lowerr   r   r   r}   rg   s     rP   __call__zEnglishNormalizer.__call__   sd     $$T*zz|%%d+((.''-||C$r`   N)__name__
__module____qualname__rQ   rt   strr^   rh   ry   r   r   r   r   r   r   r   r    r`   rP   r   r      s    'kR9)3 9)3 9)v>S >S >" " "0+ + +2s 2s 223 23 2- - -(c c   5 5 5r`   r   )__doc__rG   r   r   r`   rP   <module>r      s     ) 	X Xr`   