# symbols.py
""" from https://github.com/keithito/tacotron """
  2. '''
  3. Defines the set of symbols used in text input to the model.
  4. The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. '''
  5. from text import cmudict
  6. _punctuation = '!\'",.:;? '
  7. _math = '#%&*+-/[]()'
  8. _special = '_@©°½—₩€$'
  9. _accented = 'áçéêëñöøćž'
  10. _numbers = '0123456789'
  11. _letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
  12. # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as
  13. # uppercase letters):
  14. _arpabet = ['@' + s for s in cmudict.valid_symbols]
  15. # Export all symbols:
  16. symbols = list(_punctuation + _math + _special + _accented + _numbers + _letters) + _arpabet