app.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147
  1. import html
  2. import traceback
  3. import gradio as gr
  4. from fish_speech.text import parse_text_to_segments, segments_to_phones
# Markdown rendered at the top of the page: the project title followed by a
# one-line description of the project.
HEADER_MD = """
# Fish Speech
基于 VITS 和 GPT 的多语种语音合成. 项目很大程度上基于 Rcell 的 GPT-VITS.
"""

# Placeholder shown in the empty input textbox. It walks through an example of
# automatic phoneme conversion (mixed Chinese / English / Japanese input and
# the segments it is split into) and explains that wrapping text in
# <jp>...</jp> gives Japanese priority for that span.
TEXTBOX_PLACEHOLDER = """在启用自动音素的情况下, 模型默认会全自动将输入文本转换为音素. 例如:
测试一下 Hugging face, BGM声音很大吗?那我改一下. 世界、こんにちは。
会被转换为:
<Segment ZH: '测试一下' -> 'c e4 sh ir4 y i2 x ia4'>
<Segment EN: ' Hugging face, BGM' -> 'HH AH1 G IH0 NG F EY1 S , B AE1 G M'>
<Segment ZH: '声音很大吗?那我改一下.' -> 'sh eng1 y in1 h en3 d a4 m a5 ? n a4 w o2 g ai3 y i2 x ia4 .'>
<Segment ZH: '世界,' -> 'sh ir4 j ie4 ,'>
<Segment JP: 'こんにちは.' -> 'k o N n i ch i w a .'>
如你所见, 最后的句子被分割为了两个部分, 因为该日文包含了汉字, 你可以使用 <jp>...</jp> 标签来指定日文优先级. 例如:
测试一下 Hugging face, BGM声音很大吗?那我改一下. <jp>世界、こんにちは。</jp>
可以看到, 日文部分被正确地分割了出来:
...
<Segment JP: '世界,こんにちは.' -> 's e k a i , k o N n i ch i w a .'>
"""
  23. def build_html_error_message(error):
  24. return f"""
  25. <div style="color: red; font-weight: bold;">
  26. {html.escape(error)}
  27. </div>
  28. """
  29. def prepare_text(text, input_mode, language0, language1, language2):
  30. lines = text.splitlines()
  31. languages = [language0, language1, language2]
  32. languages = [
  33. {
  34. "中文": "ZH",
  35. "日文": "JP",
  36. "英文": "EN",
  37. }[language]
  38. for language in languages
  39. ]
  40. if len(set(languages)) != len(languages):
  41. return [], build_html_error_message("语言优先级不能重复.")
  42. if input_mode != "自动音素转换":
  43. return [
  44. [idx, line, "-", "-"]
  45. for idx, line in enumerate(lines)
  46. if line.strip() != ""
  47. ], None
  48. rows = []
  49. for idx, line in enumerate(lines):
  50. if line.strip() == "":
  51. continue
  52. try:
  53. segments = parse_text_to_segments(line, order=languages)
  54. except Exception:
  55. traceback.print_exc()
  56. err = traceback.format_exc()
  57. return [], build_html_error_message(f"解析 '{line}' 时发生错误. \n\n{err}")
  58. for segment in segments:
  59. rows.append([idx, segment.text, segment.language, segment.phones])
  60. return rows, None
# Builds the Gradio UI and wires the text-parsing callback.
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# listing that had lost its indentation — confirm against the intended layout.
with gr.Blocks(theme=gr.themes.Base()) as app:
    gr.Markdown(HEADER_MD)

    with gr.Row():
        # Input side: text box, parameter tabs and action buttons.
        with gr.Column(scale=3):
            text = gr.Textbox(label="输入文本", placeholder=TEXTBOX_PLACEHOLDER, lines=3)

            with gr.Row():
                with gr.Tab(label="合成参数"):
                    gr.Markdown("配置常见合成参数.")

                    input_mode = gr.Dropdown(
                        choices=["手动输入音素/文本", "自动音素转换"],
                        value="手动输入音素/文本",
                        label="输入模式",
                    )

                with gr.Tab(label="语言优先级"):
                    gr.Markdown("该参数只在自动音素转换时生效.")

                    # Three dropdowns, one per priority slot; their values are
                    # passed to prepare_text as language0..language2.
                    with gr.Column(scale=1):
                        language0 = gr.Dropdown(
                            choices=["中文", "日文", "英文"],
                            label="语言 1",
                            value="中文",
                        )

                    with gr.Column(scale=1):
                        language1 = gr.Dropdown(
                            choices=["中文", "日文", "英文"],
                            label="语言 2",
                            value="日文",
                        )

                    with gr.Column(scale=1):
                        language2 = gr.Dropdown(
                            choices=["中文", "日文", "英文"],
                            label="语言 3",
                            value="英文",
                        )

            # No event handlers are attached to these two buttons in this listing.
            with gr.Row():
                with gr.Column(scale=2):
                    generate = gr.Button(value="合成", variant="primary")
                with gr.Column(scale=1):
                    clear = gr.Button(value="清空")

        # Output side: error panel, parse-result table and audio player.
        with gr.Column(scale=3):
            error = gr.HTML(label="错误信息")
            parsed_text = gr.Dataframe(label="解析结果", headers=["ID", "文本", "语言", "音素"])
            # Nothing in this listing writes to the audio component.
            audio = gr.Audio(label="合成音频")

    # Language & Text Parsing
    # Shared arguments for every listener below: each one re-runs prepare_text
    # on the current inputs and writes its (rows, error_html) result to the
    # table and the error panel.
    kwargs = dict(
        inputs=[text, input_mode, language0, language1, language2],
        outputs=[parsed_text, error],
        # NOTE(review): per the Gradio docs this should allow one more run after
        # a pending event finishes, so the latest input is processed — confirm.
        trigger_mode="always_last",
    )
    text.change(prepare_text, **kwargs)
    input_mode.change(prepare_text, **kwargs)
    language0.change(prepare_text, **kwargs)
    language1.change(prepare_text, **kwargs)
    language2.change(prepare_text, **kwargs)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    # show_api=False presumably hides the auto-generated API page — confirm.
    app.launch(show_api=False)