Parcourir la source

Add Audio Select to WebUI (#556)

* Add Audio Select to WebUI

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
PoTaTo il y a 1 an
Parent
commit
aa9f85a205

+ 3 - 0
docs/en/inference.md

@@ -118,6 +118,9 @@ python -m tools.webui \
     --decoder-config-name firefly_gan_vq
 ```
 
+!!! note
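+    You can save the label file and the reference audio file to the `examples` folder in the main directory in advance (you need to create this folder yourself), so that you can select them directly in the WebUI. The audio file must be a `.wav` or `.mp3` file, and its label file must be a `.lab` file with the same base name.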
+
 !!! note
     You can use Gradio environment variables, such as `GRADIO_SHARE`, `GRADIO_SERVER_PORT`, `GRADIO_SERVER_NAME` to configure WebUI.
 

+ 3 - 0
docs/ja/inference.md

@@ -151,6 +151,9 @@ python -m tools.webui \
     --decoder-config-name firefly_gan_vq
 ```
 
+!!! note
+    ラベルファイルと参照音声ファイルをメインディレクトリの examples フォルダ(自分で作成する必要があります)に事前に保存しておくことで、WebUI で直接呼び出すことができます。
+
 !!! note
     Gradio 環境変数(`GRADIO_SHARE`、`GRADIO_SERVER_PORT`、`GRADIO_SERVER_NAME`など)を使用して WebUI を構成できます。
 

+ 3 - 0
docs/pt/inference.md

@@ -147,6 +147,9 @@ python -m tools.webui \
     --decoder-config-name firefly_gan_vq
 ```
 
+!!! note
+    Você pode salvar antecipadamente o arquivo de rótulos e o arquivo de áudio de referência na pasta examples do diretório principal (que você precisa criar), para que possa chamá-los diretamente na WebUI.
+
 !!! note
     É possível usar variáveis de ambiente do Gradio, como `GRADIO_SHARE`, `GRADIO_SERVER_PORT`, `GRADIO_SERVER_NAME`, para configurar a WebUI.
 

+ 3 - 0
docs/zh/inference.md

@@ -128,6 +128,9 @@ python -m tools.webui \
     --decoder-config-name firefly_gan_vq
 ```
 
+!!! note
+    你可以提前将 label 文件和参考音频文件保存到主目录下的 examples 文件夹(需要自行创建),这样你可以直接在 WebUI 中调用它们。
+
 !!! note
     你可以使用 Gradio 环境变量, 如 `GRADIO_SHARE`, `GRADIO_SERVER_PORT`, `GRADIO_SERVER_NAME` 来配置 WebUI.
 

+ 2 - 1
fish_speech/i18n/locale/en_US.json

@@ -118,5 +118,6 @@
   "new": "new",
   "Realtime Transform Text": "Realtime Transform Text",
   "Normalization Result Preview (Currently Only Chinese)": "Normalization Result Preview (Currently Only Chinese)",
-  "Text Normalization": "Text Normalization"
+  "Text Normalization": "Text Normalization",
+  "Select Example Audio": "Select Example Audio"
 }

+ 2 - 1
fish_speech/i18n/locale/es_ES.json

@@ -118,5 +118,6 @@
   "new": "nuevo",
   "Realtime Transform Text": "Transformación de Texto en Tiempo Real",
   "Normalization Result Preview (Currently Only Chinese)": "Vista Previa del Resultado de Normalización (Actualmente Solo Chino)",
-  "Text Normalization": "Normalización de Texto"
+  "Text Normalization": "Normalización de Texto",
+  "Select Example Audio": "Seleccionar audio de ejemplo"
 }

+ 2 - 2
fish_speech/i18n/locale/ja_JP.json

@@ -118,6 +118,6 @@
   "new": "新規",
   "Realtime Transform Text": "リアルタイム変換テキスト",
   "Normalization Result Preview (Currently Only Chinese)": "正規化結果プレビュー(現在は中国語のみ)",
-  "Text Normalization": "テキスト正規化"
-
+  "Text Normalization": "テキスト正規化",
+  "Select Example Audio": "サンプル音声を選択"
 }

+ 2 - 1
fish_speech/i18n/locale/zh_CN.json

@@ -118,5 +118,6 @@
   "new": "创建新的检查点",
   "Realtime Transform Text": "实时规范化文本",
   "Normalization Result Preview (Currently Only Chinese)": "规范化结果预览",
-  "Text Normalization": "文本规范化"
+  "Text Normalization": "文本规范化",
+  "Select Example Audio": "选择参考音频"
 }

+ 34 - 0
tools/webui.py

@@ -324,6 +324,20 @@ def build_app():
                         enable_reference_audio = gr.Checkbox(
                             label=i18n("Enable Reference Audio"),
                         )
+
+                        # Add dropdown for selecting example audio files
+                        examples_dir = Path("examples")
+                        if not examples_dir.exists():
+                            examples_dir.mkdir()
+                        example_audio_files = [
+                            f.name for f in examples_dir.glob("*.wav")
+                        ] + [f.name for f in examples_dir.glob("*.mp3")]
+                        example_audio_dropdown = gr.Dropdown(
+                            label=i18n("Select Example Audio"),
+                            choices=[""] + example_audio_files,
+                            value="",
+                        )
+
                         reference_audio = gr.Audio(
                             label=i18n("Reference Audio"),
                             type="filepath",
@@ -383,6 +397,26 @@ def build_app():
             fn=normalize_text, inputs=[text, if_refine_text], outputs=[refined_text]
         )
 
+        def select_example_audio(audio_file):
+            if audio_file:
+                audio_path = examples_dir / audio_file
+                lab_file = audio_path.with_suffix(".lab")
+
+                if lab_file.exists():
+                    lab_content = lab_file.read_text(encoding="utf-8").strip()
+                else:
+                    lab_content = ""
+
+                return str(audio_path), lab_content, True
+            return None, "", False
+
+        # Connect the dropdown to update reference audio and text
+        example_audio_dropdown.change(
+            fn=select_example_audio,
+            inputs=[example_audio_dropdown],
+            outputs=[reference_audio, reference_text, enable_reference_audio],
+        )
+
         # # Submit
         generate.click(
             inference_wrapper,