ソースを参照

Add some corner cases (#158)

* Fix button height

* Streaming support

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Convert to 1 channel

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix Conversion bug

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix target path

* Add checkpoint selection

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix gpup decorator

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add link for labeler

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Localize labeler

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add LoRA llama config

* Allow download stream audio

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* asr

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add cache auto recycling

* 多打了一个字母

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Check 'compile' available

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
spicysama 1 年間 前
コミット
12915468c8

+ 1 - 1
.gitignore

@@ -19,7 +19,7 @@ filelists
 /.idea
 ffmpeg.exe
 ffprobe.exe
-asr-label-win-x64.exe
+asr-label*
 /.cache
 /fishenv
 /.locale

+ 1 - 0
fish_speech/i18n/locale/zh_CN.json

@@ -56,6 +56,7 @@
     "Open Tensorboard": "打开 Tensorboard",
     "Opened labeler in browser": "在浏览器中打开标注工具",
     "Optional Label Language": "[可选] 标注语言",
+    "Optional online ver": "[可选] 使用在线版",
     "Output Path": "输出路径",
     "Path error, please check the model file exists in the corresponding path": "路径错误,请检查模型文件是否存在于相应路径",
     "Precision": "精度",

+ 6 - 0
fish_speech/webui/launch_utils.py

@@ -1,3 +1,4 @@
+import importlib.util
 import os
 import subprocess
 import sys
@@ -17,6 +18,11 @@ GIT = (
 GIT = str(GIT)
 
 
+def is_module_installed(module_name: str) -> bool:
+    spec = importlib.util.find_spec(module_name)
+    return spec is not None
+
+
 @lru_cache()
 def commit_hash():
     try:
+ 68 - 14
fish_speech/webui/manage.py

@@ -8,7 +8,6 @@ import shutil
 import signal
 import subprocess
 import sys
-import webbrowser
 from pathlib import Path
 
 import gradio as gr
@@ -18,7 +17,7 @@ from loguru import logger
 from tqdm import tqdm
 
 from fish_speech.i18n import i18n
-from fish_speech.webui.launch_utils import Seafoam, versions_html
+from fish_speech.webui.launch_utils import Seafoam, is_module_installed, versions_html
 
 PYTHON = os.path.join(os.environ.get("PYTHON_FOLDERPATH", ""), "python")
 sys.path.insert(0, "")
@@ -51,6 +50,15 @@ def build_html_ok_message(msg):
     """
     """
 
 
 
 
+def build_html_href(link, desc, msg):
+    return f"""
+    <span style="color: green; font-weight: bold; display: inline-block">
+        {html.escape(msg)}
+        <a href="{link}">{desc}</a>
+    </span>
+    """
+
+
 def load_data_in_raw(path):
 def load_data_in_raw(path):
     with open(path, "r", encoding="utf-8") as file:
     with open(path, "r", encoding="utf-8") as file:
         data = file.read()
         data = file.read()
@@ -94,14 +102,42 @@ def kill_process(pid):
 
 
 def change_label(if_label):
 def change_label(if_label):
     global p_label
     global p_label
-    if if_label == True:
-        # 设置要访问的URL
-        url = "https://text-labeler.pages.dev/"
-        webbrowser.open(url)
-        yield i18n("Opened labeler in browser")
-    elif if_label == False:
+    if if_label == True and p_label is None:
+        url = "http://localhost:3000"
+        remote_url = "https://text-labeler.pages.dev/"
+        p_label = subprocess.Popen(
+            [
+                "asr-label-linux-x64"
+                if sys.platform == "linux"
+                else "asr-label-win-x64.exe"
+            ]
+        )
+        yield build_html_href(
+            link=remote_url,
+            desc=i18n("Optional online ver"),
+            msg=i18n("Opened labeler in browser"),
+        )
+
+    elif if_label == False and p_label is not None:
+        kill_process(p_label.pid)
         p_label = None
         p_label = None
-        yield "Nothing"
+        yield build_html_ok_message("Nothing")
+
+
+def clean_infer_cache():
+    import tempfile
+
+    temp_dir = Path(tempfile.gettempdir())
+    gradio_dir = str(temp_dir / "gradio")
+    try:
+        shutil.rmtree(gradio_dir)
+        logger.info(f"Deleted cached audios: {gradio_dir}")
+    except PermissionError:
+        logger.info(f"Permission denied: Unable to delete {gradio_dir}")
+    except FileNotFoundError:
+        logger.info(f"{gradio_dir} was not found")
+    except Exception as e:
+        logger.info(f"An error occurred: {e}")
 
 
 
 
 def change_infer(
 def change_infer(
@@ -124,6 +160,9 @@ def change_infer(
         yield build_html_ok_message(
         yield build_html_ok_message(
             i18n("Inferring interface is launched at {}").format(url)
             i18n("Inferring interface is launched at {}").format(url)
         )
         )
+
+        clean_infer_cache()
+
         p_infer = subprocess.Popen(
         p_infer = subprocess.Popen(
             [
             [
                 PYTHON,
                 PYTHON,
@@ -141,7 +180,7 @@ def change_infer(
             env=env,
             env=env,
         )
         )
 
 
-    elif if_infer == False and p_infer != None:
+    elif if_infer == False and p_infer is not None:
         kill_process(p_infer.pid)
         kill_process(p_infer.pid)
         p_infer = None
         p_infer = None
         yield build_html_error_message(i18n("Infer interface is closed"))
         yield build_html_error_message(i18n("Infer interface is closed"))
@@ -585,7 +624,7 @@ def fresh_llama_model():
     )
     )
 
 
 
 
-def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
+def llama_lora_merge(llama_weight, lora_llama_config, lora_weight, llama_lora_output):
     if (
     if (
         lora_weight is None
         lora_weight is None
         or not Path(lora_weight).exists()
         or not Path(lora_weight).exists()
@@ -601,7 +640,7 @@ def llama_lora_merge(llama_weight, lora_weight, llama_lora_output):
         PYTHON,
         PYTHON,
         "tools/llama/merge_lora.py",
         "tools/llama/merge_lora.py",
         "--llama-config",
         "--llama-config",
-        "dual_ar_2_codebook_large",
+        lora_llama_config,
         "--lora-config",
         "--lora-config",
         "r_8_alpha_16",
         "r_8_alpha_16",
         "--llama-weight",
         "--llama-weight",
@@ -902,6 +941,15 @@ with gr.Blocks(
                                 allow_custom_value=True,
                                 allow_custom_value=True,
                                 interactive=True,
                                 interactive=True,
                             )
                             )
+                            lora_llama_config = gr.Dropdown(
+                                label=i18n("LLAMA Model Config"),
+                                choices=[
+                                    "dual_ar_2_codebook_large",
+                                    "dual_ar_2_codebook_medium",
+                                ],
+                                value="dual_ar_2_codebook_large",
+                                allow_custom_value=True,
+                            )
                         with gr.Row(equal_height=False):
                         with gr.Row(equal_height=False):
                             llama_lora_output = gr.Dropdown(
                             llama_lora_output = gr.Dropdown(
                                 label=i18n("Output Path"),
                                 label=i18n("Output Path"),
@@ -994,7 +1042,13 @@ with gr.Blocks(
                                         "Compile the model can significantly reduce the inference time, but will increase cold start time"
                                         "Compile the model can significantly reduce the inference time, but will increase cold start time"
                                     ),
                                     ),
                                     choices=["Yes", "No"],
                                     choices=["Yes", "No"],
-                                    value="Yes",
+                                    value="Yes"
+                                    if (
+                                        sys.platform == "linux"
+                                        or is_module_installed("triton")
+                                    )
+                                    else "No",
+                                    interactive=is_module_installed("triton"),
                                 )
                                 )
                                 infer_llama_config = gr.Dropdown(
                                 infer_llama_config = gr.Dropdown(
                                     label=i18n("LLAMA Model Config"),
                                     label=i18n("LLAMA Model Config"),
@@ -1134,7 +1188,7 @@ with gr.Blocks(
     llama_ckpt.change(fn=fresh_llama_ckpt, inputs=[], outputs=[llama_ckpt])
     llama_ckpt.change(fn=fresh_llama_ckpt, inputs=[], outputs=[llama_ckpt])
     llama_lora_merge_btn.click(
     llama_lora_merge_btn.click(
         fn=llama_lora_merge,
         fn=llama_lora_merge,
-        inputs=[llama_weight, lora_weight, llama_lora_output],
+        inputs=[llama_weight, lora_llama_config, lora_weight, llama_lora_output],
         outputs=[train_error],
         outputs=[train_error],
     )
     )
     infer_checkbox.change(
     infer_checkbox.change(

+ 10 - 1
tools/webui.py

@@ -39,6 +39,7 @@ HEADER_MD = f"""# Fish Speech
 
 
 TEXTBOX_PLACEHOLDER = i18n("Put your text here.")
 TEXTBOX_PLACEHOLDER = i18n("Put your text here.")
 SPACE_IMPORTED = False
 SPACE_IMPORTED = False
+cached_audio = np.zeros((1,))
 
 
 
 
 def build_html_error_message(error):
 def build_html_error_message(error):
@@ -122,6 +123,8 @@ def inference(
         yield wav_chunk_header(), None
         yield wav_chunk_header(), None
 
 
     segments = []
     segments = []
+    global cached_audio
+    cached_audio = np.zeros((1,))
     while True:
     while True:
         result = payload["response_queue"].get()
         result = payload["response_queue"].get()
         if result == "next":
         if result == "next":
@@ -141,6 +144,7 @@ def inference(
         fake_audios = fake_audios.float().cpu().numpy()
         fake_audios = fake_audios.float().cpu().numpy()
 
 
         if streaming:
         if streaming:
+            cached_audio = np.concatenate([cached_audio, fake_audios], axis=0)
             yield (fake_audios * 32768).astype(np.int16).tobytes(), None
             yield (fake_audios * 32768).astype(np.int16).tobytes(), None
         else:
         else:
             segments.append(fake_audios)
             segments.append(fake_audios)
@@ -296,6 +300,11 @@ def build_app():
             [audio, error],
             [audio, error],
             concurrency_limit=1,
             concurrency_limit=1,
         )
         )
+
+        def transfer_audio():
+            global cached_audio
+            return (vqgan_model.sampling_rate, cached_audio)
+
         generate_stream.click(
         generate_stream.click(
             inference_stream,
             inference_stream,
             [
             [
@@ -312,7 +321,7 @@ def build_app():
             ],
             ],
             [stream_audio, error],
             [stream_audio, error],
             concurrency_limit=10,
             concurrency_limit=10,
-        )
+        ).then(transfer_audio, None, audio)
     return app
     return app