Browse Source

Make it work on Windows 10 (#225)

* Automatically download models

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix

* Ensure mirror enabled

* no_proxy before mirror

* resume download

* Remove old starter

* Optimize train config pages

* No test.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Make sure it works in a Windows 10 environment

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add api usage example

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
spicysama 2 years ago
parent
commit
a2442902ac
4 changed files with 140 additions and 27 deletions
  1. 4 4
      install_env.bat
  2. 10 11
      run_cmd.bat
  3. 6 12
      start.bat
  4. 120 0
      tools/post_api.py

+ 4 - 4
install_env.bat

@@ -1,17 +1,16 @@
 @echo off
 chcp 65001
 
-:: 设置是否使用镜像站的标志,true 表示使用,false 表示不使用
 set USE_MIRROR=true
 echo use_mirror = %USE_MIRROR%
-
-set no_proxy="127.0.0.1, 0.0.0.0, localhost"
 setlocal enabledelayedexpansion
 
 cd /D "%~dp0"
 
 set PATH="%PATH%";%SystemRoot%\system32
 
+echo %PATH%
+
 :: 安装Miniconda
 :: 检查是否有特殊字符
 echo "%CD%"| findstr /R /C:"[!#\$%&()\*+,;<=>?@\[\]\^`{|}~\u4E00-\u9FFF ] " >nul && (
@@ -20,6 +19,7 @@ echo "%CD%"| findstr /R /C:"[!#\$%&()\*+,;<=>?@\[\]\^`{|}~\u4E00-\u9FFF ] " >nul
         goto end
     )
 )
+
 :: 解决跨驱动器安装问题
 set TMP=%CD%\fishenv
 set TEMP=%CD%\fishenv
@@ -35,6 +35,7 @@ set API_FLAG_PATH=%~dp0API_FLAGS.txt
 set MINICONDA_DOWNLOAD_URL=https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py310_23.3.1-0-Windows-x86_64.exe
 set MINICONDA_CHECKSUM=307194e1f12bbeb52b083634e89cc67db4f7980bd542254b43d3309eaf7cb358
 set conda_exists=F
+
 :: 确定是否要安装conda
 call "%CONDA_ROOT_PREFIX%\_conda.exe" --version >nul 2>&1
 if "%ERRORLEVEL%" EQU "0" set conda_exists=T
@@ -134,7 +135,6 @@ for %%p in (%packages%) do (
 )
 
 
-
 if not "!install_packages!"=="" (
     echo.
     echo 正在安装以下包: !install_packages!

+ 10 - 11
run_cmd.bat

@@ -8,45 +8,44 @@ cd /D "%~dp0"
 
 set PATH="%PATH%";%SystemRoot%\system32
 
-:: 检查是否有特殊字符
+
 echo "%CD%"| findstr /R /C:"[!#\$%&()\*+,;<=>?@\[\]\^`{|}~\u4E00-\u9FFF ] " >nul && (
     echo.
-    echo 当前路径中存在特殊字符,请使fish-speech的路径不含特殊字符后再运行。 && (
+    echo There are special characters in the current path, please make the path of fish-speech free of special characters before running. && (
         goto end
     )
 )
 
-:: 解决跨驱动器安装问题
+
 set TMP=%CD%\fishenv
 set TEMP=%CD%\fishenv
 
-:: 取消激活已经激活的环境
+
 (call conda deactivate && call conda deactivate && call conda deactivate) 2>nul
 
-:: 安装路径配置
+
 set CONDA_ROOT_PREFIX=%cd%\fishenv\conda
 set INSTALL_ENV_DIR=%cd%\fishenv\env
 
-:: 环境隔离
+
 set PYTHONNOUSERSITE=1
 set PYTHONPATH=
 set PYTHONHOME=
 set "CUDA_PATH=%INSTALL_ENV_DIR%"
 set "CUDA_HOME=%CUDA_PATH%"
 
-:: 激活环境
+
 call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%"
-:: 检查环境是否成功激活
+
 if errorlevel 1 (
     echo.
-    echo 环境激活失败
+    echo Environment activation failed.
     goto end
 ) else (
     echo.
-    echo 环境激活成功
+    echo Environment activation succeeded.
 )
 
-:: 进入cmd
 cmd /k "%*"
 
 :end

+ 6 - 12
start.bat

@@ -6,7 +6,6 @@ set PYTHONPATH=%~dp0
 set PYTHON_CMD=%cd%\fishenv\env\python
 set API_FLAG_PATH=%~dp0API_FLAGS.txt
 
-:: 设置Hugging Face镜像源
 set no_proxy="localhost, 127.0.0.1, 0.0.0.0"
 set HF_ENDPOINT=https://hf-mirror.com
 %PYTHON_CMD% .\tools\download_models.py
@@ -15,11 +14,10 @@ setlocal enabledelayedexpansion
 
 set "API_FLAGS="
 set "flags="
-:: 检查API_FLAG文件是否存在
+
 if exist "%API_FLAG_PATH%" (
     for /f "usebackq tokens=*" %%a in ("%API_FLAG_PATH%") do (
         set "line=%%a"
-        :: 去除行尾的反斜杠和空白字符,并且跳过以#开头的行
         if not "!line:~0,1!"=="#" (
             set "line=!line: =<SPACE>!"
             set "line=!line:\=!"
@@ -31,26 +29,23 @@ if exist "%API_FLAG_PATH%" (
     )
 )
 
-:: 去除API_FLAGS变量最后的空格
+
 if not "!API_FLAGS!"=="" set "API_FLAGS=!API_FLAGS:~0,-1!"
 
-:: 初始化 flags 变量
 set "flags="
 
-:: 检查是否包含 --api 参数
 echo !API_FLAGS! | findstr /C:"--api" >nul 2>&1
 if !errorlevel! equ 0 (
     echo.
-    echo 启动HTTP API推理
+    echo Start HTTP API...
     set "mode=api"
     goto process_flags
 )
 
-:: 检查是否包含 --infer 参数
 echo !API_FLAGS! | findstr /C:"--infer" >nul 2>&1
 if !errorlevel! equ 0 (
     echo.
-    echo 启动WebUI推理
+    echo Start WebUI Inference...
     set "mode=infer"
     goto process_flags
 )
@@ -63,12 +58,10 @@ for %%p in (!API_FLAGS!) do (
     )
 )
 
-:: 去除 flags 变量开头的空格
 if not "!flags!"=="" set "flags=!flags:~1!"
 
 echo Debug: flags = !flags!
 
-:: 根据 mode 变量启动相应的推理
 if "!mode!"=="api" (
     %PYTHON_CMD% -m tools.api !flags!
 ) else if "!mode!"=="infer" (
@@ -76,9 +69,10 @@ if "!mode!"=="api" (
 )
 
 echo.
-echo 接下来启动页面
+echo Next launch the page...
 %PYTHON_CMD% fish_speech\webui\manage.py
 
+
 :end
 endlocal
 pause

+ 120 - 0
tools/post_api.py

@@ -0,0 +1,120 @@
+import argparse
+import base64
+import json
+
+import pyaudio
+import requests
+
+
+def wav_to_base64(file_path):
+    """Return the contents of ``file_path`` as a Base64-encoded string.
+
+    The file is opened in binary mode, its raw bytes are Base64-encoded,
+    and the encoded bytes are decoded as UTF-8 so the result is a ``str``.
+    The content is not inspected, so any file is accepted.
+    """
+    with open(file_path, "rb") as source:
+        raw_bytes = source.read()
+    return base64.b64encode(raw_bytes).decode("utf-8")
+
+
+def play_audio(audio_content, format, channels, rate):
+    """Write ``audio_content`` to a newly opened PyAudio output stream.
+
+    Args:
+        audio_content: Bytes written to the output stream as-is.
+        format: PyAudio sample format constant passed to ``open``.
+        channels: Number of output channels.
+        rate: Output sample rate passed to ``open``.
+
+    The stream and the PyAudio instance are released even when opening
+    the stream or writing to it raises.
+
+    NOTE(review): the bytes are not parsed; if the caller passes a complete
+    WAV file, any container header is written to the device like sample
+    data -- confirm whether that is intended.
+    """
+    p = pyaudio.PyAudio()
+    try:
+        stream = p.open(format=format, channels=channels, rate=rate, output=True)
+        try:
+            stream.write(audio_content)
+            stream.stop_stream()
+        finally:
+            # Close the stream even if the write failed part-way through.
+            stream.close()
+    finally:
+        # Always release the PyAudio instance, including when open() fails.
+        p.terminate()
+
+
+def _str2bool(value):
+    """Parse a command-line boolean such as ``true``/``false`` or ``1``/``0``.
+
+    ``type=bool`` is unsuitable for argparse because ``bool("False")`` is
+    ``True``; this helper maps the usual spellings explicitly. An empty
+    string is treated as ``False``, which matches what ``bool("")`` gave.
+
+    Raises:
+        argparse.ArgumentTypeError: If ``value`` is not a recognised spelling.
+    """
+    if isinstance(value, bool):
+        return value
+    normalized = str(value).strip().lower()
+    if normalized in ("true", "t", "yes", "y", "on", "1"):
+        return True
+    if normalized in ("false", "f", "no", "n", "off", "0", ""):
+        return False
+    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")
+
+
+# Command-line client: posts the text and a Base64-encoded reference audio
+# file to the server URL, then plays the returned audio (and, when not
+# streaming, also saves it to 'generated_audio.wav').
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Send a WAV file and text to a server and receive synthesized audio."
+    )
+
+    parser.add_argument(
+        "--url", "-u", type=str, required=True, help="URL of the server"
+    )
+    parser.add_argument(
+        "--text", "-t", type=str, required=True, help="Text to be synthesized"
+    )
+    parser.add_argument(
+        "--reference_audio", "-ra", type=str, required=True, help="Path to the WAV file"
+    )
+    parser.add_argument(
+        "--reference_text",
+        "-rt",
+        type=str,
+        required=True,
+        help="Reference text for voice synthesis",
+    )
+    parser.add_argument(
+        "--max_new_tokens", type=int, default=0, help="Maximum new tokens to generate"
+    )
+    parser.add_argument(
+        "--chunk_length", type=int, default=30, help="Chunk length for synthesis"
+    )
+    parser.add_argument(
+        "--top_p", type=float, default=0.7, help="Top-p sampling for synthesis"
+    )
+    parser.add_argument(
+        "--repetition_penalty",
+        type=float,
+        default=1.5,
+        help="Repetition penalty for synthesis",
+    )
+    parser.add_argument(
+        "--temperature", type=float, default=0.7, help="Temperature for sampling"
+    )
+    parser.add_argument(
+        "--speaker", type=str, default=None, help="Speaker ID for voice synthesis"
+    )
+    parser.add_argument("--format", type=str, default="wav", help="Audio format")
+    # `--streaming`, `--streaming true` and `--streaming false` are all
+    # accepted; with type=bool, "false" was silently parsed as True.
+    parser.add_argument(
+        "--streaming",
+        type=_str2bool,
+        nargs="?",
+        const=True,
+        default=False,
+        help="Enable streaming response",
+    )
+    parser.add_argument(
+        "--channels", type=int, default=1, help="Number of audio channels"
+    )
+    parser.add_argument("--rate", type=int, default=44100, help="Sample rate for audio")
+
+    args = parser.parse_args()
+
+    base64_audio = wav_to_base64(args.reference_audio)
+
+    # JSON request body sent to the server.
+    data = {
+        "text": args.text,
+        "reference_text": args.reference_text,
+        "reference_audio": base64_audio,
+        "max_new_tokens": args.max_new_tokens,
+        "chunk_length": args.chunk_length,
+        "top_p": args.top_p,
+        "repetition_penalty": args.repetition_penalty,
+        "temperature": args.temperature,
+        "speaker": args.speaker,
+        "format": args.format,
+        "streaming": args.streaming,
+    }
+
+    response = requests.post(args.url, json=data, stream=args.streaming)
+
+    audio_format = pyaudio.paInt16  # Assuming 16-bit PCM format
+
+    if response.status_code == 200:
+        if args.streaming:
+            # Play chunks as they arrive; release the audio device even if
+            # the connection drops or a write fails mid-stream.
+            p = pyaudio.PyAudio()
+            try:
+                stream = p.open(
+                    format=audio_format,
+                    channels=args.channels,
+                    rate=args.rate,
+                    output=True,
+                )
+                try:
+                    for chunk in response.iter_content(chunk_size=1024):
+                        if chunk:
+                            stream.write(chunk)
+                    stream.stop_stream()
+                finally:
+                    stream.close()
+            finally:
+                p.terminate()
+        else:
+            audio_content = response.content
+
+            with open("generated_audio.wav", "wb") as audio_file:
+                audio_file.write(audio_content)
+
+            play_audio(audio_content, audio_format, args.channels, args.rate)
+            print("Audio has been saved to 'generated_audio.wav'.")
+    else:
+        print(f"Request failed with status code {response.status_code}")
+        # The error body is not guaranteed to be JSON (e.g. a proxy's HTML
+        # error page); fall back to the raw text instead of crashing.
+        try:
+            print(response.json())
+        except ValueError:
+            print(response.text)