2 лет назад · a2442902ac
--- a/install_env.bat
+++ b/install_env.bat
@@ -1,17 +1,16 @@
 
				 @echo off
			
 
				 chcp 65001
			
 
				 
			
 
				-:: 设置是否使用镜像站的标志，true 表示使用，false 表示不使用
			
 
				 set USE_MIRROR=true
			
 
				 echo use_mirror = %USE_MIRROR%
			
 
				-
			
 
				-set no_proxy="127.0.0.1, 0.0.0.0, localhost"
			
 
				 setlocal enabledelayedexpansion
			
 
				 
			
 
				 cd /D "%~dp0"
			
 
				 
			
 
				 set PATH="%PATH%";%SystemRoot%\system32
			
 
				 
			
 
				+echo %PATH%
			
 
				+
			
 
				 :: 安装Miniconda
			
 
				 :: 检查是否有特殊字符
			
 
				 echo "%CD%"| findstr /R /C:"[!#\$%&()\*+,;<=>?@\[\]\^`{|}~\u4E00-\u9FFF ] " >nul && (
			
@@ -20,6 +19,7 @@ echo "%CD%"| findstr /R /C:"[!#\$%&()\*+,;<=>?@\[\]\^`{|}~\u4E00-\u9FFF ] " >nul
 
				         goto end
			
 
				     )
			
 
				 )
			
 
				+
			
 
				 :: 解决跨驱动器安装问题
			
 
				 set TMP=%CD%\fishenv
			
 
				 set TEMP=%CD%\fishenv
			
@@ -35,6 +35,7 @@ set API_FLAG_PATH=%~dp0API_FLAGS.txt
 
				 set MINICONDA_DOWNLOAD_URL=https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-py310_23.3.1-0-Windows-x86_64.exe
			
 
				 set MINICONDA_CHECKSUM=307194e1f12bbeb52b083634e89cc67db4f7980bd542254b43d3309eaf7cb358
			
 
				 set conda_exists=F
			
 
				+
			
 
				 :: 确定是否要安装conda
			
 
				 call "%CONDA_ROOT_PREFIX%\_conda.exe" --version >nul 2>&1
			
 
				 if "%ERRORLEVEL%" EQU "0" set conda_exists=T
			
@@ -134,7 +135,6 @@ for %%p in (%packages%) do (
 
				 )
			
 
				 
			
 
				 
			
 
				-
			
 
				 if not "!install_packages!"=="" (
			
 
				     echo.
			
 
				     echo 正在安装以下包: !install_packages!
			
--- a/run_cmd.bat
+++ b/run_cmd.bat
@@ -8,45 +8,44 @@ cd /D "%~dp0"
 
				 
			
 
				 set PATH="%PATH%";%SystemRoot%\system32
			
 
				 
			
 
				-:: 检查是否有特殊字符
			
 
				+
			
 
				 echo "%CD%"| findstr /R /C:"[!#\$%&()\*+,;<=>?@\[\]\^`{|}~\u4E00-\u9FFF ] " >nul && (
			
 
				     echo.
			
 
				-    echo 当前路径中存在特殊字符，请使fish-speech的路径不含特殊字符后再运行。 && (
			
 
				+    echo There are special characters in the current path, please make the path of fish-speech free of special characters before running. && (
			
 
				         goto end
			
 
				     )
			
 
				 )
			
 
				 
			
 
				-:: 解决跨驱动器安装问题
			
 
				+
			
 
				 set TMP=%CD%\fishenv
			
 
				 set TEMP=%CD%\fishenv
			
 
				 
			
 
				-:: 取消激活已经激活的环境
			
 
				+
			
 
				 (call conda deactivate && call conda deactivate && call conda deactivate) 2>nul
			
 
				 
			
 
				-:: 安装路径配置
			
 
				+
			
 
				 set CONDA_ROOT_PREFIX=%cd%\fishenv\conda
			
 
				 set INSTALL_ENV_DIR=%cd%\fishenv\env
			
 
				 
			
 
				-:: 环境隔离
			
 
				+
			
 
				 set PYTHONNOUSERSITE=1
			
 
				 set PYTHONPATH=
			
 
				 set PYTHONHOME=
			
 
				 set "CUDA_PATH=%INSTALL_ENV_DIR%"
			
 
				 set "CUDA_HOME=%CUDA_PATH%"
			
 
				 
			
 
				-:: 激活环境
			
 
				+
			
 
				 call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%"
			
 
				-:: 检查环境是否成功激活
			
 
				+
			
 
				 if errorlevel 1 (
			
 
				     echo.
			
 
				-    echo 环境激活失败
			
 
				+    echo Environment activation failed.
			
 
				     goto end
			
 
				 ) else (
			
 
				     echo.
			
 
				-    echo 环境激活成功
			
 
				+    echo Environment activation succeeded.
			
 
				 )
			
 
				 
			
 
				-:: 进入cmd
			
 
				 cmd /k "%*"
			
 
				 
			
 
				 :end
			
--- a/start.bat
+++ b/start.bat
@@ -6,7 +6,6 @@ set PYTHONPATH=%~dp0
 
				 set PYTHON_CMD=%cd%\fishenv\env\python
			
 
				 set API_FLAG_PATH=%~dp0API_FLAGS.txt
			
 
				 
			
 
				-:: 设置Hugging Face镜像源
			
 
				 set no_proxy="localhost, 127.0.0.1, 0.0.0.0"
			
 
				 set HF_ENDPOINT=https://hf-mirror.com
			
 
				 %PYTHON_CMD% .\tools\download_models.py
			
@@ -15,11 +14,10 @@ setlocal enabledelayedexpansion
 
				 
			
 
				 set "API_FLAGS="
			
 
				 set "flags="
			
 
				-:: 检查API_FLAG文件是否存在
			
 
				+
			
 
				 if exist "%API_FLAG_PATH%" (
			
 
				     for /f "usebackq tokens=*" %%a in ("%API_FLAG_PATH%") do (
			
 
				         set "line=%%a"
			
 
				-        :: 去除行尾的反斜杠和空白字符，并且跳过以#开头的行
			
 
				         if not "!line:~0,1!"=="#" (
			
 
				             set "line=!line: =<SPACE>!"
			
 
				             set "line=!line:\=!"
			
@@ -31,26 +29,23 @@ if exist "%API_FLAG_PATH%" (
 
				     )
			
 
				 )
			
 
				 
			
 
				-:: 去除API_FLAGS变量最后的空格
			
 
				+
			
 
				 if not "!API_FLAGS!"=="" set "API_FLAGS=!API_FLAGS:~0,-1!"
			
 
				 
			
 
				-:: 初始化 flags 变量
			
 
				 set "flags="
			
 
				 
			
 
				-:: 检查是否包含 --api 参数
			
 
				 echo !API_FLAGS! | findstr /C:"--api" >nul 2>&1
			
 
				 if !errorlevel! equ 0 (
			
 
				     echo.
			
 
				-    echo 启动HTTP API推理
			
 
				+    echo Start HTTP API...
			
 
				     set "mode=api"
			
 
				     goto process_flags
			
 
				 )
			
 
				 
			
 
				-:: 检查是否包含 --infer 参数
			
 
				 echo !API_FLAGS! | findstr /C:"--infer" >nul 2>&1
			
 
				 if !errorlevel! equ 0 (
			
 
				     echo.
			
 
				-    echo 启动WebUI推理
			
 
				+    echo Start WebUI Inference...
			
 
				     set "mode=infer"
			
 
				     goto process_flags
			
 
				 )
			
@@ -63,12 +58,10 @@ for %%p in (!API_FLAGS!) do (
 
				     )
			
 
				 )
			
 
				 
			
 
				-:: 去除 flags 变量开头的空格
			
 
				 if not "!flags!"=="" set "flags=!flags:~1!"
			
 
				 
			
 
				 echo Debug: flags = !flags!
			
 
				 
			
 
				-:: 根据 mode 变量启动相应的推理
			
 
				 if "!mode!"=="api" (
			
 
				     %PYTHON_CMD% -m tools.api !flags!
			
 
				 ) else if "!mode!"=="infer" (
			
@@ -76,9 +69,10 @@ if "!mode!"=="api" (
 
				 )
			
 
				 
			
 
				 echo.
			
 
				-echo 接下来启动页面
			
 
				+echo Next launch the page...
			
 
				 %PYTHON_CMD% fish_speech\webui\manage.py
			
 
				 
			
 
				+
			
 
				 :end
			
 
				 endlocal
			
 
				 pause
			
--- a/tools/post_api.py
+++ b/tools/post_api.py
@@ -0,0 +1,120 @@
 
				+import argparse
			
 
				+import base64
			
 
				+import json
			
 
				+
			
 
				+import pyaudio
			
 
				+import requests
			
 
				+
			
 
				+
			
 
				def wav_to_base64(file_path):
				    """Read a file in binary mode and return its bytes as base64 text.

				    Args:
				        file_path: Path of the file to read.

				    Returns:
				        The base64 encoding of the file's bytes, decoded to a ``str``
				        with UTF-8.
				    """
				    with open(file_path, "rb") as source:
				        raw_bytes = source.read()
				    # Encoding happens after the handle is closed; the result is the same.
				    return base64.b64encode(raw_bytes).decode("utf-8")
			
 
				+
			
 
				+
			
 
				def play_audio(audio_content, format, channels, rate):
				    """Write ``audio_content`` to a PyAudio output stream.

				    Args:
				        audio_content: Bytes handed to ``stream.write`` unchanged.
				        format: Value passed as ``format`` to ``PyAudio.open``.
				        channels: Value passed as ``channels`` to ``PyAudio.open``.
				        rate: Value passed as ``rate`` to ``PyAudio.open``.

				    Any exception raised while opening or writing propagates to the
				    caller, but the stream and the PyAudio instance are released first.
				    """
				    p = pyaudio.PyAudio()
				    try:
				        stream = p.open(format=format, channels=channels, rate=rate, output=True)
				        try:
				            stream.write(audio_content)
				            stream.stop_stream()
				        finally:
				            # Close even when write() fails so the stream is not leaked.
				            stream.close()
				    finally:
				        p.terminate()
			
 
				+
			
 
				+
			
 
				def _str_to_bool(value):
				    """Convert a command-line token to ``bool``.

				    ``type=bool`` would treat every non-empty string, including "False",
				    as true, so the accepted spellings are matched explicitly instead.

				    Raises:
				        argparse.ArgumentTypeError: If ``value`` is not a recognised
				            boolean spelling.
				    """
				    if isinstance(value, bool):
				        return value
				    normalized = value.strip().lower()
				    if normalized in ("true", "t", "yes", "y", "1", "on"):
				        return True
				    if normalized in ("false", "f", "no", "n", "0", "off", ""):
				        return False
				    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


				def build_parser():
				    """Return the argument parser for this script."""
				    parser = argparse.ArgumentParser(
				        description="Send a WAV file and text to a server and receive synthesized audio."
				    )

				    parser.add_argument(
				        "--url", "-u", type=str, required=True, help="URL of the server"
				    )
				    parser.add_argument(
				        "--text", "-t", type=str, required=True, help="Text to be synthesized"
				    )
				    parser.add_argument(
				        "--reference_audio", "-ra", type=str, required=True, help="Path to the WAV file"
				    )
				    parser.add_argument(
				        "--reference_text",
				        "-rt",
				        type=str,
				        required=True,
				        help="Reference text for voice synthesis",
				    )
				    parser.add_argument(
				        "--max_new_tokens", type=int, default=0, help="Maximum new tokens to generate"
				    )
				    parser.add_argument(
				        "--chunk_length", type=int, default=30, help="Chunk length for synthesis"
				    )
				    parser.add_argument(
				        "--top_p", type=float, default=0.7, help="Top-p sampling for synthesis"
				    )
				    parser.add_argument(
				        "--repetition_penalty",
				        type=float,
				        default=1.5,
				        help="Repetition penalty for synthesis",
				    )
				    parser.add_argument(
				        "--temperature", type=float, default=0.7, help="Temperature for sampling"
				    )
				    parser.add_argument(
				        "--speaker", type=str, default=None, help="Speaker ID for voice synthesis"
				    )
				    parser.add_argument("--format", type=str, default="wav", help="Audio format")
				    # Accepts "--streaming True", "--streaming False" and a bare
				    # "--streaming" (which means True); omitted means False.
				    parser.add_argument(
				        "--streaming",
				        type=_str_to_bool,
				        nargs="?",
				        const=True,
				        default=False,
				        help="Enable streaming response",
				    )
				    parser.add_argument(
				        "--channels", type=int, default=1, help="Number of audio channels"
				    )
				    parser.add_argument("--rate", type=int, default=44100, help="Sample rate for audio")
				    return parser


				def main(argv=None):
				    """Post the synthesis request and play (and, if not streaming, save) the reply.

				    Args:
				        argv: Argument list to parse; ``None`` makes argparse read
				            ``sys.argv``.
				    """
				    args = build_parser().parse_args(argv)

				    base64_audio = wav_to_base64(args.reference_audio)

				    data = {
				        "text": args.text,
				        "reference_text": args.reference_text,
				        "reference_audio": base64_audio,
				        "max_new_tokens": args.max_new_tokens,
				        "chunk_length": args.chunk_length,
				        "top_p": args.top_p,
				        "repetition_penalty": args.repetition_penalty,
				        "temperature": args.temperature,
				        "speaker": args.speaker,
				        "format": args.format,
				        "streaming": args.streaming,
				    }

				    response = requests.post(args.url, json=data, stream=args.streaming)

				    audio_format = pyaudio.paInt16  # Assuming 16-bit PCM format

				    if response.status_code != 200:
				        print(f"Request failed with status code {response.status_code}")
				        try:
				            print(response.json())
				        except ValueError:
				            # The error body is not JSON; show it verbatim instead of crashing.
				            print(response.text)
				        return

				    if args.streaming:
				        p = pyaudio.PyAudio()
				        try:
				            stream = p.open(
				                format=audio_format, channels=args.channels, rate=args.rate, output=True
				            )
				            try:
				                for chunk in response.iter_content(chunk_size=1024):
				                    if chunk:
				                        stream.write(chunk)
				                stream.stop_stream()
				            finally:
				                # Release the device even if the download or playback fails.
				                stream.close()
				        finally:
				            p.terminate()
				    else:
				        audio_content = response.content

				        with open("generated_audio.wav", "wb") as audio_file:
				            audio_file.write(audio_content)

				        # NOTE(review): the whole response body is played as raw PCM; if the
				        # server returns a complete WAV file the header bytes are played
				        # too. Confirm against the server's response format.
				        play_audio(audio_content, audio_format, args.channels, args.rate)
				        print("Audio has been saved to 'generated_audio.wav'.")


				if __name__ == "__main__":
				    main()