1 год назад · 027bfe1006
--- a/.github/workflows/build-windows-package.yml
+++ b/.github/workflows/build-windows-package.yml
@@ -42,7 +42,7 @@ jobs:
 
				         env:
			
 
				           HF_TOKEN: ${{ secrets.HF_TOKEN }}
			
 
				         run: |
			
 
				-          if [ "${{ github.actor }}" = "AnyaCoder" ]; then
			
 
				+          if [ "${{ github.actor }}" = "NoNeedToUpload" ]; then
			
 
				             echo "Author is AnyaCoder. Performing the zipping && upload."
			
 
				             zip -qr fish-speech-main-${{ github.run_id }}.zip ./fish-speech
			
 
				             huggingface-cli upload SpicyqSama007/fish-speech-packed ./fish-speech-main-${{ github.run_id }}.zip fish-speech-main-${{ github.run_id }}.zip
			
--- a/docs/en/index.md
+++ b/docs/en/index.md
@@ -99,7 +99,7 @@ conda activate fish-speech
 
				 pip3 install torch torchvision torchaudio
			
 
				 
			
 
				 # Install fish-speech
			
 
				-pip3 install -e .
			
 
				+pip3 install -e ".[stable]"
			
 
				 
			
 
				 # (Ubuntu / Debian User) Install sox
			
 
				 apt install libsox-dev
			
--- a/docs/ja/index.md
+++ b/docs/ja/index.md
@@ -1,4 +1,4 @@
 
				-# Fish Speechの紹介
			
 
				+# Fish Speech の紹介
			
 
				 
			
 
				 <div>
			
 
				 <a target="_blank" href="https://discord.gg/Es5qTB9BcN">
			
@@ -14,7 +14,7 @@
 
				 
			
 
				 !!! warning
			
 
				     私たちは、コードベースの違法な使用について一切の責任を負いません。お住まいの地域の DMCA（デジタルミレニアム著作権法）およびその他の関連法を参照してください。 <br/>
			
 
				-    このコードベースとモデルは、CC-BY-NC-SA-4.0ライセンス下でリリースされています。
			
 
				+    このコードベースとモデルは、CC-BY-NC-SA-4.0 ライセンス下でリリースされています。
			
 
				 
			
 
				 <p align="center">
			
 
				    <img src="../assets/figs/diagram.png" width="75%">
			
@@ -27,7 +27,7 @@
 
				 
			
 
				 ## Windows セットアップ
			
 
				 
			
 
				-Windowにて開発を行っている方へ: 本コードベースを実行するのに WSL2 または Docker を利用することができます。
			
 
				+Windows にて開発を行っている方へ: 本コードベースを実行するのに WSL2 または Docker を利用することができます。
			
 
				 
			
 
				 あまり詳しくない人は、Linux 環境なしでコードベースを実行するために以下の手順に従ってください。（モデルコンパイル機能`torch.compile`を利用できます。）：
			
 
				 
			
@@ -99,7 +99,7 @@ conda activate fish-speech
 
				 pip3 install torch torchvision torchaudio
			
 
				 
			
 
				 # fish-speechをインストールします。
			
 
				-pip3 install -e .
			
 
				+pip3 install -e ".[stable]"
			
 
				 
			
 
				 # (Ubuntu / Debianユーザー) soxをインストールします。
			
 
				 apt install libsox-dev
			
--- a/docs/pt/index.md
+++ b/docs/pt/index.md
@@ -13,8 +13,8 @@
 
				 </div>
			
 
				 
			
 
				 !!! warning
			
 
				-     Não nos responsabilizamos por qualquer uso ilegal do código-fonte. Consulte as leis locais sobre DMCA (Digital Millennium Copyright Act) e outras leis relevantes em sua região. <br/>
			
 
				-     Este repositório de código e os modelos são distribuídos sob a licença CC-BY-NC-SA-4.0.
			
 
				+    Não nos responsabilizamos por qualquer uso ilegal do código-fonte. Consulte as leis locais sobre DMCA (Digital Millennium Copyright Act) e outras leis relevantes em sua região. <br/>
			
 
				+    Este repositório de código e os modelos são distribuídos sob a licença CC-BY-NC-SA-4.0.
			
 
				 
			
 
				 <p align="center">
			
 
				    <img src="../assets/figs/diagrama.png" width="75%">
			
@@ -102,7 +102,7 @@ conda activate fish-speech
 
				 pip3 install torch torchvision torchaudio
			
 
				 
			
 
				 # Instale o fish-speech
			
 
				-pip3 install -e .
			
 
				+pip3 install -e ".[stable]"
			
 
				 
			
 
				 # Para os Usuário do Ubuntu / Debian: Instale o sox
			
 
				 apt install libsox-dev
			
--- a/docs/zh/index.md
+++ b/docs/zh/index.md
@@ -13,8 +13,8 @@
 
				 </div>
			
 
				 
			
 
				 !!! warning
			
 
				-    我们不对代码库的任何非法使用承担任何责任. 请参阅您当地关于 DMCA (数字千年法案) 和其他相关法律法规. <br/>
			
 
				-    此代码库与所有模型根据 CC-BY-NC-SA-4.0 许可证发布.
			
 
				+    我们不对代码库的任何非法使用承担任何责任. 请参阅您当地关于 DMCA (数字千年法案) 和其他相关法律法规. <br/>
			
 
				+    此代码库与所有模型根据 CC-BY-NC-SA-4.0 许可证发布.
			
 
				 
			
 
				 <p align="center">
			
 
				    <img src="../assets/figs/diagram.png" width="75%">
			
@@ -31,28 +31,27 @@ Windows 专业用户可以考虑 WSL2 或 docker 来运行代码库。
 
				 
			
 
				 Windows 非专业用户可考虑以下为免 Linux 环境的基础运行方法（附带模型编译功能，即 `torch.compile`）：
			
 
				 
			
 
				-
			
 
				 1. 解压项目压缩包。
			
 
				 2. 点击 `install_env.bat` 安装环境。
			
 
				-    - 可以通过编辑 `install_env.bat` 的 `USE_MIRROR` 项来决定是否使用镜像站下载。
			
 
				-    - `USE_MIRROR=false` 使用原始站下载最新稳定版 `torch` 环境。`USE_MIRROR=true` 为从镜像站下载最新 `torch` 环境。默认为 `true`。
			
 
				-    - 可以通过编辑 `install_env.bat` 的 `INSTALL_TYPE` 项来决定是否启用可编译环境下载。
			
 
				-    - `INSTALL_TYPE=preview` 下载开发版编译环境。`INSTALL_TYPE=stable` 下载稳定版不带编译环境。
			
 
				-3. 若第2步 `INSTALL_TYPE=preview` 则执行这一步（可跳过，此步为激活编译模型环境）
			
 
				-    1. 使用如下链接下载 LLVM 编译器。
			
 
				-        - [LLVM-17.0.6（原站站点下载）](https://huggingface.co/fishaudio/fish-speech-1/resolve/main/LLVM-17.0.6-win64.exe?download=true)
			
 
				-        - [LLVM-17.0.6（镜像站点下载）](https://hf-mirror.com/fishaudio/fish-speech-1/resolve/main/LLVM-17.0.6-win64.exe?download=true)
			
 
				-        - 下载完 `LLVM-17.0.6-win64.exe` 后，双击进行安装，选择合适的安装位置，最重要的是勾选 `Add Path to Current User` 添加环境变量。
			
 
				-        - 确认安装完成。
			
 
				-    2. 下载安装 Microsoft Visual C++ 可再发行程序包，解决潜在 .dll 丢失问题。
			
 
				-        - [MSVC++ 14.40.33810.0 下载](https://aka.ms/vs/17/release/vc_redist.x64.exe)
			
 
				-    3. 下载安装 Visual Studio 社区版以获取 MSVC++ 编译工具, 解决 LLVM 的头文件依赖问题。
			
 
				-        - [Visual Studio 下载](https://visualstudio.microsoft.com/zh-hans/downloads/)
			
 
				-        - 安装好Visual Studio Installer之后，下载Visual Studio Community 2022
			
 
				-        - 如下图点击`修改`按钮，找到`使用C++的桌面开发`项，勾选下载
			
 
				-    4. 下载安装 [CUDA Toolkit 12](https://developer.nvidia.com/cuda-12-1-0-download-archive?target_os=Windows&target_arch=x86_64)
			
 
				-4. 双击 `start.bat` 打开训练推理WebUI管理界面. 如有需要，可照下列提示修改`API_FLAGS`.
			
 
				-   
			
 
				+    - 可以通过编辑 `install_env.bat` 的 `USE_MIRROR` 项来决定是否使用镜像站下载。
			
 
				+    - `USE_MIRROR=false` 使用原始站下载最新稳定版 `torch` 环境。`USE_MIRROR=true` 为从镜像站下载最新 `torch` 环境。默认为 `true`。
			
 
				+    - 可以通过编辑 `install_env.bat` 的 `INSTALL_TYPE` 项来决定是否启用可编译环境下载。
			
 
				+    - `INSTALL_TYPE=preview` 下载开发版编译环境。`INSTALL_TYPE=stable` 下载稳定版不带编译环境。
			
 
				+3. 若第 2 步 `INSTALL_TYPE=preview` 则执行这一步（可跳过，此步为激活编译模型环境）
			
 
				+    1. 使用如下链接下载 LLVM 编译器。
			
 
				+        - [LLVM-17.0.6（原站站点下载）](https://huggingface.co/fishaudio/fish-speech-1/resolve/main/LLVM-17.0.6-win64.exe?download=true)
			
 
				+        - [LLVM-17.0.6（镜像站点下载）](https://hf-mirror.com/fishaudio/fish-speech-1/resolve/main/LLVM-17.0.6-win64.exe?download=true)
			
 
				+        - 下载完 `LLVM-17.0.6-win64.exe` 后，双击进行安装，选择合适的安装位置，最重要的是勾选 `Add Path to Current User` 添加环境变量。
			
 
				+        - 确认安装完成。
			
 
				+    2. 下载安装 Microsoft Visual C++ 可再发行程序包，解决潜在 .dll 丢失问题。
			
 
				+        - [MSVC++ 14.40.33810.0 下载](https://aka.ms/vs/17/release/vc_redist.x64.exe)
			
 
				+    3. 下载安装 Visual Studio 社区版以获取 MSVC++ 编译工具, 解决 LLVM 的头文件依赖问题。
			
 
				+        - [Visual Studio 下载](https://visualstudio.microsoft.com/zh-hans/downloads/)
			
 
				+        - 安装好 Visual Studio Installer 之后，下载 Visual Studio Community 2022
			
 
				+        - 如下图点击`修改`按钮，找到`使用C++的桌面开发`项，勾选下载
			
 
				+    4. 下载安装 [CUDA Toolkit 12](https://developer.nvidia.com/cuda-12-1-0-download-archive?target_os=Windows&target_arch=x86_64)
			
 
				+4. 双击 `start.bat` 打开训练推理 WebUI 管理界面. 如有需要，可照下列提示修改`API_FLAGS`.
			
 
				+
			
 
				 !!! info "可选"
			
 
				 
			
 
				     想启动 推理 WebUI 界面？编辑项目根目录下的 `API_FLAGS.txt`, 前三行修改成如下格式:
			
@@ -74,9 +73,8 @@ Windows 非专业用户可考虑以下为免 Linux 环境的基础运行方法
 
				     ```
			
 
				 
			
 
				 !!! info "可选"
			
 
				-    
			
 
				-    双击 `run_cmd.bat` 进入本项目的 conda/python 命令行环境
			
 
				 
			
 
				+    双击 `run_cmd.bat` 进入本项目的 conda/python 命令行环境
			
 
				 
			
 
				 ## Linux 配置
			
 
				 
			
@@ -89,18 +87,21 @@ conda activate fish-speech
 
				 pip3 install torch torchvision torchaudio
			
 
				 
			
 
				 # 安装 fish-speech
			
 
				-pip3 install -e .
			
 
				+pip3 install -e ".[stable]"
			
 
				 
			
 
				 # (Ubuntu / Debian 用户) 安装 sox
			
 
				 apt install libsox-dev
			
 
				 ```
			
 
				+
			
 
				 ## Docker 配置
			
 
				+
			
 
				 1. 安装 NVIDIA Container Toolkit：
			
 
				 
			
 
				     Docker 如果想使用 GPU 进行模型训练和推理，需要安装 NVIDIA Container Toolkit ：
			
 
				 
			
 
				     对于 Ubuntu 用户：
			
 
				-    ``` bash
			
 
				+
			
 
				+    ```bash
			
 
				     # 添加远程仓库
			
 
				     curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
			
 
				         && curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
			
@@ -111,14 +112,15 @@ apt install libsox-dev
 
				     sudo apt-get install -y nvidia-container-toolkit
			
 
				     # 重启 Docker 服务
			
 
				     sudo systemctl restart docker
			
 
				-    ``` 
			
 
				+    ```
			
 
				+
			
 
				     对于使用其他 Linux 发行版的用户，安装指南请参考：[NVIDIA Container Toolkit Install-guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)。
			
 
				-   
			
 
				+
			
 
				     注：对于中国大陆的用户，您可能需要使用代理来完成相关工具的安装。
			
 
				 
			
 
				-2. 拉取并运行 fish-speech 镜像 
			
 
				-   
			
 
				-    ``` shell
			
 
				+2. 拉取并运行 fish-speech 镜像
			
 
				+
			
 
				+    ```shell
			
 
				     # 拉取镜像
			
 
				     docker pull lengyue233/fish-speech
			
 
				     # 运行镜像
			
@@ -132,9 +134,9 @@ apt install libsox-dev
 
				     ```
			
 
				 
			
 
				 3. 下载模型依赖
			
 
				-   
			
 
				+
			
 
				     确保您在 docker 容器内的终端，然后再从我们的 huggingface 仓库下载所需的 `vqgan` 和 `llama` 模型。
			
 
				-   
			
 
				+
			
 
				     ```bash
			
 
				     huggingface-cli download fishaudio/fish-speech-1.2-sft --local-dir checkpoints/fish-speech-1.2-sft
			
 
				     ```
			
@@ -149,11 +151,10 @@ apt install libsox-dev
 
				 
			
 
				     在 docker 容器内的终端，输入 `export GRADIO_SERVER_NAME="0.0.0.0"` ，从而让外部可以访问 docker 内的 gradio 服务。
			
 
				     接着在 docker 容器内的终端，输入 `python tools/webui.py` 即可开启 WebUI 服务。
			
 
				-    
			
 
				+
			
 
				     如果是 WSL 或者是 MacOS ，访问 [http://localhost:7860](http://localhost:7860) 即可打开 WebUI 界面。
			
 
				-    
			
 
				-    如果是部署在服务器上，更换 localhost 为您的服务器 ip 即可。
			
 
				 
			
 
				+    如果是部署在服务器上，更换 localhost 为您的服务器 ip 即可。
			
 
				 
			
 
				 ## 更新日志
			
 
				 
			
--- a/install_env.bat
+++ b/install_env.bat
@@ -125,23 +125,24 @@ if errorlevel 1 (
 
				     echo successfully create env.
			
 
				 )
			
 
				 
			
 
				+set "packages=torch torchvision torchaudio fish-speech"
			
 
				 
			
 
				-set "packages=torch torchvision torchaudio openai-whisper fish-speech"
			
 
				-
			
 
				-if "!INSTALL_TYPE!" == "preview" (
			
 
				+if "%INSTALL_TYPE%"=="preview" (
			
 
				     set "packages=!packages! triton_windows"
			
 
				 )
			
 
				 
			
 
				 set "HF_ENDPOINT=https://huggingface.co"
			
 
				 set "no_proxy="
			
 
				-if "!USE_MIRROR!" == "true" (
			
 
				+if "%USE_MIRROR%"=="true" (
			
 
				     set "HF_ENDPOINT=https://hf-mirror.com"
			
 
				-    set "no_proxy=localhost, 127.0.0.1, 0.0.0.0"
			
 
				+    set "no_proxy=localhost,127.0.0.1,0.0.0.0"
			
 
				 )
			
 
				+
			
 
				 echo "HF_ENDPOINT: !HF_ENDPOINT!"
			
 
				 echo "NO_PROXY: !no_proxy!"
			
 
				 
			
 
				 set "install_packages="
			
 
				+
			
 
				 for %%p in (%packages%) do (
			
 
				     %PIP_CMD% show %%p >nul 2>&1
			
 
				     if errorlevel 1 (
			
@@ -149,143 +150,112 @@ for %%p in (%packages%) do (
 
				     )
			
 
				 )
			
 
				 
			
 
				-if not "!install_packages!"=="" (
			
 
				+if not "%install_packages%"=="" (
			
 
				     echo.
			
 
				-    echo Installing: !install_packages!
			
 
				-    for %%p in (!install_packages!) do (
			
 
				-        if "!INSTALL_TYPE!"=="preview" (
			
 
				-            if "%%p"=="torch" (
			
 
				-                set "WHEEL_FILE=torch-2.4.0.dev20240427+cu121-cp310-cp310-win_amd64.whl"
			
 
				-                set "URL=!HF_ENDPOINT!/datasets/SpicyqSama007/windows_compile/resolve/main/torch-2.4.0.dev20240427_cu121-cp310-cp310-win_amd64.whl?download=true"
			
 
				-                set "CHKSUM=b091308f4cb74e63d0323afd67c92f2279d9e488d8cbf467bcc7b939bcd74e0b"
			
 
				-                :TORCH_DOWNLOAD
			
 
				-                echo "%CD%\!WHEEL_FILE!"
			
 
				-                if not exist "%CD%\!WHEEL_FILE!" (
			
 
				-                    call curl -Lk "!URL!" --output "!WHEEL_FILE!"
			
 
				-                )
			
 
				-                for /f "delims=" %%I in ('certutil -hashfile "!WHEEL_FILE!" SHA256 ^| find /i "!CHKSUM!"') do (
			
 
				-                    set "FILE_VALID=true"
			
 
				-                )
			
 
				-                if not defined FILE_VALID (
			
 
				-                    echo File checksum does not match, re-downloading...
			
 
				-                    del "!WHEEL_FILE!"
			
 
				-                    goto TORCH_DOWNLOAD
			
 
				-                )
			
 
				-                echo "OK for !WHEEL_FILE!"
			
 
				-                %PIP_CMD% install "%CD%\!WHEEL_FILE!" --no-warn-script-location
			
 
				-                del "!WHEEL_FILE!"
			
 
				-            ) else if "%%p"=="torchvision" (
			
 
				-                set "WHEEL_FILE=torchvision-0.19.0.dev20240428+cu121-cp310-cp310-win_amd64.whl"
			
 
				-                set "URL=!HF_ENDPOINT!/datasets/SpicyqSama007/windows_compile/resolve/main/torchvision-0.19.0.dev20240428_cu121-cp310-cp310-win_amd64.whl?download=true"
			
 
				-                set "CHKSUM=7e46d0a89534013f001563d15e80f9eb431089571720c51f2cc595feeb01d785"
			
 
				-                :TORCHVISION_DOWNLOAD
			
 
				-                if not exist "!WHEEL_FILE!" (
			
 
				-                    call curl -Lk "!URL!" --output "!WHEEL_FILE!"
			
 
				-                )
			
 
				-                for /f "delims=" %%I in ('certutil -hashfile "!WHEEL_FILE!" SHA256 ^| find /i "!CHKSUM!"') do (
			
 
				-                    set "FILE_VALID=true"
			
 
				-                )
			
 
				-                if not defined FILE_VALID (
			
 
				-                    echo File checksum does not match, re-downloading...
			
 
				-                    del "!WHEEL_FILE!"
			
 
				-                    goto TORCHVISION_DOWNLOAD
			
 
				-                )
			
 
				-                echo "OK for !WHEEL_FILE!"
			
 
				-                %PIP_CMD% install "%CD%\!WHEEL_FILE!" --no-warn-script-location
			
 
				-                del "!WHEEL_FILE!"
			
 
				-            ) else if "%%p"=="torchaudio" (
			
 
				-                set "WHEEL_FILE=torchaudio-2.2.0.dev20240427+cu121-cp310-cp310-win_amd64.whl"
			
 
				-                set "URL=!HF_ENDPOINT!/datasets/SpicyqSama007/windows_compile/resolve/main/torchaudio-2.2.0.dev20240427_cu121-cp310-cp310-win_amd64.whl?download=true"
			
 
				-                set "CHKSUM=abafb4bc82cbc6f58f18e1b95191bc1884c28e404781082db2eb540b4fae8a5d"
			
 
				-                :TORCHAUDIO_DOWNLOAD
			
 
				-                if not exist "!WHEEL_FILE!" (
			
 
				-                    call curl -Lk "!URL!" --output "!WHEEL_FILE!"
			
 
				-                )
			
 
				-                for /f "delims=" %%I in ('certutil -hashfile "!WHEEL_FILE!" SHA256 ^| find /i "!CHKSUM!"') do (
			
 
				-                    set "FILE_VALID=true"
			
 
				-                )
			
 
				-                if not defined FILE_VALID (
			
 
				-                    echo File checksum does not match, re-downloading...
			
 
				-                    del "!WHEEL_FILE!"
			
 
				-                    goto TORCHAUDIO_DOWNLOAD
			
 
				-                )
			
 
				-                echo "OK for !WHEEL_FILE!"
			
 
				-                %PIP_CMD% install "%CD%\!WHEEL_FILE!" --no-warn-script-location
			
 
				-                del "!WHEEL_FILE!"
			
 
				-            ) else if "%%p"=="openai-whisper" (
			
 
				-                %PIP_CMD% install openai-whisper --no-warn-script-location
			
 
				-            ) else if "%%p"=="fish-speech" (
			
 
				-                %PIP_CMD% install -e .
			
 
				-            ) else if "%%p"=="triton_windows" (
			
 
				-                set "WHEEL_FILE=triton_windows-0.1.0-py3-none-any.whl"
			
 
				-                set "URL=!HF_ENDPOINT!/datasets/SpicyqSama007/windows_compile/resolve/main/triton_windows-0.1.0-py3-none-any.whl?download=true"
			
 
				-                set "CHKSUM=2cc998638180f37cf5025ab65e48c7f629aa5a369176cfa32177d2bd9aa26a0a"
			
 
				-                :TRITON_DOWNLOAD
			
 
				-                if not exist "!WHEEL_FILE!" (
			
 
				-                    call curl -Lk "!URL!" --output "!WHEEL_FILE!"
			
 
				-                )
			
 
				-                for /f "delims=" %%I in ('certutil -hashfile "!WHEEL_FILE!" SHA256 ^| find /i "!CHKSUM!"') do (
			
 
				-                    set "FILE_VALID=true"
			
 
				-                )
			
 
				-                if not defined FILE_VALID (
			
 
				-                    echo File checksum does not match, re-downloading...
			
 
				-                    del "!WHEEL_FILE!"
			
 
				-                    goto TRITON_DOWNLOAD
			
 
				-                )
			
 
				-                echo "OK for !WHEEL_FILE!"
			
 
				-                %PIP_CMD% install "%CD%\!WHEEL_FILE!" --no-warn-script-location
			
 
				-                del "!WHEEL_FILE!"
			
 
				-            )
			
 
				-            
			
 
				+    echo Installing: %install_packages%
			
 
				+    
			
 
				+    for %%p in (%install_packages%) do (
			
 
				+        if "%INSTALL_TYPE%"=="preview" (
			
 
				+            call :install_preview %%p
			
 
				+        ) else (
			
 
				+            call :install_stable %%p
			
 
				         )
			
 
				     )
			
 
				 )
			
 
				 
			
 
				-set "install_packages="
			
 
				-for %%p in (%packages%) do (
			
 
				-    %PIP_CMD% show %%p >nul 2>&1
			
 
				-    if errorlevel 1 (
			
 
				-        set "install_packages=!install_packages! %%p"
			
 
				-    )
			
 
				+endlocal
			
 
				+echo "Environment Check: Success."
			
 
				+pause
			
 
				+
			
 
				+goto :EOF
			
 
				+
			
 
				+:install_preview
			
 
				+setlocal
			
 
				+
			
 
				+if "%1"=="torch" (
			
 
				+    call :download_and_install "torch-2.4.0.dev20240427+cu121-cp310-cp310-win_amd64.whl" ^
			
 
				+        "%HF_ENDPOINT%/datasets/SpicyqSama007/windows_compile/resolve/main/torch-2.4.0.dev20240427_cu121-cp310-cp310-win_amd64.whl?download=true" ^
			
 
				+        "b091308f4cb74e63d0323afd67c92f2279d9e488d8cbf467bcc7b939bcd74e0b"
			
 
				+
			
 
				+) else if "%1"=="torchvision" (
			
 
				+    call :download_and_install "torchvision-0.19.0.dev20240428+cu121-cp310-cp310-win_amd64.whl" ^
			
 
				+        "%HF_ENDPOINT%/datasets/SpicyqSama007/windows_compile/resolve/main/torchvision-0.19.0.dev20240428_cu121-cp310-cp310-win_amd64.whl?download=true" ^
			
 
				+        "7e46d0a89534013f001563d15e80f9eb431089571720c51f2cc595feeb01d785"
			
 
				+
			
 
				+) else if "%1"=="torchaudio" (
			
 
				+    call :download_and_install "torchaudio-2.2.0.dev20240427+cu121-cp310-cp310-win_amd64.whl" ^
			
 
				+        "%HF_ENDPOINT%/datasets/SpicyqSama007/windows_compile/resolve/main/torchaudio-2.2.0.dev20240427_cu121-cp310-cp310-win_amd64.whl?download=true" ^
			
 
				+        "abafb4bc82cbc6f58f18e1b95191bc1884c28e404781082db2eb540b4fae8a5d"
			
 
				+
			
 
				+) else if "%1"=="fish-speech" (
			
 
				+    %PIP_CMD% install -e . --upgrade-strategy only-if-needed
			
 
				+
			
 
				+) else if "%1"=="triton_windows" (
			
 
				+    call :download_and_install "triton_windows-0.1.0-py3-none-any.whl" ^
			
 
				+        "%HF_ENDPOINT%/datasets/SpicyqSama007/windows_compile/resolve/main/triton_windows-0.1.0-py3-none-any.whl?download=true" ^
			
 
				+        "2cc998638180f37cf5025ab65e48c7f629aa5a369176cfa32177d2bd9aa26a0a"
			
 
				 )
			
 
				 
			
 
				-if not "!install_packages!"=="" (
			
 
				-    echo.
			
 
				-    echo Installing: !install_packages!
			
 
				-
			
 
				-    for %%p in (!install_packages!) do (
			
 
				-        if "!USE_MIRROR!"=="true" (
			
 
				-            if "%%p"=="torch" (
			
 
				-                %PIP_CMD% install torch --index-url https://mirror.sjtu.edu.cn/pytorch-wheels/cu121 --no-warn-script-location
			
 
				-            ) else if "%%p"=="torchvision" (
			
 
				-                %PIP_CMD% install torchvision --index-url https://mirror.sjtu.edu.cn/pytorch-wheels/cu121 --no-warn-script-location
			
 
				-            ) else if "%%p"=="torchaudio" (
			
 
				-                %PIP_CMD% install torchaudio --index-url https://mirror.sjtu.edu.cn/pytorch-wheels/cu121 --no-warn-script-location
			
 
				-            ) else if "%%p"=="openai-whisper" (
			
 
				-                %PIP_CMD% install -i https://pypi.tuna.tsinghua.edu.cn/simple openai-whisper --no-warn-script-location
			
 
				-            ) else if "%%p"=="fish-speech" (
			
 
				-                %PIP_CMD% install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
			
 
				-            )
			
 
				-        ) 
			
 
				-
			
 
				-        if "!USE_MIRROR!"=="false" (
			
 
				-            if "%%p"=="torch" (
			
 
				-                %PIP_CMD% install torch --index-url https://download.pytorch.org/whl/cu121 --no-warn-script-location
			
 
				-            ) else if "%%p"=="torchvision" (
			
 
				-                %PIP_CMD% install torchvision --index-url https://download.pytorch.org/whl/cu121 --no-warn-script-location
			
 
				-            ) else if "%%p"=="torchaudio" (
			
 
				-                %PIP_CMD% install torchaudio --index-url https://download.pytorch.org/whl/cu121 --no-warn-script-location
			
 
				-            ) else if "%%p"=="openai-whisper" (
			
 
				-                %PIP_CMD% install openai-whisper --no-warn-script-location
			
 
				-            ) else if "%%p"=="fish-speech" (
			
 
				-                %PIP_CMD% install -e .
			
 
				-            )
			
 
				-        )
			
 
				-        
			
 
				+endlocal
			
 
				+goto :EOF
			
 
				+
			
 
				+:install_stable
			
 
				+if "%USE_MIRROR%"=="true" (
			
 
				+    if "%1"=="torch" (
			
 
				+        %PIP_CMD% install torch==2.3.1 --index-url https://mirror.sjtu.edu.cn/pytorch-wheels/cu121 --no-warn-script-location
			
 
				+
			
 
				+    ) else if "%1"=="torchvision" (
			
 
				+        %PIP_CMD% install torchvision==0.18.1 --index-url https://mirror.sjtu.edu.cn/pytorch-wheels/cu121 --no-warn-script-location
			
 
				+
			
 
				+    ) else if "%1"=="torchaudio" (
			
 
				+        %PIP_CMD% install torchaudio==2.3.1 --index-url https://mirror.sjtu.edu.cn/pytorch-wheels/cu121 --no-warn-script-location
			
 
				+
			
 
				+    ) else if "%1"=="fish-speech" (
			
 
				+        %PIP_CMD% install -e . -i https://pypi.tuna.tsinghua.edu.cn/simple
			
 
				+    )
			
 
				+
			
 
				+) else (
			
 
				+    if "%1"=="torch" (
			
 
				+        %PIP_CMD% install torch==2.3.1 --index-url https://download.pytorch.org/whl/cu121 --no-warn-script-location
			
 
				+
			
 
				+    ) else if "%1"=="torchvision" (
			
 
				+        %PIP_CMD% install torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu121 --no-warn-script-location
			
 
				+
			
 
				+    ) else if "%1"=="torchaudio" (
			
 
				+        %PIP_CMD% install torchaudio==2.3.1 --index-url https://download.pytorch.org/whl/cu121 --no-warn-script-location
			
 
				+
			
 
				+    ) else if "%1"=="fish-speech" (
			
 
				+        %PIP_CMD% install -e .
			
 
				     )
			
 
				 )
			
 
				-echo Environment Check: Success.
			
 
				+
			
 
				+goto :EOF
			
 
				+
			
 
				+:download_and_install
			
 
				+setlocal
			
 
				+rem Args: 1 = wheel file name, 2 = download URL, 3 = expected SHA256. The tilde form strips the caller's quotes.
			
 
				+set "WHEEL_FILE=%~1"
			
 
				+set "URL=%~2"
			
 
				+set "CHKSUM=%~3"
			
 
				+rem Download the wheel if it is missing, verify its hash, and retry from this label until the hash matches.
			
 
				+:DOWNLOAD
			
 
				+if not exist "%WHEEL_FILE%" (
			
 
				+    call curl -Lk "%URL%" --output "%WHEEL_FILE%"
			
 
				+)
			
 
				+rem The command must be single-quoted: a double-quoted FOR /F set is a literal string, so the check would always pass.
			
 
				+for /f "delims=" %%I in ('certutil -hashfile "%WHEEL_FILE%" SHA256 ^| find /i "%CHKSUM%"') do (
			
 
				+    set "FILE_VALID=true"
			
 
				+)
			
 
				+rem FILE_VALID is only defined when find matched the expected hash in the certutil output.
			
 
				+if not defined FILE_VALID (
			
 
				+    echo File checksum does not match, re-downloading...
			
 
				+    del "%WHEEL_FILE%"
			
 
				+    goto DOWNLOAD
			
 
				+)
			
 
				+
			
 
				+echo "OK for %WHEEL_FILE%"
			
 
				+%PIP_CMD% install "%WHEEL_FILE%" --no-warn-script-location
			
 
				+del "%WHEEL_FILE%"
			
 
				 
			
 
				 endlocal
			
 
				-:end
			
 
				-pause
			
 
				+goto :EOF
			
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,6 @@ dependencies = [
 
				     "loralib>=0.1.2",
			
 
				     "natsort>=8.4.0",
			
 
				     "pyrootutils>=1.0.4",
			
 
				-    "torch==2.3.1",
			
 
				     "vector_quantize_pytorch>=1.14.24",
			
 
				     "resampy>=0.4.3",
			
 
				     "einx[torch]==0.2.2",
			
@@ -42,13 +41,12 @@ dependencies = [
 
				     "modelscope==1.16.1",
			
 
				     "funasr==1.1.2",
			
 
				     "opencc-python-reimplemented==0.1.7",
			
 
				-    "torchaudio",
			
 
				 ]
			
 
				 
			
 
				 [project.optional-dependencies]
			
 
				-asr = [
			
 
				-    "openai-whisper",
			
 
				-    "modelscope"
			
 
				+stable = [
			
 
				+    "torch==2.3.1",
			
 
				+    "torchaudio",
			
 
				 ]
			
 
				 
			
 
				 [build-system]