Sfoglia il codice sorgente

Docker overhaul. (#1100)

* Changed deprecated `huggingface-cli` command to `hf`.

* Updated version of pyproject.toml and dockerfiles for GPU support.

* Updated uv.lock to the latest versions.

* Enhanced Dockerfile
- added multi-targets for web UI or server
- added multi-backend support
- improved dependency installation with caching
- added non-root user
- implemented health checks for web UI and API server
- added dynamic entrypoint creation
- added environment validation.

* Added docker compose files.
Extended .dockerignore file.

* Updated inference and install docs for docker and uv.
Fixed warnings in pyproject.toml

* Removed old docker files.

* Pre-commit fixes and uv.lock update.

* Updated default CUDA version to 12.6.0 and backend to cuda.
Valentin Schröter 6 mesi fa
parent
commit
cccad3e098
16 ha cambiato i file con 1314 aggiunte e 482 eliminazioni
  1. 165 6
      .dockerignore
  2. 23 0
      compose.base.yml
  3. 26 0
      compose.yml
  4. 0 18
      docker-compose.dev.yml
  5. 398 0
      docker/Dockerfile
  6. 0 50
      dockerfile
  7. 1 1
      docs/ar/inference.md
  8. 61 1
      docs/en/inference.md
  9. 15 3
      docs/en/install.md
  10. 1 1
      docs/ja/inference.md
  11. 1 1
      docs/ko/inference.md
  12. 1 1
      docs/pt/inference.md
  13. 1 1
      docs/zh/inference.md
  14. 1 1
      inference.ipynb
  15. 65 1
      pyproject.toml
  16. 555 397
      uv.lock

+ 165 - 6
.dockerignore

@@ -1,7 +1,166 @@
+# .dockerignore
+
+# Git and version control
 .git
-.github
-results
-data
-*.filelist
-/data_server/target
-checkpoints
+.gitignore
+.gitattributes
+.gitmodules
+
+# Documentation
+*.md
+docs/
+!README*
+LICENSE*
+CHANGELOG*
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+Thumbs.db
+
+# Python cache and build artifacts
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv/
+.env/
+
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+.nox/
+coverage.xml
+*.cover
+.hypothesis/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+*.ipynb
+
+# Logs
+*.log
+logs/
+
+# Temporary files
+tmp/
+temp/
+*.tmp
+*.temp
+
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Docker files (except the one being used)
+docker/
+Dockerfile*
+docker-compose*.yml
+.dockerignore
+
+# Checkpoints and models (should be mounted)
+checkpoints/
+models/
+*.pth
+*.ckpt
+*.safetensors
+*.bin
+
+# Reference voices (should be mounted)
+references/
+
+# Generated audio files
+*.wav
+*.mp3
+*.flac
+*.ogg
+generated_audio.wav
+fake.wav
+fake.npy
+
+# Cache directories
+.cache/
+cache/
+.uv_cache/
+
+# Development files
+.env
+.env.local
+.env.development
+.env.test
+.env.production
+
+# Test files
+test_*.py
+*_test.py
+tests/
+
+# CI/CD
+.github/
+.gitlab-ci.yml
+.travis.yml
+.circleci/
+azure-pipelines.yml
+
+# Monitoring and profiling
+.prof
+*.prof
+
+# Backup files
+*.bak
+*.backup
+*.old
+
+# Large data files
+*.csv
+*.json
+*.jsonl
+*.parquet
+*.h5
+*.hdf5
+
+# Audio processing temporary files
+*.tmp.wav
+*.temp.wav
+
+# OLD:
+# .github
+# results
+# data
+# *.filelist
+# /data_server/target
+# checkpoints
+# .venv

+ 23 - 0
compose.base.yml

@@ -0,0 +1,23 @@
+services:
+  app-base:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile
+      args:
+        BACKEND: ${BACKEND:-cuda}     # or cpu
+        UV_VERSION: ${UV_VERSION:-0.8.15}
+    volumes:
+      - ./checkpoints:/app/checkpoints
+      - ./references:/app/references
+    environment:
+      COMPILE: ${COMPILE:-0}
+    # GPU (remove this block if CPU-only):
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    tty: true
+    stdin_open: true

+ 26 - 0
compose.yml

@@ -0,0 +1,26 @@
+name: fish-speech
+
+services:
+  webui:
+    extends:
+      file: compose.base.yml
+      service: app-base
+    build:
+      target: webui
+    environment:
+      COMPILE: ${COMPILE:-0}
+    profiles: ["webui"]
+    ports:
+      - "${GRADIO_PORT:-7860}:7860"
+
+  server:
+    extends:
+      file: compose.base.yml
+      service: app-base
+    build:
+      target: server
+    environment:
+      COMPILE: ${COMPILE:-0}
+    profiles: ["server"]
+    ports:
+      - "${API_PORT:-8080}:8080"

+ 0 - 18
docker-compose.dev.yml

@@ -1,18 +0,0 @@
-version: '3.8'
-
-services:
-  fish-speech:
-    build:
-      context: .
-      dockerfile: dockerfile.dev
-    container_name: fish-speech
-    volumes:
-      - ./:/exp
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    command: tail -f /dev/null

+ 398 - 0
docker/Dockerfile

@@ -0,0 +1,398 @@
+# docker/Dockerfile
+
+# IMPORTANT: The docker images do not contain the checkpoints. You need to mount the checkpoints to the container.
+
+# Build the image:
+#   docker build \
+#       --platform linux/amd64 \
+#       -f docker/Dockerfile \
+#       --build-arg BACKEND=[cuda, cpu] \
+#       --target [webui, server] \
+#       -t fish-speech-[webui, server]:[cuda, cpu] .
+
+# e.g. for building the webui:
+#   docker build \
+#       --platform linux/amd64 \
+#       -f docker/Dockerfile \
+#       --build-arg BACKEND=cuda \
+#       --target webui \
+#       -t fish-speech-webui:cuda .
+
+# e.g. for building the server:
+#   docker build \
+#       --platform linux/amd64 \
+#       -f docker/Dockerfile \
+#       --build-arg BACKEND=cuda \
+#       --target server \
+#       -t fish-speech-server:cuda .
+
+
+
+# Multi-platform build:
+#   docker buildx build \
+#       --platform linux/amd64,linux/arm64 \
+#       -f docker/Dockerfile \
+#       --build-arg BACKEND=cpu \
+#       --target webui \
+#       -t fish-speech-webui:cpu .
+
+
+# Running the image interactively:
+#   docker run \
+#       --gpus all \
+#       -v /path/to/fish-speech/checkpoints:/app/checkpoints \
+#       -e COMPILE=1 \            # or -e COMPILE=0 \
+#       -it fish-speech-[webui, server]:[cuda, cpu]
+
+# E.g. running the webui:
+#   docker run \
+#       --gpus all \
+#       -v ./checkpoints:/app/checkpoints \
+#       -e COMPILE=1 \
+#       -p 7860:7860 \
+#       fish-speech-webui:cuda
+
+# E.g. running the server:
+#   docker run \
+#       --gpus all \
+#       -v ./checkpoints:/app/checkpoints \
+#       -p 8080:8080 \
+#       -it fish-speech-server:cuda
+
+
+# Select the specific cuda version (see https://hub.docker.com/r/nvidia/cuda/)
+ARG CUDA_VER=12.6.0
+# Adapt the uv extra to fit the cuda version (one of [cu126, cu128, cu129])
+ARG UV_EXTRA=cu126
+ARG BACKEND=cuda
+
+ARG UBUNTU_VER=24.04
+ARG PY_VER=3.12
+ARG UV_VERSION=0.8.15
+
+# Create non-root user early for security
+ARG USERNAME=fish
+ARG USER_UID=1000
+ARG USER_GID=1000
+
+##############################################################
+# Base stage per backend
+##############################################################
+
+# --- CUDA (x86_64) ---
+FROM nvidia/cuda:${CUDA_VER}-cudnn-runtime-ubuntu${UBUNTU_VER} AS base-cuda
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install system dependencies in a single layer with cleanup
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    set -eux \
+    && rm -f /etc/apt/apt.conf.d/docker-clean \
+    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        python3-pip \
+        python3-dev \
+        git \
+        ca-certificates \
+        curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# --- CPU-only (portable x86_64) ---
+FROM python:${PY_VER}-slim AS base-cpu
+ENV UV_EXTRA=cpu
+
+# Install system dependencies in a single layer with cleanup
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    set -eux \
+    && rm -f /etc/apt/apt.conf.d/docker-clean \
+    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        git \
+        ca-certificates \
+        curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+
+##############################################################
+# UV stage
+##############################################################
+
+ARG UV_VERSION
+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv-bin
+
+##############################################################
+# Shared app base stage
+##############################################################
+
+FROM base-${BACKEND} AS app-base
+
+ARG PY_VER
+ARG BACKEND
+ARG USERNAME
+ARG USER_UID
+ARG USER_GID
+ARG UV_VERSION
+ARG UV_EXTRA
+
+ENV BACKEND=${BACKEND} \
+    DEBIAN_FRONTEND=noninteractive \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+# System dependencies for audio processing
+ARG DEPENDENCIES=" \
+    libsox-dev \
+    build-essential \
+    cmake \
+    libasound-dev \
+    portaudio19-dev \
+    libportaudio2 \
+    libportaudiocpp0 \
+    ffmpeg"
+
+# Install system dependencies with caching and cleanup
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    set -eux \
+    && rm -f /etc/apt/apt.conf.d/docker-clean \
+    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends ${DEPENDENCIES} \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install specific uv version
+COPY --from=uv-bin /uv /uvx /bin/
+
+# RUN groupadd --gid ${USER_GID} ${USERNAME} \
+#     && useradd --uid ${USER_UID} --gid ${USER_GID} -m ${USERNAME} \
+#     && mkdir -p /app /home/${USERNAME}/.cache \
+#     && chown -R ${USERNAME}:${USERNAME} /app /home/${USERNAME}/.cache
+
+# Create non-root user (or use existing user)
+RUN set -eux; \
+    if getent group ${USER_GID} >/dev/null 2>&1; then \
+        echo "Group ${USER_GID} already exists"; \
+    else \
+        groupadd -g ${USER_GID} ${USERNAME}; \
+    fi; \
+    if id -u ${USER_UID} >/dev/null 2>&1; then \
+        echo "User ${USER_UID} already exists, using existing user"; \
+        EXISTING_USER=$(id -un ${USER_UID}); \
+        mkdir -p /app /home/${EXISTING_USER}/.cache; \
+        chown -R ${USER_UID}:${USER_GID} /app /home/${EXISTING_USER}/.cache; \
+    else \
+        useradd -m -u ${USER_UID} -g ${USER_GID} ${USERNAME}; \
+        mkdir -p /app /home/${USERNAME}/.cache; \
+        chown -R ${USERNAME}:${USERNAME} /app /home/${USERNAME}/.cache; \
+    fi
+
+# Create references directory with proper permissions for the non-root user
+RUN mkdir -p /app/references \
+    && chown -R ${USER_UID}:${USER_GID} /app/references \
+    && chmod 755 /app/references
+
+# Set working directory
+WORKDIR /app
+
+# Copy dependency files first for better caching
+COPY --chown=${USER_UID}:${USER_GID} pyproject.toml uv.lock README.md ./
+
+# Switch to non-root user for package installation
+USER ${USER_UID}:${USER_GID}
+
+# Install Python dependencies (cacheable by lockfiles)
+# Use a generic cache path that works regardless of username
+RUN --mount=type=cache,target=/tmp/uv-cache,uid=${USER_UID},gid=${USER_GID} \
+    uv python pin ${PY_VER} \
+    && uv sync --extra ${UV_EXTRA} --frozen --no-install-project
+
+# Copy application code
+COPY --chown=${USER_UID}:${USER_GID} . .
+
+# Install the local package after copying source code
+RUN uv sync --extra ${UV_EXTRA} --frozen
+
+# Create common entrypoint script
+RUN printf '%s\n' \
+    '#!/bin/bash' \
+    'set -euo pipefail' \
+    '' \
+    '# Set user info from build args' \
+    'USER_UID='${USER_UID} \
+    'USER_GID='${USER_GID} \
+    '' \
+    '# Logging function' \
+    'log() { echo "[$(date +"%Y-%m-%d %H:%M:%S")] $*" >&2; }' \
+    '' \
+    '# Validate environment' \
+    'validate_env() {' \
+    '    if [ ! -d "/app/checkpoints" ]; then' \
+    '        log "WARNING: /app/checkpoints directory not found. Please mount your checkpoints."' \
+    '    fi' \
+    '    if [ ! -d "/app/references" ]; then' \
+    '        log "WARNING: /app/references directory not found. Please mount your references."' \
+    '    else' \
+    '        # Check if we can write to references directory' \
+    '        if [ ! -w "/app/references" ]; then' \
+    '            log "ERROR: Cannot write to /app/references directory. Please ensure the mounted directory has proper permissions for user with UID ${USER_UID}."' \
+    '            log "You can fix this by running: sudo chown -R ${USER_UID}:${USER_GID} /path/to/your/references"' \
+    '            exit 1' \
+    '        fi' \
+    '    fi' \
+    '}' \
+    '' \
+    '# Build device arguments' \
+    'build_device_args() {' \
+    '    if [ "${BACKEND:-}" = "cpu" ]; then' \
+    '        echo "--device cpu"' \
+    '    fi' \
+    '}' \
+    '' \
+    '# Build compile arguments' \
+    'build_compile_args() {' \
+    '    if [ "${1:-}" = "compile" ] || [ "${COMPILE:-}" = "1" ] || [ "${COMPILE:-}" = "true" ]; then' \
+    '        echo "--compile"' \
+    '        shift' \
+    '    fi' \
+    '    echo "$@"' \
+    '}' \
+    '' \
+    '# Health check function' \
+    'health_check() {' \
+    '    local port=${1:-7860}' \
+    '    local endpoint=${2:-/health}' \
+    '    curl -f http://localhost:${port}${endpoint} 2>/dev/null || exit 1' \
+    '}' \
+    > /app/common.sh && chmod +x /app/common.sh
+
+##############################################################
+# App stages
+##############################################################
+
+# Gradio WebUI
+FROM app-base AS webui
+ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1
+
+ARG GRADIO_SERVER_NAME="0.0.0.0"
+ARG GRADIO_SERVER_PORT=7860
+ARG LLAMA_CHECKPOINT_PATH="checkpoints/openaudio-s1-mini"
+ARG DECODER_CHECKPOINT_PATH="checkpoints/openaudio-s1-mini/codec.pth"
+ARG DECODER_CONFIG_NAME="modded_dac_vq"
+
+
+# Expose port
+EXPOSE ${GRADIO_SERVER_PORT}
+
+# Set environment variables
+ENV GRADIO_SERVER_NAME=${GRADIO_SERVER_NAME}
+ENV GRADIO_SERVER_PORT=${GRADIO_SERVER_PORT}
+ENV LLAMA_CHECKPOINT_PATH=${LLAMA_CHECKPOINT_PATH}
+ENV DECODER_CHECKPOINT_PATH=${DECODER_CHECKPOINT_PATH}
+ENV DECODER_CONFIG_NAME=${DECODER_CONFIG_NAME}
+
+# Create webui entrypoint
+RUN printf '%s\n' \
+    '#!/bin/bash' \
+    'source /app/common.sh' \
+    '' \
+    'log "Starting Fish Speech WebUI..."' \
+    'validate_env' \
+    '' \
+    'DEVICE_ARGS=$(build_device_args)' \
+    'COMPILE_ARGS=$(build_compile_args "$@")' \
+    '' \
+    'log "Device args: ${DEVICE_ARGS:-none}"' \
+    'log "Compile args: ${COMPILE_ARGS}"' \
+    'log "Server: ${GRADIO_SERVER_NAME}:${GRADIO_SERVER_PORT}"' \
+    '' \
+    'exec uv run tools/run_webui.py \' \
+    '  --llama-checkpoint-path "${LLAMA_CHECKPOINT_PATH}" \' \
+    '  --decoder-checkpoint-path "${DECODER_CHECKPOINT_PATH}" \' \
+    '  --decoder-config-name "${DECODER_CONFIG_NAME}" \' \
+    '  ${DEVICE_ARGS} ${COMPILE_ARGS}' \
+    > /app/start_webui.sh && chmod +x /app/start_webui.sh
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:${GRADIO_SERVER_PORT}/health || exit 1
+
+ENTRYPOINT ["/app/start_webui.sh"]
+
+# API Server
+FROM app-base AS server
+ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1
+
+ARG API_SERVER_NAME="0.0.0.0"
+ARG API_SERVER_PORT=8080
+ARG LLAMA_CHECKPOINT_PATH="checkpoints/openaudio-s1-mini"
+ARG DECODER_CHECKPOINT_PATH="checkpoints/openaudio-s1-mini/codec.pth"
+ARG DECODER_CONFIG_NAME="modded_dac_vq"
+
+# Expose port
+EXPOSE ${API_SERVER_PORT}
+
+# Set environment variables
+ENV API_SERVER_NAME=${API_SERVER_NAME}
+ENV API_SERVER_PORT=${API_SERVER_PORT}
+ENV LLAMA_CHECKPOINT_PATH=${LLAMA_CHECKPOINT_PATH}
+ENV DECODER_CHECKPOINT_PATH=${DECODER_CHECKPOINT_PATH}
+ENV DECODER_CONFIG_NAME=${DECODER_CONFIG_NAME}
+
+# Create server entrypoint
+RUN printf '%s\n' \
+    '#!/bin/bash' \
+    'source /app/common.sh' \
+    '' \
+    'log "Starting Fish Speech API Server..."' \
+    'validate_env' \
+    '' \
+    'DEVICE_ARGS=$(build_device_args)' \
+    'COMPILE_ARGS=$(build_compile_args "$@")' \
+    '' \
+    'log "Device args: ${DEVICE_ARGS:-none}"' \
+    'log "Compile args: ${COMPILE_ARGS}"' \
+    'log "Server: ${API_SERVER_NAME}:${API_SERVER_PORT}"' \
+    '' \
+    'exec uv run tools/api_server.py \' \
+    '  --listen "${API_SERVER_NAME}:${API_SERVER_PORT}" \' \
+    '  --llama-checkpoint-path "${LLAMA_CHECKPOINT_PATH}" \' \
+    '  --decoder-checkpoint-path "${DECODER_CHECKPOINT_PATH}" \' \
+    '  --decoder-config-name "${DECODER_CONFIG_NAME}" \' \
+    '  ${DEVICE_ARGS} ${COMPILE_ARGS}' \
+    > /app/start_server.sh && chmod +x /app/start_server.sh
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:${API_SERVER_PORT}/v1/health || exit 1
+
+ENTRYPOINT ["/app/start_server.sh"]
+
+# Development stage
+FROM app-base AS dev
+USER root
+
+# Install development tools
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update \
+    && apt-get install -y --no-install-recommends \
+        vim \
+        htop \
+        strace \
+        gdb \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+USER ${USER_UID}:${USER_GID}
+
+# Install development dependencies
+RUN uv sync --extra ${UV_EXTRA} --dev
+
+# Default to bash for development
+ENTRYPOINT ["/bin/bash"]

+ 0 - 50
dockerfile

@@ -1,50 +0,0 @@
-FROM python:3.12-slim-bookworm AS stage-1
-ARG TARGETARCH
-
-ARG HUGGINGFACE_MODEL=fish-speech-1.5
-ARG HF_ENDPOINT=https://huggingface.co
-
-WORKDIR /opt/fish-speech
-
-RUN set -ex \
-    && pip install huggingface_hub \
-    && HF_ENDPOINT=${HF_ENDPOINT} huggingface-cli download --resume-download fishaudio/${HUGGINGFACE_MODEL} --local-dir checkpoints/${HUGGINGFACE_MODEL}
-
-FROM python:3.12-slim-bookworm
-ARG TARGETARCH
-
-ARG DEPENDENCIES="  \
-    ca-certificates \
-    libsox-dev \
-    build-essential \
-    cmake \
-    libasound-dev \
-    portaudio19-dev \
-    libportaudio2 \
-    libportaudiocpp0 \
-    ffmpeg"
-
-RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    --mount=type=cache,target=/var/lib/apt,sharing=locked \
-    set -ex \
-    && rm -f /etc/apt/apt.conf.d/docker-clean \
-    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache \
-    && apt-get update \
-    && apt-get -y install --no-install-recommends ${DEPENDENCIES} \
-    && echo "no" | dpkg-reconfigure dash
-
-WORKDIR /opt/fish-speech
-
-COPY . .
-
-RUN --mount=type=cache,target=/root/.cache,sharing=locked \
-    set -ex \
-    && pip install -e .[stable]
-
-COPY --from=stage-1 /opt/fish-speech/checkpoints /opt/fish-speech/checkpoints
-
-ENV GRADIO_SERVER_NAME="0.0.0.0"
-
-EXPOSE 7860
-
-CMD ["./entrypoint.sh"]

+ 1 - 1
docs/ar/inference.md

@@ -9,7 +9,7 @@
 أولاً تحتاج إلى تحميل أوزان النموذج:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## استنتاج سطر الأوامر

+ 61 - 1
docs/en/inference.md

@@ -9,7 +9,13 @@ We support command line, HTTP API and WebUI for inference, you can choose any me
 First you need to download the model weights:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+
+# Requires "huggingface_hub[cli]" to be installed
+# pip install huggingface_hub[cli]
+# or 
+# uv tool install huggingface_hub[cli]
+
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## Command Line Inference
@@ -68,6 +74,13 @@ python -m tools.api_server \
     --llama-checkpoint-path "checkpoints/openaudio-s1-mini" \
     --decoder-checkpoint-path "checkpoints/openaudio-s1-mini/codec.pth" \
     --decoder-config-name modded_dac_vq
+
+# or with uv
+uv run tools/api_server.py \
+    --listen 0.0.0.0:8080 \
+    --llama-checkpoint-path "checkpoints/openaudio-s1-mini" \
+    --decoder-checkpoint-path "checkpoints/openaudio-s1-mini/codec.pth" \
+    --decoder-config-name modded_dac_vq
 ```
 
 > If you want to speed up inference, you can add the `--compile` parameter.
@@ -104,3 +117,50 @@ python -m tools.run_webui
     You can use Gradio environment variables, such as `GRADIO_SHARE`, `GRADIO_SERVER_PORT`, `GRADIO_SERVER_NAME` to configure WebUI.
 
 Enjoy!
+
+
+## Using Docker
+You can use Docker to start the WebUI or the API server:
+
+### Using Docker Compose
+```bash
+# To start the server
+docker compose --profile server up
+# Or with compile
+COMPILE=1 docker compose --profile server up
+
+# To start the web ui
+docker compose --profile webui up
+# Or with compile
+COMPILE=1 docker compose --profile webui up
+```
+
+```bash
+# Select the target, either `webui` or `server`
+docker build \
+    --platform linux/amd64 \
+    -f docker/Dockerfile \
+    --build-arg BACKEND=cuda \
+    --target [webui, server] \
+    -t fish-speech-[webui, server]:cuda .
+
+# Starting the web ui
+docker run -d \
+    --name fish-speech-webui \
+    --gpus all \
+    -p 7860:7860 \
+    -v ./checkpoints:/app/checkpoints \
+    -v ./references:/app/references \
+    -e COMPILE=1 \
+    --rm fish-speech-webui:cuda
+
+# Starting the server
+docker run -d \
+    --name fish-speech-server \
+    --gpus all \
+    -p 8080:8080 \
+    -v ./checkpoints:/app/checkpoints \
+    -v ./references:/app/references \
+    -e COMPILE=1 \
+    --rm fish-speech-server:cuda
+```

+ 15 - 3
docs/en/install.md

@@ -3,7 +3,7 @@
 - GPU Memory: 12GB (Inference)
 - System: Linux, WSL
 
-## Setup
+## System Setup
 
 First you need to install pyaudio and sox, which are used for audio processing.
 
@@ -17,14 +17,21 @@ apt install portaudio19-dev libsox-dev ffmpeg
 conda create -n fish-speech python=3.12
 conda activate fish-speech
 
+# Select the correct cuda version for your system from [cu126, cu128, cu129]
+pip install -e .[cu129]
+# Or for cpu only
+pip install -e .[cpu]
+# You can also omit the extra if you want to use the default torch index
 pip install -e .
 ```
 
 ### UV
 
 ```bash
-
-uv sync --python 3.12
+# Select the correct cuda version for your system from [cu126, cu128, cu129]
+uv sync --python 3.12 --extra cu129
+# Or for cpu only
+uv sync --python 3.12 --extra cpu
 ```
 ### Intel Arc XPU support
 
@@ -41,3 +48,8 @@ pip install -e .
 
 !!! warning
     The `compile` option is not supported on Windows and macOS; if you want to run with compile, you need to install Triton yourself.
+
+
+## Docker Setup
+
+See [inference](./inference.md) to use docker for the webui or the API server.

+ 1 - 1
docs/ja/inference.md

@@ -9,7 +9,7 @@
 まず、モデルの重みをダウンロードする必要があります:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## コマンドライン推論

+ 1 - 1
docs/ko/inference.md

@@ -9,7 +9,7 @@
 먼저 모델 가중치를 다운로드해야 합니다:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## 명령줄 추론

+ 1 - 1
docs/pt/inference.md

@@ -9,7 +9,7 @@ Suportamos linha de comando, API HTTP e WebUI para inferência, você pode escol
 Primeiro você precisa baixar os pesos do modelo:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## Inferência por Linha de Comando

+ 1 - 1
docs/zh/inference.md

@@ -9,7 +9,7 @@
 首先您需要下载模型权重:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## 命令行推理

+ 1 - 1
inference.ipynb

@@ -61,7 +61,7 @@
     "# !set HF_ENDPOINT=https://hf-mirror.com\n",
     "# !export HF_ENDPOINT=https://hf-mirror.com \n",
     "\n",
-    "!huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini/"
+    "!hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini/"
    ]
   },
   {

+ 65 - 1
pyproject.toml

@@ -8,7 +8,7 @@ description = "Fish Speech"
 readme = "README.md"
 requires-python = ">=3.10"
 keywords = ["TTS", "Speech"]
-license = {text = "Apache-2.0"}
+license = "Apache-2.0"
 classifiers = [
     "Programming Language :: Python :: 3",
 ]
@@ -52,6 +52,68 @@ stable = [
     "torch>=2.5.1",
     "torchaudio",
 ]
+cpu = [
+  "torch>=2.5.1",
+  "torchaudio",
+]
+cu126 = [
+  "torch>=2.5.1",
+  "torchaudio",
+]
+cu128 = [
+  "torch>=2.5.1",
+  "torchaudio",
+]
+cu129 = [
+  "torch>=2.5.1",
+  "torchaudio",
+]
+
+[tool.uv]
+conflicts = [
+  [
+    { extra = "cpu" },
+    { extra = "cu126" },
+    { extra = "cu128" },
+    { extra = "cu129" },
+  ],
+]
+
+[tool.uv.sources]
+torch = [
+  { index = "pytorch-cpu", extra = "cpu" },
+  { index = "pytorch-cu126", extra = "cu126" },
+  { index = "pytorch-cu128", extra = "cu128" },
+  { index = "pytorch-cu129", extra = "cu129" },
+]
+torchaudio = [
+  { index = "pytorch-cpu", extra = "cpu" },
+  { index = "pytorch-cu126", extra = "cu126" },
+  { index = "pytorch-cu128", extra = "cu128" },
+  { index = "pytorch-cu129", extra = "cu129" },
+]
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu126"
+url = "https://download.pytorch.org/whl/cu126"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu129"
+url = "https://download.pytorch.org/whl/cu129"
+explicit = true
+
+
 
 [build-system]
 requires = ["setuptools", "setuptools-scm"]
@@ -59,3 +121,5 @@ build-backend = "setuptools.build_meta"
 
 [tool.setuptools]
 packages = ["fish_speech", "tools"]
+
+[tool.setuptools_scm]

File diff suppressed because it is too large
+ 555 - 397
uv.lock


Some files were not shown because too many files changed in this diff