Sfoglia il codice sorgente

Docker overhaul. (#1100)

* Changed deprecated `huggingface-cli` command to `hf`.

* Updated version of pyproject.toml and dockerfiles for GPU support.

* Updated uv.lock to the latest versions.

* Enhanced Dockerfile
- added multi-targets for web UI or server
- added multi-backend support
- improved dependency installation with caching
- added non-root user
- implemented health checks for web UI and API server
- added dynamic entrypoint creation
- added environment validation.

* Added docker compose files.
Extended .dockerignore file.

* Updated inference and install docs for docker and uv.
Fixed warnings in pyproject.toml

* Removed old docker files.

* Pre-commit fixes and uv.lock update.

* Updated default CUDA version to 12.6.0 and backend to cuda.
Valentin Schröter 6 mesi fa
parent
commit
cccad3e098
16 ha cambiato i file con 1314 aggiunte e 482 eliminazioni
  1. 165 6
      .dockerignore
  2. 23 0
      compose.base.yml
  3. 26 0
      compose.yml
  4. 0 18
      docker-compose.dev.yml
  5. 398 0
      docker/Dockerfile
  6. 0 50
      dockerfile
  7. 1 1
      docs/ar/inference.md
  8. 61 1
      docs/en/inference.md
  9. 15 3
      docs/en/install.md
  10. 1 1
      docs/ja/inference.md
  11. 1 1
      docs/ko/inference.md
  12. 1 1
      docs/pt/inference.md
  13. 1 1
      docs/zh/inference.md
  14. 1 1
      inference.ipynb
  15. 65 1
      pyproject.toml
  16. 555 397
      uv.lock

+ 165 - 6
.dockerignore

@@ -1,7 +1,166 @@
+# .dockerignore
+
+# Git and version control
 .git
-.github
-results
-data
-*.filelist
-/data_server/target
-checkpoints
+.gitignore
+.gitattributes
+.gitmodules
+
+# Documentation
+*.md
+docs/
+!README*
+LICENSE*
+CHANGELOG*
+
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+Thumbs.db
+
+# Python cache and build artifacts
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+venv/
+env/
+ENV/
+.venv/
+.env/
+
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+.nox/
+coverage.xml
+*.cover
+.hypothesis/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+*.ipynb
+
+# Logs
+*.log
+logs/
+
+# Temporary files
+tmp/
+temp/
+*.tmp
+*.temp
+
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Docker files (except the one being used)
+docker/
+Dockerfile*
+docker-compose*.yml
+.dockerignore
+
+# Checkpoints and models (should be mounted)
+checkpoints/
+models/
+*.pth
+*.ckpt
+*.safetensors
+*.bin
+
+# Reference voices (should be mounted)
+references/
+
+# Generated audio files
+*.wav
+*.mp3
+*.flac
+*.ogg
+generated_audio.wav
+fake.wav
+fake.npy
+
+# Cache directories
+.cache/
+cache/
+.uv_cache/
+
+# Development files
+.env
+.env.local
+.env.development
+.env.test
+.env.production
+
+# Test files
+test_*.py
+*_test.py
+tests/
+
+# CI/CD
+.github/
+.gitlab-ci.yml
+.travis.yml
+.circleci/
+azure-pipelines.yml
+
+# Monitoring and profiling
+.prof
+*.prof
+
+# Backup files
+*.bak
+*.backup
+*.old
+
+# Large data files
+*.csv
+*.json
+*.jsonl
+*.parquet
+*.h5
+*.hdf5
+
+# Audio processing temporary files
+*.tmp.wav
+*.temp.wav
+
+# OLD:
+# .github
+# results
+# data
+# *.filelist
+# /data_server/target
+# checkpoints
+# .venv

+ 23 - 0
compose.base.yml

@@ -0,0 +1,23 @@
+services:
+  app-base:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile
+      args:
+        BACKEND: ${BACKEND:-cuda}     # or cpu
+        UV_VERSION: ${UV_VERSION:-0.8.15}
+    volumes:
+      - ./checkpoints:/app/checkpoints
+      - ./references:/app/references
+    environment:
+      COMPILE: ${COMPILE:-0}
+    # GPU (remove this block if CPU-only):
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    tty: true
+    stdin_open: true

+ 26 - 0
compose.yml

@@ -0,0 +1,26 @@
+name: fish-speech
+
+services:
+  webui:
+    extends:
+      file: compose.base.yml
+      service: app-base
+    build:
+      target: webui
+    environment:
+      COMPILE: ${COMPILE:-0}
+    profiles: ["webui"]
+    ports:
+      - "${GRADIO_PORT:-7860}:7860"
+
+  server:
+    extends:
+      file: compose.base.yml
+      service: app-base
+    build:
+      target: server
+    environment:
+      COMPILE: ${COMPILE:-0}
+    profiles: ["server"]
+    ports:
+      - "${API_PORT:-8080}:8080"

+ 0 - 18
docker-compose.dev.yml

@@ -1,18 +0,0 @@
-version: '3.8'
-
-services:
-  fish-speech:
-    build:
-      context: .
-      dockerfile: dockerfile.dev
-    container_name: fish-speech
-    volumes:
-      - ./:/exp
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
-    command: tail -f /dev/null

+ 398 - 0
docker/Dockerfile

@@ -0,0 +1,398 @@
+# docker/Dockerfile
+
+# IMPORTANT: The docker images do not contain the checkpoints. You need to mount the checkpoints to the container.
+
+# Build the image:
+#   docker build \
+#       --platform linux/amd64 \
+#       -f docker/Dockerfile \
+#       --build-arg BACKEND=[cuda, cpu] \
+#       --target [webui, server] \
+#       -t fish-speech-[webui, server]:[cuda, cpu] .
+
+# e.g. for building the webui:
+#   docker build \
+#       --platform linux/amd64 \
+#       -f docker/Dockerfile \
+#       --build-arg BACKEND=cuda \
+#       --target webui \
+#       -t fish-speech-webui:cuda .
+
+# e.g. for building the server:
+#   docker build \
+#       --platform linux/amd64 \
+#       -f docker/Dockerfile \
+#       --build-arg BACKEND=cuda \
+#       --target server \
+#       -t fish-speech-server:cuda .
+
+
+
+# Multi-platform build:
+#   docker buildx build \
+#       --platform linux/amd64,linux/arm64 \
+#       -f docker/Dockerfile \
+#       --build-arg BACKEND=cpu \
+#       --target webui \
+#       -t fish-speech-webui:cpu .
+
+
+# Running the image interactively:
+#   docker run \
+#       --gpus all \
+#       -v /path/to/fish-speech/checkpoints:/app/checkpoints \
+#       -e COMPILE=1 \            # or -e COMPILE=0 \
+#       -it fish-speech-[webui, server]:[cuda, cpu]
+
+# E.g. running the webui:
+#   docker run \
+#       --gpus all \
+#       -v ./checkpoints:/app/checkpoints \
+#       -e COMPILE=1 \
+#       -p 7860:7860 \
+#       fish-speech-webui:cuda
+
+# E.g. running the server:
+#   docker run \
+#       --gpus all \
+#       -v ./checkpoints:/app/checkpoints \
+#       -p 8080:8080 \
+#       -it fish-speech-server:cuda
+
+
+# Select the specific cuda version (see https://hub.docker.com/r/nvidia/cuda/)
+ARG CUDA_VER=12.6.0
+# Adapt the uv extra to fit the cuda version (one of [cu126, cu128, cu129])
+ARG UV_EXTRA=cu126
+ARG BACKEND=cuda
+
+ARG UBUNTU_VER=24.04
+ARG PY_VER=3.12
+ARG UV_VERSION=0.8.15
+
+# Create non-root user early for security
+ARG USERNAME=fish
+ARG USER_UID=1000
+ARG USER_GID=1000
+
+##############################################################
+# Base stage per backend
+##############################################################
+
+# --- CUDA (x86_64) ---
+FROM nvidia/cuda:${CUDA_VER}-cudnn-runtime-ubuntu${UBUNTU_VER} AS base-cuda
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install system dependencies in a single layer with cleanup
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    set -eux \
+    && rm -f /etc/apt/apt.conf.d/docker-clean \
+    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        python3-pip \
+        python3-dev \
+        git \
+        ca-certificates \
+        curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# --- CPU-only (portable x86_64) ---
+FROM python:${PY_VER}-slim AS base-cpu
+ENV UV_EXTRA=cpu
+
+# Install system dependencies in a single layer with cleanup
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    set -eux \
+    && rm -f /etc/apt/apt.conf.d/docker-clean \
+    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        git \
+        ca-certificates \
+        curl \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+
+##############################################################
+# UV stage
+##############################################################
+
+ARG UV_VERSION
+FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv-bin
+
+##############################################################
+# Shared app base stage
+##############################################################
+
+FROM base-${BACKEND} AS app-base
+
+ARG PY_VER
+ARG BACKEND
+ARG USERNAME
+ARG USER_UID
+ARG USER_GID
+ARG UV_VERSION
+ARG UV_EXTRA
+
+ENV BACKEND=${BACKEND} \
+    DEBIAN_FRONTEND=noninteractive \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+# System dependencies for audio processing
+ARG DEPENDENCIES=" \
+    libsox-dev \
+    build-essential \
+    cmake \
+    libasound-dev \
+    portaudio19-dev \
+    libportaudio2 \
+    libportaudiocpp0 \
+    ffmpeg"
+
+# Install system dependencies with caching and cleanup
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    set -eux \
+    && rm -f /etc/apt/apt.conf.d/docker-clean \
+    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends ${DEPENDENCIES} \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install specific uv version
+COPY --from=uv-bin /uv /uvx /bin/
+
+# RUN groupadd --gid ${USER_GID} ${USERNAME} \
+#     && useradd --uid ${USER_UID} --gid ${USER_GID} -m ${USERNAME} \
+#     && mkdir -p /app /home/${USERNAME}/.cache \
+#     && chown -R ${USERNAME}:${USERNAME} /app /home/${USERNAME}/.cache
+
+# Create non-root user (or use existing user)
+RUN set -eux; \
+    if getent group ${USER_GID} >/dev/null 2>&1; then \
+        echo "Group ${USER_GID} already exists"; \
+    else \
+        groupadd -g ${USER_GID} ${USERNAME}; \
+    fi; \
+    if id -u ${USER_UID} >/dev/null 2>&1; then \
+        echo "User ${USER_UID} already exists, using existing user"; \
+        EXISTING_USER=$(id -un ${USER_UID}); \
+        mkdir -p /app /home/${EXISTING_USER}/.cache; \
+        chown -R ${USER_UID}:${USER_GID} /app /home/${EXISTING_USER}/.cache; \
+    else \
+        useradd -m -u ${USER_UID} -g ${USER_GID} ${USERNAME}; \
+        mkdir -p /app /home/${USERNAME}/.cache; \
+        chown -R ${USERNAME}:${USERNAME} /app /home/${USERNAME}/.cache; \
+    fi
+
+# Create references directory with proper permissions for the non-root user
+RUN mkdir -p /app/references \
+    && chown -R ${USER_UID}:${USER_GID} /app/references \
+    && chmod 755 /app/references
+
+# Set working directory
+WORKDIR /app
+
+# Copy dependency files first for better caching
+COPY --chown=${USER_UID}:${USER_GID} pyproject.toml uv.lock README.md ./
+
+# Switch to non-root user for package installation
+USER ${USER_UID}:${USER_GID}
+
+# Install Python dependencies (cacheable by lockfiles)
+# Use a generic cache path that works regardless of username
+RUN --mount=type=cache,target=/tmp/uv-cache,uid=${USER_UID},gid=${USER_GID} \
+    uv python pin ${PY_VER} \
+    && uv sync --extra ${UV_EXTRA} --frozen --no-install-project
+
+# Copy application code
+COPY --chown=${USER_UID}:${USER_GID} . .
+
+# Install the local package after copying source code
+RUN uv sync --extra ${UV_EXTRA} --frozen
+
+# Create common entrypoint script
+RUN printf '%s\n' \
+    '#!/bin/bash' \
+    'set -euo pipefail' \
+    '' \
+    '# Set user info from build args' \
+    'USER_UID='${USER_UID} \
+    'USER_GID='${USER_GID} \
+    '' \
+    '# Logging function' \
+    'log() { echo "[$(date +"%Y-%m-%d %H:%M:%S")] $*" >&2; }' \
+    '' \
+    '# Validate environment' \
+    'validate_env() {' \
+    '    if [ ! -d "/app/checkpoints" ]; then' \
+    '        log "WARNING: /app/checkpoints directory not found. Please mount your checkpoints."' \
+    '    fi' \
+    '    if [ ! -d "/app/references" ]; then' \
+    '        log "WARNING: /app/references directory not found. Please mount your references."' \
+    '    else' \
+    '        # Check if we can write to references directory' \
+    '        if [ ! -w "/app/references" ]; then' \
+    '            log "ERROR: Cannot write to /app/references directory. Please ensure the mounted directory has proper permissions for user with UID ${USER_UID}."' \
+    '            log "You can fix this by running: sudo chown -R ${USER_UID}:${USER_GID} /path/to/your/references"' \
+    '            exit 1' \
+    '        fi' \
+    '    fi' \
+    '}' \
+    '' \
+    '# Build device arguments' \
+    'build_device_args() {' \
+    '    if [ "${BACKEND:-}" = "cpu" ]; then' \
+    '        echo "--device cpu"' \
+    '    fi' \
+    '}' \
+    '' \
+    '# Build compile arguments' \
+    'build_compile_args() {' \
+    '    if [ "${1:-}" = "compile" ] || [ "${COMPILE:-}" = "1" ] || [ "${COMPILE:-}" = "true" ]; then' \
+    '        echo "--compile"' \
+    '        shift' \
+    '    fi' \
+    '    echo "$@"' \
+    '}' \
+    '' \
+    '# Health check function' \
+    'health_check() {' \
+    '    local port=${1:-7860}' \
+    '    local endpoint=${2:-/health}' \
+    '    curl -f http://localhost:${port}${endpoint} 2>/dev/null || exit 1' \
+    '}' \
+    > /app/common.sh && chmod +x /app/common.sh
+
+##############################################################
+# App stages
+##############################################################
+
+# Gradio WebUI
+FROM app-base AS webui
+ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1
+
+ARG GRADIO_SERVER_NAME="0.0.0.0"
+ARG GRADIO_SERVER_PORT=7860
+ARG LLAMA_CHECKPOINT_PATH="checkpoints/openaudio-s1-mini"
+ARG DECODER_CHECKPOINT_PATH="checkpoints/openaudio-s1-mini/codec.pth"
+ARG DECODER_CONFIG_NAME="modded_dac_vq"
+
+
+# Expose port
+EXPOSE ${GRADIO_SERVER_PORT}
+
+# Set environment variables
+ENV GRADIO_SERVER_NAME=${GRADIO_SERVER_NAME}
+ENV GRADIO_SERVER_PORT=${GRADIO_SERVER_PORT}
+ENV LLAMA_CHECKPOINT_PATH=${LLAMA_CHECKPOINT_PATH}
+ENV DECODER_CHECKPOINT_PATH=${DECODER_CHECKPOINT_PATH}
+ENV DECODER_CONFIG_NAME=${DECODER_CONFIG_NAME}
+
+# Create webui entrypoint
+RUN printf '%s\n' \
+    '#!/bin/bash' \
+    'source /app/common.sh' \
+    '' \
+    'log "Starting Fish Speech WebUI..."' \
+    'validate_env' \
+    '' \
+    'DEVICE_ARGS=$(build_device_args)' \
+    'COMPILE_ARGS=$(build_compile_args "$@")' \
+    '' \
+    'log "Device args: ${DEVICE_ARGS:-none}"' \
+    'log "Compile args: ${COMPILE_ARGS}"' \
+    'log "Server: ${GRADIO_SERVER_NAME}:${GRADIO_SERVER_PORT}"' \
+    '' \
+    'exec uv run tools/run_webui.py \' \
+    '  --llama-checkpoint-path "${LLAMA_CHECKPOINT_PATH}" \' \
+    '  --decoder-checkpoint-path "${DECODER_CHECKPOINT_PATH}" \' \
+    '  --decoder-config-name "${DECODER_CONFIG_NAME}" \' \
+    '  ${DEVICE_ARGS} ${COMPILE_ARGS}' \
+    > /app/start_webui.sh && chmod +x /app/start_webui.sh
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:${GRADIO_SERVER_PORT}/health || exit 1
+
+ENTRYPOINT ["/app/start_webui.sh"]
+
+# API Server
+FROM app-base AS server
+ENV PYTHONDONTWRITEBYTECODE=1 PYTHONUNBUFFERED=1
+
+ARG API_SERVER_NAME="0.0.0.0"
+ARG API_SERVER_PORT=8080
+ARG LLAMA_CHECKPOINT_PATH="checkpoints/openaudio-s1-mini"
+ARG DECODER_CHECKPOINT_PATH="checkpoints/openaudio-s1-mini/codec.pth"
+ARG DECODER_CONFIG_NAME="modded_dac_vq"
+
+# Expose port
+EXPOSE ${API_SERVER_PORT}
+
+# Set environment variables
+ENV API_SERVER_NAME=${API_SERVER_NAME}
+ENV API_SERVER_PORT=${API_SERVER_PORT}
+ENV LLAMA_CHECKPOINT_PATH=${LLAMA_CHECKPOINT_PATH}
+ENV DECODER_CHECKPOINT_PATH=${DECODER_CHECKPOINT_PATH}
+ENV DECODER_CONFIG_NAME=${DECODER_CONFIG_NAME}
+
+# Create server entrypoint
+RUN printf '%s\n' \
+    '#!/bin/bash' \
+    'source /app/common.sh' \
+    '' \
+    'log "Starting Fish Speech API Server..."' \
+    'validate_env' \
+    '' \
+    'DEVICE_ARGS=$(build_device_args)' \
+    'COMPILE_ARGS=$(build_compile_args "$@")' \
+    '' \
+    'log "Device args: ${DEVICE_ARGS:-none}"' \
+    'log "Compile args: ${COMPILE_ARGS}"' \
+    'log "Server: ${API_SERVER_NAME}:${API_SERVER_PORT}"' \
+    '' \
+    'exec uv run tools/api_server.py \' \
+    '  --listen "${API_SERVER_NAME}:${API_SERVER_PORT}" \' \
+    '  --llama-checkpoint-path "${LLAMA_CHECKPOINT_PATH}" \' \
+    '  --decoder-checkpoint-path "${DECODER_CHECKPOINT_PATH}" \' \
+    '  --decoder-config-name "${DECODER_CONFIG_NAME}" \' \
+    '  ${DEVICE_ARGS} ${COMPILE_ARGS}' \
+    > /app/start_server.sh && chmod +x /app/start_server.sh
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -f http://localhost:${API_SERVER_PORT}/v1/health || exit 1
+
+ENTRYPOINT ["/app/start_server.sh"]
+
+# Development stage
+FROM app-base AS dev
+USER root
+
+# Install development tools
+RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
+    --mount=type=cache,target=/var/lib/apt,sharing=locked \
+    apt-get update \
+    && apt-get install -y --no-install-recommends \
+        vim \
+        htop \
+        strace \
+        gdb \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+USER ${USER_UID}:${USER_GID}
+
+# Install development dependencies
+RUN uv sync --extra ${UV_EXTRA} --dev
+
+# Default to bash for development
+ENTRYPOINT ["/bin/bash"]

+ 0 - 50
dockerfile

@@ -1,50 +0,0 @@
-FROM python:3.12-slim-bookworm AS stage-1
-ARG TARGETARCH
-
-ARG HUGGINGFACE_MODEL=fish-speech-1.5
-ARG HF_ENDPOINT=https://huggingface.co
-
-WORKDIR /opt/fish-speech
-
-RUN set -ex \
-    && pip install huggingface_hub \
-    && HF_ENDPOINT=${HF_ENDPOINT} huggingface-cli download --resume-download fishaudio/${HUGGINGFACE_MODEL} --local-dir checkpoints/${HUGGINGFACE_MODEL}
-
-FROM python:3.12-slim-bookworm
-ARG TARGETARCH
-
-ARG DEPENDENCIES="  \
-    ca-certificates \
-    libsox-dev \
-    build-essential \
-    cmake \
-    libasound-dev \
-    portaudio19-dev \
-    libportaudio2 \
-    libportaudiocpp0 \
-    ffmpeg"
-
-RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
-    --mount=type=cache,target=/var/lib/apt,sharing=locked \
-    set -ex \
-    && rm -f /etc/apt/apt.conf.d/docker-clean \
-    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' >/etc/apt/apt.conf.d/keep-cache \
-    && apt-get update \
-    && apt-get -y install --no-install-recommends ${DEPENDENCIES} \
-    && echo "no" | dpkg-reconfigure dash
-
-WORKDIR /opt/fish-speech
-
-COPY . .
-
-RUN --mount=type=cache,target=/root/.cache,sharing=locked \
-    set -ex \
-    && pip install -e .[stable]
-
-COPY --from=stage-1 /opt/fish-speech/checkpoints /opt/fish-speech/checkpoints
-
-ENV GRADIO_SERVER_NAME="0.0.0.0"
-
-EXPOSE 7860
-
-CMD ["./entrypoint.sh"]

+ 1 - 1
docs/ar/inference.md

@@ -9,7 +9,7 @@
 أولاً تحتاج إلى تحميل أوزان النموذج:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## استنتاج سطر الأوامر

+ 61 - 1
docs/en/inference.md

@@ -9,7 +9,13 @@ We support command line, HTTP API and WebUI for inference, you can choose any me
 First you need to download the model weights:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+
+# Requires "huggingface_hub[cli]" to be installed
+# pip install huggingface_hub[cli]
+# or 
+# uv tool install huggingface_hub[cli]
+
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## Command Line Inference
@@ -68,6 +74,13 @@ python -m tools.api_server \
     --llama-checkpoint-path "checkpoints/openaudio-s1-mini" \
     --decoder-checkpoint-path "checkpoints/openaudio-s1-mini/codec.pth" \
     --decoder-config-name modded_dac_vq
+
+# or with uv
+uv run tools/api_server.py \
+    --listen 0.0.0.0:8080 \
+    --llama-checkpoint-path "checkpoints/openaudio-s1-mini" \
+    --decoder-checkpoint-path "checkpoints/openaudio-s1-mini/codec.pth" \
+    --decoder-config-name modded_dac_vq
 ```
 
 > If you want to speed up inference, you can add the `--compile` parameter.
@@ -104,3 +117,50 @@ python -m tools.run_webui
     You can use Gradio environment variables, such as `GRADIO_SHARE`, `GRADIO_SERVER_PORT`, `GRADIO_SERVER_NAME` to configure WebUI.
 
 Enjoy!
+
+
+## Using Docker
+You can use Docker to start the WebUI or the API server:
+
+### Using Docker Compose
+```bash
+# To start the server
+docker compose --profile server up
+# Or with compile
+COMPILE=1 docker compose --profile server up
+
+# To start the web ui
+docker compose --profile webui up
+# Or with compile
+COMPILE=1 docker compose --profile webui up
+```
+
+```bash
+# Select the target, either `webui` or `server`
+docker build \
+    --platform linux/amd64 \
+    -f docker/Dockerfile \
+    --build-arg BACKEND=cuda \
+    --target [webui, server] \
+    -t fish-speech-[webui, server]:cuda .
+
+# Starting the web ui
+docker run -d \
+    --name fish-speech-webui \
+    --gpus all \
+    -p 7860:7860 \
+    -v ./checkpoints:/app/checkpoints \
+    -v ./references:/app/references \
+    -e COMPILE=1 \
+    --rm fish-speech-webui:cuda
+
+# Starting the server
+docker run -d \
+    --name fish-speech-server \
+    --gpus all \
+    -p 8080:8080 \
+    -v ./checkpoints:/app/checkpoints \
+    -v ./references:/app/references \
+    -e COMPILE=1 \
+    --rm fish-speech-server:cuda
+```

+ 15 - 3
docs/en/install.md

@@ -3,7 +3,7 @@
 - GPU Memory: 12GB (Inference)
 - System: Linux, WSL
 
-## Setup
+## System Setup
 
 First you need to install pyaudio and sox, which are used for audio processing.
 
@@ -17,14 +17,21 @@ apt install portaudio19-dev libsox-dev ffmpeg
 conda create -n fish-speech python=3.12
 conda activate fish-speech
 
+# Select the correct cuda version for your system from [cu126, cu128, cu129]
+pip install -e .[cu129]
+# Or for cpu only
+pip install -e .[cpu]
+# You can also omit the extra if you want to use the default torch index
 pip install -e .
 ```
 
 ### UV
 
 ```bash
-
-uv sync --python 3.12
+# Select the correct cuda version for your system from [cu126, cu128, cu129]
+uv sync --python 3.12 --extra cu129
+# Or for cpu only
+uv sync --python 3.12 --extra cpu
 ```
 ### Intel Arc XPU support
 
@@ -41,3 +48,8 @@ pip install -e .
 
 !!! warning
     The `compile` option is not supported on Windows and macOS; if you want to run with compile, you need to install Triton yourself.
+
+
+## Docker Setup
+
+See [inference](./inference.md) to use docker for the webui or the API server.

+ 1 - 1
docs/ja/inference.md

@@ -9,7 +9,7 @@
 まず、モデルの重みをダウンロードする必要があります:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## コマンドライン推論

+ 1 - 1
docs/ko/inference.md

@@ -9,7 +9,7 @@
 먼저 모델 가중치를 다운로드해야 합니다:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## 명령줄 추론

+ 1 - 1
docs/pt/inference.md

@@ -9,7 +9,7 @@ Suportamos linha de comando, API HTTP e WebUI para inferência, você pode escol
 Primeiro você precisa baixar os pesos do modelo:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## Inferência por Linha de Comando

+ 1 - 1
docs/zh/inference.md

@@ -9,7 +9,7 @@
 首先您需要下载模型权重:
 
 ```bash
-huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
+hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini
 ```
 
 ## 命令行推理

+ 1 - 1
inference.ipynb

@@ -61,7 +61,7 @@
     "# !set HF_ENDPOINT=https://hf-mirror.com\n",
     "# !export HF_ENDPOINT=https://hf-mirror.com \n",
     "\n",
-    "!huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini/"
+    "!hf download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini/"
    ]
   },
   {

+ 65 - 1
pyproject.toml

@@ -8,7 +8,7 @@ description = "Fish Speech"
 readme = "README.md"
 requires-python = ">=3.10"
 keywords = ["TTS", "Speech"]
-license = {text = "Apache-2.0"}
+license = "Apache-2.0"
 classifiers = [
     "Programming Language :: Python :: 3",
 ]
@@ -52,6 +52,68 @@ stable = [
     "torch>=2.5.1",
     "torchaudio",
 ]
+cpu = [
+  "torch>=2.5.1",
+  "torchaudio",
+]
+cu126 = [
+  "torch>=2.5.1",
+  "torchaudio",
+]
+cu128 = [
+  "torch>=2.5.1",
+  "torchaudio",
+]
+cu129 = [
+  "torch>=2.5.1",
+  "torchaudio",
+]
+
+[tool.uv]
+conflicts = [
+  [
+    { extra = "cpu" },
+    { extra = "cu126" },
+    { extra = "cu128" },
+    { extra = "cu129" },
+  ],
+]
+
+[tool.uv.sources]
+torch = [
+  { index = "pytorch-cpu", extra = "cpu" },
+  { index = "pytorch-cu126", extra = "cu126" },
+  { index = "pytorch-cu128", extra = "cu128" },
+  { index = "pytorch-cu129", extra = "cu129" },
+]
+torchaudio = [
+  { index = "pytorch-cpu", extra = "cpu" },
+  { index = "pytorch-cu126", extra = "cu126" },
+  { index = "pytorch-cu128", extra = "cu128" },
+  { index = "pytorch-cu129", extra = "cu129" },
+]
+
+[[tool.uv.index]]
+name = "pytorch-cpu"
+url = "https://download.pytorch.org/whl/cpu"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu126"
+url = "https://download.pytorch.org/whl/cu126"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu128"
+url = "https://download.pytorch.org/whl/cu128"
+explicit = true
+
+[[tool.uv.index]]
+name = "pytorch-cu129"
+url = "https://download.pytorch.org/whl/cu129"
+explicit = true
+
+
 
 [build-system]
 requires = ["setuptools", "setuptools-scm"]
@@ -59,3 +121,5 @@ build-backend = "setuptools.build_meta"
 
 [tool.setuptools]
 packages = ["fish_speech", "tools"]
+
+[tool.setuptools_scm]

File diff suppressed because it is too large
+ 555 - 397
uv.lock


Some files were not shown because too many files changed in this diff