# Docker Compose definition for a single service, `triton-server`, built from
# the Dockerfile in this directory.
# NOTE(review): the top-level `version` key is treated as obsolete by recent
# Docker Compose releases; it is kept here for compatibility with older tooling.
version: '3.8'

services:
  triton-server:
    # Build the image from the local Dockerfile and tag it with the name below.
    build:
      context: .
      dockerfile: Dockerfile
    image: my-tritonserver:25.05-vllm-python-py3

    # Publish container port 8000 on host port 8000.
    # NOTE(review): presumably the server's HTTP endpoint - confirm against
    # the Dockerfile / server configuration.
    ports:
      - "8000:8000"

    environment:
      - TZ=Asia/Shanghai
      - PYTHONDONTWRITEBYTECODE=1
      - PYTHONUNBUFFERED=1
      - PIP_DISABLE_PIP_VERSION_CHECK=on

    # GPU support: reserves one NVIDIA GPU for this service.
    # Comment out the whole `deploy` block to run without a GPU.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]

    # To mount a model data volume, uncomment the lines below.
    # volumes:
    #   - ./models:/app/models

    # To set resource limits, uncomment the lines below.
    # mem_limit: 8g
    # cpus: 4.0