services:
  # vLLM - Qwen3 0.6B
  vllm-0.6b:
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:latest
    container_name: vllm-qwen3-0.6b
    ports:
      - "8100:8000"
    command: >
      --model Qwen/Qwen3-Embedding-0.6B
      --dtype float16
      --port 8000
    volumes:
      # Mount the pre-downloaded model directory
      # (to serve this local copy directly, point --model at /app/models/Qwen3-Embedding-0.6B instead)
      - ./models/Qwen3-Embedding-0.6B:/app/models/Qwen3-Embedding-0.6B
      # Optional: mount the Hugging Face cache directory
      - ./cache:/root/.cache/huggingface
    environment:
      # Use the Hugging Face mirror (for access from mainland China)
      - HF_ENDPOINT=https://hf-mirror.com
      # Optional: use a Python package index mirror
      - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [ gpu ]
    # Health check against the vLLM server's /health endpoint
    healthcheck:
      test: [ "CMD", "curl", "-f", "http://localhost:8000/health" ]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

#  # vLLM - Qwen3 4B
#  vllm-4b:
#    image: vllm/vllm-openai:latest
#    container_name: vllm-qwen3-4b
#    ports:
#      - "8200:8000"
#    command: >
#      --model Qwen/Qwen3-Embedding-4B
#      --dtype float16
#      --port 8000
#    volumes:
#      - ./models:/root/.cache/huggingface
#
#  # vLLM - Qwen3 8B
#  vllm-8b:
#    image: vllm/vllm-openai:latest
#    container_name: vllm-qwen3-8b
#    ports:
#      - "8300:8000"
#    command: >
#      --model Qwen/Qwen3-Embedding-8B
#      --dtype float16
#      --port 8000
#    volumes:
#      - ./models:/root/.cache/huggingface

  # Milvus vector database
#  milvus:
#    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.5
##    image: milvusdb/milvus:v2.4.4-standalone
#    container_name: milvus
#    ports:
#      - "19530:19530"
#      - "9091:9091"
#    environment:
#      - ETCD_USE_EMBED=true
#      - MINIO_USE_EMBED=true
#      - PULSAR_USE_EMBED=true
#    volumes:
#      - ./milvus_data:/var/lib/milvus
#    command: ["milvus", "run", "standalone"]
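
# Quick sanity check (a sketch, assuming the vllm-0.6b service is healthy and the model
# name matches what was passed to --model): the vLLM OpenAI-compatible server exposes
# /v1/embeddings, so a short Python script against the mapped port 8100 can confirm that
# embeddings are actually being served:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8100/v1/embeddings",
#       json={"model": "Qwen/Qwen3-Embedding-0.6B", "input": ["hello world"]},
#       timeout=30,
#   )
#   resp.raise_for_status()
#   # Each item in "data" carries one embedding vector; print its dimension.
#   print(len(resp.json()["data"][0]["embedding"]))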