#services:
#  # vLLM - Qwen3 0.6B
#  vllm-0.6b:
#    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:v0.9.1
#    container_name: vllm-qwen3-0.6b
#    ports:
#      - "8100:8000"
#    command: >
#      --model Qwen/Qwen3-Embedding-0.6B
#      --dtype float16
#      --port 8000
#    volumes:
#      # Mount the pre-downloaded model directory
#      - ./models/Qwen3-Embedding-0.6B:/app/models/Qwen3-Embedding-0.6B
#      # Optional: mount the cache directory
#      - ./cache:/root/.cache/huggingface
#    environment:
#      # Use the Hugging Face mirror inside China
#      - HF_ENDPOINT=https://hf-mirror.com
#      # Optional: use a Python package index mirror
#      - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
#    deploy:
#      resources:
#        reservations:
#          devices:
#            - driver: nvidia
#              count: all
#              capabilities: [ gpu ]
#    # Add a health check
#    healthcheck:
#      test: [ "CMD", "curl", "-f", "http://localhost:8000/health" ]
#      interval: 30s
#      timeout: 10s
#      retries: 3
#      start_period: 40s
##  # vLLM - Qwen3 4B
##  vllm-4b:
##    image: vllm/vllm-openai:latest
##    container_name: vllm-qwen3-4b
##    ports:
##      - "8200:8000"
##    command: >
##      --model Qwen/Qwen3-Embedding-4B
##      --dtype float16
##      --port 8000
##    volumes:
##      - ./models:/root/.cache/huggingface
##
##  # vLLM - Qwen3 8B
##  vllm-8b:
##    image: vllm/vllm-openai:latest
##    container_name: vllm-qwen3-8b
##    ports:
##      - "8300:8000"
##    command: >
##      --model Qwen/Qwen3-Embedding-8B
##      --dtype float16
##      --port 8000
##    volumes:
##      - ./models:/root/.cache/huggingface
#
#  # Milvus vector database
##  milvus:
##    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.5
###    image: milvusdb/milvus:v2.4.4-standalone
##    container_name: milvus
##    ports:
##      - "19530:19530"
##      - "9091:9091"
##    environment:
##      - ETCD_USE_EMBED=true
##      - MINIO_USE_EMBED=true
##      - PULSAR_USE_EMBED=true
##    volumes:
##      - ./milvus_data:/var/lib/milvus
##    command: ["milvus", "run", "standalone"]
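#
# Usage sketch (assumptions: the vllm-0.6b service above has been uncommented and
# started with `docker compose up -d vllm-0.6b`; host port 8100 comes from its
# ports mapping). The container serves vLLM's OpenAI-compatible API, so the
# embedding endpoint can be smoke-tested with:
#
#   curl http://localhost:8100/v1/embeddings \
#     -H "Content-Type: application/json" \
#     -d '{"model": "Qwen/Qwen3-Embedding-0.6B", "input": "hello world"}'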