# Docker Compose stack: vLLM OpenAI-compatible server(s) for Qwen3 embedding
# models, plus disabled templates for larger models and a Milvus instance.
version: "3.8"

services:
  # vLLM - Qwen3 0.6B
  vllm-0.6b:
    # NOTE(review): `latest` is a moving tag; consider pinning a specific
    # version so deployments are reproducible.
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:latest
    container_name: vllm-qwen3-0.6b
    ports:
      # Host port 8100 -> container port 8000 (the port passed via --port).
      - "8100:8000"
    environment:
      # Point Hugging Face downloads at the hf-mirror.com endpoint.
      - HF_ENDPOINT=https://hf-mirror.com
    # Folded scalar: the lines below are joined with spaces into one
    # argument string; `>-` strips the trailing newline.
    command: >-
      --model Qwen/Qwen3-Embedding-0.6B
      --dtype float16
      --port 8000
    volumes:
      # Bind-mount ./models over the in-container Hugging Face cache
      # (presumably so downloaded weights persist across restarts).
      - ./models:/root/.cache/huggingface
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]

  # NOTE(review): the disabled templates below pass `--api-port`, while the
  # active service above uses `--port`; confirm the flag before enabling.

  # # vLLM - Qwen3 4B
  # vllm-4b:
  #   image: vllm/vllm-openai:latest
  #   container_name: vllm-qwen3-4b
  #   ports:
  #     - "8200:8000"
  #   command: >
  #     --model Qwen/Qwen3-Embedding-4B
  #     --dtype float16
  #     --api-port 8000
  #   volumes:
  #     - ./models:/root/.cache/huggingface
  #
  # # vLLM - Qwen3 8B
  # vllm-8b:
  #   image: vllm/vllm-openai:latest
  #   container_name: vllm-qwen3-8b
  #   ports:
  #     - "8300:8000"
  #   command: >
  #     --model Qwen/Qwen3-Embedding-8B
  #     --dtype float16
  #     --api-port 8000
  #   volumes:
  #     - ./models:/root/.cache/huggingface

  # Milvus vector database
  # milvus:
  #   image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.5
  #   # image: milvusdb/milvus:v2.4.4-standalone
  #   container_name: milvus
  #   ports:
  #     - "19530:19530"
  #     - "9091:9091"
  #   environment:
  #     - ETCD_USE_EMBED=true
  #     - MINIO_USE_EMBED=true
  #     - PULSAR_USE_EMBED=true
  #   volumes:
  #     - ./milvus_data:/var/lib/milvus
  #   command: ["milvus", "run", "standalone"]