---
# Docker Compose stack for serving Qwen3 embedding models through vLLM's
# OpenAI-compatible API server. Only the 0.6B service is enabled; the other
# service definitions are kept below as commented-out templates.
services:
  # vLLM - Qwen3 0.6B
  vllm-0.6b:
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:latest
    container_name: vllm-qwen3-0.6b
    ports:
      # host 8100 -> container 8000 (the --port passed to the server below)
      - "8100:8000"
    # The vllm-openai image defines its own ENTRYPOINT (the API server), so a
    # bare `command: sh -c "..."` would be handed to that server as arguments
    # and the pip upgrades would never run. Override the entrypoint so the
    # shell script below is what actually executes.
    entrypoint: ["sh", "-c"]
    command:
      # Folded scalar: the lines below are joined with spaces into ONE shell
      # string. `exec` replaces the shell with the server process so it
      # receives stop signals from Docker directly.
      # NOTE(review): upgrading packages on every start on top of a `latest`
      # tag is not reproducible; consider pinning an image tag instead.
      # NOTE(review): `python3` is used here instead of `python`; confirm
      # which interpreter name the image provides.
      - >-
        pip install --upgrade vllm &&
        pip install --upgrade transformers &&
        exec python3 -m vllm.entrypoints.openai.api_server
        --model /app/models/Qwen3-Embedding-0.6B
        --dtype float16
        --port 8000
    volumes:
      # Mount the pre-downloaded model directory
      - ./models/Qwen3-Embedding-0.6B:/app/models/Qwen3-Embedding-0.6B
      # Optional: mount the Hugging Face cache directory
      - ./cache:/root/.cache/huggingface
    environment:
      # Use a Hugging Face mirror reachable from mainland China
      - HF_ENDPOINT=https://hf-mirror.com
      # Optional: use a Python package index mirror for the pip upgrades
      - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU on the host for this container
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Health check against the server's /health endpoint
    # NOTE(review): requires `curl` inside the image - confirm it is present.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Failures during this window do not count towards `retries`; it has to
      # cover the pip upgrades plus model loading, which 40s did not.
      start_period: 300s

  # # vLLM - Qwen3 4B
  # # (the image entrypoint is the API server, so `command` holds only its
  # # arguments; the listen-port flag is --port)
  # vllm-4b:
  #   image: vllm/vllm-openai:latest
  #   container_name: vllm-qwen3-4b
  #   ports:
  #     - "8200:8000"
  #   command: >
  #     --model Qwen/Qwen3-Embedding-4B
  #     --dtype float16
  #     --port 8000
  #   volumes:
  #     - ./models:/root/.cache/huggingface
  #
  # # vLLM - Qwen3 8B
  # vllm-8b:
  #   image: vllm/vllm-openai:latest
  #   container_name: vllm-qwen3-8b
  #   ports:
  #     - "8300:8000"
  #   command: >
  #     --model Qwen/Qwen3-Embedding-8B
  #     --dtype float16
  #     --port 8000
  #   volumes:
  #     - ./models:/root/.cache/huggingface

  # Milvus vector database
  # milvus:
  #   image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.5
  #   # image: milvusdb/milvus:v2.4.4-standalone
  #   container_name: milvus
  #   ports:
  #     - "19530:19530"
  #     - "9091:9091"
  #   environment:
  #     - ETCD_USE_EMBED=true
  #     - MINIO_USE_EMBED=true
  #     - PULSAR_USE_EMBED=true
  #   volumes:
  #     - ./milvus_data:/var/lib/milvus
  #   command: ["milvus", "run", "standalone"]