---
# Docker Compose stack: one vLLM OpenAI-compatible server hosting the
# Qwen3-Embedding-0.6B model, plus a Milvus instance. Two larger
# Qwen3 embedding variants (4B, 8B) are kept below as commented-out templates.
version: "3.8"

services:
  # vLLM - Qwen3 0.6B
  vllm-0.6b:
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:v0.9.2
    container_name: vllm-qwen3-0.6b
    ports:
      # Host port 8100 forwards to the server's port 8000 inside the container.
      - "8100:8000"
    # Arguments appended to the image entrypoint. The listen-port flag is
    # `--port`; `--api-port` is not an option the server accepts.
    command: >-
      --model Qwen/Qwen3-Embedding-0.6B
      --dtype float16
      --port 8000
    volumes:
      # Persist downloaded model weights on the host between restarts.
      - ./models:/root/.cache/huggingface

  # # vLLM - Qwen3 4B
  # vllm-4b:
  #   image: vllm/vllm-openai:latest
  #   container_name: vllm-qwen3-4b
  #   ports:
  #     - "8200:8000"
  #   command: >-
  #     --model Qwen/Qwen3-Embedding-4B
  #     --dtype float16
  #     --port 8000
  #   volumes:
  #     - ./models:/root/.cache/huggingface
  #
  # # vLLM - Qwen3 8B
  # vllm-8b:
  #   image: vllm/vllm-openai:latest
  #   container_name: vllm-qwen3-8b
  #   ports:
  #     - "8300:8000"
  #   command: >-
  #     --model Qwen/Qwen3-Embedding-8B
  #     --dtype float16
  #     --port 8000
  #   volumes:
  #     - ./models:/root/.cache/huggingface

  # Milvus vector database
  milvus:
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.4-standalone
    container_name: milvus
    ports:
      - "19530:19530"
      - "9091:9091"
    environment:
      - ETCD_USE_EMBED=true
      - MINIO_USE_EMBED=true
      - PULSAR_USE_EMBED=true
    volumes:
      - ./milvus_data:/var/lib/milvus
    # NOTE(review): this GPU reservation is attached to the milvus service,
    # while vllm-0.6b declares none - confirm which service is meant to
    # receive the NVIDIA devices.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]