|
@@ -1,76 +1,76 @@
|
|
|
-services:
|
|
|
- # vLLM - Qwen3 0.6B
|
|
|
- vllm-0.6b:
|
|
|
- image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:v0.91
|
|
|
- container_name: vllm-qwen3-0.6b
|
|
|
- ports:
|
|
|
- - "8100:8000"
|
|
|
- command: >
|
|
|
- --model Qwen/Qwen3-Embedding-0.6B
|
|
|
- --dtype float16
|
|
|
- --port 8000
|
|
|
- volumes:
|
|
|
- # 挂载预下载的模型目录
|
|
|
- - ./models/Qwen3-Embedding-0.6B:/app/models/Qwen3-Embedding-0.6B
|
|
|
- # 可选:挂载缓存目录
|
|
|
- - ./cache:/root/.cache/huggingface
|
|
|
- environment:
|
|
|
- # 设置 Hugging Face 国内镜像
|
|
|
- - HF_ENDPOINT=https://hf-mirror.com
|
|
|
- # 可选:设置 Python 包索引镜像
|
|
|
- - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
|
|
|
- deploy:
|
|
|
- resources:
|
|
|
- reservations:
|
|
|
- devices:
|
|
|
- - driver: nvidia
|
|
|
- count: all
|
|
|
- capabilities: [ gpu ]
|
|
|
- # 添加健康检查
|
|
|
- healthcheck:
|
|
|
- test: [ "CMD", "curl", "-f", "http://localhost:8000/health" ]
|
|
|
- interval: 30s
|
|
|
- timeout: 10s
|
|
|
- retries: 3
|
|
|
- start_period: 40s
|
|
|
-# # vLLM - Qwen3 4B
|
|
|
-# vllm-4b:
|
|
|
-# image: vllm/vllm-openai:latest
|
|
|
-# container_name: vllm-qwen3-4b
|
|
|
+#services:
|
|
|
+# # vLLM - Qwen3 0.6B
|
|
|
+# vllm-0.6b:
|
|
|
+# image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:v0.9.1
|
|
|
+# container_name: vllm-qwen3-0.6b
|
|
|
# ports:
|
|
|
-# - "8200:8000"
|
|
|
+# - "8100:8000"
|
|
|
# command: >
|
|
|
-# --model Qwen/Qwen3-Embedding-4B
|
|
|
+# --model Qwen/Qwen3-Embedding-0.6B
|
|
|
# --dtype float16
|
|
|
-# --api-port 8000
|
|
|
+# --port 8000
|
|
|
# volumes:
|
|
|
-# - ./models:/root/.cache/huggingface
|
|
|
-#
|
|
|
-# # vLLM - Qwen3 8B
|
|
|
-# vllm-8b:
|
|
|
-# image: vllm/vllm-openai:latest
|
|
|
-# container_name: vllm-qwen3-8b
|
|
|
-# ports:
|
|
|
-# - "8300:8000"
|
|
|
-# command: >
|
|
|
-# --model Qwen/Qwen3-Embedding-8B
|
|
|
-# --dtype float16
|
|
|
-# --api-port 8000
|
|
|
-# volumes:
|
|
|
-# - ./models:/root/.cache/huggingface
|
|
|
-
|
|
|
- # Milvus 向量数据库
|
|
|
-# milvus:
|
|
|
-# image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.5
|
|
|
-## image: milvusdb/milvus:v2.4.4-standalone
|
|
|
-# container_name: milvus
|
|
|
-# ports:
|
|
|
-# - "19530:19530"
|
|
|
-# - "9091:9091"
|
|
|
+# # 挂载预下载的模型目录
|
|
|
+# - ./models/Qwen3-Embedding-0.6B:/app/models/Qwen3-Embedding-0.6B
|
|
|
+# # 可选:挂载缓存目录
|
|
|
+# - ./cache:/root/.cache/huggingface
|
|
|
# environment:
|
|
|
-# - ETCD_USE_EMBED=true
|
|
|
-# - MINIO_USE_EMBED=true
|
|
|
-# - PULSAR_USE_EMBED=true
|
|
|
-# volumes:
|
|
|
-# - ./milvus_data:/var/lib/milvus
|
|
|
-# command: ["milvus", "run", "standalone"]
|
|
|
+# # 设置 Hugging Face 国内镜像
|
|
|
+# - HF_ENDPOINT=https://hf-mirror.com
|
|
|
+# # 可选:设置 Python 包索引镜像
|
|
|
+# - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
|
|
|
+# deploy:
|
|
|
+# resources:
|
|
|
+# reservations:
|
|
|
+# devices:
|
|
|
+# - driver: nvidia
|
|
|
+# count: all
|
|
|
+# capabilities: [ gpu ]
|
|
|
+# # 添加健康检查
|
|
|
+# healthcheck:
|
|
|
+# test: [ "CMD", "curl", "-f", "http://localhost:8000/health" ]
|
|
|
+# interval: 30s
|
|
|
+# timeout: 10s
|
|
|
+# retries: 3
|
|
|
+# start_period: 40s
|
|
|
+## # vLLM - Qwen3 4B
|
|
|
+## vllm-4b:
|
|
|
+## image: vllm/vllm-openai:latest
|
|
|
+## container_name: vllm-qwen3-4b
|
|
|
+## ports:
|
|
|
+## - "8200:8000"
|
|
|
+## command: >
|
|
|
+## --model Qwen/Qwen3-Embedding-4B
|
|
|
+## --dtype float16
|
|
|
+## --port 8000
|
|
|
+## volumes:
|
|
|
+## - ./models:/root/.cache/huggingface
|
|
|
+##
|
|
|
+## # vLLM - Qwen3 8B
|
|
|
+## vllm-8b:
|
|
|
+## image: vllm/vllm-openai:latest
|
|
|
+## container_name: vllm-qwen3-8b
|
|
|
+## ports:
|
|
|
+## - "8300:8000"
|
|
|
+## command: >
|
|
|
+## --model Qwen/Qwen3-Embedding-8B
|
|
|
+## --dtype float16
|
|
|
+## --port 8000
|
|
|
+## volumes:
|
|
|
+## - ./models:/root/.cache/huggingface
|
|
|
+#
|
|
|
+# # Milvus 向量数据库
|
|
|
+## milvus:
|
|
|
+## image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.5
|
|
|
+### image: milvusdb/milvus:v2.4.4-standalone
|
|
|
+## container_name: milvus
|
|
|
+## ports:
|
|
|
+## - "19530:19530"
|
|
|
+## - "9091:9091"
|
|
|
+## environment:
|
|
|
+## - ETCD_USE_EMBED=true
|
|
|
+## - MINIO_USE_EMBED=true
|
|
|
+## - PULSAR_USE_EMBED=true
|
|
|
+## volumes:
|
|
|
+## - ./milvus_data:/var/lib/milvus
|
|
|
+## command: ["milvus", "run", "standalone"]
|