9 месяцев назад · b29cd8acd6
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,76 +1,76 @@
 
				-services:
			
 
				-  # vLLM - Qwen3 0.6B
			
 
				-  vllm-0.6b:
			
 
				-    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:v0.91
			
 
				-    container_name: vllm-qwen3-0.6b
			
 
				-    ports:
			
 
				-      - "8100:8000"
			
 
				-    command: >
			
 
				-      --model Qwen/Qwen3-Embedding-0.6B
			
 
				-      --dtype float16
			
 
				-      --port 8000
			
 
				-    volumes:
			
 
				-      # 挂载预下载的模型目录
			
 
				-      - ./models/Qwen3-Embedding-0.6B:/app/models/Qwen3-Embedding-0.6B
			
 
				-      # 可选：挂载缓存目录
			
 
				-      - ./cache:/root/.cache/huggingface
			
 
				-    environment:
			
 
				-      # 设置 Hugging Face 国内镜像
			
 
				-      - HF_ENDPOINT=https://hf-mirror.com
			
 
				-      # 可选：设置 Python 包索引镜像
			
 
				-      - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
			
 
				-    deploy:
			
 
				-      resources:
			
 
				-        reservations:
			
 
				-          devices:
			
 
				-            - driver: nvidia
			
 
				-              count: all
			
 
				-              capabilities: [ gpu ]
			
 
				-    # 添加健康检查
			
 
				-    healthcheck:
			
 
				-      test: [ "CMD", "curl", "-f", "http://localhost:8000/health" ]
			
 
				-      interval: 30s
			
 
				-      timeout: 10s
			
 
				-      retries: 3
			
 
				-      start_period: 40s
			
 
				-#  # vLLM - Qwen3 4B
			
 
				-#  vllm-4b:
			
 
				-#    image: vllm/vllm-openai:latest
			
 
				-#    container_name: vllm-qwen3-4b
			
 
				+#services:
			
 
				+#  # vLLM - Qwen3 0.6B
			
 
				+#  vllm-0.6b:
			
 
				+#    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/vllm/vllm-openai:v0.91
			
 
				+#    container_name: vllm-qwen3-0.6b
			
 
				 #    ports:
			
 
				-#      - "8200:8000"
			
 
				+#      - "8100:8000"
			
 
				 #    command: >
			
 
				-#      --model Qwen/Qwen3-Embedding-4B
			
 
				+#      --model Qwen/Qwen3-Embedding-0.6B
			
 
				 #      --dtype float16
			
 
				-#      --api-port 8000
			
 
				+#      --port 8000
			
 
				 #    volumes:
			
 
				-#      - ./models:/root/.cache/huggingface
			
 
				-#
			
 
				-#  # vLLM - Qwen3 8B
			
 
				-#  vllm-8b:
			
 
				-#    image: vllm/vllm-openai:latest
			
 
				-#    container_name: vllm-qwen3-8b
			
 
				-#    ports:
			
 
				-#      - "8300:8000"
			
 
				-#    command: >
			
 
				-#      --model Qwen/Qwen3-Embedding-8B
			
 
				-#      --dtype float16
			
 
				-#      --api-port 8000
			
 
				-#    volumes:
			
 
				-#      - ./models:/root/.cache/huggingface
			
 
				-
			
 
				-  # Milvus 向量数据库
			
 
				-#  milvus:
			
 
				-#    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.5
			
 
				-##    image: milvusdb/milvus:v2.4.4-standalone
			
 
				-#    container_name: milvus
			
 
				-#    ports:
			
 
				-#      - "19530:19530"
			
 
				-#      - "9091:9091"
			
 
				+#      # 挂载预下载的模型目录
			
 
				+#      - ./models/Qwen3-Embedding-0.6B:/app/models/Qwen3-Embedding-0.6B
			
 
				+#      # 可选：挂载缓存目录
			
 
				+#      - ./cache:/root/.cache/huggingface
			
 
				 #    environment:
			
 
				-#      - ETCD_USE_EMBED=true
			
 
				-#      - MINIO_USE_EMBED=true
			
 
				-#      - PULSAR_USE_EMBED=true
			
 
				-#    volumes:
			
 
				-#      - ./milvus_data:/var/lib/milvus
			
 
				-#    command: ["milvus", "run", "standalone"]
			
 
				+#      # 设置 Hugging Face 国内镜像
			
 
				+#      - HF_ENDPOINT=https://hf-mirror.com
			
 
				+#      # 可选：设置 Python 包索引镜像
			
 
				+#      - PIP_INDEX_URL=https://pypi.tuna.tsinghua.edu.cn/simple
			
 
				+#    deploy:
			
 
				+#      resources:
			
 
				+#        reservations:
			
 
				+#          devices:
			
 
				+#            - driver: nvidia
			
 
				+#              count: all
			
 
				+#              capabilities: [ gpu ]
			
 
				+#    # 添加健康检查
			
 
				+#    healthcheck:
			
 
				+#      test: [ "CMD", "curl", "-f", "http://localhost:8000/health" ]
			
 
				+#      interval: 30s
			
 
				+#      timeout: 10s
			
 
				+#      retries: 3
			
 
				+#      start_period: 40s
			
 
				+##  # vLLM - Qwen3 4B
			
 
				+##  vllm-4b:
			
 
				+##    image: vllm/vllm-openai:latest
			
 
				+##    container_name: vllm-qwen3-4b
			
 
				+##    ports:
			
 
				+##      - "8200:8000"
			
 
				+##    command: >
			
 
				+##      --model Qwen/Qwen3-Embedding-4B
			
 
				+##      --dtype float16
			
 
				+##      --api-port 8000
			
 
				+##    volumes:
			
 
				+##      - ./models:/root/.cache/huggingface
			
 
				+##
			
 
				+##  # vLLM - Qwen3 8B
			
 
				+##  vllm-8b:
			
 
				+##    image: vllm/vllm-openai:latest
			
 
				+##    container_name: vllm-qwen3-8b
			
 
				+##    ports:
			
 
				+##      - "8300:8000"
			
 
				+##    command: >
			
 
				+##      --model Qwen/Qwen3-Embedding-8B
			
 
				+##      --dtype float16
			
 
				+##      --api-port 8000
			
 
				+##    volumes:
			
 
				+##      - ./models:/root/.cache/huggingface
			
 
				+#
			
 
				+#  # Milvus 向量数据库
			
 
				+##  milvus:
			
 
				+##    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/milvusdb/milvus:v2.4.5
			
 
				+###    image: milvusdb/milvus:v2.4.4-standalone
			
 
				+##    container_name: milvus
			
 
				+##    ports:
			
 
				+##      - "19530:19530"
			
 
				+##      - "9091:9091"
			
 
				+##    environment:
			
 
				+##      - ETCD_USE_EMBED=true
			
 
				+##      - MINIO_USE_EMBED=true
			
 
				+##      - PULSAR_USE_EMBED=true
			
 
				+##    volumes:
			
 
				+##      - ./milvus_data:/var/lib/milvus
			
 
				+##    command: ["milvus", "run", "standalone"]