luojunhui 1 day ago
parent
commit
2cb5af465e
2 changed files with 6 additions and 37 deletions
  1. Dockerfile (+6 -17)
  2. docker-compose.yaml (+0 -20)

+ 6 - 17
Dockerfile

@@ -1,20 +1,9 @@
 FROM egs-registry.cn-hangzhou.cr.aliyuncs.com/egs/vllm:0.8.2-pytorch2.6-cu124-20250328
 
-WORKDIR /app
+COPY models/Qwen3-Embedding-4B /models/Qwen3-Embedding-4B
 
-ENV PYTHONDONTWRITEBYTECODE=1
-ENV PYTHONUNBUFFERED=1
-ENV PIP_DISABLE_PIP_VERSION_CHECK=on
-ENV TZ=Asia/Shanghai
-
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone \
-
-COPY requirements.txt .
-RUN pip install --no-cache-dir --upgrade pillow
-RUN pip install --no-cache-dir -r requirements.txt
-
-COPY . .
-
-EXPOSE 8000
-
-CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model/Qwen3-Embedding-4B", "--dtype", "float16", "--host", "0.0.0.0", "--port", "8000"]
+CMD ["python", "-m", "vllm.entrypoints.openai.api_server", \
+     "--model", "/models/Qwen3-Embedding-4B", \
+     "--dtype", "float16", \
+     "--port", "8000", \
+     "--host", "0.0.0.0"]

+ 0 - 20
docker-compose.yaml

@@ -1,20 +0,0 @@
-services:
-  app:
-    build:
-      context: .
-      dockerfile: Dockerfile
-    container_name: vector-server-app
-    ports:
-      - "8000:8000"
-    volumes:
-      - .:/app
-      - ./models/Qwen3-Embedding-4B:/app/model/Qwen3-Embedding-4B  # 挂载模型
-    environment:
-      - PYTHONUNBUFFERED=1
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]