# Compose stack with two services: the vLLM service and the backend service.
services:
  # vLLM service
  vllm-qwen:
    image: vllm-qwen
    container_name: vllm-qwen
    ports:
      # host:container, quoted so the mapping is always read as a string
      - "8000:8000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all NVIDIA GPUs available on the host for this container.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: always

  # Backend service
  vector-app:
    # Image is built from the build context in the current directory.
    build: .
    container_name: vector-app
    ports:
      # host:container, quoted so the mapping is always read as a string
      - "8001:8001"
    restart: always