# Compose definition for a single GPU-backed service named "vllm-qwen".
services:
  vllm-qwen:
    # No registry or tag is given, so the default tag (latest) is used.
    # NOTE(review): presumably a locally built image - confirm where it
    # comes from and consider pinning an explicit tag.
    image: vllm-qwen
    container_name: vllm-qwen
    ports:
      # HOST:CONTAINER - kept quoted so the value is always read as a string.
      - "8000:8000"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU available on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Restart the container whenever it stops.
    restart: always