# Server / llm_vector_server
							FROM egs-registry.cn-hangzhou.cr.aliyuncs.com/egs/vllm:0.8.2-pytorch2.6-cu124-20250328
# Base image (above): per its tag, vLLM 0.8.2 on PyTorch 2.6 / CUDA 12.4,
# pinned to the 20250328 build.
# NOTE(review): confirm that the vLLM version actually present at runtime (the
# base image's, or whatever requirements.txt installs over it) supports the
# Qwen3 architecture and serves this model as an embedding model; an explicit
# "--task embed" argument may be required -- verify against the vLLM docs.

# Bake the model directory into the image at the path passed to --model below.
# This COPY is kept ahead of the dependency install so that editing
# requirements.txt does not invalidate the model layer.
COPY models/Qwen3-Embedding-4B /models/Qwen3-Embedding-4B

# Additional Python dependencies layered on top of the base image.
# The destination is relative, so it resolves against the base image's WORKDIR.
COPY requirements.txt requirements.txt

# --no-cache-dir keeps pip's download cache out of the resulting image layer.
RUN pip install --no-cache-dir -r requirements.txt

# Documents the port the server listens on (see --port below). EXPOSE does not
# publish the port; that is still done with `docker run -p` or equivalent.
EXPOSE 8000

# Start vLLM's OpenAI API server module against the baked-in model.
# Exec (JSON-array) form: the Python process is started directly, without a
# wrapping shell. --host 0.0.0.0 binds on all interfaces so the server is
# reachable from outside the container.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", \
     "--model", "/models/Qwen3-Embedding-4B", \
     "--dtype", "float16", \
     "--port", "8000", \
     "--host", "0.0.0.0"]