vector_app.py 809 B

12345678910111213141516171819202122232425262728293031323334
  1. from quart import Quart
  2. from quart_cors import cors
  3. from pymilvus import connections
  4. from vllm import LLM, SamplingParams
  5. from applications.config import LOCAL_MODEL_CONFIG, DEFAULT_MODEL
  6. from routes import server_routes
  app = Quart(__name__)

  # Shared vLLM engine. It stays None until load_model() runs at startup.
  llm = None

  # Blueprint built by server_routes(). It is created in load_model() so that
  # it receives the loaded engine rather than the None placeholder above.
  app_route = None

  # Connect to the Milvus vector database under the "default" alias.
  connections.connect("default", host="milvus", port="19530")


  @app.before_serving
  async def load_model():
      """Load the default local model, then register the routes that use it.

      Runs once before the app starts serving. The blueprint is created here
      rather than at import time: passing the module-level ``llm`` to
      ``server_routes`` before the model exists hands it ``None``, and
      rebinding the global afterwards does not update that reference.

      Raises:
          KeyError: if ``DEFAULT_MODEL`` is missing from ``LOCAL_MODEL_CONFIG``.
      """
      global llm, app_route

      model_path = LOCAL_MODEL_CONFIG[DEFAULT_MODEL]
      if llm is not None:
          # Already initialised; avoid reloading the model and registering
          # the same blueprint a second time.
          return

      llm = LLM(
          model=model_path,
          dtype="float16",  # half precision to save GPU memory
          trust_remote_code=True,
      )
      print(f"{model_path} 模型加载完成!")

      # Register the routes only now that a real engine can be passed in.
      app_route = server_routes(llm, connections)
      app.register_blueprint(app_route)