vector_app.py 893 B

1234567891011121314151617181920212223242526272829303132333435
  1. from quart import Quart
  2. from quart_cors import cors
  3. # from pymilvus import connections
  4. from vllm import LLM, SamplingParams
  5. from applications.config import LOCAL_MODEL_CONFIG, DEFAULT_MODEL
  6. from routes import server_routes
  7. app = Quart(__name__)
  8. # llm
  9. llm = None
  10. # 连接向量数据库
  11. # connections.connect("default", host="milvus", port="19530")
  12. # connections.connect("default", host="milvus", port="19530")
  13. connections = None
  14. # 注册路由
  15. app_route = server_routes(llm, connections)
  16. app.register_blueprint(app_route)
  17. @app.before_serving
  18. async def load_model():
  19. """在服务启动前加载模型"""
  20. global llm
  21. MODEL_PATH = LOCAL_MODEL_CONFIG[DEFAULT_MODEL]
  22. if llm is None:
  23. llm = LLM(
  24. model=MODEL_PATH,
  25. dtype="float16", # 节省显存
  26. trust_remote_code=True
  27. )
  28. print(f"{MODEL_PATH} 模型加载完成!")