"""Application bootstrap: load the local vLLM model and wire up the Quart app.

Importing this module has side effects: it creates the Quart application,
loads the model selected by ``DEFAULT_MODEL`` into a vLLM engine, prints a
confirmation message, and registers the blueprint returned by
``server_routes``.
"""

# Third-party packages.
from quart import Quart
from quart_cors import cors
from vllm import LLM, SamplingParams

# Project-local modules.
from applications.config import LOCAL_MODEL_CONFIG, DEFAULT_MODEL
from routes import server_routes

# NOTE(review): `cors` and `SamplingParams` are imported but not used in this
# module as shown; in particular `cors` is never applied to `app` here.
# Confirm whether CORS support is still intended.

app = Quart(__name__)

# Resolve the configured default model to its local path.
MODEL_PATH = LOCAL_MODEL_CONFIG[DEFAULT_MODEL]

# Half precision is used to save GPU memory.
# NOTE(review): trust_remote_code=True allows code shipped with the model to
# run while loading -- confirm MODEL_PATH always points to a trusted source.
llm = LLM(model=MODEL_PATH, dtype="float16", trust_remote_code=True)
print(f"{MODEL_PATH} 模型加载完成!")

# Vector database connection (currently disabled). The original wiring was:
#   from pymilvus import connections
#   connections.connect("default", host="milvus", port="19530")
# Until it is re-enabled, the routes receive None in place of a connection.
connections = None

# Build the route blueprint and register it on the app.
app_route = server_routes(llm, connections)
app.register_blueprint(app_route)