Quellcode durchsuchen

Merge branch 'feature/xueyiming/2025-09-28-add-ext' of Server/rag_server into master

xueyiming vor 1 Woche
Ursprung
Commit
996a7e3422

+ 2 - 1
applications/async_task/chunk_task.py

@@ -35,6 +35,7 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
         dataset_id = data.get("dataset_id", 0)  # 默认知识库 id 为 0
         re_chunk = data.get("re_chunk", False)
         dont_chunk = data.get("dont_chunk", False)
+        ext = data.get("ext", None)
         if re_chunk:
             await self.content_manager.update_content_info(
                 doc_id=doc_id,
@@ -46,7 +47,7 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
             flag = True
         else:
             flag = await self.content_manager.insert_content(
-                doc_id, text, text_type, title, dataset_id
+                doc_id, text, text_type, title, dataset_id, ext
             )
         if not flag:
             return []

+ 3 - 3
applications/utils/chat/rag_chat_agent.py

@@ -98,7 +98,7 @@ class RAGChatAgent:
     """
         return prompt
 
-    async def search_with_deepseek(self, query):
+    async def llm_search(self, query):
         prompt = self.create_query_prompt(query)
         response = await fetch_deepseek_completion(
             model="DeepSeek-V3", prompt=prompt, output_type="json"
@@ -135,9 +135,9 @@ class RAGChatAgent:
 
         return prompt
 
-    async def select_with_deepseek(self, chat_res, search_res):
+    async def make_decision(self, chat_res, search_res):
         prompt = self.select_prompt(chat_res, search_res)
         response = await fetch_deepseek_completion(
-            model="DeepSeek-V3", prompt=prompt, output_type="json"
+            model="DeepSeek-R1", prompt=prompt, output_type="json"
         )
         return response

+ 4 - 4
applications/utils/mysql/contents.py

@@ -2,14 +2,14 @@ from .base import BaseMySQLClient
 
 
 class Contents(BaseMySQLClient):
-    async def insert_content(self, doc_id, text, text_type, title, dataset_id):
+    async def insert_content(self, doc_id, text, text_type, title, dataset_id, ext):
         query = """
             INSERT IGNORE INTO contents
-                (doc_id, text, text_type, title, dataset_id)
-            VALUES (%s, %s, %s, %s, %s);
+                (doc_id, text, text_type, title, dataset_id, ext)
+            VALUES (%s, %s, %s, %s, %s, %s);
         """
         return await self.pool.async_save(
-            query=query, params=(doc_id, text, text_type, title, dataset_id)
+            query=query, params=(doc_id, text, text_type, title, dataset_id, ext)
         )
 
     async def update_content_info(self, doc_id, text, text_type, title, dataset_id):

+ 12 - 12
mcp_server/server.py

@@ -66,24 +66,24 @@ async def rag_search(query_text: str):
     resource = get_resource_manager()
     chat_result_mapper = ChatResult(resource.mysql_client)
     rag_chat_agent = RAGChatAgent()
-    chat_res = await rag_chat_agent.chat_with_deepseek(query_text, query_results)
-    deepseek_search = await rag_chat_agent.search_with_deepseek(query_text)
-    select = await rag_chat_agent.select_with_deepseek(chat_res, deepseek_search)
+    chat_result = await rag_chat_agent.chat_with_deepseek(query_text, query_results)
+    llm_search_result = await rag_chat_agent.llm_search(query_text)
+    decision = await rag_chat_agent.make_decision(chat_result, llm_search_result)
     data = {
-        "result": select["result"],
-        "status": select["status"],
-        "relevance_score": select["relevance_score"],
+        "result": decision["result"],
+        "status": decision["status"],
+        "relevance_score": decision["relevance_score"],
     }
     await chat_result_mapper.insert_chat_result(
         query_text,
         dataset_id_strs,
         json.dumps(query_results, ensure_ascii=False),
-        chat_res["summary"],
-        chat_res["relevance_score"],
-        chat_res["status"],
-        deepseek_search["answer"],
-        deepseek_search["source"],
-        deepseek_search["status"],
+        chat_result["summary"],
+        chat_result["relevance_score"],
+        chat_result["status"],
+        llm_search_result["answer"],
+        llm_search_result["source"],
+        llm_search_result["status"],
     )
 
     return data

+ 48 - 10
routes/buleprint.py

@@ -394,20 +394,20 @@ async def chat():
             result["datasetName"] = dataset_name
 
     rag_chat_agent = RAGChatAgent()
-    chat_res = await rag_chat_agent.chat_with_deepseek(query_text, query_results)
-    deepseek_search = await rag_chat_agent.search_with_deepseek(query_text)
-    select = await rag_chat_agent.select_with_deepseek(chat_res, deepseek_search)
-    data = {"results": query_results, "chat_res": select["result"]}
+    chat_result = await rag_chat_agent.chat_with_deepseek(query_text, query_results)
+    llm_search = await rag_chat_agent.llm_search(query_text)
+    decision = await rag_chat_agent.make_decision(chat_result, llm_search)
+    data = {"results": query_results, "chat_res": decision["result"]}
     await chat_result_mapper.insert_chat_result(
         query_text,
         dataset_id_strs,
         json.dumps(data, ensure_ascii=False),
-        chat_res["summary"],
-        chat_res["relevance_score"],
-        chat_res["status"],
-        deepseek_search["answer"],
-        deepseek_search["source"],
-        deepseek_search["status"],
+        chat_result["summary"],
+        chat_result["relevance_score"],
+        chat_result["status"],
+        llm_search["answer"],
+        llm_search["source"],
+        llm_search["status"],
     )
     return jsonify({"status_code": 200, "detail": "success", "data": data})
 
@@ -482,3 +482,41 @@ async def delete_task():
     )
     await build_graph_task.deal(doc_id)
     return jsonify({"status_code": 200, "detail": "success", "data": {}})
+
+@server_bp.route("/rag/search", methods=["POST"])
+async def rag_search():
+    body = await request.get_json()
+    query_text = body.get("queryText")
+    dataset_id_strs = "11,12"
+    dataset_ids = dataset_id_strs.split(",")
+    search_type = "hybrid"
+
+    query_results = await query_search(
+        query_text=query_text,
+        filters={"dataset_id": dataset_ids},
+        search_type=search_type,
+    )
+
+    resource = get_resource_manager()
+    chat_result_mapper = ChatResult(resource.mysql_client)
+    rag_chat_agent = RAGChatAgent()
+    chat_result = await rag_chat_agent.chat_with_deepseek(query_text, query_results)
+    llm_search = await rag_chat_agent.llm_search(query_text)
+    decision = await rag_chat_agent.make_decision(chat_result, llm_search)
+    data = {
+        "result": decision["result"],
+        "status": decision["status"],
+        "relevance_score": decision["relevance_score"],
+    }
+    await chat_result_mapper.insert_chat_result(
+        query_text,
+        dataset_id_strs,
+        json.dumps(query_results, ensure_ascii=False),
+        chat_result["summary"],
+        chat_result["relevance_score"],
+        chat_result["status"],
+        llm_search["answer"],
+        llm_search["source"],
+        llm_search["status"],
+    )
+    return jsonify({"status_code": 200, "detail": "success", "data": data})