Explorar o código

增加对外问答接口

xueyiming hai 2 semanas
pai
achega
3e74e894ab

+ 3 - 1
applications/async_task/auto_rechunk_task.py

@@ -20,7 +20,9 @@ class AutoRechunkTask:
     async def rechunk(task):
         url = "http://192.168.100.31:8001/api/chunk"
         async with AsyncHttpClient() as http_client:
-            response = await http_client.post(url, json=task, headers={"Content-Type": "application/json"})
+            response = await http_client.post(
+                url, json=task, headers={"Content-Type": "application/json"}
+            )
         return response
 
     async def deal(self):

+ 17 - 16
applications/utils/chat/chat_classifier.py

@@ -8,10 +8,11 @@ class ChatClassifier:
     @staticmethod
     def generate_summary_prompt(query, search_results):
         """
-        生成总结的prompt。
+        生成总结的prompt。交给AI根据搜索结果判断内容是否对回答问题有帮助,
+        并结合内容生成总结和判断是否能回答问题。
 
         :param query: 问题
-        :param search_results: 搜索结果列表,每个元素包含 'content', 'contentSummary', 'score'
+        :param search_results: 搜索结果列表,每个元素包含 'content', 'contentSummary'
         :return: 生成的总结prompt
         """
 
@@ -20,38 +21,38 @@ class ChatClassifier:
         weighted_summaries = []
         weighted_contents = []
 
+        # Collect the summary and content of each search result, in the order received (no sorting)
         for result in search_results:
             content = result["content"]
             content_summary = result["contentSummary"]
-            score = result["score"]
 
-            weighted_summaries.append((content_summary, score))
-            weighted_contents.append((content, score))
+            weighted_summaries.append(content_summary)
+            weighted_contents.append(content)
 
-        weighted_summaries.sort(key=lambda x: x[1], reverse=True)
-        weighted_contents.sort(key=lambda x: x[1], reverse=True)
+        # Append the summaries and contents to the prompt (no similarity weighting is applied)
+        prompt += "\n-- 内容摘要 --\n"
+        for summary in weighted_summaries:
+            prompt += f"摘要: {summary}\n"
 
-        prompt += "\n-- 加权内容摘要 --\n"
-        for summary, score in weighted_summaries:
-            prompt += f"摘要: {summary} | 相似度: {score:.2f}\n"
-
-        prompt += "\n-- 加权内容 --\n"
-        for content, score in weighted_contents:
-            prompt += f"内容: {content} | 相似度: {score:.2f}\n"
+        prompt += "\n-- 内容 --\n"
+        for content in weighted_contents:
+            prompt += f"内容: {content}\n"
 
         # 约束 AI 输出 JSON
         prompt += """
-    请基于上述内容生成一个总结,并返回 JSON 格式,结构如下:
+    请根据上述内容判断能否回答问题,并生成一个总结,返回 JSON 格式,结构如下:
 
     {
       "query": "<原始问题>",
       "summary": "<简洁总结>",
-      "relevance_score": <0到1之间的小数,表示总结与问题的相关度>
+      "relevance_score": <0到1之间的小数,表示总结与问题的相关度>,
+      "status": <判断能否回答这个问题,0代表不能回答,1代表可以回答>
     }
 
     注意:
     - 只输出 JSON,不要额外解释。
     - relevance_score 数字越大,表示总结和问题越相关。
+    - 请根据问题和给定的搜索结果内容,判断是否能回答该问题。返回一个 0 或 1 的 status,表示能否回答问题。
     """
 
         return prompt

+ 5 - 4
applications/utils/mysql/mapper.py

@@ -345,13 +345,14 @@ class ContentChunks(BaseMySQLClient):
 
 class ChatResult(BaseMySQLClient):
     async def insert_chat_result(
-        self, query_text, dataset_ids, search_res, chat_res, score
+        self, query_text, dataset_ids, search_res, chat_res, score, has_answer
     ):
         query = """
                     INSERT INTO chat_res
-                        (query, dataset_ids, search_res, chat_res, score) 
-                        VALUES (%s, %s, %s, %s, %s);
+                        (query, dataset_ids, search_res, chat_res, score, has_answer) 
+                        VALUES (%s, %s, %s, %s, %s, %s);
                 """
         return await self.pool.async_save(
-            query=query, params=(query_text, dataset_ids, search_res, chat_res, score)
+            query=query,
+            params=(query_text, dataset_ids, search_res, chat_res, score, has_answer),
         )

+ 61 - 0
routes/buleprint.py

@@ -422,6 +422,7 @@ async def chat():
         json.dumps(data, ensure_ascii=False),
         chat_res["summary"],
         chat_res["relevance_score"],
+        chat_res["status"],
     )
     return jsonify({"status_code": 200, "detail": "success", "data": data})
 
@@ -471,6 +472,66 @@ async def chunk_list():
         }
     )
 
+
+@server_bp.route("/chat/detail", methods=["POST"])
+async def chat_detail():
+    """Answer a query from search results and persist the outcome.
+
+    Reads ``query`` from the JSON request body, runs a hybrid search that is
+    restricted to the hard-coded dataset ids 11 and 12, loads the chunk row
+    and the content row for every hit, asks
+    ``ChatClassifier.chat_with_deepseek`` for a summary and an answerable
+    ``status``, stores the outcome through ``ChatResult.insert_chat_result``
+    and returns it.
+
+    :return: JSON envelope ``{"status_code", "detail", "data"}``. ``data``
+        holds ``result``, ``status`` and ``metaData`` on success and is empty
+        when the query is missing (400) or a chunk/content row cannot be
+        found (500).
+    """
+    body = await request.get_json()
+    # The parsed body can be None or a non-object JSON value (e.g. `null`
+    # or a list); calling .get() on it would raise, so guard first.
+    query_text = body.get("query") if isinstance(body, dict) else None
+    if not query_text:
+        return jsonify(
+            {"status_code": 400, "detail": "query is required", "data": {}}
+        )
+    # Dataset scope is fixed for this endpoint; the raw string is also what
+    # gets stored with the chat result below.
+    dataset_id_strs = "11,12"
+    dataset_ids = dataset_id_strs.split(",")
+    search_type = "hybrid"
+    query_results = await query_search(
+        query_text=query_text,
+        filters={"dataset_id": dataset_ids},
+        search_type=search_type,
+    )
+    resource = get_resource_manager()
+    content_chunk_mapper = ContentChunks(resource.mysql_client)
+    contents_mapper = Contents(resource.mysql_client)
+    chat_result_mapper = ChatResult(resource.mysql_client)
+    res = []
+    for result in query_results["results"]:
+        content_chunks = await content_chunk_mapper.select_chunk_content(
+            doc_id=result["doc_id"], chunk_id=result["chunk_id"]
+        )
+        contents = await contents_mapper.select_content_by_doc_id(result["doc_id"])
+        # A single unresolved hit aborts the whole request with an error envelope.
+        if not content_chunks:
+            return jsonify(
+                {"status_code": 500, "detail": "content_chunk not found", "data": {}}
+            )
+        if not contents:
+            return jsonify(
+                {"status_code": 500, "detail": "contents not found", "data": {}}
+            )
+        content_chunk = content_chunks[0]
+        content = contents[0]
+        res.append(
+            {
+                "contentChunk": content_chunk["text"],
+                "contentSummary": content_chunk["summary"],
+                "content": content["text"],
+                "score": result["score"],
+            }
+        )
+
+    chat_classifier = ChatClassifier()
+    chat_res = await chat_classifier.chat_with_deepseek(query_text, res)
+    data = {
+        "result": chat_res["summary"],
+        "status": chat_res["status"],
+        "metaData": res,
+    }
+    # Persist the question, the serialized response, the summary, the
+    # relevance score and the answerable flag.
+    await chat_result_mapper.insert_chat_result(
+        query_text,
+        dataset_id_strs,
+        json.dumps(data, ensure_ascii=False),
+        chat_res["summary"],
+        chat_res["relevance_score"],
+        chat_res["status"],
+    )
+    return jsonify({"status_code": 200, "detail": "success", "data": data})
+
+
 @server_bp.route("/auto_rechunk", methods=["GET"])
 async def auto_rechunk():
     resource = get_resource_manager()