|
@@ -1,4 +1,5 @@
|
|
|
import asyncio
|
|
|
+import json
|
|
|
import traceback
|
|
|
import uuid
|
|
|
from typing import Dict, Any
|
|
@@ -19,6 +20,7 @@ from applications.async_task import ChunkEmbeddingTask, DeleteTask
|
|
|
from applications.search import HybridSearch
|
|
|
from applications.utils.chat import ChatClassifier
|
|
|
from applications.utils.mysql import Dataset, Contents, ContentChunks
|
|
|
+from applications.utils.mysql.mapper import ChatRes
|
|
|
|
|
|
# Blueprint that groups this module's HTTP routes under the "/api" URL prefix.
server_bp = Blueprint("api", __name__, url_prefix="/api")
|
|
|
# Rebind the blueprint to the result of cors(...) with every origin allowed ("*").
server_bp = cors(server_bp, allow_origin="*")
|
|
@@ -359,13 +361,15 @@ async def query():
|
|
|
@server_bp.route("/chat", methods=["GET"])
|
|
|
async def chat():
|
|
|
query_text = request.args.get("query")
|
|
|
- dataset_ids = request.args.get("datasetIds").split(",")
|
|
|
+ dataset_id_strs = request.args.get("datasetIds")
|
|
|
+ dataset_ids = dataset_id_strs.split(",")
|
|
|
search_type = request.args.get("search_type", "hybrid")
|
|
|
query_results = await query_search(query_text=query_text, filters={"dataset_id": dataset_ids},
|
|
|
search_type=search_type)
|
|
|
resource = get_resource_manager()
|
|
|
content_chunk_mapper = ContentChunks(resource.mysql_client)
|
|
|
dataset_mapper = Dataset(resource.mysql_client)
|
|
|
+ chat_res_mapper = ChatRes(resource.mysql_client)
|
|
|
res = []
|
|
|
for result in query_results['results']:
|
|
|
content_chunks = await content_chunk_mapper.select_chunk_content(doc_id=result['doc_id'],
|
|
@@ -394,7 +398,59 @@ async def chat():
|
|
|
|
|
|
chat_classifier = ChatClassifier()
|
|
|
chat_res = await chat_classifier.chat_with_deepseek(query_text, res)
|
|
|
- data = {'results': res, 'chat_res': chat_res}
|
|
|
+ data = {'results': res, 'chat_res': chat_res['summary']}
|
|
|
+ await chat_res_mapper.insert_chat_res(query_text, dataset_id_strs, json.dumps(data, ensure_ascii=False),
|
|
|
+ chat_res['summary'], chat_res['relevance_score'])
|
|
|
return jsonify({'status_code': 200,
|
|
|
'detail': "success",
|
|
|
'data': data})
|
|
|
+
|
|
|
+
|
|
|
@server_bp.route("/chunk/list", methods=["GET"])
async def chunk_list():
    """Return one page of the chunks stored for a document.

    Query parameters:
        page: positive page number; defaults to 1.
        pageSize: positive number of chunks per page; defaults to 10.
        docId: identifier of the document whose chunks are listed; required.

    Returns:
        A JSON envelope with ``status_code``, ``detail`` and ``data``. On
        success ``data`` holds ``entities`` (each reduced to ``id``,
        ``chunk_id``, ``doc_id``, ``summary`` and ``text``) together with the
        ``total_count``, ``page``, ``page_size`` and ``total_pages`` values
        read from the mapper result. Every failure path answers with
        ``status_code`` 500 and an empty ``data`` object.
    """
    resource = get_resource_manager()
    content_chunk = ContentChunks(resource.mysql_client)

    # Read the paging and filter parameters from the URL query string.
    # A non-numeric value previously escaped as an unhandled ValueError;
    # report it through the same error envelope as the other checks below.
    try:
        page_num = int(request.args.get("page", 1))
        page_size = int(request.args.get("pageSize", 10))
    except (TypeError, ValueError):
        return jsonify({
            "status_code": 500,
            "detail": "page and pageSize must be integers",
            "data": {}
        })

    # Zero or negative paging values cannot describe a page, so reject them
    # before they reach the mapper.
    if page_num < 1 or page_size < 1:
        return jsonify({
            "status_code": 500,
            "detail": "page and pageSize must be positive",
            "data": {}
        })

    doc_id = request.args.get("docId")
    if not doc_id:
        return jsonify({
            "status_code": 500,
            "detail": "docId not found",
            "data": {}
        })

    # Ask the mapper for the requested page; the result is read below as a
    # dict carrying the rows and the pagination fields.
    result = await content_chunk.select_chunk_contents(
        page_num=page_num, page_size=page_size, doc_id=doc_id
    )

    if not result:
        return jsonify({
            "status_code": 500,
            "detail": "chunk is empty",
            "data": {}
        })

    # Keep only the fields the response exposes; a missing or null summary
    # or text is returned as an empty string.
    entities = [
        {
            "id": row["id"],
            "chunk_id": row["chunk_id"],
            "doc_id": row["doc_id"],
            "summary": row.get("summary") or "",
            "text": row.get("text") or "",
        }
        for row in result["entities"]
    ]

    return jsonify({
        "status_code": 200,
        "detail": "success",
        "data": {
            "entities": entities,
            "total_count": result["total_count"],
            "page": result["page"],
            "page_size": result["page_size"],
            "total_pages": result["total_pages"]
        }
    })
|