@@ -163,11 +163,7 @@ async def dataset_list():
             for dataset, count in zip(datasets, counts)
     ]

-    return jsonify({
-        "status_code": 200,
-        "detail": "success",
-        "data": data_list
-    })
+    return jsonify({"status_code": 200, "detail": "success", "data": data_list})


 @server_bp.route("/dataset/add", methods=["POST"])
@@ -178,16 +174,10 @@ async def add_dataset():
     body = await request.get_json()
     name = body.get("name")
     if not name:
-        return jsonify({
-            "status_code": 400,
-            "detail": "name is required"
-        })
+        return jsonify({"status_code": 400, "detail": "name is required"})
     # Perform the insert
     await dataset.add_dataset(name)
-    return jsonify({
-        "status_code": 200,
-        "detail": "success"
-    })
+    return jsonify({"status_code": 200, "detail": "success"})


 @server_bp.route("/content/get", methods=["GET"])
@@ -198,33 +188,27 @@ async def get_content():
     # Get the request parameters
     doc_id = request.args.get("docId")
     if not doc_id:
-        return jsonify({
-            "status_code": 400,
-            "detail": "doc_id is required",
-            "data": {}
-        })
+        return jsonify({"status_code": 400, "detail": "doc_id is required", "data": {}})

     # Query the content
     rows = await contents.select_content_by_doc_id(doc_id)

     if not rows:
-        return jsonify({
-            "status_code": 404,
-            "detail": "content not found",
-            "data": {}
-        })
+        return jsonify({"status_code": 404, "detail": "content not found", "data": {}})

     row = rows[0]

-    return jsonify({
-        "status_code": 200,
-        "detail": "success",
-        "data": {
-            "title": row.get("title", ""),
-            "text": row.get("text", ""),
-            "doc_id": row.get("doc_id", "")
+    return jsonify(
+        {
+            "status_code": 200,
+            "detail": "success",
+            "data": {
+                "title": row.get("title", ""),
+                "text": row.get("text", ""),
+                "doc_id": row.get("doc_id", ""),
+            },
         }
-    })
+    )


 @server_bp.route("/content/list", methods=["GET"])
@@ -240,6 +224,7 @@ async def content_list():

     # order_by can be passed as a JSON string
     import json
+
     order_by_str = request.args.get("order_by", '{"id":"desc"}')
     try:
         order_by = json.loads(order_by_str)
@@ -265,22 +250,33 @@ async def content_list():
         for row in result["entities"]
     ]

-    return jsonify({
-        "status_code": 200,
-        "detail": "success",
-        "data": {
-            "entities": entities,
-            "total_count": result["total_count"],
-            "page": result["page"],
-            "page_size": result["page_size"],
-            "total_pages": result["total_pages"]
+    return jsonify(
+        {
+            "status_code": 200,
+            "detail": "success",
+            "data": {
+                "entities": entities,
+                "total_count": result["total_count"],
+                "page": result["page"],
+                "page_size": result["page_size"],
+                "total_pages": result["total_pages"],
+            },
         }
-    })
+    )


-async def query_search(query_text, filters=None, search_type='', anns_field='vector_text',
-                       search_params=BASE_MILVUS_SEARCH_PARAMS, _source=False, es_size=10000, sort_by=None,
-                       milvus_size=20, limit=10):
+async def query_search(
+    query_text,
+    filters=None,
+    search_type="",
+    anns_field="vector_text",
+    search_params=BASE_MILVUS_SEARCH_PARAMS,
+    _source=False,
+    es_size=10000,
+    sort_by=None,
+    milvus_size=20,
+    limit=10,
+):
     if filters is None:
         filters = {}
     query_vector = await get_basic_embedding(text=query_text, model=DEFAULT_MODEL)
@@ -322,40 +318,46 @@ async def query():
     query_text = request.args.get("query")
     dataset_ids = request.args.get("datasetIds").split(",")
     search_type = request.args.get("search_type", "hybrid")
-    query_results = await query_search(query_text=query_text, filters={"dataset_id": dataset_ids},
-                                       search_type=search_type)
+    query_results = await query_search(
+        query_text=query_text,
+        filters={"dataset_id": dataset_ids},
+        search_type=search_type,
+    )
     resource = get_resource_manager()
     content_chunk_mapper = ContentChunks(resource.mysql_client)
     dataset_mapper = Dataset(resource.mysql_client)
     res = []
-    for result in query_results['results']:
-        content_chunks = await content_chunk_mapper.select_chunk_content(doc_id=result['doc_id'],
-                                                                         chunk_id=result['chunk_id'])
+    for result in query_results["results"]:
+        content_chunks = await content_chunk_mapper.select_chunk_content(
+            doc_id=result["doc_id"], chunk_id=result["chunk_id"]
+        )
         if not content_chunks:
-            return jsonify({
-                "status_code": 500,
-                "detail": "content_chunk not found",
-                "data": {}
-            })
+            return jsonify(
+                {"status_code": 500, "detail": "content_chunk not found", "data": {}}
+            )
         content_chunk = content_chunks[0]
-        datasets = await dataset_mapper.select_dataset_by_id(content_chunk['dataset_id'])
+        datasets = await dataset_mapper.select_dataset_by_id(
+            content_chunk["dataset_id"]
+        )
         if not datasets:
-            return jsonify({
-                "status_code": 500,
-                "detail": "dataset not found",
-                "data": {}
-            })
+            return jsonify(
+                {"status_code": 500, "detail": "dataset not found", "data": {}}
+            )
         dataset = datasets[0]
         dataset_name = None
         if dataset:
-            dataset_name = dataset['name']
+            dataset_name = dataset["name"]
         res.append(
-            {'docId': content_chunk['doc_id'], 'content': content_chunk['text'],
-             'contentSummary': content_chunk['summary'], 'score': result['score'], 'datasetName': dataset_name})
-    data = {'results': res}
-    return jsonify({'status_code': 200,
-                    'detail': "success",
-                    'data': data})
+            {
+                "docId": content_chunk["doc_id"],
+                "content": content_chunk["text"],
+                "contentSummary": content_chunk["summary"],
+                "score": result["score"],
+                "datasetName": dataset_name,
+            }
+        )
+    data = {"results": res}
+    return jsonify({"status_code": 200, "detail": "success", "data": data})


 @server_bp.route("/chat", methods=["GET"])
@@ -364,46 +366,57 @@ async def chat():
     dataset_id_strs = request.args.get("datasetIds")
     dataset_ids = dataset_id_strs.split(",")
     search_type = request.args.get("search_type", "hybrid")
-    query_results = await query_search(query_text=query_text, filters={"dataset_id": dataset_ids},
-                                       search_type=search_type)
+    query_results = await query_search(
+        query_text=query_text,
+        filters={"dataset_id": dataset_ids},
+        search_type=search_type,
+    )
     resource = get_resource_manager()
     content_chunk_mapper = ContentChunks(resource.mysql_client)
     dataset_mapper = Dataset(resource.mysql_client)
     chat_res_mapper = ChatRes(resource.mysql_client)
     res = []
-    for result in query_results['results']:
-        content_chunks = await content_chunk_mapper.select_chunk_content(doc_id=result['doc_id'],
-                                                                         chunk_id=result['chunk_id'])
+    for result in query_results["results"]:
+        content_chunks = await content_chunk_mapper.select_chunk_content(
+            doc_id=result["doc_id"], chunk_id=result["chunk_id"]
+        )
         if not content_chunks:
-            return jsonify({
-                "status_code": 500,
-                "detail": "content_chunk not found",
-                "data": {}
-            })
+            return jsonify(
+                {"status_code": 500, "detail": "content_chunk not found", "data": {}}
+            )
         content_chunk = content_chunks[0]
-        datasets = await dataset_mapper.select_dataset_by_id(content_chunk['dataset_id'])
+        datasets = await dataset_mapper.select_dataset_by_id(
+            content_chunk["dataset_id"]
+        )
         if not datasets:
-            return jsonify({
-                "status_code": 500,
-                "detail": "dataset not found",
-                "data": {}
-            })
+            return jsonify(
+                {"status_code": 500, "detail": "dataset not found", "data": {}}
+            )
         dataset = datasets[0]
         dataset_name = None
         if dataset:
-            dataset_name = dataset['name']
+            dataset_name = dataset["name"]
         res.append(
-            {'docId': content_chunk['doc_id'], 'content': content_chunk['text'],
-             'contentSummary': content_chunk['summary'], 'score': result['score'], 'datasetName': dataset_name})
+            {
+                "docId": content_chunk["doc_id"],
+                "content": content_chunk["text"],
+                "contentSummary": content_chunk["summary"],
+                "score": result["score"],
+                "datasetName": dataset_name,
+            }
+        )

     chat_classifier = ChatClassifier()
     chat_res = await chat_classifier.chat_with_deepseek(query_text, res)
-    data = {'results': res, 'chat_res': chat_res['summary']}
-    await chat_res_mapper.insert_chat_res(query_text, dataset_id_strs, json.dumps(data, ensure_ascii=False),
-                                          chat_res['summary'], chat_res['relevance_score'])
-    return jsonify({'status_code': 200,
-                    'detail': "success",
-                    'data': data})
+    data = {"results": res, "chat_res": chat_res["summary"]}
+    await chat_res_mapper.insert_chat_res(
+        query_text,
+        dataset_id_strs,
+        json.dumps(data, ensure_ascii=False),
+        chat_res["summary"],
+        chat_res["relevance_score"],
+    )
+    return jsonify({"status_code": 200, "detail": "success", "data": data})


 @server_bp.route("/chunk/list", methods=["GET"])
@@ -416,26 +429,20 @@ async def chunk_list():
     page_size = int(request.args.get("pageSize", 10))
     doc_id = request.args.get("docId")
     if not doc_id:
-        return jsonify({
-            "status_code": 500,
-            "detail": "docId not found",
-            "data": {}
-        })
+        return jsonify({"status_code": 500, "detail": "docId not found", "data": {}})

     # Call select_contents to get the pagination dict
-    result = await content_chunk.select_chunk_contents(page_num=page_num, page_size=page_size, doc_id=doc_id)
+    result = await content_chunk.select_chunk_contents(
+        page_num=page_num, page_size=page_size, doc_id=doc_id
+    )

     if not result:
-        return jsonify({
-            "status_code": 500,
-            "detail": "chunk is empty",
-            "data": {}
-        })
+        return jsonify({"status_code": 500, "detail": "chunk is empty", "data": {}})
     # Format entities, keeping only the required fields
     entities = [
         {
-            "id": row['id'],
-            "chunk_id": row['chunk_id'],
+            "id": row["id"],
+            "chunk_id": row["chunk_id"],
             "doc_id": row["doc_id"],
             "summary": row.get("summary") or "",
             "text": row.get("text") or "",
@@ -443,14 +450,16 @@ async def chunk_list():
         for row in result["entities"]
     ]

-    return jsonify({
-        "status_code": 200,
-        "detail": "success",
-        "data": {
-            "entities": entities,
-            "total_count": result["total_count"],
-            "page": result["page"],
-            "page_size": result["page_size"],
-            "total_pages": result["total_pages"]
+    return jsonify(
+        {
+            "status_code": 200,
+            "detail": "success",
+            "data": {
+                "entities": entities,
+                "total_count": result["total_count"],
+                "page": result["page"],
+                "page_size": result["page_size"],
+                "total_pages": result["total_pages"],
+            },
         }
-    })
+    )
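
Every endpoint in this diff builds the same {"status_code": ..., "detail": ..., "data": ...} envelope by hand. A minimal sketch of a helper that could absorb that repetition (hypothetical, not part of the diff; it assumes the Quart-style jsonify import the module already uses):

from quart import jsonify  # assumption: matches the import the endpoints above rely on


def api_response(status_code=200, detail="success", data=None):
    # Build the uniform JSON envelope used by the endpoints in this diff;
    # "data" is omitted when not supplied, as in the add_dataset responses.
    body = {"status_code": status_code, "detail": detail}
    if data is not None:
        body["data"] = data
    return jsonify(body)

Usage sketch: return api_response(400, "name is required") inside add_dataset().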