|
@@ -161,11 +161,7 @@ async def dataset_list():
|
|
|
for dataset, count in zip(datasets, counts)
|
|
|
]
|
|
|
|
|
|
- return jsonify({
|
|
|
- "status_code": 200,
|
|
|
- "detail": "success",
|
|
|
- "data": data_list
|
|
|
- })
|
|
|
+ return jsonify({"status_code": 200, "detail": "success", "data": data_list})
|
|
|
|
|
|
|
|
|
@server_bp.route("/dataset/add", methods=["POST"])
|
|
@@ -176,16 +172,10 @@ async def add_dataset():
|
|
|
body = await request.get_json()
|
|
|
name = body.get("name")
|
|
|
if not name:
|
|
|
- return jsonify({
|
|
|
- "status_code": 400,
|
|
|
- "detail": "name is required"
|
|
|
- })
|
|
|
+ return jsonify({"status_code": 400, "detail": "name is required"})
|
|
|
# 执行新增
|
|
|
await dataset.add_dataset(name)
|
|
|
- return jsonify({
|
|
|
- "status_code": 200,
|
|
|
- "detail": "success"
|
|
|
- })
|
|
|
+ return jsonify({"status_code": 200, "detail": "success"})
|
|
|
|
|
|
|
|
|
@server_bp.route("/content/get", methods=["GET"])
|
|
@@ -196,33 +186,27 @@ async def get_content():
|
|
|
# 获取请求参数
|
|
|
doc_id = request.args.get("docId")
|
|
|
if not doc_id:
|
|
|
- return jsonify({
|
|
|
- "status_code": 400,
|
|
|
- "detail": "doc_id is required",
|
|
|
- "data": {}
|
|
|
- })
|
|
|
+ return jsonify({"status_code": 400, "detail": "doc_id is required", "data": {}})
|
|
|
|
|
|
# 查询内容
|
|
|
rows = await contents.select_content_by_doc_id(doc_id)
|
|
|
|
|
|
if not rows:
|
|
|
- return jsonify({
|
|
|
- "status_code": 404,
|
|
|
- "detail": "content not found",
|
|
|
- "data": {}
|
|
|
- })
|
|
|
+ return jsonify({"status_code": 404, "detail": "content not found", "data": {}})
|
|
|
|
|
|
row = rows[0]
|
|
|
|
|
|
- return jsonify({
|
|
|
- "status_code": 200,
|
|
|
- "detail": "success",
|
|
|
- "data": {
|
|
|
- "title": row.get("title", ""),
|
|
|
- "text": row.get("text", ""),
|
|
|
- "doc_id": row.get("doc_id", "")
|
|
|
+ return jsonify(
|
|
|
+ {
|
|
|
+ "status_code": 200,
|
|
|
+ "detail": "success",
|
|
|
+ "data": {
|
|
|
+ "title": row.get("title", ""),
|
|
|
+ "text": row.get("text", ""),
|
|
|
+ "doc_id": row.get("doc_id", ""),
|
|
|
+ },
|
|
|
}
|
|
|
- })
|
|
|
+ )
|
|
|
|
|
|
|
|
|
@server_bp.route("/content/list", methods=["GET"])
|
|
@@ -238,6 +222,7 @@ async def content_list():
|
|
|
|
|
|
# order_by 可以用 JSON 字符串传递
|
|
|
import json
|
|
|
+
|
|
|
order_by_str = request.args.get("order_by", '{"id":"desc"}')
|
|
|
try:
|
|
|
order_by = json.loads(order_by_str)
|
|
@@ -263,22 +248,33 @@ async def content_list():
|
|
|
for row in result["entities"]
|
|
|
]
|
|
|
|
|
|
- return jsonify({
|
|
|
- "status_code": 200,
|
|
|
- "detail": "success",
|
|
|
- "data": {
|
|
|
- "entities": entities,
|
|
|
- "total_count": result["total_count"],
|
|
|
- "page": result["page"],
|
|
|
- "page_size": result["page_size"],
|
|
|
- "total_pages": result["total_pages"]
|
|
|
+ return jsonify(
|
|
|
+ {
|
|
|
+ "status_code": 200,
|
|
|
+ "detail": "success",
|
|
|
+ "data": {
|
|
|
+ "entities": entities,
|
|
|
+ "total_count": result["total_count"],
|
|
|
+ "page": result["page"],
|
|
|
+ "page_size": result["page_size"],
|
|
|
+ "total_pages": result["total_pages"],
|
|
|
+ },
|
|
|
}
|
|
|
- })
|
|
|
+ )
|
|
|
|
|
|
|
|
|
-async def query_search(query_text, filters=None, search_type='', anns_field='vector_text',
|
|
|
- search_params=BASE_MILVUS_SEARCH_PARAMS, _source=False, es_size=10000, sort_by=None,
|
|
|
- milvus_size=20, limit=10):
|
|
|
+async def query_search(
|
|
|
+ query_text,
|
|
|
+ filters=None,
|
|
|
+ search_type="",
|
|
|
+ anns_field="vector_text",
|
|
|
+ search_params=BASE_MILVUS_SEARCH_PARAMS,
|
|
|
+ _source=False,
|
|
|
+ es_size=10000,
|
|
|
+ sort_by=None,
|
|
|
+ milvus_size=20,
|
|
|
+ limit=10,
|
|
|
+):
|
|
|
if filters is None:
|
|
|
filters = {}
|
|
|
query_vector = await get_basic_embedding(text=query_text, model=DEFAULT_MODEL)
|
|
@@ -320,40 +316,46 @@ async def query():
|
|
|
query_text = request.args.get("query")
|
|
|
dataset_ids = request.args.get("datasetIds").split(",")
|
|
|
search_type = request.args.get("search_type", "hybrid")
|
|
|
- query_results = await query_search(query_text=query_text, filters={"dataset_id": dataset_ids},
|
|
|
- search_type=search_type)
|
|
|
+ query_results = await query_search(
|
|
|
+ query_text=query_text,
|
|
|
+ filters={"dataset_id": dataset_ids},
|
|
|
+ search_type=search_type,
|
|
|
+ )
|
|
|
resource = get_resource_manager()
|
|
|
content_chunk_mapper = ContentChunks(resource.mysql_client)
|
|
|
dataset_mapper = Dataset(resource.mysql_client)
|
|
|
res = []
|
|
|
- for result in query_results['results']:
|
|
|
- content_chunks = await content_chunk_mapper.select_chunk_content(doc_id=result['doc_id'],
|
|
|
- chunk_id=result['chunk_id'])
|
|
|
+ for result in query_results["results"]:
|
|
|
+ content_chunks = await content_chunk_mapper.select_chunk_content(
|
|
|
+ doc_id=result["doc_id"], chunk_id=result["chunk_id"]
|
|
|
+ )
|
|
|
if not content_chunks:
|
|
|
- return jsonify({
|
|
|
- "status_code": 500,
|
|
|
- "detail": "content_chunk not found",
|
|
|
- "data": {}
|
|
|
- })
|
|
|
+ return jsonify(
|
|
|
+ {"status_code": 500, "detail": "content_chunk not found", "data": {}}
|
|
|
+ )
|
|
|
content_chunk = content_chunks[0]
|
|
|
- datasets = await dataset_mapper.select_dataset_by_id(content_chunk['dataset_id'])
|
|
|
+ datasets = await dataset_mapper.select_dataset_by_id(
|
|
|
+ content_chunk["dataset_id"]
|
|
|
+ )
|
|
|
if not datasets:
|
|
|
- return jsonify({
|
|
|
- "status_code": 500,
|
|
|
- "detail": "dataset not found",
|
|
|
- "data": {}
|
|
|
- })
|
|
|
+ return jsonify(
|
|
|
+ {"status_code": 500, "detail": "dataset not found", "data": {}}
|
|
|
+ )
|
|
|
dataset = datasets[0]
|
|
|
dataset_name = None
|
|
|
if dataset:
|
|
|
- dataset_name = dataset['name']
|
|
|
+ dataset_name = dataset["name"]
|
|
|
res.append(
|
|
|
- {'docId': content_chunk['doc_id'], 'content': content_chunk['text'],
|
|
|
- 'contentSummary': content_chunk['summary'], 'score': result['score'], 'datasetName': dataset_name})
|
|
|
- data = {'results': res}
|
|
|
- return jsonify({'status_code': 200,
|
|
|
- 'detail': "success",
|
|
|
- 'data': data})
|
|
|
+ {
|
|
|
+ "docId": content_chunk["doc_id"],
|
|
|
+ "content": content_chunk["text"],
|
|
|
+ "contentSummary": content_chunk["summary"],
|
|
|
+ "score": result["score"],
|
|
|
+ "datasetName": dataset_name,
|
|
|
+ }
|
|
|
+ )
|
|
|
+ data = {"results": res}
|
|
|
+ return jsonify({"status_code": 200, "detail": "success", "data": data})
|
|
|
|
|
|
|
|
|
@server_bp.route("/chat", methods=["GET"])
|
|
@@ -361,40 +363,46 @@ async def chat():
|
|
|
query_text = request.args.get("query")
|
|
|
dataset_ids = request.args.get("datasetIds").split(",")
|
|
|
search_type = request.args.get("search_type", "hybrid")
|
|
|
- query_results = await query_search(query_text=query_text, filters={"dataset_id": dataset_ids},
|
|
|
- search_type=search_type)
|
|
|
+ query_results = await query_search(
|
|
|
+ query_text=query_text,
|
|
|
+ filters={"dataset_id": dataset_ids},
|
|
|
+ search_type=search_type,
|
|
|
+ )
|
|
|
resource = get_resource_manager()
|
|
|
content_chunk_mapper = ContentChunks(resource.mysql_client)
|
|
|
dataset_mapper = Dataset(resource.mysql_client)
|
|
|
res = []
|
|
|
- for result in query_results['results']:
|
|
|
- content_chunks = await content_chunk_mapper.select_chunk_content(doc_id=result['doc_id'],
|
|
|
- chunk_id=result['chunk_id'])
|
|
|
+ for result in query_results["results"]:
|
|
|
+ content_chunks = await content_chunk_mapper.select_chunk_content(
|
|
|
+ doc_id=result["doc_id"], chunk_id=result["chunk_id"]
|
|
|
+ )
|
|
|
if not content_chunks:
|
|
|
- return jsonify({
|
|
|
- "status_code": 500,
|
|
|
- "detail": "content_chunk not found",
|
|
|
- "data": {}
|
|
|
- })
|
|
|
+ return jsonify(
|
|
|
+ {"status_code": 500, "detail": "content_chunk not found", "data": {}}
|
|
|
+ )
|
|
|
content_chunk = content_chunks[0]
|
|
|
- datasets = await dataset_mapper.select_dataset_by_id(content_chunk['dataset_id'])
|
|
|
+ datasets = await dataset_mapper.select_dataset_by_id(
|
|
|
+ content_chunk["dataset_id"]
|
|
|
+ )
|
|
|
if not datasets:
|
|
|
- return jsonify({
|
|
|
- "status_code": 500,
|
|
|
- "detail": "dataset not found",
|
|
|
- "data": {}
|
|
|
- })
|
|
|
+ return jsonify(
|
|
|
+ {"status_code": 500, "detail": "dataset not found", "data": {}}
|
|
|
+ )
|
|
|
dataset = datasets[0]
|
|
|
dataset_name = None
|
|
|
if dataset:
|
|
|
- dataset_name = dataset['name']
|
|
|
+ dataset_name = dataset["name"]
|
|
|
res.append(
|
|
|
- {'docId': content_chunk['doc_id'], 'content': content_chunk['text'],
|
|
|
- 'contentSummary': content_chunk['summary'], 'score': result['score'], 'datasetName': dataset_name})
|
|
|
+ {
|
|
|
+ "docId": content_chunk["doc_id"],
|
|
|
+ "content": content_chunk["text"],
|
|
|
+ "contentSummary": content_chunk["summary"],
|
|
|
+ "score": result["score"],
|
|
|
+ "datasetName": dataset_name,
|
|
|
+ }
|
|
|
+ )
|
|
|
|
|
|
chat_classifier = ChatClassifier()
|
|
|
chat_res = await chat_classifier.chat_with_deepseek(query_text, res)
|
|
|
- data = {'results': res, 'chat_res': chat_res}
|
|
|
- return jsonify({'status_code': 200,
|
|
|
- 'detail': "success",
|
|
|
- 'data': data})
|
|
|
+ data = {"results": res, "chat_res": chat_res}
|
|
|
+ return jsonify({"status_code": 200, "detail": "success", "data": data})
|