|
@@ -1,5 +1,6 @@
|
|
import asyncio
|
|
import asyncio
|
|
import json
|
|
import json
|
|
|
|
+import os
|
|
import traceback
|
|
import traceback
|
|
import uuid
|
|
import uuid
|
|
from typing import Dict, Any
|
|
from typing import Dict, Any
|
|
@@ -19,8 +20,10 @@ from applications.config import (
|
|
from applications.resource import get_resource_manager
|
|
from applications.resource import get_resource_manager
|
|
from applications.search import HybridSearch
|
|
from applications.search import HybridSearch
|
|
from applications.utils.chat import RAGChatAgent
|
|
from applications.utils.chat import RAGChatAgent
|
|
-from applications.utils.mysql import Dataset, Contents, ContentChunks, ChatResult
|
|
|
|
|
|
+from applications.utils.mysql import Dataset, Contents, ContentChunks, ChatResult, Books
|
|
from applications.api.qwen import QwenClient
|
|
from applications.api.qwen import QwenClient
|
|
|
|
+from applications.utils.oss.oss_client import OSSClient
|
|
|
|
+from applications.utils.pdf.book_extract import book_extract
|
|
from applications.utils.spider.study import study
|
|
from applications.utils.spider.study import study
|
|
|
|
|
|
server_bp = Blueprint("api", __name__, url_prefix="/api")
|
|
server_bp = Blueprint("api", __name__, url_prefix="/api")
|
|
@@ -629,3 +632,90 @@ async def chat_history():
|
|
},
|
|
},
|
|
}
|
|
}
|
|
)
|
|
)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@server_bp.route("/upload/file", methods=["POST"])
async def upload_pdf():
    """Accept one uploaded PDF, push it to OSS and register it as a book.

    The PDF is read from the ``file`` field of the multipart request, written
    to ``/tmp/<book_id>``, uploaded to OSS under ``rag/pdfs/<book_id>`` and
    recorded through ``Books.insert_book``.

    Returns:
        A ``(response, status_code)`` tuple:
        * 200 when the file was uploaded and recorded,
        * 400 when no file was sent, or its extension / MIME type is not PDF,
        * 500 when the OSS upload or the database insert raises.
    """
    # request.files must be awaited before the uploaded parts are available.
    files = await request.files
    file = files.get("file")
    if not file:
        return jsonify({"status": "error", "message": "No file uploaded."}), 400

    filename = file.filename

    # Validate both the extension and the declared MIME type before doing
    # any disk or network work.
    if not filename.lower().endswith(".pdf"):
        return jsonify(
            {
                "status": "error",
                "message": "Invalid file format. Only PDF files are allowed.",
            }
        ), 400
    if file.content_type != "application/pdf":
        return jsonify(
            {
                "status": "error",
                "message": "Invalid MIME type. Only PDF files are allowed.",
            }
        ), 400

    # The local path is derived from a generated id, never from the
    # client-supplied filename.
    book_id = f"book-{uuid.uuid4()}"
    file_path = os.path.join("/tmp", book_id)
    await file.save(file_path)

    resource = get_resource_manager()
    books = Books(resource.mysql_client)

    try:
        oss_client = OSSClient()
        oss_path = f"rag/pdfs/{book_id}"
        # upload_file is called synchronously; run it in the default executor
        # so a large upload does not stall the event loop for other requests.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None, oss_client.upload_file, file_path, oss_path
        )
        await books.insert_book(book_id, filename, oss_path)
    except Exception as e:
        # Without a book record nothing will ever pick this temp file up
        # again, so remove it (best effort) instead of leaking it in /tmp.
        try:
            os.remove(file_path)
        except OSError:
            pass
        return jsonify({"status": "error", "message": str(e)}), 500

    # On success the local copy is intentionally kept: process_book looks for
    # /tmp/<book_id> before downloading the file from OSS again.
    return jsonify(
        {
            "status": "success",
            "message": f"File {filename} uploaded successfully to OSS!",
        }
    ), 200
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@server_bp.route("/process/book", methods=["GET"])
async def process_book():
    """Extract and chunk every book returned by ``select_init_books``.

    For each book whose ``extract_status`` is still 0 the handler:
    sets the status to 1, makes sure the PDF exists at ``/tmp/<book_id>``
    (downloading it from OSS path ``rag/pdfs/<book_id>`` when absent), runs
    ``book_extract``, stores the extracted ``content_list`` and finally runs a
    ``ChunkBooksTask`` for the book.

    Returns:
        A JSON response ``{"status": "success"}`` once the loop finishes.
        Exceptions raised while processing a book propagate to the caller.
    """
    resource = get_resource_manager()
    books_mapper = Books(resource.mysql_client)
    oss_client = OSSClient()
    loop = asyncio.get_running_loop()

    books = await books_mapper.select_init_books()
    for book in books:
        book_id = book.get("book_id")

        # Every other Books call in this handler is awaited; subscripting an
        # un-awaited coroutine raises TypeError, so await the result when the
        # mapper returns a coroutine.
        status_rows = books_mapper.select_book_extract_status(book_id)
        if asyncio.iscoroutine(status_rows):
            status_rows = await status_rows

        # Re-check the status right before claiming the book; skip books
        # that have no status row or are no longer in the initial state.
        if not status_rows or status_rows[0]["extract_status"] != 0:
            continue

        await books_mapper.update_book_extract_status(book_id, 1)

        book_path = os.path.join("/tmp", book_id)
        if not os.path.exists(book_path):
            oss_path = f"rag/pdfs/{book_id}"
            # download_file is called synchronously; run it in the default
            # executor so the event loop stays responsive.
            await loop.run_in_executor(
                None, oss_client.download_file, oss_path, book_path
            )

        res = await book_extract(book_path, book_id)
        if not res:
            # NOTE(review): the book stays at extract_status 1 when extraction
            # yields nothing — confirm whether a failure status should be set.
            continue

        await books_mapper.update_book_extract_result(
            book_id, res.get("results").get(book_id).get("content_list")
        )

        doc_id = f"doc-{uuid.uuid4()}"
        chunk_task = ChunkBooksTask(doc_id=doc_id, resource=resource)
        await chunk_task.deal({"book_id": book_id})

    return jsonify({"status": "success"})
|