Ver Fonte

Merge branch 'feature/xueyiming/2025-10-09-update-query' of Server/rag_server into master

xueyiming há 1 mês atrás
pai
commit
ada16d072a

+ 18 - 0
applications/utils/oss/oss_client.py

@@ -88,3 +88,21 @@ class OSSClient:
             return False
         except Exception as e:
             raise Exception(f"Error checking file existence on OSS: {str(e)}")
+
+    def generate_url(self, oss_file_path, expire=3600, inline=True):
+        """Return a signed GET URL for an object stored on OSS.
+
+        Args:
+            oss_file_path: Object key inside the bucket. A falsy value
+                (``None`` or ``""``) short-circuits to an empty string.
+            expire: Forwarded unchanged to ``sign_url`` as its expiry
+                argument (presumably a lifetime in seconds - confirm
+                against the bucket client in use).
+            inline: When true, request ``Content-Disposition: inline`` on
+                the response so the object is displayed rather than
+                downloaded.
+
+        Returns:
+            The signed URL, or ``""`` when no path is given.
+
+        Raises:
+            Exception: If signing fails; the underlying error is chained
+                as ``__cause__``.
+        """
+        if not oss_file_path:
+            return ""
+        # NOTE(review): assumes self.bucket is an oss2.Bucket - confirm.
+        # ``response-content-disposition`` is a GetObject query-string
+        # override, so it must be signed through ``params``. Passed through
+        # ``headers`` it never reaches the signed URL and the inline
+        # override is silently lost.
+        params = {"response-content-disposition": "inline"} if inline else {}
+        try:
+            return self.bucket.sign_url("GET", oss_file_path, expire, params=params)
+        except Exception as e:
+            raise Exception(f"Error generating URL for OSS file: {str(e)}") from e
+
+
+if __name__ == "__main__":
+    # Manual smoke check: sign a URL for one sample object and print it.
+    print(
+        OSSClient().generate_url(
+            "rag/pdfs/book-9e1babe8-c8fb-4740-9008-c495837cfbae.pdf"
+        )
+    )

+ 2 - 2
applications/utils/task/async_task.py

@@ -34,10 +34,10 @@ async def handle_books():
             if extract_status == 0:
                 # 更新提取状态为处理中
                 await books_mapper.update_book_extract_status(book_id, 1)
-                book_path = os.path.join("/tmp", book_id)
+                book_path = os.path.join("/tmp", book_id + ".pdf")
 
                 if not os.path.exists(book_path):
-                    oss_path = f"rag/pdfs/{book_id}"
+                    oss_path = f"rag/pdfs/{book_id}.pdf"
                     try:
                         # 下载书籍文件
                         await oss_client.download_file(oss_path, book_path)

+ 16 - 14
routes/blueprint.py

@@ -249,7 +249,7 @@ async def get_content():
         return jsonify({"status_code": 404, "detail": "content not found", "data": {}})
 
     row = rows[0]
-
+    oss_client = OSSClient()
     return jsonify(
         {
             "status_code": 200,
@@ -258,6 +258,10 @@ async def get_content():
                 "title": row.get("title", ""),
                 "text": row.get("text", ""),
                 "doc_id": row.get("doc_id", ""),
+                "textType": row.get("text_type"),
+                "url": oss_client.generate_url(row.get("text"))
+                if row.get("text_type") == 3
+                else "",
             },
         }
     )
@@ -291,7 +295,7 @@ async def content_list():
         doc_status=doc_status,
         order_by=order_by,
     )
-
+    oss_client = OSSClient()
     # 格式化 entities,只保留必要字段
     entities = [
         {
@@ -299,6 +303,10 @@ async def content_list():
             "title": row.get("title") or "",
             "text": row.get("text") or "",
             "statusDesc": "可用" if row.get("status") == 2 else "不可用",
+            "textType": row.get("text_type"),
+            "url": oss_client.generate_url(row.get("text"))
+            if row.get("text_type") == 3
+            else "",
         }
         for row in result["entities"]
     ]
@@ -520,11 +528,8 @@ async def upload_pdf():
         # 检查文件扩展名是否是 .pdf
         if not file.filename.lower().endswith(".pdf"):
             return jsonify(
-                {
-                    "status": "error",
-                    "message": "Invalid file format. Only PDF files are allowed.",
-                }
-            ), 400
+                {"status_code": 400, "detail": "Only PDF files are allowed."}
+            )
 
         # 获取文件名
         filename = file.filename
@@ -533,14 +538,11 @@ async def upload_pdf():
         # 检查文件的 MIME 类型是否是 application/pdf
         if file.content_type != "application/pdf":
             return jsonify(
-                {
-                    "status": "error",
-                    "message": "Invalid MIME type. Only PDF files are allowed.",
-                }
-            ), 400
+                {"status_code": 400, "detail": "Only PDF files are allowed."}
+            )
 
         # 保存到本地(可选,视需要)
-        file_path = os.path.join("/tmp", book_id)  # 临时存储路径
+        file_path = os.path.join("/tmp", book_id + ".pdf")  # 临时存储路径
         await file.save(file_path)
         resource = get_resource_manager()
         books = Books(resource.mysql_client)
@@ -548,7 +550,7 @@ async def upload_pdf():
         try:
             oss_client = OSSClient()
             # 上传文件到 OSS
-            oss_path = f"rag/pdfs/{book_id}"
+            oss_path = f"rag/pdfs/{book_id}.pdf"
             oss_client.upload_file(file_path, oss_path)
             await books.insert_book(book_id, filename, oss_path)
             return jsonify({"status_code": 200, "detail": "success"})