Bladeren bron

Merge branch 'master' of https://git.yishihui.com/Server/rag_server

luojunhui 3 weken geleden
bovenliggende
commit
d0afa20b2f

+ 1 - 2
applications/async_task/chunk_task.py

@@ -267,7 +267,6 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
 class ChunkBooksTask(ChunkEmbeddingTask):
     """图书类型分块任务"""
 
-    BOOK_PDF_DATASET_ID = 21
     BOOK_PDF_TYPE = 3
 
     async def _process_each_book(self, book_id):
@@ -279,7 +278,7 @@ class ChunkBooksTask(ChunkEmbeddingTask):
         raw_chunks = await self.chunk_books(
             sentence_list=book_texts,
             text_type=self.BOOK_PDF_TYPE,
-            dataset_id=self.BOOK_PDF_DATASET_ID,
+            dataset_id=result[0]["dataset_id"],
         )
         if not raw_chunks:
             await self.content_manager.update_content_status(

+ 5 - 5
applications/utils/mysql/books.py

@@ -12,7 +12,7 @@ class Books(BaseMySQLClient):
 
     async def get_book_extract_detail(self, book_id):
         query = """
-            SELECT book_name, book_oss_path, extract_result FROM books WHERE book_id = %s;
+            SELECT book_name, book_oss_path, extract_result, dataset_id FROM books WHERE book_id = %s;
         """
         return await self.pool.async_fetch(query=query, params=(book_id,))
 
@@ -24,13 +24,13 @@ class Books(BaseMySQLClient):
             query=query, params=(new_status, book_id, ori_status)
         )
 
-    async def insert_book(self, book_id, book_name, book_oss_path, doc_id):
+    async def insert_book(self, book_id, book_name, book_oss_path, doc_id, dataset_id):
         query = """
-            INSERT INTO books (book_id, book_name, book_oss_path, doc_id)
-             VALUES (%s, %s, %s, %s);
+            INSERT INTO books (book_id, book_name, book_oss_path, doc_id, dataset_id)
+             VALUES (%s, %s, %s, %s, %s);
         """
         return await self.pool.async_save(
-            query=query, params=(book_id, book_name, book_oss_path, doc_id)
+            query=query, params=(book_id, book_name, book_oss_path, doc_id, dataset_id)
         )
 
     async def select_init_books(self):

+ 0 - 2
applications/utils/task/async_task.py

@@ -1,10 +1,8 @@
 import asyncio
 import json
 import os
-import uuid
 
 from applications.api import get_basic_embedding
-from applications.api.qwen import QwenClient
 from applications.async_task import ChunkBooksTask
 from applications.config import BASE_MILVUS_SEARCH_PARAMS, DEFAULT_MODEL
 from applications.resource import get_resource_manager

+ 4 - 4
routes/blueprint.py

@@ -524,7 +524,8 @@ async def upload_pdf():
     # 获取前端上传的文件
     # 先等待 request.files 属性来确保文件已加载
     files = await request.files
-
+    form = await request.form  # 这是一个协程对象,需要用 await 等待
+    dataset_id = form.get("dataset_id")  # 获取表单中的 "dataset_id" 参数
     # 获取文件对象
     file = files.get("file")
 
@@ -559,11 +560,10 @@ async def upload_pdf():
             oss_client.upload_file(file_path, oss_path)
             doc_id = f"doc-{uuid.uuid4()}"
             BOOK_PDF_TYPE = 3
-            BOOK_PDF_DATASET_ID = 21
             await content_manager.insert_content(
-                doc_id, oss_path, BOOK_PDF_TYPE, filename, BOOK_PDF_DATASET_ID, None
+                doc_id, oss_path, BOOK_PDF_TYPE, filename, dataset_id, None
             )
-            await books.insert_book(book_id, filename, oss_path, doc_id)
+            await books.insert_book(book_id, filename, oss_path, doc_id, dataset_id)
             return jsonify({"status_code": 200, "detail": "success"})
         except Exception as e:
             return jsonify({"status_code": 500, "detail": str(e)})

+ 1 - 1
vector_app.py

@@ -33,5 +33,5 @@ async def shutdown():
 
 # 注册路由
 from routes import server_bp
-
+app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024
 app.register_blueprint(server_bp)