Pārlūkot izejas kodu

新增 auto_build_graph task

luojunhui 14 stundas atpakaļ
vecāks
revīzija
01f56aab54

+ 9 - 0
applications/async_task/build_graph.py

@@ -69,3 +69,12 @@ class BuildGraph(AsyncNeo4jRepository):
         chunk_list = await self.get_chunk_list_from_es(doc_id)
         for chunk in chunk_list:
             await self.add_single_chunk(chunk)
+
+    async def deal_batch(self, dataset_id):
+        """async task"""
+        doc_ids = await self.chunk_manager.get_ungraphed_docs(dataset_id)
+        for doc_id in doc_ids:
+            try:
+                await self.deal(doc_id)
+            except Exception as e:
+                print(f"failed to build graph for doc {doc_id}: {e}")

+ 12 - 0
applications/utils/mysql/content_chunks.py

@@ -188,3 +188,15 @@ class ContentChunks(BaseMySQLClient):
             "page_size": page_size,
             "total_pages": total_pages,
         }
+
+    # 获取未建图的文档 id
+    async def get_ungraphed_docs(self, dataset_id) -> list[str]:
+        query = """
+            SELECT DISTINCT doc_id
+            FROM content_chunks 
+            WHERE dataset_id = %s AND status = 1 AND es_status = 2 AND graph_status = 0
+            ORDER BY id DESC LIMIT 100;
+        """
+        result = await self.pool.async_fetch(query=query, params=(dataset_id, ))
+        return [i['doc_id'] for i in result]
+

+ 7 - 1
routes/buleprint.py

@@ -504,13 +504,19 @@ async def delete_task():
     if not doc_id:
         return jsonify({"status_code": 500, "detail": "docId not found", "data": {}})
 
+    dataset_id: str = body.get("dataset_id", 12)
+    batch: bool = body.get("batch_process", False)
+
     resource = get_resource_manager()
     build_graph_task = BuildGraph(
         neo4j=resource.graph_client,
         es_client=resource.es_client,
         mysql_client=resource.mysql_client,
     )
-    await build_graph_task.deal(doc_id)
+    if batch:
+        await build_graph_task.deal_batch(dataset_id)
+    else:
+        await build_graph_task.deal(doc_id)
     return jsonify({"status_code": 200, "detail": "success", "data": {}})