Explorar o código

新增 dont_chunk模块

luojunhui hai 1 semana
pai
achega
d3b375cec4
Modificouse 1 ficheiro con 11 adicións e 12 borrados
  1. 11 12
      applications/async_task/chunk_task.py

+ 11 - 12
applications/async_task/chunk_task.py

@@ -1,6 +1,5 @@
 import asyncio
 from typing import List
-from tqdm import tqdm
 
 from applications.api import get_basic_embedding
 from applications.utils.async_utils import run_tasks_with_asyncio_task_group
@@ -233,17 +232,17 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
             if not chunks:
                 return
 
-            # dev
-            for chunk in tqdm(chunks):
-                await self.save_each_chunk(chunk)
-
-            # await run_tasks_with_asyncio_task_group(
-            #     task_list=chunks,
-            #     handler=self.save_each_chunk,
-            #     description="处理单篇文章分块",
-            #     unit="chunk",
-            #     max_concurrency=10,
-            # )
+            # # dev
+            # for chunk in tqdm(chunks):
+            #     await self.save_each_chunk(chunk)
+
+            await run_tasks_with_asyncio_task_group(
+                task_list=chunks,
+                handler=self.save_each_chunk,
+                description="处理单篇文章分块",
+                unit="chunk",
+                max_concurrency=10,
+            )
 
             await self.content_manager.update_content_status(
                 doc_id=self.doc_id,