|
@@ -1,6 +1,5 @@
|
|
import asyncio
|
|
import asyncio
|
|
from typing import List
|
|
from typing import List
|
|
-from tqdm import tqdm
|
|
|
|
|
|
|
|
from applications.api import get_basic_embedding
|
|
from applications.api import get_basic_embedding
|
|
from applications.utils.async_utils import run_tasks_with_asyncio_task_group
|
|
from applications.utils.async_utils import run_tasks_with_asyncio_task_group
|
|
@@ -233,17 +232,17 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
|
|
if not chunks:
|
|
if not chunks:
|
|
return
|
|
return
|
|
|
|
|
|
- # dev
|
|
|
|
- for chunk in tqdm(chunks):
|
|
|
|
- await self.save_each_chunk(chunk)
|
|
|
|
-
|
|
|
|
- # await run_tasks_with_asyncio_task_group(
|
|
|
|
- # task_list=chunks,
|
|
|
|
- # handler=self.save_each_chunk,
|
|
|
|
- # description="处理单篇文章分块",
|
|
|
|
- # unit="chunk",
|
|
|
|
- # max_concurrency=10,
|
|
|
|
- # )
|
|
|
|
|
|
+ # # dev
|
|
|
|
+ # for chunk in tqdm(chunks):
|
|
|
|
+ # await self.save_each_chunk(chunk)
|
|
|
|
+
|
|
|
|
+ await run_tasks_with_asyncio_task_group(
|
|
|
|
+ task_list=chunks,
|
|
|
|
+ handler=self.save_each_chunk,
|
|
|
|
+ description="处理单篇文章分块",
|
|
|
|
+ unit="chunk",
|
|
|
|
+ max_concurrency=10,
|
|
|
|
+ )
|
|
|
|
|
|
await self.content_manager.update_content_status(
|
|
await self.content_manager.update_content_status(
|
|
doc_id=self.doc_id,
|
|
doc_id=self.doc_id,
|