|
@@ -38,21 +38,24 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
|
|
re_chunk: bool,
|
|
re_chunk: bool,
|
|
) -> List[Chunk]:
|
|
) -> List[Chunk]:
|
|
if re_chunk:
|
|
if re_chunk:
|
|
- flag = await self.content_manager.update_content_info(
|
|
|
|
|
|
+ await self.content_manager.update_content_info(
|
|
doc_id=doc_id,
|
|
doc_id=doc_id,
|
|
text=text,
|
|
text=text,
|
|
text_type=text_type,
|
|
text_type=text_type,
|
|
title=title,
|
|
title=title,
|
|
dataset_id=dataset_id,
|
|
dataset_id=dataset_id,
|
|
)
|
|
)
|
|
|
|
+ flag = True
|
|
else:
|
|
else:
|
|
flag = await self.content_manager.insert_content(
|
|
flag = await self.content_manager.insert_content(
|
|
doc_id, text, text_type, title, dataset_id
|
|
doc_id, text, text_type, title, dataset_id
|
|
)
|
|
)
|
|
|
|
+ print(flag)
|
|
if not flag:
|
|
if not flag:
|
|
return []
|
|
return []
|
|
else:
|
|
else:
|
|
raw_chunks = await self.chunk(text, text_type, dataset_id)
|
|
raw_chunks = await self.chunk(text, text_type, dataset_id)
|
|
|
|
+ print(raw_chunks)
|
|
if not raw_chunks:
|
|
if not raw_chunks:
|
|
await self.content_manager.update_content_status(
|
|
await self.content_manager.update_content_status(
|
|
doc_id=doc_id,
|
|
doc_id=doc_id,
|
|
@@ -229,6 +232,7 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
|
|
chunks = await self._chunk_each_content(
|
|
chunks = await self._chunk_each_content(
|
|
self.doc_id, text, text_type, title, dataset_id, re_chunk
|
|
self.doc_id, text, text_type, title, dataset_id, re_chunk
|
|
)
|
|
)
|
|
|
|
+ print(chunks)
|
|
if not chunks:
|
|
if not chunks:
|
|
return
|
|
return
|
|
|
|
|