Bläddra i källkod

新增 rechunk 功能

luojunhui 2 veckor sedan
förälder
incheckning
0cce682c7f

+ 0 - 3
applications/async_task/chunk_task.py

@@ -50,12 +50,10 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
             flag = await self.content_manager.insert_content(
                 doc_id, text, text_type, title, dataset_id
             )
-        print(flag)
         if not flag:
             return []
         else:
             raw_chunks = await self.chunk(text, text_type, dataset_id)
-            print(raw_chunks)
             if not raw_chunks:
                 await self.content_manager.update_content_status(
                     doc_id=doc_id,
@@ -232,7 +230,6 @@ class ChunkEmbeddingTask(TopicAwarePackerV2):
             chunks = await self._chunk_each_content(
                 self.doc_id, text, text_type, title, dataset_id, re_chunk
             )
-            print(chunks)
             if not chunks:
                 return
 

+ 1 - 1
applications/utils/chunks/topic_aware_chunking.py

@@ -37,7 +37,7 @@ class TopicAwareChunker(BoundaryDetector, SplitTextIntoSentences):
             return {}
 
         sentences_embeddings = await self._encode_batch(sentence_list)
-        boundaries = self.detect_boundaries(sentence_list, sentences_embeddings, True)
+        boundaries = self.detect_boundaries(sentence_list, sentences_embeddings)
         return {
             "sentence_list": sentence_list,
             "boundaries": boundaries,