Parcourir la source

性能优化,优化计算 token 的 CPU 开销

luojunhui il y a 1 semaine
Parent
commit
9f2fd7770f
1 fichier modifié avec 1 ajout et 1 suppression
  1. 1 1
      applications/utils/nlp/split_text_into_sentences.py

+ 1 - 1
applications/utils/nlp/split_text_into_sentences.py

@@ -13,6 +13,6 @@ class SplitTextIntoSentences:
 
     @staticmethod
     def lang_chain_tokenize(text: str) -> List[str]:
-        splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)
+        splitter = RecursiveCharacterTextSplitter(chunk_size=64, chunk_overlap=16)
         docs = splitter.split_text(text)
         return docs