import re
import nltk
from typing import List

from langchain.text_splitter import RecursiveCharacterTextSplitter


class SplitTextIntoSentences:
    """Utility wrappers for splitting raw text into sentence-like chunks."""

    @staticmethod
    def nltk_sent_tokenize(text: str) -> List[str]:
        """Split ``text`` into sentences using NLTK's sentence tokenizer.

        Best suited to English (NLTK's default Punkt model). Each sentence is
        stripped of surrounding whitespace, and empty results are dropped.

        Args:
            text: Raw input text.

        Returns:
            Non-empty, whitespace-stripped sentences, in order.
        """
        return [s.strip() for s in nltk.sent_tokenize(text) if s.strip()]

    @staticmethod
    def lang_chain_tokenize(
        text: str,
        chunk_size: int = 128,
        chunk_overlap: int = 16,
    ) -> List[str]:
        """Split ``text`` into character chunks with LangChain's recursive splitter.

        Note that chunks are character windows, not sentences — the splitter
        recursively tries separators (paragraph, line, space, char) to stay
        within ``chunk_size``.

        Args:
            text: Raw input text.
            chunk_size: Maximum chunk length in characters. Defaults to 128,
                the value previously hard-coded here.
            chunk_overlap: Characters shared between consecutive chunks.
                Defaults to 16, the value previously hard-coded here.

        Returns:
            List of chunk strings.
        """
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        return splitter.split_text(text)