Bläddra i källkod

新增 requirements的包

luojunhui 1 vecka sedan
förälder
incheckning
2bec342945

+ 1 - 1
applications/utils/neo4j/models.py

@@ -40,7 +40,7 @@ MERGE (gc:GraphChunk {milvus_id: $milvus_id})
       gc.domain    = $domain,
       gc.text_type = $text_type,
       gc.task_type = $task_type,
-      gc.doc_id    = $doc_id
+      gc.doc_id    = $doc_id,
       gc.display_name = $chunk_id + "--" + $doc_id
 
 MERGE (gc)-[:BELONGS_TO]->(d)

+ 0 - 17
applications/utils/nlp/split_text_into_sentences.py

@@ -1,6 +1,5 @@
 import re
 import nltk
-import jieba
 
 from typing import List
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -12,22 +11,6 @@ class SplitTextIntoSentences:
         """especially for English"""
         return [s.strip() for s in nltk.sent_tokenize(text) if s.strip()]
 
-    @staticmethod
-    def jieba_sent_tokenize(text: str) -> List[str]:
-        """especially for Chinese"""
-        words = list(jieba.cut(text))
-        sentence_list: List = []
-        buf = ""
-        for w in words:
-            buf += w
-            if re.match(r"[。!?!?;;…]", w):  # 遇到标点就断句
-                sentence_list.append(buf.strip())
-                buf = ""
-
-        if buf.strip():
-            sentence_list.append(buf.strip())
-        return sentence_list
-
     @staticmethod
     def lang_chain_tokenize(text: str) -> List[str]:
         splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)

+ 0 - 3
vector_app.py

@@ -1,4 +1,3 @@
-import jieba
 from quart import Quart
 
 from applications.config import LOCAL_MODEL_CONFIG, DEFAULT_MODEL
@@ -23,7 +22,5 @@ resource_manager = init_resource_manager(
 async def startup():
     await resource_manager.startup()
     print("Resource manager is ready.")
-    jieba.initialize()
-    print("Jieba dictionary loaded successfully")