Bläddra i källkod

新增 requirements的包

luojunhui 1 vecka sedan
förälder
incheckning
2bec342945

+ 1 - 1
applications/utils/neo4j/models.py

@@ -40,7 +40,7 @@ MERGE (gc:GraphChunk {milvus_id: $milvus_id})
       gc.domain    = $domain,
       gc.text_type = $text_type,
       gc.task_type = $task_type,
-      gc.doc_id    = $doc_id
+      gc.doc_id    = $doc_id,
       gc.display_name = $chunk_id + "--" + $doc_id
 
 MERGE (gc)-[:BELONGS_TO]->(d)

+ 0 - 17
applications/utils/nlp/split_text_into_sentences.py

@@ -1,6 +1,5 @@
 import re
 import nltk
-import jieba
 
 from typing import List
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -12,22 +11,6 @@ class SplitTextIntoSentences:
         """especially for English"""
         return [s.strip() for s in nltk.sent_tokenize(text) if s.strip()]
 
-    @staticmethod
-    def jieba_sent_tokenize(text: str) -> List[str]:
-        """especially for Chinese"""
-        words = list(jieba.cut(text))
-        sentence_list: List = []
-        buf = ""
-        for w in words:
-            buf += w
-            if re.match(r"[。!?!?;;…]", w):  # 遇到标点就断句
-                sentence_list.append(buf.strip())
-                buf = ""
-
-        if buf.strip():
-            sentence_list.append(buf.strip())
-        return sentence_list
-
     @staticmethod
     def lang_chain_tokenize(text: str) -> List[str]:
         splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=10)

+ 0 - 3
vector_app.py

@@ -1,4 +1,3 @@
-import jieba
 from quart import Quart
 
 from applications.config import LOCAL_MODEL_CONFIG, DEFAULT_MODEL
@@ -23,7 +22,5 @@ resource_manager = init_resource_manager(
 async def startup():
     await resource_manager.startup()
     print("Resource manager is ready.")
-    jieba.initialize()
-    print("Jieba dictionary loaded successfully")