Просмотр исходного кода

replace cn2an with WeTextProcessing for text normalization (#33)

faceair 2 года назад
Родитель
Коммит
597970a1a5
2 изменённых файла: 4 добавлено и 7 удалено
  1. 3 6
      fish_speech/text/chinese.py
  2. 1 1
      pyproject.toml

+ 3 - 6
fish_speech/text/chinese.py

@@ -1,9 +1,9 @@
 import os
 import re
 
-import cn2an
 import jieba.posseg as psg
 from pypinyin import Style, lazy_pinyin
+from tn.chinese.normalizer import Normalizer
 
 from fish_speech.text.symbols import punctuation
 from fish_speech.text.tone_sandhi import ToneSandhi
@@ -16,7 +16,7 @@ pinyin_to_symbol_map = {
     for line in open(OPENCPOP_DICT_PATH).readlines()
 }
 
-
+normalizer = Normalizer()
 tone_modifier = ToneSandhi()
 
 
@@ -123,10 +123,7 @@ def _g2p(segments):
 
 
 def text_normalize(text):
-    numbers = re.findall(r"\d+(?:\.?\d+)?", text)
-    for number in numbers:
-        text = text.replace(number, cn2an.an2cn(number), 1)
-    return text
+    return normalizer.normalize(text)
 
 
 if __name__ == "__main__":

+ 1 - 1
pyproject.toml

@@ -24,7 +24,6 @@ dependencies = [
     "vector-quantize-pytorch>=1.10.0",
     "rich>=13.5.3",
     "gradio>=4.0.0",
-    "cn2an>=0.5.22",
     "pypinyin>=0.49.0",
     "jieba>=0.42.1",
     "g2p-en>=2.1.0",
@@ -34,6 +33,7 @@ dependencies = [
     "kui>=1.6.0",
     "zibai-server>=0.9.0",
     "loguru>=0.6.0",
+    "WeTextProcessing>=0.1.10",
 ]
 
 [project.optional-dependencies]