Lengyue 2 года назад
Родитель
Commit
4304fab202
2 изменённых файла: 12 добавлено и 4 удалено
  1. 11 3
      fish_speech/text/parser.py
  2. 1 1
      fish_speech/text/symbols.py

+ 11 - 3
fish_speech/text/parser.py

@@ -148,7 +148,13 @@ def parse_unknown_segment(text, order):
         else:
             detected_language = None
 
-            for language in order:
+            _order = order.copy()
+            if last_language is not None:
+                # Prioritize the last language
+                _order.remove(last_language)
+                _order.insert(0, last_language)
+
+            for language in _order:
                 for start, end in language_unicode_range_map[language]:
                     if start <= ord(char) <= end:
                         detected_language = language
@@ -208,5 +214,7 @@ if __name__ == "__main__":
     )
     print(segments)
 
-    ids = segments_to_ids(segments)
-    print(ids)
+    segments = parse_text_to_segments(
+        "毕业然后复活卡b站推荐bug加流量。Hugging face, BGM 声音很大吗?那我改一下Ё。君の虜になってしまえばきっと"  # noqa: E501
+    )
+    print(segments)

+ 1 - 1
fish_speech/text/symbols.py

@@ -52,7 +52,7 @@ symbols_to_id = {s: i for i, s in enumerate(symbols)}
 language_id_map = {pad: 0, "ZH": 1, "JP": 2, "EN": 3}
 language_unicode_range_map = {
     "ZH": [(0x4E00, 0x9FFF)],
-    "JP": [(0x3040, 0x309F), (0x30A0, 0x30FF), (0x31F0, 0x31FF)],
+    "JP": [(0x4E00, 0x9FFF), (0x3040, 0x309F), (0x30A0, 0x30FF), (0x31F0, 0x31FF)],
     "EN": [(0x0000, 0x007F)],
 }
 num_languages = len(language_id_map.keys())