Просмотр исходного кода

Revert "Fix timbre problem (#1009)"

This reverts commit 9021a57dceb420fed208f7ad7d9101065e6812c6.
Stardust·减 10 месяцев назад
Родитель
Коммит
343a62b60a
Изменено файлов: 4 (добавлено строк: 27, удалено строк: 128)
  1. + 25 - 105
      .gitignore
  2. + 0 - 1
      data
  3. + 2 - 20
      fish_speech/models/text2semantic/inference.py
  4. + 0 - 2
      uv.lock

+ 25 - 105
.gitignore

@@ -1,113 +1,33 @@
-# =============================================================================
-# Fish Speech - .gitignore
-# =============================================================================
-
-# Operating System Files
-# -----------------------
 .DS_Store
-.DS_Store?
-._*
-.Spotlight-V100
-.Trashes
-ehthumbs.db
-Thumbs.db
-
-# IDEs and Editors
-# ----------------
-.vscode/
-.idea/
-*.swp
-*.swo
-*~
-
-# Python
-# ------
-__pycache__/
-*.py[cod]
-*$py.class
-*.so
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# Virtual Environments
-# --------------------
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-/fishenv/
-
-# Project Dependencies
-# --------------------
+.pgx.*
 .pdm-python
 /fish_speech.egg-info
-
-# Data and Model Files
-# --------------------
-/data/
-/results/
-/checkpoints/
-/references/
-/demo-audios/
-/example/
-filelists/
+__pycache__
+/results
+/data
+/*.test.sh
 *.filelist
-
-# Audio Files
-# -----------
-*.wav
-*.mp3
-*.flac
-*.ogg
-*.m4a
-
-# Data Files
-# ----------
-*.npy
-*.npz
-*.pkl
-*.pickle
-*.lab
+filelists
 /fish_speech/text/cmudict_cache.pickle
-
-# Cache and Temporary Files
-# --------------------------
-/.cache/
-/.gradio/
-/.locale/
-.pgx.*
-*log
-*.log
-
-# External Tools
-# --------------
+/checkpoints
+/.vscode
+/data_server/target
+/*.npy
+/*.wav
+/*.mp3
+/*.lab
+/results
+/data
+/.idea
 ffmpeg.exe
 ffprobe.exe
-/faster_whisper/
-
-# Server Related
-# --------------
-/data_server/target/
-
-# Test Files
-# ----------
-/*.test.sh
 asr-label*
+/.cache
+/fishenv
+/.locale
+/demo-audios
+/references
+/example
+/faster_whisper
+/.gradio
+*log

+ 0 - 1
data

@@ -1 +0,0 @@
-/mnt/users/whaledolphin/data

+ 2 - 20
fish_speech/models/text2semantic/inference.py

@@ -339,7 +339,7 @@ def generate_long(
     temperature: float = 0.8,
     compile: bool = False,
     iterative_prompt: bool = True,
-    chunk_length: int = 512,
+    chunk_length: int = 150,
     prompt_text: Optional[str | list[str]] = None,
     prompt_tokens: Optional[torch.Tensor | list[torch.Tensor]] = None,
 ):
@@ -365,24 +365,6 @@ def generate_long(
     texts = split_text(text, chunk_length) if iterative_prompt else [text]
     max_length = model.config.max_seq_len
 
-    # if use_prompt:
-    #     base_content_sequence.append(
-    #         [
-    #             TextPart(text=prompt_text[0]),
-    #             VQPart(codes=prompt_tokens[0]),
-    #         ],
-    #         add_end=True,
-    #     )
-
-    # for text in texts:
-    #     content_sequence = ContentSequence(modality=None)
-    #     base_content_sequence.append(
-    #         [
-    #             TextPart(text=text),
-    #         ],
-    #         add_end=True,
-    #     )
-
     if use_prompt:
         for t, c in zip(prompt_text, prompt_tokens):
             base_content_sequence.append(
@@ -403,7 +385,7 @@ def generate_long(
 
     encoded = []
     for text in texts:
-        content_sequence = ContentSequence(modality="text")
+        content_sequence = ContentSequence(modality=None)
         content_sequence.append(TextPart(text=text))
         encoded.append(
             content_sequence.encode_for_inference(

+ 0 - 2
uv.lock

@@ -942,7 +942,6 @@ dependencies = [
     { name = "cachetools" },
     { name = "datasets" },
     { name = "descript-audio-codec" },
-    { name = "descript-audiotools" },
     { name = "einops" },
     { name = "einx", extra = ["torch"] },
     { name = "faster-whisper" },
@@ -986,7 +985,6 @@ requires-dist = [
     { name = "cachetools" },
     { name = "datasets", specifier = "==2.18.0" },
     { name = "descript-audio-codec" },
-    { name = "descript-audiotools" },
     { name = "einops", specifier = ">=0.7.0" },
     { name = "einx", extras = ["torch"], specifier = "==0.2.2" },
     { name = "faster-whisper" },