1 год назад · 343a62b60a
--- a/.gitignore
+++ b/.gitignore
@@ -1,113 +1,33 @@
 
				-# =============================================================================
			
 
				-# Fish Speech - .gitignore
			
 
				-# =============================================================================
			
 
				-
			
 
				-# Operating System Files
			
 
				-# -----------------------
			
 
				 .DS_Store
			
 
				-.DS_Store?
			
 
				-._*
			
 
				-.Spotlight-V100
			
 
				-.Trashes
			
 
				-ehthumbs.db
			
 
				-Thumbs.db
			
 
				-
			
 
				-# IDEs and Editors
			
 
				-# ----------------
			
 
				-.vscode/
			
 
				-.idea/
			
 
				-*.swp
			
 
				-*.swo
			
 
				-*~
			
 
				-
			
 
				-# Python
			
 
				-# ------
			
 
				-__pycache__/
			
 
				-*.py[cod]
			
 
				-*$py.class
			
 
				-*.so
			
 
				-.Python
			
 
				-build/
			
 
				-develop-eggs/
			
 
				-dist/
			
 
				-downloads/
			
 
				-eggs/
			
 
				-.eggs/
			
 
				-lib/
			
 
				-lib64/
			
 
				-parts/
			
 
				-sdist/
			
 
				-var/
			
 
				-wheels/
			
 
				-*.egg-info/
			
 
				-.installed.cfg
			
 
				-*.egg
			
 
				-MANIFEST
			
 
				-
			
 
				-# Virtual Environments
			
 
				-# --------------------
			
 
				-.env
			
 
				-.venv
			
 
				-env/
			
 
				-venv/
			
 
				-ENV/
			
 
				-env.bak/
			
 
				-venv.bak/
			
 
				-/fishenv/
			
 
				-
			
 
				-# Project Dependencies
			
 
				-# --------------------
			
 
				+.pgx.*
			
 
				 .pdm-python
			
 
				 /fish_speech.egg-info
			
 
				-
			
 
				-# Data and Model Files
			
 
				-# --------------------
			
 
				-/data/
			
 
				-/results/
			
 
				-/checkpoints/
			
 
				-/references/
			
 
				-/demo-audios/
			
 
				-/example/
			
 
				-filelists/
			
 
				+__pycache__
			
 
				+/results
			
 
				+/data
			
 
				+/*.test.sh
			
 
				 *.filelist
			
 
				-
			
 
				-# Audio Files
			
 
				-# -----------
			
 
				-*.wav
			
 
				-*.mp3
			
 
				-*.flac
			
 
				-*.ogg
			
 
				-*.m4a
			
 
				-
			
 
				-# Data Files
			
 
				-# ----------
			
 
				-*.npy
			
 
				-*.npz
			
 
				-*.pkl
			
 
				-*.pickle
			
 
				-*.lab
			
 
				+filelists
			
 
				 /fish_speech/text/cmudict_cache.pickle
			
 
				-
			
 
				-# Cache and Temporary Files
			
 
				-# --------------------------
			
 
				-/.cache/
			
 
				-/.gradio/
			
 
				-/.locale/
			
 
				-.pgx.*
			
 
				-*log
			
 
				-*.log
			
 
				-
			
 
				-# External Tools
			
 
				-# --------------
			
 
				+/checkpoints
			
 
				+/.vscode
			
 
				+/data_server/target
			
 
				+/*.npy
			
 
				+/*.wav
			
 
				+/*.mp3
			
 
				+/*.lab
			
 
				+/results
			
 
				+/data
			
 
				+/.idea
			
 
				 ffmpeg.exe
			
 
				 ffprobe.exe
			
 
				-/faster_whisper/
			
 
				-
			
 
				-# Server Related
			
 
				-# --------------
			
 
				-/data_server/target/
			
 
				-
			
 
				-# Test Files
			
 
				-# ----------
			
 
				-/*.test.sh
			
 
				 asr-label*
			
 
				+/.cache
			
 
				+/fishenv
			
 
				+/.locale
			
 
				+/demo-audios
			
 
				+/references
			
 
				+/example
			
 
				+/faster_whisper
			
 
				+/.gradio
			
 
				+*log
			
--- a/data
+++ b/data
@@ -1 +0,0 @@
 
				-/mnt/users/whaledolphin/data
			
--- a/fish_speech/models/text2semantic/inference.py
+++ b/fish_speech/models/text2semantic/inference.py
@@ -339,7 +339,7 @@ def generate_long(
 
				     temperature: float = 0.8,
			
 
				     compile: bool = False,
			
 
				     iterative_prompt: bool = True,
			
 
				-    chunk_length: int = 512,
			
 
				+    chunk_length: int = 150,
			
 
				     prompt_text: Optional[str | list[str]] = None,
			
 
				     prompt_tokens: Optional[torch.Tensor | list[torch.Tensor]] = None,
			
 
				 ):
			
@@ -365,24 +365,6 @@ def generate_long(
 
				     texts = split_text(text, chunk_length) if iterative_prompt else [text]
			
 
				     max_length = model.config.max_seq_len
			
 
				 
			
 
				-    # if use_prompt:
			
 
				-    #     base_content_sequence.append(
			
 
				-    #         [
			
 
				-    #             TextPart(text=prompt_text[0]),
			
 
				-    #             VQPart(codes=prompt_tokens[0]),
			
 
				-    #         ],
			
 
				-    #         add_end=True,
			
 
				-    #     )
			
 
				-
			
 
				-    # for text in texts:
			
 
				-    #     content_sequence = ContentSequence(modality=None)
			
 
				-    #     base_content_sequence.append(
			
 
				-    #         [
			
 
				-    #             TextPart(text=text),
			
 
				-    #         ],
			
 
				-    #         add_end=True,
			
 
				-    #     )
			
 
				-
			
 
				     if use_prompt:
			
 
				         for t, c in zip(prompt_text, prompt_tokens):
			
 
				             base_content_sequence.append(
			
@@ -403,7 +385,7 @@ def generate_long(
 
				 
			
 
				     encoded = []
			
 
				     for text in texts:
			
 
				-        content_sequence = ContentSequence(modality="text")
			
 
				+        content_sequence = ContentSequence(modality=None)
			
 
				         content_sequence.append(TextPart(text=text))
			
 
				         encoded.append(
			
 
				             content_sequence.encode_for_inference(
			
--- a/uv.lock
+++ b/uv.lock
@@ -942,7 +942,6 @@ dependencies = [
 
				     { name = "cachetools" },
			
 
				     { name = "datasets" },
			
 
				     { name = "descript-audio-codec" },
			
 
				-    { name = "descript-audiotools" },
			
 
				     { name = "einops" },
			
 
				     { name = "einx", extra = ["torch"] },
			
 
				     { name = "faster-whisper" },
			
@@ -986,7 +985,6 @@ requires-dist = [
 
				     { name = "cachetools" },
			
 
				     { name = "datasets", specifier = "==2.18.0" },
			
 
				     { name = "descript-audio-codec" },
			
 
				-    { name = "descript-audiotools" },
			
 
				     { name = "einops", specifier = ">=0.7.0" },
			
 
				     { name = "einx", extras = ["torch"], specifier = "==0.2.2" },
			
 
				     { name = "faster-whisper" },