Selaa lähdekoodia

optimize speaker format

Lengyue 1 vuosi sitten
vanhempi
commit
4640ea7320
2 muutettua tiedostoa, joissa 4 lisäystä ja 4 poistoa
  1. 2 2
      fish_speech/datasets/text.py
  2. 2 2
      tools/llama/generate.py

+ 2 - 2
fish_speech/datasets/text.py

@@ -441,10 +441,10 @@ class AutoAugTextDataset(IterableDataset):
         add_bos: bool = True,
     ):
         if speaker is not None:
-            sentences = [f"[SPK: {speaker}]"] + sentences
+            speaker = "assistant"
 
         final_text = "<|im_start|>user<|im_sep|>" + " ".join(sentences) + "<|im_end|>"
-        final_text = final_text + "<|im_start|>assistant<|im_sep|>"
+        final_text = final_text + f"<|im_start|>{speaker}<|im_sep|>"
 
         encoded = self.tokenizer.encode(
             final_text,

+ 2 - 2
tools/llama/generate.py

@@ -292,10 +292,10 @@ def encode_tokens(
     string = clean_text(string)
 
     if speaker is not None:
-        string = f"[SPK: {speaker}] {string}"
+        speaker = "assistant"
 
     string = (
-        f"<|im_start|>user<|im_sep|>{string}<|im_end|><|im_start|>assistant<|im_sep|>"
+        f"<|im_start|>user<|im_sep|>{string}<|im_end|><|im_start|>{speaker}<|im_sep|>"
     )
     if bos:
         string = f"<|begin_of_sequence|>{string}"