Pārlūkot izejas kodu

update deps & remove audios

Lengyue 1 gadu atpakaļ
vecāks
revīzija
6e88efb5d8

BIN
docs/assets/audios/0_input.wav


BIN
docs/assets/audios/0_output.wav


BIN
docs/assets/audios/10_output.wav


BIN
docs/assets/audios/11_output.wav


BIN
docs/assets/audios/1_input.wav


BIN
docs/assets/audios/1_output.wav


BIN
docs/assets/audios/2_input.wav


BIN
docs/assets/audios/2_output.wav


BIN
docs/assets/audios/3_output.wav


BIN
docs/assets/audios/4_output.wav


BIN
docs/assets/audios/5_output.wav


BIN
docs/assets/audios/6_input.wav


BIN
docs/assets/audios/6_output.wav


BIN
docs/assets/audios/7_input.wav


BIN
docs/assets/audios/7_output.wav


BIN
docs/assets/audios/8_output.wav


BIN
docs/assets/audios/9_output.wav


BIN
generated_audios/v1-sft/zh/out_4.wav


BIN
generated_audios/v1-sft/zh/out_5.wav


+ 2 - 2
tools/api.py

@@ -193,10 +193,10 @@ def parse_args():
     parser.add_argument(
         "--llama-checkpoint-path",
         type=str,
-        default="checkpoints/text2semantic-medium-v1-2k.pth",
+        default="checkpoints/text2semantic-sft-large-v1-4k.pth",
     )
     parser.add_argument(
-        "--llama-config-name", type=str, default="dual_ar_2_codebook_medium"
+        "--llama-config-name", type=str, default="dual_ar_2_codebook_large"
     )
     parser.add_argument(
         "--vqgan-checkpoint-path",

+ 6 - 5
tools/webui.py

@@ -2,14 +2,12 @@ import html
 import os
 import threading
 from argparse import ArgumentParser
-from io import BytesIO
 from pathlib import Path
 
 import gradio as gr
 import librosa
 import torch
 from loguru import logger
-from torchaudio import functional as AF
 from transformers import AutoTokenizer
 
 from tools.llama.generate import launch_thread_safe_queue
@@ -74,7 +72,10 @@ def inference(
     speaker,
 ):
     if args.max_gradio_length > 0 and len(text) > args.max_gradio_length:
-        return None, "Text is too long, please keep it under 1000 characters."
+        return (
+            None,
+            f"Text is too long, please keep it under {args.max_gradio_length} characters.",
+        )
 
     # Parse reference audio aka prompt
     prompt_tokens = None
@@ -266,10 +267,10 @@ def parse_args():
     parser.add_argument(
         "--llama-checkpoint-path",
         type=Path,
-        default="checkpoints/text2semantic-medium-v1-2k.pth",
+        default="checkpoints/text2semantic-sft-large-v1-4k.pth",
     )
     parser.add_argument(
-        "--llama-config-name", type=str, default="dual_ar_2_codebook_medium"
+        "--llama-config-name", type=str, default="dual_ar_2_codebook_large"
     )
     parser.add_argument(
         "--vqgan-checkpoint-path",