Переглянути джерело

Add ZH Normalization and i18n (#275)

* Add Chinese Normalization Part

* Fix function name

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix a bug

* Update webui.py

fix "some" bugs

* Update webui.py

fix function import folder

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add i18n and move the package

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* delete old import part

* fix bugs...

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
PoTaTo 1 рік тому
батько
коміт
2f2b5a3788

+ 120 - 117
fish_speech/i18n/locale/en_US.json

@@ -1,119 +1,122 @@
 {
-    "16-mixed is recommended for 10+ series GPU": "16-mixed is recommended for 10+ series GPU",
-    "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 to 10 seconds of reference audio, useful for specifying speaker.",
-    "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).",
-    "Accumulate Gradient Batches": "Accumulate Gradient Batches",
-    "Add to Processing Area": "Add to Processing Area",
-    "Added path successfully!": "Added path successfully!",
-    "Advanced Config": "Advanced Config",
-    "Base LLAMA Model": "Base LLAMA Model",
-    "Batch Inference": "Batch Inference",
-    "Batch Size": "Batch Size",
-    "Changing with the Model Path": "Changing with the Model Path",
-    "Chinese": "Chinese",
-    "Compile Model": "Compile Model",
-    "Compile the model can significantly reduce the inference time, but will increase cold start time": "Compile the model can significantly reduce the inference time, but will increase cold start time",
-    "Copy": "Copy",
-    "Data Preprocessing": "Data Preprocessing",
-    "Data Preprocessing Path": "Data Preprocessing Path",
-    "Data Source": "Data Source",
-    "Decoder Model Config": "Decoder Model Config",
-    "Decoder Model Path": "Decoder Model Path",
-    "Disabled": "Disabled",
-    "Enable Reference Audio": "Enable Reference Audio",
-    "English": "English",
-    "Error Message": "Error Message",
-    "File Preprocessing": "File Preprocessing",
-    "Generate": "Generate",
-    "Generated Audio": "Generated Audio",
-    "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format",
-    "Infer interface is closed": "Infer interface is closed",
-    "Inference Configuration": "Inference Configuration",
-    "Inference Server Configuration": "Inference Server Configuration",
-    "Inference Server Error": "Inference Server Error",
-    "Inferring interface is launched at {}": "Inferring interface is launched at {}",
-    "Initial Learning Rate": "Initial Learning Rate",
-    "Input Audio & Source Path for Transcription": "Input Audio & Source Path for Transcription",
-    "Input Text": "Input Text",
-    "Invalid path: {}": "Invalid path: {}",
-    "It is recommended to use CUDA, if you have low configuration, use CPU": "It is recommended to use CUDA, if you have low configuration, use CPU",
-    "Iterative Prompt Length, 0 means off": "Iterative Prompt Length, 0 means off",
-    "Japanese": "Japanese",
-    "LLAMA Configuration": "LLAMA Configuration",
-    "LLAMA Model Config": "LLAMA Model Config",
-    "LLAMA Model Path": "LLAMA Model Path",
-    "Labeling Device": "Labeling Device",
-    "LoRA Model to be merged": "LoRA Model to be merged",
-    "Maximum Audio Duration": "Maximum Audio Duration",
-    "Maximum Length per Sample": "Maximum Length per Sample",
-    "Maximum Training Steps": "Maximum Training Steps",
-    "Maximum tokens per batch, 0 means no limit": "Maximum tokens per batch, 0 means no limit",
-    "Merge": "Merge",
-    "Merge LoRA": "Merge LoRA",
-    "Merge successfully": "Merge successfully",
-    "Minimum Audio Duration": "Minimum Audio Duration",
-    "Model Output Path": "Model Output Path",
-    "Model Size": "Model Size",
-    "Move": "Move",
-    "Move files successfully": "Move files successfully",
-    "No audio generated, please check the input text.": "No audio generated, please check the input text.",
-    "No selected options": "No selected options",
-    "Number of Workers": "Number of Workers",
-    "Open Inference Server": "Open Inference Server",
-    "Open Labeler WebUI": "Open Labeler WebUI",
-    "Open Tensorboard": "Open Tensorboard",
-    "Opened labeler in browser": "Opened labeler in browser",
-    "Optional Label Language": "Optional Label Language",
-    "Optional online ver": "Optional online ver",
-    "Output Path": "Output Path",
-    "Path error, please check the model file exists in the corresponding path": "Path error, please check the model file exists in the corresponding path",
-    "Precision": "Precision",
-    "Probability of applying Speaker Condition": "Probability of applying Speaker Condition",
-    "Put your text here.": "Put your text here.",
-    "Reference Audio": "Reference Audio",
-    "Reference Text": "Reference Text",
-    "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.",
-    "Remove Selected Data": "Remove Selected Data",
-    "Removed path successfully!": "Removed path successfully!",
-    "Repetition Penalty": "Repetition Penalty",
-    "Save model every n steps": "Save model every n steps",
-    "Select LLAMA ckpt": "Select LLAMA ckpt",
-    "Select VITS ckpt": "Select VITS ckpt",
-    "Select VQGAN ckpt": "Select VQGAN ckpt",
-    "Select source file processing method": "Select source file processing method",
-    "Select the model to be trained (Depending on the Tab page you are on)": "Select the model to be trained (Depending on the Tab page you are on)",
-    "Selected: {}": "Selected: {}",
-    "Speaker": "Speaker",
-    "Speaker is identified by the folder name": "Speaker is identified by the folder name",
-    "Start Training": "Start Training",
-    "Streaming Audio": "Streaming Audio",
-    "Streaming Generate": "Streaming Generate",
-    "Tensorboard Host": "Tensorboard Host",
-    "Tensorboard Log Path": "Tensorboard Log Path",
-    "Tensorboard Port": "Tensorboard Port",
-    "Tensorboard interface is closed": "Tensorboard interface is closed",
-    "Tensorboard interface is launched at {}": "Tensorboard interface is launched at {}",
-    "Text is too long, please keep it under {} characters.": "Text is too long, please keep it under {} characters.",
-    "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.",
-    "Training Configuration": "Training Configuration",
-    "Training Error": "Training Error",
-    "Training stopped": "Training stopped",
-    "Type name of the speaker": "Type name of the speaker",
-    "Type the path or select from the dropdown": "Type the path or select from the dropdown",
-    "Use LoRA": "Use LoRA",
-    "Use LoRA can save GPU memory, but may reduce the quality of the model": "Use LoRA can save GPU memory, but may reduce the quality of the model",
-    "Use filelist": "Use filelist",
-    "Use large for 10G+ GPU, medium for 5G, small for 2G": "Use large for 10G+ GPU, medium for 5G, small for 2G",
-    "VITS Configuration": "VITS Configuration",
-    "VQGAN Configuration": "VQGAN Configuration",
-    "Validation Batch Size": "Validation Batch Size",
-    "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "View the status of the preprocessing folder (use the slider to control the depth of the tree)",
-    "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.",
-    "WebUI Host": "WebUI Host",
-    "WebUI Port": "WebUI Port",
-    "Whisper Model": "Whisper Model",
-    "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).",
-    "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU",
-    "latest": "latest",
-    "new": "new"
+  "16-mixed is recommended for 10+ series GPU": "16-mixed is recommended for 10+ series GPU",
+  "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 to 10 seconds of reference audio, useful for specifying speaker.",
+  "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).",
+  "Accumulate Gradient Batches": "Accumulate Gradient Batches",
+  "Add to Processing Area": "Add to Processing Area",
+  "Added path successfully!": "Added path successfully!",
+  "Advanced Config": "Advanced Config",
+  "Base LLAMA Model": "Base LLAMA Model",
+  "Batch Inference": "Batch Inference",
+  "Batch Size": "Batch Size",
+  "Changing with the Model Path": "Changing with the Model Path",
+  "Chinese": "Chinese",
+  "Compile Model": "Compile Model",
+  "Compile the model can significantly reduce the inference time, but will increase cold start time": "Compile the model can significantly reduce the inference time, but will increase cold start time",
+  "Copy": "Copy",
+  "Data Preprocessing": "Data Preprocessing",
+  "Data Preprocessing Path": "Data Preprocessing Path",
+  "Data Source": "Data Source",
+  "Decoder Model Config": "Decoder Model Config",
+  "Decoder Model Path": "Decoder Model Path",
+  "Disabled": "Disabled",
+  "Enable Reference Audio": "Enable Reference Audio",
+  "English": "English",
+  "Error Message": "Error Message",
+  "File Preprocessing": "File Preprocessing",
+  "Generate": "Generate",
+  "Generated Audio": "Generated Audio",
+  "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format",
+  "Infer interface is closed": "Infer interface is closed",
+  "Inference Configuration": "Inference Configuration",
+  "Inference Server Configuration": "Inference Server Configuration",
+  "Inference Server Error": "Inference Server Error",
+  "Inferring interface is launched at {}": "Inferring interface is launched at {}",
+  "Initial Learning Rate": "Initial Learning Rate",
+  "Input Audio & Source Path for Transcription": "Input Audio & Source Path for Transcription",
+  "Input Text": "Input Text",
+  "Invalid path: {}": "Invalid path: {}",
+  "It is recommended to use CUDA, if you have low configuration, use CPU": "It is recommended to use CUDA, if you have low configuration, use CPU",
+  "Iterative Prompt Length, 0 means off": "Iterative Prompt Length, 0 means off",
+  "Japanese": "Japanese",
+  "LLAMA Configuration": "LLAMA Configuration",
+  "LLAMA Model Config": "LLAMA Model Config",
+  "LLAMA Model Path": "LLAMA Model Path",
+  "Labeling Device": "Labeling Device",
+  "LoRA Model to be merged": "LoRA Model to be merged",
+  "Maximum Audio Duration": "Maximum Audio Duration",
+  "Maximum Length per Sample": "Maximum Length per Sample",
+  "Maximum Training Steps": "Maximum Training Steps",
+  "Maximum tokens per batch, 0 means no limit": "Maximum tokens per batch, 0 means no limit",
+  "Merge": "Merge",
+  "Merge LoRA": "Merge LoRA",
+  "Merge successfully": "Merge successfully",
+  "Minimum Audio Duration": "Minimum Audio Duration",
+  "Model Output Path": "Model Output Path",
+  "Model Size": "Model Size",
+  "Move": "Move",
+  "Move files successfully": "Move files successfully",
+  "No audio generated, please check the input text.": "No audio generated, please check the input text.",
+  "No selected options": "No selected options",
+  "Number of Workers": "Number of Workers",
+  "Open Inference Server": "Open Inference Server",
+  "Open Labeler WebUI": "Open Labeler WebUI",
+  "Open Tensorboard": "Open Tensorboard",
+  "Opened labeler in browser": "Opened labeler in browser",
+  "Optional Label Language": "Optional Label Language",
+  "Optional online ver": "Optional online ver",
+  "Output Path": "Output Path",
+  "Path error, please check the model file exists in the corresponding path": "Path error, please check the model file exists in the corresponding path",
+  "Precision": "Precision",
+  "Probability of applying Speaker Condition": "Probability of applying Speaker Condition",
+  "Put your text here.": "Put your text here.",
+  "Reference Audio": "Reference Audio",
+  "Reference Text": "Reference Text",
+  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.",
+  "Remove Selected Data": "Remove Selected Data",
+  "Removed path successfully!": "Removed path successfully!",
+  "Repetition Penalty": "Repetition Penalty",
+  "Save model every n steps": "Save model every n steps",
+  "Select LLAMA ckpt": "Select LLAMA ckpt",
+  "Select VITS ckpt": "Select VITS ckpt",
+  "Select VQGAN ckpt": "Select VQGAN ckpt",
+  "Select source file processing method": "Select source file processing method",
+  "Select the model to be trained (Depending on the Tab page you are on)": "Select the model to be trained (Depending on the Tab page you are on)",
+  "Selected: {}": "Selected: {}",
+  "Speaker": "Speaker",
+  "Speaker is identified by the folder name": "Speaker is identified by the folder name",
+  "Start Training": "Start Training",
+  "Streaming Audio": "Streaming Audio",
+  "Streaming Generate": "Streaming Generate",
+  "Tensorboard Host": "Tensorboard Host",
+  "Tensorboard Log Path": "Tensorboard Log Path",
+  "Tensorboard Port": "Tensorboard Port",
+  "Tensorboard interface is closed": "Tensorboard interface is closed",
+  "Tensorboard interface is launched at {}": "Tensorboard interface is launched at {}",
+  "Text is too long, please keep it under {} characters.": "Text is too long, please keep it under {} characters.",
+  "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.",
+  "Training Configuration": "Training Configuration",
+  "Training Error": "Training Error",
+  "Training stopped": "Training stopped",
+  "Type name of the speaker": "Type name of the speaker",
+  "Type the path or select from the dropdown": "Type the path or select from the dropdown",
+  "Use LoRA": "Use LoRA",
+  "Use LoRA can save GPU memory, but may reduce the quality of the model": "Use LoRA can save GPU memory, but may reduce the quality of the model",
+  "Use filelist": "Use filelist",
+  "Use large for 10G+ GPU, medium for 5G, small for 2G": "Use large for 10G+ GPU, medium for 5G, small for 2G",
+  "VITS Configuration": "VITS Configuration",
+  "VQGAN Configuration": "VQGAN Configuration",
+  "Validation Batch Size": "Validation Batch Size",
+  "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "View the status of the preprocessing folder (use the slider to control the depth of the tree)",
+  "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.",
+  "WebUI Host": "WebUI Host",
+  "WebUI Port": "WebUI Port",
+  "Whisper Model": "Whisper Model",
+  "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).",
+  "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU",
+  "latest": "latest",
+  "new": "new",
+  "Realtime Transform Text": "Realtime Transform Text",
+  "Normalization Result Preview (Currently Only Chinese)": "Normalization Result Preview (Currently Only Chinese)",
+  "Text Normalization": "Text Normalization"
 }

+ 120 - 117
fish_speech/i18n/locale/es_ES.json

@@ -1,119 +1,122 @@
 {
-    "16-mixed is recommended for 10+ series GPU": "se recomienda 16-mixed para GPU de la serie 10+",
-    "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 a 10 segundos de audio de referencia, útil para especificar el hablante.",
-    "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "Un modelo de texto a voz basado en VQ-GAN y Llama desarrollado por [Fish Audio](https://fish.audio).",
-    "Accumulate Gradient Batches": "Acumular lotes de gradientes",
-    "Add to Processing Area": "Agregar al Área de Procesamiento",
-    "Added path successfully!": "¡Ruta agregada exitosamente!",
-    "Advanced Config": "Configuración Avanzada",
-    "Base LLAMA Model": "Modelo Base LLAMA",
-    "Batch Inference": "Inferencia por Lote",
-    "Batch Size": "Tamaño del Lote",
-    "Changing with the Model Path": "Cambiando con la Ruta del Modelo",
-    "Chinese": "Chino",
-    "Compile Model": "Compilar Modelo",
-    "Compile the model can significantly reduce the inference time, but will increase cold start time": "Compilar el modelo puede reducir significativamente el tiempo de inferencia, pero aumentará el tiempo de inicio en frío",
-    "Copy": "Copiar",
-    "Data Preprocessing": "Preprocesamiento de Datos",
-    "Data Preprocessing Path": "Ruta de Preprocesamiento de Datos",
-    "Data Source": "Fuente de Datos",
-    "Decoder Model Config": "Configuración del modelo decodificador",
-    "Decoder Model Path": "Ruta del modelo decodificador",
-    "Disabled": "Desactivado",
-    "Enable Reference Audio": "Habilitar Audio de Referencia",
-    "English": "Inglés",
-    "Error Message": "Mensaje de Error",
-    "File Preprocessing": "Preprocesamiento de Archivos",
-    "Generate": "Generar",
-    "Generated Audio": "Audio Generado",
-    "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "Si no hay texto correspondiente para el audio, aplique ASR para asistencia, soporte para formato .txt o .lab",
-    "Infer interface is closed": "La interfaz de inferencia está cerrada",
-    "Inference Configuration": "Configuración de Inferencia",
-    "Inference Server Configuration": "Configuración del Servidor de Inferencia",
-    "Inference Server Error": "Error del Servidor de Inferencia",
-    "Inferring interface is launched at {}": "La interfaz de inferencia se ha lanzado en {}",
-    "Initial Learning Rate": "Tasa de Aprendizaje Inicial",
-    "Input Audio & Source Path for Transcription": "Audio de Entrada y Ruta de Origen para Transcripción",
-    "Input Text": "Texto de Entrada",
-    "Invalid path: {}": "Ruta inválida: {}",
-    "It is recommended to use CUDA, if you have low configuration, use CPU": "Se recomienda usar CUDA, si tiene una configuración baja, use CPU",
-    "Iterative Prompt Length, 0 means off": "Longitud de la Indicación Iterativa, 0 significa apagado",
-    "Japanese": "Japonés",
-    "LLAMA Configuration": "Configuración de LLAMA",
-    "LLAMA Model Config": "Configuración del Modelo LLAMA",
-    "LLAMA Model Path": "Ruta del Modelo LLAMA",
-    "Labeling Device": "Dispositivo de Etiquetado",
-    "LoRA Model to be merged": "Modelo LoRA a fusionar",
-    "Maximum Audio Duration": "Duración máxima de audio",
-    "Maximum Length per Sample": "Longitud Máxima por Muestra",
-    "Maximum Training Steps": "Pasos Máximos de Entrenamiento",
-    "Maximum tokens per batch, 0 means no limit": "Máximo de tokens por lote, 0 significa sin límite",
-    "Merge": "Fusionar",
-    "Merge LoRA": "Fusionar LoRA",
-    "Merge successfully": "Fusionado exitosamente",
-    "Minimum Audio Duration": "Duración mínima de audio",
-    "Model Output Path": "Ruta de Salida del Modelo",
-    "Model Size": "Tamaño del Modelo",
-    "Move": "Mover",
-    "Move files successfully": "Archivos movidos exitosamente",
-    "No audio generated, please check the input text.": "No se generó audio, por favor verifique el texto de entrada.",
-    "No selected options": "No hay opciones seleccionadas",
-    "Number of Workers": "Número de Trabajadores",
-    "Open Inference Server": "Abrir Servidor de Inferencia",
-    "Open Labeler WebUI": "Abrir Interfaz Web del Etiquetador",
-    "Open Tensorboard": "Abrir Tensorboard",
-    "Opened labeler in browser": "Se abrió el etiquetador en el navegador",
-    "Optional Label Language": "Idioma de Etiquetado Opcional",
-    "Optional online ver": "Ver en línea opcional",
-    "Output Path": "Ruta de Salida",
-    "Path error, please check the model file exists in the corresponding path": "Error de ruta, por favor verifique que el archivo del modelo exista en la ruta correspondiente",
-    "Precision": "Precisión",
-    "Probability of applying Speaker Condition": "Probabilidad de aplicar Condición de Hablante",
-    "Put your text here.": "Ponga su texto aquí.",
-    "Reference Audio": "Audio de Referencia",
-    "Reference Text": "Texto de Referencia",
-    "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "El código relacionado se publica bajo la Licencia BSD-3-Clause, y los pesos se publican bajo la Licencia CC BY-NC-SA 4.0.",
-    "Remove Selected Data": "Eliminar Datos Seleccionados",
-    "Removed path successfully!": "¡Ruta eliminada exitosamente!",
-    "Repetition Penalty": "Penalización por Repetición",
-    "Save model every n steps": "Guardar modelo cada n pasos",
-    "Select LLAMA ckpt": "Seleccionar punto de control LLAMA",
-    "Select VITS ckpt": "Seleccionar punto de control VITS",
-    "Select VQGAN ckpt": "Seleccionar punto de control VQGAN",
-    "Select source file processing method": "Seleccione el método de procesamiento de archivos fuente",
-    "Select the model to be trained (Depending on the Tab page you are on)": "Seleccione el modelo a entrenar (Dependiendo de la pestaña en la que se encuentre)",
-    "Selected: {}": "Seleccionado: {}",
-    "Speaker": "Hablante",
-    "Speaker is identified by the folder name": "El hablante se identifica por el nombre de la carpeta",
-    "Start Training": "Iniciar Entrenamiento",
-    "Streaming Audio": "transmisión de audio",
-    "Streaming Generate": "síntesis en flujo",
-    "Tensorboard Host": "Host de Tensorboard",
-    "Tensorboard Log Path": "Ruta de Registro de Tensorboard",
-    "Tensorboard Port": "Puerto de Tensorboard",
-    "Tensorboard interface is closed": "La interfaz de Tensorboard está cerrada",
-    "Tensorboard interface is launched at {}": "La interfaz de Tensorboard se ha lanzado en {}",
-    "Text is too long, please keep it under {} characters.": "El texto es demasiado largo, por favor manténgalo por debajo de {} caracteres.",
-    "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "La ruta de la carpeta de entrada a la izquierda o la lista de archivos. Ya sea que esté marcado o no, se utilizará para el entrenamiento posterior en esta lista.",
-    "Training Configuration": "Configuración de Entrenamiento",
-    "Training Error": "Error de Entrenamiento",
-    "Training stopped": "Entrenamiento detenido",
-    "Type name of the speaker": "Escriba el nombre del hablante",
-    "Type the path or select from the dropdown": "Escriba la ruta o seleccione de la lista desplegable",
-    "Use LoRA": "Usar LoRA",
-    "Use LoRA can save GPU memory, but may reduce the quality of the model": "Usar LoRA puede ahorrar memoria GPU, pero puede reducir la calidad del modelo",
-    "Use filelist": "Usar lista de archivos",
-    "Use large for 10G+ GPU, medium for 5G, small for 2G": "Use grande para GPU de 10G+, mediano para 5G, pequeño para 2G",
-    "VITS Configuration": "Configuración de VITS",
-    "VQGAN Configuration": "Configuración de VQGAN",
-    "Validation Batch Size": "Tamaño del Lote de Validación",
-    "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "Vea el estado de la carpeta de preprocesamiento (use el control deslizante para controlar la profundidad del árbol)",
-    "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "No somos responsables de ningún mal uso del modelo, por favor considere sus leyes y regulaciones locales antes de usarlo.",
-    "WebUI Host": "Host de WebUI",
-    "WebUI Port": "Puerto de WebUI",
-    "Whisper Model": "Modelo Whisper",
-    "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "Puede encontrar el código fuente [aquí](https://github.com/fishaudio/fish-speech) y los modelos [aquí](https://huggingface.co/fishaudio/fish-speech-1).",
-    "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "Se recomienda bf16-true para GPU de la serie 30+, se recomienda 16-mixed para GPU de la serie 10+",
-    "latest": "más reciente",
-    "new": "nuevo"
+  "16-mixed is recommended for 10+ series GPU": "se recomienda 16-mixed para GPU de la serie 10+",
+  "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 a 10 segundos de audio de referencia, útil para especificar el hablante.",
+  "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "Un modelo de texto a voz basado en VQ-GAN y Llama desarrollado por [Fish Audio](https://fish.audio).",
+  "Accumulate Gradient Batches": "Acumular lotes de gradientes",
+  "Add to Processing Area": "Agregar al Área de Procesamiento",
+  "Added path successfully!": "¡Ruta agregada exitosamente!",
+  "Advanced Config": "Configuración Avanzada",
+  "Base LLAMA Model": "Modelo Base LLAMA",
+  "Batch Inference": "Inferencia por Lote",
+  "Batch Size": "Tamaño del Lote",
+  "Changing with the Model Path": "Cambiando con la Ruta del Modelo",
+  "Chinese": "Chino",
+  "Compile Model": "Compilar Modelo",
+  "Compile the model can significantly reduce the inference time, but will increase cold start time": "Compilar el modelo puede reducir significativamente el tiempo de inferencia, pero aumentará el tiempo de inicio en frío",
+  "Copy": "Copiar",
+  "Data Preprocessing": "Preprocesamiento de Datos",
+  "Data Preprocessing Path": "Ruta de Preprocesamiento de Datos",
+  "Data Source": "Fuente de Datos",
+  "Decoder Model Config": "Configuración del modelo decodificador",
+  "Decoder Model Path": "Ruta del modelo decodificador",
+  "Disabled": "Desactivado",
+  "Enable Reference Audio": "Habilitar Audio de Referencia",
+  "English": "Inglés",
+  "Error Message": "Mensaje de Error",
+  "File Preprocessing": "Preprocesamiento de Archivos",
+  "Generate": "Generar",
+  "Generated Audio": "Audio Generado",
+  "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "Si no hay texto correspondiente para el audio, aplique ASR para asistencia, soporte para formato .txt o .lab",
+  "Infer interface is closed": "La interfaz de inferencia está cerrada",
+  "Inference Configuration": "Configuración de Inferencia",
+  "Inference Server Configuration": "Configuración del Servidor de Inferencia",
+  "Inference Server Error": "Error del Servidor de Inferencia",
+  "Inferring interface is launched at {}": "La interfaz de inferencia se ha lanzado en {}",
+  "Initial Learning Rate": "Tasa de Aprendizaje Inicial",
+  "Input Audio & Source Path for Transcription": "Audio de Entrada y Ruta de Origen para Transcripción",
+  "Input Text": "Texto de Entrada",
+  "Invalid path: {}": "Ruta inválida: {}",
+  "It is recommended to use CUDA, if you have low configuration, use CPU": "Se recomienda usar CUDA, si tiene una configuración baja, use CPU",
+  "Iterative Prompt Length, 0 means off": "Longitud de la Indicación Iterativa, 0 significa apagado",
+  "Japanese": "Japonés",
+  "LLAMA Configuration": "Configuración de LLAMA",
+  "LLAMA Model Config": "Configuración del Modelo LLAMA",
+  "LLAMA Model Path": "Ruta del Modelo LLAMA",
+  "Labeling Device": "Dispositivo de Etiquetado",
+  "LoRA Model to be merged": "Modelo LoRA a fusionar",
+  "Maximum Audio Duration": "Duración máxima de audio",
+  "Maximum Length per Sample": "Longitud Máxima por Muestra",
+  "Maximum Training Steps": "Pasos Máximos de Entrenamiento",
+  "Maximum tokens per batch, 0 means no limit": "Máximo de tokens por lote, 0 significa sin límite",
+  "Merge": "Fusionar",
+  "Merge LoRA": "Fusionar LoRA",
+  "Merge successfully": "Fusionado exitosamente",
+  "Minimum Audio Duration": "Duración mínima de audio",
+  "Model Output Path": "Ruta de Salida del Modelo",
+  "Model Size": "Tamaño del Modelo",
+  "Move": "Mover",
+  "Move files successfully": "Archivos movidos exitosamente",
+  "No audio generated, please check the input text.": "No se generó audio, por favor verifique el texto de entrada.",
+  "No selected options": "No hay opciones seleccionadas",
+  "Number of Workers": "Número de Trabajadores",
+  "Open Inference Server": "Abrir Servidor de Inferencia",
+  "Open Labeler WebUI": "Abrir Interfaz Web del Etiquetador",
+  "Open Tensorboard": "Abrir Tensorboard",
+  "Opened labeler in browser": "Se abrió el etiquetador en el navegador",
+  "Optional Label Language": "Idioma de Etiquetado Opcional",
+  "Optional online ver": "Ver en línea opcional",
+  "Output Path": "Ruta de Salida",
+  "Path error, please check the model file exists in the corresponding path": "Error de ruta, por favor verifique que el archivo del modelo exista en la ruta correspondiente",
+  "Precision": "Precisión",
+  "Probability of applying Speaker Condition": "Probabilidad de aplicar Condición de Hablante",
+  "Put your text here.": "Ponga su texto aquí.",
+  "Reference Audio": "Audio de Referencia",
+  "Reference Text": "Texto de Referencia",
+  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "El código relacionado se publica bajo la Licencia BSD-3-Clause, y los pesos se publican bajo la Licencia CC BY-NC-SA 4.0.",
+  "Remove Selected Data": "Eliminar Datos Seleccionados",
+  "Removed path successfully!": "¡Ruta eliminada exitosamente!",
+  "Repetition Penalty": "Penalización por Repetición",
+  "Save model every n steps": "Guardar modelo cada n pasos",
+  "Select LLAMA ckpt": "Seleccionar punto de control LLAMA",
+  "Select VITS ckpt": "Seleccionar punto de control VITS",
+  "Select VQGAN ckpt": "Seleccionar punto de control VQGAN",
+  "Select source file processing method": "Seleccione el método de procesamiento de archivos fuente",
+  "Select the model to be trained (Depending on the Tab page you are on)": "Seleccione el modelo a entrenar (Dependiendo de la pestaña en la que se encuentre)",
+  "Selected: {}": "Seleccionado: {}",
+  "Speaker": "Hablante",
+  "Speaker is identified by the folder name": "El hablante se identifica por el nombre de la carpeta",
+  "Start Training": "Iniciar Entrenamiento",
+  "Streaming Audio": "transmisión de audio",
+  "Streaming Generate": "síntesis en flujo",
+  "Tensorboard Host": "Host de Tensorboard",
+  "Tensorboard Log Path": "Ruta de Registro de Tensorboard",
+  "Tensorboard Port": "Puerto de Tensorboard",
+  "Tensorboard interface is closed": "La interfaz de Tensorboard está cerrada",
+  "Tensorboard interface is launched at {}": "La interfaz de Tensorboard se ha lanzado en {}",
+  "Text is too long, please keep it under {} characters.": "El texto es demasiado largo, por favor manténgalo por debajo de {} caracteres.",
+  "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "La ruta de la carpeta de entrada a la izquierda o la lista de archivos. Ya sea que esté marcado o no, se utilizará para el entrenamiento posterior en esta lista.",
+  "Training Configuration": "Configuración de Entrenamiento",
+  "Training Error": "Error de Entrenamiento",
+  "Training stopped": "Entrenamiento detenido",
+  "Type name of the speaker": "Escriba el nombre del hablante",
+  "Type the path or select from the dropdown": "Escriba la ruta o seleccione de la lista desplegable",
+  "Use LoRA": "Usar LoRA",
+  "Use LoRA can save GPU memory, but may reduce the quality of the model": "Usar LoRA puede ahorrar memoria GPU, pero puede reducir la calidad del modelo",
+  "Use filelist": "Usar lista de archivos",
+  "Use large for 10G+ GPU, medium for 5G, small for 2G": "Use grande para GPU de 10G+, mediano para 5G, pequeño para 2G",
+  "VITS Configuration": "Configuración de VITS",
+  "VQGAN Configuration": "Configuración de VQGAN",
+  "Validation Batch Size": "Tamaño del Lote de Validación",
+  "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "Vea el estado de la carpeta de preprocesamiento (use el control deslizante para controlar la profundidad del árbol)",
+  "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "No somos responsables de ningún mal uso del modelo, por favor considere sus leyes y regulaciones locales antes de usarlo.",
+  "WebUI Host": "Host de WebUI",
+  "WebUI Port": "Puerto de WebUI",
+  "Whisper Model": "Modelo Whisper",
+  "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "Puede encontrar el código fuente [aquí](https://github.com/fishaudio/fish-speech) y los modelos [aquí](https://huggingface.co/fishaudio/fish-speech-1).",
+  "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "Se recomienda bf16-true para GPU de la serie 30+, se recomienda 16-mixed para GPU de la serie 10+",
+  "latest": "más reciente",
+  "new": "nuevo",
+  "Realtime Transform Text": "Transformación de Texto en Tiempo Real",
+  "Normalization Result Preview (Currently Only Chinese)": "Vista Previa del Resultado de Normalización (Actualmente Solo Chino)",
+  "Text Normalization": "Normalización de Texto"
 }

+ 121 - 117
fish_speech/i18n/locale/ja_JP.json

@@ -1,119 +1,123 @@
 {
-    "16-mixed is recommended for 10+ series GPU": "10シリーズ以降のGPUには16-mixedをお勧めします",
-    "5 to 10 seconds of reference audio, useful for specifying speaker.": "話者を指定するのに役立つ、5~10秒のリファレンスオーディオ。",
-    "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "[Fish Audio](https://fish.audio)が開発したVQ-GANとLlamaに基づくテキスト音声合成モデル。",
-    "Accumulate Gradient Batches": "勾配バッチの累積",
-    "Add to Processing Area": "処理エリアに追加",
-    "Added path successfully!": "パスの追加に成功しました!",
-    "Advanced Config": "詳細設定",
-    "Base LLAMA Model": "基本LLAMAモデル",
-    "Batch Inference": "バッチ推論",
-    "Batch Size": "バッチサイズ",
-    "Changing with the Model Path": "モデルのパスに伴って変化する",
-    "Chinese": "中国語",
-    "Compile Model": "モデルのコンパイル",
-    "Compile the model can significantly reduce the inference time, but will increase cold start time": "モデルをコンパイルすると推論時間を大幅に短縮できますが、コールドスタート時間が長くなります",
-    "Copy": "コピー",
-    "Data Preprocessing": "データ前処理",
-    "Data Preprocessing Path": "データ前処理パス",
-    "Data Source": "データソース",
-    "Decoder Model Config": "デコーダーモデルの構成",
-    "Decoder Model Path": "デコーダーモデルのパス",
-    "Disabled": "無効",
-    "Enable Reference Audio": "リファレンスオーディオを有効にする",
-    "English": "英語",
-    "Error Message": "エラーメッセージ",
-    "File Preprocessing": "文書前处理",
-    "Generate": "生成",
-    "Generated Audio": "生成されたオーディオ",
-    "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "音声に対応するテキストがない場合は、ASRを適用してサポートします。.txtまたは.lab形式をサポートしています",
-    "Infer interface is closed": "推論インターフェースが閉じられています",
-    "Inference Configuration": "推論設定",
-    "Inference Server Configuration": "推論サーバー設定",
-    "Inference Server Error": "推論サーバーエラー",
-    "Inferring interface is launched at {}": "推論インターフェースが{}で起動しました",
-    "Initial Learning Rate": "初期学習率",
-    "Input Audio & Source Path for Transcription": "入力オーディオと文字起こしのソースパス",
-    "Input Text": "入力テキスト",
-    "Invalid path: {}": "無効なパス: {}",
-    "It is recommended to use CUDA, if you have low configuration, use CPU": "CUDAの使用をお勧めします。低い構成の場合はCPUを使用してください",
-    "Iterative Prompt Length, 0 means off": "反復プロンプト長。0はオフを意味します",
-    "Japanese": "日本語",
-    "LLAMA Configuration": "LLAMA設定",
-    "LLAMA Model Config": "LLAMAモデル設定",
-    "LLAMA Model Path": "LLAMAモデルパス",
-    "Labeling Device": "ラベリングデバイス",
-    "LoRA Model to be merged": "マージするLoRAモデル",
-    "Maximum Audio Duration": "最大オーディオの長さ",
-    "Maximum Length per Sample": "サンプルあたりの最大長",
-    "Maximum Training Steps": "最大トレーニングステップ数",
-    "Maximum tokens per batch, 0 means no limit": "バッチあたりの最大トークン数。0は制限なしを意味します",
-    "Merge": "マージ",
-    "Merge LoRA": "LoRAのマージ",
-    "Merge successfully": "マージに成功しました",
-    "Minimum Audio Duration": "最小オーディオの長さ",
-    "Model Output Path": "モデル出力パス",
-    "Model Size": "モデルサイズ",
-    "Move": "移動",
-    "Move files successfully": "ファイルの移動に成功しました",
-    "No audio generated, please check the input text.": "オーディオが生成されていません。入力テキストを確認してください。",
-    "No selected options": "選択されたオプションはありません",
-    "Number of Workers": "ワーカー数",
-    "Open Inference Server": "推論サーバーを開く",
-    "Open Labeler WebUI": "ラベラーWebUIを開く",
-    "Open Tensorboard": "Tensorboardを開く",
-    "Opened labeler in browser": "ブラウザでラベラーを開きました",
-    "Optional Label Language": "オプションのラベル言語",
-    "Optional online ver": "オプションのオンラインバージョン",
-    "Output Path": "出力パス",
-    "Path error, please check the model file exists in the corresponding path": "パスエラー。対応するパスにモデルファイルが存在するか確認してください",
-    "Precision": "精度",
-    "Probability of applying Speaker Condition": "話者条件を適用する確率",
-    "Put your text here.": "ここにテキストを入力してください。",
-    "Reference Audio": "リファレンスオーディオ",
-    "Reference Text": "リファレンステキスト",
-    "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "関連コードはBSD-3-Clauseライセンスの下でリリースされ、重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。",
-    "Remove Selected Data": "選択したデータを削除",
-    "Removed path successfully!": "パスの削除に成功しました!",
-    "Repetition Penalty": "反復ペナルティ",
-    "Save model every n steps": "nステップごとにモデルを保存",
-    "Select LLAMA ckpt": " LLAMA チェックポイントを選択",
-    "Select VITS ckpt": "VITS チェックポイントを選択",
-    "Select VQGAN ckpt": "VQGAN チェックポイントを選択",
-    "Select source file processing method": "ソースファイルの処理方法を選択",
-    "Select the model to be trained (Depending on the Tab page you are on)": "タブページに応じてトレーニングするモデルを選択してください",
-    "Selected: {}": "選択済み: {}",
-    "Speaker": "話者",
-    "Speaker is identified by the folder name": "話者はフォルダ名で識別されます",
-    "Start Training": "トレーニング開始",
-    "Streaming Audio": "ストリーミングオーディオ",
-    "Streaming Generate": "ストリーミング合成",
-    "Tensorboard Host": "Tensorboardホスト",
-    "Tensorboard Log Path": "Tensorboardログパス",
-    "Tensorboard Port": "Tensorboardポート",
-    "Tensorboard interface is closed": "Tensorboardインターフェースが閉じられています",
-    "Tensorboard interface is launched at {}": "Tensorboardインターフェースが{}で起動されました",
-    "Text is too long, please keep it under {} characters.": "テキストが長すぎます。{}文字以内に抑えてください。",
-    "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "左側の入力フォルダまたはファイルリストのパス。チェックの有無にかかわらず、このリストの後続のトレーニングに使用されます。",
-    "Training Configuration": "トレーニング設定",
-    "Training Error": "トレーニングエラー",
-    "Training stopped": "トレーニングが停止しました",
-    "Type name of the speaker": "話者の名前を入力",
-    "Type the path or select from the dropdown": "パスを入力するか、ドロップダウンから選択してください",
-    "Use LoRA": "LoRAを使用",
-    "Use LoRA can save GPU memory, but may reduce the quality of the model": "LoRAを使用するとGPUメモリを節約できますが、モデルの品質が低下する可能性があります",
-    "Use filelist": "ファイルリストを使用",
-    "Use large for 10G+ GPU, medium for 5G, small for 2G": "10G以上のGPUには大、5Gには中、2Gには小を使用してください",
-    "VITS Configuration": "VITS の構成",
-    "VQGAN Configuration": "VQGAN の構成",
-    "Validation Batch Size": "検証バッチサイズ",
-    "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "前処理フォルダの状態を表示(スライダーを使用してツリーの深さを制御)",
-    "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "モデルの誤用については一切責任を負いません。使用する前に、現地の法律と規制を考慮してください。",
-    "WebUI Host": "WebUIホスト",
-    "WebUI Port": "WebUIポート",
-    "Whisper Model": "Whisperモデル",
-    "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "ソースコードは[こちら](https://github.com/fishaudio/fish-speech)、モデルは[こちら](https://huggingface.co/fishaudio/fish-speech-1)にあります。",
-    "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "30シリーズ以降のGPUにはbf16-trueを、10シリーズ以降のGPUには16-mixedをお勧めします",
-    "latest": "最新",
-    "new": "新規"
+  "16-mixed is recommended for 10+ series GPU": "10シリーズ以降のGPUには16-mixedをお勧めします",
+  "5 to 10 seconds of reference audio, useful for specifying speaker.": "話者を指定するのに役立つ、5~10秒のリファレンスオーディオ。",
+  "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "[Fish Audio](https://fish.audio)が開発したVQ-GANとLlamaに基づくテキスト音声合成モデル。",
+  "Accumulate Gradient Batches": "勾配バッチの累積",
+  "Add to Processing Area": "処理エリアに追加",
+  "Added path successfully!": "パスの追加に成功しました!",
+  "Advanced Config": "詳細設定",
+  "Base LLAMA Model": "基本LLAMAモデル",
+  "Batch Inference": "バッチ推論",
+  "Batch Size": "バッチサイズ",
+  "Changing with the Model Path": "モデルのパスに伴って変化する",
+  "Chinese": "中国語",
+  "Compile Model": "モデルのコンパイル",
+  "Compile the model can significantly reduce the inference time, but will increase cold start time": "モデルをコンパイルすると推論時間を大幅に短縮できますが、コールドスタート時間が長くなります",
+  "Copy": "コピー",
+  "Data Preprocessing": "データ前処理",
+  "Data Preprocessing Path": "データ前処理パス",
+  "Data Source": "データソース",
+  "Decoder Model Config": "デコーダーモデルの構成",
+  "Decoder Model Path": "デコーダーモデルのパス",
+  "Disabled": "無効",
+  "Enable Reference Audio": "リファレンスオーディオを有効にする",
+  "English": "英語",
+  "Error Message": "エラーメッセージ",
+  "File Preprocessing": "ファイル前処理",
+  "Generate": "生成",
+  "Generated Audio": "生成されたオーディオ",
+  "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "音声に対応するテキストがない場合は、ASRを適用してサポートします。.txtまたは.lab形式をサポートしています",
+  "Infer interface is closed": "推論インターフェースが閉じられています",
+  "Inference Configuration": "推論設定",
+  "Inference Server Configuration": "推論サーバー設定",
+  "Inference Server Error": "推論サーバーエラー",
+  "Inferring interface is launched at {}": "推論インターフェースが{}で起動しました",
+  "Initial Learning Rate": "初期学習率",
+  "Input Audio & Source Path for Transcription": "入力オーディオと文字起こしのソースパス",
+  "Input Text": "入力テキスト",
+  "Invalid path: {}": "無効なパス: {}",
+  "It is recommended to use CUDA, if you have low configuration, use CPU": "CUDAの使用をお勧めします。低い構成の場合はCPUを使用してください",
+  "Iterative Prompt Length, 0 means off": "反復プロンプト長。0はオフを意味します",
+  "Japanese": "日本語",
+  "LLAMA Configuration": "LLAMA設定",
+  "LLAMA Model Config": "LLAMAモデル設定",
+  "LLAMA Model Path": "LLAMAモデルパス",
+  "Labeling Device": "ラベリングデバイス",
+  "LoRA Model to be merged": "マージするLoRAモデル",
+  "Maximum Audio Duration": "最大オーディオの長さ",
+  "Maximum Length per Sample": "サンプルあたりの最大長",
+  "Maximum Training Steps": "最大トレーニングステップ数",
+  "Maximum tokens per batch, 0 means no limit": "バッチあたりの最大トークン数。0は制限なしを意味します",
+  "Merge": "マージ",
+  "Merge LoRA": "LoRAのマージ",
+  "Merge successfully": "マージに成功しました",
+  "Minimum Audio Duration": "最小オーディオの長さ",
+  "Model Output Path": "モデル出力パス",
+  "Model Size": "モデルサイズ",
+  "Move": "移動",
+  "Move files successfully": "ファイルの移動に成功しました",
+  "No audio generated, please check the input text.": "オーディオが生成されていません。入力テキストを確認してください。",
+  "No selected options": "選択されたオプションはありません",
+  "Number of Workers": "ワーカー数",
+  "Open Inference Server": "推論サーバーを開く",
+  "Open Labeler WebUI": "ラベラーWebUIを開く",
+  "Open Tensorboard": "Tensorboardを開く",
+  "Opened labeler in browser": "ブラウザでラベラーを開きました",
+  "Optional Label Language": "オプションのラベル言語",
+  "Optional online ver": "オプションのオンラインバージョン",
+  "Output Path": "出力パス",
+  "Path error, please check the model file exists in the corresponding path": "パスエラー。対応するパスにモデルファイルが存在するか確認してください",
+  "Precision": "精度",
+  "Probability of applying Speaker Condition": "話者条件を適用する確率",
+  "Put your text here.": "ここにテキストを入力してください。",
+  "Reference Audio": "リファレンスオーディオ",
+  "Reference Text": "リファレンステキスト",
+  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "関連コードはBSD-3-Clauseライセンスの下でリリースされ、重みはCC BY-NC-SA 4.0ライセンスの下でリリースされます。",
+  "Remove Selected Data": "選択したデータを削除",
+  "Removed path successfully!": "パスの削除に成功しました!",
+  "Repetition Penalty": "反復ペナルティ",
+  "Save model every n steps": "nステップごとにモデルを保存",
+  "Select LLAMA ckpt": " LLAMA チェックポイントを選択",
+  "Select VITS ckpt": "VITS チェックポイントを選択",
+  "Select VQGAN ckpt": "VQGAN チェックポイントを選択",
+  "Select source file processing method": "ソースファイルの処理方法を選択",
+  "Select the model to be trained (Depending on the Tab page you are on)": "タブページに応じてトレーニングするモデルを選択してください",
+  "Selected: {}": "選択済み: {}",
+  "Speaker": "話者",
+  "Speaker is identified by the folder name": "話者はフォルダ名で識別されます",
+  "Start Training": "トレーニング開始",
+  "Streaming Audio": "ストリーミングオーディオ",
+  "Streaming Generate": "ストリーミング合成",
+  "Tensorboard Host": "Tensorboardホスト",
+  "Tensorboard Log Path": "Tensorboardログパス",
+  "Tensorboard Port": "Tensorboardポート",
+  "Tensorboard interface is closed": "Tensorboardインターフェースが閉じられています",
+  "Tensorboard interface is launched at {}": "Tensorboardインターフェースが{}で起動されました",
+  "Text is too long, please keep it under {} characters.": "テキストが長すぎます。{}文字以内に抑えてください。",
+  "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "左側の入力フォルダまたはファイルリストのパス。チェックの有無にかかわらず、このリストの後続のトレーニングに使用されます。",
+  "Training Configuration": "トレーニング設定",
+  "Training Error": "トレーニングエラー",
+  "Training stopped": "トレーニングが停止しました",
+  "Type name of the speaker": "話者の名前を入力",
+  "Type the path or select from the dropdown": "パスを入力するか、ドロップダウンから選択してください",
+  "Use LoRA": "LoRAを使用",
+  "Use LoRA can save GPU memory, but may reduce the quality of the model": "LoRAを使用するとGPUメモリを節約できますが、モデルの品質が低下する可能性があります",
+  "Use filelist": "ファイルリストを使用",
+  "Use large for 10G+ GPU, medium for 5G, small for 2G": "10G以上のGPUには大、5Gには中、2Gには小を使用してください",
+  "VITS Configuration": "VITS の構成",
+  "VQGAN Configuration": "VQGAN の構成",
+  "Validation Batch Size": "検証バッチサイズ",
+  "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "前処理フォルダの状態を表示(スライダーを使用してツリーの深さを制御)",
+  "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "モデルの誤用については一切責任を負いません。使用する前に、現地の法律と規制を考慮してください。",
+  "WebUI Host": "WebUIホスト",
+  "WebUI Port": "WebUIポート",
+  "Whisper Model": "Whisperモデル",
+  "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "ソースコードは[こちら](https://github.com/fishaudio/fish-speech)、モデルは[こちら](https://huggingface.co/fishaudio/fish-speech-1)にあります。",
+  "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "30シリーズ以降のGPUにはbf16-trueを、10シリーズ以降のGPUには16-mixedをお勧めします",
+  "latest": "最新",
+  "new": "新規",
+  "Realtime Transform Text": "リアルタイム変換テキスト",
+  "Normalization Result Preview (Currently Only Chinese)": "正規化結果プレビュー(現在は中国語のみ)",
+  "Text Normalization": "テキスト正規化"
+
 }

+ 120 - 117
fish_speech/i18n/locale/zh_CN.json

@@ -1,119 +1,122 @@
 {
-    "16-mixed is recommended for 10+ series GPU": "10+ 系列 GPU 建议使用 16-mixed",
-    "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 到 10 秒的参考音频,适用于指定音色。",
-    "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "由 [Fish Audio](https://fish.audio) 研发的基于 VQ-GAN 和 Llama 的多语种语音合成.",
-    "Accumulate Gradient Batches": "梯度累积批次",
-    "Add to Processing Area": "加入处理区",
-    "Added path successfully!": "添加路径成功!",
-    "Advanced Config": "高级参数",
-    "Base LLAMA Model": "基础 LLAMA 模型",
-    "Batch Inference": "批量推理",
-    "Batch Size": "批次大小",
-    "Changing with the Model Path": "随模型路径变化",
-    "Chinese": "中文",
-    "Compile Model": "编译模型",
-    "Compile the model can significantly reduce the inference time, but will increase cold start time": "编译模型可以显著减少推理时间,但会增加冷启动时间",
-    "Copy": "复制",
-    "Data Preprocessing": "数据预处理",
-    "Data Preprocessing Path": "数据预处理路径",
-    "Data Source": "数据源",
-    "Decoder Model Config": "解码器模型配置",
-    "Decoder Model Path": "解码器模型路径",
-    "Disabled": "禁用",
-    "Enable Reference Audio": "启用参考音频",
-    "English": "英文",
-    "Error Message": "错误信息",
-    "File Preprocessing": "文件预处理",
-    "Generate": "生成",
-    "Generated Audio": "音频",
-    "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "如果音频没有对应的文本,可以应用 ASR 辅助,支持 .txt 或 .lab 格式",
-    "Infer interface is closed": "推理界面已关闭",
-    "Inference Configuration": "推理配置",
-    "Inference Server Configuration": "推理服务器配置",
-    "Inference Server Error": "推理服务器错误",
-    "Inferring interface is launched at {}": "推理界面已在 {} 上启动",
-    "Initial Learning Rate": "初始学习率",
-    "Input Audio & Source Path for Transcription": "输入音频和转录源路径",
-    "Input Text": "输入文本",
-    "Invalid path: {}": "无效路径: {}",
-    "It is recommended to use CUDA, if you have low configuration, use CPU": "建议使用 CUDA,如果配置较低,使用 CPU",
-    "Iterative Prompt Length, 0 means off": "迭代提示长度,0 表示关闭",
-    "Japanese": "日文",
-    "LLAMA Configuration": "LLAMA 配置",
-    "LLAMA Model Config": "LLAMA 模型配置",
-    "LLAMA Model Path": "LLAMA 模型路径",
-    "Labeling Device": "标注加速设备",
-    "LoRA Model to be merged": "要合并的 LoRA 模型",
-    "Maximum Audio Duration": "最大音频时长",
-    "Maximum Length per Sample": "每个样本的最大长度",
-    "Maximum Training Steps": "最大训练步数",
-    "Maximum tokens per batch, 0 means no limit": "每批最大令牌数,0 表示无限制",
-    "Merge": "合并",
-    "Merge LoRA": "合并 LoRA",
-    "Merge successfully": "合并成功",
-    "Minimum Audio Duration": "最小音频时长",
-    "Model Output Path": "模型输出路径",
-    "Model Size": "模型规模",
-    "Move": "移动",
-    "Move files successfully": "移动文件成功",
-    "No audio generated, please check the input text.": "没有生成音频,请检查输入文本.",
-    "No selected options": "没有选择的选项",
-    "Number of Workers": "数据加载进程数",
-    "Open Inference Server": "打开推理服务器",
-    "Open Labeler WebUI": "打开标注工具",
-    "Open Tensorboard": "打开 Tensorboard",
-    "Opened labeler in browser": "在浏览器中打开标注工具",
-    "Optional Label Language": "[可选] 标注语言",
-    "Optional online ver": "[可选] 使用在线版",
-    "Output Path": "输出路径",
-    "Path error, please check the model file exists in the corresponding path": "路径错误,请检查模型文件是否存在于相应路径",
-    "Precision": "精度",
-    "Probability of applying Speaker Condition": "应用说话人条件的概率",
-    "Put your text here.": "在此处输入文本.",
-    "Reference Audio": "参考音频",
-    "Reference Text": "参考文本",
-    "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.",
-    "Remove Selected Data": "移除选中数据",
-    "Removed path successfully!": "移除路径成功!",
-    "Repetition Penalty": "重复惩罚",
-    "Save model every n steps": "每 n 步保存模型",
-    "Select LLAMA ckpt": "选择 LLAMA 检查点",
-    "Select VITS ckpt": "选择 VITS 检查点",
-    "Select VQGAN ckpt": "选择 VQGAN 检查点",
-    "Select source file processing method": "选择源文件处理方法",
-    "Select the model to be trained (Depending on the Tab page you are on)": "根据您所在的选项卡页面选择要训练的模型",
-    "Selected: {}": "已选择: {}",
-    "Speaker": "说话人",
-    "Speaker is identified by the folder name": "自动根据父目录名称识别说话人",
-    "Start Training": "开始训练",
-    "Streaming Audio": "流式音频",
-    "Streaming Generate": "流式合成",
-    "Tensorboard Host": "Tensorboard 监听地址",
-    "Tensorboard Log Path": "Tensorboard 日志路径",
-    "Tensorboard Port": "Tensorboard 端口",
-    "Tensorboard interface is closed": "Tensorboard 界面已关闭",
-    "Tensorboard interface is launched at {}": "Tensorboard 界面已在 {} 上启动",
-    "Text is too long, please keep it under {} characters.": "文本太长,请保持在 {} 个字符以内.",
-    "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "左侧输入文件夹的路径或文件列表。无论是否选中,都将在此列表中用于后续训练.",
-    "Training Configuration": "训练配置",
-    "Training Error": "训练错误",
-    "Training stopped": "训练已停止",
-    "Type name of the speaker": "输入说话人的名称",
-    "Type the path or select from the dropdown": "输入路径或从下拉菜单中选择",
-    "Use LoRA": "使用 LoRA",
-    "Use LoRA can save GPU memory, but may reduce the quality of the model": "使用 LoRA 可以节省 GPU 内存,但可能会降低模型质量",
-    "Use filelist": "使用文件列表",
-    "Use large for 10G+ GPU, medium for 5G, small for 2G": "10G+ GPU 使用 large, 5G 使用 medium, 2G 使用 small",
-    "VITS Configuration": "VITS 配置",
-    "VQGAN Configuration": "VQGAN 配置",
-    "Validation Batch Size": "验证批次大小",
-    "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "查看预处理文件夹的状态 (使用滑块控制树的深度)",
-    "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "我们不对模型的任何滥用负责,请在使用之前考虑您当地的法律法规.",
-    "WebUI Host": "WebUI 监听地址",
-    "WebUI Port": "WebUI 端口",
-    "Whisper Model": "Whisper 模型",
-    "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型.",
-    "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "30+ 系列 GPU 建议使用 bf16-true, 10+ 系列 GPU 建议使用 16-mixed",
-    "latest": "最近的检查点",
-    "new": "创建新的检查点"
+  "16-mixed is recommended for 10+ series GPU": "10+ 系列 GPU 建议使用 16-mixed",
+  "5 to 10 seconds of reference audio, useful for specifying speaker.": "5 到 10 秒的参考音频,适用于指定音色。",
+  "A text-to-speech model based on VQ-GAN and Llama developed by [Fish Audio](https://fish.audio).": "由 [Fish Audio](https://fish.audio) 研发的基于 VQ-GAN 和 Llama 的多语种语音合成.",
+  "Accumulate Gradient Batches": "梯度累积批次",
+  "Add to Processing Area": "加入处理区",
+  "Added path successfully!": "添加路径成功!",
+  "Advanced Config": "高级参数",
+  "Base LLAMA Model": "基础 LLAMA 模型",
+  "Batch Inference": "批量推理",
+  "Batch Size": "批次大小",
+  "Changing with the Model Path": "随模型路径变化",
+  "Chinese": "中文",
+  "Compile Model": "编译模型",
+  "Compile the model can significantly reduce the inference time, but will increase cold start time": "编译模型可以显著减少推理时间,但会增加冷启动时间",
+  "Copy": "复制",
+  "Data Preprocessing": "数据预处理",
+  "Data Preprocessing Path": "数据预处理路径",
+  "Data Source": "数据源",
+  "Decoder Model Config": "解码器模型配置",
+  "Decoder Model Path": "解码器模型路径",
+  "Disabled": "禁用",
+  "Enable Reference Audio": "启用参考音频",
+  "English": "英文",
+  "Error Message": "错误信息",
+  "File Preprocessing": "文件预处理",
+  "Generate": "生成",
+  "Generated Audio": "音频",
+  "If there is no corresponding text for the audio, apply ASR for assistance, support .txt or .lab format": "如果音频没有对应的文本,可以应用 ASR 辅助,支持 .txt 或 .lab 格式",
+  "Infer interface is closed": "推理界面已关闭",
+  "Inference Configuration": "推理配置",
+  "Inference Server Configuration": "推理服务器配置",
+  "Inference Server Error": "推理服务器错误",
+  "Inferring interface is launched at {}": "推理界面已在 {} 上启动",
+  "Initial Learning Rate": "初始学习率",
+  "Input Audio & Source Path for Transcription": "输入音频和转录源路径",
+  "Input Text": "输入文本",
+  "Invalid path: {}": "无效路径: {}",
+  "It is recommended to use CUDA, if you have low configuration, use CPU": "建议使用 CUDA,如果配置较低,使用 CPU",
+  "Iterative Prompt Length, 0 means off": "迭代提示长度,0 表示关闭",
+  "Japanese": "日文",
+  "LLAMA Configuration": "LLAMA 配置",
+  "LLAMA Model Config": "LLAMA 模型配置",
+  "LLAMA Model Path": "LLAMA 模型路径",
+  "Labeling Device": "标注加速设备",
+  "LoRA Model to be merged": "要合并的 LoRA 模型",
+  "Maximum Audio Duration": "最大音频时长",
+  "Maximum Length per Sample": "每个样本的最大长度",
+  "Maximum Training Steps": "最大训练步数",
+  "Maximum tokens per batch, 0 means no limit": "每批最大令牌数,0 表示无限制",
+  "Merge": "合并",
+  "Merge LoRA": "合并 LoRA",
+  "Merge successfully": "合并成功",
+  "Minimum Audio Duration": "最小音频时长",
+  "Model Output Path": "模型输出路径",
+  "Model Size": "模型规模",
+  "Move": "移动",
+  "Move files successfully": "移动文件成功",
+  "No audio generated, please check the input text.": "没有生成音频,请检查输入文本.",
+  "No selected options": "没有选择的选项",
+  "Number of Workers": "数据加载进程数",
+  "Open Inference Server": "打开推理服务器",
+  "Open Labeler WebUI": "打开标注工具",
+  "Open Tensorboard": "打开 Tensorboard",
+  "Opened labeler in browser": "在浏览器中打开标注工具",
+  "Optional Label Language": "[可选] 标注语言",
+  "Optional online ver": "[可选] 使用在线版",
+  "Output Path": "输出路径",
+  "Path error, please check the model file exists in the corresponding path": "路径错误,请检查模型文件是否存在于相应路径",
+  "Precision": "精度",
+  "Probability of applying Speaker Condition": "应用说话人条件的概率",
+  "Put your text here.": "在此处输入文本.",
+  "Reference Audio": "参考音频",
+  "Reference Text": "参考文本",
+  "Related code are released under BSD-3-Clause License, and weights are released under CC BY-NC-SA 4.0 License.": "相关代码使用 BSD-3-Clause 许可证发布,权重使用 CC BY-NC-SA 4.0 许可证发布.",
+  "Remove Selected Data": "移除选中数据",
+  "Removed path successfully!": "移除路径成功!",
+  "Repetition Penalty": "重复惩罚",
+  "Save model every n steps": "每 n 步保存模型",
+  "Select LLAMA ckpt": "选择 LLAMA 检查点",
+  "Select VITS ckpt": "选择 VITS 检查点",
+  "Select VQGAN ckpt": "选择 VQGAN 检查点",
+  "Select source file processing method": "选择源文件处理方法",
+  "Select the model to be trained (Depending on the Tab page you are on)": "根据您所在的选项卡页面选择要训练的模型",
+  "Selected: {}": "已选择: {}",
+  "Speaker": "说话人",
+  "Speaker is identified by the folder name": "自动根据父目录名称识别说话人",
+  "Start Training": "开始训练",
+  "Streaming Audio": "流式音频",
+  "Streaming Generate": "流式合成",
+  "Tensorboard Host": "Tensorboard 监听地址",
+  "Tensorboard Log Path": "Tensorboard 日志路径",
+  "Tensorboard Port": "Tensorboard 端口",
+  "Tensorboard interface is closed": "Tensorboard 界面已关闭",
+  "Tensorboard interface is launched at {}": "Tensorboard 界面已在 {} 上启动",
+  "Text is too long, please keep it under {} characters.": "文本太长,请保持在 {} 个字符以内.",
+  "The path of the input folder on the left or the filelist. Whether checked or not, it will be used for subsequent training in this list.": "左侧输入文件夹的路径或文件列表。无论是否选中,都将在此列表中用于后续训练.",
+  "Training Configuration": "训练配置",
+  "Training Error": "训练错误",
+  "Training stopped": "训练已停止",
+  "Type name of the speaker": "输入说话人的名称",
+  "Type the path or select from the dropdown": "输入路径或从下拉菜单中选择",
+  "Use LoRA": "使用 LoRA",
+  "Use LoRA can save GPU memory, but may reduce the quality of the model": "使用 LoRA 可以节省 GPU 内存,但可能会降低模型质量",
+  "Use filelist": "使用文件列表",
+  "Use large for 10G+ GPU, medium for 5G, small for 2G": "10G+ GPU 使用 large, 5G 使用 medium, 2G 使用 small",
+  "VITS Configuration": "VITS 配置",
+  "VQGAN Configuration": "VQGAN 配置",
+  "Validation Batch Size": "验证批次大小",
+  "View the status of the preprocessing folder (use the slider to control the depth of the tree)": "查看预处理文件夹的状态 (使用滑块控制树的深度)",
+  "We are not responsible for any misuse of the model, please consider your local laws and regulations before using it.": "我们不对模型的任何滥用负责,请在使用之前考虑您当地的法律法规.",
+  "WebUI Host": "WebUI 监听地址",
+  "WebUI Port": "WebUI 端口",
+  "Whisper Model": "Whisper 模型",
+  "You can find the source code [here](https://github.com/fishaudio/fish-speech) and models [here](https://huggingface.co/fishaudio/fish-speech-1).": "你可以在 [这里](https://github.com/fishaudio/fish-speech) 找到源代码和 [这里](https://huggingface.co/fishaudio/fish-speech-1) 找到模型.",
+  "bf16-true is recommended for 30+ series GPU, 16-mixed is recommended for 10+ series GPU": "30+ 系列 GPU 建议使用 bf16-true, 10+ 系列 GPU 建议使用 16-mixed",
+  "latest": "最近的检查点",
+  "new": "创建新的检查点",
+  "Realtime Transform Text": "实时规范化文本",
+  "Normalization Result Preview (Currently Only Chinese)": "规范化结果预览 (目前仅支持中文)",
+  "Text Normalization": "文本规范化"
 }

+ 114 - 0
fish_speech/text/chn_text_norm/.gitignore

@@ -0,0 +1,114 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# JetBrains PyCharm
+.idea
+
+# Customize
+references
+url.txt
+
+# Git
+.git

+ 36 - 0
fish_speech/text/chn_text_norm/README.md

@@ -0,0 +1,36 @@
+# This account is no longer in use, see [Atomicoo](https://github.com/atomicoo) for my latest works.
+
+# Chn Text Norm
+
+This is a repository for Chinese text normalization (no longer maintained).
+
+## Quick Start ##
+
+### Git Clone Repo ###
+
+git clone this repo to the root directory of your project which need to use it.
+
+    cd /path/to/proj
+    git clone https://github.com/Joee1995/chn-text-norm.git
+
+after that, your doc tree should be:
+```
+proj                     # root of your project
+|--- chn_text_norm       # this chn-text-norm tool
+     |--- text.py
+     |--- ...
+|--- text_normalize.py   # your text normalization code
+|--- ...
+```
+
+### How to Use ? ###
+
+    # text_normalize.py
+    from chn_text_norm.text import *
+    
+    raw_text = 'your raw text'
+    text = Text(raw_text=raw_text).normalize()
+
+### How to add quantums ###
+
+Open `test.py` and you will see how to do it.

+ 0 - 0
fish_speech/text/chn_text_norm/__init__.py


+ 172 - 0
fish_speech/text/chn_text_norm/basic_class.py

@@ -0,0 +1,172 @@
+# -*- coding: utf-8 -*-
+"""基本类
+中文字符类
+中文数字/数位类
+中文数字类
+中文数位类
+中文数字系统类
+中文数学符号类
+*中文其他符号类
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-02"
+
+from fish_speech.text.chn_text_norm.basic_constant import NUMBERING_TYPES
+
+
+class ChineseChar(object):
+    """
+    中文字符
+    每个字符对应简体和繁体,
+    e.g. 简体 = '负', 繁体 = '負'
+    转换时可转换为简体或繁体
+    """
+
+    def __init__(self, simplified, traditional):
+        self.simplified = simplified
+        self.traditional = traditional
+        self.__repr__ = self.__str__
+
+    def __str__(self):
+        return self.simplified or self.traditional or None
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class ChineseNumberUnit(ChineseChar):
+    """
+    中文数字/数位字符
+    每个字符除繁简体外还有一个额外的大写字符
+    e.g. '陆' 和 '陸'
+    """
+
+    def __init__(self, power, simplified, traditional, big_s, big_t):
+        super(ChineseNumberUnit, self).__init__(simplified, traditional)
+        self.power = power
+        self.big_s = big_s
+        self.big_t = big_t
+
+    def __str__(self):
+        return "10^{}".format(self.power)
+
+    @classmethod
+    def create(cls, index, value, numbering_type=NUMBERING_TYPES[1], small_unit=False):
+
+        if small_unit:
+            return ChineseNumberUnit(
+                power=index + 1,
+                simplified=value[0],
+                traditional=value[1],
+                big_s=value[1],
+                big_t=value[1],
+            )
+        elif numbering_type == NUMBERING_TYPES[0]:
+            return ChineseNumberUnit(
+                power=index + 8,
+                simplified=value[0],
+                traditional=value[1],
+                big_s=value[0],
+                big_t=value[1],
+            )
+        elif numbering_type == NUMBERING_TYPES[1]:
+            return ChineseNumberUnit(
+                power=(index + 2) * 4,
+                simplified=value[0],
+                traditional=value[1],
+                big_s=value[0],
+                big_t=value[1],
+            )
+        elif numbering_type == NUMBERING_TYPES[2]:
+            return ChineseNumberUnit(
+                power=pow(2, index + 3),
+                simplified=value[0],
+                traditional=value[1],
+                big_s=value[0],
+                big_t=value[1],
+            )
+        else:
+            raise ValueError(
+                "Counting type should be in {0} ({1} provided).".format(
+                    NUMBERING_TYPES, numbering_type
+                )
+            )
+
+
+class ChineseNumberDigit(ChineseChar):
+    """
+    中文数字字符
+    """
+
+    def __init__(
+        self, value, simplified, traditional, big_s, big_t, alt_s=None, alt_t=None
+    ):
+        super(ChineseNumberDigit, self).__init__(simplified, traditional)
+        self.value = value
+        self.big_s = big_s
+        self.big_t = big_t
+        self.alt_s = alt_s
+        self.alt_t = alt_t
+
+    def __str__(self):
+        return str(self.value)
+
+    @classmethod
+    def create(cls, i, v):
+        return ChineseNumberDigit(i, v[0], v[1], v[2], v[3])
+
+
+class ChineseMath(ChineseChar):
+    """
+    中文数位字符
+    """
+
+    def __init__(self, simplified, traditional, symbol, expression=None):
+        super(ChineseMath, self).__init__(simplified, traditional)
+        self.symbol = symbol
+        self.expression = expression
+        self.big_s = simplified
+        self.big_t = traditional
+
+
+CC, CNU, CND, CM = ChineseChar, ChineseNumberUnit, ChineseNumberDigit, ChineseMath
+
+
+class NumberSystem(object):
+    """
+    中文数字系统
+    """
+
+    pass
+
+
+class MathSymbol(object):
+    """
+    用于中文数字系统的数学符号 (繁/简体), e.g.
+    positive = ['正', '正']
+    negative = ['负', '負']
+    point = ['点', '點']
+    """
+
+    def __init__(self, positive, negative, point):
+        self.positive = positive
+        self.negative = negative
+        self.point = point
+
+    def __iter__(self):
+        for v in self.__dict__.values():
+            yield v
+
+
+# class OtherSymbol(object):
+#     """
+#     其他符号
+#     """
+#
+#     def __init__(self, sil):
+#         self.sil = sil
+#
+#     def __iter__(self):
+#         for v in self.__dict__.values():
+#             yield v

+ 30 - 0
fish_speech/text/chn_text_norm/basic_constant.py

@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+"""基本常量
+中文数字/数位/符号字符常量
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-02"
+
+CHINESE_DIGIS = "零一二三四五六七八九"
+BIG_CHINESE_DIGIS_SIMPLIFIED = "零壹贰叁肆伍陆柒捌玖"
+BIG_CHINESE_DIGIS_TRADITIONAL = "零壹貳參肆伍陸柒捌玖"
+SMALLER_BIG_CHINESE_UNITS_SIMPLIFIED = "十百千万"
+SMALLER_BIG_CHINESE_UNITS_TRADITIONAL = "拾佰仟萬"
+LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED = "亿兆京垓秭穰沟涧正载"
+LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL = "億兆京垓秭穰溝澗正載"
+SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED = "十百千万"
+SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL = "拾佰仟萬"
+
+ZERO_ALT = "〇"
+ONE_ALT = "幺"
+TWO_ALTS = ["两", "兩"]
+
+POSITIVE = ["正", "正"]
+NEGATIVE = ["负", "負"]
+POINT = ["点", "點"]
+# PLUS = [u'加', u'加']
+# SIL = [u'杠', u'槓']
+
+# 中文数字系统类型
+NUMBERING_TYPES = ["low", "mid", "high"]

+ 342 - 0
fish_speech/text/chn_text_norm/basic_util.py

@@ -0,0 +1,342 @@
+# -*- coding: utf-8 -*-
+"""基本方法
+创建中文数字系统 方法
+中文字符串 <=> 数字串 方法
+数字串 <=> 中文字符串 方法
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-02"
+
+from fish_speech.text.chn_text_norm.basic_class import *
+from fish_speech.text.chn_text_norm.basic_constant import *
+
+
+def create_system(numbering_type=NUMBERING_TYPES[1]):
+    """
+    根据数字系统类型返回创建相应的数字系统,默认为 mid
+    NUMBERING_TYPES = ['low', 'mid', 'high']: 中文数字系统类型
+        low:  '兆' = '亿' * '十' = $10^{9}$,  '京' = '兆' * '十', etc.
+        mid:  '兆' = '亿' * '万' = $10^{12}$, '京' = '兆' * '万', etc.
+        high: '兆' = '亿' * '亿' = $10^{16}$, '京' = '兆' * '兆', etc.
+    返回对应的数字系统
+    """
+
+    # chinese number units of '亿' and larger
+    all_larger_units = zip(
+        LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED,
+        LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL,
+    )
+    larger_units = [
+        CNU.create(i, v, numbering_type, False) for i, v in enumerate(all_larger_units)
+    ]
+    # chinese number units of '十, 百, 千, 万'
+    all_smaller_units = zip(
+        SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED,
+        SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL,
+    )
+    smaller_units = [
+        CNU.create(i, v, small_unit=True) for i, v in enumerate(all_smaller_units)
+    ]
+    # digis
+    chinese_digis = zip(
+        CHINESE_DIGIS,
+        CHINESE_DIGIS,
+        BIG_CHINESE_DIGIS_SIMPLIFIED,
+        BIG_CHINESE_DIGIS_TRADITIONAL,
+    )
+    digits = [CND.create(i, v) for i, v in enumerate(chinese_digis)]
+    digits[0].alt_s, digits[0].alt_t = ZERO_ALT, ZERO_ALT
+    digits[1].alt_s, digits[1].alt_t = ONE_ALT, ONE_ALT
+    digits[2].alt_s, digits[2].alt_t = TWO_ALTS[0], TWO_ALTS[1]
+
+    # symbols
+    positive_cn = CM(POSITIVE[0], POSITIVE[1], "+", lambda x: x)
+    negative_cn = CM(NEGATIVE[0], NEGATIVE[1], "-", lambda x: -x)
+    point_cn = CM(POINT[0], POINT[1], ".", lambda x, y: float(str(x) + "." + str(y)))
+    # sil_cn = CM(SIL[0], SIL[1], '-', lambda x, y: float(str(x) + '-' + str(y)))
+    system = NumberSystem()
+    system.units = smaller_units + larger_units
+    system.digits = digits
+    system.math = MathSymbol(positive_cn, negative_cn, point_cn)
+    # system.symbols = OtherSymbol(sil_cn)
+    return system
+
+
+def chn2num(chinese_string, numbering_type=NUMBERING_TYPES[1]):
+
+    def get_symbol(char, system):
+        for u in system.units:
+            if char in [u.traditional, u.simplified, u.big_s, u.big_t]:
+                return u
+        for d in system.digits:
+            if char in [
+                d.traditional,
+                d.simplified,
+                d.big_s,
+                d.big_t,
+                d.alt_s,
+                d.alt_t,
+            ]:
+                return d
+        for m in system.math:
+            if char in [m.traditional, m.simplified]:
+                return m
+
+    def string2symbols(chinese_string, system):
+        int_string, dec_string = chinese_string, ""
+        for p in [system.math.point.simplified, system.math.point.traditional]:
+            if p in chinese_string:
+                int_string, dec_string = chinese_string.split(p)
+                break
+        return [get_symbol(c, system) for c in int_string], [
+            get_symbol(c, system) for c in dec_string
+        ]
+
+    def correct_symbols(integer_symbols, system):
+        """
+        一百八 to 一百八十
+        一亿一千三百万 to 一亿 一千万 三百万
+        """
+
+        if integer_symbols and isinstance(integer_symbols[0], CNU):
+            if integer_symbols[0].power == 1:
+                integer_symbols = [system.digits[1]] + integer_symbols
+
+        if len(integer_symbols) > 1:
+            if isinstance(integer_symbols[-1], CND) and isinstance(
+                integer_symbols[-2], CNU
+            ):
+                integer_symbols.append(
+                    CNU(integer_symbols[-2].power - 1, None, None, None, None)
+                )
+
+        result = []
+        unit_count = 0
+        for s in integer_symbols:
+            if isinstance(s, CND):
+                result.append(s)
+                unit_count = 0
+            elif isinstance(s, CNU):
+                current_unit = CNU(s.power, None, None, None, None)
+                unit_count += 1
+
+            if unit_count == 1:
+                result.append(current_unit)
+            elif unit_count > 1:
+                for i in range(len(result)):
+                    if (
+                        isinstance(result[-i - 1], CNU)
+                        and result[-i - 1].power < current_unit.power
+                    ):
+                        result[-i - 1] = CNU(
+                            result[-i - 1].power + current_unit.power,
+                            None,
+                            None,
+                            None,
+                            None,
+                        )
+        return result
+
+    def compute_value(integer_symbols):
+        """
+        Compute the value.
+        When current unit is larger than previous unit, current unit * all previous units will be used as all previous units.
+        e.g. '两千万' = 2000 * 10000 not 2000 + 10000
+        """
+        value = [0]
+        last_power = 0
+        for s in integer_symbols:
+            if isinstance(s, CND):
+                value[-1] = s.value
+            elif isinstance(s, CNU):
+                value[-1] *= pow(10, s.power)
+                if s.power > last_power:
+                    value[:-1] = list(map(lambda v: v * pow(10, s.power), value[:-1]))
+                    last_power = s.power
+                value.append(0)
+        return sum(value)
+
+    system = create_system(numbering_type)
+    int_part, dec_part = string2symbols(chinese_string, system)
+    int_part = correct_symbols(int_part, system)
+    int_str = str(compute_value(int_part))
+    dec_str = "".join([str(d.value) for d in dec_part])
+    if dec_part:
+        return "{0}.{1}".format(int_str, dec_str)
+    else:
+        return int_str
+
+
+def num2chn(
+    number_string,
+    numbering_type=NUMBERING_TYPES[1],
+    big=False,
+    traditional=False,
+    alt_zero=False,
+    alt_one=False,
+    alt_two=True,
+    use_zeros=True,
+    use_units=True,
+):
+
+    def get_value(value_string, use_zeros=True):
+
+        striped_string = value_string.lstrip("0")
+
+        # record nothing if all zeros
+        if not striped_string:
+            return []
+
+        # record one digits
+        elif len(striped_string) == 1:
+            if use_zeros and len(value_string) != len(striped_string):
+                return [system.digits[0], system.digits[int(striped_string)]]
+            else:
+                return [system.digits[int(striped_string)]]
+
+        # recursively record multiple digits
+        else:
+            result_unit = next(
+                u for u in reversed(system.units) if u.power < len(striped_string)
+            )
+            result_string = value_string[: -result_unit.power]
+            return (
+                get_value(result_string)
+                + [result_unit]
+                + get_value(striped_string[-result_unit.power :])
+            )
+
+    system = create_system(numbering_type)
+
+    int_dec = number_string.split(".")
+    if len(int_dec) == 1:
+        int_string = int_dec[0]
+        dec_string = ""
+    elif len(int_dec) == 2:
+        int_string = int_dec[0]
+        dec_string = int_dec[1]
+    else:
+        raise ValueError(
+            "invalid input num string with more than one dot: {}".format(number_string)
+        )
+
+    if use_units and len(int_string) > 1:
+        result_symbols = get_value(int_string)
+    else:
+        result_symbols = [system.digits[int(c)] for c in int_string]
+    dec_symbols = [system.digits[int(c)] for c in dec_string]
+    if dec_string:
+        result_symbols += [system.math.point] + dec_symbols
+
+    if alt_two:
+        liang = CND(
+            2,
+            system.digits[2].alt_s,
+            system.digits[2].alt_t,
+            system.digits[2].big_s,
+            system.digits[2].big_t,
+        )
+        for i, v in enumerate(result_symbols):
+            if isinstance(v, CND) and v.value == 2:
+                next_symbol = (
+                    result_symbols[i + 1] if i < len(result_symbols) - 1 else None
+                )
+                previous_symbol = result_symbols[i - 1] if i > 0 else None
+                if isinstance(next_symbol, CNU) and isinstance(
+                    previous_symbol, (CNU, type(None))
+                ):
+                    if next_symbol.power != 1 and (
+                        (previous_symbol is None) or (previous_symbol.power != 1)
+                    ):
+                        result_symbols[i] = liang
+
+    # if big is True, '两' will not be used and `alt_two` has no impact on output
+    if big:
+        attr_name = "big_"
+        if traditional:
+            attr_name += "t"
+        else:
+            attr_name += "s"
+    else:
+        if traditional:
+            attr_name = "traditional"
+        else:
+            attr_name = "simplified"
+
+    result = "".join([getattr(s, attr_name) for s in result_symbols])
+
+    # if not use_zeros:
+    #     result = result.strip(getattr(system.digits[0], attr_name))
+
+    if alt_zero:
+        result = result.replace(
+            getattr(system.digits[0], attr_name), system.digits[0].alt_s
+        )
+
+    if alt_one:
+        result = result.replace(
+            getattr(system.digits[1], attr_name), system.digits[1].alt_s
+        )
+
+    for i, p in enumerate(POINT):
+        if result.startswith(p):
+            return CHINESE_DIGIS[0] + result
+
+    # ^10, 11, .., 19
+    if (
+        len(result) >= 2
+        and result[1]
+        in [
+            SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED[0],
+            SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL[0],
+        ]
+        and result[0]
+        in [
+            CHINESE_DIGIS[1],
+            BIG_CHINESE_DIGIS_SIMPLIFIED[1],
+            BIG_CHINESE_DIGIS_TRADITIONAL[1],
+        ]
+    ):
+        result = result[1:]
+
+    return result
+
+
+if __name__ == "__main__":
+
+    # 测试程序
+    all_chinese_number_string = (
+        CHINESE_DIGIS
+        + BIG_CHINESE_DIGIS_SIMPLIFIED
+        + BIG_CHINESE_DIGIS_TRADITIONAL
+        + LARGER_CHINESE_NUMERING_UNITS_SIMPLIFIED
+        + LARGER_CHINESE_NUMERING_UNITS_TRADITIONAL
+        + SMALLER_CHINESE_NUMERING_UNITS_SIMPLIFIED
+        + SMALLER_CHINESE_NUMERING_UNITS_TRADITIONAL
+        + ZERO_ALT
+        + ONE_ALT
+        + "".join(TWO_ALTS + POSITIVE + NEGATIVE + POINT)
+    )
+
+    print("num:", chn2num("一万零四百零三点八零五"))
+    print("num:", chn2num("一亿六点三"))
+    print("num:", chn2num("一亿零六点三"))
+    print("num:", chn2num("两千零一亿六点三"))
+    # print('num:', chn2num('一零零八六'))
+    print("txt:", num2chn("10260.03", alt_zero=True))
+    print("txt:", num2chn("20037.090", numbering_type="low", traditional=True))
+    print("txt:", num2chn("100860001.77", numbering_type="high", big=True))
+    print(
+        "txt:",
+        num2chn(
+            "059523810880",
+            alt_one=True,
+            alt_two=False,
+            use_zeros=True,
+            use_units=False,
+        ),
+    )
+
+    print(all_chinese_number_string)

+ 32 - 0
fish_speech/text/chn_text_norm/cardinal.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+"""CARDINAL类 (包含小数DECIMAL类)
+纯数 <=> 中文字符串 方法
+中文字符串 <=> 纯数 方法
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-03"
+
+from fish_speech.text.chn_text_norm.basic_util import *
+
+
+class Cardinal:
+    """
+    CARDINAL类
+    """
+
+    def __init__(self, cardinal=None, chntext=None):
+        self.cardinal = cardinal
+        self.chntext = chntext
+
+    def chntext2cardinal(self):
+        return chn2num(self.chntext)
+
+    def cardinal2chntext(self):
+        return num2chn(self.cardinal)
+
+
+if __name__ == "__main__":
+
+    # 测试程序
+    print(Cardinal(cardinal="21357.230").cardinal2chntext())

+ 75 - 0
fish_speech/text/chn_text_norm/date.py

@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+"""DATE类
+日期 <=> 中文字符串 方法
+中文字符串 <=> 日期 方法
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-07"
+
+from fish_speech.text.chn_text_norm.cardinal import Cardinal
+from fish_speech.text.chn_text_norm.digit import Digit
+
+
+class Date:
+    """
+    DATE类
+    """
+
+    def __init__(self, date=None, chntext=None):
+        self.date = date
+        self.chntext = chntext
+
+    # def chntext2date(self):
+    #     chntext = self.chntext
+    #     try:
+    #         year, other = chntext.strip().split('年', maxsplit=1)
+    #         year = Digit(chntext=year).digit2chntext() + '年'
+    #     except ValueError:
+    #         other = chntext
+    #         year = ''
+    #     if other:
+    #         try:
+    #             month, day = other.strip().split('月', maxsplit=1)
+    #             month = Cardinal(chntext=month).chntext2cardinal() + '月'
+    #         except ValueError:
+    #             day = chntext
+    #             month = ''
+    #         if day:
+    #             day = Cardinal(chntext=day[:-1]).chntext2cardinal() + day[-1]
+    #     else:
+    #         month = ''
+    #         day = ''
+    #     date = year + month + day
+    #     self.date = date
+    #     return self.date
+
+    def date2chntext(self):
+        date = self.date
+        try:
+            year, other = date.strip().split("年", maxsplit=1)
+            year = Digit(digit=year).digit2chntext() + "年"
+        except ValueError:
+            other = date
+            year = ""
+        if other:
+            try:
+                month, day = other.strip().split("月", maxsplit=1)
+                month = Cardinal(cardinal=month).cardinal2chntext() + "月"
+            except ValueError:
+                day = date
+                month = ""
+            if day:
+                day = Cardinal(cardinal=day[:-1]).cardinal2chntext() + day[-1]
+        else:
+            month = ""
+            day = ""
+        chntext = year + month + day
+        self.chntext = chntext
+        return self.chntext
+
+
+if __name__ == "__main__":
+
+    # 测试
+    print(Date(date="09年3月16日").date2chntext())

+ 32 - 0
fish_speech/text/chn_text_norm/digit.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+"""DIGIT类
+数字串 <=> 中文字符串 方法
+中文字符串 <=> 数字串 方法
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-03"
+
+from fish_speech.text.chn_text_norm.basic_util import *
+
+
+class Digit:
+    """
+    DIGIT类
+    """
+
+    def __init__(self, digit=None, chntext=None):
+        self.digit = digit
+        self.chntext = chntext
+
+    # def chntext2digit(self):
+    #     return chn2num(self.chntext)
+
+    def digit2chntext(self):
+        return num2chn(self.digit, alt_two=False, use_units=False)
+
+
+if __name__ == "__main__":
+
+    # 测试程序
+    print(Digit(digit="2016").digit2chntext())

+ 35 - 0
fish_speech/text/chn_text_norm/fraction.py

@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+"""FRACTION类
+分数 <=> 中文字符串 方法
+中文字符串 <=> 分数 方法
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-03"
+
+from fish_speech.text.chn_text_norm.basic_util import *
+
+
+class Fraction:
+    """
+    FRACTION类
+    """
+
+    def __init__(self, fraction=None, chntext=None):
+        self.fraction = fraction
+        self.chntext = chntext
+
+    def chntext2fraction(self):
+        denominator, numerator = self.chntext.split("分之")
+        return chn2num(numerator) + "/" + chn2num(denominator)
+
+    def fraction2chntext(self):
+        numerator, denominator = self.fraction.split("/")
+        return num2chn(denominator) + "分之" + num2chn(numerator)
+
+
+if __name__ == "__main__":
+
+    # 测试程序
+    print(Fraction(fraction="2135/7230").fraction2chntext())
+    print(Fraction(chntext="五百八十一分之三百六十九").chntext2fraction())

+ 43 - 0
fish_speech/text/chn_text_norm/money.py

@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+"""MONEY类
+金钱 <=> 中文字符串 方法
+中文字符串 <=> 金钱 方法
+"""
+import re
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-08"
+
+from fish_speech.text.chn_text_norm.cardinal import Cardinal
+
+
+class Money:
+    """
+    MONEY类
+    """
+
+    def __init__(self, money=None, chntext=None):
+        self.money = money
+        self.chntext = chntext
+
+    # def chntext2money(self):
+    #     return self.money
+
+    def money2chntext(self):
+        money = self.money
+        pattern = re.compile(r"(\d+(\.\d+)?)")
+        matchers = pattern.findall(money)
+        if matchers:
+            for matcher in matchers:
+                money = money.replace(
+                    matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext()
+                )
+        self.chntext = money
+        return self.chntext
+
+
+if __name__ == "__main__":
+
+    # 测试
+    print(Money(money="21.5万元").money2chntext())
+    print(Money(money="230块5毛").money2chntext())

+ 33 - 0
fish_speech/text/chn_text_norm/percentage.py

@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+"""PERCENTAGE类
+百分数 <=> 中文字符串 方法
+中文字符串 <=> 百分数 方法
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-06"
+
+from fish_speech.text.chn_text_norm.basic_util import *
+
+
+class Percentage:
+    """
+    PERCENTAGE类
+    """
+
+    def __init__(self, percentage=None, chntext=None):
+        self.percentage = percentage
+        self.chntext = chntext
+
+    def chntext2percentage(self):
+        return chn2num(self.chntext.strip().strip("百分之")) + "%"
+
+    def percentage2chntext(self):
+        return "百分之" + num2chn(self.percentage.strip().strip("%"))
+
+
+if __name__ == "__main__":
+
+    # 测试程序
+    print(Percentage(chntext="百分之五十六点零三").chntext2percentage())
+    print(Percentage(percentage="65.3%").percentage2chntext())

+ 51 - 0
fish_speech/text/chn_text_norm/telephone.py

@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+"""TELEPHONE类
+电话号码 <=> 中文字符串 方法
+中文字符串 <=> 电话号码 方法
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-03"
+
+from fish_speech.text.chn_text_norm.basic_util import *
+
+
+class TelePhone:
+    """
+    TELEPHONE类
+    """
+
+    def __init__(self, telephone=None, raw_chntext=None, chntext=None):
+        self.telephone = telephone
+        self.raw_chntext = raw_chntext
+        self.chntext = chntext
+
+    # def chntext2telephone(self):
+    #     sil_parts = self.raw_chntext.split('<SIL>')
+    #     self.telephone = '-'.join([
+    #         str(chn2num(p)) for p in sil_parts
+    #     ])
+    #     return self.telephone
+
+    def telephone2chntext(self, fixed=False):
+
+        if fixed:
+            sil_parts = self.telephone.split("-")
+            self.raw_chntext = "<SIL>".join(
+                [num2chn(part, alt_two=False, use_units=False) for part in sil_parts]
+            )
+            self.chntext = self.raw_chntext.replace("<SIL>", "")
+        else:
+            sp_parts = self.telephone.strip("+").split()
+            self.raw_chntext = "<SP>".join(
+                [num2chn(part, alt_two=False, use_units=False) for part in sp_parts]
+            )
+            self.chntext = self.raw_chntext.replace("<SP>", "")
+        return self.chntext
+
+
+if __name__ == "__main__":
+
+    # 测试程序
+    print(TelePhone(telephone="0595-23980880").telephone2chntext())
+    # print(TelePhone(raw_chntext='零五九五杠二三八六五零九八').chntext2telephone())

+ 177 - 0
fish_speech/text/chn_text_norm/text.py

@@ -0,0 +1,177 @@
+# -*- coding: utf-8 -*-
+"""
+TEXT类
+"""
+
+__author__ = "Zhiyang Zhou <zyzhou@stu.xmu.edu.cn>"
+__data__ = "2019-05-03"
+
+import re
+
+from fish_speech.text.chn_text_norm.cardinal import Cardinal
+from fish_speech.text.chn_text_norm.date import Date
+from fish_speech.text.chn_text_norm.digit import Digit
+from fish_speech.text.chn_text_norm.fraction import Fraction
+from fish_speech.text.chn_text_norm.money import Money
+from fish_speech.text.chn_text_norm.percentage import Percentage
+from fish_speech.text.chn_text_norm.telephone import TelePhone
+
+CURRENCY_NAMES = (
+    "(人民币|美元|日元|英镑|欧元|马克|法郎|加拿大元|澳元|港币|先令|芬兰马克|爱尔兰镑|"
+    "里拉|荷兰盾|埃斯库多|比塞塔|印尼盾|林吉特|新西兰元|比索|卢布|新加坡元|韩元|泰铢)"
+)
+CURRENCY_UNITS = "((亿|千万|百万|万|千|百)|(亿|千万|百万|万|千|百|)元|(亿|千万|百万|万|千|百|)块|角|毛|分)"
+COM_QUANTIFIERS = (
+    "(匹|张|座|回|场|尾|条|个|首|阙|阵|网|炮|顶|丘|棵|只|支|袭|辆|挑|担|颗|壳|窠|曲|墙|群|腔|"
+    "砣|座|客|贯|扎|捆|刀|令|打|手|罗|坡|山|岭|江|溪|钟|队|单|双|对|出|口|头|脚|板|跳|枝|件|贴|"
+    "针|线|管|名|位|身|堂|课|本|页|家|户|层|丝|毫|厘|分|钱|两|斤|担|铢|石|钧|锱|忽|(千|毫|微)克|"
+    "毫|厘|分|寸|尺|丈|里|寻|常|铺|程|(千|分|厘|毫|微)米|撮|勺|合|升|斗|石|盘|碗|碟|叠|桶|笼|盆|"
+    "盒|杯|钟|斛|锅|簋|篮|盘|桶|罐|瓶|壶|卮|盏|箩|箱|煲|啖|袋|钵|年|月|日|季|刻|时|周|天|秒|分|旬|"
+    "纪|岁|世|更|夜|春|夏|秋|冬|代|伏|辈|丸|泡|粒|颗|幢|堆|条|根|支|道|面|片|张|颗|块|人|抽)"
+)
+
+
+class Text:
+    """
+    Text类
+    """
+
+    def __init__(self, raw_text, norm_text=None):
+        self.raw_text = "^" + raw_text + "$"
+        self.norm_text = norm_text
+
+    def _particular(self):
+        text = self.norm_text
+        pattern = re.compile(r"(([a-zA-Z]+)二([a-zA-Z]+))")
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('particular')
+            for matcher in matchers:
+                text = text.replace(matcher[0], matcher[1] + "2" + matcher[2], 1)
+        self.norm_text = text
+        return self.norm_text
+
+    def normalize(self):
+        text = self.raw_text
+
+        # 规范化日期
+        pattern = re.compile(
+            r"\D+((([089]\d|(19|20)\d{2})年)?(\d{1,2}月(\d{1,2}[日号])?)?)"
+        )
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('date')
+            for matcher in matchers:
+                text = text.replace(matcher[0], Date(date=matcher[0]).date2chntext(), 1)
+
+        # 规范化金钱
+        pattern = re.compile(
+            r"\D+((\d+(\.\d+)?)[多余几]?"
+            + CURRENCY_UNITS
+            + r"(\d"
+            + CURRENCY_UNITS
+            + "?)?)"
+        )
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('money')
+            for matcher in matchers:
+                text = text.replace(
+                    matcher[0], Money(money=matcher[0]).money2chntext(), 1
+                )
+
+        # 规范化固话/手机号码
+        # 手机
+        # http://www.jihaoba.com/news/show/13680
+        # 移动:139、138、137、136、135、134、159、158、157、150、151、152、188、187、182、183、184、178、198
+        # 联通:130、131、132、156、155、186、185、176
+        # 电信:133、153、189、180、181、177
+        pattern = re.compile(r"\D((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})\D")
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('telephone')
+            for matcher in matchers:
+                text = text.replace(
+                    matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(), 1
+                )
+        # 固话
+        pattern = re.compile(r"\D((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})\D")
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('fixed telephone')
+            for matcher in matchers:
+                text = text.replace(
+                    matcher[0],
+                    TelePhone(telephone=matcher[0]).telephone2chntext(fixed=True),
+                    1,
+                )
+
+        # 规范化分数
+        pattern = re.compile(r"(\d+/\d+)")
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('fraction')
+            for matcher in matchers:
+                text = text.replace(
+                    matcher, Fraction(fraction=matcher).fraction2chntext(), 1
+                )
+
+        # 规范化百分数
+        text = text.replace("％", "%")
+        pattern = re.compile(r"(\d+(\.\d+)?%)")
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('percentage')
+            for matcher in matchers:
+                text = text.replace(
+                    matcher[0],
+                    Percentage(percentage=matcher[0]).percentage2chntext(),
+                    1,
+                )
+
+        # 规范化纯数+量词
+        pattern = re.compile(r"(\d+(\.\d+)?)[多余几]?" + COM_QUANTIFIERS)
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('cardinal+quantifier')
+            for matcher in matchers:
+                text = text.replace(
+                    matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1
+                )
+
+        # 规范化数字编号
+        pattern = re.compile(r"(\d{4,32})")
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('digit')
+            for matcher in matchers:
+                text = text.replace(matcher, Digit(digit=matcher).digit2chntext(), 1)
+
+        # 规范化纯数
+        pattern = re.compile(r"(\d+(\.\d+)?)")
+        matchers = pattern.findall(text)
+        if matchers:
+            # print('cardinal')
+            for matcher in matchers:
+                text = text.replace(
+                    matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1
+                )
+
+        self.norm_text = text
+        self._particular()
+
+        return self.norm_text.lstrip("^").rstrip("$")
+
+
+if __name__ == "__main__":
+
+    # 测试程序
+    print(Text(raw_text="固话:0595-23865596或23880880。").normalize())
+    print(Text(raw_text="手机:+86 19859213959或15659451527。").normalize())
+    print(Text(raw_text="分数:32477/76391。").normalize())
+    print(Text(raw_text="百分数:80.03%。").normalize())
+    print(Text(raw_text="编号:31520181154418。").normalize())
+    print(Text(raw_text="纯数:2983.07克或12345.60米。").normalize())
+    print(Text(raw_text="日期:1999年2月20日或09年3月15号。").normalize())
+    print(Text(raw_text="金钱:12块5,34.5元,20.1万").normalize())
+    print(Text(raw_text="特殊:O2O或B2C。").normalize())

+ 35 - 5
tools/webui.py

@@ -17,7 +17,9 @@ from transformers import AutoTokenizer
 
 pyrootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
 
+
 from fish_speech.i18n import i18n
+from fish_speech.text.chn_text_norm.text import Text as ChnNormedText
 from tools.api import decode_vq_tokens, encode_reference
 from tools.llama.generate import (
     GenerateRequest,
@@ -243,6 +245,13 @@ def wav_chunk_header(sample_rate=44100, bit_depth=16, channels=1):
     return wav_header_bytes
 
 
+def normalize_text(user_input, use_normalization):
+    if use_normalization:
+        return ChnNormedText(raw_text=user_input).normalize()
+    else:
+        return user_input
+
+
 def build_app():
     with gr.Blocks(theme=gr.themes.Base()) as app:
         gr.Markdown(HEADER_MD)
@@ -258,8 +267,24 @@ def build_app():
         with gr.Row():
             with gr.Column(scale=3):
                 text = gr.Textbox(
-                    label=i18n("Input Text"), placeholder=TEXTBOX_PLACEHOLDER, lines=15
+                    label=i18n("Input Text"), placeholder=TEXTBOX_PLACEHOLDER, lines=10
                 )
+                refined_text = gr.Textbox(
+                    label=i18n("Realtime Transform Text"),
+                    placeholder=i18n(
+                        "Normalization Result Preview (Currently Only Chinese)"
+                    ),
+                    lines=5,
+                    interactive=False,
+                )
+
+                with gr.Row():
+                    if_refine_text = gr.Checkbox(
+                        label=i18n("Text Normalization"),
+                        value=True,
+                        scale=0,
+                        min_width=150,
+                    )
 
                 with gr.Row():
                     with gr.Tab(label=i18n("Advanced Config")):
@@ -368,11 +393,16 @@ def build_app():
                             value="\U0001F3A7 " + i18n("Streaming Generate"),
                             variant="primary",
                         )
+
+        text.input(
+            fn=normalize_text, inputs=[text, if_refine_text], outputs=[refined_text]
+        )
+
         # # Submit
         generate.click(
             inference_wrapper,
             [
-                text,
+                refined_text,
                 enable_reference_audio,
                 reference_audio,
                 reference_text,
@@ -391,7 +421,7 @@ def build_app():
         generate_stream.click(
             inference_stream,
             [
-                text,
+                refined_text,
                 enable_reference_audio,
                 reference_audio,
                 reference_text,
@@ -413,10 +443,10 @@ def parse_args():
     parser.add_argument(
         "--llama-checkpoint-path",
         type=Path,
-        default="checkpoints/text2semantic-sft-medium-v1-4k.pth",
+        default="checkpoints/text2semantic-sft-large-v1.1-4k.pth",
     )
     parser.add_argument(
-        "--llama-config-name", type=str, default="dual_ar_2_codebook_medium"
+        "--llama-config-name", type=str, default="dual_ar_2_codebook_large"
     )
     parser.add_argument(
         "--decoder-checkpoint-path",