Browse source

feature: 添加音量配置&本地删除缓存文件

huangzhichao 2 days ago
parent
commit
7ed3f51605

+ 3 - 0
.gitignore

@@ -17,3 +17,6 @@ venv/
 
 # Local environment
 .env
+
+# Temp cache directory
+temp/

+ 1 - 1
app/api/routes.py

@@ -1,6 +1,6 @@
 from fastapi import APIRouter, Depends
 from .deps import get_speech_service
-from ..schemas.llm import TextToSpeechResponse, TextToSpeechRequest
+from ..schemas.speech import TextToSpeechResponse, TextToSpeechRequest
 from ..services.speech_service import SpeechService
 
 

BIN
app/audio/betty.mp3


BIN
app/audio/zhichu.mp3


BIN
app/audio/zhida.mp3


BIN
app/audio/zhifei.mp3


BIN
app/audio/zhiming.mp3


BIN
app/audio/zhiru.mp3


+ 1 - 1
app/providers/base.py

@@ -1,6 +1,6 @@
 from typing import List, Optional, Protocol
 
-from ..schemas.llm import ChatMessage, ChatResponse, TextToSpeechResponse
+from ..schemas.speech import ChatMessage, ChatResponse, TextToSpeechResponse
 
 
 class LLMProvider(Protocol):

+ 1 - 1
app/providers/mock_provider.py

@@ -1,6 +1,6 @@
 from typing import List, Optional
 
-from ..schemas.llm import ChatMessage, ChatResponse, Usage
+from ..schemas.speech import ChatMessage, ChatResponse, Usage
 
 
 class MockProvider:

+ 12 - 6
app/providers/speech_provider.py

@@ -11,13 +11,13 @@ from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthes
 
 import requests
 
-from ..schemas.llm import TextToSpeechResponse
+from ..schemas.speech import TextToSpeechResponse
 from ..core.config import get_settings
 
 settings = get_settings()
 # Configure DashScope API key from env/.env
 dashscope.api_key = settings.dashscope_api_key or ""
-UPLOAD_PATH = settings.upload_path or 'https://api.piaoquantv.com/ad/file/upload'
+UPLOAD_PATH = settings.upload_path
 
 
 def _safe_filename(name: str) -> str:
@@ -26,10 +26,10 @@ def _safe_filename(name: str) -> str:
 
 
 class SpeechProvider:
-    def text_to_speech(self, pitch: float, rate: float, filename: str, text: str, *, model: Optional[str] = None, format: Optional[str] = None) -> TextToSpeechResponse:
-        # Resolve output path under app/audio and ensure directory exists
-        app_dir = Path(__file__).resolve().parents[1]  # .../app
-        audio_dir = app_dir / "audio"
+    def text_to_speech(self, volume: int, pitch: float, rate: float, filename: str, text: str, *, model: Optional[str] = None, format: Optional[str] = None) -> TextToSpeechResponse:
+        # Resolve output path under project-root/temp and ensure directory exists
+        project_root = Path(__file__).resolve().parents[2]  # repo root
+        audio_dir = project_root / "temp"
         audio_dir.mkdir(parents=True, exist_ok=True)
 
         # determine desired output format (default mp3 for smaller size)
@@ -55,6 +55,7 @@ class SpeechProvider:
 
         SpeechSynthesizer.call(
             model=(model or 'sambert-zhifei-v1'),
+            volume=volume,
             text=text,
             pitch=pitch,
             rate=rate,
@@ -68,6 +69,11 @@ class SpeechProvider:
         # After synthesis completes, upload the file to OSS
         try:
             url = _upload_file(UPLOAD_PATH, out_path)
+            # Upload succeeded; remove local audio file to save space
+            try:
+                Path(out_path).unlink(missing_ok=True)
+            except Exception as del_err:
+                print(f"[warn] Failed to delete local audio {out_path}: {del_err}")
             return TextToSpeechResponse(audio_url=url)
         except Exception as e:
             # If upload fails, fall back to local path to avoid breaking

+ 1 - 0
app/schemas/llm.py → app/schemas/speech.py

@@ -32,6 +32,7 @@ class TextToSpeechResponse(BaseModel):
     audio_url: str
 
 class TextToSpeechRequest(BaseModel):
+    volume: int = 1
     pitch: float = 1
     rate: float = 1
     filename: str

+ 0 - 16
app/services/llm_service.py

@@ -1,16 +0,0 @@
-from ..schemas.llm import ChatRequest, ChatResponse
-from ..providers.base import LLMProvider
-
-
-class LLMService:
-    def __init__(self, provider: LLMProvider) -> None:
-        self._provider = provider
-
-    def chat(self, req: ChatRequest) -> ChatResponse:
-        return self._provider.chat(
-            req.messages,
-            model=req.model,
-            temperature=req.temperature,
-            max_tokens=req.max_tokens,
-        )
-

+ 2 - 1
app/services/speech_service.py

@@ -1,4 +1,4 @@
-from ..schemas.llm import TextToSpeechRequest, TextToSpeechResponse
+from ..schemas.speech import TextToSpeechRequest, TextToSpeechResponse
 from ..providers.speech_provider import SpeechProvider
 
 
@@ -8,6 +8,7 @@ class SpeechService:
 
     def text_to_speech(self, req: TextToSpeechRequest) -> TextToSpeechResponse:
         return self._provider.text_to_speech(
+            req.volume,
             req.pitch,
             req.rate,
             req.filename,