|
@@ -11,13 +11,17 @@ from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthes
|
|
|
|
|
|
import requests
|
|
|
|
|
|
-from ..schemas.speech import TextToSpeechResponse
|
|
|
+from ..schemas.speech import TextToSpeechResponse, DataPayload
|
|
|
from ..core.config import get_settings
|
|
|
+from ..core.logger import get_logger
|
|
|
|
|
|
settings = get_settings()
|
|
|
# Configure DashScope API key from env/.env
|
|
|
dashscope.api_key = settings.dashscope_api_key or ""
|
|
|
-UPLOAD_PATH = settings.upload_path
|
|
|
+UPLOAD_PATH = settings.upload_path or ""
|
|
|
+
|
|
|
+# module logger
|
|
|
+logger = get_logger("speech_provider")
|
|
|
|
|
|
|
|
|
def _safe_filename(name: str) -> str:
|
|
@@ -30,11 +34,30 @@ class SpeechProvider:
|
|
|
# Resolve output path under project-root/temp and ensure directory exists
|
|
|
project_root = Path(__file__).resolve().parents[2] # repo root
|
|
|
audio_dir = project_root / "temp"
|
|
|
- audio_dir.mkdir(parents=True, exist_ok=True)
|
|
|
+ try:
|
|
|
+ audio_dir.mkdir(parents=True, exist_ok=True)
|
|
|
+ except Exception as e:
|
|
|
+ logger.error("Failed to create audio directory %s: %s", audio_dir, e, exc_info=True)
|
|
|
+ return TextToSpeechResponse(code=1, data=None, msg=f"create audio dir failed: {e}")
|
|
|
+
|
|
|
+ # Basic input validation
|
|
|
+ if not isinstance(text, str) or not text.strip():
|
|
|
+ msg = "text is required"
|
|
|
+ logger.error(msg)
|
|
|
+ return TextToSpeechResponse(code=1, data=None, msg=msg)
|
|
|
+ if not isinstance(filename, str) or not filename.strip():
|
|
|
+ msg = "filename is required"
|
|
|
+ logger.error(msg)
|
|
|
+ return TextToSpeechResponse(code=1, data=None, msg=msg)
|
|
|
+ if not dashscope.api_key:
|
|
|
+ msg = "DASHSCOPE_API_KEY is missing"
|
|
|
+ logger.error(msg)
|
|
|
+ return TextToSpeechResponse(code=1, data=None, msg=msg)
|
|
|
|
|
|
# determine desired output format (default mp3 for smaller size)
|
|
|
audio_format = (format or 'mp3').lower()
|
|
|
if audio_format not in {"wav", "mp3"}:
|
|
|
+ logger.info("unsupported format '%s', fallback to mp3", audio_format)
|
|
|
audio_format = "mp3"
|
|
|
|
|
|
# choose extension and sample rate
|
|
@@ -53,18 +76,39 @@ class SpeechProvider:
|
|
|
audio_format=audio_format,
|
|
|
)
|
|
|
|
|
|
- SpeechSynthesizer.call(
|
|
|
- model=(model or 'sambert-zhifei-v1'),
|
|
|
- volume=volume,
|
|
|
- text=text,
|
|
|
- pitch=pitch,
|
|
|
- rate=rate,
|
|
|
- format=audio_format,
|
|
|
- sample_rate=sample_rate,
|
|
|
- callback=callback,
|
|
|
- word_timestamp_enabled=True,
|
|
|
- phoneme_timestamp_enabled=True,
|
|
|
- )
|
|
|
+ # Run TTS with robust error handling
|
|
|
+ try:
|
|
|
+ SpeechSynthesizer.call(
|
|
|
+ model=(model or 'sambert-zhifei-v1'),
|
|
|
+ volume=volume,
|
|
|
+ text=text,
|
|
|
+ pitch=pitch,
|
|
|
+ rate=rate,
|
|
|
+ format=audio_format,
|
|
|
+ sample_rate=sample_rate,
|
|
|
+ callback=callback,
|
|
|
+ word_timestamp_enabled=True,
|
|
|
+ phoneme_timestamp_enabled=True,
|
|
|
+ )
|
|
|
+ except Exception as e:
|
|
|
+ logger.error("TTS call failed", exc_info=True)
|
|
|
+ # Ensure any open file handles are closed
|
|
|
+ try:
|
|
|
+ callback.on_complete()
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+ return TextToSpeechResponse(code=1, data=None, msg=str(e))
|
|
|
+
|
|
|
+ if callback.had_error:
|
|
|
+ # TTS reported an error via callback
|
|
|
+ base_msg = callback.error_message or "speech synthesis failed"
|
|
|
+ # Enrich message with model error code/status when available
|
|
|
+ if callback.error_code or callback.status_code is not None:
|
|
|
+ msg = f"[{callback.error_code or 'Error'}] {base_msg} (status={callback.status_code})"
|
|
|
+ else:
|
|
|
+ msg = base_msg
|
|
|
+ logger.error("TTS callback error: %s", msg)
|
|
|
+ return TextToSpeechResponse(code=1, data=None, msg=msg)
|
|
|
|
|
|
# After synthesis completes, upload the file to OSS
|
|
|
try:
|
|
@@ -73,12 +117,16 @@ class SpeechProvider:
|
|
|
try:
|
|
|
Path(out_path).unlink(missing_ok=True)
|
|
|
except Exception as del_err:
|
|
|
- print(f"[warn] Failed to delete local audio {out_path}: {del_err}")
|
|
|
- return TextToSpeechResponse(audio_url=url)
|
|
|
+ logger.warning("Failed to delete local audio %s: %s", out_path, del_err)
|
|
|
+ return TextToSpeechResponse(
|
|
|
+ code=0,
|
|
|
+ data=DataPayload(audio_url=url),
|
|
|
+ msg='success'
|
|
|
+ )
|
|
|
except Exception as e:
|
|
|
- # If upload fails, fall back to local path to avoid breaking
|
|
|
- print(f"[warn] Upload failed: {e}")
|
|
|
- return TextToSpeechResponse(audio_url=str(out_path))
|
|
|
+ # Keep local file for inspection; report error message
|
|
|
+ logger.error("Upload failed", exc_info=True)
|
|
|
+ return TextToSpeechResponse(code=1, data=None, msg=str(e))
|
|
|
|
|
|
|
|
|
class Callback(ResultCallback):
|
|
@@ -90,22 +138,31 @@ class Callback(ResultCallback):
|
|
|
self.wav_file = None
|
|
|
self._fh = None
|
|
|
self.audio_format = audio_format
|
|
|
+ self.had_error = False
|
|
|
+ self.error_message: Optional[str] = None
|
|
|
+ self.error_code: Optional[str] = None
|
|
|
+ self.status_code: Optional[int] = None
|
|
|
|
|
|
def on_open(self):
|
|
|
- print('Speech synthesizer is opened.')
|
|
|
- # Ensure parent directory exists (in case not created earlier)
|
|
|
- Path(self.out_path).parent.mkdir(parents=True, exist_ok=True)
|
|
|
- if self.audio_format == "wav":
|
|
|
- self.wav_file = wave_open(self.out_path, 'wb')
|
|
|
- self.wav_file.setnchannels(self.channels)
|
|
|
- self.wav_file.setsampwidth(self.sampwidth)
|
|
|
- self.wav_file.setframerate(self.sample_rate)
|
|
|
- else:
|
|
|
- # For mp3 (and other compressed formats), write raw bytes
|
|
|
- self._fh = open(self.out_path, 'wb')
|
|
|
+ logger.info('Speech synthesizer opened')
|
|
|
+ try:
|
|
|
+ # Ensure parent directory exists (in case not created earlier)
|
|
|
+ Path(self.out_path).parent.mkdir(parents=True, exist_ok=True)
|
|
|
+ if self.audio_format == "wav":
|
|
|
+ self.wav_file = wave_open(self.out_path, 'wb')
|
|
|
+ self.wav_file.setnchannels(self.channels)
|
|
|
+ self.wav_file.setsampwidth(self.sampwidth)
|
|
|
+ self.wav_file.setframerate(self.sample_rate)
|
|
|
+ else:
|
|
|
+ # For mp3 (and other compressed formats), write raw bytes
|
|
|
+ self._fh = open(self.out_path, 'wb')
|
|
|
+ except Exception as e:
|
|
|
+ self.had_error = True
|
|
|
+ self.error_message = f"open output failed: {e}"
|
|
|
+ logger.error("Failed to open output file %s: %s", self.out_path, e, exc_info=True)
|
|
|
|
|
|
def on_complete(self):
|
|
|
- print('Speech synthesizer is completed.')
|
|
|
+ logger.info('Speech synthesizer completed')
|
|
|
if self.wav_file:
|
|
|
self.wav_file.close()
|
|
|
self.wav_file = None
|
|
@@ -114,19 +171,44 @@ class Callback(ResultCallback):
|
|
|
self._fh = None
|
|
|
|
|
|
def on_error(self, response: SpeechSynthesisResponse):
|
|
|
- print('Speech synthesizer failed, response is %s' % (str(response)))
|
|
|
+ # Capture error and mark state for upstream handling
|
|
|
+ code, detail, status = _extract_dashscope_error(response)
|
|
|
+ self.had_error = True
|
|
|
+ self.error_message = detail
|
|
|
+ self.error_code = code
|
|
|
+ self.status_code = status
|
|
|
+ # Log with structured context
|
|
|
+ if code or status is not None:
|
|
|
+ logger.error('Speech synthesizer failed: code=%s status=%s msg=%s', code, status, detail)
|
|
|
+ else:
|
|
|
+ logger.error('Speech synthesizer failed: %s', detail)
|
|
|
+ # Ensure file handles are closed even on error
|
|
|
+ try:
|
|
|
+ self.on_complete()
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
|
|
|
def on_close(self):
|
|
|
- print('Speech synthesizer is closed.')
|
|
|
+ logger.info('Speech synthesizer closed')
|
|
|
|
|
|
def on_event(self, result: SpeechSynthesisResult):
|
|
|
frame = result.get_audio_frame()
|
|
|
if not frame:
|
|
|
return
|
|
|
- if self.wav_file:
|
|
|
- self.wav_file.writeframes(frame)
|
|
|
- elif self._fh:
|
|
|
- self._fh.write(frame)
|
|
|
+ try:
|
|
|
+ if self.wav_file:
|
|
|
+ self.wav_file.writeframes(frame)
|
|
|
+ elif self._fh:
|
|
|
+ self._fh.write(frame)
|
|
|
+ else:
|
|
|
+ # No open handle; mark error to surface upstream
|
|
|
+ self.had_error = True
|
|
|
+ self.error_message = "audio handle not initialized"
|
|
|
+ logger.error("Audio handle not initialized when receiving frame")
|
|
|
+ except Exception as e:
|
|
|
+ self.had_error = True
|
|
|
+ self.error_message = f"write frame failed: {e}"
|
|
|
+ logger.error("Failed writing audio frame: %s", e, exc_info=True)
|
|
|
|
|
|
|
|
|
def _extract_url_from_response(resp_json: dict) -> Optional[str]:
|
|
@@ -144,7 +226,6 @@ def _extract_url_from_response(resp_json: dict) -> Optional[str]:
|
|
|
for k in path:
|
|
|
if isinstance(cur, dict) and k in cur:
|
|
|
cur = cur[k]
|
|
|
- print(cur)
|
|
|
else:
|
|
|
ok = False
|
|
|
break
|
|
@@ -153,10 +234,62 @@ def _extract_url_from_response(resp_json: dict) -> Optional[str]:
|
|
|
return None
|
|
|
|
|
|
|
|
|
+def _extract_dashscope_error(resp: object) -> tuple[Optional[str], str, Optional[int]]:
|
|
|
+ """Best-effort extraction of (code, message, http_status) from DashScope response.
|
|
|
+ Compatible with SpeechSynthesisResponse or dict-like payloads.
|
|
|
+ """
|
|
|
+ code: Optional[str] = None
|
|
|
+ msg: str = "speech synthesis failed"
|
|
|
+ status: Optional[int] = None
|
|
|
+
|
|
|
+ # If it looks like a dict
|
|
|
+ if isinstance(resp, dict):
|
|
|
+ code = str(resp.get("code")) if resp.get("code") is not None else None
|
|
|
+ status = resp.get("status_code") if isinstance(resp.get("status_code"), int) else None
|
|
|
+ msg = str(resp.get("message") or msg)
|
|
|
+ return code, msg, status
|
|
|
+
|
|
|
+ # Try attribute-style access
|
|
|
+ try:
|
|
|
+ status_attr = getattr(resp, "status_code", None)
|
|
|
+ if isinstance(status_attr, int):
|
|
|
+ status = status_attr
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+ try:
|
|
|
+ code_attr = getattr(resp, "code", None)
|
|
|
+ if code_attr is not None:
|
|
|
+ code = str(code_attr)
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+ try:
|
|
|
+ msg_attr = getattr(resp, "message", None)
|
|
|
+ if msg_attr:
|
|
|
+ msg = str(msg_attr)
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # As a last resort, try to parse JSON from str(resp)
|
|
|
+ try:
|
|
|
+ s = str(resp)
|
|
|
+ if s and s.strip().startswith("{"):
|
|
|
+ data = json.loads(s)
|
|
|
+ if isinstance(data, dict):
|
|
|
+ code = str(data.get("code")) if data.get("code") is not None else code
|
|
|
+ status = data.get("status_code") if isinstance(data.get("status_code"), int) else status
|
|
|
+ msg = str(data.get("message") or msg)
|
|
|
+ except Exception:
|
|
|
+ pass
|
|
|
+
|
|
|
+ return code, msg, status
|
|
|
+
|
|
|
+
|
|
|
def _upload_file(upload_url: str, file_path: Path) -> str:
|
|
|
if not upload_url:
|
|
|
+ logger.error("upload_url is empty")
|
|
|
raise ValueError("upload_url is empty")
|
|
|
if not Path(file_path).exists():
|
|
|
+ logger.error("audio file not found: %s", file_path)
|
|
|
raise FileNotFoundError(str(file_path))
|
|
|
|
|
|
filename = Path(file_path).name
|
|
@@ -177,7 +310,7 @@ def _upload_file(upload_url: str, file_path: Path) -> str:
|
|
|
data = resp.json()
|
|
|
url = _extract_url_from_response(data)
|
|
|
except Exception:
|
|
|
- pass
|
|
|
+ logger.warning("Upload response is not valid JSON")
|
|
|
|
|
|
if not url:
|
|
|
# As a last resort, if the response text looks like a URL, use it
|
|
@@ -186,6 +319,7 @@ def _upload_file(upload_url: str, file_path: Path) -> str:
|
|
|
url = txt
|
|
|
|
|
|
if not url:
|
|
|
- raise RuntimeError("Upload succeeded but no URL found in response")
|
|
|
+ logger.error("upload succeeded but no URL found in response")
|
|
|
+ raise RuntimeError("upload succeeded but no URL found in response")
|
|
|
|
|
|
return url
|