před 3 měsíci · 032faee850
--- a/.dockerignore
+++ b/.dockerignore
@@ -8,7 +8,6 @@ build
 
				 dist
			
 
				 .git
			
 
				 .gitignore
			
 
				-.env
			
 
				 *.log
			
 
				 *.sqlite3
			
 
				 app/audio
			
--- a/.env
+++ b/.env
@@ -0,0 +1 @@
 
				+APIKEY = 'sk-65745f458f654368b4d3a83362a607c1'
			
--- a/.gitignore
+++ b/.gitignore
@@ -15,8 +15,9 @@ venv/
 
				 .idea/
			
 
				 .vscode/
			
 
				 
			
 
				-# Local environment
			
 
				-.env
			
 
				-
			
 
				 # Temp cache directory
			
 
				 temp/
			
 
				+
			
 
				+# logs
			
 
				+logs/
			
 
				+*/log
			
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,6 @@
 
				 # syntax=docker/dockerfile:1
			
 
				 
			
 
				-FROM python:3.11-slim AS base
			
 
				+FROM registry.cn-hangzhou.aliyuncs.com/stuuudy/python:3.11-slim AS base
			
 
				 
			
 
				 ENV PYTHONDONTWRITEBYTECODE=1 \
			
 
				     PYTHONUNBUFFERED=1 \
			
@@ -10,20 +10,21 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
 
				 WORKDIR /app
			
 
				 
			
 
				 # System deps
			
 
				-RUN apt-get update -y && apt-get install -y --no-install-recommends \
			
 
				-    build-essential curl && \
			
 
				-    rm -rf /var/lib/apt/lists/*
			
 
				+# RUN apt-get update -y && apt-get install -y --no-install-recommends \
			
 
				+#     build-essential curl && \
			
 
				+#     rm -rf /var/lib/apt/lists/*
			
 
				 
			
 
				 # Install Python deps first (better layer caching)
			
 
				 COPY requirements.txt .
			
 
				-RUN pip install -r requirements.txt
			
 
				+RUN pip install --no-cache-dir --upgrade pip -i https://pypi.tuna.tsinghua.edu.cn/simple/
			
 
				+RUN pip install --no-cache-dir -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/
			
 
				 
			
 
				 # Copy source
			
 
				 COPY app ./app
			
 
				 COPY README.md .
			
 
				 
			
 
				 # Optionally copy .env at build time (usually mounted at runtime)
			
 
				-# COPY .env .
			
 
				+COPY .env .
			
 
				 
			
 
				 EXPOSE 8000
			
 
				 
			
--- a/README.md
+++ b/README.md
@@ -29,6 +29,10 @@ uvicorn app.main:app --reload --port 8000
 
				 - Swagger UI: http://127.0.0.1:8000/docs
			
 
				 - ReDoc: http://127.0.0.1:8000/redoc
			
 
				 
			
 
				+5) 内网IP
			
 
				+
			
 
				+192.168.244.164
			
 
				+
			
 
				 ## 项目结构
			
 
				 
			
 
				 ```
			
--- a/app/core/logger.py
+++ b/app/core/logger.py
@@ -0,0 +1,57 @@
 
				+from __future__ import annotations
			
 
				+
			
 
				+import os
			
 
				+import logging
			
 
				+from pathlib import Path
			
 
				+from logging.handlers import RotatingFileHandler
			
 
				+from typing import Optional
			
 
				+
			
 
				+_CONFIGURED = False
			
 
				+
			
 
				+
			
 
				+def _project_root() -> Path:
			
 
				+    # Resolve the repo root from this file's location: repo/app/core/logger.py -> parents[2] == repo
			
 
				+    return Path(__file__).resolve().parents[2]
			
 
				+
			
 
				+
			
 
				+def configure_logging(level: Optional[str] = None, log_dir: Optional[str] = None) -> None:
			
 
				+    global _CONFIGURED
			
 
				+    if _CONFIGURED:
			
 
				+        return
			
 
				+
			
 
				+    # Determine log level
			
 
				+    level_name = (level or os.getenv("LOG_LEVEL") or "INFO").upper()
			
 
				+    log_level = getattr(logging, level_name, logging.INFO)
			
 
				+
			
 
				+    # Determine logs directory
			
 
				+    base_dir = Path(log_dir) if log_dir else _project_root() / "logs"
			
 
				+    base_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    # Root logger configuration
			
 
				+    root = logging.getLogger()
			
 
				+    root.setLevel(log_level)
			
 
				+
			
 
				+    fmt = logging.Formatter(
			
 
				+        fmt="%(asctime)s [%(levelname)s] %(name)s - %(message)s",
			
 
				+        datefmt="%Y-%m-%d %H:%M:%S",
			
 
				+    )
			
 
				+
			
 
				+    file_handler = RotatingFileHandler(base_dir / "app.log", maxBytes=1_000_000, backupCount=3)
			
 
				+    file_handler.setFormatter(fmt)
			
 
				+    file_handler.setLevel(log_level)
			
 
				+    root.addHandler(file_handler)
			
 
				+
			
 
				+    # Stream warnings+ to stderr for container visibility
			
 
				+    stream = logging.StreamHandler()
			
 
				+    stream.setLevel(logging.WARNING)
			
 
				+    stream.setFormatter(fmt)
			
 
				+    root.addHandler(stream)
			
 
				+
			
 
				+    _CONFIGURED = True
			
 
				+
			
 
				+
			
 
				+def get_logger(name: Optional[str] = None) -> logging.Logger:
			
 
				+    if not _CONFIGURED:
			
 
				+        configure_logging()
			
 
				+    return logging.getLogger(name or "app")
			
 
				+
			
--- a/app/providers/mock_provider.py
+++ b/app/providers/mock_provider.py
@@ -1,33 +0,0 @@
 
				-from typing import List, Optional
			
 
				-
			
 
				-from ..schemas.speech import ChatMessage, ChatResponse, Usage
			
 
				-
			
 
				-
			
 
				-class MockProvider:
			
 
				-    """A simple echo-like provider for local testing.
			
 
				-
			
 
				-    It takes the last user message and returns a prefixed reply.
			
 
				-    """
			
 
				-
			
 
				-    def chat(
			
 
				-        self,
			
 
				-        messages: List[ChatMessage],
			
 
				-        *,
			
 
				-        model: Optional[str] = None,
			
 
				-        temperature: Optional[float] = None,
			
 
				-        max_tokens: Optional[int] = None,
			
 
				-    ) -> ChatResponse:
			
 
				-        last_user = next((m.content for m in reversed(messages) if m.role == "user"), "")
			
 
				-        content = f"[mock] You said: {last_user}"
			
 
				-
			
 
				-        # Very rough word-count "tokens" approximation for demo purposes only
			
 
				-        prompt_tokens = sum(len(m.content.split()) for m in messages)
			
 
				-        completion_tokens = len(content.split())
			
 
				-        usage = Usage(
			
 
				-            prompt_tokens=prompt_tokens,
			
 
				-            completion_tokens=completion_tokens,
			
 
				-            total_tokens=prompt_tokens + completion_tokens,
			
 
				-        )
			
 
				-
			
 
				-        return ChatResponse(content=content, model=model or "mock-echo-001", usage=usage)
			
 
				-
			
--- a/app/providers/speech_provider.py
+++ b/app/providers/speech_provider.py
@@ -11,13 +11,17 @@ from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthes
 
				 
			
 
				 import requests
			
 
				 
			
 
				-from ..schemas.speech import TextToSpeechResponse
			
 
				+from ..schemas.speech import TextToSpeechResponse, DataPayload
			
 
				 from ..core.config import get_settings
			
 
				+from ..core.logger import get_logger
			
 
				 
			
 
				 settings = get_settings()
			
 
				 # Configure DashScope API key from env/.env
			
 
				 dashscope.api_key = settings.dashscope_api_key or ""
			
 
				-UPLOAD_PATH = settings.upload_path
			
 
				+UPLOAD_PATH = settings.upload_path or ""
			
 
				+
			
 
				+# module logger
			
 
				+logger = get_logger("speech_provider")
			
 
				 
			
 
				 
			
 
				 def _safe_filename(name: str) -> str:
			
@@ -30,11 +34,30 @@ class SpeechProvider:
 
				         # Resolve output path under project-root/temp and ensure directory exists
			
 
				         project_root = Path(__file__).resolve().parents[2]  # repo root
			
 
				         audio_dir = project_root / "temp"
			
 
				-        audio_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        try:
			
 
				+            audio_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        except Exception as e:
			
 
				+            logger.error("Failed to create audio directory %s: %s", audio_dir, e, exc_info=True)
			
 
				+            return TextToSpeechResponse(code=1, data=None, msg=f"create audio dir failed: {e}")
			
 
				+
			
 
				+        # Basic input validation
			
 
				+        if not isinstance(text, str) or not text.strip():
			
 
				+            msg = "text is required"
			
 
				+            logger.error(msg)
			
 
				+            return TextToSpeechResponse(code=1, data=None, msg=msg)
			
 
				+        if not isinstance(filename, str) or not filename.strip():
			
 
				+            msg = "filename is required"
			
 
				+            logger.error(msg)
			
 
				+            return TextToSpeechResponse(code=1, data=None, msg=msg)
			
 
				+        if not dashscope.api_key:
			
 
				+            msg = "DASHSCOPE_API_KEY is missing"
			
 
				+            logger.error(msg)
			
 
				+            return TextToSpeechResponse(code=1, data=None, msg=msg)
			
 
				 
			
 
				         # determine desired output format (default mp3 for smaller size)
			
 
				         audio_format = (format or 'mp3').lower()
			
 
				         if audio_format not in {"wav", "mp3"}:
			
 
				+            logger.info("unsupported format '%s', fallback to mp3", audio_format)
			
 
				             audio_format = "mp3"
			
 
				 
			
 
				         # choose extension and sample rate
			
@@ -53,18 +76,39 @@ class SpeechProvider:
 
				             audio_format=audio_format,
			
 
				         )
			
 
				 
			
 
				-        SpeechSynthesizer.call(
			
 
				-            model=(model or 'sambert-zhifei-v1'),
			
 
				-            volume=volume,
			
 
				-            text=text,
			
 
				-            pitch=pitch,
			
 
				-            rate=rate,
			
 
				-            format=audio_format,
			
 
				-            sample_rate=sample_rate,
			
 
				-            callback=callback,
			
 
				-            word_timestamp_enabled=True,
			
 
				-            phoneme_timestamp_enabled=True,
			
 
				-        )
			
 
				+        # Run TTS with robust error handling
			
 
				+        try:
			
 
				+            SpeechSynthesizer.call(
			
 
				+                model=(model or 'sambert-zhifei-v1'),
			
 
				+                volume=volume,
			
 
				+                text=text,
			
 
				+                pitch=pitch,
			
 
				+                rate=rate,
			
 
				+                format=audio_format,
			
 
				+                sample_rate=sample_rate,
			
 
				+                callback=callback,
			
 
				+                word_timestamp_enabled=True,
			
 
				+                phoneme_timestamp_enabled=True,
			
 
				+            )
			
 
				+        except Exception as e:
			
 
				+            logger.error("TTS call failed", exc_info=True)
			
 
				+            # Ensure any open file handles are closed
			
 
				+            try:
			
 
				+                callback.on_complete()
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+            return TextToSpeechResponse(code=1, data=None, msg=str(e))
			
 
				+
			
 
				+        if callback.had_error:
			
 
				+            # TTS reported an error via callback
			
 
				+            base_msg = callback.error_message or "speech synthesis failed"
			
 
				+            # Enrich message with model error code/status when available
			
 
				+            if callback.error_code or callback.status_code is not None:
			
 
				+                msg = f"[{callback.error_code or 'Error'}] {base_msg} (status={callback.status_code})"
			
 
				+            else:
			
 
				+                msg = base_msg
			
 
				+            logger.error("TTS callback error: %s", msg)
			
 
				+            return TextToSpeechResponse(code=1, data=None, msg=msg)
			
 
				 
			
 
				         # After synthesis completes, upload the file to OSS
			
 
				         try:
			
@@ -73,12 +117,16 @@ class SpeechProvider:
 
				             try:
			
 
				                 Path(out_path).unlink(missing_ok=True)
			
 
				             except Exception as del_err:
			
 
				-                print(f"[warn] Failed to delete local audio {out_path}: {del_err}")
			
 
				-            return TextToSpeechResponse(audio_url=url)
			
 
				+                logger.warning("Failed to delete local audio %s: %s", out_path, del_err)
			
 
				+            return TextToSpeechResponse(
			
 
				+                code=0,
			
 
				+                data=DataPayload(audio_url=url),
			
 
				+                msg='success'
			
 
				+            )
			
 
				         except Exception as e:
			
 
				-            # If upload fails, fall back to local path to avoid breaking
			
 
				-            print(f"[warn] Upload failed: {e}")
			
 
				-            return TextToSpeechResponse(audio_url=str(out_path))
			
 
				+            # Keep local file for inspection; report error message
			
 
				+            logger.error("Upload failed", exc_info=True)
			
 
				+            return TextToSpeechResponse(code=1, data=None, msg=str(e))
			
 
				 
			
 
				 
			
 
				 class Callback(ResultCallback):
			
@@ -90,22 +138,31 @@ class Callback(ResultCallback):
 
				         self.wav_file = None
			
 
				         self._fh = None
			
 
				         self.audio_format = audio_format
			
 
				+        self.had_error = False
			
 
				+        self.error_message: Optional[str] = None
			
 
				+        self.error_code: Optional[str] = None
			
 
				+        self.status_code: Optional[int] = None
			
 
				 
			
 
				     def on_open(self):
			
 
				-        print('Speech synthesizer is opened.')
			
 
				-        # Ensure parent directory exists (in case not created earlier)
			
 
				-        Path(self.out_path).parent.mkdir(parents=True, exist_ok=True)
			
 
				-        if self.audio_format == "wav":
			
 
				-            self.wav_file = wave_open(self.out_path, 'wb')
			
 
				-            self.wav_file.setnchannels(self.channels)
			
 
				-            self.wav_file.setsampwidth(self.sampwidth)
			
 
				-            self.wav_file.setframerate(self.sample_rate)
			
 
				-        else:
			
 
				-            # For mp3 (and other compressed formats), write raw bytes
			
 
				-            self._fh = open(self.out_path, 'wb')
			
 
				+        logger.info('Speech synthesizer opened')
			
 
				+        try:
			
 
				+            # Ensure parent directory exists (in case not created earlier)
			
 
				+            Path(self.out_path).parent.mkdir(parents=True, exist_ok=True)
			
 
				+            if self.audio_format == "wav":
			
 
				+                self.wav_file = wave_open(self.out_path, 'wb')
			
 
				+                self.wav_file.setnchannels(self.channels)
			
 
				+                self.wav_file.setsampwidth(self.sampwidth)
			
 
				+                self.wav_file.setframerate(self.sample_rate)
			
 
				+            else:
			
 
				+                # For mp3 (and other compressed formats), write raw bytes
			
 
				+                self._fh = open(self.out_path, 'wb')
			
 
				+        except Exception as e:
			
 
				+            self.had_error = True
			
 
				+            self.error_message = f"open output failed: {e}"
			
 
				+            logger.error("Failed to open output file %s: %s", self.out_path, e, exc_info=True)
			
 
				 
			
 
				     def on_complete(self):
			
 
				-        print('Speech synthesizer is completed.')
			
 
				+        logger.info('Speech synthesizer completed')
			
 
				         if self.wav_file:
			
 
				             self.wav_file.close()
			
 
				             self.wav_file = None
			
@@ -114,19 +171,44 @@ class Callback(ResultCallback):
 
				             self._fh = None
			
 
				 
			
 
				     def on_error(self, response: SpeechSynthesisResponse):
			
 
				-        print('Speech synthesizer failed, response is %s' % (str(response)))
			
 
				+        # Capture error and mark state for upstream handling
			
 
				+        code, detail, status = _extract_dashscope_error(response)
			
 
				+        self.had_error = True
			
 
				+        self.error_message = detail
			
 
				+        self.error_code = code
			
 
				+        self.status_code = status
			
 
				+        # Log with structured context
			
 
				+        if code or status is not None:
			
 
				+            logger.error('Speech synthesizer failed: code=%s status=%s msg=%s', code, status, detail)
			
 
				+        else:
			
 
				+            logger.error('Speech synthesizer failed: %s', detail)
			
 
				+        # Ensure file handles are closed even on error
			
 
				+        try:
			
 
				+            self.on_complete()
			
 
				+        except Exception:
			
 
				+            pass
			
 
				 
			
 
				     def on_close(self):
			
 
				-        print('Speech synthesizer is closed.')
			
 
				+        logger.info('Speech synthesizer closed')
			
 
				 
			
 
				     def on_event(self, result: SpeechSynthesisResult):
			
 
				         frame = result.get_audio_frame()
			
 
				         if not frame:
			
 
				             return
			
 
				-        if self.wav_file:
			
 
				-            self.wav_file.writeframes(frame)
			
 
				-        elif self._fh:
			
 
				-            self._fh.write(frame)
			
 
				+        try:
			
 
				+            if self.wav_file:
			
 
				+                self.wav_file.writeframes(frame)
			
 
				+            elif self._fh:
			
 
				+                self._fh.write(frame)
			
 
				+            else:
			
 
				+                # No open handle; mark error to surface upstream
			
 
				+                self.had_error = True
			
 
				+                self.error_message = "audio handle not initialized"
			
 
				+                logger.error("Audio handle not initialized when receiving frame")
			
 
				+        except Exception as e:
			
 
				+            self.had_error = True
			
 
				+            self.error_message = f"write frame failed: {e}"
			
 
				+            logger.error("Failed writing audio frame: %s", e, exc_info=True)
			
 
				 
			
 
				 
			
 
				 def _extract_url_from_response(resp_json: dict) -> Optional[str]:
			
@@ -144,7 +226,6 @@ def _extract_url_from_response(resp_json: dict) -> Optional[str]:
 
				         for k in path:
			
 
				             if isinstance(cur, dict) and k in cur:
			
 
				                 cur = cur[k]
			
 
				-                print(cur)
			
 
				             else:
			
 
				                 ok = False
			
 
				                 break
			
@@ -153,10 +234,62 @@ def _extract_url_from_response(resp_json: dict) -> Optional[str]:
 
				     return None
			
 
				 
			
 
				 
			
 
				+def _extract_dashscope_error(resp: object) -> tuple[Optional[str], str, Optional[int]]:
			
 
				+    """Best-effort extraction of (code, message, http_status) from DashScope response.
			
 
				+    Compatible with SpeechSynthesisResponse or dict-like payloads.
			
 
				+    """
			
 
				+    code: Optional[str] = None
			
 
				+    msg: str = "speech synthesis failed"
			
 
				+    status: Optional[int] = None
			
 
				+
			
 
				+    # If it looks like a dict
			
 
				+    if isinstance(resp, dict):
			
 
				+        code = str(resp.get("code")) if resp.get("code") is not None else None
			
 
				+        status = resp.get("status_code") if isinstance(resp.get("status_code"), int) else None
			
 
				+        msg = str(resp.get("message") or msg)
			
 
				+        return code, msg, status
			
 
				+
			
 
				+    # Try attribute-style access
			
 
				+    try:
			
 
				+        status_attr = getattr(resp, "status_code", None)
			
 
				+        if isinstance(status_attr, int):
			
 
				+            status = status_attr
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+    try:
			
 
				+        code_attr = getattr(resp, "code", None)
			
 
				+        if code_attr is not None:
			
 
				+            code = str(code_attr)
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+    try:
			
 
				+        msg_attr = getattr(resp, "message", None)
			
 
				+        if msg_attr:
			
 
				+            msg = str(msg_attr)
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+    # As a last resort, try to parse JSON from str(resp)
			
 
				+    try:
			
 
				+        s = str(resp)
			
 
				+        if s and s.strip().startswith("{"):
			
 
				+            data = json.loads(s)
			
 
				+            if isinstance(data, dict):
			
 
				+                code = str(data.get("code")) if data.get("code") is not None else code
			
 
				+                status = data.get("status_code") if isinstance(data.get("status_code"), int) else status
			
 
				+                msg = str(data.get("message") or msg)
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+    return code, msg, status
			
 
				+
			
 
				+
			
 
				 def _upload_file(upload_url: str, file_path: Path) -> str:
			
 
				     if not upload_url:
			
 
				+        logger.error("upload_url is empty")
			
 
				         raise ValueError("upload_url is empty")
			
 
				     if not Path(file_path).exists():
			
 
				+        logger.error("audio file not found: %s", file_path)
			
 
				         raise FileNotFoundError(str(file_path))
			
 
				 
			
 
				     filename = Path(file_path).name
			
@@ -177,7 +310,7 @@ def _upload_file(upload_url: str, file_path: Path) -> str:
 
				         data = resp.json()
			
 
				         url = _extract_url_from_response(data)
			
 
				     except Exception:
			
 
				-        pass
			
 
				+        logger.warning("Upload response is not valid JSON")
			
 
				 
			
 
				     if not url:
			
 
				         # As a last resort, if the response text looks like a URL, use it
			
@@ -186,6 +319,7 @@ def _upload_file(upload_url: str, file_path: Path) -> str:
 
				             url = txt
			
 
				 
			
 
				     if not url:
			
 
				-        raise RuntimeError("Upload succeeded but no URL found in response")
			
 
				+        logger.error("upload succeeded but no URL found in response")
			
 
				+        raise RuntimeError("upload succeeded but no URL found in response")
			
 
				 
			
 
				     return url
			
--- a/app/schemas/speech.py
+++ b/app/schemas/speech.py
@@ -28,14 +28,18 @@ class ChatResponse(BaseModel):
 
				     model: Optional[str] = None
			
 
				     usage: Optional[Usage] = None
			
 
				 
			
 
				-class TextToSpeechResponse(BaseModel):
			
 
				+class DataPayload(BaseModel):
			
 
				     audio_url: str
			
 
				 
			
 
				+class TextToSpeechResponse(BaseModel):
			
 
				+    code: int
			
 
				+    data: Optional[DataPayload] = None
			
 
				+    msg: Optional[str] = None
			
 
				+
			
 
				 class TextToSpeechRequest(BaseModel):
			
 
				     volume: int = 1
			
 
				     pitch: float = 1
			
 
				     rate: float = 1
			
 
				     filename: str
			
 
				     text: str
			
 
				-    model: Optional[str] = None
			
 
				-    format: Optional[str] = None
			
 
				+    model: str
		`@@ -0,0 +1 @@`
		`+APIKEY = 'sk-65745f458f654368b4d3a83362a607c1'`