|
|
@@ -9,19 +9,15 @@ from __future__ import annotations
|
|
|
|
|
|
import json
|
|
|
import os
|
|
|
-from pathlib import Path
|
|
|
from typing import Any, Callable, Mapping
|
|
|
|
|
|
import httpx
|
|
|
|
|
|
-from content_agent.integrations import video_fetch
|
|
|
+from content_agent.integrations import oss_upload, video_fetch
|
|
|
|
|
|
DEFAULT_OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
|
|
|
DEFAULT_VIDEO_MODEL = "google/gemini-3-flash-preview"
|
|
|
DEFAULT_VIDEO_TIMEOUT_SECONDS = 90.0
|
|
|
-# 原片留档目录(2026-06-12 拍板:全量存,含未过审;相对仓库根,服务器同款)。
|
|
|
-DEFAULT_RAW_VIDEO_DIR = "data"
|
|
|
-
|
|
|
V4_GEMINI_QUERY_RELEVANCE_SCHEMA_VERSION = "v4_gemini_query_relevance.v1"
|
|
|
|
|
|
_SYSTEM_PROMPT = "你是视频内容与搜索需求相关性审核助手。只输出一个 JSON 对象,不要任何解释或 markdown。"
|
|
|
@@ -55,6 +51,12 @@ def _fail(
|
|
|
}
|
|
|
|
|
|
|
|
|
+def _with_media_update(result: dict[str, Any], update: dict[str, Any] | None) -> dict[str, Any]:
|
|
|
+ if update:
|
|
|
+ return {**result, "media_storage_update": update}
|
|
|
+ return result
|
|
|
+
|
|
|
+
|
|
|
def _clamp_score(value: Any) -> float:
|
|
|
try:
|
|
|
number = float(value)
|
|
|
@@ -123,16 +125,16 @@ class GeminiVideoClient:
|
|
|
base_url: str = DEFAULT_OPENROUTER_BASE_URL,
|
|
|
timeout_seconds: float = DEFAULT_VIDEO_TIMEOUT_SECONDS,
|
|
|
fetch_fn: Callable[..., str] = video_fetch.fetch_and_compress,
|
|
|
+ oss_upload_fn: Callable[..., dict[str, Any]] | None = None,
|
|
|
http_post: Callable[..., Any] = httpx.post,
|
|
|
- raw_video_save_dir: str | None = None,
|
|
|
) -> None:
|
|
|
self.api_key = api_key
|
|
|
self.model = model
|
|
|
self.base_url = base_url.rstrip("/")
|
|
|
self.timeout_seconds = timeout_seconds
|
|
|
self.fetch_fn = fetch_fn
|
|
|
+ self.oss_upload_fn = oss_upload_fn
|
|
|
self.http_post = http_post
|
|
|
- self.raw_video_save_dir = raw_video_save_dir
|
|
|
|
|
|
@classmethod
|
|
|
def from_env(cls, env: Mapping[str, str] | None = None) -> "GeminiVideoClient":
|
|
|
@@ -145,19 +147,9 @@ class GeminiVideoClient:
|
|
|
model=source.get("CONTENT_AGENT_VIDEO_LLM_MODEL") or DEFAULT_VIDEO_MODEL,
|
|
|
base_url=source.get("OPENROUTER_BASE_URL") or DEFAULT_OPENROUTER_BASE_URL,
|
|
|
timeout_seconds=float(source.get("CONTENT_AGENT_VIDEO_LLM_TIMEOUT_SECONDS") or DEFAULT_VIDEO_TIMEOUT_SECONDS),
|
|
|
- raw_video_save_dir=DEFAULT_RAW_VIDEO_DIR,
|
|
|
+ oss_upload_fn=oss_upload.upload_video_from_env,
|
|
|
)
|
|
|
|
|
|
- def _raw_save_path(self, content: dict[str, Any]) -> str | None:
|
|
|
- """原片留档路径 data/{run_id}/{platform_content_id}.mp4;身份字段缺失则不存。"""
|
|
|
- if not self.raw_video_save_dir:
|
|
|
- return None
|
|
|
- run_id = content.get("run_id")
|
|
|
- platform_content_id = content.get("platform_content_id")
|
|
|
- if not run_id or not platform_content_id:
|
|
|
- return None
|
|
|
- return str(Path(self.raw_video_save_dir) / str(run_id) / f"{platform_content_id}.mp4")
|
|
|
-
|
|
|
def analyze(
|
|
|
self,
|
|
|
content: dict[str, Any],
|
|
|
@@ -167,15 +159,18 @@ class GeminiVideoClient:
|
|
|
query_text = _query_text(content, source_context)
|
|
|
play_url = media.get("play_url")
|
|
|
if not play_url:
|
|
|
- return _fail("no_play_url", query_text=query_text)
|
|
|
- fetch_kwargs: dict[str, Any] = {}
|
|
|
- save_path = self._raw_save_path(content)
|
|
|
- if save_path:
|
|
|
- fetch_kwargs["save_raw_to"] = save_path
|
|
|
+ return _with_media_update(
|
|
|
+ _fail("no_play_url", query_text=query_text),
|
|
|
+ _media_unavailable_update("no_play_url"),
|
|
|
+ )
|
|
|
+ media_update = self._upload_to_oss(play_url, content.get("platform", "douyin"))
|
|
|
try:
|
|
|
- data_url = self.fetch_fn(play_url, content.get("platform", "douyin"), **fetch_kwargs)
|
|
|
+ data_url = self.fetch_fn(play_url, content.get("platform", "douyin"))
|
|
|
except Exception as exc:
|
|
|
- return _fail("video_fetch_failed", query_text=query_text, exception_type=type(exc).__name__)
|
|
|
+ return _with_media_update(
|
|
|
+ _fail("video_fetch_failed", query_text=query_text, exception_type=type(exc).__name__),
|
|
|
+ media_update or _media_unavailable_update(f"video_fetch_failed:{type(exc).__name__}"),
|
|
|
+ )
|
|
|
|
|
|
messages = [
|
|
|
{"role": "system", "content": _SYSTEM_PROMPT},
|
|
|
@@ -198,7 +193,7 @@ class GeminiVideoClient:
|
|
|
timeout=self.timeout_seconds,
|
|
|
)
|
|
|
response.raise_for_status()
|
|
|
- return _parse(response.json(), query_text)
|
|
|
+ return _with_media_update(_parse(response.json(), query_text), media_update)
|
|
|
except httpx.HTTPError as exc:
|
|
|
last_failure = _fail(
|
|
|
"gemini_http_error",
|
|
|
@@ -209,7 +204,7 @@ class GeminiVideoClient:
|
|
|
)
|
|
|
if attempt == 0 and _retryable_http(exc):
|
|
|
continue
|
|
|
- return last_failure
|
|
|
+ return _with_media_update(last_failure, media_update)
|
|
|
except (KeyError, IndexError, TypeError, ValueError, json.JSONDecodeError) as exc:
|
|
|
last_failure = _fail(
|
|
|
"gemini_response_invalid",
|
|
|
@@ -219,8 +214,39 @@ class GeminiVideoClient:
|
|
|
)
|
|
|
if attempt == 0:
|
|
|
continue
|
|
|
- return last_failure
|
|
|
- return last_failure or _fail("gemini_unknown_error", query_text=query_text)
|
|
|
+ return _with_media_update(last_failure, media_update)
|
|
|
+ return _with_media_update(last_failure or _fail("gemini_unknown_error", query_text=query_text), media_update)
|
|
|
+
|
|
|
+ def _upload_to_oss(self, play_url: str, platform: str) -> dict[str, Any] | None:
|
|
|
+ if not self.oss_upload_fn:
|
|
|
+ return None
|
|
|
+ upload_result = self.oss_upload_fn(
|
|
|
+ play_url,
|
|
|
+ referer=video_fetch._download_headers(platform, None),
|
|
|
+ )
|
|
|
+ if upload_result.get("status") == "ok":
|
|
|
+ raw_payload = {
|
|
|
+ "oss_object_key": upload_result.get("oss_object_key"),
|
|
|
+ "save_oss_timestamp": upload_result.get("save_oss_timestamp"),
|
|
|
+ }
|
|
|
+ return {
|
|
|
+ "content_media_status": "oss_uploaded",
|
|
|
+ "oss_url": upload_result.get("oss_url"),
|
|
|
+ "local_path": None,
|
|
|
+ "raw_payload": {k: v for k, v in raw_payload.items() if v is not None},
|
|
|
+ }
|
|
|
+ raw_payload = {
|
|
|
+ "upload_failure_reason": upload_result.get("failure_type") or "oss_upload_failed",
|
|
|
+ "upload_exception_type": upload_result.get("exception_type"),
|
|
|
+ "upload_http_status_code": upload_result.get("http_status_code"),
|
|
|
+ }
|
|
|
+ return {
|
|
|
+ "content_media_status": "oss_upload_pending",
|
|
|
+ "oss_url": None,
|
|
|
+ "local_path": None,
|
|
|
+ "failure_reason": raw_payload["upload_failure_reason"],
|
|
|
+ "raw_payload": {k: v for k, v in raw_payload.items() if v is not None},
|
|
|
+ }
|
|
|
|
|
|
|
|
|
class MissingGeminiVideoClient:
|
|
|
@@ -234,3 +260,13 @@ class MissingGeminiVideoClient:
|
|
|
source_context: dict[str, Any],
|
|
|
) -> dict[str, Any]:
|
|
|
return _fail("gemini_config_missing", query_text=_query_text(content, source_context), exception_type=self.reason)
|
|
|
+
|
|
|
+
|
|
|
+def _media_unavailable_update(failure_reason: str) -> dict[str, Any]:
|
|
|
+ return {
|
|
|
+ "content_media_status": "unavailable",
|
|
|
+ "oss_url": None,
|
|
|
+ "local_path": None,
|
|
|
+ "failure_reason": failure_reason,
|
|
|
+ "raw_payload": {"failure_reason": failure_reason},
|
|
|
+ }
|