video_fetch.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
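"""Video fetch chain (V3-M2A).

Download the video from play_url (with platform-specific download headers) ->
compress it with imageio-ffmpeg to a ~4 MB low-resolution clip -> encode it as
a base64 data URL that is fed to GeminiVideoClient (OpenRouter image_url).

Real downloads/compression only run in the M7 live smoke test; unit tests are
fully mocked.

Decided 2026-06-12: every successfully downloaded original is written in full
under data/ (whether or not it passes review, because play_url is time-limited
and the original cannot be fetched again later).
"""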
  7. from __future__ import annotations
  8. import base64
  9. import subprocess
  10. from pathlib import Path
  11. from typing import Any
  12. import httpx
  13. import imageio_ffmpeg
  14. # platform_profiles 里写的是 "iOS UA"/"PC UA" 占位,这里映射成真实串 + Referer。
  15. _PLATFORM_DOWNLOAD_HEADERS = {
  16. "douyin": {
  17. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148",
  18. "Referer": "https://www.douyin.com/",
  19. },
  20. "shipinhao": {
  21. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36",
  22. "Referer": "https://channels.weixin.qq.com/",
  23. },
  24. }
  25. # 已拍板压缩档:360p / 1fps / 低清,实测 ~4MB(memory/video-multimodal-analysis)。
  26. _FFMPEG_ARGS = ["-vf", "scale=360:-2,fps=1", "-crf", "33", "-c:a", "aac", "-b:a", "32k", "-ac", "1"]
  27. MAX_INLINE_BYTES = 30 * 1024 * 1024 # OpenRouter inline base64 平台硬上限
  28. COMPRESS_TIMEOUT_SECONDS = 120.0 # 实测 64MB/720p 压缩 ~8s,120s 足够余量
class VideoFetchError(RuntimeError):
    """Download, compression, or size-limit failure; caught by GeminiVideoClient and turned into a fail."""
  31. def _download_headers(platform: str, override: dict[str, str] | None) -> dict[str, str]:
  32. if override is not None:
  33. return override
  34. return _PLATFORM_DOWNLOAD_HEADERS.get(platform, {})
  35. def _save_raw(save_path: str, raw: bytes) -> None:
  36. # 原片留档是 best-effort:磁盘问题绝不影响判定链路。
  37. try:
  38. path = Path(save_path)
  39. path.parent.mkdir(parents=True, exist_ok=True)
  40. path.write_bytes(raw)
  41. except OSError:
  42. pass
  43. def _compress(raw: bytes, ffmpeg_exe: str) -> bytes:
  44. # 超时保护:坏视频会让 ffmpeg 卡死,进而挂住一个判定并发线程(实测正常压缩 ~8s)。
  45. try:
  46. proc = subprocess.run(
  47. [ffmpeg_exe, "-i", "pipe:0", *_FFMPEG_ARGS, "-f", "mp4",
  48. "-movflags", "frag_keyframe+empty_moov", "pipe:1"],
  49. input=raw,
  50. stdout=subprocess.PIPE,
  51. stderr=subprocess.PIPE,
  52. timeout=COMPRESS_TIMEOUT_SECONDS,
  53. )
  54. except subprocess.TimeoutExpired as exc:
  55. raise VideoFetchError("ffmpeg compression timeout") from exc
  56. if proc.returncode != 0 or not proc.stdout:
  57. raise VideoFetchError("ffmpeg compression failed")
  58. return proc.stdout
  59. def fetch_and_compress(
  60. play_url: str,
  61. platform: str,
  62. *,
  63. headers: dict[str, str] | None = None,
  64. http_client: Any | None = None,
  65. ffmpeg_exe: str | None = None,
  66. timeout_seconds: float = 90.0,
  67. save_raw_to: str | None = None,
  68. ) -> str:
  69. if not play_url:
  70. raise VideoFetchError("missing play_url")
  71. client = http_client or httpx
  72. try:
  73. response = client.get(
  74. play_url,
  75. headers=_download_headers(platform, headers),
  76. follow_redirects=True,
  77. timeout=timeout_seconds,
  78. )
  79. response.raise_for_status()
  80. raw = response.content
  81. except httpx.HTTPError as exc:
  82. raise VideoFetchError(f"download failed: {type(exc).__name__}") from exc
  83. if not raw:
  84. raise VideoFetchError("empty download")
  85. if save_raw_to:
  86. _save_raw(save_raw_to, raw)
  87. compressed = _compress(raw, ffmpeg_exe or imageio_ffmpeg.get_ffmpeg_exe())
  88. if len(compressed) > MAX_INLINE_BYTES:
  89. raise VideoFetchError(f"compressed video oversize: {len(compressed)} bytes")
  90. return f"data:video/mp4;base64,{base64.b64encode(compressed).decode('ascii')}"