video_utils.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
"""
Video processing utilities.

Provides video download and Base64 encoding helpers used to support video evaluation.
"""
  5. import asyncio
  6. import base64
  7. import hashlib
  8. import os
  9. from pathlib import Path
  10. from typing import Optional
  11. import requests
  12. # 配置
  13. VIDEO_CACHE_DIR = Path(".video_cache")
  14. VIDEO_MAX_SIZE_MB = 50 # 最大视频大小(MB)
  15. VIDEO_DOWNLOAD_TIMEOUT = 120 # 下载超时(秒)
  16. MAX_RETRIES = 2 # 最大重试次数
  17. async def download_video(
  18. video_url: str,
  19. cache_dir: Path = VIDEO_CACHE_DIR
  20. ) -> Optional[Path]:
  21. """
  22. 异步下载视频文件
  23. Args:
  24. video_url: 视频URL
  25. cache_dir: 缓存目录
  26. Returns:
  27. 视频文件路径,失败返回 None
  28. """
  29. # 创建缓存目录
  30. cache_dir.mkdir(exist_ok=True)
  31. # 生成缓存文件名(基于URL hash)
  32. url_hash = hashlib.md5(video_url.encode()).hexdigest()
  33. cache_path = cache_dir / f"{url_hash}.mp4"
  34. # 检查缓存
  35. if cache_path.exists():
  36. file_size_mb = cache_path.stat().st_size / (1024 * 1024)
  37. print(f" ♻️ 使用缓存视频: {file_size_mb:.2f}MB")
  38. return cache_path
  39. # 异步下载
  40. loop = asyncio.get_event_loop()
  41. for attempt in range(MAX_RETRIES + 1):
  42. try:
  43. print(f" 📥 下载视频... (尝试 {attempt + 1}/{MAX_RETRIES + 1})")
  44. # 使用 executor 执行同步下载
  45. response = await loop.run_in_executor(
  46. None,
  47. lambda: requests.get(
  48. video_url,
  49. timeout=VIDEO_DOWNLOAD_TIMEOUT,
  50. stream=True,
  51. headers={"User-Agent": "Mozilla/5.0"}
  52. )
  53. )
  54. response.raise_for_status()
  55. # 检查文件大小
  56. content_length = response.headers.get('content-length')
  57. if content_length:
  58. size_mb = int(content_length) / (1024 * 1024)
  59. if size_mb > VIDEO_MAX_SIZE_MB:
  60. print(f" ⚠️ 视频过大: {size_mb:.2f}MB > {VIDEO_MAX_SIZE_MB}MB")
  61. return None
  62. # 保存到临时文件
  63. temp_path = cache_path.with_suffix('.tmp')
  64. def save_chunks():
  65. with open(temp_path, 'wb') as f:
  66. for chunk in response.iter_content(chunk_size=8192):
  67. if chunk:
  68. f.write(chunk)
  69. await loop.run_in_executor(None, save_chunks)
  70. # 检查实际文件大小
  71. actual_size_mb = temp_path.stat().st_size / (1024 * 1024)
  72. if actual_size_mb > VIDEO_MAX_SIZE_MB:
  73. print(f" ⚠️ 视频过大: {actual_size_mb:.2f}MB > {VIDEO_MAX_SIZE_MB}MB")
  74. temp_path.unlink()
  75. return None
  76. # 重命名为正式文件
  77. temp_path.rename(cache_path)
  78. print(f" ✅ 视频下载成功: {actual_size_mb:.2f}MB")
  79. return cache_path
  80. except Exception as e:
  81. if attempt < MAX_RETRIES:
  82. wait_time = 2 * (attempt + 1)
  83. print(f" ⚠️ 下载失败,{wait_time}秒后重试: {str(e)[:50]}")
  84. await asyncio.sleep(wait_time)
  85. else:
  86. print(f" ❌ 视频下载失败: {str(e)[:100]}")
  87. # 清理临时文件
  88. if cache_path.with_suffix('.tmp').exists():
  89. cache_path.with_suffix('.tmp').unlink()
  90. return None
  91. return None
  92. async def encode_video_to_base64(video_path: Path) -> Optional[str]:
  93. """
  94. 异步将视频文件编码为 Base64 data URL
  95. Args:
  96. video_path: 视频文件路径
  97. Returns:
  98. Base64 编码的 data URL,失败返回 None
  99. """
  100. try:
  101. loop = asyncio.get_event_loop()
  102. # 异步读取文件
  103. def read_file():
  104. with open(video_path, 'rb') as f:
  105. return f.read()
  106. print(f" 🔄 编码视频为 Base64...")
  107. video_bytes = await loop.run_in_executor(None, read_file)
  108. # Base64 编码
  109. def encode():
  110. base64_str = base64.b64encode(video_bytes).decode('utf-8')
  111. return f"data:video/mp4;base64,{base64_str}"
  112. data_url = await loop.run_in_executor(None, encode)
  113. encoded_size_mb = len(data_url) / (1024 * 1024)
  114. print(f" ✅ Base64 编码完成: {encoded_size_mb:.2f}MB")
  115. return data_url
  116. except Exception as e:
  117. print(f" ❌ Base64 编码失败: {str(e)[:100]}")
  118. return None
  119. def cleanup_video_cache(cache_dir: Path = VIDEO_CACHE_DIR, days: int = 7):
  120. """
  121. 清理超过指定天数的视频缓存
  122. Args:
  123. cache_dir: 缓存目录
  124. days: 保留天数
  125. """
  126. import time
  127. if not cache_dir.exists():
  128. return
  129. now = time.time()
  130. cutoff = now - (days * 24 * 60 * 60)
  131. removed_count = 0
  132. for file_path in cache_dir.glob("*.mp4"):
  133. if file_path.stat().st_mtime < cutoff:
  134. file_path.unlink()
  135. removed_count += 1
  136. if removed_count > 0:
  137. print(f"🗑️ 清理了 {removed_count} 个过期视频缓存")