google_ai_analyze.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. import os
  2. import time
  3. import uuid
  4. from typing import Optional
  5. import google.generativeai as genai
  6. import orjson
  7. import requests
  8. from google.generativeai.types import (HarmBlockThreshold, HarmCategory)
  9. from loguru import logger
  10. from utils.coze_hook import CozeHook
  11. from utils.google_ai_prompt import VIDEO_TOPIC_ANALYSIS_PROMPT, VIDEO_SEGMENT_ANALYSIS_PROMPT, VIDEO_ANALYSIS_PROMPT
  12. # from utils.feishu_data import Material
# Directory, resolved against the current working directory at import time,
# where download_video() stores fetched videos.
CACHE_DIR = os.path.join(os.getcwd(), 'video_cache')
# Alternative cache location for local debugging (disabled):
# CACHE_DIR = '/Users/z/Downloads/'
# Optional local HTTP(S) proxy for outbound requests (disabled):
# PROXY_ADDR = 'http://localhost:1081'
# os.environ['http_proxy'] = PROXY_ADDR
# os.environ['https_proxy'] = PROXY_ADDR
  18. def load_prompts():
  19. """从prompt.py加载Prompt"""
  20. try:
  21. print("\n[初始化] 从prompt.py加载Prompt")
  22. prompts = [
  23. # {
  24. # "name": "视频选题与要点理解",
  25. # "content": VIDEO_TOPIC_ANALYSIS_PROMPT
  26. # },
  27. # {
  28. # "name": "视频分段与时间点分析",
  29. # "content": VIDEO_SEGMENT_ANALYSIS_PROMPT
  30. # }
  31. {
  32. "name": "视频内容分析",
  33. "content": VIDEO_ANALYSIS_PROMPT
  34. }
  35. ]
  36. print(f"[成功] 加载 {len(prompts)} 个Prompt")
  37. return prompts
  38. except Exception as e:
  39. raise Exception(f"加载Prompt失败: {str(e)}")
  40. class GoogleAI(object):
  41. @classmethod
  42. def download_video(cls, video_link: str) -> Optional[str]:
  43. file_path = os.path.join(CACHE_DIR, f'{str(uuid.uuid4())}.mp4')
  44. try:
  45. # 确保缓存目录存在
  46. try:
  47. os.makedirs(CACHE_DIR, exist_ok=True)
  48. except Exception as e:
  49. error_info = {
  50. "error_type": type(e).__name__,
  51. "error_message": str(e),
  52. "cache_dir": CACHE_DIR,
  53. "current_dir": os.getcwd(),
  54. "dir_exists": os.path.exists(CACHE_DIR),
  55. "dir_permissions": oct(os.stat(os.path.dirname(CACHE_DIR)).st_mode)[-3:] if os.path.exists(os.path.dirname(CACHE_DIR)) else "N/A"
  56. }
  57. error_json = orjson.dumps(error_info, option=orjson.OPT_INDENT_2).decode('utf-8')
  58. logger.error(f'[内容分析] 创建缓存目录失败: {error_json}')
  59. return None
  60. for _ in range(3):
  61. try:
  62. response = requests.get(url=video_link, timeout=60)
  63. print(f"response content: {file_path}")
  64. if response.status_code == 200:
  65. try:
  66. with open(file_path, 'wb') as f:
  67. f.write(response.content)
  68. logger.info(f'[内容分析] 视频链接: {video_link}, 存储地址: {file_path}')
  69. except Exception as e:
  70. error_info = {
  71. "error_type": type(e).__name__,
  72. "error_message": str(e),
  73. "file_path": file_path,
  74. "content_length": len(response.content) if response.content else 0
  75. }
  76. error_json = orjson.dumps(error_info, option=orjson.OPT_INDENT_2).decode('utf-8')
  77. logger.error(f'[内容分析] 视频保存失败: {error_json}')
  78. return None
  79. return file_path
  80. except Exception:
  81. time.sleep(1)
  82. continue
  83. except Exception:
  84. logger.error(f'[内容分析] 创建缓存目录失败')
  85. return None
  86. @classmethod
  87. def _analyze_content(cls, video, prompt):
  88. """增强版内容分析"""
  89. model = genai.GenerativeModel(
  90. model_name='gemini-2.0-flash',
  91. generation_config=genai.GenerationConfig(
  92. response_mime_type='application/json',
  93. temperature=0.3,
  94. max_output_tokens=20480
  95. ),
  96. safety_settings={
  97. HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
  98. HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
  99. }
  100. )
  101. try:
  102. response = model.generate_content(
  103. contents=[video, prompt],
  104. request_options={'timeout': 300}
  105. )
  106. if hasattr(response, '_error') and response._error:
  107. raise Exception(f"生成错误: {response._error}")
  108. result = orjson.loads(response.text.strip())
  109. print(f"[视频分析] 响应: {result}")
  110. if not isinstance(result, dict):
  111. raise ValueError("响应格式错误:非字典结构")
  112. return result
  113. except orjson.JSONDecodeError:
  114. raise Exception("响应解析失败,非JSON格式")
  115. except Exception as e:
  116. raise Exception(f"分析失败: {str(e)}")
  117. @classmethod
  118. def run(cls, api_key, video_url):
  119. print(f"api_key:{api_key},video_url:{video_url}")
  120. video_path = None
  121. try:
  122. genai.configure(api_key=api_key)
  123. video_path = cls.download_video(video_link=video_url)
  124. if not video_path:
  125. logger.error(f'[内容分析] 视频下载失败, 跳过任务')
  126. os.remove(video_path)
  127. logger.info(f"[内容分析] 文件已删除: {video_path}")
  128. return "[异常] 视频下载失败",""
  129. video = genai.upload_file(path=video_path, mime_type='video/mp4')
  130. while video.state.name == 'PROCESSING':
  131. time.sleep(1)
  132. video = genai.get_file(name=video.name)
  133. if video.state.name != 'ACTIVE':
  134. genai.delete_file(name=video.name)
  135. os.remove(video_path)
  136. return "[异常] 上传视频失败", ""
  137. prompts = load_prompts()
  138. analysis_data = {}
  139. for prompt in prompts[:3]:
  140. print(f"[分析] 正在执行: {prompt['name']}")
  141. try:
  142. result = cls._analyze_content(video, prompt['content'])
  143. # 提取 result 中的 "内容分段" 和 "视频简介"
  144. analysis_data['视频选题与要点理解'] = {
  145. "视频简介": result.get('视频简介', ''),
  146. "视频内容类型": result.get('视频内容类型', ''),
  147. "段落类型相似度": result.get('段落类型相似度', 1)
  148. }
  149. analysis_data['视频分段与时间点分析'] = {
  150. "内容分段": result.get('内容分段', [])
  151. }
  152. except Exception as e:
  153. analysis_data[prompt['name']] = {
  154. "error": str(e),
  155. "error_type": type(e).__name__
  156. }
  157. print(f"[分析] 所有分析完成, 结果: {analysis_data}")
  158. coze_hook = CozeHook()
  159. demand_list = coze_hook.run(analysis_data["视频选题与要点理解"], analysis_data["视频分段与时间点分析"])
  160. print(f"[分析] 所有分析完成, 结果: {demand_list}")
  161. genai.delete_file(name=video.name)
  162. os.remove(video_path)
  163. return analysis_data, demand_list
  164. except Exception as e:
  165. logger.error(f"[内容分析] 处理异常,异常信息{e}")
  166. os.remove(video_path)
  167. return f"[异常] {e}",""
  168. @classmethod
  169. def _analyze_content_with_api(cls, video_url):
  170. """使用API分析视频内容"""
  171. try:
  172. # 使用API分析视频内容
  173. response = requests.post(
  174. 'http://ai-api.piaoquantv.com/aigc-server/gemini/generateContent',
  175. json={
  176. "mediaUrl": video_url,
  177. "type": 2,
  178. "prompt": VIDEO_ANALYSIS_PROMPT,
  179. "model":"gemini-2.0-flash",
  180. "temperature":"0.3"
  181. }
  182. )
  183. response.raise_for_status()
  184. result = response.json()
  185. print(f"[内容分析] API分析完成, 结果: {result}")
  186. if not result or result.get('code') != 0:
  187. raise Exception("API分析结果异常")
  188. # 解析返回的JSON字符串
  189. analysis_result = orjson.loads(result['data']['result'])
  190. # 构建analysis_data
  191. analysis_data = {
  192. '视频选题与要点理解': {
  193. "视频简介": analysis_result.get('视频简介', ''),
  194. "视频内容类型": analysis_result.get('视频内容类型', ''),
  195. "段落类型相似度": analysis_result.get('段落类型相似度', 1)
  196. },
  197. '视频分段与时间点分析': {
  198. "内容分段": analysis_result.get('内容分段', [])
  199. }
  200. }
  201. # 使用coze_hook处理数据
  202. coze_hook = CozeHook()
  203. demand_list = coze_hook.run(
  204. analysis_data["视频选题与要点理解"],
  205. analysis_data["视频分段与时间点分析"]
  206. )
  207. print(f"[内容分析] API分析完成, 结果: {analysis_data}, {demand_list}")
  208. return analysis_data, demand_list
  209. except Exception as e:
  210. logger.error(f"[内容分析] API分析失败,异常信息{e}")
  211. return f"[异常] {e}", ""
  212. if __name__ == '__main__':
  213. ai = GoogleAI()
  214. # ai.run("AIzaSyAHt9h0ScYki7NmgOXa1jj-UEimCa6JEOs",
  215. # "http://rescdn.yishihui.com/jq_oss/video/2025012215472528213")
  216. ai._analyze_content_with_api("http://rescdn.yishihui.com/jq_oss/video/2025012215472528213")