# google_ai_analyze.py
  1. import os
  2. import time
  3. import uuid
  4. from typing import Optional
  5. import google.generativeai as genai
  6. import orjson
  7. import requests
  8. from google.generativeai.types import (HarmBlockThreshold, HarmCategory)
  9. from loguru import logger
  10. from utils.coze_hook import CozeHook
  11. from utils.google_ai_prompt import VIDEO_TOPIC_ANALYSIS_PROMPT, VIDEO_SEGMENT_ANALYSIS_PROMPT, VIDEO_ANALYSIS_PROMPT
  12. # from utils.feishu_data import Material
  13. CACHE_DIR = os.path.join(os.getcwd(), 'video_cache')
  14. # CACHE_DIR = '/Users/z/Downloads/'
  15. # PROXY_ADDR = 'http://localhost:1081'
  16. # os.environ['http_proxy'] = PROXY_ADDR
  17. # os.environ['https_proxy'] = PROXY_ADDR
  18. def load_prompts():
  19. """从prompt.py加载Prompt"""
  20. try:
  21. print("\n[初始化] 从prompt.py加载Prompt")
  22. prompts = [
  23. # {
  24. # "name": "视频选题与要点理解",
  25. # "content": VIDEO_TOPIC_ANALYSIS_PROMPT
  26. # },
  27. # {
  28. # "name": "视频分段与时间点分析",
  29. # "content": VIDEO_SEGMENT_ANALYSIS_PROMPT
  30. # }
  31. {
  32. "name": "视频内容分析",
  33. "content": VIDEO_ANALYSIS_PROMPT
  34. }
  35. ]
  36. print(f"[成功] 加载 {len(prompts)} 个Prompt")
  37. return prompts
  38. except Exception as e:
  39. raise Exception(f"加载Prompt失败: {str(e)}")
  40. class GoogleAI(object):
  41. @classmethod
  42. def download_video(cls, video_link: str) -> Optional[str]:
  43. file_path = os.path.join(CACHE_DIR, f'{str(uuid.uuid4())}.mp4')
  44. try:
  45. # 确保缓存目录存在
  46. try:
  47. os.makedirs(CACHE_DIR, exist_ok=True)
  48. except Exception as e:
  49. error_info = {
  50. "error_type": type(e).__name__,
  51. "error_message": str(e),
  52. "cache_dir": CACHE_DIR,
  53. "current_dir": os.getcwd(),
  54. "dir_exists": os.path.exists(CACHE_DIR),
  55. "dir_permissions": oct(os.stat(os.path.dirname(CACHE_DIR)).st_mode)[-3:] if os.path.exists(os.path.dirname(CACHE_DIR)) else "N/A"
  56. }
  57. error_json = orjson.dumps(error_info, option=orjson.OPT_INDENT_2).decode('utf-8')
  58. logger.error(f'[内容分析] 创建缓存目录失败: {error_json}')
  59. return None
  60. for _ in range(3):
  61. try:
  62. response = requests.get(url=video_link, timeout=60)
  63. print(f"response content: {file_path}")
  64. if response.status_code == 200:
  65. try:
  66. with open(file_path, 'wb') as f:
  67. f.write(response.content)
  68. logger.info(f'[内容分析] 视频链接: {video_link}, 存储地址: {file_path}')
  69. except Exception as e:
  70. error_info = {
  71. "error_type": type(e).__name__,
  72. "error_message": str(e),
  73. "file_path": file_path,
  74. "content_length": len(response.content) if response.content else 0
  75. }
  76. error_json = orjson.dumps(error_info, option=orjson.OPT_INDENT_2).decode('utf-8')
  77. logger.error(f'[内容分析] 视频保存失败: {error_json}')
  78. return None
  79. return file_path
  80. except Exception:
  81. time.sleep(1)
  82. continue
  83. except Exception:
  84. logger.error(f'[内容分析] 创建缓存目录失败')
  85. return None
  86. @classmethod
  87. def _analyze_content(cls, video, prompt):
  88. """增强版内容分析"""
  89. model = genai.GenerativeModel(
  90. model_name='gemini-2.0-flash',
  91. generation_config=genai.GenerationConfig(
  92. response_mime_type='application/json',
  93. temperature=0.3,
  94. max_output_tokens=20480
  95. ),
  96. safety_settings={
  97. HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
  98. HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
  99. }
  100. )
  101. try:
  102. response = model.generate_content(
  103. contents=[video, prompt],
  104. request_options={'timeout': 300}
  105. )
  106. if hasattr(response, '_error') and response._error:
  107. raise Exception(f"生成错误: {response._error}")
  108. result = orjson.loads(response.text.strip())
  109. print(f"[视频分析] 响应: {result}")
  110. if not isinstance(result, dict):
  111. raise ValueError("响应格式错误:非字典结构")
  112. return result
  113. except orjson.JSONDecodeError:
  114. raise Exception("响应解析失败,非JSON格式")
  115. except Exception as e:
  116. raise Exception(f"分析失败: {str(e)}")
  117. @classmethod
  118. def run(cls, api_key, video_url):
  119. print(f"api_key:{api_key},video_url:{video_url}")
  120. video_path = None
  121. try:
  122. genai.configure(api_key=api_key)
  123. video_path = cls.download_video(video_link=video_url)
  124. if not video_path:
  125. logger.error(f'[内容分析] 视频下载失败, 跳过任务')
  126. os.remove(video_path)
  127. logger.info(f"[内容分析] 文件已删除: {video_path}")
  128. return "[异常] 视频下载失败",""
  129. video = genai.upload_file(path=video_path, mime_type='video/mp4')
  130. while video.state.name == 'PROCESSING':
  131. time.sleep(1)
  132. video = genai.get_file(name=video.name)
  133. if video.state.name != 'ACTIVE':
  134. genai.delete_file(name=video.name)
  135. os.remove(video_path)
  136. return "[异常] 上传视频失败", ""
  137. prompts = load_prompts()
  138. analysis_data = {}
  139. for prompt in prompts[:3]:
  140. print(f"[分析] 正在执行: {prompt['name']}")
  141. try:
  142. result = cls._analyze_content(video, prompt['content'])
  143. # 提取 result 中的 "内容分段" 和 "视频简介"
  144. analysis_data['视频选题与要点理解'] = {
  145. "视频简介": result.get('视频简介', ''),
  146. "视频内容类型": result.get('视频内容类型', ''),
  147. "段落类型相似度": result.get('段落类型相似度', 1)
  148. }
  149. analysis_data['视频分段与时间点分析'] = {
  150. "内容分段": result.get('内容分段', [])
  151. }
  152. except Exception as e:
  153. analysis_data[prompt['name']] = {
  154. "error": str(e),
  155. "error_type": type(e).__name__
  156. }
  157. print(f"[分析] 所有分析完成, 结果: {analysis_data}")
  158. coze_hook = CozeHook()
  159. demand_list = coze_hook.run(analysis_data["视频选题与要点理解"], analysis_data["视频分段与时间点分析"])
  160. print(f"[分析] 所有分析完成, 结果: {demand_list}")
  161. genai.delete_file(name=video.name)
  162. os.remove(video_path)
  163. return analysis_data, demand_list
  164. except Exception as e:
  165. logger.error(f"[内容分析] 处理异常,异常信息{e}")
  166. os.remove(video_path)
  167. return f"[异常] {e}",""
  168. @classmethod
  169. def _analyze_content_with_api(cls, video_url):
  170. """使用API分析视频内容"""
  171. try:
  172. # 检查视频URL是否有效
  173. if not video_url or not video_url.startswith('http'):
  174. raise Exception("无效的视频URL")
  175. # 获取视频文件以确定正确的MIME类型
  176. try:
  177. response = requests.head(video_url, timeout=10)
  178. content_type = response.headers.get('content-type', '')
  179. if not content_type or 'video' not in content_type.lower():
  180. # 如果无法从HEAD请求获取正确的content-type,尝试GET请求
  181. response = requests.get(video_url, stream=True, timeout=10)
  182. content_type = response.headers.get('content-type', '')
  183. if not content_type or 'video' not in content_type.lower():
  184. content_type = 'video/mp4' # 默认使用mp4
  185. except Exception as e:
  186. logger.warning(f"[内容分析] 获取视频MIME类型失败: {str(e)}, 使用默认类型video/mp4")
  187. content_type = 'video/mp4'
  188. # 使用API分析视频内容
  189. response = requests.post(
  190. 'http://ai-api.piaoquantv.com/aigc-server/gemini/generateContent',
  191. json={
  192. "mediaUrl": video_url,
  193. "type": 2,
  194. "prompt": VIDEO_ANALYSIS_PROMPT,
  195. "model": "gemini-2.0-flash",
  196. "temperature": "0.3",
  197. "mimeType": content_type # 添加正确的MIME类型
  198. },
  199. timeout=300
  200. )
  201. response.raise_for_status()
  202. result = response.json()
  203. # print(f"[内容分析] API原始响应: {result}")
  204. if not result:
  205. raise Exception("API返回结果为空")
  206. if result.get('code') != 0:
  207. error_msg = result.get('msg', '未知错误')
  208. if 'data' in error_msg and 'error' in error_msg:
  209. try:
  210. error_data = orjson.loads(error_msg)
  211. if isinstance(error_data, dict) and 'error' in error_data:
  212. error_msg = f"API错误: {error_data['error'].get('message', error_msg)}"
  213. except:
  214. pass
  215. raise Exception(f"API返回错误: {error_msg}")
  216. if not result.get('data') or not result['data'].get('result'):
  217. raise Exception("API返回数据格式错误: 缺少result字段")
  218. try:
  219. # 解析返回的JSON字符串
  220. analysis_result = orjson.loads(result['data']['result'])
  221. if not isinstance(analysis_result, dict):
  222. raise ValueError("API返回的result不是有效的JSON对象")
  223. # 构建analysis_data
  224. analysis_data = {
  225. '视频选题与要点理解': {
  226. "视频简介": analysis_result.get('视频简介', ''),
  227. "视频内容类型": analysis_result.get('视频内容类型', ''),
  228. "段落类型相似度": analysis_result.get('段落类型相似度', 1)
  229. },
  230. '视频分段与时间点分析': {
  231. "内容分段": analysis_result.get('内容分段', [])
  232. }
  233. }
  234. # 使用coze_hook处理数据
  235. coze_hook = CozeHook()
  236. demand_list = coze_hook.run(
  237. analysis_data["视频选题与要点理解"],
  238. analysis_data["视频分段与时间点分析"]
  239. )
  240. if not demand_list:
  241. raise Exception("CozeHook处理结果为空")
  242. # print(f"[内容分析] API分析完成, 结果: {analysis_data}, {demand_list}")
  243. return analysis_data, demand_list
  244. except orjson.JSONDecodeError as e:
  245. raise Exception(f"解析API返回的JSON失败: {str(e)}")
  246. except Exception as e:
  247. raise Exception(f"处理API返回数据时出错: {str(e)}")
  248. except requests.exceptions.RequestException as e:
  249. error_msg = f"API请求失败: {str(e)}"
  250. logger.error(f"[内容分析] {error_msg}")
  251. return f"[异常] {error_msg}", None
  252. except Exception as e:
  253. error_msg = f"API分析失败: {str(e)}"
  254. logger.error(f"[内容分析] {error_msg}")
  255. return f"[异常] {error_msg}", None
  256. if __name__ == '__main__':
  257. ai = GoogleAI()
  258. # ai.run("AIzaSyAHt9h0ScYki7NmgOXa1jj-UEimCa6JEOs",
  259. # "http://rescdn.yishihui.com/jq_oss/video/2025012215472528213")
  260. ai._analyze_content_with_api("http://rescdn.yishihui.com/jq_oss/video/2025012215472528213")