|
|
@@ -0,0 +1,496 @@
|
|
|
+"""
|
|
|
+Decode Workflow.
|
|
|
+
|
|
|
+解码工作流:合并 What 解构工作流和脚本理解工作流的完整流程。
|
|
|
+流程:视频上传 → 灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 →
|
|
|
+ 段落划分 → 实质提取 → 形式提取 → 分离结果 → 结果汇总
|
|
|
+"""
|
|
|
+
|
|
|
+from typing import Dict, Any
|
|
|
+from langgraph.graph import StateGraph, END
|
|
|
+
|
|
|
+from src.components.agents.base import BaseGraphAgent
|
|
|
+from src.components.agents.topic_selection_understanding_agent import TopicSelectionUnderstandingAgent
|
|
|
+from src.components.functions.result_aggregation_function import ResultAggregationFunction
|
|
|
+from src.components.functions.video_upload_function import VideoUploadFunction
|
|
|
+# What解构相关Agent
|
|
|
+from src.components.agents.inspiration_points_agent import InspirationPointsAgent
|
|
|
+from src.components.agents.purpose_point_agent import PurposePointAgent
|
|
|
+from src.components.agents.key_points_agent import KeyPointsAgent
|
|
|
+# 脚本理解相关Agent
|
|
|
+from src.components.agents.script_section_division_agent import ScriptSectionDivisionAgent
|
|
|
+from src.components.agents.script_substance_extraction_agent import ScriptSubstanceExtractionAgent
|
|
|
+from src.components.agents.script_form_extraction_agent import ScriptFormExtractionAgent
|
|
|
+from src.utils.logger import get_logger
|
|
|
+
|
|
|
+logger = get_logger(__name__)
|
|
|
+
|
|
|
+
|
|
|
+class DecodeWorkflow(BaseGraphAgent):
|
|
|
+ """解码工作流(合并 What 解构和脚本理解)
|
|
|
+
|
|
|
+ 功能:
|
|
|
+ - 编排完整的解码流程(视频分析)
|
|
|
+ - 流程:视频上传 → 灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 →
|
|
|
+ 段落划分 → 实质提取 → 形式提取 → 分离结果 → 结果汇总
|
|
|
+ - 管理状态传递
|
|
|
+ - 仅支持单视频输入
|
|
|
+
|
|
|
+ 实现方式:BaseGraphAgent (LangGraph)
|
|
|
+ """
|
|
|
+
|
|
|
+ def __init__(
|
|
|
+ self,
|
|
|
+ name: str = "decode_workflow",
|
|
|
+ description: str = "解码工作流(合并 What 解构和脚本理解)",
|
|
|
+ model_provider: str = "google_genai",
|
|
|
+ max_depth: int = 10
|
|
|
+ ):
|
|
|
+ super().__init__(
|
|
|
+ name=name,
|
|
|
+ description=description,
|
|
|
+ state_class=dict
|
|
|
+ )
|
|
|
+
|
|
|
+ self.max_depth = max_depth
|
|
|
+ self.model_provider = model_provider
|
|
|
+
|
|
|
+ # 初始化视频上传Function
|
|
|
+ self.video_upload_func = VideoUploadFunction()
|
|
|
+
|
|
|
+ # 初始化What解构相关Agent
|
|
|
+ self.inspiration_points_agent = InspirationPointsAgent(
|
|
|
+ model_provider=model_provider
|
|
|
+ )
|
|
|
+ self.purpose_point_agent = PurposePointAgent(
|
|
|
+ model_provider=model_provider
|
|
|
+ )
|
|
|
+ self.key_points_agent = KeyPointsAgent(
|
|
|
+ model_provider=model_provider
|
|
|
+ )
|
|
|
+ self.topic_selection_understanding_agent = TopicSelectionUnderstandingAgent(
|
|
|
+ model_provider=model_provider
|
|
|
+ )
|
|
|
+
|
|
|
+ # 初始化脚本理解相关Agent
|
|
|
+ self.section_agent = ScriptSectionDivisionAgent(
|
|
|
+ model_provider=model_provider
|
|
|
+ )
|
|
|
+ self.substance_agent = ScriptSubstanceExtractionAgent(
|
|
|
+ model_provider=model_provider
|
|
|
+ )
|
|
|
+ self.form_agent = ScriptFormExtractionAgent(
|
|
|
+ model_provider=model_provider
|
|
|
+ )
|
|
|
+
|
|
|
+ # 初始化结果汇总Function
|
|
|
+ self.result_aggregation_func = ResultAggregationFunction()
|
|
|
+
|
|
|
+ logger.info(f"DecodeWorkflow 初始化完成")
|
|
|
+
|
|
|
+ def _build_graph(self) -> StateGraph:
|
|
|
+ """构建工作流图
|
|
|
+
|
|
|
+ 完整流程:
|
|
|
+ START → 视频上传 → 灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 →
|
|
|
+ 段落划分 → 实质提取 → 形式提取 → 分离结果 → 结果汇总 → END
|
|
|
+ """
|
|
|
+ workflow = StateGraph(dict) # 使用dict作为状态类型
|
|
|
+
|
|
|
+ # 添加所有节点
|
|
|
+ workflow.add_node("video_upload", self._video_upload_node)
|
|
|
+ # What解构节点
|
|
|
+ workflow.add_node("inspiration_points_extraction", self._inspiration_points_node)
|
|
|
+ workflow.add_node("purpose_point_extraction", self._purpose_point_node)
|
|
|
+ workflow.add_node("key_points_extraction", self._key_points_node)
|
|
|
+ workflow.add_node("topic_selection_understanding", self._topic_selection_understanding_node)
|
|
|
+ # 脚本理解节点
|
|
|
+ workflow.add_node("section_division", self._section_division_node)
|
|
|
+ workflow.add_node("substance_extraction", self._substance_extraction_node)
|
|
|
+ workflow.add_node("form_extraction", self._form_extraction_node)
|
|
|
+ workflow.add_node("merge_all_results", self._merge_all_results_node)
|
|
|
+ workflow.add_node("result_aggregation", self._result_aggregation_node)
|
|
|
+
|
|
|
+ # 定义流程的边
|
|
|
+ workflow.set_entry_point("video_upload")
|
|
|
+ # What解构流程
|
|
|
+ workflow.add_edge("video_upload", "inspiration_points_extraction")
|
|
|
+ workflow.add_edge("inspiration_points_extraction", "purpose_point_extraction")
|
|
|
+ workflow.add_edge("purpose_point_extraction", "key_points_extraction")
|
|
|
+ workflow.add_edge("key_points_extraction", "topic_selection_understanding")
|
|
|
+ # 脚本理解流程
|
|
|
+ workflow.add_edge("topic_selection_understanding", "section_division")
|
|
|
+ workflow.add_edge("section_division", "substance_extraction")
|
|
|
+ workflow.add_edge("substance_extraction", "form_extraction")
|
|
|
+ workflow.add_edge("form_extraction", "merge_all_results")
|
|
|
+ workflow.add_edge("merge_all_results", "result_aggregation")
|
|
|
+ workflow.add_edge("result_aggregation", END)
|
|
|
+
|
|
|
+ logger.info("工作流图构建完成 - 完整流程:视频上传 → What解构 → 脚本理解 → 结果汇总")
|
|
|
+
|
|
|
+ return workflow
|
|
|
+
|
|
|
+ def _video_upload_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:视频上传(第一步)- 下载视频并上传至Gemini"""
|
|
|
+ logger.info("=== 执行节点:视频上传 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Function
|
|
|
+ if not self.video_upload_func.is_initialized:
|
|
|
+ self.video_upload_func.initialize()
|
|
|
+
|
|
|
+ # 执行视频上传
|
|
|
+ result = self.video_upload_func.execute(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state.update(result)
|
|
|
+
|
|
|
+ video_uri = result.get("video_uploaded_uri")
|
|
|
+ if video_uri:
|
|
|
+ logger.info(f"视频上传完成 - URI: {video_uri}")
|
|
|
+ else:
|
|
|
+ error = result.get("video_upload_error", "未知错误")
|
|
|
+ logger.warning(f"视频上传失败: {error}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"视频上传失败: {e}", exc_info=True)
|
|
|
+ state.update({
|
|
|
+ "video_uploaded_uri": None,
|
|
|
+ "video_upload_error": str(e)
|
|
|
+ })
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _inspiration_points_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:灵感点提取(What解构)"""
|
|
|
+ logger.info("=== 执行节点:灵感点提取 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Agent
|
|
|
+ if not self.inspiration_points_agent.is_initialized:
|
|
|
+ self.inspiration_points_agent.initialize()
|
|
|
+
|
|
|
+ # 执行Agent
|
|
|
+ result = self.inspiration_points_agent.process(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state.update(result)
|
|
|
+
|
|
|
+ # 安全地获取灵感点数量:total_count 在 metadata 中
|
|
|
+ if isinstance(result, dict):
|
|
|
+ metadata = result.get("metadata", {})
|
|
|
+ inspiration_count = metadata.get("total_count", 0) if isinstance(metadata, dict) else 0
|
|
|
+ # 如果 metadata 中没有,尝试从 inspiration_points 列表长度获取
|
|
|
+ if inspiration_count == 0:
|
|
|
+ inspiration_points = result.get("inspiration_points", [])
|
|
|
+ if isinstance(inspiration_points, list):
|
|
|
+ inspiration_count = len(inspiration_points)
|
|
|
+ else:
|
|
|
+ # 如果 result 不是 dict(比如是列表),尝试获取长度
|
|
|
+ inspiration_count = len(result) if isinstance(result, list) else 0
|
|
|
+
|
|
|
+ logger.info(f"灵感点提取完成 - 共 {inspiration_count} 个灵感点")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"灵感点提取失败: {e}", exc_info=True)
|
|
|
+ state.update({
|
|
|
+ "inspiration_points": {
|
|
|
+ "error": str(e),
|
|
|
+ "points": [],
|
|
|
+ "total_count": 0
|
|
|
+ }
|
|
|
+ })
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _purpose_point_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:目的点提取(What解构)"""
|
|
|
+ logger.info("=== 执行节点:目的点提取 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Agent
|
|
|
+ if not self.purpose_point_agent.is_initialized:
|
|
|
+ self.purpose_point_agent.initialize()
|
|
|
+
|
|
|
+ # 执行Agent
|
|
|
+ result = self.purpose_point_agent.process(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state.update(result)
|
|
|
+
|
|
|
+ main_purpose = result.get("purpose_point", {}).get("main_purpose", "未知")
|
|
|
+ logger.info(f"目的点提取完成 - 主要目的: {main_purpose}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"目的点提取失败: {e}", exc_info=True)
|
|
|
+ state.update({
|
|
|
+ "purpose_point": {
|
|
|
+ "error": str(e),
|
|
|
+ "main_purpose": "未知"
|
|
|
+ }
|
|
|
+ })
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _key_points_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:关键点提取(What解构)"""
|
|
|
+ logger.info("=== 执行节点:关键点提取 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Agent
|
|
|
+ if not self.key_points_agent.is_initialized:
|
|
|
+ self.key_points_agent.initialize()
|
|
|
+
|
|
|
+ # 执行Agent
|
|
|
+ result = self.key_points_agent.process(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state.update(result)
|
|
|
+
|
|
|
+ total_key_points = result.get("key_points", {}).get("total_count", 0)
|
|
|
+ logger.info(f"关键点提取完成 - 共 {total_key_points} 个关键点")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"关键点提取失败: {e}", exc_info=True)
|
|
|
+ state.update({
|
|
|
+ "key_points": {
|
|
|
+ "error": str(e),
|
|
|
+ "creator_perspective": {"key_points": [], "summary": ""},
|
|
|
+ "consumer_perspective": {"key_points": [], "summary": ""}
|
|
|
+ }
|
|
|
+ })
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _topic_selection_understanding_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:选题理解(What解构)"""
|
|
|
+ logger.info("=== 执行节点:选题理解 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Agent
|
|
|
+ if not self.topic_selection_understanding_agent.is_initialized:
|
|
|
+ self.topic_selection_understanding_agent.initialize()
|
|
|
+
|
|
|
+ # 执行Agent
|
|
|
+ result = self.topic_selection_understanding_agent.process(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state.update(result)
|
|
|
+
|
|
|
+ logger.info(f"选题理解完成 - result: {result}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"选题理解失败: {e}", exc_info=True)
|
|
|
+ state.update({
|
|
|
+ "topic_selection_understanding": {}
|
|
|
+ })
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _section_division_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:脚本段落划分(脚本理解)"""
|
|
|
+ logger.info("=== 执行节点:脚本段落划分 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Agent
|
|
|
+ if not self.section_agent.is_initialized:
|
|
|
+ self.section_agent.initialize()
|
|
|
+
|
|
|
+ # 执行Agent
|
|
|
+ result = self.section_agent.process(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state.update(result)
|
|
|
+
|
|
|
+ sections = result.get("段落列表", [])
|
|
|
+ content_category = result.get("内容品类", "未知")
|
|
|
+ logger.info(f"脚本段落划分完成 - 内容品类: {content_category}, 段落数: {len(sections)}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"脚本段落划分失败: {e}", exc_info=True)
|
|
|
+ state.update({
|
|
|
+ "内容品类": "未知品类",
|
|
|
+ "段落列表": []
|
|
|
+ })
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _substance_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:实质元素提取(脚本理解)"""
|
|
|
+ logger.info("=== 执行节点:实质元素提取 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Agent
|
|
|
+ if not self.substance_agent.is_initialized:
|
|
|
+ self.substance_agent.initialize()
|
|
|
+
|
|
|
+ # 准备状态:将段落列表包装到section_division字段中
|
|
|
+ sections = state.get("段落列表", [])
|
|
|
+ state["section_division"] = {"段落列表": sections}
|
|
|
+
|
|
|
+ # 执行Agent
|
|
|
+ result = self.substance_agent.process(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state.update(result)
|
|
|
+
|
|
|
+ final_elements = result.get("substance_final_elements", [])
|
|
|
+ logger.info(f"实质元素提取完成 - 最终元素数: {len(final_elements)}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"实质元素提取失败: {e}", exc_info=True)
|
|
|
+ state.update({
|
|
|
+ "concrete_elements": [],
|
|
|
+ "concrete_concepts": [],
|
|
|
+ "abstract_concepts": [],
|
|
|
+ "substance_elements": [],
|
|
|
+ "substance_analyzed_result": [],
|
|
|
+ "substance_scored_result": {},
|
|
|
+ "substance_filtered_ids": [],
|
|
|
+ "substance_categorized_result": {},
|
|
|
+ "substance_final_elements": []
|
|
|
+ })
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _form_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:形式元素提取(脚本理解)"""
|
|
|
+ logger.info("=== 执行节点:形式元素提取 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Agent
|
|
|
+ if not self.form_agent.is_initialized:
|
|
|
+ self.form_agent.initialize()
|
|
|
+
|
|
|
+ # 执行Agent(依赖实质元素)
|
|
|
+ result = self.form_agent.process(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state.update(result)
|
|
|
+
|
|
|
+ final_elements = result.get("form_final_elements", [])
|
|
|
+ logger.info(f"形式元素提取完成 - 最终元素数: {len(final_elements)}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"形式元素提取失败: {e}", exc_info=True)
|
|
|
+ state.update({
|
|
|
+ "concrete_element_forms": [],
|
|
|
+ "concrete_concept_forms": [],
|
|
|
+ "overall_forms": [],
|
|
|
+ "form_elements": [],
|
|
|
+ "form_analyzed_result": [],
|
|
|
+ "form_scored_result": {},
|
|
|
+ "form_weighted_result": {},
|
|
|
+ "form_filtered_ids": [],
|
|
|
+ "form_categorized_result": {},
|
|
|
+ "form_final_elements": []
|
|
|
+ })
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _merge_all_results_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:分离实质和形式结果(脚本理解)"""
|
|
|
+ logger.info("=== 执行节点:分离实质和形式结果 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 获取实质和形式的最终元素
|
|
|
+ substance_final_elements = state.get("substance_final_elements", [])
|
|
|
+ form_final_elements = state.get("form_final_elements", [])
|
|
|
+
|
|
|
+ # 分别存储实质列表和形式列表
|
|
|
+ state["实质列表"] = substance_final_elements
|
|
|
+ state["形式列表"] = form_final_elements
|
|
|
+
|
|
|
+ logger.info(f"分离完成 - 实质元素: {len(substance_final_elements)}, 形式元素: {len(form_final_elements)}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"分离结果失败: {e}", exc_info=True)
|
|
|
+ state["实质列表"] = []
|
|
|
+ state["形式列表"] = []
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def _result_aggregation_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """节点:结果汇总"""
|
|
|
+ logger.info("=== 执行节点:结果汇总 ===")
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 初始化Function
|
|
|
+ if not self.result_aggregation_func.is_initialized:
|
|
|
+ self.result_aggregation_func.initialize()
|
|
|
+
|
|
|
+ # 执行Function
|
|
|
+ final_result = self.result_aggregation_func.execute(state)
|
|
|
+
|
|
|
+ # 更新状态
|
|
|
+ state["final_result"] = final_result
|
|
|
+
|
|
|
+ logger.info("结果汇总完成")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"结果汇总失败: {e}", exc_info=True)
|
|
|
+ state["final_result"] = {
|
|
|
+ "视频信息": {},
|
|
|
+ "三点解构": {
|
|
|
+ "灵感点": [],
|
|
|
+ "目的点": {},
|
|
|
+ "关键点": {}
|
|
|
+ },
|
|
|
+ "选题理解": {},
|
|
|
+ "脚本理解": {
|
|
|
+ "内容品类": "未知",
|
|
|
+ "段落列表": [],
|
|
|
+ "实质列表": [],
|
|
|
+ "形式列表": []
|
|
|
+ },
|
|
|
+ "错误": f"汇总失败: {str(e)}"
|
|
|
+ }
|
|
|
+
|
|
|
+ return state
|
|
|
+
|
|
|
+ def invoke(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
+ """执行工作流(公共接口)- 视频分析版本
|
|
|
+
|
|
|
+ Args:
|
|
|
+ input_data: 输入数据,包含 video 字段(视频URL)
|
|
|
+ 格式参考:examples/56898272/视频详情.json
|
|
|
+ {
|
|
|
+ "video": "http://...",
|
|
|
+ "title": "...",
|
|
|
+ ...
|
|
|
+ }
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 最终解码结果
|
|
|
+ """
|
|
|
+ logger.info("=== 开始执行解码工作流(视频分析) ===")
|
|
|
+
|
|
|
+ # 确保工作流已初始化
|
|
|
+ if not self.is_initialized:
|
|
|
+ self.initialize()
|
|
|
+
|
|
|
+ # 构建 text(兼容两种输入方式)
|
|
|
+ if "text" in input_data and isinstance(input_data.get("text"), dict):
|
|
|
+ text = input_data.get("text", {})
|
|
|
+ else:
|
|
|
+ text = {
|
|
|
+ "title": input_data.get("title", ""),
|
|
|
+ "body": input_data.get("body_text", ""),
|
|
|
+ }
|
|
|
+
|
|
|
+ # 初始化状态(包含视频信息,供视频上传和后续Agent使用)
|
|
|
+ initial_state = {
|
|
|
+ "video": input_data.get("video", ""),
|
|
|
+ "channel_content_id": input_data.get("channel_content_id", ""),
|
|
|
+ "text": text,
|
|
|
+ "current_depth": 0,
|
|
|
+ "max_depth": self.max_depth,
|
|
|
+ }
|
|
|
+
|
|
|
+ # 执行工作流
|
|
|
+ result = self.compiled_graph.invoke(initial_state)
|
|
|
+
|
|
|
+ logger.info("=== 解码工作流执行完成(视频分析) ===")
|
|
|
+
|
|
|
+ return result.get("final_result", {})
|
|
|
+
|