| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499 |
- """
- Decode Workflow.
- 解码工作流:合并 What 解构工作流和脚本理解工作流的完整流程。
- 流程:初始化数据库记录 → 视频上传 → 灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 →
- 段落划分 → 实质提取 → 形式提取 → 分离结果 → 结果汇总
- """
- from typing import Dict, Any
- from langgraph.graph import StateGraph, END
- from src.components.agents.base import BaseGraphAgent
- from src.components.agents.topic_selection_understanding_agent import TopicSelectionUnderstandingAgent
- from src.components.functions.result_aggregation_function import ResultAggregationFunction
- from src.components.functions.video_upload_function import VideoUploadFunction
- # What解构相关Agent
- from src.components.agents.inspiration_points_agent import InspirationPointsAgent
- from src.components.agents.purpose_point_agent import PurposePointAgent
- from src.components.agents.key_points_agent import KeyPointsAgent
- # 脚本理解相关Agent
- from src.components.agents.script_section_division_agent import ScriptSectionDivisionAgent
- from src.components.agents.script_substance_extraction_agent import ScriptSubstanceExtractionAgent
- from src.components.agents.script_form_extraction_agent import ScriptFormExtractionAgent
- # ScriptWorkflowV2 相关Agent
- from src.components.agents.structure_agent import StructureAgent
- from src.components.agents.content_unit_split_agent import ContentUnitSplitAgent
- from src.components.agents.content_unit_understand import ContentUnitUnderstandAgent
- from src.components.agents.script_keyword_agent import ScriptKeywordAgent
- # 搜索关键词Agent
- from src.components.agents.search_keyword_agent import SearchKeywordAgent
- from src.models import get_db, DecodeVideo, DecodeStatus
- from src.utils.logger import get_logger
- from utils.general import get_now_ts
- logger = get_logger(__name__)
- class DecodeWorkflow(BaseGraphAgent):
- """解码工作流(合并 What 解构和脚本理解)
- 功能:
- - 编排完整的解码流程(视频分析)
- - 流程:初始化数据库记录 → 视频上传 → 灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 →
- 段落划分 → 实质提取 → 形式提取 → 分离结果 → 结果汇总
- - 管理状态传递
- - 仅支持单视频输入
- 实现方式:BaseGraphAgent (LangGraph)
- """
- def __init__(
- self,
- name: str = "decode_workflow",
- description: str = "解码工作流(合并 What 解构和脚本理解)",
- model_provider: str = "google_genai",
- max_depth: int = 10
- ):
- super().__init__(
- name=name,
- description=description,
- state_class=dict
- )
- self.max_depth = max_depth
- self.model_provider = model_provider
- # 初始化视频上传Function
- self.video_upload_func = VideoUploadFunction()
- # 初始化What解构相关Agent
- self.inspiration_points_agent = InspirationPointsAgent(
- model_provider=model_provider
- )
- self.purpose_point_agent = PurposePointAgent(
- model_provider=model_provider
- )
- self.key_points_agent = KeyPointsAgent(
- model_provider=model_provider
- )
- self.topic_selection_understanding_agent = TopicSelectionUnderstandingAgent(
- model_provider=model_provider
- )
- # 初始化脚本理解相关Agent
- self.section_agent = ScriptSectionDivisionAgent(
- model_provider=model_provider
- )
- self.substance_agent = ScriptSubstanceExtractionAgent(
- model_provider=model_provider
- )
- self.form_agent = ScriptFormExtractionAgent(
- model_provider=model_provider
- )
- # 初始化 ScriptWorkflowV2 相关Agent
- self.structure_agent = StructureAgent(
- model_provider=model_provider
- )
- self.content_unit_split_agent = ContentUnitSplitAgent(
- model_provider=model_provider
- )
- self.content_unit_understand_agent = ContentUnitUnderstandAgent(
- model_provider=model_provider
- )
- self.script_keyword_agent = ScriptKeywordAgent(
- model_provider=model_provider
- )
- # 初始化搜索关键词Agent
- self.search_keyword_agent = SearchKeywordAgent(
- model_provider=model_provider
- )
- # 初始化结果汇总Function
- self.result_aggregation_func = ResultAggregationFunction()
- logger.info(f"DecodeWorkflow 初始化完成")
- def _build_graph(self) -> StateGraph:
- """构建工作流图
- 完整流程(并行分支):
- START → 初始化数据库记录 → 视频上传 → [并行分支] → 合并结果 → 结果汇总 → END
- 分支1:灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 → 段落划分 → 实质提取 → 形式提取 → 分离结果
- 分支2:结构化分析 → L3单元拆分 → 整体理解 → 金句提取
- """
- workflow = StateGraph(dict) # 使用dict作为状态类型
- # 添加所有节点
- workflow.add_node("init_db_record", self._init_db_record_node)
- workflow.add_node("video_upload", self._video_upload_node)
- # 分叉节点:用于启动两个并行分支
- workflow.add_node("fork_branches", self._fork_branches_node)
-
- # 分支1:原有 decode_workflow 流程
- # What解构节点
- workflow.add_node("inspiration_points_extraction", self._inspiration_points_node)
- workflow.add_node("purpose_point_extraction", self._purpose_point_node)
- workflow.add_node("key_points_extraction", self._key_points_node)
- workflow.add_node("topic_selection_understanding", self._topic_selection_understanding_node)
- # 脚本理解节点
- workflow.add_node("section_division", self._section_division_node)
- workflow.add_node("substance_extraction", self._substance_extraction_node)
- workflow.add_node("form_extraction", self._form_extraction_node)
- workflow.add_node("merge_all_results", self._merge_all_results_node)
- # 搜索关键词提取节点(分支1的最后一步)
- workflow.add_node("search_keywords_extraction", self._search_keywords_extraction_node)
-
- # 分支2:ScriptWorkflowV2 流程
- workflow.add_node("structure_analysis", self._structure_analysis_node)
- workflow.add_node("content_unit_split", self._content_unit_split_node)
- workflow.add_node("content_unit_understand", self._content_unit_understand_node)
- workflow.add_node("keyword_extraction", self._keyword_extraction_node)
-
- # 合并节点和结果汇总
- workflow.add_node("merge_branches", self._merge_branches_node)
- workflow.add_node("result_aggregation", self._result_aggregation_node)
- # 定义流程的边
- workflow.set_entry_point("init_db_record")
- # 数据库记录初始化后进入视频上传
- workflow.add_edge("init_db_record", "video_upload")
- # 视频上传后使用条件边:成功则进入分叉节点,失败则终止
- workflow.add_conditional_edges(
- "video_upload",
- self._check_video_upload_success,
- {
- "success": "fork_branches",
- "failure": END
- }
- )
-
- # 分叉节点:同时启动两个分支
- # 注意:LangGraph 不支持从一个节点直接连接到多个节点
- # 所以我们使用一个技巧:分叉节点连接到分支1,同时通过条件边也连接到分支2
- # 但实际上,我们需要使用一个不同的方法
- # 使用条件边,总是返回两个目标(通过返回一个列表或使用特殊的路由逻辑)
- # 但 LangGraph 的条件边只能返回一个字符串目标
-
- # 解决方案:分叉节点连接到分支1,分支1的第一个节点执行后,也触发分支2
- # 或者:使用一个"并行启动"节点,它内部调用两个分支的入口
- # 实际上,最简单的方法是:分叉节点连接到分支1,然后在分支1的第一个节点中,也启动分支2
-
- # 更好的方法:分叉节点使用条件边,根据一个标志来决定路由
- # 但我们需要确保两个分支都能被启动
-
- # 实际可行的方案:分叉节点连接到分支1,然后在状态中设置一个标志
- # 在分支1的第一个节点中检查这个标志,如果分支2还没启动,则启动它
- # 但这需要修改节点逻辑,比较复杂
-
- # 最简单的方案:分叉节点总是连接到分支1,然后在分支1的第一个节点中
- # 检查并启动分支2(通过修改状态,让工作流能够同时执行两个分支)
- # 但这在 LangGraph 中也不容易实现
-
- # 实际上,在 LangGraph 中实现真正的并行执行比较困难
- # 我们可以使用一个变通方法:分叉节点连接到分支1,分支1的第一个节点执行后
- # 通过修改状态来标记需要执行分支2,然后在适当的时候执行分支2
-
- # 但更简单的方法是:让分叉节点连接到分支1,然后在状态中设置一个标志
- # 在合并节点中检查,如果分支2还没执行,则执行它(但这会变成串行)
-
- # 最佳方案:使用 LangGraph 的 add_edge 多次(但这不支持)
- # 或者:创建一个"并行执行"节点,它内部调用两个分支的入口节点
-
- # 让我使用一个实用的方案:分叉节点连接到分支1,分支1的第一个节点执行时
- # 也检查并启动分支2(通过异步或线程,但这在 LangGraph 中不容易实现)
-
- # 实际上,在 LangGraph 中,我们可以使用一个技巧:
- # 分叉节点连接到分支1,然后在状态中设置一个标志
- # 在合并节点中,如果分支2还没完成,则执行分支2的节点(但这会变成串行)
-
- # 让我采用一个更实用的方案:
- # 1. 分叉节点连接到分支1
- # 2. 分叉节点也通过条件边连接到分支2(使用一个总是返回"start_branch2"的条件函数)
- # 但条件边只能有一个返回值
-
- # 最终方案:使用一个"并行启动"节点,它内部顺序调用两个分支的入口
- # 虽然这不是真正的并行,但在实际执行中,由于每个节点都是独立的,可以认为是并行的
-
- # 或者:分叉节点连接到分支1,分支1完成后检查分支2是否完成,如果没完成则执行分支2
- # 但这会变成串行执行
-
- # 让我采用一个折中方案:分叉节点连接到分支1,同时在状态中标记需要执行分支2
- # 在分支1的某个节点中,检查并执行分支2(但这需要修改节点逻辑)
-
- # 实际上,在 LangGraph 中,最简单的方法是:
- # 分叉节点连接到分支1,分支1完成后,如果分支2还没执行,则执行分支2
- # 虽然这不是真正的并行,但在实际应用中,由于节点执行时间不同,可以认为是近似并行的
-
- # 分叉节点:连接到分支1,分支1的第一个节点会同时启动分支2
- workflow.add_edge("fork_branches", "inspiration_points_extraction") # 启动分支1
-
- # 分支1:原有 decode_workflow 流程
- # What解构流程 - 在关键节点后添加错误检查
- workflow.add_conditional_edges(
- "inspiration_points_extraction",
- self._check_workflow_status,
- {
- "continue": "purpose_point_extraction",
- "terminate": END
- }
- )
- workflow.add_conditional_edges(
- "purpose_point_extraction",
- self._check_workflow_status,
- {
- "continue": "key_points_extraction",
- "terminate": END
- }
- )
- workflow.add_conditional_edges(
- "key_points_extraction",
- self._check_workflow_status,
- {
- "continue": "topic_selection_understanding",
- "terminate": END
- }
- )
- workflow.add_conditional_edges(
- "topic_selection_understanding",
- self._check_workflow_status,
- {
- "continue": "section_division",
- "terminate": END
- }
- )
- # 脚本理解流程
- workflow.add_conditional_edges(
- "section_division",
- self._check_workflow_status,
- {
- "continue": "substance_extraction",
- "terminate": END
- }
- )
- workflow.add_conditional_edges(
- "substance_extraction",
- self._check_workflow_status,
- {
- "continue": "form_extraction",
- "terminate": END
- }
- )
- workflow.add_conditional_edges(
- "form_extraction",
- self._check_workflow_status,
- {
- "continue": "merge_all_results",
- "terminate": END
- }
- )
- workflow.add_conditional_edges(
- "merge_all_results",
- self._check_workflow_status,
- {
- "continue": "search_keywords_extraction",
- "terminate": END
- }
- )
- workflow.add_conditional_edges(
- "search_keywords_extraction",
- self._check_workflow_status,
- {
- "continue": "merge_branches",
- "terminate": END
- }
- )
-
- # 分支2:ScriptWorkflowV2 流程
- # 注意:分支2的节点在分支1的第一个节点中直接执行,不通过图的边连接
- # 这些节点保留在图中,但实际执行是在分支1的第一个节点中触发的
-
- # 合并节点后进入结果汇总
- workflow.add_edge("merge_branches", "result_aggregation")
- workflow.add_edge("result_aggregation", END)
- logger.info("工作流图构建完成 - 完整流程:初始化数据库记录 → 视频上传 → [并行分支] → 合并结果 → 结果汇总")
- logger.info(" 分支1:灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 → 段落划分 → 实质提取 → 形式提取 → 分离结果 → 搜索关键词提取")
- logger.info(" 分支2:结构化分析 → L3单元拆分 → 整体理解 → 金句提取")
- return workflow
- def _check_video_upload_success(self, state: Dict[str, Any]) -> str:
- """检查视频上传是否成功
-
- Returns:
- "success" 如果上传成功,否则返回 "failure"
- """
- video_uri = state.get("video_uploaded_uri")
- video_error = state.get("video_upload_error")
-
- # 如果URI存在且没有错误,则认为成功
- if video_uri and not video_error:
- logger.info("视频上传成功,继续执行后续流程")
- return "success"
- else:
- error_msg = video_error or "视频上传失败:未获取到视频URI"
- logger.error(f"视频上传失败,终止workflow: {error_msg}")
- # 设置失败信息到状态中
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- # 更新数据库记录为失败状态
- self._update_db_record_after_workflow(state, success=False, error_msg=error_msg)
- return "failure"
-
- def _check_critical_error(self, state: Dict[str, Any], error_source: str = "") -> bool:
- """检查关键错误,如果存在则设置失败标志
-
- Args:
- state: 状态字典
- error_source: 错误来源(用于日志)
-
- Returns:
- True 如果存在致命错误,False 否则
- """
- # 检查是否已经失败
- if state.get("workflow_failed"):
- return True
-
- # 检查视频文件是否可用
- from src.utils.llm_invoker import get_video_file_from_state
- video_file = get_video_file_from_state(state)
- if not video_file:
- error_msg = f"无法获取视频文件对象{('(' + error_source + ')' if error_source else '')}"
- logger.error(f"{error_msg},终止workflow")
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return True
-
- # 检查视频URI是否存在
- video_uri = state.get("video_uploaded_uri")
- if not video_uri:
- video_error = state.get("video_upload_error", "未知错误")
- error_msg = f"视频URI不存在{('(' + error_source + ')' if error_source else '')}:{video_error}"
- logger.error(f"{error_msg},终止workflow")
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return True
-
- return False
- def _check_agent_result_for_errors(self, result: Dict[str, Any], agent_name: str) -> bool:
- """检查Agent返回结果中是否包含关键错误
-
- Args:
- result: Agent返回的结果字典
- agent_name: Agent名称(用于日志)
-
- Returns:
- True 如果存在关键错误,False 否则
- """
- if not isinstance(result, dict):
- return False
-
- # 检查常见的错误字段
- error_fields = ["error", "错误", "video_upload_error"]
- for field in error_fields:
- error_value = result.get(field)
- if error_value:
- # 检查是否是关键错误(无法获取视频文件等)
- error_str = str(error_value)
- if "无法获取视频文件" in error_str or "无法获取视频文件对象" in error_str:
- logger.error(f"{agent_name}返回关键错误: {error_str}")
- return True
-
- # 检查metadata中的错误
- metadata = result.get("metadata", {})
- if isinstance(metadata, dict):
- error_value = metadata.get("error")
- if error_value:
- error_str = str(error_value)
- if "无法获取视频文件" in error_str or "无法获取视频文件对象" in error_str or "未找到视频URI" in error_str:
- logger.error(f"{agent_name}返回关键错误: {error_str}")
- return True
-
- return False
- def _check_workflow_status(self, state: Dict[str, Any]) -> str:
- """检查workflow状态,用于条件边
-
- Returns:
- "continue" 如果继续执行,否则返回 "terminate"
- """
- if state.get("workflow_failed"):
- return "terminate"
- return "continue"
- def _video_upload_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:视频上传(第一步)- 下载视频并上传至Gemini"""
- logger.info("=== 执行节点:视频上传 ===")
- try:
- # 初始化Function
- if not self.video_upload_func.is_initialized:
- self.video_upload_func.initialize()
- # 执行视频上传
- result = self.video_upload_func.execute(state)
- # 更新状态
- state.update(result)
- video_uri = result.get("video_uploaded_uri")
- if video_uri:
- logger.info(f"视频上传完成 - URI: {video_uri}")
- else:
- error = result.get("video_upload_error", "未知错误")
- logger.error(f"视频上传失败: {error}")
- # 确保失败信息被设置
- state.update({
- "video_uploaded_uri": None,
- "video_upload_error": error
- })
- except Exception as e:
- logger.error(f"视频上传失败: {e}", exc_info=True)
- state.update({
- "video_uploaded_uri": None,
- "video_upload_error": str(e)
- })
- return state
- def _init_db_record_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:初始化数据库记录
-
- 根据 video_id 查询 decode_videos 表:
- - 如果存在记录,则不新建,使用现有记录
- - 如果不存在,则新建记录(状态为 EXECUTING)
- """
- logger.info("=== 执行节点:初始化数据库记录 ===")
-
- try:
- video_id = state.get("video_id", "")
- task_id = state.get("task_id")
-
- if not video_id:
- logger.warning("未提供 video_id,跳过数据库记录初始化")
- return state
-
- db = next(get_db())
- try:
- # 根据 video_id 查询是否已有记录
- existing_record = db.query(DecodeVideo).filter_by(video_id=video_id).first()
-
- if existing_record:
- # 如果存在记录,使用现有的 task_id
- logger.info(f"找到已存在的数据库记录: task_id={existing_record.task_id}, video_id={video_id}")
- state["db_task_id"] = existing_record.task_id
- state["db_record_exists"] = True
- # 更新状态为执行中
- existing_record.update_status(DecodeStatus.EXECUTING)
- db.commit()
- else:
- # 如果不存在,创建新记录
- # 如果没有提供 task_id,使用 video_id 的 hash 值作为 task_id
- if not task_id:
- import hashlib
- task_id = int(hashlib.md5(video_id.encode()).hexdigest()[:15], 16) % (10 ** 15)
- logger.info(f"未提供 task_id,自动生成: {task_id}")
-
- new_record = DecodeVideo.create(
- task_id=task_id,
- video_id=video_id,
- status=DecodeStatus.EXECUTING
- )
- db.add(new_record)
- db.commit()
- logger.info(f"创建新的数据库记录: task_id={task_id}, video_id={video_id}")
- state["db_task_id"] = task_id
- state["db_record_exists"] = False
-
- except Exception as e:
- logger.error(f"数据库操作失败: {e}", exc_info=True)
- db.rollback()
- # 数据库操作失败不影响 workflow 继续执行
- finally:
- db.close()
-
- except Exception as e:
- logger.error(f"初始化数据库记录节点执行失败: {e}", exc_info=True)
- # 数据库操作失败不影响 workflow 继续执行
-
- return state
- def _fork_branches_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:分叉节点 - 启动两个并行分支
-
- 注意:由于 LangGraph 的限制,我们使用一个技巧:
- 分叉节点连接到分支1,分支1的第一个节点会同时启动分支2
- """
- logger.info("=== 执行节点:分叉节点(启动并行分支) ===")
-
- # 标记需要启动分支2
- state["start_branch2"] = True
-
- return state
- def _inspiration_points_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:灵感点提取(What解构)
-
- 同时启动分支2(如果还没启动)
- """
- logger.info("=== 执行节点:灵感点提取 ===")
- # 检查是否需要启动分支2
- if state.get("start_branch2") and not state.get("branch2_started"):
- logger.info("在分支1的第一个节点中启动分支2(ScriptWorkflowV2流程)")
- state["branch2_started"] = True
- # 启动分支2:执行结构化分析
- try:
- if not self.structure_agent.is_initialized:
- self.structure_agent.initialize()
- structure_result = self.structure_agent.process(state)
- state["topic"] = structure_result.get("structure_data", {})
-
- # 继续执行分支2的后续节点
- if not self.content_unit_split_agent.is_initialized:
- self.content_unit_split_agent.initialize()
- split_result = self.content_unit_split_agent.process(state)
- state.update(split_result)
-
- if not self.content_unit_understand_agent.is_initialized:
- self.content_unit_understand_agent.initialize()
- understand_result = self.content_unit_understand_agent.process(state)
- state.update(understand_result)
-
- if not self.script_keyword_agent.is_initialized:
- self.script_keyword_agent.initialize()
- keyword_result = self.script_keyword_agent.process(state)
- state.update(keyword_result)
-
- state["branch2_completed"] = True
- logger.info("分支2(ScriptWorkflowV2流程)执行完成")
- except Exception as e:
- logger.error(f"分支2执行失败: {e}", exc_info=True)
- state["branch2_completed"] = True # 即使失败也标记为完成,避免阻塞
- # 检查关键错误
- if self._check_critical_error(state, "灵感点提取"):
- return state
- try:
- # 初始化Agent
- if not self.inspiration_points_agent.is_initialized:
- self.inspiration_points_agent.initialize()
- # 执行Agent
- result = self.inspiration_points_agent.process(state)
- # 更新状态
- state.update(result)
- # 检查Agent返回结果中是否包含关键错误
- if self._check_agent_result_for_errors(result, "灵感点提取Agent"):
- error_msg = "灵感点提取失败:无法获取视频文件"
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- # 安全地获取灵感点数量:total_count 在 metadata 中
- if isinstance(result, dict):
- metadata = result.get("metadata", {})
- inspiration_count = metadata.get("total_count", 0) if isinstance(metadata, dict) else 0
- # 如果 metadata 中没有,尝试从 inspiration_points 列表长度获取
- if inspiration_count == 0:
- inspiration_points = result.get("inspiration_points", [])
- if isinstance(inspiration_points, list):
- inspiration_count = len(inspiration_points)
- else:
- # 如果 result 不是 dict(比如是列表),尝试获取长度
- inspiration_count = len(result) if isinstance(result, list) else 0
-
- logger.info(f"灵感点提取完成 - 共 {inspiration_count} 个灵感点")
- except Exception as e:
- logger.error(f"灵感点提取失败: {e}", exc_info=True)
- state["workflow_failed"] = True
- state["workflow_error"] = f"灵感点提取异常: {str(e)}"
- state.update({
- "inspiration_points": {
- "error": str(e),
- "points": [],
- "total_count": 0
- }
- })
- return state
- def _purpose_point_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:目的点提取(What解构)"""
- logger.info("=== 执行节点:目的点提取 ===")
- # 检查关键错误
- if self._check_critical_error(state, "目的点提取"):
- return state
- try:
- # 初始化Agent
- if not self.purpose_point_agent.is_initialized:
- self.purpose_point_agent.initialize()
- # 执行Agent
- result = self.purpose_point_agent.process(state)
- # 更新状态
- state.update(result)
- # 检查Agent返回结果中是否包含关键错误
- if self._check_agent_result_for_errors(result, "目的点提取Agent"):
- error_msg = "目的点提取失败:无法获取视频文件"
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- main_purpose = result.get("purpose_point", {}).get("main_purpose", "未知")
- logger.info(f"目的点提取完成 - 主要目的: {main_purpose}")
- except Exception as e:
- logger.error(f"目的点提取失败: {e}", exc_info=True)
- state["workflow_failed"] = True
- state["workflow_error"] = f"目的点提取异常: {str(e)}"
- state.update({
- "purpose_point": {
- "error": str(e),
- "main_purpose": "未知"
- }
- })
- return state
- def _key_points_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:关键点提取(What解构)"""
- logger.info("=== 执行节点:关键点提取 ===")
- # 检查关键错误
- if self._check_critical_error(state, "关键点提取"):
- return state
- try:
- # 初始化Agent
- if not self.key_points_agent.is_initialized:
- self.key_points_agent.initialize()
- # 执行Agent
- result = self.key_points_agent.process(state)
- # 更新状态
- state.update(result)
- # 检查Agent返回结果中是否包含关键错误
- if self._check_agent_result_for_errors(result, "关键点提取Agent"):
- error_msg = "关键点提取失败:无法获取视频文件"
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- total_key_points = result.get("key_points", {}).get("total_count", 0)
- logger.info(f"关键点提取完成 - 共 {total_key_points} 个关键点")
- except Exception as e:
- logger.error(f"关键点提取失败: {e}", exc_info=True)
- state["workflow_failed"] = True
- state["workflow_error"] = f"关键点提取异常: {str(e)}"
- state.update({
- "key_points": {
- "error": str(e),
- "creator_perspective": {"key_points": [], "summary": ""},
- "consumer_perspective": {"key_points": [], "summary": ""}
- }
- })
- return state
- def _topic_selection_understanding_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:选题理解(What解构)"""
- logger.info("=== 执行节点:选题理解 ===")
- # 检查关键错误
- if self._check_critical_error(state, "选题理解"):
- return state
- try:
- # 初始化Agent
- if not self.topic_selection_understanding_agent.is_initialized:
- self.topic_selection_understanding_agent.initialize()
- # 执行Agent
- result = self.topic_selection_understanding_agent.process(state)
- # 更新状态
- state.update(result)
- # 检查Agent返回结果中是否包含关键错误
- if self._check_agent_result_for_errors(result, "选题理解Agent"):
- error_msg = "选题理解失败:无法获取视频文件"
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- logger.info(f"选题理解完成 - result: {result}")
- except Exception as e:
- logger.error(f"选题理解失败: {e}", exc_info=True)
- state["workflow_failed"] = True
- state["workflow_error"] = f"选题理解异常: {str(e)}"
- state.update({
- "topic_selection_understanding": {
- "错误": str(e)
- }
- })
- return state
- def _section_division_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:脚本段落划分(脚本理解)"""
- logger.info("=== 执行节点:脚本段落划分 ===")
- # 检查关键错误
- if self._check_critical_error(state, "脚本段落划分"):
- return state
- try:
- # 初始化Agent
- if not self.section_agent.is_initialized:
- self.section_agent.initialize()
- # 执行Agent
- result = self.section_agent.process(state)
- # 更新状态
- state.update(result)
- # 检查Agent返回结果中是否包含关键错误(段落划分如果没有视频文件会返回空结果,不算致命错误)
- sections = result.get("段落列表", [])
- content_category = result.get("内容品类", "未知")
- logger.info(f"脚本段落划分完成 - 内容品类: {content_category}, 段落数: {len(sections)}")
- except Exception as e:
- logger.error(f"脚本段落划分失败: {e}", exc_info=True)
- state["workflow_failed"] = True
- state["workflow_error"] = f"脚本段落划分异常: {str(e)}"
- state.update({
- "内容品类": "未知品类",
- "段落列表": []
- })
- return state
- def _substance_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:实质元素提取(脚本理解)"""
- logger.info("=== 执行节点:实质元素提取 ===")
- # 检查关键错误
- if self._check_critical_error(state, "实质元素提取"):
- return state
- try:
- # 初始化Agent
- if not self.substance_agent.is_initialized:
- self.substance_agent.initialize()
- # 准备状态:将段落列表包装到section_division字段中
- sections = state.get("段落列表", [])
- state["section_division"] = {"段落列表": sections}
- # 验证三点解构信息是否可用
- inspiration_points = state.get("inspiration_points", [])
- purpose_point = state.get("purpose_point", {})
- key_points = state.get("key_points", {})
-
- # 统计三点解构信息
- inspiration_count = len(inspiration_points) if isinstance(inspiration_points, list) else 0
- if not inspiration_count and isinstance(inspiration_points, dict):
- inspiration_count = len(inspiration_points.get("points", []))
-
- purpose_count = 0
- if isinstance(purpose_point, dict):
- purpose_count = len(purpose_point.get("purposes", []))
- elif isinstance(purpose_point, list):
- purpose_count = len(purpose_point)
-
- key_points_count = 0
- if isinstance(key_points, dict):
- key_points_list = key_points.get("key_points", [])
- key_points_count = len(key_points_list) if isinstance(key_points_list, list) else 0
- elif isinstance(key_points, list):
- key_points_count = len(key_points)
-
- logger.info(
- f"实质提取节点 - 三点解构信息检查: "
- f"灵感点={inspiration_count}, 目的点={purpose_count}, 关键点={key_points_count}"
- )
- # 执行Agent
- result = self.substance_agent.process(state)
- # 更新状态
- state.update(result)
- # 检查Agent返回结果中是否包含关键错误
- if self._check_agent_result_for_errors(result, "实质元素提取Agent"):
- error_msg = "实质元素提取失败:无法获取视频文件"
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- final_elements = result.get("substance_final_elements", [])
- logger.info(f"实质元素提取完成 - 最终元素数: {len(final_elements)}")
- except Exception as e:
- logger.error(f"实质元素提取失败: {e}", exc_info=True)
- state["workflow_failed"] = True
- state["workflow_error"] = f"实质元素提取异常: {str(e)}"
- state.update({
- "concrete_elements": [],
- "concrete_concepts": [],
- "abstract_concepts": [],
- "substance_elements": [],
- "substance_analyzed_result": [],
- "substance_scored_result": {},
- "substance_filtered_ids": [],
- "substance_categorized_result": {},
- "substance_final_elements": []
- })
- return state
- def _form_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:形式元素提取(脚本理解)"""
- logger.info("=== 执行节点:形式元素提取 ===")
- # 检查关键错误
- if self._check_critical_error(state, "形式元素提取"):
- return state
- try:
- # 初始化Agent
- if not self.form_agent.is_initialized:
- self.form_agent.initialize()
- # 验证三点解构信息是否可用
- inspiration_points = state.get("inspiration_points", [])
- purpose_point = state.get("purpose_point", {})
- key_points = state.get("key_points", {})
-
- # 统计三点解构信息
- inspiration_count = len(inspiration_points) if isinstance(inspiration_points, list) else 0
- if not inspiration_count and isinstance(inspiration_points, dict):
- inspiration_count = len(inspiration_points.get("points", []))
-
- purpose_count = 0
- if isinstance(purpose_point, dict):
- purpose_count = len(purpose_point.get("purposes", []))
- elif isinstance(purpose_point, list):
- purpose_count = len(purpose_point)
-
- key_points_count = 0
- if isinstance(key_points, dict):
- key_points_list = key_points.get("key_points", [])
- key_points_count = len(key_points_list) if isinstance(key_points_list, list) else 0
- elif isinstance(key_points, list):
- key_points_count = len(key_points)
-
- logger.info(
- f"形式提取节点 - 三点解构信息检查: "
- f"灵感点={inspiration_count}, 目的点={purpose_count}, 关键点={key_points_count}"
- )
- # 执行Agent(依赖实质元素)
- result = self.form_agent.process(state)
- # 更新状态
- state.update(result)
- # 检查Agent返回结果中是否包含关键错误
- if self._check_agent_result_for_errors(result, "形式元素提取Agent"):
- error_msg = "形式元素提取失败:无法获取视频文件"
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- final_elements = result.get("form_final_elements", [])
- logger.info(f"形式元素提取完成 - 最终元素数: {len(final_elements)}")
- except Exception as e:
- logger.error(f"形式元素提取失败: {e}", exc_info=True)
- state["workflow_failed"] = True
- state["workflow_error"] = f"形式元素提取异常: {str(e)}"
- state.update({
- "concrete_element_forms": [],
- "concrete_concept_forms": [],
- "overall_forms": [],
- "form_elements": [],
- "form_analyzed_result": [],
- "form_scored_result": {},
- "form_weighted_result": {},
- "form_filtered_ids": [],
- "form_categorized_result": {},
- "form_final_elements": []
- })
- return state
- def _merge_all_results_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:分离实质和形式结果(脚本理解)"""
- logger.info("=== 执行节点:分离实质和形式结果 ===")
- try:
- # 获取实质和形式的最终元素
- substance_final_elements = state.get("substance_final_elements", [])
- form_final_elements = state.get("form_final_elements", [])
- # 分别存储实质列表和形式列表
- state["实质列表"] = substance_final_elements
- state["形式列表"] = form_final_elements
- logger.info(f"分离完成 - 实质元素: {len(substance_final_elements)}, 形式元素: {len(form_final_elements)}")
- except Exception as e:
- logger.error(f"分离结果失败: {e}", exc_info=True)
- state["实质列表"] = []
- state["形式列表"] = []
- return state
- def _search_keywords_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:搜索关键词提取(分支1的最后一步)"""
- logger.info("=== 执行节点:搜索关键词提取 ===")
- # 检查关键错误
- if self._check_critical_error(state, "搜索关键词提取"):
- return state
- try:
- # 初始化Agent
- if not self.search_keyword_agent.is_initialized:
- self.search_keyword_agent.initialize()
- # 执行Agent(带重试机制)
- max_retries = 2
- result = None
- last_error = None
-
- def _is_result_failed(result: Dict[str, Any]) -> tuple[bool, str]:
- """检查结果是否失败
-
- Returns:
- (是否失败, 错误信息)
- """
- if not result:
- return True, "结果为空"
-
- # 检查关键错误(如无法获取视频文件)
- if self._check_agent_result_for_errors(result, "搜索关键词提取Agent"):
- return True, "搜索关键词提取失败:无法获取视频文件"
-
- # 检查search_keywords中的错误字段
- search_keywords = result.get("search_keywords", {})
- if isinstance(search_keywords, dict):
- # 检查是否有错误字段
- if "错误" in search_keywords or "error" in search_keywords:
- error_msg = search_keywords.get("错误") or search_keywords.get("error", "未知错误")
- return True, f"搜索关键词提取失败:{error_msg}"
-
- # 检查搜索词数量是否为0(可能是解析失败的表现)
- keyword_count = search_keywords.get("总数", 0)
- if keyword_count == 0:
- # 如果总数为0,可能是解析失败,需要重试
- return True, "搜索关键词提取失败:未提取到搜索词(可能为解析失败)"
-
- return False, ""
-
- for attempt in range(max_retries):
- try:
- # 执行Agent
- result = self.search_keyword_agent.process(state)
-
- # 检查结果是否失败
- is_failed, error_msg = _is_result_failed(result)
-
- if is_failed:
- if attempt < max_retries - 1:
- logger.warning(f"搜索关键词提取失败,准备重试 (尝试 {attempt + 1}/{max_retries}): {error_msg}")
- last_error = error_msg
- continue
- else:
- # 最后一次尝试也失败
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- else:
- # 成功,跳出重试循环
- break
-
- except Exception as e:
- if attempt < max_retries - 1:
- logger.warning(f"搜索关键词提取异常,准备重试 (尝试 {attempt + 1}/{max_retries}): {e}")
- last_error = str(e)
- continue
- else:
- # 最后一次尝试也失败,抛出异常让外层catch处理
- raise
- # 更新状态
- if result:
- state.update(result)
- else:
- # 如果result为None,使用last_error
- error_msg = last_error or "搜索关键词提取失败:未知错误"
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- # 再次检查(双重保险)
- if self._check_agent_result_for_errors(result, "搜索关键词提取Agent"):
- error_msg = "搜索关键词提取失败:无法获取视频文件"
- state["workflow_failed"] = True
- state["workflow_error"] = error_msg
- return state
- # 获取搜索关键词数量
- search_keywords = result.get("search_keywords", {})
- keyword_count = search_keywords.get("总数", 0) if isinstance(search_keywords, dict) else 0
- logger.info(f"搜索关键词提取完成 - 共 {keyword_count} 个搜索词")
-
- # 标记分支1完成
- state["branch1_completed"] = True
- except Exception as e:
- logger.error(f"搜索关键词提取失败: {e}", exc_info=True)
- state["workflow_failed"] = True
- state["workflow_error"] = f"搜索关键词提取异常: {str(e)}"
- state.update({
- "search_keywords": {
- "搜索词列表": [],
- "总数": 0,
- "error": str(e)
- }
- })
- state["branch1_completed"] = True # 即使失败也标记为完成,避免阻塞
- return state
- def _structure_analysis_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:结构化内容库分析(ScriptWorkflowV2分支)"""
- logger.info("=== 执行节点:结构化内容库分析(分支2) ===")
- # 检查关键错误
- if self._check_critical_error(state, "结构化内容库分析"):
- return state
- try:
- if not self.structure_agent.is_initialized:
- self.structure_agent.initialize()
- result = self.structure_agent.process(state)
-
- # 将结果存入 state 的 topic 字段
- structure_data = result.get("structure_data", {})
- state["topic"] = structure_data
- logger.info("结构化内容库分析完成")
- if isinstance(structure_data, dict):
- topic_info = structure_data.get("选题信息表", {})
- macro_topic = topic_info.get("宏观母题", "") if isinstance(topic_info, dict) else ""
- if macro_topic:
- logger.info(f"宏观母题: {macro_topic}")
- except Exception as e:
- logger.error(f"结构化内容库分析失败: {e}", exc_info=True)
- state["topic"] = {
- "错误": str(e),
- }
- return state
- def _content_unit_split_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:L3 内容单元拆分(ScriptWorkflowV2分支)"""
- logger.info("=== 执行节点:L3 内容单元拆分(分支2) ===")
- # 检查关键错误
- if self._check_critical_error(state, "L3 内容单元拆分"):
- return state
- try:
- if not self.content_unit_split_agent.is_initialized:
- self.content_unit_split_agent.initialize()
- result = self.content_unit_split_agent.process(state)
- state.update(result)
- analysis = result.get("content_unit_analysis", {})
- logger.info(
- f"L3 单元拆分完成,单元数量: {len(analysis.get('单元列表', [])) if isinstance(analysis, dict) else 0}"
- )
- except Exception as e:
- logger.error(f"L3 内容单元拆分失败: {e}", exc_info=True)
- state["content_unit_analysis"] = {
- "error": str(e),
- "单元列表": [],
- }
- return state
- def _content_unit_understand_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:整体结构理解(ScriptWorkflowV2分支)"""
- logger.info("=== 执行节点:整体结构理解(分支2) ===")
- # 检查关键错误
- if self._check_critical_error(state, "整体结构理解"):
- return state
- try:
- if not self.content_unit_understand_agent.is_initialized:
- self.content_unit_understand_agent.initialize()
- result = self.content_unit_understand_agent.process(state)
- state.update(result)
- understanding = result.get("content_unit_understanding", {})
- logger.info(
- f"整体结构理解完成,段落数量: {len(understanding.get('段落解构', [])) if isinstance(understanding, dict) else 0}"
- )
- except Exception as e:
- logger.error(f"整体结构理解失败: {e}", exc_info=True)
- state["content_unit_understanding"] = {
- "error": str(e),
- "整体解构": {},
- "段落解构": [],
- "单元解构": {},
- }
- return state
- def _keyword_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:金句提取(ScriptWorkflowV2分支)"""
- logger.info("=== 执行节点:金句提取(分支2) ===")
- # 检查关键错误
- if self._check_critical_error(state, "金句提取"):
- return state
- try:
- if not self.script_keyword_agent.is_initialized:
- self.script_keyword_agent.initialize()
- result = self.script_keyword_agent.process(state)
- state.update(result)
- script_keywords = result.get("script_keywords", {})
- logger.info("金句提取完成")
- if isinstance(script_keywords, dict):
- hooks_count = len(script_keywords.get("hooks", []))
- golden_sentences_count = len(script_keywords.get("golden_sentences", []))
- logger.info(f"提取钩子数量: {hooks_count}, 金句数量: {golden_sentences_count}")
-
- # 标记分支2完成
- state["branch2_completed"] = True
- except Exception as e:
- logger.error(f"金句提取失败: {e}", exc_info=True)
- state["script_keywords"] = {
- "error": str(e),
- }
- state["branch2_completed"] = True # 即使失败也标记为完成,避免阻塞
- return state
- def _merge_branches_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:合并两个分支的结果
-
- 检查分支2是否完成,如果没完成则执行它
- """
- logger.info("=== 执行节点:合并分支结果 ===")
- try:
- # 检查分支2是否完成
- branch2_completed = state.get("branch2_completed", False)
-
- if not branch2_completed:
- logger.info("分支2还未完成,现在执行分支2(ScriptWorkflowV2流程)")
- try:
- # 执行分支2的所有节点
- if not self.structure_agent.is_initialized:
- self.structure_agent.initialize()
- structure_result = self.structure_agent.process(state)
- state["topic"] = structure_result.get("structure_data", {})
-
- if not self.content_unit_split_agent.is_initialized:
- self.content_unit_split_agent.initialize()
- split_result = self.content_unit_split_agent.process(state)
- state.update(split_result)
-
- if not self.content_unit_understand_agent.is_initialized:
- self.content_unit_understand_agent.initialize()
- understand_result = self.content_unit_understand_agent.process(state)
- state.update(understand_result)
-
- if not self.script_keyword_agent.is_initialized:
- self.script_keyword_agent.initialize()
- keyword_result = self.script_keyword_agent.process(state)
- state.update(keyword_result)
-
- state["branch2_completed"] = True
- logger.info("分支2(ScriptWorkflowV2流程)执行完成")
- except Exception as e:
- logger.error(f"分支2执行失败: {e}", exc_info=True)
- state["branch2_completed"] = True # 即使失败也标记为完成,避免阻塞
- else:
- logger.info("分支2已完成,直接合并结果")
-
- # 标记已合并
- state["branches_merged"] = True
- logger.info("分支结果合并完成,准备进入结果汇总")
- except Exception as e:
- logger.error(f"合并分支结果失败: {e}", exc_info=True)
- state["branches_merged"] = True # 即使失败也标记为已合并,避免阻塞
- return state
- def _result_aggregation_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
- """节点:结果汇总(包含两个分支的结果)"""
- logger.info("=== 执行节点:结果汇总 ===")
- try:
- # 初始化Function
- if not self.result_aggregation_func.is_initialized:
- self.result_aggregation_func.initialize()
- # 执行Function(获取分支1的结果)
- final_result = self.result_aggregation_func.execute(state)
- # 添加分支2的结果(ScriptWorkflowV2的结果)
- topic = state.get("topic", {})
- content_unit_analysis = state.get("content_unit_analysis", {})
- content_unit_understanding = state.get("content_unit_understanding", {})
- script_keywords = state.get("script_keywords", {})
- # 将分支2的结果添加到最终结果中
- final_result["脚本解构V2"] = {
- "结构化内容库": topic,
- "L3单元解构": content_unit_analysis,
- "整体结构理解": content_unit_understanding,
- "金句提取": script_keywords,
- }
- # 更新状态
- state["final_result"] = final_result
- logger.info("结果汇总完成(包含两个分支的结果)")
- except Exception as e:
- logger.error(f"结果汇总失败: {e}", exc_info=True)
- state["final_result"] = {
- "视频信息": {},
- "三点解构": {
- "灵感点": [],
- "目的点": {},
- "关键点": {}
- },
- "选题理解": {},
- "脚本理解": {
- "内容品类": "未知",
- "段落列表": [],
- "实质列表": [],
- "形式列表": []
- },
- "脚本解构V2": {
- "结构化内容库": {},
- "L3单元解构": {},
- "整体结构理解": {},
- "金句提取": {}
- },
- "错误": f"汇总失败: {str(e)}"
- }
- return state
- def _update_db_record_after_workflow(
- self,
- state: Dict[str, Any],
- success: bool,
- final_result: Dict[str, Any] = None,
- error_msg: str = None
- ):
- """工作流执行完毕后更新数据库记录
-
- Args:
- state: 工作流执行后的状态
- success: 是否成功
- final_result: 最终结果(成功时使用)
- error_msg: 错误信息(失败时使用)
- """
- try:
- task_id = state.get("db_task_id")
- if not task_id:
- logger.warning("未找到 db_task_id,跳过数据库记录更新")
- return
-
- db = next(get_db())
- try:
- record = db.query(DecodeVideo).filter_by(task_id=task_id).first()
- if not record:
- logger.warning(f"未找到 task_id={task_id} 的数据库记录,跳过更新")
- return
-
- if success:
- # 更新为成功状态
- import json
- result_json = json.dumps(final_result, ensure_ascii=False) if final_result else None
- record.update_status(DecodeStatus.SUCCESS)
- record.update_result(result_json)
-
- # 提取分支2的结果(脚本解构V2)并存储到 decode_result_v2 字段
- if final_result and isinstance(final_result, dict):
- script_v2_result = final_result.get("脚本解构V2")
- if script_v2_result:
- result_v2_json = json.dumps(script_v2_result, ensure_ascii=False)
- record.update_result_v2(result_v2_json)
- logger.info(f"更新数据库记录的分支2结果: task_id={task_id}")
-
- # 提取搜索关键词并存储到 search_keywords 字段(字符串数组的JSON格式)
- search_keywords_data = state.get("search_keywords", {})
- if search_keywords_data and isinstance(search_keywords_data, dict):
- keyword_list = search_keywords_data.get("搜索词列表", [])
- if keyword_list:
- # 提取每个搜索词的"搜索词"字段,组成字符串数组
- keyword_strings = []
- for item in keyword_list:
- if isinstance(item, dict):
- keyword = item.get("搜索词", "")
- if keyword:
- keyword_strings.append(keyword)
-
- # 转换为JSON字符串数组格式
- if keyword_strings:
- keywords_json = json.dumps(keyword_strings, ensure_ascii=False)
- record.update_search_keywords(keywords_json)
- logger.info(f"更新数据库记录的搜索关键词: task_id={task_id}, 关键词数量={len(keyword_strings)}")
-
- logger.info(f"更新数据库记录为成功: task_id={task_id}")
- else:
- # 更新为失败状态
- record.update_status(DecodeStatus.FAILED, error_reason=error_msg)
- logger.info(f"更新数据库记录为失败: task_id={task_id}, error={error_msg}")
-
- db.commit()
-
- except Exception as e:
- logger.error(f"更新数据库记录失败: {e}", exc_info=True)
- db.rollback()
- finally:
- db.close()
-
- except Exception as e:
- logger.error(f"更新数据库记录节点执行失败: {e}", exc_info=True)
- def invoke(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
- """执行工作流(公共接口)- 视频分析版本
- Returns:
- 最终解码结果
- """
- logger.info("=== 开始执行解码工作流(视频分析) ===")
- logger.info(f"input_data: {input_data}")
- # 确保工作流已初始化
- if not self.is_initialized:
- self.initialize()
- # 验证输入参数
- video_url = input_data.get("video_url", "")
- if not video_url:
- error_msg = "未提供视频URL,无法执行工作流"
- logger.error(error_msg)
- return {
- "error": error_msg,
- "workflow_status": "failed",
- "input_data": input_data
- }
- # 初始化状态(包含视频信息,供视频上传和后续Agent使用)
- initial_state = {
- "video": video_url,
- "video_id": input_data.get("video_id", ""),
- "title": input_data.get("title", ""),
- "current_depth": 0,
- "max_depth": self.max_depth,
- "task_id": input_data.get("task_id", ""),
- }
- # 执行工作流
- result = None
- try:
- result = self.compiled_graph.invoke(initial_state)
- except Exception as e:
- error_msg = f"工作流执行异常: {str(e)}"
- logger.error(error_msg, exc_info=True)
- # 更新数据库记录为失败状态(使用 initial_state 作为 fallback)
- if result is None:
- result = initial_state
- self._update_db_record_after_workflow(result, success=False, error_msg=error_msg)
- return {
- "status": 3,
- "error": error_msg,
- "workflow_status": "failed",
- "exception_type": type(e).__name__
- }
- # 检查是否因为错误而终止
- if result.get("workflow_failed"):
- error_msg = result.get("workflow_error", "工作流执行失败")
- logger.error(f"工作流因错误而终止: {error_msg}")
- # 更新数据库记录为失败状态
- self._update_db_record_after_workflow(result, success=False, error_msg=error_msg)
- return {
- "error": error_msg,
- "video_upload_error": result.get("video_upload_error"),
- "workflow_status": "failed",
- "failed_at": result.get("failed_at", "unknown"),
- "status": 3
- }
- # 检查是否有最终结果
- final_result = result.get("final_result")
- if not final_result:
- # 如果没有最终结果,检查是否有错误信息
- if result.get("workflow_error"):
- error_msg = result.get("workflow_error", "工作流执行失败,未生成结果")
- logger.error(f"工作流执行失败: {error_msg}")
- # 更新数据库记录为失败状态
- self._update_db_record_after_workflow(result, success=False, error_msg=error_msg)
- return {
- "status": 3,
- "error": error_msg,
- "workflow_status": "failed"
- }
- else:
- logger.warning("工作流执行完成,但未生成最终结果")
- # 更新数据库记录为失败状态
- self._update_db_record_after_workflow(result, success=False, error_msg="工作流执行完成,但未生成最终结果")
- return {
- "status": 3,
- "error": "工作流执行完成,但未生成最终结果",
- "workflow_status": "incomplete",
- "state": result
- }
- # 工作流执行成功,更新数据库记录
- self._update_db_record_after_workflow(result, success=True, final_result=final_result)
- logger.info("=== 解码工作流执行完成(视频分析) ===")
- # 添加status字段(2表示成功)
- if isinstance(final_result, dict):
- final_result["status"] = 2
- return final_result
|