decode_workflow.py 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499
  1. """
  2. Decode Workflow.
  3. 解码工作流:合并 What 解构工作流和脚本理解工作流的完整流程。
  4. 流程:初始化数据库记录 → 视频上传 → 灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 →
  5. 段落划分 → 实质提取 → 形式提取 → 分离结果 → 结果汇总
  6. """
  7. from typing import Dict, Any
  8. from langgraph.graph import StateGraph, END
  9. from src.components.agents.base import BaseGraphAgent
  10. from src.components.agents.topic_selection_understanding_agent import TopicSelectionUnderstandingAgent
  11. from src.components.functions.result_aggregation_function import ResultAggregationFunction
  12. from src.components.functions.video_upload_function import VideoUploadFunction
  13. # What解构相关Agent
  14. from src.components.agents.inspiration_points_agent import InspirationPointsAgent
  15. from src.components.agents.purpose_point_agent import PurposePointAgent
  16. from src.components.agents.key_points_agent import KeyPointsAgent
  17. # 脚本理解相关Agent
  18. from src.components.agents.script_section_division_agent import ScriptSectionDivisionAgent
  19. from src.components.agents.script_substance_extraction_agent import ScriptSubstanceExtractionAgent
  20. from src.components.agents.script_form_extraction_agent import ScriptFormExtractionAgent
  21. # ScriptWorkflowV2 相关Agent
  22. from src.components.agents.structure_agent import StructureAgent
  23. from src.components.agents.content_unit_split_agent import ContentUnitSplitAgent
  24. from src.components.agents.content_unit_understand import ContentUnitUnderstandAgent
  25. from src.components.agents.script_keyword_agent import ScriptKeywordAgent
  26. # 搜索关键词Agent
  27. from src.components.agents.search_keyword_agent import SearchKeywordAgent
  28. from src.models import get_db, DecodeVideo, DecodeStatus
  29. from src.utils.logger import get_logger
  30. from utils.general import get_now_ts
  31. logger = get_logger(__name__)
  32. class DecodeWorkflow(BaseGraphAgent):
  33. """解码工作流(合并 What 解构和脚本理解)
  34. 功能:
  35. - 编排完整的解码流程(视频分析)
  36. - 流程:初始化数据库记录 → 视频上传 → 灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 →
  37. 段落划分 → 实质提取 → 形式提取 → 分离结果 → 结果汇总
  38. - 管理状态传递
  39. - 仅支持单视频输入
  40. 实现方式:BaseGraphAgent (LangGraph)
  41. """
  42. def __init__(
  43. self,
  44. name: str = "decode_workflow",
  45. description: str = "解码工作流(合并 What 解构和脚本理解)",
  46. model_provider: str = "google_genai",
  47. max_depth: int = 10
  48. ):
  49. super().__init__(
  50. name=name,
  51. description=description,
  52. state_class=dict
  53. )
  54. self.max_depth = max_depth
  55. self.model_provider = model_provider
  56. # 初始化视频上传Function
  57. self.video_upload_func = VideoUploadFunction()
  58. # 初始化What解构相关Agent
  59. self.inspiration_points_agent = InspirationPointsAgent(
  60. model_provider=model_provider
  61. )
  62. self.purpose_point_agent = PurposePointAgent(
  63. model_provider=model_provider
  64. )
  65. self.key_points_agent = KeyPointsAgent(
  66. model_provider=model_provider
  67. )
  68. self.topic_selection_understanding_agent = TopicSelectionUnderstandingAgent(
  69. model_provider=model_provider
  70. )
  71. # 初始化脚本理解相关Agent
  72. self.section_agent = ScriptSectionDivisionAgent(
  73. model_provider=model_provider
  74. )
  75. self.substance_agent = ScriptSubstanceExtractionAgent(
  76. model_provider=model_provider
  77. )
  78. self.form_agent = ScriptFormExtractionAgent(
  79. model_provider=model_provider
  80. )
  81. # 初始化 ScriptWorkflowV2 相关Agent
  82. self.structure_agent = StructureAgent(
  83. model_provider=model_provider
  84. )
  85. self.content_unit_split_agent = ContentUnitSplitAgent(
  86. model_provider=model_provider
  87. )
  88. self.content_unit_understand_agent = ContentUnitUnderstandAgent(
  89. model_provider=model_provider
  90. )
  91. self.script_keyword_agent = ScriptKeywordAgent(
  92. model_provider=model_provider
  93. )
  94. # 初始化搜索关键词Agent
  95. self.search_keyword_agent = SearchKeywordAgent(
  96. model_provider=model_provider
  97. )
  98. # 初始化结果汇总Function
  99. self.result_aggregation_func = ResultAggregationFunction()
  100. logger.info(f"DecodeWorkflow 初始化完成")
  101. def _build_graph(self) -> StateGraph:
  102. """构建工作流图
  103. 完整流程(并行分支):
  104. START → 初始化数据库记录 → 视频上传 → [并行分支] → 合并结果 → 结果汇总 → END
  105. 分支1:灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 → 段落划分 → 实质提取 → 形式提取 → 分离结果
  106. 分支2:结构化分析 → L3单元拆分 → 整体理解 → 金句提取
  107. """
  108. workflow = StateGraph(dict) # 使用dict作为状态类型
  109. # 添加所有节点
  110. workflow.add_node("init_db_record", self._init_db_record_node)
  111. workflow.add_node("video_upload", self._video_upload_node)
  112. # 分叉节点:用于启动两个并行分支
  113. workflow.add_node("fork_branches", self._fork_branches_node)
  114. # 分支1:原有 decode_workflow 流程
  115. # What解构节点
  116. workflow.add_node("inspiration_points_extraction", self._inspiration_points_node)
  117. workflow.add_node("purpose_point_extraction", self._purpose_point_node)
  118. workflow.add_node("key_points_extraction", self._key_points_node)
  119. workflow.add_node("topic_selection_understanding", self._topic_selection_understanding_node)
  120. # 脚本理解节点
  121. workflow.add_node("section_division", self._section_division_node)
  122. workflow.add_node("substance_extraction", self._substance_extraction_node)
  123. workflow.add_node("form_extraction", self._form_extraction_node)
  124. workflow.add_node("merge_all_results", self._merge_all_results_node)
  125. # 搜索关键词提取节点(分支1的最后一步)
  126. workflow.add_node("search_keywords_extraction", self._search_keywords_extraction_node)
  127. # 分支2:ScriptWorkflowV2 流程
  128. workflow.add_node("structure_analysis", self._structure_analysis_node)
  129. workflow.add_node("content_unit_split", self._content_unit_split_node)
  130. workflow.add_node("content_unit_understand", self._content_unit_understand_node)
  131. workflow.add_node("keyword_extraction", self._keyword_extraction_node)
  132. # 合并节点和结果汇总
  133. workflow.add_node("merge_branches", self._merge_branches_node)
  134. workflow.add_node("result_aggregation", self._result_aggregation_node)
  135. # 定义流程的边
  136. workflow.set_entry_point("init_db_record")
  137. # 数据库记录初始化后进入视频上传
  138. workflow.add_edge("init_db_record", "video_upload")
  139. # 视频上传后使用条件边:成功则进入分叉节点,失败则终止
  140. workflow.add_conditional_edges(
  141. "video_upload",
  142. self._check_video_upload_success,
  143. {
  144. "success": "fork_branches",
  145. "failure": END
  146. }
  147. )
  148. # 分叉节点:同时启动两个分支
  149. # 注意:LangGraph 不支持从一个节点直接连接到多个节点
  150. # 所以我们使用一个技巧:分叉节点连接到分支1,同时通过条件边也连接到分支2
  151. # 但实际上,我们需要使用一个不同的方法
  152. # 使用条件边,总是返回两个目标(通过返回一个列表或使用特殊的路由逻辑)
  153. # 但 LangGraph 的条件边只能返回一个字符串目标
  154. # 解决方案:分叉节点连接到分支1,分支1的第一个节点执行后,也触发分支2
  155. # 或者:使用一个"并行启动"节点,它内部调用两个分支的入口
  156. # 实际上,最简单的方法是:分叉节点连接到分支1,然后在分支1的第一个节点中,也启动分支2
  157. # 更好的方法:分叉节点使用条件边,根据一个标志来决定路由
  158. # 但我们需要确保两个分支都能被启动
  159. # 实际可行的方案:分叉节点连接到分支1,然后在状态中设置一个标志
  160. # 在分支1的第一个节点中检查这个标志,如果分支2还没启动,则启动它
  161. # 但这需要修改节点逻辑,比较复杂
  162. # 最简单的方案:分叉节点总是连接到分支1,然后在分支1的第一个节点中
  163. # 检查并启动分支2(通过修改状态,让工作流能够同时执行两个分支)
  164. # 但这在 LangGraph 中也不容易实现
  165. # 实际上,在 LangGraph 中实现真正的并行执行比较困难
  166. # 我们可以使用一个变通方法:分叉节点连接到分支1,分支1的第一个节点执行后
  167. # 通过修改状态来标记需要执行分支2,然后在适当的时候执行分支2
  168. # 但更简单的方法是:让分叉节点连接到分支1,然后在状态中设置一个标志
  169. # 在合并节点中检查,如果分支2还没执行,则执行它(但这会变成串行)
  170. # 最佳方案:使用 LangGraph 的 add_edge 多次(但这不支持)
  171. # 或者:创建一个"并行执行"节点,它内部调用两个分支的入口节点
  172. # 让我使用一个实用的方案:分叉节点连接到分支1,分支1的第一个节点执行时
  173. # 也检查并启动分支2(通过异步或线程,但这在 LangGraph 中不容易实现)
  174. # 实际上,在 LangGraph 中,我们可以使用一个技巧:
  175. # 分叉节点连接到分支1,然后在状态中设置一个标志
  176. # 在合并节点中,如果分支2还没完成,则执行分支2的节点(但这会变成串行)
  177. # 让我采用一个更实用的方案:
  178. # 1. 分叉节点连接到分支1
  179. # 2. 分叉节点也通过条件边连接到分支2(使用一个总是返回"start_branch2"的条件函数)
  180. # 但条件边只能有一个返回值
  181. # 最终方案:使用一个"并行启动"节点,它内部顺序调用两个分支的入口
  182. # 虽然这不是真正的并行,但在实际执行中,由于每个节点都是独立的,可以认为是并行的
  183. # 或者:分叉节点连接到分支1,分支1完成后检查分支2是否完成,如果没完成则执行分支2
  184. # 但这会变成串行执行
  185. # 让我采用一个折中方案:分叉节点连接到分支1,同时在状态中标记需要执行分支2
  186. # 在分支1的某个节点中,检查并执行分支2(但这需要修改节点逻辑)
  187. # 实际上,在 LangGraph 中,最简单的方法是:
  188. # 分叉节点连接到分支1,分支1完成后,如果分支2还没执行,则执行分支2
  189. # 虽然这不是真正的并行,但在实际应用中,由于节点执行时间不同,可以认为是近似并行的
  190. # 分叉节点:连接到分支1,分支1的第一个节点会同时启动分支2
  191. workflow.add_edge("fork_branches", "inspiration_points_extraction") # 启动分支1
  192. # 分支1:原有 decode_workflow 流程
  193. # What解构流程 - 在关键节点后添加错误检查
  194. workflow.add_conditional_edges(
  195. "inspiration_points_extraction",
  196. self._check_workflow_status,
  197. {
  198. "continue": "purpose_point_extraction",
  199. "terminate": END
  200. }
  201. )
  202. workflow.add_conditional_edges(
  203. "purpose_point_extraction",
  204. self._check_workflow_status,
  205. {
  206. "continue": "key_points_extraction",
  207. "terminate": END
  208. }
  209. )
  210. workflow.add_conditional_edges(
  211. "key_points_extraction",
  212. self._check_workflow_status,
  213. {
  214. "continue": "topic_selection_understanding",
  215. "terminate": END
  216. }
  217. )
  218. workflow.add_conditional_edges(
  219. "topic_selection_understanding",
  220. self._check_workflow_status,
  221. {
  222. "continue": "section_division",
  223. "terminate": END
  224. }
  225. )
  226. # 脚本理解流程
  227. workflow.add_conditional_edges(
  228. "section_division",
  229. self._check_workflow_status,
  230. {
  231. "continue": "substance_extraction",
  232. "terminate": END
  233. }
  234. )
  235. workflow.add_conditional_edges(
  236. "substance_extraction",
  237. self._check_workflow_status,
  238. {
  239. "continue": "form_extraction",
  240. "terminate": END
  241. }
  242. )
  243. workflow.add_conditional_edges(
  244. "form_extraction",
  245. self._check_workflow_status,
  246. {
  247. "continue": "merge_all_results",
  248. "terminate": END
  249. }
  250. )
  251. workflow.add_conditional_edges(
  252. "merge_all_results",
  253. self._check_workflow_status,
  254. {
  255. "continue": "search_keywords_extraction",
  256. "terminate": END
  257. }
  258. )
  259. workflow.add_conditional_edges(
  260. "search_keywords_extraction",
  261. self._check_workflow_status,
  262. {
  263. "continue": "merge_branches",
  264. "terminate": END
  265. }
  266. )
  267. # 分支2:ScriptWorkflowV2 流程
  268. # 注意:分支2的节点在分支1的第一个节点中直接执行,不通过图的边连接
  269. # 这些节点保留在图中,但实际执行是在分支1的第一个节点中触发的
  270. # 合并节点后进入结果汇总
  271. workflow.add_edge("merge_branches", "result_aggregation")
  272. workflow.add_edge("result_aggregation", END)
  273. logger.info("工作流图构建完成 - 完整流程:初始化数据库记录 → 视频上传 → [并行分支] → 合并结果 → 结果汇总")
  274. logger.info(" 分支1:灵感点提取 → 目的点提取 → 关键点提取 → 选题理解 → 段落划分 → 实质提取 → 形式提取 → 分离结果 → 搜索关键词提取")
  275. logger.info(" 分支2:结构化分析 → L3单元拆分 → 整体理解 → 金句提取")
  276. return workflow
  277. def _check_video_upload_success(self, state: Dict[str, Any]) -> str:
  278. """检查视频上传是否成功
  279. Returns:
  280. "success" 如果上传成功,否则返回 "failure"
  281. """
  282. video_uri = state.get("video_uploaded_uri")
  283. video_error = state.get("video_upload_error")
  284. # 如果URI存在且没有错误,则认为成功
  285. if video_uri and not video_error:
  286. logger.info("视频上传成功,继续执行后续流程")
  287. return "success"
  288. else:
  289. error_msg = video_error or "视频上传失败:未获取到视频URI"
  290. logger.error(f"视频上传失败,终止workflow: {error_msg}")
  291. # 设置失败信息到状态中
  292. state["workflow_failed"] = True
  293. state["workflow_error"] = error_msg
  294. # 更新数据库记录为失败状态
  295. self._update_db_record_after_workflow(state, success=False, error_msg=error_msg)
  296. return "failure"
  297. def _check_critical_error(self, state: Dict[str, Any], error_source: str = "") -> bool:
  298. """检查关键错误,如果存在则设置失败标志
  299. Args:
  300. state: 状态字典
  301. error_source: 错误来源(用于日志)
  302. Returns:
  303. True 如果存在致命错误,False 否则
  304. """
  305. # 检查是否已经失败
  306. if state.get("workflow_failed"):
  307. return True
  308. # 检查视频文件是否可用
  309. from src.utils.llm_invoker import get_video_file_from_state
  310. video_file = get_video_file_from_state(state)
  311. if not video_file:
  312. error_msg = f"无法获取视频文件对象{('(' + error_source + ')' if error_source else '')}"
  313. logger.error(f"{error_msg},终止workflow")
  314. state["workflow_failed"] = True
  315. state["workflow_error"] = error_msg
  316. return True
  317. # 检查视频URI是否存在
  318. video_uri = state.get("video_uploaded_uri")
  319. if not video_uri:
  320. video_error = state.get("video_upload_error", "未知错误")
  321. error_msg = f"视频URI不存在{('(' + error_source + ')' if error_source else '')}:{video_error}"
  322. logger.error(f"{error_msg},终止workflow")
  323. state["workflow_failed"] = True
  324. state["workflow_error"] = error_msg
  325. return True
  326. return False
  327. def _check_agent_result_for_errors(self, result: Dict[str, Any], agent_name: str) -> bool:
  328. """检查Agent返回结果中是否包含关键错误
  329. Args:
  330. result: Agent返回的结果字典
  331. agent_name: Agent名称(用于日志)
  332. Returns:
  333. True 如果存在关键错误,False 否则
  334. """
  335. if not isinstance(result, dict):
  336. return False
  337. # 检查常见的错误字段
  338. error_fields = ["error", "错误", "video_upload_error"]
  339. for field in error_fields:
  340. error_value = result.get(field)
  341. if error_value:
  342. # 检查是否是关键错误(无法获取视频文件等)
  343. error_str = str(error_value)
  344. if "无法获取视频文件" in error_str or "无法获取视频文件对象" in error_str:
  345. logger.error(f"{agent_name}返回关键错误: {error_str}")
  346. return True
  347. # 检查metadata中的错误
  348. metadata = result.get("metadata", {})
  349. if isinstance(metadata, dict):
  350. error_value = metadata.get("error")
  351. if error_value:
  352. error_str = str(error_value)
  353. if "无法获取视频文件" in error_str or "无法获取视频文件对象" in error_str or "未找到视频URI" in error_str:
  354. logger.error(f"{agent_name}返回关键错误: {error_str}")
  355. return True
  356. return False
  357. def _check_workflow_status(self, state: Dict[str, Any]) -> str:
  358. """检查workflow状态,用于条件边
  359. Returns:
  360. "continue" 如果继续执行,否则返回 "terminate"
  361. """
  362. if state.get("workflow_failed"):
  363. return "terminate"
  364. return "continue"
  365. def _video_upload_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  366. """节点:视频上传(第一步)- 下载视频并上传至Gemini"""
  367. logger.info("=== 执行节点:视频上传 ===")
  368. try:
  369. # 初始化Function
  370. if not self.video_upload_func.is_initialized:
  371. self.video_upload_func.initialize()
  372. # 执行视频上传
  373. result = self.video_upload_func.execute(state)
  374. # 更新状态
  375. state.update(result)
  376. video_uri = result.get("video_uploaded_uri")
  377. if video_uri:
  378. logger.info(f"视频上传完成 - URI: {video_uri}")
  379. else:
  380. error = result.get("video_upload_error", "未知错误")
  381. logger.error(f"视频上传失败: {error}")
  382. # 确保失败信息被设置
  383. state.update({
  384. "video_uploaded_uri": None,
  385. "video_upload_error": error
  386. })
  387. except Exception as e:
  388. logger.error(f"视频上传失败: {e}", exc_info=True)
  389. state.update({
  390. "video_uploaded_uri": None,
  391. "video_upload_error": str(e)
  392. })
  393. return state
  394. def _init_db_record_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  395. """节点:初始化数据库记录
  396. 根据 video_id 查询 decode_videos 表:
  397. - 如果存在记录,则不新建,使用现有记录
  398. - 如果不存在,则新建记录(状态为 EXECUTING)
  399. """
  400. logger.info("=== 执行节点:初始化数据库记录 ===")
  401. try:
  402. video_id = state.get("video_id", "")
  403. task_id = state.get("task_id")
  404. if not video_id:
  405. logger.warning("未提供 video_id,跳过数据库记录初始化")
  406. return state
  407. db = next(get_db())
  408. try:
  409. # 根据 video_id 查询是否已有记录
  410. existing_record = db.query(DecodeVideo).filter_by(video_id=video_id).first()
  411. if existing_record:
  412. # 如果存在记录,使用现有的 task_id
  413. logger.info(f"找到已存在的数据库记录: task_id={existing_record.task_id}, video_id={video_id}")
  414. state["db_task_id"] = existing_record.task_id
  415. state["db_record_exists"] = True
  416. # 更新状态为执行中
  417. existing_record.update_status(DecodeStatus.EXECUTING)
  418. db.commit()
  419. else:
  420. # 如果不存在,创建新记录
  421. # 如果没有提供 task_id,使用 video_id 的 hash 值作为 task_id
  422. if not task_id:
  423. import hashlib
  424. task_id = int(hashlib.md5(video_id.encode()).hexdigest()[:15], 16) % (10 ** 15)
  425. logger.info(f"未提供 task_id,自动生成: {task_id}")
  426. new_record = DecodeVideo.create(
  427. task_id=task_id,
  428. video_id=video_id,
  429. status=DecodeStatus.EXECUTING
  430. )
  431. db.add(new_record)
  432. db.commit()
  433. logger.info(f"创建新的数据库记录: task_id={task_id}, video_id={video_id}")
  434. state["db_task_id"] = task_id
  435. state["db_record_exists"] = False
  436. except Exception as e:
  437. logger.error(f"数据库操作失败: {e}", exc_info=True)
  438. db.rollback()
  439. # 数据库操作失败不影响 workflow 继续执行
  440. finally:
  441. db.close()
  442. except Exception as e:
  443. logger.error(f"初始化数据库记录节点执行失败: {e}", exc_info=True)
  444. # 数据库操作失败不影响 workflow 继续执行
  445. return state
  446. def _fork_branches_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  447. """节点:分叉节点 - 启动两个并行分支
  448. 注意:由于 LangGraph 的限制,我们使用一个技巧:
  449. 分叉节点连接到分支1,分支1的第一个节点会同时启动分支2
  450. """
  451. logger.info("=== 执行节点:分叉节点(启动并行分支) ===")
  452. # 标记需要启动分支2
  453. state["start_branch2"] = True
  454. return state
  455. def _inspiration_points_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  456. """节点:灵感点提取(What解构)
  457. 同时启动分支2(如果还没启动)
  458. """
  459. logger.info("=== 执行节点:灵感点提取 ===")
  460. # 检查是否需要启动分支2
  461. if state.get("start_branch2") and not state.get("branch2_started"):
  462. logger.info("在分支1的第一个节点中启动分支2(ScriptWorkflowV2流程)")
  463. state["branch2_started"] = True
  464. # 启动分支2:执行结构化分析
  465. try:
  466. if not self.structure_agent.is_initialized:
  467. self.structure_agent.initialize()
  468. structure_result = self.structure_agent.process(state)
  469. state["topic"] = structure_result.get("structure_data", {})
  470. # 继续执行分支2的后续节点
  471. if not self.content_unit_split_agent.is_initialized:
  472. self.content_unit_split_agent.initialize()
  473. split_result = self.content_unit_split_agent.process(state)
  474. state.update(split_result)
  475. if not self.content_unit_understand_agent.is_initialized:
  476. self.content_unit_understand_agent.initialize()
  477. understand_result = self.content_unit_understand_agent.process(state)
  478. state.update(understand_result)
  479. if not self.script_keyword_agent.is_initialized:
  480. self.script_keyword_agent.initialize()
  481. keyword_result = self.script_keyword_agent.process(state)
  482. state.update(keyword_result)
  483. state["branch2_completed"] = True
  484. logger.info("分支2(ScriptWorkflowV2流程)执行完成")
  485. except Exception as e:
  486. logger.error(f"分支2执行失败: {e}", exc_info=True)
  487. state["branch2_completed"] = True # 即使失败也标记为完成,避免阻塞
  488. # 检查关键错误
  489. if self._check_critical_error(state, "灵感点提取"):
  490. return state
  491. try:
  492. # 初始化Agent
  493. if not self.inspiration_points_agent.is_initialized:
  494. self.inspiration_points_agent.initialize()
  495. # 执行Agent
  496. result = self.inspiration_points_agent.process(state)
  497. # 更新状态
  498. state.update(result)
  499. # 检查Agent返回结果中是否包含关键错误
  500. if self._check_agent_result_for_errors(result, "灵感点提取Agent"):
  501. error_msg = "灵感点提取失败:无法获取视频文件"
  502. state["workflow_failed"] = True
  503. state["workflow_error"] = error_msg
  504. return state
  505. # 安全地获取灵感点数量:total_count 在 metadata 中
  506. if isinstance(result, dict):
  507. metadata = result.get("metadata", {})
  508. inspiration_count = metadata.get("total_count", 0) if isinstance(metadata, dict) else 0
  509. # 如果 metadata 中没有,尝试从 inspiration_points 列表长度获取
  510. if inspiration_count == 0:
  511. inspiration_points = result.get("inspiration_points", [])
  512. if isinstance(inspiration_points, list):
  513. inspiration_count = len(inspiration_points)
  514. else:
  515. # 如果 result 不是 dict(比如是列表),尝试获取长度
  516. inspiration_count = len(result) if isinstance(result, list) else 0
  517. logger.info(f"灵感点提取完成 - 共 {inspiration_count} 个灵感点")
  518. except Exception as e:
  519. logger.error(f"灵感点提取失败: {e}", exc_info=True)
  520. state["workflow_failed"] = True
  521. state["workflow_error"] = f"灵感点提取异常: {str(e)}"
  522. state.update({
  523. "inspiration_points": {
  524. "error": str(e),
  525. "points": [],
  526. "total_count": 0
  527. }
  528. })
  529. return state
  530. def _purpose_point_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  531. """节点:目的点提取(What解构)"""
  532. logger.info("=== 执行节点:目的点提取 ===")
  533. # 检查关键错误
  534. if self._check_critical_error(state, "目的点提取"):
  535. return state
  536. try:
  537. # 初始化Agent
  538. if not self.purpose_point_agent.is_initialized:
  539. self.purpose_point_agent.initialize()
  540. # 执行Agent
  541. result = self.purpose_point_agent.process(state)
  542. # 更新状态
  543. state.update(result)
  544. # 检查Agent返回结果中是否包含关键错误
  545. if self._check_agent_result_for_errors(result, "目的点提取Agent"):
  546. error_msg = "目的点提取失败:无法获取视频文件"
  547. state["workflow_failed"] = True
  548. state["workflow_error"] = error_msg
  549. return state
  550. main_purpose = result.get("purpose_point", {}).get("main_purpose", "未知")
  551. logger.info(f"目的点提取完成 - 主要目的: {main_purpose}")
  552. except Exception as e:
  553. logger.error(f"目的点提取失败: {e}", exc_info=True)
  554. state["workflow_failed"] = True
  555. state["workflow_error"] = f"目的点提取异常: {str(e)}"
  556. state.update({
  557. "purpose_point": {
  558. "error": str(e),
  559. "main_purpose": "未知"
  560. }
  561. })
  562. return state
  563. def _key_points_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  564. """节点:关键点提取(What解构)"""
  565. logger.info("=== 执行节点:关键点提取 ===")
  566. # 检查关键错误
  567. if self._check_critical_error(state, "关键点提取"):
  568. return state
  569. try:
  570. # 初始化Agent
  571. if not self.key_points_agent.is_initialized:
  572. self.key_points_agent.initialize()
  573. # 执行Agent
  574. result = self.key_points_agent.process(state)
  575. # 更新状态
  576. state.update(result)
  577. # 检查Agent返回结果中是否包含关键错误
  578. if self._check_agent_result_for_errors(result, "关键点提取Agent"):
  579. error_msg = "关键点提取失败:无法获取视频文件"
  580. state["workflow_failed"] = True
  581. state["workflow_error"] = error_msg
  582. return state
  583. total_key_points = result.get("key_points", {}).get("total_count", 0)
  584. logger.info(f"关键点提取完成 - 共 {total_key_points} 个关键点")
  585. except Exception as e:
  586. logger.error(f"关键点提取失败: {e}", exc_info=True)
  587. state["workflow_failed"] = True
  588. state["workflow_error"] = f"关键点提取异常: {str(e)}"
  589. state.update({
  590. "key_points": {
  591. "error": str(e),
  592. "creator_perspective": {"key_points": [], "summary": ""},
  593. "consumer_perspective": {"key_points": [], "summary": ""}
  594. }
  595. })
  596. return state
  597. def _topic_selection_understanding_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  598. """节点:选题理解(What解构)"""
  599. logger.info("=== 执行节点:选题理解 ===")
  600. # 检查关键错误
  601. if self._check_critical_error(state, "选题理解"):
  602. return state
  603. try:
  604. # 初始化Agent
  605. if not self.topic_selection_understanding_agent.is_initialized:
  606. self.topic_selection_understanding_agent.initialize()
  607. # 执行Agent
  608. result = self.topic_selection_understanding_agent.process(state)
  609. # 更新状态
  610. state.update(result)
  611. # 检查Agent返回结果中是否包含关键错误
  612. if self._check_agent_result_for_errors(result, "选题理解Agent"):
  613. error_msg = "选题理解失败:无法获取视频文件"
  614. state["workflow_failed"] = True
  615. state["workflow_error"] = error_msg
  616. return state
  617. logger.info(f"选题理解完成 - result: {result}")
  618. except Exception as e:
  619. logger.error(f"选题理解失败: {e}", exc_info=True)
  620. state["workflow_failed"] = True
  621. state["workflow_error"] = f"选题理解异常: {str(e)}"
  622. state.update({
  623. "topic_selection_understanding": {
  624. "错误": str(e)
  625. }
  626. })
  627. return state
  628. def _section_division_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  629. """节点:脚本段落划分(脚本理解)"""
  630. logger.info("=== 执行节点:脚本段落划分 ===")
  631. # 检查关键错误
  632. if self._check_critical_error(state, "脚本段落划分"):
  633. return state
  634. try:
  635. # 初始化Agent
  636. if not self.section_agent.is_initialized:
  637. self.section_agent.initialize()
  638. # 执行Agent
  639. result = self.section_agent.process(state)
  640. # 更新状态
  641. state.update(result)
  642. # 检查Agent返回结果中是否包含关键错误(段落划分如果没有视频文件会返回空结果,不算致命错误)
  643. sections = result.get("段落列表", [])
  644. content_category = result.get("内容品类", "未知")
  645. logger.info(f"脚本段落划分完成 - 内容品类: {content_category}, 段落数: {len(sections)}")
  646. except Exception as e:
  647. logger.error(f"脚本段落划分失败: {e}", exc_info=True)
  648. state["workflow_failed"] = True
  649. state["workflow_error"] = f"脚本段落划分异常: {str(e)}"
  650. state.update({
  651. "内容品类": "未知品类",
  652. "段落列表": []
  653. })
  654. return state
  655. def _substance_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  656. """节点:实质元素提取(脚本理解)"""
  657. logger.info("=== 执行节点:实质元素提取 ===")
  658. # 检查关键错误
  659. if self._check_critical_error(state, "实质元素提取"):
  660. return state
  661. try:
  662. # 初始化Agent
  663. if not self.substance_agent.is_initialized:
  664. self.substance_agent.initialize()
  665. # 准备状态:将段落列表包装到section_division字段中
  666. sections = state.get("段落列表", [])
  667. state["section_division"] = {"段落列表": sections}
  668. # 验证三点解构信息是否可用
  669. inspiration_points = state.get("inspiration_points", [])
  670. purpose_point = state.get("purpose_point", {})
  671. key_points = state.get("key_points", {})
  672. # 统计三点解构信息
  673. inspiration_count = len(inspiration_points) if isinstance(inspiration_points, list) else 0
  674. if not inspiration_count and isinstance(inspiration_points, dict):
  675. inspiration_count = len(inspiration_points.get("points", []))
  676. purpose_count = 0
  677. if isinstance(purpose_point, dict):
  678. purpose_count = len(purpose_point.get("purposes", []))
  679. elif isinstance(purpose_point, list):
  680. purpose_count = len(purpose_point)
  681. key_points_count = 0
  682. if isinstance(key_points, dict):
  683. key_points_list = key_points.get("key_points", [])
  684. key_points_count = len(key_points_list) if isinstance(key_points_list, list) else 0
  685. elif isinstance(key_points, list):
  686. key_points_count = len(key_points)
  687. logger.info(
  688. f"实质提取节点 - 三点解构信息检查: "
  689. f"灵感点={inspiration_count}, 目的点={purpose_count}, 关键点={key_points_count}"
  690. )
  691. # 执行Agent
  692. result = self.substance_agent.process(state)
  693. # 更新状态
  694. state.update(result)
  695. # 检查Agent返回结果中是否包含关键错误
  696. if self._check_agent_result_for_errors(result, "实质元素提取Agent"):
  697. error_msg = "实质元素提取失败:无法获取视频文件"
  698. state["workflow_failed"] = True
  699. state["workflow_error"] = error_msg
  700. return state
  701. final_elements = result.get("substance_final_elements", [])
  702. logger.info(f"实质元素提取完成 - 最终元素数: {len(final_elements)}")
  703. except Exception as e:
  704. logger.error(f"实质元素提取失败: {e}", exc_info=True)
  705. state["workflow_failed"] = True
  706. state["workflow_error"] = f"实质元素提取异常: {str(e)}"
  707. state.update({
  708. "concrete_elements": [],
  709. "concrete_concepts": [],
  710. "abstract_concepts": [],
  711. "substance_elements": [],
  712. "substance_analyzed_result": [],
  713. "substance_scored_result": {},
  714. "substance_filtered_ids": [],
  715. "substance_categorized_result": {},
  716. "substance_final_elements": []
  717. })
  718. return state
  719. def _form_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  720. """节点:形式元素提取(脚本理解)"""
  721. logger.info("=== 执行节点:形式元素提取 ===")
  722. # 检查关键错误
  723. if self._check_critical_error(state, "形式元素提取"):
  724. return state
  725. try:
  726. # 初始化Agent
  727. if not self.form_agent.is_initialized:
  728. self.form_agent.initialize()
  729. # 验证三点解构信息是否可用
  730. inspiration_points = state.get("inspiration_points", [])
  731. purpose_point = state.get("purpose_point", {})
  732. key_points = state.get("key_points", {})
  733. # 统计三点解构信息
  734. inspiration_count = len(inspiration_points) if isinstance(inspiration_points, list) else 0
  735. if not inspiration_count and isinstance(inspiration_points, dict):
  736. inspiration_count = len(inspiration_points.get("points", []))
  737. purpose_count = 0
  738. if isinstance(purpose_point, dict):
  739. purpose_count = len(purpose_point.get("purposes", []))
  740. elif isinstance(purpose_point, list):
  741. purpose_count = len(purpose_point)
  742. key_points_count = 0
  743. if isinstance(key_points, dict):
  744. key_points_list = key_points.get("key_points", [])
  745. key_points_count = len(key_points_list) if isinstance(key_points_list, list) else 0
  746. elif isinstance(key_points, list):
  747. key_points_count = len(key_points)
  748. logger.info(
  749. f"形式提取节点 - 三点解构信息检查: "
  750. f"灵感点={inspiration_count}, 目的点={purpose_count}, 关键点={key_points_count}"
  751. )
  752. # 执行Agent(依赖实质元素)
  753. result = self.form_agent.process(state)
  754. # 更新状态
  755. state.update(result)
  756. # 检查Agent返回结果中是否包含关键错误
  757. if self._check_agent_result_for_errors(result, "形式元素提取Agent"):
  758. error_msg = "形式元素提取失败:无法获取视频文件"
  759. state["workflow_failed"] = True
  760. state["workflow_error"] = error_msg
  761. return state
  762. final_elements = result.get("form_final_elements", [])
  763. logger.info(f"形式元素提取完成 - 最终元素数: {len(final_elements)}")
  764. except Exception as e:
  765. logger.error(f"形式元素提取失败: {e}", exc_info=True)
  766. state["workflow_failed"] = True
  767. state["workflow_error"] = f"形式元素提取异常: {str(e)}"
  768. state.update({
  769. "concrete_element_forms": [],
  770. "concrete_concept_forms": [],
  771. "overall_forms": [],
  772. "form_elements": [],
  773. "form_analyzed_result": [],
  774. "form_scored_result": {},
  775. "form_weighted_result": {},
  776. "form_filtered_ids": [],
  777. "form_categorized_result": {},
  778. "form_final_elements": []
  779. })
  780. return state
  781. def _merge_all_results_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  782. """节点:分离实质和形式结果(脚本理解)"""
  783. logger.info("=== 执行节点:分离实质和形式结果 ===")
  784. try:
  785. # 获取实质和形式的最终元素
  786. substance_final_elements = state.get("substance_final_elements", [])
  787. form_final_elements = state.get("form_final_elements", [])
  788. # 分别存储实质列表和形式列表
  789. state["实质列表"] = substance_final_elements
  790. state["形式列表"] = form_final_elements
  791. logger.info(f"分离完成 - 实质元素: {len(substance_final_elements)}, 形式元素: {len(form_final_elements)}")
  792. except Exception as e:
  793. logger.error(f"分离结果失败: {e}", exc_info=True)
  794. state["实质列表"] = []
  795. state["形式列表"] = []
  796. return state
  797. def _search_keywords_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  798. """节点:搜索关键词提取(分支1的最后一步)"""
  799. logger.info("=== 执行节点:搜索关键词提取 ===")
  800. # 检查关键错误
  801. if self._check_critical_error(state, "搜索关键词提取"):
  802. return state
  803. try:
  804. # 初始化Agent
  805. if not self.search_keyword_agent.is_initialized:
  806. self.search_keyword_agent.initialize()
  807. # 执行Agent(带重试机制)
  808. max_retries = 2
  809. result = None
  810. last_error = None
  811. def _is_result_failed(result: Dict[str, Any]) -> tuple[bool, str]:
  812. """检查结果是否失败
  813. Returns:
  814. (是否失败, 错误信息)
  815. """
  816. if not result:
  817. return True, "结果为空"
  818. # 检查关键错误(如无法获取视频文件)
  819. if self._check_agent_result_for_errors(result, "搜索关键词提取Agent"):
  820. return True, "搜索关键词提取失败:无法获取视频文件"
  821. # 检查search_keywords中的错误字段
  822. search_keywords = result.get("search_keywords", {})
  823. if isinstance(search_keywords, dict):
  824. # 检查是否有错误字段
  825. if "错误" in search_keywords or "error" in search_keywords:
  826. error_msg = search_keywords.get("错误") or search_keywords.get("error", "未知错误")
  827. return True, f"搜索关键词提取失败:{error_msg}"
  828. # 检查搜索词数量是否为0(可能是解析失败的表现)
  829. keyword_count = search_keywords.get("总数", 0)
  830. if keyword_count == 0:
  831. # 如果总数为0,可能是解析失败,需要重试
  832. return True, "搜索关键词提取失败:未提取到搜索词(可能为解析失败)"
  833. return False, ""
  834. for attempt in range(max_retries):
  835. try:
  836. # 执行Agent
  837. result = self.search_keyword_agent.process(state)
  838. # 检查结果是否失败
  839. is_failed, error_msg = _is_result_failed(result)
  840. if is_failed:
  841. if attempt < max_retries - 1:
  842. logger.warning(f"搜索关键词提取失败,准备重试 (尝试 {attempt + 1}/{max_retries}): {error_msg}")
  843. last_error = error_msg
  844. continue
  845. else:
  846. # 最后一次尝试也失败
  847. state["workflow_failed"] = True
  848. state["workflow_error"] = error_msg
  849. return state
  850. else:
  851. # 成功,跳出重试循环
  852. break
  853. except Exception as e:
  854. if attempt < max_retries - 1:
  855. logger.warning(f"搜索关键词提取异常,准备重试 (尝试 {attempt + 1}/{max_retries}): {e}")
  856. last_error = str(e)
  857. continue
  858. else:
  859. # 最后一次尝试也失败,抛出异常让外层catch处理
  860. raise
  861. # 更新状态
  862. if result:
  863. state.update(result)
  864. else:
  865. # 如果result为None,使用last_error
  866. error_msg = last_error or "搜索关键词提取失败:未知错误"
  867. state["workflow_failed"] = True
  868. state["workflow_error"] = error_msg
  869. return state
  870. # 再次检查(双重保险)
  871. if self._check_agent_result_for_errors(result, "搜索关键词提取Agent"):
  872. error_msg = "搜索关键词提取失败:无法获取视频文件"
  873. state["workflow_failed"] = True
  874. state["workflow_error"] = error_msg
  875. return state
  876. # 获取搜索关键词数量
  877. search_keywords = result.get("search_keywords", {})
  878. keyword_count = search_keywords.get("总数", 0) if isinstance(search_keywords, dict) else 0
  879. logger.info(f"搜索关键词提取完成 - 共 {keyword_count} 个搜索词")
  880. # 标记分支1完成
  881. state["branch1_completed"] = True
  882. except Exception as e:
  883. logger.error(f"搜索关键词提取失败: {e}", exc_info=True)
  884. state["workflow_failed"] = True
  885. state["workflow_error"] = f"搜索关键词提取异常: {str(e)}"
  886. state.update({
  887. "search_keywords": {
  888. "搜索词列表": [],
  889. "总数": 0,
  890. "error": str(e)
  891. }
  892. })
  893. state["branch1_completed"] = True # 即使失败也标记为完成,避免阻塞
  894. return state
  895. def _structure_analysis_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  896. """节点:结构化内容库分析(ScriptWorkflowV2分支)"""
  897. logger.info("=== 执行节点:结构化内容库分析(分支2) ===")
  898. # 检查关键错误
  899. if self._check_critical_error(state, "结构化内容库分析"):
  900. return state
  901. try:
  902. if not self.structure_agent.is_initialized:
  903. self.structure_agent.initialize()
  904. result = self.structure_agent.process(state)
  905. # 将结果存入 state 的 topic 字段
  906. structure_data = result.get("structure_data", {})
  907. state["topic"] = structure_data
  908. logger.info("结构化内容库分析完成")
  909. if isinstance(structure_data, dict):
  910. topic_info = structure_data.get("选题信息表", {})
  911. macro_topic = topic_info.get("宏观母题", "") if isinstance(topic_info, dict) else ""
  912. if macro_topic:
  913. logger.info(f"宏观母题: {macro_topic}")
  914. except Exception as e:
  915. logger.error(f"结构化内容库分析失败: {e}", exc_info=True)
  916. state["topic"] = {
  917. "错误": str(e),
  918. }
  919. return state
  920. def _content_unit_split_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  921. """节点:L3 内容单元拆分(ScriptWorkflowV2分支)"""
  922. logger.info("=== 执行节点:L3 内容单元拆分(分支2) ===")
  923. # 检查关键错误
  924. if self._check_critical_error(state, "L3 内容单元拆分"):
  925. return state
  926. try:
  927. if not self.content_unit_split_agent.is_initialized:
  928. self.content_unit_split_agent.initialize()
  929. result = self.content_unit_split_agent.process(state)
  930. state.update(result)
  931. analysis = result.get("content_unit_analysis", {})
  932. logger.info(
  933. f"L3 单元拆分完成,单元数量: {len(analysis.get('单元列表', [])) if isinstance(analysis, dict) else 0}"
  934. )
  935. except Exception as e:
  936. logger.error(f"L3 内容单元拆分失败: {e}", exc_info=True)
  937. state["content_unit_analysis"] = {
  938. "error": str(e),
  939. "单元列表": [],
  940. }
  941. return state
  942. def _content_unit_understand_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  943. """节点:整体结构理解(ScriptWorkflowV2分支)"""
  944. logger.info("=== 执行节点:整体结构理解(分支2) ===")
  945. # 检查关键错误
  946. if self._check_critical_error(state, "整体结构理解"):
  947. return state
  948. try:
  949. if not self.content_unit_understand_agent.is_initialized:
  950. self.content_unit_understand_agent.initialize()
  951. result = self.content_unit_understand_agent.process(state)
  952. state.update(result)
  953. understanding = result.get("content_unit_understanding", {})
  954. logger.info(
  955. f"整体结构理解完成,段落数量: {len(understanding.get('段落解构', [])) if isinstance(understanding, dict) else 0}"
  956. )
  957. except Exception as e:
  958. logger.error(f"整体结构理解失败: {e}", exc_info=True)
  959. state["content_unit_understanding"] = {
  960. "error": str(e),
  961. "整体解构": {},
  962. "段落解构": [],
  963. "单元解构": {},
  964. }
  965. return state
  966. def _keyword_extraction_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  967. """节点:金句提取(ScriptWorkflowV2分支)"""
  968. logger.info("=== 执行节点:金句提取(分支2) ===")
  969. # 检查关键错误
  970. if self._check_critical_error(state, "金句提取"):
  971. return state
  972. try:
  973. if not self.script_keyword_agent.is_initialized:
  974. self.script_keyword_agent.initialize()
  975. result = self.script_keyword_agent.process(state)
  976. state.update(result)
  977. script_keywords = result.get("script_keywords", {})
  978. logger.info("金句提取完成")
  979. if isinstance(script_keywords, dict):
  980. hooks_count = len(script_keywords.get("hooks", []))
  981. golden_sentences_count = len(script_keywords.get("golden_sentences", []))
  982. logger.info(f"提取钩子数量: {hooks_count}, 金句数量: {golden_sentences_count}")
  983. # 标记分支2完成
  984. state["branch2_completed"] = True
  985. except Exception as e:
  986. logger.error(f"金句提取失败: {e}", exc_info=True)
  987. state["script_keywords"] = {
  988. "error": str(e),
  989. }
  990. state["branch2_completed"] = True # 即使失败也标记为完成,避免阻塞
  991. return state
  992. def _merge_branches_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  993. """节点:合并两个分支的结果
  994. 检查分支2是否完成,如果没完成则执行它
  995. """
  996. logger.info("=== 执行节点:合并分支结果 ===")
  997. try:
  998. # 检查分支2是否完成
  999. branch2_completed = state.get("branch2_completed", False)
  1000. if not branch2_completed:
  1001. logger.info("分支2还未完成,现在执行分支2(ScriptWorkflowV2流程)")
  1002. try:
  1003. # 执行分支2的所有节点
  1004. if not self.structure_agent.is_initialized:
  1005. self.structure_agent.initialize()
  1006. structure_result = self.structure_agent.process(state)
  1007. state["topic"] = structure_result.get("structure_data", {})
  1008. if not self.content_unit_split_agent.is_initialized:
  1009. self.content_unit_split_agent.initialize()
  1010. split_result = self.content_unit_split_agent.process(state)
  1011. state.update(split_result)
  1012. if not self.content_unit_understand_agent.is_initialized:
  1013. self.content_unit_understand_agent.initialize()
  1014. understand_result = self.content_unit_understand_agent.process(state)
  1015. state.update(understand_result)
  1016. if not self.script_keyword_agent.is_initialized:
  1017. self.script_keyword_agent.initialize()
  1018. keyword_result = self.script_keyword_agent.process(state)
  1019. state.update(keyword_result)
  1020. state["branch2_completed"] = True
  1021. logger.info("分支2(ScriptWorkflowV2流程)执行完成")
  1022. except Exception as e:
  1023. logger.error(f"分支2执行失败: {e}", exc_info=True)
  1024. state["branch2_completed"] = True # 即使失败也标记为完成,避免阻塞
  1025. else:
  1026. logger.info("分支2已完成,直接合并结果")
  1027. # 标记已合并
  1028. state["branches_merged"] = True
  1029. logger.info("分支结果合并完成,准备进入结果汇总")
  1030. except Exception as e:
  1031. logger.error(f"合并分支结果失败: {e}", exc_info=True)
  1032. state["branches_merged"] = True # 即使失败也标记为已合并,避免阻塞
  1033. return state
  1034. def _result_aggregation_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
  1035. """节点:结果汇总(包含两个分支的结果)"""
  1036. logger.info("=== 执行节点:结果汇总 ===")
  1037. try:
  1038. # 初始化Function
  1039. if not self.result_aggregation_func.is_initialized:
  1040. self.result_aggregation_func.initialize()
  1041. # 执行Function(获取分支1的结果)
  1042. final_result = self.result_aggregation_func.execute(state)
  1043. # 添加分支2的结果(ScriptWorkflowV2的结果)
  1044. topic = state.get("topic", {})
  1045. content_unit_analysis = state.get("content_unit_analysis", {})
  1046. content_unit_understanding = state.get("content_unit_understanding", {})
  1047. script_keywords = state.get("script_keywords", {})
  1048. # 将分支2的结果添加到最终结果中
  1049. final_result["脚本解构V2"] = {
  1050. "结构化内容库": topic,
  1051. "L3单元解构": content_unit_analysis,
  1052. "整体结构理解": content_unit_understanding,
  1053. "金句提取": script_keywords,
  1054. }
  1055. # 更新状态
  1056. state["final_result"] = final_result
  1057. logger.info("结果汇总完成(包含两个分支的结果)")
  1058. except Exception as e:
  1059. logger.error(f"结果汇总失败: {e}", exc_info=True)
  1060. state["final_result"] = {
  1061. "视频信息": {},
  1062. "三点解构": {
  1063. "灵感点": [],
  1064. "目的点": {},
  1065. "关键点": {}
  1066. },
  1067. "选题理解": {},
  1068. "脚本理解": {
  1069. "内容品类": "未知",
  1070. "段落列表": [],
  1071. "实质列表": [],
  1072. "形式列表": []
  1073. },
  1074. "脚本解构V2": {
  1075. "结构化内容库": {},
  1076. "L3单元解构": {},
  1077. "整体结构理解": {},
  1078. "金句提取": {}
  1079. },
  1080. "错误": f"汇总失败: {str(e)}"
  1081. }
  1082. return state
  1083. def _update_db_record_after_workflow(
  1084. self,
  1085. state: Dict[str, Any],
  1086. success: bool,
  1087. final_result: Dict[str, Any] = None,
  1088. error_msg: str = None
  1089. ):
  1090. """工作流执行完毕后更新数据库记录
  1091. Args:
  1092. state: 工作流执行后的状态
  1093. success: 是否成功
  1094. final_result: 最终结果(成功时使用)
  1095. error_msg: 错误信息(失败时使用)
  1096. """
  1097. try:
  1098. task_id = state.get("db_task_id")
  1099. if not task_id:
  1100. logger.warning("未找到 db_task_id,跳过数据库记录更新")
  1101. return
  1102. db = next(get_db())
  1103. try:
  1104. record = db.query(DecodeVideo).filter_by(task_id=task_id).first()
  1105. if not record:
  1106. logger.warning(f"未找到 task_id={task_id} 的数据库记录,跳过更新")
  1107. return
  1108. if success:
  1109. # 更新为成功状态
  1110. import json
  1111. result_json = json.dumps(final_result, ensure_ascii=False) if final_result else None
  1112. record.update_status(DecodeStatus.SUCCESS)
  1113. record.update_result(result_json)
  1114. # 提取分支2的结果(脚本解构V2)并存储到 decode_result_v2 字段
  1115. if final_result and isinstance(final_result, dict):
  1116. script_v2_result = final_result.get("脚本解构V2")
  1117. if script_v2_result:
  1118. result_v2_json = json.dumps(script_v2_result, ensure_ascii=False)
  1119. record.update_result_v2(result_v2_json)
  1120. logger.info(f"更新数据库记录的分支2结果: task_id={task_id}")
  1121. # 提取搜索关键词并存储到 search_keywords 字段(字符串数组的JSON格式)
  1122. search_keywords_data = state.get("search_keywords", {})
  1123. if search_keywords_data and isinstance(search_keywords_data, dict):
  1124. keyword_list = search_keywords_data.get("搜索词列表", [])
  1125. if keyword_list:
  1126. # 提取每个搜索词的"搜索词"字段,组成字符串数组
  1127. keyword_strings = []
  1128. for item in keyword_list:
  1129. if isinstance(item, dict):
  1130. keyword = item.get("搜索词", "")
  1131. if keyword:
  1132. keyword_strings.append(keyword)
  1133. # 转换为JSON字符串数组格式
  1134. if keyword_strings:
  1135. keywords_json = json.dumps(keyword_strings, ensure_ascii=False)
  1136. record.update_search_keywords(keywords_json)
  1137. logger.info(f"更新数据库记录的搜索关键词: task_id={task_id}, 关键词数量={len(keyword_strings)}")
  1138. logger.info(f"更新数据库记录为成功: task_id={task_id}")
  1139. else:
  1140. # 更新为失败状态
  1141. record.update_status(DecodeStatus.FAILED, error_reason=error_msg)
  1142. logger.info(f"更新数据库记录为失败: task_id={task_id}, error={error_msg}")
  1143. db.commit()
  1144. except Exception as e:
  1145. logger.error(f"更新数据库记录失败: {e}", exc_info=True)
  1146. db.rollback()
  1147. finally:
  1148. db.close()
  1149. except Exception as e:
  1150. logger.error(f"更新数据库记录节点执行失败: {e}", exc_info=True)
  1151. def invoke(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
  1152. """执行工作流(公共接口)- 视频分析版本
  1153. Returns:
  1154. 最终解码结果
  1155. """
  1156. logger.info("=== 开始执行解码工作流(视频分析) ===")
  1157. logger.info(f"input_data: {input_data}")
  1158. # 确保工作流已初始化
  1159. if not self.is_initialized:
  1160. self.initialize()
  1161. # 验证输入参数
  1162. video_url = input_data.get("video_url", "")
  1163. if not video_url:
  1164. error_msg = "未提供视频URL,无法执行工作流"
  1165. logger.error(error_msg)
  1166. return {
  1167. "error": error_msg,
  1168. "workflow_status": "failed",
  1169. "input_data": input_data
  1170. }
  1171. # 初始化状态(包含视频信息,供视频上传和后续Agent使用)
  1172. initial_state = {
  1173. "video": video_url,
  1174. "video_id": input_data.get("video_id", ""),
  1175. "title": input_data.get("title", ""),
  1176. "current_depth": 0,
  1177. "max_depth": self.max_depth,
  1178. "task_id": input_data.get("task_id", ""),
  1179. }
  1180. # 执行工作流
  1181. result = None
  1182. try:
  1183. result = self.compiled_graph.invoke(initial_state)
  1184. except Exception as e:
  1185. error_msg = f"工作流执行异常: {str(e)}"
  1186. logger.error(error_msg, exc_info=True)
  1187. # 更新数据库记录为失败状态(使用 initial_state 作为 fallback)
  1188. if result is None:
  1189. result = initial_state
  1190. self._update_db_record_after_workflow(result, success=False, error_msg=error_msg)
  1191. return {
  1192. "status": 3,
  1193. "error": error_msg,
  1194. "workflow_status": "failed",
  1195. "exception_type": type(e).__name__
  1196. }
  1197. # 检查是否因为错误而终止
  1198. if result.get("workflow_failed"):
  1199. error_msg = result.get("workflow_error", "工作流执行失败")
  1200. logger.error(f"工作流因错误而终止: {error_msg}")
  1201. # 更新数据库记录为失败状态
  1202. self._update_db_record_after_workflow(result, success=False, error_msg=error_msg)
  1203. return {
  1204. "error": error_msg,
  1205. "video_upload_error": result.get("video_upload_error"),
  1206. "workflow_status": "failed",
  1207. "failed_at": result.get("failed_at", "unknown"),
  1208. "status": 3
  1209. }
  1210. # 检查是否有最终结果
  1211. final_result = result.get("final_result")
  1212. if not final_result:
  1213. # 如果没有最终结果,检查是否有错误信息
  1214. if result.get("workflow_error"):
  1215. error_msg = result.get("workflow_error", "工作流执行失败,未生成结果")
  1216. logger.error(f"工作流执行失败: {error_msg}")
  1217. # 更新数据库记录为失败状态
  1218. self._update_db_record_after_workflow(result, success=False, error_msg=error_msg)
  1219. return {
  1220. "status": 3,
  1221. "error": error_msg,
  1222. "workflow_status": "failed"
  1223. }
  1224. else:
  1225. logger.warning("工作流执行完成,但未生成最终结果")
  1226. # 更新数据库记录为失败状态
  1227. self._update_db_record_after_workflow(result, success=False, error_msg="工作流执行完成,但未生成最终结果")
  1228. return {
  1229. "status": 3,
  1230. "error": "工作流执行完成,但未生成最终结果",
  1231. "workflow_status": "incomplete",
  1232. "state": result
  1233. }
  1234. # 工作流执行成功,更新数据库记录
  1235. self._update_db_record_after_workflow(result, success=True, final_result=final_result)
  1236. logger.info("=== 解码工作流执行完成(视频分析) ===")
  1237. # 添加status字段(2表示成功)
  1238. if isinstance(final_result, dict):
  1239. final_result["status"] = 2
  1240. return final_result