#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Substance extraction agent (SubstanceExtractionAgent).

Functions:
- Extract substance elements from video content (concrete elements, concrete concepts, abstract concepts)
- Step 1: extract concrete elements (visual entities in the video frames only)
- Step 2: extract concrete concepts (literal nouns in on-screen text and spoken narration)
- Step 3: summarize abstract concepts (based on Steps 1-2)
- Step 4: commonality analysis (frequency, section coverage)
- Step 5: multi-dimensional scoring (vs. inspiration points / purpose points / substance key points)
- Step 6: filtering (frequency + coverage + intention support; similarity scoring is deprecated)
- Step 7: categorization
- Step 8: merge all information
Reference: 元素提取新方案设计文档.md
"""
import json
from typing import List
from concurrent.futures import ThreadPoolExecutor

from src.components.agents.base import BaseLLMAgent
from src.utils.logger import get_logger
from src.utils.llm_invoker import LLMInvoker, get_video_file_from_state

logger = get_logger(__name__)

# Global thread pool shared across steps for parallel batch processing.
_GLOBAL_THREAD_POOL = ThreadPoolExecutor(max_workers=16, thread_name_prefix="SubstanceExtraction")
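
# The `state` consumed by ScriptSubstanceExtractionAgent.process is expected to
# look roughly like the sketch below (field names inferred from the accesses in
# process(); the video file itself is attached upstream and retrieved via
# get_video_file_from_state, so it is not shown here):
#
#   state = {
#       "text": {...},                                     # raw text/context data
#       "section_division": {"段落列表": [...]},
#       "inspiration_points": [...] or {"points": [...]},  # both formats accepted
#       "purpose_points": [...],
#       "key_points": [{"维度大类": "实质", ...}, ...],
#   }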

class ScriptSubstanceExtractionAgent(BaseLLMAgent):
    """Substance extraction agent - a bottom-up induction process.

    Extraction pipeline (video version):
    - Step 1: extract concrete elements (visual entities in the video frames only)
    - Step 2: extract concrete concepts (literal nouns in on-screen text and spoken narration)
    - Step 3: summarize abstract concepts (based on Steps 1-2)
    - Step 4: commonality analysis (frequency, section coverage)
    - Step 5: multi-dimensional scoring (vs. inspiration/purpose/substance key points; deprecated, returns empty results)
    - Step 6: filtering (frequency + coverage + intention support)
    - Step 7: categorization
    - Step 8: merge all information
    """

    def __init__(
        self,
        name: str = "substance_extraction_agent",
        description: str = "实质提取Agent",
        model_provider: str = "google_genai",
        temperature: float = 0.1,
        max_tokens: int = 40960
    ):
        system_prompt = self._build_system_prompt()
        super().__init__(
            name=name,
            description=description,
            model_provider=model_provider,
            system_prompt=system_prompt,
            temperature=temperature,
            max_tokens=max_tokens
        )

    def _build_system_prompt(self) -> str:
        """Build the system prompt."""
        return """你是一个专业的内容分析专家,擅长从图文内容中提取实质性元素。
# 核心定义
## 实质(Substance):"是什么"
内容本身,独立于表达方式而存在
### 具体元素
- 定义:从图像中直接观察到的单一视觉实体对象
- 判断标准:可以指着图片说"这是一个X"
### 具象概念
- 定义:画面或者口播内容中出现的名词
- 判断标准:画面或者口播内容中实际出现,禁止语义推导
### 抽象概念
- 定义:从具体元素和具象概念中理解到的上位抽象
- 类型1:上位抽象(归类)- 是下位元素的类别、分类
- 类型2:引申含义 - 需要理解上下文的深层含义
## 区分方法:"剥离测试"
问题:如果去掉所有表达手法/风格/技巧,这个特征还存在吗?
- 存在 → 实质(内容本身)
- 不存在/失去意义 → 形式(表达方式)
"""

    def process(self, state: dict) -> dict:
        """Run the full substance-extraction pipeline (Steps 1-8)."""
        logger.info("=== 开始实质元素提取(完整流程) ===")
        # Fetch the video file from state (the core input of the video version).
        video_file = get_video_file_from_state(state)
        if not video_file:
            logger.error("无法从 state 中获取视频文件,实质提取终止")
            return {
                "concrete_elements": [],
                "concrete_concepts": [],
                "implicit_concepts": [],
                "abstract_concepts": [],
                "substance_elements": [],
                "substance_analyzed_result": [],
                "substance_scored_result": {},
                "substance_intention_support_result": {},
                "substance_filtered_ids": [],
                "substance_categorized_result": {},
                "substance_final_elements": []
            }
        # Pull the remaining text/context data from state.
        text_data = state.get("text", {})
        section_division = state.get("section_division", {})
        # Inspiration points: accept both the list and the dict format.
        inspiration_points_raw = state.get("inspiration_points", {})
        if isinstance(inspiration_points_raw, list):
            inspiration_points = inspiration_points_raw
        elif isinstance(inspiration_points_raw, dict):
            # Backwards compatibility with the legacy {"points": [...]} format.
            inspiration_points = inspiration_points_raw.get("points", [])
        else:
            inspiration_points = []
        purpose_points = state.get("purpose_points", [])
        key_points = state.get("key_points", [])
        # Keep only substance-type key points.
        substance_key_points = [
            kp for kp in key_points
            if kp.get("维度大类") == "实质"
        ] if key_points else []
        logger.info(
            f"意图支撑评估输入: 灵感点={len(inspiration_points)}, "
            f"目的点={len(purpose_points)}, 关键点(实质类)={len(substance_key_points)}"
        )
        # Step 1: extract concrete elements (visual entities in the frames).
        logger.info("▶ Step 1: 提取具体元素")
        concrete_elements = self._step1_extract_concrete_elements(video_file)
        # Step 2: extract concrete concepts (on-screen text + spoken narration).
        logger.info("▶ Step 2: 提取具象概念")
        concrete_concepts = self._step2_extract_concrete_concepts(
            video_file, text_data, concrete_elements
        )
        # Implicit-concept logic has been removed; nothing is extracted separately anymore.
        implicit_concepts: List[dict] = []
        # Step 3: summarize abstract concepts (based on Steps 1-2).
        logger.info("▶ Step 3: 总结抽象概念")
        abstract_concepts = self._step3_summarize_abstract_concepts(
            video_file, concrete_elements, concrete_concepts, implicit_concepts
        )
        # Merge all substance elements (implicit concepts no longer included).
        all_substance_elements = (
            concrete_elements + concrete_concepts + abstract_concepts
        )
        logger.info(
            "Step 1-3 完成 - 总计: %d 个元素 (具体:%d, 具象:%d, 抽象:%d)",
            len(all_substance_elements),
            len(concrete_elements),
            len(concrete_concepts),
            len(abstract_concepts),
        )
        # Step 4: commonality analysis.
        logger.info("▶ Step 4: 共性分析")
        analyzed_result = self._step4_commonality_analysis(
            video_file, all_substance_elements, text_data, section_division
        )
        # Step 5: multi-dimensional scoring (similarity comparison is deprecated;
        # the call is kept for downstream compatibility and returns empty results).
        logger.info("▶ Step 5: 多维度评分(已停用相似度计算,仅返回空结果)")
        scored_result = self._step5_multi_dimensional_scoring(
            all_substance_elements, analyzed_result, inspiration_points,
            purpose_points, substance_key_points
        )
        # Step 5.1: intention-support evaluation (video + text).
        logger.info("▶ Step 5.1: 意图支撑评估")
        intention_support_result = self._step5_1_intention_support_evaluation(
            video_file,
            all_substance_elements,
            analyzed_result,
            inspiration_points,
            purpose_points,
            substance_key_points,
            text_data,
        )
        # Step 6: filtering.
        logger.info("▶ Step 6: 筛选")
        filtered_ids = self._step6_filter_elements(
            analyzed_result,
            scored_result,
            intention_support_result,
        )
        # Step 7: categorization.
        logger.info("▶ Step 7: 分类")
        categorized_result = self._step7_categorize_elements(
            all_substance_elements, filtered_ids
        )
        # Step 8: merge information.
        logger.info("▶ Step 8: 合并信息")
        final_elements = self._merge_all_info(
            all_substance_elements,
            analyzed_result,
            scored_result,
            intention_support_result,
            filtered_ids,
            categorized_result,
        )
        logger.info(f"实质元素提取完成 - 最终元素数: {len(final_elements)}")
        # Return all intermediate and final results.
        return {
            # Raw extraction results from Steps 1-3.
            "concrete_elements": concrete_elements,
            "concrete_concepts": concrete_concepts,
            "implicit_concepts": implicit_concepts,
            "abstract_concepts": abstract_concepts,
            "substance_elements": all_substance_elements,
            # Processing results from Steps 4-8.
            "substance_analyzed_result": analyzed_result,
            "substance_scored_result": scored_result,
            "substance_intention_support_result": intention_support_result,
            "substance_filtered_ids": filtered_ids,
            "substance_categorized_result": categorized_result,
            # Final result.
            "substance_final_elements": final_elements
        }

    # ========== Steps 1-3: substance extraction ==========

    def _step1_extract_concrete_elements(
        self,
        video_file
    ) -> List[dict]:
        """Step 1: extract concrete elements - single visual entities observed directly in the frames."""
        if not self.is_initialized:
            self.initialize()
        if not video_file:
            logger.warning("⚠️ 没有视频文件,跳过具体元素提取")
            return []
        prompt = """# 任务
从视频中提取"具体元素"
# 核心定义
## 具体元素
- **定义**:
-- 1.从视频画面中直接观察到的、可独立存在的**单一视觉实体对象**
-- 2.视频的背景音乐、音效等非口播内容的声音
- **判断标准**:
-- 1.可以指着画面说"这是一个X"(单一、具体、可见的实体)
-- 2.有背景音乐、音效等非口播内容的声音,直接用"背景音乐/音效声"作为名称即可,不要重复提取
- **示例**:
-- 1.胡萝卜、青椒、西兰花(每个都是单独的实体)
-- 2.背景音乐/音效声
- **禁止**:
- 归类词(蔬菜、水果)
- 概念性名词(食物、植物、人)
- 文字内容(只关注视觉实体)
## 提取原则(仅针对画面中的视觉实体对象)
- 只从视频画面中提取,不关注文字
- 每个元素必须是单一的、具体的视觉实体
- 使用"剥离测试":去掉表达方式后,这个实体仍然存在
# 命名规范
- 原子性:单一原子名词,不可再拆分
- 名词性:纯名词,严禁形容词、动词、副词
- 具体性:直接指向可观察的实体
# 输出json结构
[
{
"id": "从1开始的自增序列",
"名称": "单一原子名词",
"描述": "说明这个元素是什么,外观特征",
"维度": {"一级": "实质", "二级": "具体元素"},
"来源": ["视频画面"],
"推理": "为什么识别这个具体元素"
},
{
"id": "从1开始的自增序列",
"名称": "背景音乐/音效声",
"描述": "说明背景音乐/音效声是什么",
"维度": {"一级": "实质", "二级": "具体元素"},
"来源": ["视频"],
"推理": "为什么识别这个背景音乐/音效声"
}
]
注意:只提取具体的视觉实体对象,不要提取抽象概念或归类词
"""
        # Analyze the video through the multimodal interface.
        result = LLMInvoker.safe_invoke_video_analysis(
            operation_name="具体元素提取",
            video_file=video_file,
            prompt=prompt,
            agent=self,
            fallback=[]
        )
        # Assign a stable id to every concrete element.
        for idx, element in enumerate(result, 1):
            element["id"] = f"具体元素-{idx}"
        return result

    def _step2_extract_concrete_concepts(
        self,
        video_file,
        text_data: dict,
        concrete_elements: List[dict],
    ) -> List[dict]:
        """Step 2: extract concrete concepts - nouns that literally appear in the text."""
        if not self.is_initialized:
            self.initialize()
        # Collect the names already identified in Step 1 so this step can exclude them.
        element_names = [
            e.get("名称") for e in (concrete_elements or []) if e.get("名称")
        ]
        element_names_text = (
            json.dumps(element_names, ensure_ascii=False, indent=2)
            if element_names
            else "[]"
        )
        prompt = f"""# 任务
从视频中提取"具象概念"
# 核心定义
## 具象概念
- **定义**:视频画面内的文字或者口播内容中明确提到的完整名词
## 排除的名称(来自第一步,仅用于排除)
**禁止提取的名称**:{element_names_text}
## 判断标准
- **视频画面内的文字或者口播内容**中实际出现的**完整名词**
- **不能是视频画面中出现的元素的名称等归类词**
- 去掉表达方式后,这个概念仍然存在
# 约束
- 禁止通过语义推导、联想、理解得出的名词
- **禁止归类词(蔬菜、水果、人等)**
- **禁止使用第一步中已提取的具体元素名称**
- 禁止拆分复合词
- 禁止提取形容词、动词
- 禁止提取谓语、定语、状语、补语
- 禁止提取副词
## 提取原则
- **词语完整性**:必须提取完整的**名词**,不允许拆分复合词
- **严格约束**:必须是**画面文字或者口播内容中实际出现**的完整名词
- **严格的名词验证**(必须同时满足以下两个条件):
- 条件1:词性是名词(词典意义上的名词)
- 条件2:在当前上下文中作为名词使用(语境判断)
**验证方法**:
- 找到该词在视频画面内的文字或者口播内容中的具体位置
- 分析该词在句子中的语法成分和实际作用
- 判断:该词是否在这个语境中充当"事物/对象/概念"的角色?
# 输出json结构
[
{{
"id": "从1开始的自增序列",
"名称": "字面原词(完整名词)",
"描述": "说明这个概念是什么",
"维度": {{"一级": "实质", "二级": "具象概念"}},
"来源": "HH:MM:SS",
"上下文验证": {{
"原文位置": "该词在原视频画面内的文字或者口播内容中的具体句子",
"语法成分": "该词在句子中的语法成分(主语/宾语/定语中心语等)",
"语境判断": "说明该词在此语境中确实作为名词使用的理由"
}},
"推理": "为什么这个名词被认为是具象概念"
}}
]
注意:只输出同时满足"词性是名词"和"上下文中作为名词使用"两个条件的概念
"""
        # Analyze the video (combines speech with on-screen text).
        result = LLMInvoker.safe_invoke_video_analysis(
            operation_name="具象概念提取",
            video_file=video_file,
            prompt=prompt,
            agent=self,
            fallback=[]
        )
        # Assign a stable id to every concrete concept.
        for idx, concept in enumerate(result, 1):
            concept["id"] = f"具象概念-{idx}"
        return result

    def _step3_summarize_abstract_concepts(
        self,
        video_file,
        concrete_elements: List[dict],
        concrete_concepts: List[dict],
        implicit_concepts: List[dict]
    ) -> List[dict]:
        """Step 3: summarize abstract concepts - induce higher-level abstractions from concrete elements and concepts."""
        if not self.is_initialized:
            self.initialize()
        if not concrete_elements and not concrete_concepts:
            logger.warning("⚠️ 没有具体元素或具象概念,跳过抽象概念提取")
            return []
        # Build the text of the already-extracted items.
        elements_text = json.dumps([
            {"id": e.get("id"), "名称": e.get("名称"), "描述": e.get("描述")}
            for e in concrete_elements
        ], ensure_ascii=False, indent=2) if concrete_elements else "无"
        concepts_text = json.dumps([
            {"id": c.get("id"), "名称": c.get("名称"), "描述": c.get("描述")}
            for c in concrete_concepts
        ], ensure_ascii=False, indent=2) if concrete_concepts else "无"
        prompt = f"""# 任务
基于已提取的具体元素和具象概念,总结新的"抽象概念"
# 已提取的具体元素
{elements_text}
# 已提取的具象概念
{concepts_text}
# 核心定义
**抽象概念**分两类:
**类型1-上位抽象**:对具体元素/具象概念的归类
**类型2-引申含义**:具体元素/具象概念无法直接表达的深层含义
# 提取原则
- 对具体元素/具象概念的归类
- 具体元素和具象概念无法直接表达的深层含义
- 基于归纳:基于已提取的具体元素/具象概念
- 来源追溯:准确标明所有来源ID(具体元素ID、具象概念ID),必须完整可追溯
# 命名规范
- 有完整独立语义的概念
- 单一原子名词,不可拆分
- 纯名词,禁止形容词、动词、副词
- 精准描述概念,不做修饰
# 判断标准
- 去掉表达方式后,概念仍存在
# 输出json结构
[
{{
"id": "从1开始的自增序列",
"名称": "单一名词或短语",
"描述": "说明这个抽象概念是什么",
"维度": {{"一级": "实质", "二级": "抽象概念"}},
"类型": "上位抽象 | 引申含义",
"来源": {{
"具体元素": [{{"id":"具体元素-X", "名称":"具体元素-X的名称"}}, {{"id":"具体元素-Y", "名称":"具体元素-Y的名称"}}],
"具象概念": [{{"id":"具象概念-A", "名称":"具象概念-A的名称"}}, {{"id":"具象概念-B", "名称":"具象概念-B的名称"}}]
}},
"推理过程": "明确说明如何从上述来源(具体哪些元素ID和概念ID)推导出这个抽象概念"
}}
]
注意:只输出验证全部通过的概念
"""
        # Summarize abstract concepts through the video-analysis interface.
        result = LLMInvoker.safe_invoke_video_analysis(
            operation_name="抽象概念总结",
            video_file=video_file,
            prompt=prompt,
            agent=self,
            fallback=[]
        )
        # Assign a stable id to every abstract concept.
        for idx, concept in enumerate(result, 1):
            concept["id"] = f"抽象概念-{idx}"
        return result

    # ========== Steps 4-8: downstream processing ==========

    def _step4_commonality_analysis(
        self,
        video_file,
        substance_elements: List[dict],
        text_data: dict,
        section_division: dict
    ) -> List[dict]:
        """Step 4: commonality analysis - count frequency and section coverage."""
        if not substance_elements:
            return []
        total_sections = self._count_sections(section_division)
        # Run the analysis in batches.
        analyzed_items = self._commonality_analysis_in_batches(
            video_file, substance_elements, text_data, section_division, total_sections,
            max_batch_size=100
        )
        return analyzed_items

    def _commonality_analysis_in_batches(
        self,
        video_file,
        substance_elements: list,
        text_data: dict,
        section_division: dict,
        total_sections: int,
        max_batch_size: int = 100
    ) -> list:
        """Run the commonality analysis in batches."""
        if not self.is_initialized:
            self.initialize()
        num_elements = len(substance_elements)
        if num_elements == 0:
            return []
        # Handle everything in one call when the element count fits a single batch.
        if num_elements <= max_batch_size:
            return self._commonality_analysis_single_batch(
                video_file, substance_elements, text_data, section_division, total_sections
            )
        # Otherwise split into ceil(num_elements / max_batch_size) parallel batches.
        num_batches = (num_elements + max_batch_size - 1) // max_batch_size
        batch_futures = {}
        for batch_idx in range(num_batches):
            start_idx = batch_idx * max_batch_size
            end_idx = min(start_idx + max_batch_size, num_elements)
            batch_elements = substance_elements[start_idx:end_idx]
            future = _GLOBAL_THREAD_POOL.submit(
                self._commonality_analysis_single_batch,
                video_file, batch_elements, text_data, section_division, total_sections
            )
            batch_futures[batch_idx] = future
        # Collect results.
        all_results = []
        for batch_idx, future in batch_futures.items():
            try:
                batch_result = future.result()
                if batch_result:
                    all_results.extend(batch_result)
            except Exception as e:
                logger.error(f"批次 {batch_idx + 1} 失败: {e}")
        return all_results
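
    # A worked example of the split above, with assumed numbers: for 250
    # elements and max_batch_size=100, num_batches = (250 + 100 - 1) // 100 = 3,
    # yielding slices [0:100], [100:200] and [200:250] that run in parallel on
    # _GLOBAL_THREAD_POOL and are re-joined in submission order.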

    def _commonality_analysis_single_batch(
        self,
        video_file,
        batch_elements: list,
        text_data: dict,
        section_division: dict,
        total_sections: int
    ) -> list:
        """Commonality analysis for a single batch."""
        if not self.is_initialized:
            self.initialize()
        section_text = self._build_section_text(section_division)
        elements_text = self._build_simple_items_text(batch_elements)
        prompt = f"""# 段落列表
{section_text}
# 元素列表
{elements_text}
# 任务
对每个元素统计出现的段落和频次
## 统计规则
### 1. 具体元素统计(只统计视觉实体)
- **出现频次**: 统计该**单一视觉实体对象**在视频图像中直接观察到的次数
- **出现段落列表**: 只统计能在视频图像中**直接看到该视觉实体**的段落
### 2. 具象概念统计(只统计文字字面)
- **出现频次**: 统计该名词在**视频画面文字或者口播内容中实际出现**的次数
- **出现段落列表**: 只统计**视频画面文字或者口播内容中包含该名词**的段落
### 3. 抽象概念统计(统计语义归类)
- **出现频次**: 统计该概念被**隐含表达**的总次数
- **出现段落列表**: 统计**包含该概念所归类的具体元素/具象概念**的段落
# 输出(JSON)
[
{{
"id": "元素id",
"名称": "元素名称",
"出现频次": 0,
"出现段落列表": [
{{
"段落ID": "段落id",
"如何体现": "描述该元素在这个段落中的具体体现方式"
}}
]
}}
]
"""
        # Run the commonality analysis through the video-analysis interface.
        llm_result = LLMInvoker.safe_invoke_video_analysis(
            operation_name="共性分析",
            video_file=video_file,
            prompt=prompt,
            agent=self,
            fallback=[]
        )
        # Compute coverage.
        analyzed_items = []
        for analysis in llm_result:
            section_list = analysis.get("出现段落列表", [])
            unique_paragraph_ids = set()
            for item in section_list:
                paragraph_id = item.get("段落ID", "")
                # Skip entries without a paragraph id so they do not inflate coverage.
                if paragraph_id:
                    unique_paragraph_ids.add(paragraph_id)
            coverage_count = len(unique_paragraph_ids)
            coverage_rate = round(coverage_count / total_sections, 4) if total_sections > 0 else 0.0
            analyzed_items.append({
                "id": analysis.get("id", 0),
                "名称": analysis.get("名称", ""),
                "出现频次": analysis.get("出现频次", 0),
                "出现段落列表": section_list,
                "出现段落数": coverage_count,
                "段落覆盖率": coverage_rate
            })
        return analyzed_items
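
    # Coverage example with assumed numbers: an element whose 出现段落列表
    # references 段落ID values {"s1", "s3", "s7"} out of total_sections = 10
    # gets 出现段落数 = 3 and 段落覆盖率 = round(3 / 10, 4) = 0.3.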

    def _step5_multi_dimensional_scoring(
        self,
        substance_elements: List[dict],
        analyzed_result: list,
        inspiration_points: list,
        purpose_points: list,
        substance_key_points: list
    ) -> dict:
        """Step 5: multi-dimensional scoring (similarity logic deprecated).

        Notes:
        - No similarity computation is performed anymore; filtering relies
          entirely on the subsequent intention-support evaluation.
        - The function and its return structure are kept so that downstream
          consumers do not break; it simply returns empty results.
        """
        logger.info(
            "【多维度评分】相似度比较逻辑已关闭,当前不进行评分,仅返回空结果。"
        )
        return {
            "灵感点": [],
            "目的点": [],
            "关键点": [],
        }

    def _step5_1_intention_support_evaluation(
        self,
        video_file,
        substance_elements: List[dict],
        analyzed_result: list,
        inspiration_points: list,
        purpose_points: list,
        substance_key_points: list,
        text_data: dict,
    ) -> dict:
        """Step 5.1: intention-support evaluation.

        Notes:
        - Adds an "intention support" perspective on top of the (now disabled)
          similarity scoring.
        - Frequency/coverage are not used as filter conditions here; they only
          feed logging and statistics.
        - Results are returned as element-to-point support relations and do
          not drive the filtering decision directly.
        """
        if not substance_elements:
            return {"灵感点": [], "目的点": [], "关键点": []}
        logger.info(f"【意图支撑评估】输入: {len(substance_elements)} 个实质元素")
        # Group by second-level dimension (no frequency filtering; evaluate everything).
        dimension_groups = {
            "具体元素": [],
            "具象概念": [],
            "抽象概念": [],
        }
        for elem in substance_elements:
            second_level = elem.get("维度", {}).get("二级", "")
            if second_level in dimension_groups:
                dimension_groups[second_level].append(elem)
        logger.info(
            "维度分组(意图支撑): 具体元素=%d, 具象概念=%d, 抽象概念=%d",
            len(dimension_groups["具体元素"]),
            len(dimension_groups["具象概念"]),
            len(dimension_groups["抽象概念"]),
        )
        # Evaluate in parallel (each dimension x each of the 3 point types).
        futures = {}

        def submit_if_needed(dimension_name: str, point_type: str, points_list):
            if not points_list:
                logger.info(
                    f"⏭️ 跳过意图支撑评估: {dimension_name}-{point_type} "
                    f"(原因: 点列表为空, len={len(points_list) if isinstance(points_list, list) else 'N/A'})"
                )
                return
            if not dimension_groups.get(dimension_name):
                logger.info(
                    f"⏭️ 跳过意图支撑评估: {dimension_name}-{point_type} "
                    f"(原因: 该维度无元素, len={len(dimension_groups.get(dimension_name, []))})"
                )
                return
            key = (dimension_name, point_type)
            logger.info(
                f"📤 提交意图支撑评估任务: {dimension_name}-{point_type} "
                f"(元素数={len(dimension_groups[dimension_name])}, 点数={len(points_list)})"
            )
            futures[key] = _GLOBAL_THREAD_POOL.submit(
                self._evaluate_support_by_dimension,
                video_file,
                dimension_name,
                dimension_groups[dimension_name],
                points_list,
                point_type,
                text_data,
            )

        # Concrete elements / concrete concepts / abstract concepts
        # x inspiration points / purpose points / key points.
        for dim in ["具体元素", "具象概念", "抽象概念"]:
            submit_if_needed(dim, "灵感点", inspiration_points if isinstance(inspiration_points, list) else [])
            submit_if_needed(dim, "目的点", purpose_points if isinstance(purpose_points, list) else [])
            submit_if_needed(dim, "关键点", substance_key_points if isinstance(substance_key_points, list) else [])
        # Collect results, aggregated by point type.
        result = {
            "灵感点": [],
            "目的点": [],
            "关键点": [],
        }
        for (dimension_name, point_type), future in futures.items():
            try:
                dimension_result = future.result()
                if dimension_result:
                    result[point_type].extend(dimension_result)
                    logger.info(
                        f"✅ 意图支撑-{dimension_name}-{point_type} 评估完成: {len(dimension_result)} 条支撑关系"
                    )
            except Exception as e:
                logger.error(f"❌ 意图支撑-{dimension_name}-{point_type} 评估失败: {e}")
        return result

    def _evaluate_support_by_dimension(
        self,
        video_file,
        dimension_name: str,
        elements: list,
        points: list,
        point_type: str,
        text_data: dict,
    ) -> list:
        """Evaluate intention-support relations for one dimension (in batches)."""
        if not self.is_initialized:
            self.initialize()
        if not elements or not points:
            return []
        # Batch sizing: keep elements x points at roughly 100 per batch.
        num_elements = len(elements)
        num_points = len(points)
        max_batch_product = 100
        max_elements_per_batch = max(1, int(max_batch_product / max(1, num_points)))
        num_batches = (num_elements + max_elements_per_batch - 1) // max_elements_per_batch
        batch_futures = {}
        for batch_idx in range(num_batches):
            start_idx = batch_idx * max_elements_per_batch
            end_idx = min(start_idx + max_elements_per_batch, num_elements)
            batch_elements = elements[start_idx:end_idx]
            future = _GLOBAL_THREAD_POOL.submit(
                self._evaluate_support_single_batch_by_dimension,
                video_file,
                dimension_name,
                batch_elements,
                points,
                point_type,
                text_data,
            )
            batch_futures[batch_idx] = future
        # Collect results.
        all_results = []
        for batch_idx, future in batch_futures.items():
            try:
                batch_result = future.result()
                if batch_result:
                    all_results.extend(batch_result)
            except Exception as e:
                logger.error(f"【意图支撑-{dimension_name}】批次 {batch_idx + 1} 失败: {e}")
        # Merge the per-batch support results.
        merged_results = self._merge_support_batch_results(all_results)
        return merged_results
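
    # Batch-sizing example with assumed numbers: 30 elements scored against 12
    # points gives max_elements_per_batch = max(1, 100 // 12) = 8, so the 30
    # elements split into ceil(30 / 8) = 4 batches (8, 8, 8, 6), keeping each
    # prompt near the ~100 element-point pair budget.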

    def _evaluate_support_single_batch_by_dimension(
        self,
        video_file,
        dimension_name: str,
        batch_elements: list,
        points: list,
        point_type: str,
        text_data: dict,
    ) -> list:
        """Intention-support evaluation for a single batch of one dimension."""
        if not self.is_initialized:
            self.initialize()
        post_content = self._build_post_content(text_data)
        elements_text = self._build_simple_items_text_dimension(batch_elements, dimension_name)
        points_text = self._build_points_text(point_type, points)
        # Pick the prompt that matches the dimension.
        if dimension_name == "具体元素":
            prompt = self._build_concrete_element_support_prompt(post_content, elements_text, points_text)
        elif dimension_name == "具象概念":
            prompt = self._build_concrete_concept_support_prompt(post_content, elements_text, points_text)
        elif dimension_name == "抽象概念":
            prompt = self._build_abstract_concept_support_prompt(post_content, elements_text, points_text)
        else:
            logger.error(f"未知维度(意图支撑): {dimension_name}")
            return []
        # Evaluate intention support multimodally via the video-analysis interface.
        result = LLMInvoker.safe_invoke_video_analysis(
            operation_name=f"意图支撑评估-{dimension_name}-{point_type}",
            video_file=video_file,
            prompt=prompt,
            agent=self,
            fallback=[],
        )
        return result

    def _evaluate_support_in_batches(
        self,
        elements: list,
        points: list,
        point_type: str,
        max_batch_product: int = 100
    ) -> list:
        """Evaluate similarity in batches (legacy path of the deprecated Step 5 scoring)."""
        if not self.is_initialized:
            self.initialize()
        if not points:
            return []
        num_elements = len(elements)
        num_points = len(points)
        max_elements_per_batch = max(1, int(max_batch_product / num_points))
        num_batches = (num_elements + max_elements_per_batch - 1) // max_elements_per_batch
        # Process in batches.
        batch_futures = {}
        for batch_idx in range(num_batches):
            start_idx = batch_idx * max_elements_per_batch
            end_idx = min(start_idx + max_elements_per_batch, num_elements)
            batch_elements = elements[start_idx:end_idx]
            future = _GLOBAL_THREAD_POOL.submit(
                self._evaluate_support_single_batch,
                batch_elements, points, point_type
            )
            batch_futures[batch_idx] = future
        # Collect results.
        all_results = []
        for batch_idx, future in batch_futures.items():
            try:
                batch_result = future.result()
                if batch_result:
                    all_results.extend(batch_result)
            except Exception as e:
                logger.error(f"批次 {batch_idx + 1} 失败: {e}")
        # Merge and trim (keep the 1-2 most relevant points per element).
        merged_results = self._merge_batch_results(all_results)
        return merged_results

    def _evaluate_support_single_batch(
        self,
        batch_elements: list,
        points: list,
        point_type: str
    ) -> list:
        """Similarity evaluation for a single batch (legacy path)."""
        if not self.is_initialized:
            self.initialize()
        elements_text = self._build_simple_items_text(batch_elements)
        points_text = self._build_points_text(point_type, points)
        prompt = f"""# 元素列表
{elements_text}
# 点列表
{points_text}
# 任务
对每个元素计算元素与点的文本相似度和语义相似度
# 输出(JSON)
[
{{
"id": "元素id",
"名称": "元素名称",
"相似度结果": [
{{
"点":"点的名称",
"语义相似度":0.21,
"语义相似度理由": "理由",
"文本相似度":0.33,
"文本相似度理由": "理由"
}}
]
}}
]
"""
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]
        result = LLMInvoker.safe_invoke(
            self,
            f"评估支撑{point_type}",
            messages,
            fallback=[]
        )
        return result

    def _merge_batch_results(self, all_results: list) -> list:
        """Merge batch results, keeping only the 1-2 most relevant points per element."""
        if not all_results:
            return []
        merged_map = {}
        for item in all_results:
            element_id = item.get("id")
            if element_id not in merged_map:
                merged_map[element_id] = {
                    "id": element_id,
                    "名称": item.get("名称"),
                    "相似度结果": []
                }
            # Batches partition the elements, so only the first occurrence of an
            # id writes its similarity results.
            if not merged_map[element_id]["相似度结果"]:
                merged_map[element_id]["相似度结果"] = item.get("相似度结果", [])
        # Trim each element's similarity results.
        for element_data in merged_map.values():
            similarity_results = element_data.get("相似度结果", [])
            if not similarity_results:
                continue
            max_text_sim_point = max(similarity_results, key=lambda x: x.get("文本相似度", 0))
            max_semantic_sim_point = max(similarity_results, key=lambda x: x.get("语义相似度", 0))
            if max_text_sim_point.get("点") == max_semantic_sim_point.get("点"):
                filtered_results = [max_text_sim_point]
            else:
                filtered_results = [max_text_sim_point, max_semantic_sim_point]
            element_data["相似度结果"] = filtered_results
        return list(merged_map.values())
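
    # Trimming example with assumed scores: for similarity entries
    #   [{"点": "A", "文本相似度": 0.6, "语义相似度": 0.2},
    #    {"点": "B", "文本相似度": 0.3, "语义相似度": 0.8}]
    # the text-wise maximum is A and the semantic-wise maximum is B, so both
    # are kept; had A topped both scores, only A would remain.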

    def _merge_support_batch_results(self, all_results: list) -> list:
        """Merge batch results (intention support) into per-element support pairs."""
        if not all_results:
            return []
        merged_map = {}
        for item in all_results:
            element_id = item.get("id")
            if element_id not in merged_map:
                merged_map[element_id] = {
                    "id": element_id,
                    "名称": item.get("名称"),
                    "支撑结果": [],
                }
            # Deduplication is assumed to happen downstream; only the first
            # occurrence of an element writes its support results.
            if not merged_map[element_id]["支撑结果"]:
                merged_map[element_id]["支撑结果"] = item.get("支撑结果", [])
        return list(merged_map.values())

    def _step6_filter_elements(
        self,
        analyzed_result: list,
        scored_result: dict,
        intention_support_result: dict,
    ) -> list:
        """Step 6: filter the substance elements.

        Retention policy (intention support + coverage based):
        - Coverage and frequency are mostly reported for statistics, but they
          also act as necessary conditions.
        - An element is kept only if it satisfies ALL three conditions:
          - 出现频次 (frequency) > 1
          - at least one intention-support relation exists
          - 段落覆盖率 (section coverage) > 0.3 (30%)
        - All similarity-scoring logic is disabled and takes no part in filtering.
        """
        if not analyzed_result:
            return []
        # Index analyzed_result by element id.
        analyzed_map = {item.get("id"): item for item in analyzed_result}
        # Build the intention-support index: an element counts as "supported" if
        # it has a support relation under any dimension and any point type.
        intention_support_map = {}
        if intention_support_result:
            for point_type in ["灵感点", "目的点", "关键点"]:
                dimension_data = intention_support_result.get(point_type, [])
                for item in dimension_data:
                    if not isinstance(item, dict):
                        continue
                    element_id = item.get("id")
                    support_results = item.get("支撑结果", [])
                    if not element_id:
                        continue
                    if element_id not in intention_support_map:
                        intention_support_map[element_id] = []
                    # A single support result is enough to mark the element as supported.
                    if support_results:
                        intention_support_map[element_id].extend(support_results)
        # Filter.
        filtered_ids = []
        rejected_ids = []
        for element_id, analyzed_data in analyzed_map.items():
            element_name = analyzed_data.get("名称", "N/A")
            # Sound-type substance elements (background music, sound effects)
            # pass the filter directly and skip the remaining checks. Step 1
            # names them "背景音乐/音效声", so match by substring instead of
            # exact equality.
            sound_type_names = ["背景音乐", "音效声"]
            if any(sound_name in element_name for sound_name in sound_type_names):
                filtered_ids.append(element_id)
                logger.info(
                    f"✅ 保留: id={element_id}, name={element_name}, 原因=声音类型元素,豁免筛选"
                )
                continue
            # Coerce frequency to int.
            frequency_raw = analyzed_data.get("出现频次", 0)
            try:
                frequency = int(frequency_raw) if frequency_raw is not None else 0
            except (ValueError, TypeError):
                frequency = 0
            # Coerce coverage_rate to float.
            coverage_rate_raw = analyzed_data.get("段落覆盖率", 0.0)
            try:
                coverage_rate = float(coverage_rate_raw) if coverage_rate_raw is not None else 0.0
            except (ValueError, TypeError):
                coverage_rate = 0.0
            # Frequency gate: elements with 出现频次 <= 1 are dropped outright,
            # without checking support or coverage.
            if frequency <= 1:
                rejected_ids.append(element_id)
                logger.info(
                    "❌ 过滤: id=%s, name=%s, 原因=出现频次<=1 (frequency=%d)",
                    element_id,
                    element_name,
                    frequency,
                )
                continue
            support_info = intention_support_map.get(element_id, [])
            has_support = bool(support_info)
            has_high_coverage = coverage_rate > 0.3
            # frequency > 1 AND supported AND coverage > 30%: keep directly,
            # with no similarity comparison.
            if has_support and has_high_coverage:
                filtered_ids.append(element_id)
                logger.info(
                    f"✅ 保留: id={element_id}, name={element_name}, "
                    f"support_count={len(support_info)}, coverage={coverage_rate}"
                )
                continue
            # Everything failing the combined condition is filtered out.
            rejected_ids.append(element_id)
            logger.info(
                "❌ 过滤: id=%s, name=%s, 原因=未同时满足出现频次>1、"
                "有意图支撑关系和段落覆盖率>0.3 "
                "(frequency=%d, coverage=%.4f)",
                element_id,
                element_name,
                frequency,
                coverage_rate,
            )
        logger.info(f"筛选完成: {len(filtered_ids)}/{len(analyzed_result)} 通过")
        return filtered_ids
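
    # Filtering example with assumed values: an element with 出现频次=3, one
    # support relation under 灵感点, and 段落覆盖率=0.35 passes all three
    # conditions and is kept; the same element with 段落覆盖率=0.25 would be
    # rejected even though it has support.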

    def _step7_categorize_elements(
        self,
        substance_elements: List[dict],
        filtered_ids: list
    ) -> dict:
        """Step 7: categorize elements - one categorization per second-level dimension."""
        if not filtered_ids:
            return {}
        # Keep only the elements that survived filtering.
        filtered_elements = [
            elem for elem in substance_elements
            if elem.get("id") in filtered_ids
        ]
        # Group by second-level dimension.
        dimension_groups = {
            "具体元素": [],
            "具象概念": [],
            "抽象概念": [],
        }
        for elem in filtered_elements:
            second_level = elem.get("维度", {}).get("二级", "")
            if second_level in dimension_groups:
                dimension_groups[second_level].append(elem)
        # Categorize the dimensions in parallel.
        categorization_results = {}
        futures = {}
        for dimension_name, elements in dimension_groups.items():
            if not elements:
                continue
            future = _GLOBAL_THREAD_POOL.submit(
                self._categorize_single_dimension,
                dimension_name,
                elements
            )
            futures[dimension_name] = future
        # Collect results.
        for dimension_name, future in futures.items():
            try:
                categorization_results[dimension_name] = future.result()
            except Exception as e:
                logger.error(f"{dimension_name} 分类失败: {e}")
                categorization_results[dimension_name] = {"元素分类": []}
        return categorization_results
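
    # Shape of the categorization result consumed by _merge_all_info
    # (illustrative values only):
    #   {"具体元素": {"元素分类": [{"元素id": "具体元素-1",
    #                              "元素名称": "胡萝卜",
    #                              "分类": ["食材", "蔬菜"]}]},
    #    "抽象概念": {"元素分类": [...]}}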

    def _categorize_single_dimension(
        self,
        dimension_name: str,
        elements: list
    ) -> dict:
        """Categorize the elements of a single dimension."""
        if not self.is_initialized:
            self.initialize()
        elements_text = json.dumps([
            {"id": elem.get("id"), "名称": elem.get("名称"), "描述": elem.get("描述")}
            for elem in elements
        ], ensure_ascii=False, indent=2)
        prompt = f"""# 任务
对"{dimension_name}"维度的元素进行分类
# 元素列表
{elements_text}
# 分类要求
## 核心原则
1. **单一原子名词**: 分类名称必须是单一的原子名词
2. **MECE原则**: 分类之间相互独立、完全穷尽
3. **确定性归属**: 每个元素只能归属唯一一个分类
4. **层级限制**: 最多2层
5. 元素可以没有分类,不要强行归类
6. 分类下面至少要有2个元素,否则不要分类
## 实质维度的分类逻辑
- **核心原则**:按照**内容本质、属性特征、功能作用**等角度来分类
- **包含**:
- 物理特征:形态、材质、颜色等
- 功能用途:工具、装饰、食物等
- 概念类别:情感、价值观、技能等
- 领域归属:科技、艺术、健康等
# 输出格式(JSON)
{{
"元素分类": [
{{
"元素id": "元素的ID",
"元素名称": "元素名称",
"分类": ["一级分类","二级分类","..."]
}}
]
}}
"""
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]
        result = LLMInvoker.safe_invoke(
            self,
            f"分类-{dimension_name}",
            messages,
            fallback={}
        )
        return result

    def _merge_all_info(
        self,
        substance_elements: List[dict],
        analyzed_result: list,
        scored_result: dict,
        intention_support_result: dict,
        filtered_ids: list,
        categorized_result: dict
    ) -> list:
        """Step 8: merge everything - each element carries all intermediate information."""
        if not filtered_ids:
            return []
        # Index the raw extraction and analysis results.
        extraction_map = {item.get("id"): item for item in substance_elements}
        analyzed_map = {item.get("id"): item for item in analyzed_result}
        # Build the (similarity) scoring index.
        scored_map = {}
        for dimension in ["灵感点", "目的点", "关键点"]:
            dimension_data = scored_result.get(dimension, [])
            for item in dimension_data:
                if not isinstance(item, dict):
                    continue
                element_id = item.get("id")
                if element_id not in scored_map:
                    scored_map[element_id] = {}
                similarity_results = item.get("相似度结果", [])
                sorted_results = sorted(
                    similarity_results,
                    key=lambda x: (x.get("文本相似度", 0), x.get("语义相似度", 0)),
                    reverse=True
                )
                scored_map[element_id][dimension] = sorted_results
        # Build the intention-support index.
        intention_support_map = {}
        if intention_support_result:
            for dimension in ["灵感点", "目的点", "关键点"]:
                dimension_data = intention_support_result.get(dimension, [])
                for item in dimension_data:
                    if not isinstance(item, dict):
                        continue
                    element_id = item.get("id")
                    if element_id not in intention_support_map:
                        intention_support_map[element_id] = {}
                    support_results = item.get("支撑结果", [])
                    intention_support_map[element_id][dimension] = support_results
        # Build the category index.
        category_map = {}
        for dimension_data in categorized_result.values():
            element_classifications = dimension_data.get("元素分类", [])
            for classification in element_classifications:
                element_id = classification.get("元素id")
                # "分类" is a list of category levels per the Step 7 prompt.
                category_info = classification.get("分类", [])
                if element_id:
                    category_map[element_id] = category_info
        # Merge.
        final_elements = []
        for element_id in filtered_ids:
            base_info = extraction_map.get(element_id, {})
            analysis_info = analyzed_map.get(element_id, {})
            scoring_info = scored_map.get(element_id, {})
            intention_info = intention_support_map.get(element_id, {})
            category_info = category_map.get(element_id, [])
            merged_element = {
                "id": base_info.get("id"),
                "名称": base_info.get("名称"),
                "描述": base_info.get("描述"),
                "维度": base_info.get("维度", {}),
                "分类": category_info,
                "共性分析": {
                    "出现频次": analysis_info.get("出现频次", 0),
                    "出现段落列表": analysis_info.get("出现段落列表", []),
                    "出现段落数": analysis_info.get("出现段落数", 0),
                    "段落覆盖率": analysis_info.get("段落覆盖率", 0.0)
                },
                "多维度评分": {
                    "灵感点": scoring_info.get("灵感点", []),
                    "目的点": scoring_info.get("目的点", []),
                    "关键点": scoring_info.get("关键点", [])
                },
                "意图支撑": {
                    "灵感点": intention_info.get("灵感点", []),
                    "目的点": intention_info.get("目的点", []),
                    "关键点": intention_info.get("关键点", [])
                }
            }
            # Add type-specific fields.
            second_level = base_info.get("维度", {}).get("二级", "")
            if second_level == "具体元素":
                merged_element["来源"] = base_info.get("来源", [])
            elif second_level == "具象概念":
                merged_element["来源"] = base_info.get("来源", [])
                merged_element["字面位置"] = base_info.get("字面位置", [])
            elif second_level in ("抽象概念", "隐含概念"):
                merged_element["类型"] = base_info.get("类型", "")
                merged_element["来源"] = base_info.get("来源", {})
                merged_element["推理过程"] = base_info.get("推理过程", "")
                merged_element["推理层次"] = base_info.get("推理层次", 1)
            final_elements.append(merged_element)
        return final_elements

    # ========== Helper methods ==========

    def _build_section_text(self, section_division: dict) -> str:
        """Build the section-division text."""
        if not section_division:
            return "无段落划分信息"
        sections = section_division.get("段落列表", [])
        if not sections:
            return "无段落信息"

        def build_section_list(section_list, indent=0):
            text = ""
            for section in section_list:
                if section.get('子项'):
                    text += build_section_list(section['子项'], indent + 1)
                else:
                    section_id = section.get('id', 'N/A')
                    section_desc = section.get('描述', 'N/A')
                    content_range = section.get('内容范围', 'N/A')
                    text += f"{section_id}: {section_desc}\n内容范围: {content_range}\n"
            return text

        return "段落列表:\n" + build_section_list(sections)

    def _build_post_content(self, text_data: dict) -> str:
        """Build the source-content text used for intention-support judgments.

        No particular structure is assumed; the data is dumped as JSON so that
        all information stays visible.
        """
        if not text_data:
            return "无文本内容"
        try:
            return json.dumps(text_data, ensure_ascii=False, indent=2)
        except TypeError:
            # Fall back to str() so non-serializable objects do not raise.
            return str(text_data)

    def _build_simple_items_text_dimension(self, elements: list, dimension_name: str) -> str:
        """Build the element-list text for one dimension (used in intention-support judgments)."""
        simple_items = [
            {
                "id": elem.get("id", "N/A"),
                "名称": elem.get("名称", "N/A"),
                "描述": elem.get("描述", "N/A"),
            }
            for elem in elements
        ]
        return json.dumps(
            {"维度": dimension_name, "元素列表": simple_items},
            ensure_ascii=False,
            indent=2,
        )

    def _build_simple_items_text(self, elements: list) -> str:
        """Build the element-list text, grouped by second-level dimension."""
        grouped_elements = {
            "具体元素": [],
            "具象概念": [],
            "抽象概念": [],
        }
        for elem in elements:
            element_type = elem.get('维度', {}).get('二级', 'N/A')
            element_data = {
                "id": elem.get('id', 'N/A'),
                "名称": elem.get('名称', 'N/A'),
                "描述": elem.get('描述', 'N/A')
            }
            if element_type in grouped_elements:
                grouped_elements[element_type].append(element_data)
        filtered_groups = {k: v for k, v in grouped_elements.items() if v}
        return json.dumps(filtered_groups, ensure_ascii=False, indent=2)

    def _build_points_text(self, point_type: str, points_data) -> str:
        """Build the point-list text."""
        if not points_data:
            return f"无{point_type}信息"
        filtered_points = [
            {"名称": item.get(point_type, 'N/A')}
            for item in points_data if isinstance(item, dict)
        ]
        return json.dumps(filtered_points, ensure_ascii=False, indent=2)

    def _build_concrete_element_support_prompt(
        self, post_content: str, elements_text: str, points_text: str
    ) -> str:
        """Build the intention-support prompt for concrete elements (based on the video frames)."""
        return f"""# 原文内容
{post_content}
# 具体元素列表
{elements_text}
# 点列表
{points_text}
# 任务
判断每个**具体元素**是否对点有关键支撑
## 具体元素定义(重要!)
- 定义:视频画面中直接观察到的单一视觉实体对象
- 判断标准:可以指着画面说"这是一个X"
- 剥离测试:去掉表达方式后,该视觉实体仍然存在
## 核心判断原则:仅基于视频画面语境
### 关键约束
1. 只看视频画面:具体元素的支撑判断**只能基于视频中的视觉实体**,不能基于文字论述
2. 视觉实体角色:该视觉实体在视频画面中的作用是什么?
- ✅ 核心展示对象:该视觉实体是画面的核心展示内容
- ❌ 辅助/装饰:该视觉实体只是背景、装饰、示意
3. 关键支撑:该视觉实体对点的表达是否关键?去掉它是否会明显削弱点的支撑?
### 判断流程
1. 理解点的意图,点想表达什么
2. 在视频画面中找到该视觉实体
3. 判断:去掉该视觉实体,是否无法完整表达点
- 如果是,支撑
- 如果不是,不支撑
### 严格标准
- 禁止使用文字内容来判断具体元素的支撑
- 禁止仅凭名称字面匹配判定支撑
- 必须基于该视觉实体在画面中的实际角色
# 输出(JSON)
只输出有关键支撑的元素-点对,不支撑的不输出
[
{{
"id": "元素id",
"名称": "元素名称",
"支撑结果": [
{{
"点": "点的名称",
"点的意图": "点想表达什么",
"支撑理由": "说明为什么去掉该视觉实体,会削弱点的表达,程度达到30%以上"
}}
]
}}
]
注意:
1. 只基于视频画面判断
2. 只输出"关键支撑"的元素-点对
3. 辅助/装饰元素直接排除,不输出
4. 必须基于视频画面中的视觉实体判断,不能做字面匹配"""

    def _build_concrete_concept_support_prompt(
        self, post_content: str, elements_text: str, points_text: str
    ) -> str:
        """Build the intention-support prompt for concrete concepts (based on the text context)."""
        return f"""# 原文内容
{post_content}
# 具象概念列表
{elements_text}
# 点列表
{points_text}
# 任务
判断每个**具象概念**是否对点有关键支撑
## 具象概念定义(重要!)
- 定义:文字中字面出现的名词(包括标题、正文、字幕、视频画面中的文字)
- 判断标准:文字中实际出现,禁止语义推导
## 核心判断原则:仅基于文字语境(包含视频中的文字)
### 关键约束
1. 只看文字:具象概念的支撑判断**只能基于文字中的概念论述**,不能基于视频中的视觉实体
2. 概念角色:该概念在文字论述中的作用是什么?
- ✅ 核心论述概念:该概念是文字论述的核心对象、关键主题
- ❌ 次要提及:该概念只是顺带提及、举例说明
3. 关键支撑:该概念对点的表达是否关键?去掉它是否会明显削弱点的支撑?
### 判断流程
1. 理解点的意图,点想表达什么
2. 在标题、正文、字幕、画面文字中找到该概念出现的位置
3. 判断:去掉该段文字,是否无法完整表达点
- 如果是,支撑
- 如果不是,不支撑
### 严格标准
- 禁止用视频画面中的视觉实体来判断具象概念的支撑
- 禁止仅凭名称字面匹配判定支撑
- 必须判断该概念在文字论述中的实际角色
# 输出(JSON)
只输出有关键支撑的元素-点对,不支撑的不输出
[
{{
"id": "元素id",
"名称": "元素名称",
"支撑结果": [
{{
"点": "点的名称",
"点的意图": "点想表达什么",
"支撑理由": "说明为什么去掉该概念,会削弱点的表达,程度达到30%以上"
}}
]
}}
]
注意:
1. 只基于文字判断
2. 只输出"关键支撑"的元素-点对
3. 次要提及的概念直接排除,不输出
4. 必须基于文字中的概念论述判断,不能做字面匹配"""

    def _build_abstract_concept_support_prompt(
        self, post_content: str, elements_text: str, points_text: str
    ) -> str:
        """Build the intention-support prompt for abstract concepts."""
        return f"""# 原文内容
{post_content}
# 抽象概念列表
{elements_text}
# 点列表
{points_text}
# 任务
判断每个**抽象概念**是否对点有关键支撑
## 抽象概念定义(重要!)
- 定义:从具体元素和具象概念中理解到的上位抽象
- 类型1-上位抽象(归类):是下位元素的类别、分类
- 类型2-引申含义:需要理解上下文的深层含义
- 剥离测试:去掉表达方式后,该抽象概念仍然存在
## 核心判断原则:基于来源语境
### 关键约束
1. 追溯来源:抽象概念来源于具体元素和/或具象概念,必须追溯到来源
2. 继承语境:抽象概念的语境继承自其来源
- 如果来源主要是具体元素 → 语境偏向视频画面
- 如果来源主要是具象概念 → 语境偏向文字
- 如果来源混合 → 综合判断
3. 关键支撑:该抽象概念对点的表达是否关键?
### 判断流程
1. 理解点的意图:点想表达什么?
2. 根据来源确定该抽象概念的主要语境
3. 判断:去掉该抽象概念,是否无法完整表达点
- 如果是,支撑
- 如果不是,不支撑
### 严格标准
- 必须基于来源的语境来判断
- 禁止仅凭名称字面匹配判定支撑
- 必须能够追溯到来源元素,验证支撑关系
# 输出(JSON)
只输出有关键支撑的元素-点对,不支撑的不输出
[
{{
"id": "元素id",
"名称": "元素名称",
"支撑结果": [
{{
"点": "点的名称",
"来源追溯": "该抽象概念的来源(具体元素/具象概念)及其语境",
"语境分析": "基于来源确定的语境(画面/文字/混合)",
"支撑理由": "说明该抽象概念为什么对该点有关键支撑"
}}
]
}}
]
注意:
1. 必须追溯到来源元素
2. 必须继承来源的语境来判断
3. 只输出"关键支撑"的元素-点对
4. 禁止字面匹配"""

    # The intention-support judgment for implicit concepts has been removed.

    def _count_sections(self, section_division: dict) -> int:
        """Count the total number of sections (leaf nodes only)."""
        if not section_division:
            return 0
        sections = section_division.get("段落列表", [])
        if not sections:
            return 0

        def count_leaf_nodes(section_list):
            count = 0
            for section in section_list:
                children = section.get("子项", [])
                if children:
                    count += count_leaf_nodes(children)
                else:
                    count += 1
            return count

        return count_leaf_nodes(sections)
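
    # Leaf-count example (illustrative structure): for
    #   {"段落列表": [{"id": "s1", "子项": [{"id": "s1-1"}, {"id": "s1-2"}]},
    #                 {"id": "s2"}]}
    # only s1-1, s1-2 and s2 are leaves, so _count_sections returns 3.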

    def _build_messages(self, state: dict) -> List[dict]:
        """Build messages - not used by this agent."""
        return []

    def _update_state(self, state: dict, response) -> dict:
        """Update state - not used by this agent."""
        return state
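

# A minimal usage sketch (hypothetical wiring): in the real pipeline the
# orchestrator builds `state` upstream and attaches the video file that
# get_video_file_from_state looks up, so with the bare dict below process()
# only exercises the empty-result fallback path. The "text" keys are
# illustrative, not a required schema.
if __name__ == "__main__":
    agent = ScriptSubstanceExtractionAgent()
    demo_state = {
        "text": {"标题": "示例标题", "口播": "示例口播内容"},
        "section_division": {"段落列表": []},
        "inspiration_points": [],
        "purpose_points": [],
        "key_points": [],
    }
    output = agent.process(demo_state)
    print(sorted(output.keys()))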