script_substance_extraction_agent.py 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. 实质提取Agent (SubstanceExtractionAgent)
  5. 功能:
  6. - 从视频内容中提取实质元素(具体元素、具象概念、抽象概念)
  7. - Step 1: 提取具体元素(只看视频画面中的实体)
  8. - Step 2: 提取具象概念(只看画面中的文字 + 口播内容中的文字)
  9. - Step 3: 总结抽象概念(基于Step 1+2)
  10. - Step 4: 共性分析(频次、段落覆盖率)
  11. - Step 5: 多维度评分(vs 灵感点/目的点/实质关键点)
  12. - Step 6: 筛选(基于频次+覆盖率+相似度)
  13. - Step 7: 分类
  14. - Step 8: 合并所有信息
  15. 参考:元素提取新方案设计文档.md
  16. """
  17. import json
  18. from typing import List, Dict, Any
  19. from concurrent.futures import ThreadPoolExecutor
  20. from src.components.agents.base import BaseLLMAgent
  21. from src.utils.logger import get_logger
  22. from src.utils.llm_invoker import LLMInvoker, get_video_file_from_state
# Module-level logger for this agent.
logger = get_logger(__name__)
# Global thread pool shared by all agent instances, used to fan out batched LLM calls.
# NOTE(review): tasks submitted here themselves submit further batch tasks to this
# same bounded pool and block on the results — confirm 16 workers cannot be
# exhausted into a deadlock under peak fan-out.
_GLOBAL_THREAD_POOL = ThreadPoolExecutor(max_workers=16, thread_name_prefix="SubstanceExtraction")
  26. class ScriptSubstanceExtractionAgent(BaseLLMAgent):
  27. """实质提取Agent - 自底向上的归纳过程
  28. 提取流程(视频版):
  29. - Step 1: 提取具体元素(只看视频画面中的实体)
  30. - Step 2: 提取具象概念(只看画面中的文字 + 口播内容中的文字)
  31. - Step 3: 总结抽象概念(基于Step 1+2)
  32. - Step 4: 共性分析(频次、段落覆盖率)
  33. - Step 5: 多维度评分(vs 灵感点/目的点/实质关键点)
  34. - Step 6: 筛选(基于频次+覆盖率+相似度)
  35. - Step 7: 分类
  36. - Step 8: 合并所有信息
  37. """
  38. def __init__(
  39. self,
  40. name: str = "substance_extraction_agent",
  41. description: str = "实质提取Agent",
  42. model_provider: str = "google_genai",
  43. temperature: float = 0.1,
  44. max_tokens: int = 40960
  45. ):
  46. system_prompt = self._build_system_prompt()
  47. super().__init__(
  48. name=name,
  49. description=description,
  50. model_provider=model_provider,
  51. system_prompt=system_prompt,
  52. temperature=temperature,
  53. max_tokens=max_tokens
  54. )
    def _build_system_prompt(self) -> str:
        """Return the fixed (Chinese) system prompt defining the substance-extraction
        concepts: concrete elements, concrete concepts, abstract concepts, and the
        "strip test" used to separate substance from form.

        The prompt text is runtime data sent to the LLM and is kept verbatim.
        """
        return """你是一个专业的内容分析专家,擅长从图文内容中提取实质性元素。
# 核心定义
## 实质(Substance):"是什么"
内容本身,独立于表达方式而存在
### 具体元素
- 定义:从图像中直接观察到的单一视觉实体对象
- 判断标准:可以指着图片说"这是一个X"
### 具象概念
- 定义:画面或者口播内容中出现的名词
- 判断标准:画面或者口播内容中实际出现,禁止语义推导
### 抽象概念
- 定义:从具体元素和具象概念中理解到的上位抽象
- 类型1:上位抽象(归类)- 是下位元素的类别、分类
- 类型2:引申含义 - 需要理解上下文的深层含义
## 区分方法:"剥离测试"
问题:如果去掉所有表达手法/风格/技巧,这个特征还存在吗?
- 存在 → 实质(内容本身)
- 不存在/失去意义 → 形式(表达方式)
"""
  76. def process(self, state: dict) -> dict:
  77. """执行完整的实质提取流程(Step 1-8)"""
  78. logger.info("=== 开始实质元素提取(完整流程) ===")
  79. # 从 state 中获取视频文件(视频版本核心输入)
  80. video_file = get_video_file_from_state(state)
  81. if not video_file:
  82. logger.error("无法从 state 中获取视频文件,实质提取终止")
  83. return {
  84. "concrete_elements": [],
  85. "concrete_concepts": [],
  86. "implicit_concepts": [],
  87. "abstract_concepts": [],
  88. "substance_elements": [],
  89. "substance_analyzed_result": [],
  90. "substance_scored_result": {},
  91. "substance_filtered_ids": [],
  92. "substance_categorized_result": {},
  93. "substance_final_elements": []
  94. }
  95. # 从state中提取其他文本/上下文数据
  96. text_data = state.get("text", {})
  97. section_division = state.get("section_division", {})
  98. # 处理灵感点:支持列表和字典两种格式
  99. inspiration_points_raw = state.get("inspiration_points", {})
  100. if isinstance(inspiration_points_raw, list):
  101. inspiration_points = inspiration_points_raw
  102. elif isinstance(inspiration_points_raw, dict):
  103. # 兼容旧格式:{"points": [...]} 或直接是列表
  104. inspiration_points = inspiration_points_raw.get("points", [])
  105. else:
  106. inspiration_points = []
  107. # 兼容 purpose_point 的多种格式:
  108. # 1. 字典格式:{"purpose_point": {"purposes": [...], "total_count": ...}}
  109. # 2. 列表格式:[...](直接是目的点列表)
  110. purpose_point_raw = state.get("purpose_point", {})
  111. if isinstance(purpose_point_raw, dict):
  112. purpose_points = purpose_point_raw.get("purposes", [])
  113. elif isinstance(purpose_point_raw, list):
  114. purpose_points = purpose_point_raw
  115. else:
  116. purpose_points = []
  117. # 兼容 key_points 的多种格式:
  118. # 1. 字典格式:{"key_points": [...], "total_count": ...}
  119. # 2. 列表格式:[...](直接是关键点列表)
  120. key_points_raw = state.get("key_points", {})
  121. if isinstance(key_points_raw, dict):
  122. key_points = key_points_raw.get("key_points", [])
  123. elif isinstance(key_points_raw, list):
  124. key_points = key_points_raw
  125. else:
  126. key_points = []
  127. # 只保留实质类关键点
  128. substance_key_points = [
  129. kp for kp in key_points
  130. if isinstance(kp, dict) and kp.get("维度大类") == "实质"
  131. ] if key_points else []
  132. logger.info(
  133. f"意图支撑评估输入: 灵感点={len(inspiration_points)}, "
  134. f"目的点={len(purpose_points)}, 关键点(实质类)={len(substance_key_points)}"
  135. )
  136. # Step 1: 提取具体元素(画面中的实体)
  137. logger.info("▶ Step 1: 提取具体元素")
  138. concrete_elements = self._step1_extract_concrete_elements(video_file)
  139. # Step 2: 提取具象概念(画面中的文字 + 口播内容中的文字)
  140. logger.info("▶ Step 2: 提取具象概念")
  141. concrete_concepts = self._step2_extract_concrete_concepts(
  142. video_file, text_data, concrete_elements
  143. )
  144. # 隐含概念相关逻辑已移除,不再单独提取
  145. implicit_concepts: List[dict] = []
  146. # Step 3: 总结抽象概念(基于Step 1+2)
  147. logger.info("▶ Step 3: 总结抽象概念")
  148. abstract_concepts = self._step3_summarize_abstract_concepts(
  149. video_file, concrete_elements, concrete_concepts, implicit_concepts
  150. )
  151. # 合并所有实质元素(不再包含隐含概念)
  152. all_substance_elements = (
  153. concrete_elements + concrete_concepts + abstract_concepts
  154. )
  155. logger.info(
  156. "Step 1-3 完成 - 总计: %d 个元素 (具体:%d, 具象:%d, 抽象:%d)",
  157. len(all_substance_elements),
  158. len(concrete_elements),
  159. len(concrete_concepts),
  160. len(abstract_concepts),
  161. )
  162. # Step 4: 共性分析
  163. logger.info("▶ Step 4: 共性分析")
  164. analyzed_result = self._step4_commonality_analysis(
  165. video_file, all_substance_elements, text_data, section_division
  166. )
  167. # Step 5: 多维度评分(已废弃相似度比较逻辑,当前不再进行相似度评分)
  168. logger.info("▶ Step 5: 多维度评分(已停用相似度计算,仅返回空结果)")
  169. scored_result = self._step5_multi_dimensional_scoring(
  170. all_substance_elements, analyzed_result, inspiration_points, purpose_points, substance_key_points
  171. )
  172. # Step 5.1: 意图支撑评估(基于视频与文本)
  173. logger.info("▶ Step 5.1: 意图支撑评估")
  174. intention_support_result = self._step5_1_intention_support_evaluation(
  175. video_file,
  176. all_substance_elements,
  177. analyzed_result,
  178. inspiration_points,
  179. purpose_points,
  180. substance_key_points,
  181. text_data,
  182. )
  183. # Step 6: 筛选
  184. logger.info("▶ Step 6: 筛选")
  185. filtered_ids = self._step6_filter_elements(
  186. analyzed_result,
  187. scored_result,
  188. intention_support_result,
  189. )
  190. # Step 7: 分类
  191. logger.info("▶ Step 7: 分类")
  192. categorized_result = self._step7_categorize_elements(
  193. all_substance_elements, filtered_ids
  194. )
  195. # Step 8: 合并信息
  196. logger.info("▶ Step 8: 合并信息")
  197. final_elements = self._merge_all_info(
  198. all_substance_elements,
  199. analyzed_result,
  200. scored_result,
  201. intention_support_result,
  202. filtered_ids,
  203. categorized_result,
  204. )
  205. logger.info(f"实质元素提取完成 - 最终元素数: {len(final_elements)}")
  206. # 返回所有结果
  207. return {
  208. # Step 1-3 原始提取结果
  209. "concrete_elements": concrete_elements,
  210. "concrete_concepts": concrete_concepts,
  211. "implicit_concepts": implicit_concepts,
  212. "abstract_concepts": abstract_concepts,
  213. "substance_elements": all_substance_elements,
  214. # Step 4-8 处理结果
  215. "substance_analyzed_result": analyzed_result,
  216. "substance_scored_result": scored_result,
  217. "substance_intention_support_result": intention_support_result,
  218. "substance_filtered_ids": filtered_ids,
  219. "substance_categorized_result": categorized_result,
  220. # 最终结果
  221. "substance_final_elements": final_elements
  222. }
  223. # ========== Step 1-3: 实质提取 ==========
  224. def _step1_extract_concrete_elements(
  225. self,
  226. video_file
  227. ) -> List[dict]:
  228. """Step 1: 提取具体元素 - 从图像中直接观察到的单一视觉实体对象"""
  229. if not self.is_initialized:
  230. self.initialize()
  231. if not video_file:
  232. logger.warning("⚠️ 没有视频文件,跳过具体元素提取")
  233. return []
  234. prompt = """# 任务
  235. 从视频中提取"具体元素"
  236. # 核心定义
  237. ## 具体元素
  238. - **定义**:
  239. -- 1.从视频画面中直接观察到的、可独立存在的**单一视觉实体对象**
  240. -- 2.视频的背景音乐、音效等非口播内容的声音
  241. - **判断标准**:
  242. -- 1.可以指着画面说"这是一个X"(单一、具体、可见的实体)
  243. -- 2.有背景音乐、音效等非口播内容的声音,直接用"背景音乐/音效声"作为名称即可,不要重复提取
  244. - **示例**:
  245. -- 1.胡萝卜、青椒、西兰花(每个都是单独的实体)
  246. -- 2.背景音乐/音效声
  247. - **禁止**:
  248. - 归类词(蔬菜、水果)
  249. - 概念性名词(食物、植物、人)
  250. - 文字内容(只关注视觉实体)
  251. ## 提取原则(仅针对画面中的视觉实体对象)
  252. - 只从视频画面中提取,不关注文字
  253. - 每个元素必须是单一的、具体的视觉实体
  254. - 使用"剥离测试":去掉表达方式后,这个实体仍然存在
  255. # 命名规范
  256. - 原子性:单一原子名词,不可再拆分
  257. - 名词性:纯名词,严禁形容词、动词、副词
  258. - 具体性:直接指向可观察的实体
  259. # 输出json结构
  260. [
  261. {
  262. "id": "从1开始的自增序列",
  263. "名称": "单一原子名词",
  264. "描述": "说明这个元素是什么,外观特征",
  265. "维度": {"一级": "实质", "二级": "具体元素"},
  266. "来源": ["视频画面"],
  267. "推理": "为什么识别这个具体元素"
  268. },
  269. {
  270. "id": "从1开始的自增序列",
  271. "名称": "背景音乐/音效声",
  272. "描述": "说明背景音乐/音效声是什么",
  273. "维度": {"一级": "实质", "二级": "具体元素"},
  274. "来源": ["视频"],
  275. "推理": "为什么识别这个背景音乐/音效声"
  276. }
  277. ]
  278. 注意:只提取具体的视觉实体对象,不要提取抽象概念或归类词
  279. """
  280. # 使用视频分析接口
  281. result = LLMInvoker.safe_invoke_video_analysis(
  282. operation_name="具体元素提取",
  283. video_file=video_file,
  284. prompt=prompt,
  285. agent=self,
  286. fallback=[]
  287. )
  288. # 为每个具体元素添加id
  289. for idx, element in enumerate(result, 1):
  290. element["id"] = f"具体元素-{idx}"
  291. return result
  292. def _step2_extract_concrete_concepts(
  293. self,
  294. video_file,
  295. text_data: dict,
  296. concrete_elements: List[dict],
  297. ) -> List[dict]:
  298. """Step 2: 提取具象概念 - 文字中字面出现的名词"""
  299. if not self.is_initialized:
  300. self.initialize()
  301. # 从第一步结果中提取已识别的具体元素名称,供本步骤排除使用
  302. element_names = [
  303. e.get("名称") for e in (concrete_elements or []) if e.get("名称")
  304. ]
  305. element_names_text = (
  306. json.dumps(element_names, ensure_ascii=False, indent=2)
  307. if element_names
  308. else "[]"
  309. )
  310. prompt = f"""# 任务
  311. 从视频中提取"具象概念"
  312. # 核心定义
  313. ## 具象概念
  314. - **定义**:视频画面内的文字或者口播内容中明确提到的完整名词
  315. ## 排除的名称(来自第一步,仅用于排除)
  316. **禁止提取的名称**:{element_names_text}
  317. ## 判断标准
  318. - **视频画面内的文字或者口播内容**中实际出现的**完整名词**
  319. - **不能是视频画面中出现的元素的名称等归类词**
  320. - 去掉表达方式后,这个概念仍然存在
  321. # 约束
  322. - 禁止通过语义推导、联想、理解得出的名词
  323. - **禁止归类词(蔬菜、水果、人等)**
  324. - **禁止使用第一步中已提取的具体元素名称**
  325. - 禁止拆分复合词
  326. - 禁止提取形容词、动词
  327. - 禁止提取谓语、定语、状语、补语
  328. - 禁止提取副词
  329. ## 提取原则
  330. - **词语完整性**:必须提取完整的**名词**,不允许拆分复合词
  331. - **严格约束**:必须是**画面文字或者口播内容中实际出现**的完整名词
  332. - **严格的名词验证**(必须同时满足以下两个条件):
  333. - 条件1:词性是名词(词典意义上的名词)
  334. - 条件2:在当前上下文中作为名词使用(语境判断)
  335. **验证方法**:
  336. - 找到该词在视频画面内的文字或者口播内容中的具体位置
  337. - 分析该词在句子中的语法成分和实际作用
  338. - 判断:该词是否在这个语境中充当"事物/对象/概念"的角色?
  339. # 输出json结构
  340. [
  341. {{
  342. "id": "从1开始的自增序列",
  343. "名称": "字面原词(完整名词)",
  344. "描述": "说明这个概念是什么",
  345. "维度": {{"一级": "实质", "二级": "具象概念"}},
  346. "来源": "HH:MM:SS",
  347. "上下文验证": {{
  348. "原文位置": "该词在原视频画面内的文字或者口播内容中的具体句子",
  349. "语法成分": "该词在句子中的语法成分(主语/宾语/定语中心语等)",
  350. "语境判断": "说明该词在此语境中确实作为名词使用的理由"
  351. }},
  352. "推理": "为什么这个名词被认为是具象概念"
  353. }}
  354. ]
  355. 注意:只输出同时满足"词性是名词"和"上下文中作为名词使用"两个条件的概念
  356. """
  357. # 使用视频分析接口(可综合语音与画面中的文字)
  358. result = LLMInvoker.safe_invoke_video_analysis(
  359. operation_name="具象概念提取",
  360. video_file=video_file,
  361. prompt=prompt,
  362. agent=self,
  363. fallback=[]
  364. )
  365. # 为每个具象概念添加id
  366. for idx, concept in enumerate(result, 1):
  367. concept["id"] = f"具象概念-{idx}"
  368. return result
  369. def _step3_summarize_abstract_concepts(
  370. self,
  371. video_file,
  372. concrete_elements: List[dict],
  373. concrete_concepts: List[dict],
  374. implicit_concepts: List[dict]
  375. ) -> List[dict]:
  376. """Step 3: 总结抽象概念 - 从具体元素和具象概念中归纳上位抽象"""
  377. if not self.is_initialized:
  378. self.initialize()
  379. if not concrete_elements and not concrete_concepts:
  380. logger.warning("⚠️ 没有具体元素或具象概念,跳过抽象概念提取")
  381. return []
  382. # 构建已提取的元素文本
  383. elements_text = json.dumps([
  384. {"id": e.get("id"), "名称": e.get("名称"), "描述": e.get("描述")}
  385. for e in concrete_elements
  386. ], ensure_ascii=False, indent=2) if concrete_elements else "无"
  387. concepts_text = json.dumps([
  388. {"id": c.get("id"), "名称": c.get("名称"), "描述": c.get("描述")}
  389. for c in concrete_concepts
  390. ], ensure_ascii=False, indent=2) if concrete_concepts else "无"
  391. prompt = f"""# 任务
  392. 基于已提取的具体元素和具象概念,总结新的"抽象概念"
  393. # 已提取的具体元素
  394. {elements_text}
  395. # 已提取的具象概念
  396. {concepts_text}
  397. # 核心定义
  398. # 定义与分类
  399. **抽象概念**分两类:
  400. **类型1-上位抽象**:对具体元素/具象概念的归类
  401. **类型2-引申含义**:具体元素/具象概念无法直接表达的深层含义
  402. # 提取原则
  403. - 对具体元素/具象概念的归类
  404. - 具体元素和具象概念无法直接表达的深层含义
  405. - 基于归纳:基于已提取的具体元素/具象概念
  406. - 来源追溯:准确标明所有来源ID(具体元素ID、具象概念ID),必须完整可追溯
  407. # 命名规范
  408. - 有完整独立语义的概念
  409. - 单一原子名词,不可拆分
  410. - 纯名词,禁止形容词、动词、副词
  411. - 精准描述概念,不做修饰
  412. # 判断标准
  413. - 去掉表达方式后,概念仍存在
  414. # 输出json结构
  415. [
  416. {{
  417. "id": "从1开始的自增序列",
  418. "名称": "单一名词或短语",
  419. "描述": "说明这个抽象概念是什么",
  420. "维度": {{"一级": "实质", "二级": "抽象概念"}},
  421. "类型": "上位抽象 | 引申含义",
  422. "来源": {{
  423. "具体元素": [{{"id":"具体元素-X", "名称":"具体元素-X的名称"}}, {{"id":"具体元素-Y", "名称":"具体元素-Y的名称"}}],
  424. "具象概念": [{{"id":"具象概念-A", "名称":"具象概念-A的名称"}}, {{"id":"具象概念-B", "名称":"具象概念-B的名称"}}]
  425. }},
  426. "推理过程": "明确说明如何从上述来源(具体哪些元素ID和概念ID)推导出这个抽象概念",
  427. }}
  428. ]
  429. 注意:只输出验证全部通过的概念
  430. """
  431. # 使用视频分析接口总结抽象概念
  432. result = LLMInvoker.safe_invoke_video_analysis(
  433. operation_name="抽象概念总结",
  434. video_file=video_file,
  435. prompt=prompt,
  436. agent=self,
  437. fallback=[]
  438. )
  439. # 为每个抽象概念添加id
  440. for idx, concept in enumerate(result, 1):
  441. concept["id"] = f"抽象概念-{idx}"
  442. return result
  443. # ========== Step 4-8: 后续处理 ==========
  444. def _step4_commonality_analysis(
  445. self,
  446. video_file,
  447. substance_elements: List[dict],
  448. text_data: dict,
  449. section_division: dict
  450. ) -> List[dict]:
  451. """Step 4: 共性分析 - 统计频次和段落覆盖率"""
  452. if not substance_elements:
  453. return []
  454. total_sections = self._count_sections(section_division)
  455. # 分批处理
  456. analyzed_items = self._commonality_analysis_in_batches(
  457. video_file, substance_elements, text_data, section_division, total_sections,
  458. max_batch_size=100
  459. )
  460. return analyzed_items
  461. def _commonality_analysis_in_batches(
  462. self,
  463. video_file,
  464. substance_elements: list,
  465. text_data: dict,
  466. section_division: dict,
  467. total_sections: int,
  468. max_batch_size: int = 100
  469. ) -> list:
  470. """分批处理共性分析"""
  471. if not self.is_initialized:
  472. self.initialize()
  473. num_elements = len(substance_elements)
  474. if num_elements == 0:
  475. return []
  476. # 如果元素数少于批次大小,一次性处理
  477. if num_elements <= max_batch_size:
  478. return self._commonality_analysis_single_batch(
  479. video_file, substance_elements, text_data, section_division, total_sections
  480. )
  481. # 分批处理
  482. num_batches = (num_elements + max_batch_size - 1) // max_batch_size
  483. batch_futures = {}
  484. for batch_idx in range(num_batches):
  485. start_idx = batch_idx * max_batch_size
  486. end_idx = min(start_idx + max_batch_size, num_elements)
  487. batch_elements = substance_elements[start_idx:end_idx]
  488. future = _GLOBAL_THREAD_POOL.submit(
  489. self._commonality_analysis_single_batch,
  490. video_file, batch_elements, text_data, section_division, total_sections
  491. )
  492. batch_futures[batch_idx] = future
  493. # 收集结果
  494. all_results = []
  495. for batch_idx, future in batch_futures.items():
  496. try:
  497. batch_result = future.result()
  498. if batch_result:
  499. all_results.extend(batch_result)
  500. except Exception as e:
  501. logger.error(f"批次 {batch_idx + 1} 失败: {e}")
  502. return all_results
    def _commonality_analysis_single_batch(
        self,
        video_file,
        batch_elements: list,
        text_data: dict,
        section_division: dict,
        total_sections: int
    ) -> list:
        """Analyze one batch: ask the LLM for each element's occurrence frequency
        and the sections it appears in, then derive coverage stats locally.

        Args:
            video_file: Opaque video handle forwarded to the LLM invoker.
            batch_elements: Elements to analyze in this batch.
            text_data: Auxiliary text context (not referenced here directly).
            section_division: Section structure, serialized into the prompt.
            total_sections: Denominator for the coverage rate (0 -> rate 0).

        Returns:
            One record per element with frequency, section list, distinct
            section count, and coverage rate.
        """
        if not self.is_initialized:
            self.initialize()
        section_text = self._build_section_text(section_division)
        elements_text = self._build_simple_items_text(batch_elements)
        # Prompt text is runtime data for the LLM; kept verbatim (Chinese).
        prompt = f"""# 段落列表
{section_text}
# 元素列表
{elements_text}
# 任务
对每个元素统计出现的段落和频次
## 统计规则
### 1. 具体元素统计(只统计视觉实体)
- **出现频次**: 统计该**单一视觉实体对象**在视频图像中直接观察到的次数
- **出现段落列表**: 只统计能在视频图像中**直接看到该视觉实体**的段落
### 2. 具象概念统计(只统计文字字面)
- **出现频次**: 统计该名词在视频画面文字和口播内容中**画面或者口播内容中出现**的次数
- **出现段落列表**: 只统计**视频画面文字或者口播内容中包含该名词**的段落
### 3. 抽象概念统计(统计语义归类)
- **出现频次**: 统计该概念被**隐含表达**的总次数
- **出现段落列表**: 统计**包含该概念所归类的具体元素/具象概念**的段落
# 输出(JSON)
[
{{
"id": "元素id",
"名称": "元素名称",
"出现频次": 0,
"出现段落列表": [
{{
"段落ID": "段落id",
"如何体现": "描述该元素在这个段落中的具体体现方式"
}}
]
}}
]
"""
        # Multimodal invocation; falls back to [] on failure.
        llm_result = LLMInvoker.safe_invoke_video_analysis(
            operation_name="共性分析",
            video_file=video_file,
            prompt=prompt,
            agent=self,
            fallback=[]
        )
        # Compute coverage locally: duplicate section IDs are collapsed so each
        # section counts once toward coverage.
        analyzed_items = []
        for analysis in llm_result:
            section_list = analysis.get("出现段落列表", [])
            unique_paragraph_ids = set()
            for item in section_list:
                unique_paragraph_ids.add(item.get("段落ID", ""))
            coverage_count = len(unique_paragraph_ids)
            # Guard against division by zero when the section count is unknown.
            coverage_rate = round(coverage_count / total_sections, 4) if total_sections > 0 else 0
            analyzed_items.append({
                "id": analysis.get("id", 0),
                "名称": analysis.get("名称", ""),
                "出现频次": analysis.get("出现频次", 0),
                "出现段落列表": section_list,
                "出现段落数": coverage_count,
                "段落覆盖率": coverage_rate
            })
        return analyzed_items
  573. def _step5_multi_dimensional_scoring(
  574. self,
  575. substance_elements: List[dict],
  576. analyzed_result: list,
  577. inspiration_points: dict,
  578. purpose_points: list,
  579. substance_key_points: list
  580. ) -> dict:
  581. """Step 5: 多维度评分(相似度逻辑已废弃)
  582. 说明:
  583. - 不再进行任何相似度计算,完全依赖后续的“意图支撑”进行筛选
  584. - 保留函数与返回结构,仅返回空结果,避免下游依赖崩溃
  585. """
  586. logger.info(
  587. "【多维度评分】相似度比较逻辑已关闭,当前不进行评分,仅返回空结果。"
  588. )
  589. return {
  590. "灵感点": [],
  591. "目的点": [],
  592. "关键点": [],
  593. }
    def _step5_1_intention_support_evaluation(
        self,
        video_file,
        substance_elements: List[dict],
        analyzed_result: list,
        inspiration_points: dict,
        purpose_points: list,
        substance_key_points: list,
        text_data: dict,
    ) -> dict:
        """Step 5.1: intention-support evaluation.

        Evaluates, per dimension (concrete element / concrete concept / abstract
        concept) crossed with point type (inspiration / purpose / key), which
        elements support which points. No frequency/coverage filtering is applied
        here; `analyzed_result` is accepted for interface symmetry but not read.

        Returns:
            Dict keyed by point type ("灵感点", "目的点", "关键点"), each a list
            of support relations aggregated across dimensions.
        """
        if not substance_elements:
            return {"灵感点": [], "目的点": [], "关键点": []}
        logger.info(f"【意图支撑评估】输入: {len(substance_elements)} 个实质元素")
        # Group by second-level dimension (no frequency filter; evaluate all).
        dimension_groups = {
            "具体元素": [],
            "具象概念": [],
            "抽象概念": [],
        }
        for elem in substance_elements:
            second_level = elem.get("维度", {}).get("二级", "")
            if second_level in dimension_groups:
                dimension_groups[second_level].append(elem)
        logger.info(
            "维度分组(意图支撑): 具体元素=%d, 具象概念=%d, 抽象概念=%d",
            len(dimension_groups["具体元素"]),
            len(dimension_groups["具象概念"]),
            len(dimension_groups["抽象概念"]),
        )
        # Parallel evaluation: each dimension × each of the 3 point types.
        futures = {}
        def submit_if_needed(dimension_name: str, point_type: str, points_list):
            # Skip combinations where either the points or the elements are empty.
            if not points_list:
                logger.info(
                    f"⏭️ 跳过意图支撑评估: {dimension_name}-{point_type} "
                    f"(原因: 点列表为空, len={len(points_list) if isinstance(points_list, list) else 'N/A'})"
                )
                return
            if not dimension_groups.get(dimension_name):
                logger.info(
                    f"⏭️ 跳过意图支撑评估: {dimension_name}-{point_type} "
                    f"(原因: 该维度无元素, len={len(dimension_groups.get(dimension_name, []))})"
                )
                return
            key = (dimension_name, point_type)
            logger.info(
                f"📤 提交意图支撑评估任务: {dimension_name}-{point_type} "
                f"(元素数={len(dimension_groups[dimension_name])}, 点数={len(points_list)})"
            )
            # NOTE(review): these tasks submit further batch tasks to the same
            # bounded pool and block on them — confirm pool sizing avoids
            # worker exhaustion under full 3×3 fan-out.
            futures[key] = _GLOBAL_THREAD_POOL.submit(
                self._evaluate_support_by_dimension,
                video_file,
                dimension_name,
                dimension_groups[dimension_name],
                points_list,
                point_type,
                text_data,
            )
        # concrete element / concrete concept / abstract concept × inspiration / purpose / key points.
        # Non-list point containers are treated as empty (defensive normalization).
        for dim in ["具体元素", "具象概念", "抽象概念"]:
            submit_if_needed(dim, "灵感点", inspiration_points if isinstance(inspiration_points, list) else [])
            submit_if_needed(dim, "目的点", purpose_points if isinstance(purpose_points, list) else [])
            submit_if_needed(dim, "关键点", substance_key_points if isinstance(substance_key_points, list) else [])
        # Collect results, aggregated by point type.
        result = {
            "灵感点": [],
            "目的点": [],
            "关键点": [],
        }
        for (dimension_name, point_type), future in futures.items():
            try:
                dimension_result = future.result()
                if dimension_result:
                    result[point_type].extend(dimension_result)
                    logger.info(
                        f"✅ 意图支撑-{dimension_name}-{point_type} 评估完成: {len(dimension_result)} 条支撑关系"
                    )
            except Exception as e:
                logger.error(f"❌ 意图支撑-{dimension_name}-{point_type} 评估失败: {e}")
        return result
  680. def _evaluate_support_by_dimension(
  681. self,
  682. video_file,
  683. dimension_name: str,
  684. elements: list,
  685. points: list,
  686. point_type: str,
  687. text_data: dict,
  688. ) -> list:
  689. """按维度评估意图支撑关系(分批处理)"""
  690. if not self.is_initialized:
  691. self.initialize()
  692. if not elements or not points:
  693. return []
  694. # 分批控制:元素数 × 点数 ≈ 100 以内
  695. num_elements = len(elements)
  696. num_points = len(points)
  697. max_batch_product = 100
  698. max_elements_per_batch = max(1, int(max_batch_product / max(1, num_points)))
  699. num_batches = (num_elements + max_elements_per_batch - 1) // max_elements_per_batch
  700. batch_futures = {}
  701. for batch_idx in range(num_batches):
  702. start_idx = batch_idx * max_elements_per_batch
  703. end_idx = min(start_idx + max_elements_per_batch, num_elements)
  704. batch_elements = elements[start_idx:end_idx]
  705. future = _GLOBAL_THREAD_POOL.submit(
  706. self._evaluate_support_single_batch_by_dimension,
  707. video_file,
  708. dimension_name,
  709. batch_elements,
  710. points,
  711. point_type,
  712. text_data,
  713. )
  714. batch_futures[batch_idx] = future
  715. # 收集结果
  716. all_results = []
  717. for batch_idx, future in batch_futures.items():
  718. try:
  719. batch_result = future.result()
  720. if batch_result:
  721. all_results.extend(batch_result)
  722. except Exception as e:
  723. logger.error(f"【意图支撑-{dimension_name}】批次 {batch_idx + 1} 失败: {e}")
  724. # 合并结果(支撑结果)
  725. merged_results = self._merge_support_batch_results(all_results)
  726. return merged_results
  727. def _evaluate_support_single_batch_by_dimension(
  728. self,
  729. video_file,
  730. dimension_name: str,
  731. batch_elements: list,
  732. points: list,
  733. point_type: str,
  734. text_data: dict,
  735. ) -> list:
  736. """单批次意图支撑评估(按维度)"""
  737. if not self.is_initialized:
  738. self.initialize()
  739. post_content = self._build_post_content(text_data)
  740. elements_text = self._build_simple_items_text_dimension(batch_elements, dimension_name)
  741. points_text = self._build_points_text(point_type, points)
  742. # 根据维度选择不同的 prompt
  743. if dimension_name == "具体元素":
  744. prompt = self._build_concrete_element_support_prompt(post_content, elements_text, points_text)
  745. elif dimension_name == "具象概念":
  746. prompt = self._build_concrete_concept_support_prompt(post_content, elements_text, points_text)
  747. elif dimension_name == "抽象概念":
  748. prompt = self._build_abstract_concept_support_prompt(post_content, elements_text, points_text)
  749. else:
  750. logger.error(f"未知维度(意图支撑): {dimension_name}")
  751. return []
  752. # 使用视频分析接口,多模态评估意图支撑
  753. result = LLMInvoker.safe_invoke_video_analysis(
  754. operation_name=f"意图支撑评估-{dimension_name}-{point_type}",
  755. video_file=video_file,
  756. prompt=prompt,
  757. agent=self,
  758. fallback=[],
  759. )
  760. return result
  761. def _evaluate_support_in_batches(
  762. self,
  763. elements: list,
  764. points: list,
  765. point_type: str,
  766. max_batch_product: int = 100
  767. ) -> list:
  768. """分批评估相似度"""
  769. if not self.is_initialized:
  770. self.initialize()
  771. if not points:
  772. return []
  773. num_elements = len(elements)
  774. num_points = len(points)
  775. max_elements_per_batch = max(1, int(max_batch_product / num_points))
  776. num_batches = (num_elements + max_elements_per_batch - 1) // max_elements_per_batch
  777. # 分批处理
  778. batch_futures = {}
  779. for batch_idx in range(num_batches):
  780. start_idx = batch_idx * max_elements_per_batch
  781. end_idx = min(start_idx + max_elements_per_batch, num_elements)
  782. batch_elements = elements[start_idx:end_idx]
  783. future = _GLOBAL_THREAD_POOL.submit(
  784. self._evaluate_support_single_batch,
  785. batch_elements, points, point_type
  786. )
  787. batch_futures[batch_idx] = future
  788. # 收集结果
  789. all_results = []
  790. for batch_idx, future in batch_futures.items():
  791. try:
  792. batch_result = future.result()
  793. if batch_result:
  794. all_results.extend(batch_result)
  795. except Exception as e:
  796. logger.error(f"批次 {batch_idx + 1} 失败: {e}")
  797. # 合并并筛选(每个元素保留最相关的1-2个点)
  798. merged_results = self._merge_batch_results(all_results)
  799. return merged_results
    def _evaluate_support_single_batch(
        self,
        batch_elements: list,
        points: list,
        point_type: str
    ) -> list:
        """Evaluate one batch: ask the LLM for text- and semantic-similarity
        between each element and every point (text-only chat call, no video).

        Args:
            batch_elements: Elements in this batch.
            points: All points of *point_type* to compare against.
            point_type: Point-type label, used in the operation name.

        Returns:
            Raw LLM result: per-element similarity records; [] on failure.
        """
        if not self.is_initialized:
            self.initialize()
        elements_text = self._build_simple_items_text(batch_elements)
        points_text = self._build_points_text(point_type, points)
        # Prompt text is runtime data for the LLM; kept verbatim (Chinese).
        prompt = f"""# 元素列表
{elements_text}
# 点列表
{points_text}
# 任务
对每个元素计算元素与点的文本相似度和语义相似度
# 输出(JSON)
[
{{
"id": "元素id",
"名称": "元素名称",
"相似度结果": [
{{
"点":"点的名称",
"语义相似度":0.21,
"语义相似度理由": "理由",
"文本相似度":0.33,
"文本相似度理由": "理由"
}}
]
}}
]
"""
        # Plain chat invocation (system prompt + user prompt), no video input.
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]
        result = LLMInvoker.safe_invoke(
            self,
            f"评估支撑{point_type}",
            messages,
            fallback=[]
        )
        return result
  845. def _merge_batch_results(self, all_results: list) -> list:
  846. """合并批次结果,每个元素只保留最相关的1-2个点"""
  847. if not all_results:
  848. return []
  849. merged_map = {}
  850. for item in all_results:
  851. element_id = item.get("id")
  852. if element_id not in merged_map:
  853. merged_map[element_id] = {
  854. "id": element_id,
  855. "名称": item.get("名称"),
  856. "相似度结果": []
  857. }
  858. if not merged_map[element_id]["相似度结果"]:
  859. merged_map[element_id]["相似度结果"] = item.get("相似度结果", [])
  860. # 筛选每个元素的相似度结果
  861. for element_data in merged_map.values():
  862. similarity_results = element_data.get("相似度结果", [])
  863. if not similarity_results:
  864. continue
  865. max_text_sim_point = max(similarity_results, key=lambda x: x.get("文本相似度", 0))
  866. max_semantic_sim_point = max(similarity_results, key=lambda x: x.get("语义相似度", 0))
  867. if max_text_sim_point.get("点") == max_semantic_sim_point.get("点"):
  868. filtered_results = [max_text_sim_point]
  869. else:
  870. filtered_results = [max_text_sim_point, max_semantic_sim_point]
  871. element_data["相似度结果"] = filtered_results
  872. return list(merged_map.values())
  873. def _merge_support_batch_results(self, all_results: list) -> list:
  874. """合并批次结果(意图支撑),直接合并支撑的元素-点对"""
  875. if not all_results:
  876. return []
  877. merged_map = {}
  878. for item in all_results:
  879. element_id = item.get("id")
  880. if element_id not in merged_map:
  881. merged_map[element_id] = {
  882. "id": element_id,
  883. "名称": item.get("名称"),
  884. "支撑结果": [],
  885. }
  886. # 这里假设下游会控制去重,只在首次合并时写入
  887. if not merged_map[element_id]["支撑结果"]:
  888. merged_map[element_id]["支撑结果"] = item.get("支撑结果", [])
  889. return list(merged_map.values())
  890. def _step6_filter_elements(
  891. self,
  892. analyzed_result: list,
  893. scored_result: dict,
  894. intention_support_result: dict,
  895. ) -> list:
  896. """Step 6: 筛选实质元素
  897. 新的保留策略(基于意图支撑关系 + 覆盖率进行筛选):
  898. - 覆盖率和频次主要用于统计展示,但会作为必要条件之一
  899. - 必须**同时**满足以下三个条件才保留:
  900. - 出现频次 > 1
  901. - 存在任意“意图支撑”关系
  902. - 段落覆盖率 > 0.3(30%)
  903. - 相似度评分相关逻辑已全部停用,不再参与筛选
  904. """
  905. if not analyzed_result:
  906. return []
  907. # 创建 analyzed_result 的映射
  908. analyzed_map = {item.get("id"): item for item in analyzed_result}
  909. # 创建意图支撑映射:只要某个元素在任一维度、任一点类型下有支撑关系,即视为“有支撑”
  910. intention_support_map = {}
  911. if intention_support_result:
  912. for point_type in ["灵感点", "目的点", "关键点"]:
  913. dimension_data = intention_support_result.get(point_type, [])
  914. for item in dimension_data:
  915. if not isinstance(item, dict):
  916. continue
  917. element_id = item.get("id")
  918. support_results = item.get("支撑结果", [])
  919. if not element_id:
  920. continue
  921. if element_id not in intention_support_map:
  922. intention_support_map[element_id] = []
  923. # 只要有一条支撑结果就认为该元素“有支撑关系”
  924. if support_results:
  925. intention_support_map[element_id].extend(support_results)
  926. # 筛选
  927. filtered_ids = []
  928. rejected_ids = []
  929. for element_id, analyzed_data in analyzed_map.items():
  930. element_name = analyzed_data.get("名称", "N/A")
  931. # 声音类型的实质元素(背景音乐、音效声等)直接通过筛选,不参与后续判断
  932. sound_type_names = ["背景音乐", "音效声"]
  933. if element_name in sound_type_names:
  934. filtered_ids.append(element_id)
  935. logger.info(
  936. f"✅ 保留: id={element_id}, name={element_name}, 原因=声音类型元素,豁免筛选"
  937. )
  938. continue
  939. # 确保 frequency 是整数类型
  940. frequency_raw = analyzed_data.get("出现频次", 0)
  941. try:
  942. frequency = int(frequency_raw) if frequency_raw is not None else 0
  943. except (ValueError, TypeError):
  944. frequency = 0
  945. # 确保 coverage_rate 是浮点数类型
  946. coverage_rate_raw = analyzed_data.get("段落覆盖率", 0.0)
  947. try:
  948. coverage_rate = float(coverage_rate_raw) if coverage_rate_raw is not None else 0.0
  949. except (ValueError, TypeError):
  950. coverage_rate = 0.0
  951. # 频次过滤:出现频次<=1 的直接过滤(不再继续做支撑和覆盖率判断)
  952. if frequency <= 1:
  953. rejected_ids.append(element_id)
  954. logger.info(
  955. "❌ 过滤: id=%s, name=%s, 原因=出现频次<=1 (frequency=%d)",
  956. element_id,
  957. element_name,
  958. frequency,
  959. )
  960. continue
  961. support_info = intention_support_map.get(element_id, [])
  962. has_support = bool(support_info)
  963. has_high_coverage = coverage_rate > 0.3
  964. # 出现频次>1 且 有意图支撑关系 且 段落覆盖率 > 30%:直接保留,不进行相似度比较
  965. if has_support and has_high_coverage:
  966. filtered_ids.append(element_id)
  967. logger.info(
  968. f"✅ 保留: id={element_id}, name={element_name}, "
  969. f"support_count={len(support_info)}, coverage={coverage_rate}"
  970. )
  971. continue
  972. # 不满足“出现频次>1 + 有意图支撑 + 覆盖率>30%”的元素全部过滤
  973. rejected_ids.append(element_id)
  974. logger.info(
  975. "❌ 过滤: id=%s, name=%s, 原因=无隐含概念豁免且"
  976. "未同时满足出现频次>1、有意图支撑关系和段落覆盖率>0.3 "
  977. "(frequency=%d, coverage=%.4f)",
  978. element_id,
  979. element_name,
  980. frequency,
  981. coverage_rate,
  982. )
  983. logger.info(f"筛选完成: {len(filtered_ids)}/{len(analyzed_result)} 通过")
  984. return filtered_ids
  985. def _step7_categorize_elements(
  986. self,
  987. substance_elements: List[dict],
  988. filtered_ids: list
  989. ) -> dict:
  990. """Step 7: 元素分类 - 按二级维度分别分类"""
  991. if not filtered_ids:
  992. return {}
  993. # 只保留筛选后的元素
  994. filtered_elements = [
  995. elem for elem in substance_elements
  996. if elem.get("id") in filtered_ids
  997. ]
  998. # 按二级维度分组
  999. dimension_groups = {
  1000. "具体元素": [],
  1001. "具象概念": [],
  1002. "抽象概念": [],
  1003. }
  1004. for elem in filtered_elements:
  1005. second_level = elem.get("维度", {}).get("二级", "")
  1006. if second_level in dimension_groups:
  1007. dimension_groups[second_level].append(elem)
  1008. # 并行分类
  1009. categorization_results = {}
  1010. futures = {}
  1011. for dimension_name, elements in dimension_groups.items():
  1012. if not elements:
  1013. continue
  1014. future = _GLOBAL_THREAD_POOL.submit(
  1015. self._categorize_single_dimension,
  1016. dimension_name,
  1017. elements
  1018. )
  1019. futures[dimension_name] = future
  1020. # 收集结果
  1021. for dimension_name, future in futures.items():
  1022. try:
  1023. categorization_results[dimension_name] = future.result()
  1024. except Exception as e:
  1025. logger.error(f"{dimension_name} 分类失败: {e}")
  1026. categorization_results[dimension_name] = {"元素分类": []}
  1027. return categorization_results
def _categorize_single_dimension(
    self,
    dimension_name: str,
    elements: list
) -> dict:
    """Classify the elements of one second-level dimension via the LLM.

    Args:
        dimension_name: dimension label, e.g. "具体元素".
        elements: element dicts; only id/名称/描述 are sent to the model.

    Returns:
        Parsed JSON dict containing an "元素分类" list, or the empty-dict
        fallback when the invocation fails.
    """
    if not self.is_initialized:
        self.initialize()
    # Expose only id/name/description to keep the prompt compact.
    elements_text = json.dumps([
        {"id": elem.get("id"), "名称": elem.get("名称"), "描述": elem.get("描述")}
        for elem in elements
    ], ensure_ascii=False, indent=2)
    # NOTE: the prompt below is consumed by the model verbatim — do not edit.
    prompt = f"""# 任务
对"{dimension_name}"维度的元素进行分类
# 元素列表
{elements_text}
# 分类要求
## 核心原则
1. **单一原子名词**: 分类名称必须是单一的原子名词
2. **MECE原则**: 分类之间相互独立、完全穷尽
3. **确定性归属**: 每个元素只能归属唯一一个分类
4. **层级限制**: 最多2层
5. 元素可以没有分类,不要强行归类
6. 分类下面至少要有2个元素,否则不要分类
## 实质维度的分类逻辑
- **核心原则**:按照**内容本质、属性特征、功能作用**等角度来分类
- **包含**:
- 物理特征:形态、材质、颜色等
- 功能用途:工具、装饰、食物等
- 概念类别:情感、价值观、技能等
- 领域归属:科技、艺术、健康等
# 输出格式(JSON)
{{
"元素分类": [
{{
"元素id": "元素的ID",
"元素名称": "元素名称",
"分类": ["一级分类","二级分类","..."]
}}
]
}}
"""
    messages = [
        {"role": "system", "content": self.system_prompt},
        {"role": "user", "content": prompt}
    ]
    result = LLMInvoker.safe_invoke(
        self,
        f"分类-{dimension_name}",
        messages,
        fallback={}
    )
    return result
  1081. def _merge_all_info(
  1082. self,
  1083. substance_elements: List[dict],
  1084. analyzed_result: list,
  1085. scored_result: dict,
  1086. intention_support_result: dict,
  1087. filtered_ids: list,
  1088. categorized_result: dict
  1089. ) -> list:
  1090. """Step 8: 合并所有信息 - 每个元素包含所有中间信息"""
  1091. if not filtered_ids:
  1092. return []
  1093. # 创建映射
  1094. extraction_map = {item.get("id"): item for item in substance_elements}
  1095. analyzed_map = {item.get("id"): item for item in analyzed_result}
  1096. # 创建评分映射(相似度)
  1097. scored_map = {}
  1098. for dimension in ["灵感点", "目的点", "关键点"]:
  1099. dimension_data = scored_result.get(dimension, [])
  1100. for item in dimension_data:
  1101. if not isinstance(item, dict):
  1102. continue
  1103. element_id = item.get("id")
  1104. if element_id not in scored_map:
  1105. scored_map[element_id] = {}
  1106. similarity_results = item.get("相似度结果", [])
  1107. sorted_results = sorted(
  1108. similarity_results,
  1109. key=lambda x: (x.get("文本相似度", 0), x.get("语义相似度", 0)),
  1110. reverse=True
  1111. )
  1112. scored_map[element_id][dimension] = sorted_results
  1113. # 创建意图支撑映射
  1114. intention_support_map = {}
  1115. if intention_support_result:
  1116. for dimension in ["灵感点", "目的点", "关键点"]:
  1117. dimension_data = intention_support_result.get(dimension, [])
  1118. for item in dimension_data:
  1119. if not isinstance(item, dict):
  1120. continue
  1121. element_id = item.get("id")
  1122. if element_id not in intention_support_map:
  1123. intention_support_map[element_id] = {}
  1124. support_results = item.get("支撑结果", [])
  1125. intention_support_map[element_id][dimension] = support_results
  1126. # 创建分类映射
  1127. category_map = {}
  1128. for dimension_data in categorized_result.values():
  1129. element_classifications = dimension_data.get("元素分类", [])
  1130. for classification in element_classifications:
  1131. element_id = classification.get("元素id")
  1132. category_info = classification.get("分类", {})
  1133. if element_id:
  1134. category_map[element_id] = category_info
  1135. # 合并信息
  1136. final_elements = []
  1137. for element_id in filtered_ids:
  1138. base_info = extraction_map.get(element_id, {})
  1139. analysis_info = analyzed_map.get(element_id, {})
  1140. scoring_info = scored_map.get(element_id, {})
  1141. intention_info = intention_support_map.get(element_id, {})
  1142. category_info = category_map.get(element_id, {})
  1143. merged_element = {
  1144. "id": base_info.get("id"),
  1145. "名称": base_info.get("名称"),
  1146. "描述": base_info.get("描述"),
  1147. "维度": base_info.get("维度", {}),
  1148. "分类": category_info,
  1149. "共性分析": {
  1150. "出现频次": analysis_info.get("出现频次", 0),
  1151. "出现段落列表": analysis_info.get("出现段落列表", []),
  1152. "出现段落数": analysis_info.get("出现段落数", 0),
  1153. "段落覆盖率": analysis_info.get("段落覆盖率", 0.0)
  1154. },
  1155. "多维度评分": {
  1156. "灵感点": scoring_info.get("灵感点", []),
  1157. "目的点": scoring_info.get("目的点", []),
  1158. "关键点": scoring_info.get("关键点", [])
  1159. },
  1160. "意图支撑": {
  1161. "灵感点": intention_info.get("灵感点", []),
  1162. "目的点": intention_info.get("目的点", []),
  1163. "关键点": intention_info.get("关键点", [])
  1164. }
  1165. }
  1166. # 根据不同类型添加特定字段
  1167. second_level = base_info.get("维度", {}).get("二级", "")
  1168. if second_level == "具体元素":
  1169. merged_element["来源"] = base_info.get("来源", [])
  1170. elif second_level == "具象概念":
  1171. merged_element["来源"] = base_info.get("来源", [])
  1172. merged_element["字面位置"] = base_info.get("字面位置", [])
  1173. elif second_level == "抽象概念" or second_level == "隐含概念":
  1174. merged_element["类型"] = base_info.get("类型", "")
  1175. merged_element["来源"] = base_info.get("来源", {})
  1176. merged_element["推理过程"] = base_info.get("推理过程", "")
  1177. merged_element["推理层次"] = base_info.get("推理层次", 1)
  1178. final_elements.append(merged_element)
  1179. return final_elements
# ========== Helper methods ==========
  1181. def _build_section_text(self, section_division: dict) -> str:
  1182. """构建段落划分文本"""
  1183. if not section_division:
  1184. return "无段落划分信息"
  1185. sections = section_division.get("段落列表", [])
  1186. if not sections:
  1187. return "无段落信息"
  1188. def build_section_list(section_list, indent=0):
  1189. text = ""
  1190. for section in section_list:
  1191. if section.get('子项'):
  1192. text += build_section_list(section['子项'], indent + 1)
  1193. else:
  1194. section_id = section.get('id', 'N/A')
  1195. section_desc = section.get('描述', 'N/A')
  1196. content_range = section.get('内容范围', 'N/A')
  1197. text += f"{section_id}: {section_desc}\n内容范围: {content_range}\n"
  1198. return text
  1199. return "段落列表:\n" + build_section_list(sections)
  1200. def _build_post_content(self, text_data: dict) -> str:
  1201. """构建原文内容文本(用于意图支撑判断)
  1202. 这里不假设具体结构,直接以 JSON 形式展开,保证信息完整可见。
  1203. """
  1204. if not text_data:
  1205. return "无文本内容"
  1206. try:
  1207. return json.dumps(text_data, ensure_ascii=False, indent=2)
  1208. except TypeError:
  1209. # 避免非序列化对象导致报错
  1210. return str(text_data)
  1211. def _build_simple_items_text_dimension(self, elements: list, dimension_name: str) -> str:
  1212. """构建某个维度下元素列表文本(用于意图支撑判断)"""
  1213. simple_items = [
  1214. {
  1215. "id": elem.get("id", "N/A"),
  1216. "名称": elem.get("名称", "N/A"),
  1217. "描述": elem.get("描述", "N/A"),
  1218. }
  1219. for elem in elements
  1220. ]
  1221. return json.dumps(
  1222. {"维度": dimension_name, "元素列表": simple_items},
  1223. ensure_ascii=False,
  1224. indent=2,
  1225. )
  1226. def _build_simple_items_text(self, elements: list) -> str:
  1227. """构建元素列表文本"""
  1228. grouped_elements = {
  1229. "具体元素": [],
  1230. "具象概念": [],
  1231. "抽象概念": [],
  1232. }
  1233. for elem in elements:
  1234. element_type = elem.get('维度', {}).get('二级', 'N/A')
  1235. element_data = {
  1236. "id": elem.get('id', 'N/A'),
  1237. "名称": elem.get('名称', 'N/A'),
  1238. "描述": elem.get('描述', 'N/A')
  1239. }
  1240. if element_type in grouped_elements:
  1241. grouped_elements[element_type].append(element_data)
  1242. filtered_groups = {k: v for k, v in grouped_elements.items() if v}
  1243. return json.dumps(filtered_groups, ensure_ascii=False, indent=2)
  1244. def _build_points_text(self, point_type: str, points_data) -> str:
  1245. """构建点列表文本"""
  1246. if not points_data:
  1247. return f"无{point_type}信息"
  1248. filtered_points = [
  1249. {"名称": item.get(point_type, 'N/A')}
  1250. for item in points_data if isinstance(item, dict)
  1251. ]
  1252. return json.dumps(filtered_points, ensure_ascii=False, indent=2)
def _build_concrete_element_support_prompt(
    self, post_content: str, elements_text: str, points_text: str
) -> str:
    """Build the intention-support prompt for concrete elements
    (judged from the video frames only).

    Args:
        post_content: raw post content rendered as JSON text.
        elements_text: JSON rendering of the concrete-element list.
        points_text: JSON rendering of the point list.

    Returns:
        Full prompt asking the model to output, as JSON, only the
        element-point pairs with key support.
    """
    # NOTE: the prompt below is consumed by the model verbatim — do not edit.
    return f"""# 原文内容
{post_content}
# 具体元素列表
{elements_text}
# 点列表
{points_text}
# 任务
判断每个**具体元素**是否对点有关键支撑
## 具体元素定义(重要!)
- 定义:视频画面中直接观察到的单一视觉实体对象
- 判断标准:可以指着画面说"这是一个X"
- 剥离测试:去掉表达方式后,该视觉实体仍然存在
## 核心判断原则:仅基于视频画面语境
### 关键约束
1. 只看视频画面:具体元素的支撑判断**只能基于视频中的视觉实体**,不能基于文字论述
2. 视觉实体角色:该视觉实体在视频画面中的作用是什么?
- ✅ 核心展示对象:该视觉实体是画面的核心展示内容
- ❌ 辅助/装饰:该视觉实体只是背景、装饰、示意
3. 关键支撑:该视觉实体对点的表达是否关键?去掉它是否会明显削弱点的支撑?
### 判断流程
1. 理解点的意图,点想表达什么
2. 在视频画面中找到该视觉实体
3. 判断:去掉该视觉实体,是否无法完整表达点
- 如果是,支撑
- 如果不是,不支撑
### 严格标准
- 禁止使用文字内容来判断具体元素的支撑
- 禁止仅凭名称字面匹配判定支撑
- 必须基于该视觉实体在画面中的实际角色
# 输出(JSON)
只输出有关键支撑的元素-点对,不支撑的不输出
[
{{
"id": "元素id",
"名称": "元素名称",
"支撑结果": [
{{
"点": "点的名称",
"点的意图": "点想表达什么",
"支撑理由": "说明为什么去掉该视觉实体,会削弱点的表达,程度达到30%以上"
}}
]
}}
]
注意:
1. 只基于视频画面判断
2. 只输出"关键支撑"的元素-点对
3. 辅助/装饰元素直接排除,不输出
4. 必须基于视频画面中的视觉实体判断,不能做字面匹配"""
def _build_concrete_concept_support_prompt(
    self, post_content: str, elements_text: str, points_text: str
) -> str:
    """Build the intention-support prompt for concrete concepts
    (judged from the textual context only, including on-screen text).

    Args:
        post_content: raw post content rendered as JSON text.
        elements_text: JSON rendering of the concrete-concept list.
        points_text: JSON rendering of the point list.

    Returns:
        Full prompt asking the model to output, as JSON, only the
        element-point pairs with key support.
    """
    # NOTE: the prompt below is consumed by the model verbatim — do not edit.
    return f"""# 原文内容
{post_content}
# 具象概念列表
{elements_text}
# 点列表
{points_text}
# 任务
判断每个**具象概念**是否对点有关键支撑
## 具象概念定义(重要!)
- 定义:文字中字面出现的名词(包括标题、正文、字幕、视频画面中的文字)
- 判断标准:文字中实际出现,禁止语义推导
## 核心判断原则:仅基于文字语境(包含视频中的文字)
### 关键约束
1. 只看文字:具象概念的支撑判断**只能基于文字中的概念论述**,不能基于视频中的视觉实体
2. 概念角色:该概念在文字论述中的作用是什么?
- ✅ 核心论述概念:该概念是文字论述的核心对象、关键主题
- ❌ 次要提及:该概念只是顺带提及、举例说明
3. 关键支撑:该概念对点的表达是否关键?去掉它是否会明显削弱点的支撑?
### 判断流程
1. 理解点的意图,点想表达什么
2. 在标题、正文、字幕、画面文字中找到该概念出现的位置
3. 判断:去掉该段文字,是否无法完整表达点
- 如果是,支撑
- 如果不是,不支撑
### 严格标准
- 禁止用视频画面中的视觉实体来判断具象概念的支撑
- 禁止仅凭名称字面匹配判定支撑
- 必须判断该概念在文字论述中的实际角色
# 输出(JSON)
只输出有关键支撑的元素-点对,不支撑的不输出
[
{{
"id": "元素id",
"名称": "元素名称",
"支撑结果": [
{{
"点": "点的名称",
"点的意图": "点想表达什么",
"支撑理由": "说明为什么去掉该概念,会削弱点的表达,程度达到30%以上"
}}
]
}}
]
注意:
1. 只基于文字判断
2. 只输出"关键支撑"的元素-点对
3. 次要提及的概念直接排除,不输出
4. 必须基于文字中的概念论述判断,不能做字面匹配"""
def _build_abstract_concept_support_prompt(
    self, post_content: str, elements_text: str, points_text: str
) -> str:
    """Build the intention-support prompt for abstract concepts
    (judged from the context of their source elements).

    Args:
        post_content: raw post content rendered as JSON text.
        elements_text: JSON rendering of the abstract-concept list.
        points_text: JSON rendering of the point list.

    Returns:
        Full prompt asking the model to output, as JSON, only the
        element-point pairs with key support, with source tracing.
    """
    # NOTE: the prompt below is consumed by the model verbatim — do not edit.
    return f"""# 原文内容
{post_content}
# 抽象概念列表
{elements_text}
# 点列表
{points_text}
# 任务
判断每个**抽象概念**是否对点有关键支撑
## 抽象概念定义(重要!)
- 定义:从具体元素和具象概念中理解到的上位抽象
- 类型1-上位抽象(归类):是下位元素的类别、分类
- 类型2-引申含义:需要理解上下文的深层含义
- 剥离测试:去掉表达方式后,该抽象概念仍然存在
## 核心判断原则:基于来源语境
### 关键约束
1. 追溯来源:抽象概念来源于具体元素和/或具象概念,必须追溯到来源
2. 继承语境:抽象概念的语境继承自其来源
- 如果来源主要是具体元素 → 语境偏向视频画面
- 如果来源主要是具象概念 → 语境偏向文字
- 如果来源混合 → 综合判断
3. 关键支撑:该抽象概念对点的表达是否关键?
### 判断流程
1. 理解点的意图:点想表达什么?
2. 根据来源确定该抽象概念的主要语境
3. 判断:去掉该抽象概念,是否无法完整表达点
- 如果是,支撑
- 如果不是,不支撑
### 严格标准
- 必须基于来源的语境来判断
- 禁止仅凭名称字面匹配判定支撑
- 必须能够追溯到来源元素,验证支撑关系
# 输出(JSON)
只输出有关键支撑的元素-点对,不支撑的不输出
[
{{
"id": "元素id",
"名称": "元素名称",
"支撑结果": [
{{
"点": "点的名称",
"来源追溯": "该抽象概念的来源(具体元素/具象概念)及其语境",
"语境分析": "基于来源确定的语境(画面/文字/混合)",
"支撑理由": "说明该抽象概念为什么对该点有关键支撑"
}}
]
}}
]
注意:
1. 必须追溯到来源元素
2. 必须继承来源的语境来判断
3. 只输出"关键支撑"的元素-点对
4. 禁止字面匹配"""
# Intention-support judgment for implicit concepts ("隐含概念") has been removed.
  1415. def _count_sections(self, section_division: dict) -> int:
  1416. """统计段落总数(只统计叶子节点)"""
  1417. if not section_division:
  1418. return 0
  1419. sections = section_division.get("段落列表", [])
  1420. if not sections:
  1421. return 0
  1422. def count_leaf_nodes(section_list):
  1423. count = 0
  1424. for section in section_list:
  1425. children = section.get("子项", [])
  1426. if children:
  1427. count += count_leaf_nodes(children)
  1428. else:
  1429. count += 1
  1430. return count
  1431. return count_leaf_nodes(sections)
  1432. def _build_messages(self, state: dict) -> List[dict]:
  1433. """构建消息 - 本Agent不使用此方法"""
  1434. return []
  1435. def _update_state(self, state: dict, response) -> dict:
  1436. """更新状态 - 本Agent不使用此方法"""
  1437. return state