#!/usr/bin/env python3
"""Tab 3 content generator - script points (element list)."""

import html as html_module
from typing import Dict, Any, List

# Intent point types, in the order they are reported throughout this module.
_POINT_TYPES = ('灵感点', '目的点', '关键点')

# Keys copied from a form element's precomputed "权重明细" into the result.
_FORM_DETAIL_KEYS = (
    '频次分',
    '覆盖段落数分',
    '覆盖率分',
    '灵感点支撑分',
    '目的点支撑分',
    '关键点支撑分',
)


def _number(value: Any) -> float:
    """Coerce a possibly missing metric (None, '', 0) to float, defaulting to 0."""
    return float(value or 0)


def _is_form_element(element: Dict[str, Any]) -> bool:
    """Return True when the element's "维度" dict marks its first level as "形式"."""
    dimension = element.get('维度')
    return isinstance(dimension, dict) and dimension.get('一级') == '形式'


def _commonality_metrics(element: Dict[str, Any]) -> tuple:
    """Return (coverage, frequency) from "共性分析", treating None/missing as 0."""
    commonality = element.get('共性分析') or {}
    return (
        _number(commonality.get('段落覆盖率')),
        _number(commonality.get('出现频次')),
    )


def get_intent_support_data(element: Dict[str, Any]) -> Dict[str, Any]:
    """Return the element's intent-support data (new and legacy layouts).

    The "意图支撑" field is preferred; the legacy "多维度评分" field is used as a
    fallback. A field is only accepted when it is a non-empty dict.

    Args:
        element: Element data.

    Returns:
        The intent-support dict (keyed by "灵感点" / "目的点" / "关键点"), or an
        empty dict when neither field holds a non-empty dict.
    """
    for field in ('意图支撑', '多维度评分'):
        candidate = element.get(field)
        if candidate and isinstance(candidate, dict):
            return candidate
    return {}


def calculate_intent_support_count(element: Dict[str, Any]) -> int:
    """Count the intent points the element supports.

    Every entry listed under "灵感点", "目的点" and "关键点" in the intent-support
    data counts as one supported point.

    Args:
        element: Element data.

    Returns:
        Total number of supported intent points.
    """
    support_data = get_intent_support_data(element)
    return sum(len(support_data.get(point_type) or []) for point_type in _POINT_TYPES)


def get_element_coverage(element: Dict[str, Any]) -> float:
    """Return the paragraph coverage of a form or substance element.

    Substance elements read "段落覆盖率" from "共性分析" (returned as stored, not
    clamped). Form elements derive it from "覆盖率分" in "权重明细": the original
    comments state that score is coverage * 50 * 0.3 = coverage * 15, so
    coverage = score / 15, clamped to [0.0, 1.0].

    Args:
        element: Element data.

    Returns:
        The coverage value; 0.0 when it cannot be determined.
    """
    if not _is_form_element(element):
        return _commonality_metrics(element)[0]

    weight_details = element.get('权重明细')
    # A missing or malformed (non-dict) detail record means "no coverage info".
    if not isinstance(weight_details, dict):
        return 0.0
    coverage_score = _number(weight_details.get('覆盖率分'))
    if coverage_score <= 0:
        return 0.0
    return min(1.0, max(0.0, coverage_score / 15.0))


def get_support_stats(element: Dict[str, Any]) -> Dict[str, int]:
    """Return the per-type support counts of an element.

    A precomputed dict under "支撑统计" (or the legacy "support_stats" key) is
    used when present, with missing/None counts filled in as 0. Otherwise the
    counts are derived from the intent-support data.

    Args:
        element: Element data.

    Returns:
        Dict with the keys "灵感点数量", "目的点数量" and "关键点数量".
    """
    for field in ('支撑统计', 'support_stats'):
        precomputed = element.get(field)
        if isinstance(precomputed, dict):
            return {
                f'{point_type}数量': int(precomputed.get(f'{point_type}数量', 0) or 0)
                for point_type in _POINT_TYPES
            }

    support_data = get_intent_support_data(element)
    return {
        f'{point_type}数量': len(support_data.get(point_type) or [])
        for point_type in _POINT_TYPES
    }


def compute_weight_scores(element: Dict[str, Any]) -> Dict[str, Any]:
    """Compute the weight-related scores of an element.

    Form elements with a precomputed "权重分" and a dict "权重明细" report those
    values directly (raw total = "共性总分" + "支撑总分"). All other elements are
    scored from "共性分析" and the support counts:

    - frequency score: min(30, frequency / 12 * 30)
    - coverage score:  coverage * 30, clamped to [0, 30]
    - inspiration:     25 per point, capped at 25
    - purpose:         5 per point, capped at 15
    - key point:       1 per point, capped at 10

    The raw total (at most 110) is rescaled: weight = min(100, raw * 100 / 110).

    Args:
        element: Element data.

    Returns:
        Dict with "weight_score", "raw_total", "details" (sub-scores) and
        "support_stats".
    """
    support_stats = get_support_stats(element)

    if _is_form_element(element):
        weight_score = element.get('权重分')
        weight_details = element.get('权重明细')
        # Only trust the precomputed record when it is actually a dict;
        # otherwise fall through to the computed scores below.
        if weight_score is not None and isinstance(weight_details, dict):
            raw_total = (
                _number(weight_details.get('共性总分'))
                + _number(weight_details.get('支撑总分'))
            )
            return {
                'weight_score': float(weight_score),
                'raw_total': raw_total,
                'details': {
                    key: _number(weight_details.get(key)) for key in _FORM_DETAIL_KEYS
                },
                'support_stats': support_stats,
            }

    commonality = element.get('共性分析') or {}
    coverage = float(commonality.get('段落覆盖率', 0.0) or 0.0)
    frequency = int(commonality.get('出现频次', 0) or 0)

    # 12 occurrences is the reference ceiling for a "high frequency" element.
    freq_score = min(30.0, frequency * 30.0 / 12.0) if frequency > 0 else 0.0
    coverage_score = max(0.0, min(30.0, coverage * 30.0))
    inspiration_score = min(25.0, float(support_stats.get('灵感点数量', 0) or 0) * 25.0)
    purpose_score = min(15.0, float(support_stats.get('目的点数量', 0) or 0) * 5.0)
    keypoint_score = min(10.0, float(support_stats.get('关键点数量', 0) or 0) * 1.0)

    raw_total = (
        freq_score + coverage_score + inspiration_score + purpose_score + keypoint_score
    )
    weight_score = min(100.0, raw_total * 100.0 / 110.0) if raw_total > 0 else 0.0

    return {
        'weight_score': weight_score,
        'raw_total': raw_total,
        'details': {
            '频次分': freq_score,
            '覆盖率分': coverage_score,
            '灵感点支撑分': inspiration_score,
            '目的点支撑分': purpose_score,
            '关键点支撑分': keypoint_score,
        },
        'support_stats': support_stats,
    }


def determine_dominant_factor(element: Dict[str, Any], all_elements: List[Dict[str, Any]]) -> str:
    """Decide which metric dominates the element's ranking.

    Coverage is used as stored; frequency and intent-support count are
    normalised by an assumed maximum of 10 and capped at 1.0. The largest of
    the three wins; ties resolve in the order coverage, frequency,
    intent_support. Metrics stored as None are treated as 0.

    Args:
        element: The current element.
        all_elements: All elements of the same group; only checked for emptiness.

    Returns:
        'coverage', 'frequency' or 'intent_support'.
    """
    if not all_elements:
        return 'coverage'

    coverage, frequency = _commonality_metrics(element)
    intent_count = calculate_intent_support_count(element)

    # Dict insertion order doubles as the tie-break order for max().
    scores = {
        'coverage': coverage,
        'frequency': min(frequency / 10.0, 1.0),
        'intent_support': min(intent_count / 10.0, 1.0),
    }
    return max(scores, key=scores.get)


def sort_elements_by_coverage_and_frequency(elements: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Sort elements by coverage, then frequency, then intent-support count.

    All three keys sort in descending order; the sort is stable, so elements
    with equal keys keep their input order. Metrics stored as None count as 0.

    Args:
        elements: Element list.

    Returns:
        A new, sorted list.
    """
    def sort_key(elem: Dict[str, Any]) -> tuple:
        coverage, frequency = _commonality_metrics(elem)
        # Negated values turn the ascending sort into a descending one.
        return (-coverage, -frequency, -calculate_intent_support_count(elem))

    return sorted(elements, key=sort_key)


def get_element_category(element: Dict[str, Any]) -> str:
    """Return the element's category as a display string.

    Supports a dict with "一级分类" / "二级分类", a list of category levels, or a
    plain value. Empty or missing parts are skipped, and "未分类" is returned
    when nothing usable remains.

    Args:
        element: Element data.

    Returns:
        The category name, with levels joined by " - ".
    """
    category = element.get('分类', '未分类')

    if isinstance(category, dict):
        level1 = category.get('一级分类', '')
        level2 = category.get('二级分类', '')
        if level1 and level2:
            return f"{level1} - {level2}"
        return str(level1) if level1 else '未分类'

    if isinstance(category, list):
        # Skip empty/None levels and stringify the rest so join() cannot fail.
        parts = [str(part) for part in category if part]
        return ' - '.join(parts) if parts else '未分类'

    return str(category) if category else '未分类'


def _split_category(element: Dict[str, Any]) -> tuple:
    """Return (level1, level2) names for an element; level2 is '' when absent."""
    category = element.get('分类', {})
    if isinstance(category, dict):
        level1 = category.get('一级分类')
        level2 = category.get('二级分类')
    elif isinstance(category, list):
        # List layout: first item is level 1, second item is level 2.
        level1 = category[0] if len(category) > 0 else None
        level2 = category[1] if len(category) > 1 else None
    else:
        level1, level2 = category, None
    return (
        str(level1) if level1 else '未分类',
        str(level2) if level2 else '',
    )


def _average_scores(elements: List[Dict[str, Any]]) -> tuple:
    """Return the (coverage, frequency, intent count) averages of a group."""
    if not elements:
        return (0.0, 0, 0.0)
    count = len(elements)
    return (
        sum(get_element_coverage(e) for e in elements) / count,
        sum(_number((e.get('共性分析') or {}).get('出现频次')) for e in elements) / count,
        sum(calculate_intent_support_count(e) for e in elements) / count,
    )


def _rank_descending(scores: Dict[str, tuple]) -> List[str]:
    """Return the score names ordered from highest to lowest score tuple."""
    return sorted(scores, key=lambda name: tuple(-value for value in scores[name]))


def group_elements_by_hierarchical_category(elements: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Organise elements as a tree: level-1 category -> level-2 category -> elements.

    Children of one parent are shown uniformly: when a level-1 category holds
    both direct elements and level-2 categories, the direct elements are moved
    into a "未分类" level-2 category. A level-1 category with only direct
    elements keeps them in its "elements" list.

    Elements inside each group are ordered by
    sort_elements_by_coverage_and_frequency; level-1 and level-2 categories are
    ordered by their average coverage, frequency and intent-support count
    (all descending).

    Args:
        elements: Element list.

    Returns:
        Ordered dict mapping each level-1 name to
        {'elements': [...], 'level2_groups': {level2_name: [...]}}.
    """
    # 1. Bucket the elements by level-1 / level-2 category.
    level1_groups: Dict[str, Any] = {}
    for elem in elements:
        level1, level2 = _split_category(elem)
        group = level1_groups.setdefault(level1, {'elements': [], 'level2_groups': {}})
        if level2:
            group['level2_groups'].setdefault(level2, []).append(elem)
        else:
            group['elements'].append(elem)

    # 2. Mixed parents: push direct elements down into a "未分类" level-2 group.
    for group in level1_groups.values():
        if group['elements'] and group['level2_groups']:
            group['level2_groups'].setdefault('未分类', []).extend(group['elements'])
            group['elements'] = []

    # 3. Sort the elements inside every group.
    for group in level1_groups.values():
        group['elements'] = sort_elements_by_coverage_and_frequency(group['elements'])
        for level2_name, level2_elements in group['level2_groups'].items():
            group['level2_groups'][level2_name] = sort_elements_by_coverage_and_frequency(
                level2_elements
            )

    # 4. Order the level-2 groups and score each level-1 category.
    level1_scores = {}
    for level1_name, group in level1_groups.items():
        members = list(group['elements'])
        level2_scores = {}
        for level2_name, level2_elements in group['level2_groups'].items():
            members.extend(level2_elements)
            level2_scores[level2_name] = _average_scores(level2_elements)
        group['level2_groups'] = {
            name: group['level2_groups'][name] for name in _rank_descending(level2_scores)
        }
        level1_scores[level1_name] = _average_scores(members)

    # 5. Return the level-1 categories in ranked order.
    return {name: level1_groups[name] for name in _rank_descending(level1_scores)}
返回排序后的结构 return {level1_name: level1_groups[level1_name] for level1_name in sorted_level1} def render_element_item(element: Dict[str, Any], all_elements: List[Dict[str, Any]] = None) -> str: """渲染单个元素项的HTML(卡片样式,详细信息在弹窗中) Args: element: 元素数据 all_elements: 同组所有元素(用于计算主导因素) """ elem_id = element.get('id', '') name = element.get('名称') or '' # 处理None的情况 description = element.get('描述') or '' # 处理None的情况 # 获取类型和维度(兼容新旧结构) dimension = element.get('维度', {}) if isinstance(dimension, dict): elem_type = dimension.get('一级', '') elem_type_level2 = dimension.get('二级', '') else: elem_type = element.get('类型', '') elem_type_level2 = '' # 获取共性分析(防止为None) commonality = element.get('共性分析') or {} coverage = commonality.get('段落覆盖率', 0.0) frequency = commonality.get('出现频次', 0) intent_count = calculate_intent_support_count(element) # 计算权重得分 weight_info = compute_weight_scores(element) # 计算主导因素 dominant_factor = 'coverage' # 默认 if all_elements: dominant_factor = determine_dominant_factor(element, all_elements) # 根据主导因素确定边框颜色 border_color_class = f'dominant-{dominant_factor}' # 检查是否有详细信息 category_def = element.get('分类定义', '') paragraphs_list = commonality.get('出现段落列表', []) source = element.get('来源', []) intent_support = get_intent_support_data(element) has_details = bool(category_def or paragraphs_list or source or intent_support or weight_info.get('raw_total', 0) > 0) # 判断是否为形式元素 is_form = isinstance(dimension, dict) and dimension.get('一级') == '形式' # 卡片样式 html = f'
  • \n' html += '
    \n' # 卡片头部 html += '
    \n' if elem_id: html += f'#{elem_id}\n' html += f'

    {html_module.escape(name)}

    \n' html += '
    \n' # 统计指标 html += '
    \n' if weight_info.get('raw_total', 0) > 0: html += f'权重分: {weight_info["weight_score"]:.1f}\n' if is_form: html += f'支撑: {intent_count}\n' else: coverage_highlight = 'stat-highlight' if dominant_factor == 'coverage' else '' frequency_highlight = 'stat-highlight' if dominant_factor == 'frequency' else '' intent_highlight = 'stat-highlight' if dominant_factor == 'intent_support' else '' html += f'覆盖率: {coverage:.2%}\n' html += f'频次: {frequency}\n' html += f'意图支撑: {intent_count}\n' html += '
    \n' # 描述 if description: html += f'
    {html_module.escape(description)}
    \n' # 查看详情按钮 if has_details: html += f'\n' html += '
    \n' # element-card-body html += '
  • \n' return html def render_element_modal(element: Dict[str, Any]) -> str: """渲染元素详情的弹窗内容 Args: element: 元素数据 """ elem_id = element.get('id', '') name = element.get('名称') or '' description = element.get('描述') or '' # 获取类型和维度 dimension = element.get('维度', {}) if isinstance(dimension, dict): elem_type = dimension.get('一级', '') elem_type_level2 = dimension.get('二级', '') else: elem_type = element.get('类型', '') elem_type_level2 = '' # 获取分类 category_data = element.get('分类', '') category_def = element.get('分类定义', '') # 获取共性分析 commonality = element.get('共性分析') or {} coverage = commonality.get('段落覆盖率', 0.0) frequency = commonality.get('出现频次', 0) paragraphs_list = commonality.get('出现段落列表', []) source = element.get('来源', []) intent_support = get_intent_support_data(element) weight_info = compute_weight_scores(element) html = f'
    \n' html += '\n' html += '\n' html += '\n' return html def render_element_modal(element: Dict[str, Any]) -> str: """渲染元素详情的弹窗内容 Args: element: 元素数据 """ elem_id = element.get('id', '') name = element.get('名称') or '' description = element.get('描述') or '' # 获取类型和维度 dimension = element.get('维度', {}) if isinstance(dimension, dict): elem_type = dimension.get('一级', '') elem_type_level2 = dimension.get('二级', '') else: elem_type = element.get('类型', '') elem_type_level2 = '' # 获取分类 category_data = element.get('分类', '') category_def = element.get('分类定义', '') # 获取共性分析 commonality = element.get('共性分析') or {} coverage = commonality.get('段落覆盖率', 0.0) frequency = commonality.get('出现频次', 0) paragraphs_list = commonality.get('出现段落列表', []) source = element.get('来源', []) intent_support = get_intent_support_data(element) weight_info = compute_weight_scores(element) html = f'
    \n' html += '\n' html += '\n' # modal-body html += '
    \n' # element-modal-content return html def generate_tab3_content(data: Dict[str, Any]) -> str: """生成Tab3内容:按层次展示(实质/形式 → 具体元素/具体概念/抽象概念 → 树形展示)""" html = '\n' return html