#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ How解构结果可视化脚本 V2 改进版: - 使用标签页展示多个帖子 - 参考 visualize_inspiration_points.py 的帖子详情展示 - 分层可折叠的匹配结果 """ import json from pathlib import Path from typing import Dict, List import sys import html as html_module # 添加项目根目录到路径 project_root = Path(__file__).parent.parent.parent sys.path.insert(0, str(project_root)) from script.data_processing.path_config import PathConfig # ============ 相似度阈值配置 ============ SIMILARITY_THRESHOLD_SAME = 0.8 # >= 此值为"相同" SIMILARITY_THRESHOLD_SIMILAR = 0.6 # >= 此值为"相似",< SAME阈值 # < SIMILAR阈值 为"无关" # 相似度对应的颜色 SIMILARITY_COLOR_SAME = "#10b981" # 绿色 SIMILARITY_COLOR_SIMILAR = "#f59e0b" # 橙色 SIMILARITY_COLOR_UNRELATED = "#9ca3af" # 灰色 def get_similarity_status(similarity: float) -> tuple: """根据相似度返回状态标签和颜色 Returns: tuple: (label, color, css_class) """ if similarity >= SIMILARITY_THRESHOLD_SAME: return ("相同", SIMILARITY_COLOR_SAME, "same") elif similarity >= SIMILARITY_THRESHOLD_SIMILAR: return ("相似", SIMILARITY_COLOR_SIMILAR, "similar") else: return ("无关", SIMILARITY_COLOR_UNRELATED, "unrelated") # 注意:已改用基于相似度的显示方式,不再使用关系类型 # def get_relation_color(relation: str) -> str: # """根据关系类型返回对应的颜色""" # color_map = { # "same": "#10b981", # 绿色 - 同义 # "contains": "#3b82f6", # 蓝色 - 包含 # "contained_by": "#8b5cf6", # 紫色 - 被包含 # "coordinate": "#f59e0b", # 橙色 - 同级 # "overlap": "#ec4899", # 粉色 - 部分重叠 # "related": "#6366f1", # 靛蓝 - 相关 # "unrelated": "#9ca3af" # 灰色 - 无关 # } # return color_map.get(relation, "#9ca3af") # # # def get_relation_label(relation: str) -> str: # """返回关系类型的中文标签""" # label_map = { # "same": "同义", # "contains": "包含", # "contained_by": "被包含", # "coordinate": "同级", # "overlap": "部分重叠", # "related": "相关", # "unrelated": "无关" # } # return label_map.get(relation, relation) def generate_historical_post_card_html(post_detail: Dict, inspiration_point: Dict) -> str: """生成历史帖子的紧凑卡片HTML""" title = post_detail.get("title", "无标题") body_text = post_detail.get("body_text", "") images = post_detail.get("images", 
[]) like_count = post_detail.get("like_count", 0) collect_count = post_detail.get("collect_count", 0) comment_count = post_detail.get("comment_count", 0) author = post_detail.get("channel_account_name", "") link = post_detail.get("link", "#") publish_time = post_detail.get("publish_time", "") # 获取灵感点信息 point_name = inspiration_point.get("点的名称", "") point_desc = inspiration_point.get("点的描述", "") # 准备详情数据(用于模态框) import json post_detail_data = { "title": title, "body_text": body_text, "images": images, "like_count": like_count, "comment_count": comment_count, "collect_count": collect_count, "author": author, "publish_time": publish_time, "link": link } post_data_json = json.dumps(post_detail_data, ensure_ascii=False) post_data_json_escaped = html_module.escape(post_data_json) # 截取正文预览(前80个字符) body_preview = body_text[:80] + "..." if len(body_text) > 80 else body_text # 生成缩略图 thumbnail_html = "" if images: thumbnail_html = f'Post thumbnail' html = f'''
{thumbnail_html} {f'
{len(images)}
' if len(images) > 1 else ''}
{html_module.escape(title)}
灵感点 {html_module.escape(point_name)}
{html_module.escape(point_desc[:100])}{"..." if len(point_desc) > 100 else ""}
❤ {like_count} ⭐ {collect_count} 查看原帖 →
''' return html def generate_post_detail_html(post_data: Dict, post_idx: int) -> str: """生成帖子详情HTML(紧凑的卡片样式,点击可展开)""" post_detail = post_data.get("帖子详情", {}) title = post_detail.get("title", "无标题") body_text = post_detail.get("body_text", "") images = post_detail.get("images", []) like_count = post_detail.get("like_count", 0) comment_count = post_detail.get("comment_count", 0) collect_count = post_detail.get("collect_count", 0) author = post_detail.get("channel_account_name", "") publish_time = post_detail.get("publish_time", "") link = post_detail.get("link", "") post_id = post_data.get("帖子id", f"post-{post_idx}") # 准备详情数据(用于模态框) import json post_detail_data = { "title": title, "body_text": body_text, "images": images, "like_count": like_count, "comment_count": comment_count, "collect_count": collect_count, "author": author, "publish_time": publish_time, "link": link } post_data_json = json.dumps(post_detail_data, ensure_ascii=False) post_data_json_escaped = html_module.escape(post_data_json) # 生成缩略图HTML thumbnail_html = "" if images and len(images) > 0: # 使用第一张图片作为缩略图,添加懒加载 thumbnail_html = f'缩略图' else: thumbnail_html = '
📄
' # 截断正文用于预览 body_preview = body_text[:80] + "..." if len(body_text) > 80 else body_text html = f'''
{thumbnail_html} {f'
📷 {len(images)}
' if len(images) > 1 else ''}
{html_module.escape(title)}
{html_module.escape(body_preview) if body_preview else "暂无正文"}
👤 {html_module.escape(author)} 📅 {publish_time}
👍 {like_count} 💬 {comment_count if comment_count else 0} ⭐ {collect_count if collect_count else 0}
''' return html def generate_inspiration_detail_html(inspiration_point: Dict, feature_status_map: Dict[str, str] = None, point_type: str = "灵感点") -> str: """生成点详情HTML Args: inspiration_point: 点数据 feature_status_map: 特征名称到状态的映射 {特征名称: "相同"|"相似"|"无关"} point_type: 点类型(灵感点/关键点/目的点) """ name = inspiration_point.get("名称", "") desc = inspiration_point.get("描述", "") features = inspiration_point.get("特征列表", []) if feature_status_map is None: feature_status_map = {} # 计算该灵感点的整体结论 feature_statuses = [] features_html_list = [] for f in features: feature_name = f if isinstance(f, str) else f.get("特征名称", "") weight = f.get("权重", 1.0) if isinstance(f, dict) else 1.0 # 获取该特征的状态 status = feature_status_map.get(feature_name, "无关") feature_statuses.append(status) if status == "相同": status_class = "feature-same" status_label = "相同" elif status == "相似": status_class = "feature-similar" status_label = "相似" else: status_class = "feature-unrelated" status_label = "无关" features_html_list.append( f'' f'{status_label} ' f'{html_module.escape(feature_name)}' f'' ) features_html = "".join(features_html_list) # 计算灵感点结论 has_same = "相同" in feature_statuses has_similar = "相似" in feature_statuses has_unrelated = "无关" in feature_statuses if not has_unrelated: # 没有无关的 -> 找到 insp_conclusion = "找到" insp_conclusion_class = "insp-conclusion-found" elif has_same or has_similar: # 有相同或相似,但也有无关 -> 部分找到 insp_conclusion = "部分找到" insp_conclusion_class = "insp-conclusion-partial" else: # 都是无关 -> 都找不到 insp_conclusion = "都找不到" insp_conclusion_class = "insp-conclusion-not-found" # 根据点类型设置图标 point_icons = { "灵感点": "💡", "关键点": "🔑", "目的点": "🎯" } point_icon = point_icons.get(point_type, "💡") html = f'''
{point_icon} {point_type}

{html_module.escape(name)}

{insp_conclusion}
描述:
{html_module.escape(desc)}
特征列表:
{features_html}
'''
    return html


def load_feature_category_mapping(config: PathConfig) -> Dict:
    """Load the mapping from feature name to its categories."""
    mapping_file = config.feature_category_mapping_file
    try:
        with open(mapping_file, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Best-effort: a missing/corrupt mapping file degrades to an empty map.
        print(f"警告: 无法加载特征分类映射文件: {e}")
        return {}


def load_feature_source_mapping(config: PathConfig) -> Dict:
    """Load the mapping from feature name to the source posts it came from."""
    mapping_file = config.feature_source_mapping_file
    try:
        with open(mapping_file, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Flatten into an easy-to-query shape: {feature_name: [sources]}
        result = {}
        for feature_type in ["灵感点", "关键点", "目的点"]:
            if feature_type in data:
                for item in data[feature_type]:
                    feature_name = item.get("特征名称")
                    if feature_name:
                        result[feature_name] = item.get("特征来源", [])
        return result
    except Exception as e:
        print(f"警告: 无法加载特征来源映射文件: {e}")
        return {}


def generate_single_match_html(match: Dict, match_idx: int, post_idx: int,
                               insp_idx: int, feature_idx: int,
                               category_mapping: Dict = None,
                               source_mapping: Dict = None,
                               current_point_type: str = "灵感点") -> str:
    """Render the HTML for a single persona-feature match.

    Args:
        match: single match record.
        match_idx: index of this match item.
        post_idx: index of the post.
        insp_idx: index of the inspiration point.
        feature_idx: index of the feature.
        category_mapping: feature-name -> category mapping.
        source_mapping: feature-name -> source-post mapping.
        current_point_type: type of the current point (灵感点/关键点/目的点).
    """
    persona_name = match.get("人设特征名称", "")
    feature_type = match.get("特征类型", "")
    feature_categories = match.get("特征分类", [])
    persona_level = match.get("人设特征层级", "")
    match_result = match.get("匹配结果", {})
    similarity = match_result.get("相似度", 0.0)
    explanation = match_result.get("说明", "")
    # Pick color/label from the similarity bucket.
    label, color, _ = get_similarity_status(similarity)
    match_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-match-{match_idx}"
    # Same hierarchy level as the current point? (gets a highlighted style)
    is_same_level = (persona_level == current_point_type)
    same_level_class = "match-same-level" if is_same_level else ""
    # Combined "level-type" badge.
    combined_badge_html = ""
    if persona_level and feature_type:
        combined_text = f"{persona_level}-{feature_type}"
        # NOTE(review): the markup of these badge f-strings appears to have
        # been stripped during extraction; kept verbatim — verify against VCS.
        if is_same_level:
            combined_badge_html = f'[{html_module.escape(combined_text)}]'
        else:
combined_badge_html = f'[{html_module.escape(combined_text)}]' categories_badge_html = "" if feature_categories: categories_text = " / ".join(feature_categories) categories_badge_html = f'{html_module.escape(categories_text)}' # 获取该人设特征的分类信息 categories_html = "" if category_mapping and persona_name: found_categories = None # 依次在灵感点、关键点、目的点中查找 for persona_type in ["灵感点", "关键点", "目的点"]: if persona_type in category_mapping: type_mapping = category_mapping[persona_type] if persona_name in type_mapping: found_categories = type_mapping[persona_name].get("所属分类", []) break if found_categories: # 简洁样式:[大类/中类/小类] categories_reversed = list(reversed(found_categories)) categories_text = "/".join(categories_reversed) categories_html = f'[{html_module.escape(categories_text)}]' # 获取该人设特征的历史帖子来源 historical_posts_html = "" if source_mapping and persona_name and persona_name in source_mapping: source_list = source_mapping[persona_name] if source_list: historical_cards = [] for source_item in source_list: post_detail = source_item.get("帖子详情", {}) if post_detail: card_html = generate_historical_post_card_html(post_detail, source_item) historical_cards.append(card_html) if historical_cards: historical_posts_html = f'''

历史帖子来源

{"".join(historical_cards)}
''' # 生成历史帖子HTML historical_posts_html = "" if source_mapping and persona_name and persona_name in source_mapping: source_list = source_mapping[persona_name] if source_list: for source_item in source_list[:5]: # 最多5个 post_detail = source_item.get("帖子详情", {}) if post_detail: card_html = generate_historical_post_card_html(post_detail, source_item) historical_posts_html += card_html # 将数据编码到data属性中 import html as html_encode data_explanation = html_encode.escape(explanation) data_historical = html_encode.escape(historical_posts_html) # 生成紧凑的匹配项HTML(可点击,弹出模态框) html = f'''
{combined_badge_html} {html_module.escape(persona_name)} 相似度: {similarity:.2f} {label}
''' return html def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_idx: int, post_idx: int, category_mapping: Dict = None, source_mapping: Dict = None, current_point_type: str = "灵感点", feature_number: int = 1) -> str: """生成可折叠的匹配结果HTML Args: how_steps: how步骤列表 feature_idx: 特征索引 insp_idx: 灵感点索引 post_idx: 帖子索引 category_mapping: 特征分类映射 source_mapping: 特征来源映射 current_point_type: 当前点的类型(灵感点/关键点/目的点) feature_number: 特征序号(从1开始) """ if not how_steps or len(how_steps) == 0: return "" step = how_steps[0] features = step.get("特征列表", []) if feature_idx >= len(features): return "" feature_data = features[feature_idx] feature_name = feature_data.get("特征名称", "") feature_weight = feature_data.get("权重", 1.0) match_results = feature_data.get("匹配结果", []) if category_mapping is None: category_mapping = {} # 按相似度排序 sorted_matches = sorted(match_results, key=lambda x: x.get("匹配结果", {}).get("相似度", 0), reverse=True) # 找出最高相似度,确定状态 max_similarity = 0.0 if match_results: max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results) # 根据最高相似度确定状态 status, _, status_suffix = get_similarity_status(max_similarity) status_class = f"status-{status_suffix}" found_status_html = f'{status}' # 统计相似度分布 same_label = f"相同 (≥{SIMILARITY_THRESHOLD_SAME})" similar_label = f"相似 ({SIMILARITY_THRESHOLD_SIMILAR}-{SIMILARITY_THRESHOLD_SAME})" unrelated_label = f"无关 (<{SIMILARITY_THRESHOLD_SIMILAR})" similarity_ranges = {same_label: 0, similar_label: 0, unrelated_label: 0} for match in match_results: similarity = match.get("匹配结果", {}).get("相似度", 0) status_label, _, _ = get_similarity_status(similarity) if status_label == "相同": similarity_ranges[same_label] += 1 elif status_label == "相似": similarity_ranges[similar_label] += 1 else: similarity_ranges[unrelated_label] += 1 # 生成统计信息 stats_items = [] range_colors = { same_label: SIMILARITY_COLOR_SAME, similar_label: SIMILARITY_COLOR_SIMILAR, unrelated_label: SIMILARITY_COLOR_UNRELATED } for range_name, count in 
similarity_ranges.items(): if count > 0: color = range_colors[range_name] stats_items.append(f'{range_name}: {count}') stats_html = "".join(stats_items) # 按人设特征层级分组匹配项 level_groups = { "灵感点": {"标签": [], "分类": []}, "关键点": {"标签": [], "分类": []}, "目的点": {"标签": [], "分类": []} } for i, match in enumerate(sorted_matches): persona_level = match.get("人设特征层级", "") feature_type = match.get("特征类型", "") if persona_level in level_groups and feature_type in ["标签", "分类"]: level_groups[persona_level][feature_type].append((i, match)) # 生成分组的匹配项HTML matches_html = "" # 按层级顺序显示:灵感点 -> 关键点 -> 目的点 level_index = 1 for level_name in ["灵感点", "关键点", "目的点"]: level_data = level_groups[level_name] total_count = len(level_data["标签"]) + len(level_data["分类"]) if total_count == 0: continue # 生成该层级的折叠区域 level_section_id = f"post-{post_idx}-{current_point_type}-{insp_idx}-feat-{feature_idx}-level-{level_name}" # 找出该层级的最高分匹配 all_level_matches = level_data["标签"] + level_data["分类"] top_match = None max_similarity = 0 for _, match in all_level_matches: similarity = match.get("匹配结果", {}).get("相似度", 0) if similarity > max_similarity: max_similarity = similarity top_match = match # 生成最高分特征信息 top_match_html = "" if top_match: top_persona_name = top_match.get("人设特征名称", "") top_feature_type = top_match.get("特征类型", "") similarity_label, similarity_color, _ = get_similarity_status(max_similarity) top_match_html = f'''
最高: [{html_module.escape(top_feature_type)}] {html_module.escape(top_persona_name)} {max_similarity:.2f} {similarity_label}
''' # 计算该层级的相似度分布 level_stats = {"相同": 0, "相似": 0, "无关": 0} for _, match in all_level_matches: similarity = match.get("匹配结果", {}).get("相似度", 0) stat_label, _, _ = get_similarity_status(similarity) level_stats[stat_label] += 1 # 生成统计标签 level_stats_html = "" if level_stats["相同"] > 0: level_stats_html += f'相同: {level_stats["相同"]}' if level_stats["相似"] > 0: level_stats_html += f'相似: {level_stats["相似"]}' if level_stats["无关"] > 0: level_stats_html += f'无关: {level_stats["无关"]}' matches_html += f'''

{feature_number}.{level_index} 匹配人设{level_name} ({total_count})

{level_stats_html}
{top_match_html}
''' level_index += 1 section_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-section" # 找出所有匹配中的最高分 overall_top_match = None overall_max_similarity = 0 for match in match_results: similarity = match.get("匹配结果", {}).get("相似度", 0) if similarity > overall_max_similarity: overall_max_similarity = similarity overall_top_match = match # 生成最高分信息 overall_top_html = "" if overall_top_match: top_persona_name = overall_top_match.get("人设特征名称", "") top_feature_type = overall_top_match.get("特征类型", "") top_persona_level = overall_top_match.get("人设特征层级", "") top_label, top_color, _ = get_similarity_status(overall_max_similarity) overall_top_html = f'''
最高: [{html_module.escape(top_persona_level)}-{html_module.escape(top_feature_type)}] {html_module.escape(top_persona_name)} {overall_max_similarity:.2f} {top_label}
''' html = f'''

{feature_number}. 匹配结果: {html_module.escape(feature_name)}

{found_status_html}
{stats_html}
{overall_top_html}
{matches_html}
''' return html def generate_toc_html(post_data: Dict, post_idx: int, feature_status_map: Dict[str, str] = None, overall_conclusion: str = "") -> str: """生成目录导航HTML Args: post_data: 帖子数据 post_idx: 帖子索引 feature_status_map: 特征名称到状态的映射 {特征名称: "相同"|"相似"|"无关"} overall_conclusion: 整体结论 """ how_result = post_data.get("how解构结果", {}) if feature_status_map is None: feature_status_map = {} toc_items = [] # 帖子详情 toc_items.append(f'
帖子详情 帖子信息
') # 处理不同类型的点 point_types = [ ("灵感点列表", "灵感点", "toc-badge-inspiration", "💡"), ("目的点列表", "目的点", "toc-badge-purpose", "🎯"), ("关键点列表", "关键点", "toc-badge-key", "🔑") ] for list_key, point_name, badge_class, icon in point_types: point_list = how_result.get(list_key, []) if not point_list: continue # 点类型分组标题 toc_items.append(f'
{icon} {point_name}
') for point_idx, point in enumerate(point_list): name = point.get("名称", f"{point_name} {point_idx + 1}") name_short = name[:18] + "..." if len(name) > 18 else name # 计算该点的整体状态 how_steps = point.get("how步骤列表", []) point_status = "无关" has_same = False has_similar = False if how_steps: features = how_steps[0].get("特征列表", []) for feature_data in features: feature_name = feature_data.get("特征名称", "") status = feature_status_map.get(feature_name, "无关") if status == "相同": has_same = True break elif status == "相似": has_similar = True if has_same: point_status = "找到" point_status_class = "toc-point-found" elif has_similar: point_status = "部分找到" point_status_class = "toc-point-partial" else: point_status = "都找不到" point_status_class = "toc-point-notfound" else: point_status_class = "toc-point-notfound" toc_items.append(f'
{point_name} {html_module.escape(name_short)} [{point_status}]
') # 特征列表 if how_steps: features = how_steps[0].get("特征列表", []) for feat_idx, feature_data in enumerate(features): feature_name = feature_data.get("特征名称", f"特征 {feat_idx + 1}") # 获取状态 status = feature_status_map.get(feature_name, "无关") if status == "相同": status_class = "toc-feature-same" status_label = "相同" elif status == "相似": status_class = "toc-feature-similar" status_label = "相似" else: status_class = "toc-feature-unrelated" status_label = "无关" toc_items.append(f'
特征 {html_module.escape(feature_name)} [{status_label}]
') # 整体结论HTML conclusion_html = "" if overall_conclusion: if overall_conclusion == "找到": conclusion_class = "conclusion-found" conclusion_icon = "✓" elif overall_conclusion == "部分找到": conclusion_class = "conclusion-partial" conclusion_icon = "~" else: # 都找不到 conclusion_class = "conclusion-not-found" conclusion_icon = "✗" conclusion_html = f'''
{conclusion_icon} {overall_conclusion}
''' return f'''
目录导航
{conclusion_html}
{"".join(toc_items)}
''' def generate_post_content_html(post_data: Dict, post_idx: int, category_mapping: Dict = None, source_mapping: Dict = None) -> str: """生成单个帖子的完整内容HTML""" # 2. 灵感点详情和匹配结果 how_result = post_data.get("how解构结果", {}) inspiration_list = how_result.get("灵感点列表", []) # 先计算所有特征的状态(基于最高相似度) feature_status_map = {} # {特征名称: "相同"|"相似"|"无关"} for inspiration_point in inspiration_list: how_steps = inspiration_point.get("how步骤列表", []) if how_steps: features = how_steps[0].get("特征列表", []) for feature_data in features: feature_name = feature_data.get("特征名称", "") match_results = feature_data.get("匹配结果", []) # 找出最高相似度 max_similarity = 0.0 if match_results: max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results) # 根据最高相似度确定状态 status_label, _, _ = get_similarity_status(max_similarity) feature_status_map[feature_name] = status_label # 1. 帖子详情 post_detail_html = generate_post_detail_html(post_data, post_idx) # 生成所有灵感点的详情HTML(传入状态映射) inspirations_detail_html = "" for insp_idx, inspiration_point in enumerate(inspiration_list): inspiration_detail = generate_inspiration_detail_html(inspiration_point, feature_status_map) inspirations_detail_html += f'''
{inspiration_detail}
''' # 生成所有匹配结果HTML,按照how步骤分组 all_matches_html = "" for insp_idx, inspiration_point in enumerate(inspiration_list): inspiration_name = inspiration_point.get("名称", f"灵感点 {insp_idx + 1}") how_steps = inspiration_point.get("how步骤列表", []) if how_steps: # 为每个灵感点创建一个区域 for step_idx, step in enumerate(how_steps): step_name = step.get("步骤名称", f"步骤 {step_idx + 1}") features = step.get("特征列表", []) # 生成该步骤下所有特征的匹配结果 features_html = "" for feat_idx, feature_data in enumerate(features): match_html = generate_match_results_html([step], feat_idx, insp_idx, post_idx, category_mapping, source_mapping, "灵感点", feat_idx + 1) features_html += f'
{match_html}
' # 生成步骤区域(可折叠) step_section_id = f"post-{post_idx}-step-{insp_idx}-{step_idx}" all_matches_html += f'''

{html_module.escape(step_name)}

来自: {html_module.escape(inspiration_name)}
{features_html}
''' html = f'''
{post_detail_html}
{inspirations_detail_html}
{all_matches_html}
''' return html def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None, source_mapping: Dict = None) -> str: """生成包含所有帖子的单一HTML(左边目录,右边内容)""" # 生成统一的目录(包含所有帖子) all_toc_items = [] for post_idx, post in enumerate(posts_data): post_detail = post.get("帖子详情", {}) title = post_detail.get("title", "无标题") post_id = post_detail.get("post_id", f"post_{post_idx}") # 获取发布时间并格式化 publish_timestamp = post_detail.get("publish_timestamp", 0) if publish_timestamp: from datetime import datetime # publish_timestamp 是毫秒级时间戳,需要除以1000 date_str = datetime.fromtimestamp(publish_timestamp / 1000).strftime("%Y-%m-%d") else: date_str = "未知日期" # 帖子标题作为一级目录(可折叠),在标题前显示日期 all_toc_items.append(f''' ') # 生成所有独立的内容视图 all_content_views = [] for post_idx, post in enumerate(posts_data): post_detail = post.get("帖子详情", {}) how_result = post.get("how解构结果", {}) # 1. 生成"仅帖子详情"的视图 post_detail_html = generate_post_detail_html(post, post_idx) post_only_view = f''' ''' all_content_views.append(post_only_view) # 2. 
为每个点生成独立视图(包含帖子详情+点详情+匹配结果) for point_list_key in ["灵感点列表", "目的点列表", "关键点列表"]: point_list = how_result.get(point_list_key, []) # 提取点类型(去掉"列表") point_type = point_list_key.replace("列表", "") for point_idx, point in enumerate(point_list): # 计算该点的特征状态映射 point_feature_status_map = {} how_steps = point.get("how步骤列表", []) if how_steps: features = how_steps[0].get("特征列表", []) for feature_data in features: feature_name = feature_data.get("特征名称", "") match_results = feature_data.get("匹配结果", []) max_similarity = 0.0 if match_results: max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results) status_label, _, _ = get_similarity_status(max_similarity) point_feature_status_map[feature_name] = status_label # 生成点的详情HTML,传入特征状态映射和点类型 point_detail_html = generate_inspiration_detail_html(point, point_feature_status_map, point_type) # 生成该点的所有特征匹配结果 point_name = point.get("名称", f"点 {point_idx + 1}") matches_html = "" if how_steps: for step_idx, step in enumerate(how_steps): step_name = step.get("步骤名称", f"步骤 {step_idx + 1}") features = step.get("特征列表", []) features_html = "" for feat_idx, feature_data in enumerate(features): match_html = generate_match_results_html([step], feat_idx, point_idx, post_idx, category_mapping, source_mapping, point_list_key.replace("列表", ""), feat_idx + 1) features_html += f'
{match_html}
' step_section_id = f"post-{post_idx}-step-{point_idx}-{step_idx}" matches_html += f'''

{html_module.escape(step_name)}

来自: {html_module.escape(point_name)}
{features_html}
''' # 组合成完整视图 point_view_id = f"view-post-{post_idx}-point-{point_list_key}-{point_idx}" point_view = f''' ''' all_content_views.append(point_view) # 组合所有内容视图 all_contents_html = "\n".join(all_content_views) # 组合目录HTML toc_items_html = "\n".join(all_toc_items) html = f''' How解构结果可视化
{all_contents_html}

''' return html def minify_html(html: str) -> str: """压缩HTML,去除多余空格和换行""" import re # 保护script和style标签内容 scripts = [] styles = [] def save_script(match): scripts.append(match.group(0)) return f"___SCRIPT_{len(scripts)-1}___" def save_style(match): styles.append(match.group(0)) return f"___STYLE_{len(styles)-1}___" # 保存script和style html = re.sub(r']*>.*?', save_script, html, flags=re.DOTALL) html = re.sub(r']*>.*?', save_style, html, flags=re.DOTALL) # 去除HTML注释 html = re.sub(r'', '', html, flags=re.DOTALL) # 去除多余空格和换行 html = re.sub(r'\s+', ' ', html) html = re.sub(r'>\s+<', '><', html) # 恢复script和style for i, script in enumerate(scripts): html = html.replace(f"___SCRIPT_{i}___", script) for i, style in enumerate(styles): html = html.replace(f"___STYLE_{i}___", style) return html.strip() def main(): """主函数""" # 解析命令行参数 import sys account_name = sys.argv[1] if len(sys.argv) > 1 else None # 使用路径配置 config = PathConfig(account_name=account_name) # 确保输出目录存在 config.ensure_dirs() # 获取路径 input_dir = config.how_results_dir output_file = config.visualization_file print(f"账号: {config.account_name}") print(f"输入目录: {input_dir}") print(f"输出文件: {output_file}") print() print(f"读取 how 解构结果: {input_dir}") # 加载特征分类映射 print(f"加载特征分类映射...") category_mapping = load_feature_category_mapping(config) print(f"已加载 {sum(len(v) for v in category_mapping.values())} 个特征分类") # 加载特征来源映射 print(f"加载特征来源映射...") source_mapping = load_feature_source_mapping(config) print(f"已加载 {len(source_mapping)} 个特征的来源信息") json_files = list(input_dir.glob("*_how.json")) print(f"找到 {len(json_files)} 个文件\n") posts_data = [] for i, file_path in enumerate(json_files, 1): print(f"读取文件 [{i}/{len(json_files)}]: {file_path.name}") with open(file_path, "r", encoding="utf-8") as f: post_data = json.load(f) posts_data.append(post_data) # 按发布时间降序排序(最新的在前) print(f"\n按发布时间排序...") posts_data.sort(key=lambda x: x.get("帖子详情", {}).get("publish_timestamp", 0), reverse=True) print(f"\n生成合并的 HTML...") html_content = 
generate_combined_html(posts_data, category_mapping, source_mapping) # 保存原始版本 print(f"保存原始HTML到: {output_file}") with open(output_file, "w", encoding="utf-8") as f: f.write(html_content) original_size = len(html_content) / 1024 / 1024 print(f"原始HTML大小: {original_size:.1f} MB") # 压缩HTML print(f"\n压缩HTML...") minified_html = minify_html(html_content) minified_file = output_file.parent / "当前帖子_how解构结果_可视化.min.html" print(f"保存压缩HTML到: {minified_file}") with open(minified_file, "w", encoding="utf-8") as f: f.write(minified_html) minified_size = len(minified_html) / 1024 / 1024 print(f"压缩HTML大小: {minified_size:.1f} MB (减少 {(1 - minified_size/original_size)*100:.1f}%)") # Gzip压缩 import gzip print(f"\n生成Gzip压缩版本...") gzip_file = output_file.parent / "当前帖子_how解构结果_可视化.html.gz" with gzip.open(gzip_file, "wb") as f: f.write(minified_html.encode('utf-8')) gzip_size = gzip_file.stat().st_size / 1024 / 1024 print(f"Gzip压缩大小: {gzip_size:.1f} MB (比原始减少 {(1 - gzip_size/original_size)*100:.1f}%)") print(f"\n完成! 生成了3个版本:") print(f"1. 原始版本: {output_file} ({original_size:.1f} MB)") print(f"2. 压缩版本: {minified_file} ({minified_size:.1f} MB)") print(f"3. Gzip版本: {gzip_file} ({gzip_size:.1f} MB)") print(f"\n建议分享: {gzip_file.name} (浏览器可直接打开)") if __name__ == "__main__": main()