yangxiaohui hace 1 semana
padre
commit
64380d838c

+ 1 - 1
config/accounts.json

@@ -32,7 +32,7 @@
       "description": "未启用的示例账号"
     }
   ],
-  "default_account": "阿里多多酱",
+  "default_account": "阿里多多酱_1125",
   "comment": "数据根目录可通过 data_root 配置(支持绝对路径、~、环境变量),也可通过 DATA_ROOT 环境变量覆盖",
   "filter_mode": "exclude_current_posts",
   "filter_modes": {

+ 488 - 0
script/data_processing/build_match_graph.py

@@ -0,0 +1,488 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+从匹配结果中构建帖子与人设的节点边关系图
+
+输入:
+1. filtered_results目录下的匹配结果文件
+2. 节点列表.json
+3. 边关系.json
+
+输出:
+1. match_graph目录下的节点边关系文件
+"""
+
+import json
+from pathlib import Path
+from typing import Dict, List, Set, Any, Optional
+import sys
+
+# 添加项目根目录到路径
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from script.data_processing.path_config import PathConfig
+
+
+def build_post_node_id(dimension: str, node_type: str, name: str) -> str:
+    """Build the ID of a post-side node.
+
+    The result is ``{dimension}_{node_type}_{name}`` prefixed with ``帖子_``.
+    """
+    suffix = f"{dimension}_{node_type}_{name}"
+    return f"帖子_{suffix}"
+
+
+def build_persona_node_id(dimension: str, node_type: str, name: str) -> str:
+    """Build the ID of a persona node as ``{dimension}_{node_type}_{name}``."""
+    node_id = f"{dimension}_{node_type}_{name}"
+    return node_id
+
+
+def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
+    """
+    Extract post nodes, persona node IDs and match edges from one match result.
+
+    Walks ``how解构结果`` -> point lists -> ``how步骤列表`` -> ``特征列表``. A
+    feature that has a name and a non-empty ``匹配结果`` yields one post node of
+    type ``标签``; each of its matches that has both ``人设特征名称`` and
+    ``人设特征层级`` yields one match edge from the post node to a persona node.
+
+    Args:
+        filtered_data: Parsed match-result data.
+
+    Returns:
+        (post node list, persona node ID set, match edge list). Post nodes are
+        unique by node ID and keep the weight of their first occurrence; match
+        edges are returned as encountered, without de-duplication.
+    """
+    post_nodes = []
+    # IDs already present in post_nodes, so the duplicate check is a set
+    # lookup rather than a scan of the whole list for every feature.
+    seen_post_node_ids = set()
+    persona_node_ids = set()
+    match_edges = []
+
+    how_result = filtered_data.get("how解构结果", {})
+
+    # Point-list key in the input -> dimension name used in node IDs
+    dimension_mapping = {
+        "灵感点列表": "灵感点",
+        "目的点列表": "目的点",
+        "关键点列表": "关键点"
+    }
+
+    for list_key, dimension in dimension_mapping.items():
+        for point in how_result.get(list_key, []):
+            for step in point.get("how步骤列表", []):
+                for feature in step.get("特征列表", []):
+                    feature_name = feature.get("特征名称", "")
+                    match_results = feature.get("匹配结果", [])
+
+                    # Features without a name or without any match produce nothing
+                    if not feature_name or not match_results:
+                        continue
+
+                    # One post node (tag type) per distinct feature
+                    post_node_id = build_post_node_id(dimension, "标签", feature_name)
+                    if post_node_id not in seen_post_node_ids:
+                        seen_post_node_ids.add(post_node_id)
+                        post_nodes.append({
+                            "节点ID": post_node_id,
+                            "节点名称": feature_name,
+                            "节点类型": "标签",
+                            "节点层级": dimension,
+                            "权重": feature.get("权重", 0)
+                        })
+
+                    for match in match_results:
+                        persona_name = match.get("人设特征名称", "")
+                        persona_dimension = match.get("人设特征层级", "")
+                        persona_type = match.get("特征类型", "标签")
+                        # Treat a null "匹配结果" the same as a missing one
+                        match_detail = match.get("匹配结果") or {}
+
+                        if not persona_name or not persona_dimension:
+                            continue
+
+                        persona_node_id = build_persona_node_id(
+                            persona_dimension, persona_type, persona_name
+                        )
+                        persona_node_ids.add(persona_node_id)
+
+                        # Match edge: post node -> persona node
+                        match_edges.append({
+                            "源节点ID": post_node_id,
+                            "目标节点ID": persona_node_id,
+                            "边类型": "匹配",
+                            "边详情": {
+                                "相似度": match_detail.get("相似度", 0),
+                                "说明": match_detail.get("说明", "")
+                            }
+                        })
+
+    return post_nodes, persona_node_ids, match_edges
+
+
+def get_persona_nodes_details(
+    persona_node_ids: Set[str],
+    nodes_data: Dict
+) -> List[Dict]:
+    """
+    Look up the full node records for a set of persona node IDs.
+
+    Args:
+        persona_node_ids: IDs of the persona nodes to fetch.
+        nodes_data: Node-list data; records are read from its ``节点列表`` key.
+
+    Returns:
+        Shallow copies of the matching records, in ``节点列表`` order. IDs that
+        have no record in the list are simply absent from the result.
+    """
+    # Return copies so that callers can annotate the records (for example
+    # with an expansion flag) without modifying the shared nodes_data.
+    return [
+        node.copy()
+        for node in nodes_data.get("节点列表", [])
+        if node["节点ID"] in persona_node_ids
+    ]
+
+
+def get_edges_between_nodes(
+    node_ids: Set[str],
+    edges_data: Dict
+) -> List[Dict]:
+    """
+    Select the edges whose two endpoints both belong to a node set.
+
+    Args:
+        node_ids: Set of node IDs.
+        edges_data: Edge data; edges are read from its ``边列表`` key.
+
+    Returns:
+        The edges (same objects, original order) whose source and target IDs
+        are both in ``node_ids``.
+    """
+    selected = []
+    for edge in edges_data.get("边列表", []):
+        endpoints = (edge["源节点ID"], edge["目标节点ID"])
+        # Keep the edge only when neither endpoint falls outside the set
+        if all(endpoint in node_ids for endpoint in endpoints):
+            selected.append(edge)
+    return selected
+
+
+def create_mirrored_post_edges(
+    match_edges: List[Dict],
+    persona_edges: List[Dict]
+) -> List[Dict]:
+    """
+    Project the edges between persona nodes onto the post nodes matched to them.
+
+    If persona nodes A and B are connected, post node X matches A and post
+    node Y matches B, a mirrored edge X -> Y is created.
+
+    Args:
+        match_edges: Match edges (post node -> persona node).
+        persona_edges: Edges between persona nodes.
+
+    Returns:
+        Mirrored edges between post nodes. A pair of post nodes gets at most
+        one mirrored edge per original edge type, whatever the direction.
+    """
+    # Reverse index: persona node ID -> post node IDs matched to it
+    # (first-seen order, no duplicates)
+    posts_by_persona = {}
+    for match_edge in match_edges:
+        post_id = match_edge["源节点ID"]
+        persona_id = match_edge["目标节点ID"]
+        matched_posts = posts_by_persona.setdefault(persona_id, [])
+        if post_id not in matched_posts:
+            matched_posts.append(post_id)
+
+    mirrored_edges = []
+    emitted_keys = set()
+
+    for persona_edge in persona_edges:
+        source_persona = persona_edge["源节点ID"]
+        target_persona = persona_edge["目标节点ID"]
+        edge_type = persona_edge["边类型"]
+
+        for src_post in posts_by_persona.get(source_persona, []):
+            for tgt_post in posts_by_persona.get(target_persona, []):
+                # A post node never gets an edge to itself
+                if src_post == tgt_post:
+                    continue
+
+                # Order-insensitive key: A-B and B-A count as the same edge
+                dedup_key = (*sorted((src_post, tgt_post)), edge_type)
+                if dedup_key not in emitted_keys:
+                    emitted_keys.add(dedup_key)
+                    mirrored_edges.append({
+                        "源节点ID": src_post,
+                        "目标节点ID": tgt_post,
+                        "边类型": f"镜像_{edge_type}",  # marks the edge as mirrored
+                        "边详情": {
+                            "原始边类型": edge_type,
+                            "源人设节点": source_persona,
+                            "目标人设节点": target_persona
+                        }
+                    })
+
+    return mirrored_edges
+
+
+def expand_one_layer(
+    node_ids: Set[str],
+    edges_data: Dict,
+    nodes_data: Dict,
+    edge_types: Optional[List[str]] = None,
+    direction: str = "both"
+) -> tuple:
+    """
+    Expand one layer outward from a node set, collecting neighbours and edges.
+
+    Args:
+        node_ids: IDs of the starting nodes.
+        edges_data: Edge data; edges are read from its ``边列表`` key.
+        nodes_data: Node-list data; records are read from its ``节点列表`` key.
+        edge_types: Edge types to follow. ``None`` (or an empty list) follows
+            every type.
+        direction: Expansion direction.
+            - "outgoing": follow edges whose source is in the set, to their target
+            - "incoming": follow edges whose target is in the set, to their source
+            - "both": follow both kinds
+            Any other value follows no edges.
+
+    Returns:
+        (expanded node list, expanded edge list, expanded node ID set). The
+        expanded nodes are copies of the node records with ``是否扩展`` set to
+        True; only nodes outside ``node_ids`` are ever returned.
+    """
+    follow_outgoing = direction in ("outgoing", "both")
+    follow_incoming = direction in ("incoming", "both")
+    # Build the filter once so the per-edge check is a set lookup
+    allowed_types = set(edge_types) if edge_types else None
+
+    expanded_node_ids = set()
+    expanded_edges = []
+
+    for edge in edges_data.get("边列表", []):
+        if allowed_types is not None and edge["边类型"] not in allowed_types:
+            continue
+
+        source_id = edge["源节点ID"]
+        target_id = edge["目标节点ID"]
+        source_known = source_id in node_ids
+        target_known = target_id in node_ids
+
+        # The two cases are mutually exclusive, so an edge is added at most once
+        if follow_outgoing and source_known and not target_known:
+            expanded_node_ids.add(target_id)
+            expanded_edges.append(edge)
+        elif follow_incoming and target_known and not source_known:
+            expanded_node_ids.add(source_id)
+            expanded_edges.append(edge)
+
+    # Copy the records so the expansion flag never leaks into nodes_data
+    expanded_nodes = [
+        {**node, "是否扩展": True}
+        for node in nodes_data.get("节点列表", [])
+        if node["节点ID"] in expanded_node_ids
+    ]
+
+    return expanded_nodes, expanded_edges, expanded_node_ids
+
+
+def process_filtered_result(
+    filtered_file: Path,
+    nodes_data: Dict,
+    edges_data: Dict,
+    output_dir: Path
+) -> Dict:
+    """
+    Build and save the match graph for a single match-result file.
+
+    The graph holds the post nodes and the directly matched persona nodes,
+    the match edges between them, the existing edges among the matched
+    persona nodes, and the mirrored post-to-post edges derived from those
+    persona edges. It is written to ``{帖子id}_match_graph.json`` in
+    ``output_dir``.
+
+    Args:
+        filtered_file: Path of the match-result file to read.
+        nodes_data: Node-list data.
+        edges_data: Edge data.
+        output_dir: Directory the output file is written to.
+
+    Returns:
+        Statistics of the generated graph, plus the post ID and output path.
+    """
+    with open(filtered_file, "r", encoding="utf-8") as f:
+        filtered_data = json.load(f)
+
+    post_id = filtered_data.get("帖子id", "")
+    post_detail = filtered_data.get("帖子详情", {})
+    post_title = post_detail.get("title", "")
+
+    # Post nodes, matched persona node IDs and the match edges between them
+    post_nodes, persona_node_ids, match_edges = extract_matched_nodes_and_edges(filtered_data)
+
+    # Directly matched persona nodes, flagged as non-expanded. The flag is set
+    # on copies: the looked-up records may be the very dicts held in
+    # nodes_data, which is reused for every file processed in the same run.
+    persona_nodes = [
+        {**node, "是否扩展": False}
+        for node in get_persona_nodes_details(persona_node_ids, nodes_data)
+    ]
+
+    # Existing edges among the matched persona nodes
+    persona_edges = get_edges_between_nodes(persona_node_ids, edges_data)
+
+    # Post-to-post edges mirrored from the persona edges
+    post_edges = create_mirrored_post_edges(match_edges, persona_edges)
+
+    # No expansion here: only directly matched nodes are kept
+    all_nodes = post_nodes + persona_nodes
+
+    # Merge the edge lists, keeping the first edge per (source, target, type)
+    seen_edges = set()
+    all_edges = []
+    for edge in match_edges + persona_edges + post_edges:
+        edge_key = (edge["源节点ID"], edge["目标节点ID"], edge["边类型"])
+        if edge_key not in seen_edges:
+            seen_edges.add(edge_key)
+            all_edges.append(edge)
+
+    # Index: source node ID -> edge type -> target node ID -> edge
+    edges_by_node = {}
+    for edge in all_edges:
+        by_type = edges_by_node.setdefault(edge["源节点ID"], {})
+        by_type.setdefault(edge["边类型"], {})[edge["目标节点ID"]] = edge
+
+    # Built once; used both in the output file and in the returned summary
+    stats = {
+        "帖子节点数": len(post_nodes),
+        "人设节点数": len(persona_nodes),
+        "匹配边数": len(match_edges),
+        "人设节点间边数": len(persona_edges),
+        "帖子节点间边数": len(post_edges),
+        "总节点数": len(all_nodes),
+        "总边数": len(all_edges)
+    }
+
+    output_data = {
+        "说明": {
+            "帖子ID": post_id,
+            "帖子标题": post_title,
+            "描述": "帖子与人设的节点匹配关系",
+            "统计": stats
+        },
+        "帖子节点列表": post_nodes,
+        "人设节点列表": persona_nodes,
+        "匹配边列表": match_edges,
+        "人设节点间边列表": persona_edges,
+        "帖子节点间边列表": post_edges,
+        "节点列表": all_nodes,
+        "边列表": all_edges,
+        "节点边索引": edges_by_node
+    }
+
+    output_file = output_dir / f"{post_id}_match_graph.json"
+    with open(output_file, "w", encoding="utf-8") as f:
+        json.dump(output_data, f, ensure_ascii=False, indent=2)
+
+    return {
+        "帖子ID": post_id,
+        **stats,
+        "输出文件": str(output_file)
+    }
+
+
+def main():
+    """Build a match graph for every ``*_filtered.json`` file and print a summary."""
+    # Resolve paths through the shared path configuration
+    config = PathConfig()
+    config.ensure_dirs()
+
+    print(f"账号: {config.account_name}")
+    print(f"输出版本: {config.output_version}")
+    print()
+
+    # Input files / directory
+    filtered_results_dir = config.intermediate_dir / "filtered_results"
+    nodes_file = config.intermediate_dir / "节点列表.json"
+    edges_file = config.intermediate_dir / "边关系.json"
+
+    # Output directory
+    output_dir = config.intermediate_dir / "match_graph"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    print("输入:")
+    print(f"  匹配结果目录: {filtered_results_dir}")
+    print(f"  节点列表: {nodes_file}")
+    print(f"  边关系: {edges_file}")
+    print(f"\n输出目录: {output_dir}")
+    print()
+
+    # Load the node list and the edge list
+    print("正在读取节点列表...")
+    with open(nodes_file, "r", encoding="utf-8") as f:
+        nodes_data = json.load(f)
+    node_total = len(nodes_data.get('节点列表', []))
+    print(f"  共 {node_total} 个节点")
+
+    print("正在读取边关系...")
+    with open(edges_file, "r", encoding="utf-8") as f:
+        edges_data = json.load(f)
+    edge_total = len(edges_data.get('边列表', []))
+    print(f"  共 {edge_total} 条边")
+
+    separator = "=" * 60
+
+    # Process every match-result file
+    print(f"\n{separator}")
+    print("处理匹配结果文件...")
+
+    filtered_files = list(filtered_results_dir.glob("*_filtered.json"))
+    file_total = len(filtered_files)
+    print(f"找到 {file_total} 个匹配结果文件")
+
+    results = []
+    for index, filtered_file in enumerate(filtered_files, start=1):
+        print(f"\n[{index}/{file_total}] 处理: {filtered_file.name}")
+        result = process_filtered_result(filtered_file, nodes_data, edges_data, output_dir)
+        results.append(result)
+        print(f"  帖子节点: {result['帖子节点数']}, 人设节点: {result['人设节点数']}")
+        print(f"  匹配边: {result['匹配边数']}, 人设边: {result['人设节点间边数']}, 帖子边: {result['帖子节点间边数']}")
+
+    # Overall summary: (printed label, key in each per-file result)
+    summary_fields = [
+        ("总帖子节点", "帖子节点数"),
+        ("总人设节点", "人设节点数"),
+        ("总匹配边", "匹配边数"),
+        ("总人设边", "人设节点间边数"),
+        ("总帖子边", "帖子节点间边数"),
+    ]
+    print(f"\n{separator}")
+    print("处理完成!")
+    print("\n汇总:")
+    print(f"  处理文件数: {len(results)}")
+    for label, key in summary_fields:
+        total = sum(r[key] for r in results)
+        print(f"  {label}: {total}")
+    print(f"\n输出目录: {output_dir}")
+
+
+if __name__ == "__main__":
+    main()

+ 166 - 0
script/data_processing/extract_category_edges.py

@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+从dimension_associations_analysis.json中提取分类之间的边关系
+"""
+
+import json
+from pathlib import Path
+from typing import Dict, List, Any
+import argparse
+
+
+def get_last_segment(path: str) -> str:
+    """Return the part of ``path`` after its last ``/`` (the whole string if there is none)."""
+    return path.rsplit("/", 1)[-1]
+
+
+def build_node_id(dimension: str, node_type: str, name: str) -> str:
+    """
+    Build a node ID from a node's level, type and full path.
+
+    Args:
+        dimension: Node level (灵感点, 目的点, 关键点).
+        node_type: Node type (分类, 标签).
+        name: Node name as a full ``/``-separated path.
+
+    Returns:
+        The node ID, formatted as ``{level}_{type}_{last path segment}``.
+    """
+    # Only the last path segment takes part in the ID
+    leaf_name = get_last_segment(name)
+    return f"{dimension}_{node_type}_{leaf_name}"
+
+
+def extract_edges_from_single_dimension(data: Dict) -> List[Dict]:
+    """
+    Extract category-to-category edges from the single-dimension analysis.
+
+    Reads ``data["单维度关联分析"]``. For each known dimension and each
+    direction key containing ``→``, every source category contributes one
+    edge per entry of its ``与…的关联`` lists that names a ``目标分类``.
+
+    Args:
+        data: Analysis data containing the ``单维度关联分析`` section.
+
+    Returns:
+        The edge list; empty when the section is absent.
+    """
+    try:
+        single_dim = data["单维度关联分析"]
+    except KeyError:
+        return []
+
+    # Section key -> dimension name used in node IDs
+    dimension_map = {
+        "灵感点维度": "灵感点",
+        "目的点维度": "目的点",
+        "关键点维度": "关键点"
+    }
+    assoc_prefix = "与"
+    assoc_suffix = "的关联"
+
+    edges = []
+    for dim_key, dim_data in single_dim.items():
+        source_dimension = dimension_map.get(dim_key)
+        if source_dimension is None:
+            continue
+
+        for direction_key, direction_data in dim_data.items():
+            # Skip the "说明" entry and anything that is not a direction
+            # such as "灵感点→目的点"
+            if direction_key == "说明" or "→" not in direction_key:
+                continue
+
+            for source_path, source_info in direction_data.items():
+                source_node_id = build_node_id(source_dimension, "分类", source_path)
+
+                for field_name, associations in source_info.items():
+                    # The target dimension is taken from the field name,
+                    # e.g. "与目的点的关联" -> "目的点"
+                    is_assoc_field = (
+                        field_name.startswith(assoc_prefix)
+                        and field_name.endswith(assoc_suffix)
+                    )
+                    if not is_assoc_field or not isinstance(associations, list):
+                        continue
+
+                    target_dimension = field_name[len(assoc_prefix):-len(assoc_suffix)]
+                    edge_type = f"{source_dimension}_分类-{target_dimension}_分类"
+
+                    for assoc in associations:
+                        target_path = assoc.get("目标分类", "")
+                        if not target_path:
+                            continue
+
+                        edges.append({
+                            "源节点ID": source_node_id,
+                            "目标节点ID": build_node_id(target_dimension, "分类", target_path),
+                            "边类型": edge_type,
+                            "边详情": {
+                                "Jaccard相似度": assoc.get("Jaccard相似度", 0),
+                                "重叠系数": assoc.get("重叠系数", 0),
+                                "共同帖子数": assoc.get("共同帖子数", 0),
+                                "共同帖子ID": assoc.get("共同帖子ID", [])
+                            }
+                        })
+
+    return edges
+
+
+def main():
+    """Command-line entry point: read the analysis JSON, extract category edges, save them."""
+    parser = argparse.ArgumentParser(description="从dimension_associations_analysis.json中提取分类边关系")
+    parser.add_argument("--input", "-i", type=str, required=True, help="输入文件路径")
+    parser.add_argument("--output", "-o", type=str, required=True, help="输出文件路径")
+    args = parser.parse_args()
+
+    input_file, output_file = Path(args.input), Path(args.output)
+
+    print(f"输入文件: {input_file}")
+    print(f"输出文件: {output_file}")
+
+    # Load the analysis file
+    print(f"\n正在读取文件: {input_file}")
+    with open(input_file, "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    # Extract the edges
+    print("\n正在提取边关系...")
+    edges = extract_edges_from_single_dimension(data)
+
+    print(f"提取到 {len(edges)} 条边")
+
+    # Count the edges of each type
+    edge_type_count = {}
+    for edge in edges:
+        edge_type_count.setdefault(edge["边类型"], 0)
+        edge_type_count[edge["边类型"]] += 1
+
+    print("\n边类型统计:")
+    for edge_type in sorted(edge_type_count):
+        print(f"  {edge_type}: {edge_type_count[edge_type]} 条")
+
+    # Make sure the output directory exists before writing
+    output_file.parent.mkdir(parents=True, exist_ok=True)
+
+    print(f"\n正在保存结果到: {output_file}")
+    with open(output_file, "w", encoding="utf-8") as f:
+        json.dump(
+            {
+                "说明": {
+                    "描述": "分类之间的边关系",
+                    "数据来源": input_file.name
+                },
+                "边列表": edges
+            },
+            f,
+            ensure_ascii=False,
+            indent=2,
+        )
+
+    print("完成!")
+
+
+if __name__ == "__main__":
+    main()

+ 978 - 0
script/data_processing/extract_nodes_and_edges.py

@@ -0,0 +1,978 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+从源数据文件中提取节点列表和边关系
+
+输入:
+1. 过去帖子_pattern聚合结果.json - 分类节点、标签-分类边
+2. 过去帖子_what解构结果目录 - 标签节点来源
+3. dimension_associations_analysis.json - 分类-分类边(共现)
+
+输出:
+1. 节点列表.json
+2. 边关系.json
+"""
+
+import json
+from pathlib import Path
+from typing import Dict, List, Any, Set, Optional
+import sys
+import re
+
+# 添加项目根目录到路径
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from script.data_processing.path_config import PathConfig
+from script.detail import get_xiaohongshu_detail
+
+
+def get_post_detail(post_id: str) -> Optional[Dict]:
+    """Fetch the detail of one post; on any error print a warning and return None."""
+    try:
+        return get_xiaohongshu_detail(post_id)
+    except Exception as e:
+        # Best effort: a failed lookup is reported and must not stop the run
+        print(f"  警告: 获取帖子 {post_id} 详情失败: {e}")
+    return None
+
+
+def get_last_segment(path: str) -> str:
+    """Return the part of ``path`` after its last ``/`` (the whole string if there is none)."""
+    _, _, tail = path.rpartition("/")
+    return tail
+
+
+def build_node_id(dimension: str, node_type: str, name: str) -> str:
+    """
+    Build a node ID from a node's level, type and name.
+
+    Args:
+        dimension: Node level (灵感点, 目的点, 关键点).
+        node_type: Node type (分类, 标签).
+        name: Node name, used as given.
+
+    Returns:
+        The node ID, formatted as ``{level}_{type}_{name}``.
+    """
+    node_id = f"{dimension}_{node_type}_{name}"
+    return node_id
+
+
+def extract_post_id_from_filename(filename: str) -> str:
+    """Return the text before the first ``_`` of ``filename``.
+
+    Returns an empty string when the name has no underscore or starts with one.
+    """
+    prefix, underscore, _ = filename.partition("_")
+    if underscore and prefix:
+        return prefix
+    return ""
+
+
+def get_current_post_ids(current_posts_dir: Path) -> Set[str]:
+    """
+    Collect the post IDs of all JSON files in the current-posts directory.
+
+    Args:
+        current_posts_dir: Path of the current-posts directory.
+
+    Returns:
+        The set of post IDs taken from the file names; empty (with a printed
+        warning) when the directory is missing or holds no ``*.json`` file.
+    """
+    if not current_posts_dir.exists():
+        print(f"警告: 当前帖子目录不存在: {current_posts_dir}")
+        return set()
+
+    json_files = list(current_posts_dir.glob("*.json"))
+    if not json_files:
+        print(f"警告: 当前帖子目录为空: {current_posts_dir}")
+        return set()
+
+    print(f"找到 {len(json_files)} 个当前帖子")
+
+    # File names that yield no ID are ignored
+    candidate_ids = (extract_post_id_from_filename(path.name) for path in json_files)
+    post_ids = {candidate for candidate in candidate_ids if candidate}
+
+    print(f"提取到 {len(post_ids)} 个帖子ID")
+    return post_ids
+
+
+def collect_all_post_ids_from_nodes(nodes: List[Dict]) -> Set[str]:
+    """Collect every non-empty ``帖子ID`` found in the ``节点来源`` of the given nodes."""
+    return {
+        source.get("帖子ID", "")
+        for node in nodes
+        for source in node.get("节点来源", [])
+        if source.get("帖子ID", "")
+    }
+
+
+def collect_all_post_ids_from_edges(edges: List[Dict]) -> Set[str]:
+    """Collect the ``共同帖子ID`` values of all cross-point category and tag co-occurrence edges."""
+    # Only these edge types are read for post IDs; every other type is skipped
+    post_bearing_types = ("分类共现(跨点)", "标签共现")
+
+    post_ids = set()
+    for edge in edges:
+        if edge.get("边类型") not in post_bearing_types:
+            continue
+        post_ids.update(edge.get("边详情", {}).get("共同帖子ID", []))
+    return post_ids
+
+
+def fetch_post_details(post_ids: Set[str]) -> Dict[str, Dict]:
+    """
+    Fetch the details of a batch of posts, one by one in sorted ID order.
+
+    Args:
+        post_ids: Set of post IDs.
+
+    Returns:
+        Mapping of post ID -> post detail; posts whose lookup returned
+        nothing are left out.
+    """
+    total = len(post_ids)
+    print(f"\n正在获取 {total} 个帖子的详情...")
+
+    post_details = {}
+    for index, post_id in enumerate(sorted(post_ids), start=1):
+        print(f"  [{index}/{total}] 获取帖子 {post_id} 的详情...")
+        detail = get_post_detail(post_id)
+        if not detail:
+            continue
+        post_details[post_id] = detail
+
+    print(f"成功获取 {len(post_details)} 个帖子详情")
+    return post_details
+
+
+
+
+def filter_nodes_by_post_ids(nodes: List[Dict], exclude_post_ids: Set[str]) -> List[Dict]:
+    """
+    Drop the node sources that come from excluded posts.
+
+    Args:
+        nodes: Node list.
+        exclude_post_ids: IDs of the posts to exclude.
+
+    Returns:
+        Copies of the nodes that still have at least one source, with
+        ``节点来源`` filtered and ``帖子数`` recomputed as the number of distinct
+        non-empty post IDs among the remaining sources.
+    """
+    kept_nodes = []
+    for node in nodes:
+        remaining_sources = [
+            source for source in node.get("节点来源", [])
+            if source.get("帖子ID", "") not in exclude_post_ids
+        ]
+
+        # A node with no source left is dropped entirely
+        if not remaining_sources:
+            continue
+
+        distinct_posts = {
+            source.get("帖子ID", "")
+            for source in remaining_sources
+            if source.get("帖子ID")
+        }
+        kept_nodes.append({
+            **node,
+            "节点来源": remaining_sources,
+            "帖子数": len(distinct_posts),
+        })
+
+    return kept_nodes
+
+
+def filter_edges_by_post_ids(edges: List[Dict], exclude_post_ids: Set[str]) -> List[Dict]:
+    """
+    Remove excluded posts from the co-occurrence edges.
+
+    Args:
+        edges: Edge list.
+        exclude_post_ids: IDs of the posts to exclude.
+
+    Returns:
+        The filtered edge list. Cross-point category and tag co-occurrence
+        edges are copied with ``共同帖子ID`` / ``共同帖子数`` recomputed, and dropped
+        when no common post remains; every other edge is kept unchanged.
+    """
+    post_bearing_types = ("分类共现(跨点)", "标签共现")
+
+    kept_edges = []
+    for edge in edges:
+        if edge["边类型"] not in post_bearing_types:
+            # In-point co-occurrence and belongs-to / contains edges are
+            # passed through without any filtering
+            kept_edges.append(edge)
+            continue
+
+        details = edge.get("边详情", {})
+        remaining_post_ids = [
+            pid for pid in details.get("共同帖子ID", [])
+            if pid not in exclude_post_ids
+        ]
+        if not remaining_post_ids:
+            continue
+
+        kept_edges.append({
+            **edge,
+            "边详情": {
+                **details,
+                "共同帖子ID": remaining_post_ids,
+                "共同帖子数": len(remaining_post_ids),
+            },
+        })
+
+    return kept_edges
+
+
+# ========== 分类节点提取 ==========
+
+def extract_category_nodes_from_pattern(
+    pattern_data: Dict,
+    dimension_key: str,
+    dimension_name: str
+) -> List[Dict]:
+    """
+    Extract the category nodes of one dimension from the pattern aggregation.
+
+    Every dict-valued entry of the dimension's tree (other than the reserved
+    bookkeeping keys) is a category; the tree is walked depth-first and each
+    category is emitted before its sub-categories.
+
+    Args:
+        pattern_data: Pattern aggregation data.
+        dimension_key: Key of the dimension in ``pattern_data``
+            (灵感点列表, 目的点, 关键点列表).
+        dimension_name: Dimension name (灵感点, 目的点, 关键点).
+
+    Returns:
+        The category node list; empty when the dimension key is absent.
+    """
+    if dimension_key not in pattern_data:
+        return []
+
+    # Keys that hold data about a category rather than a sub-category
+    reserved_keys = ("特征列表", "_meta", "帖子数", "特征数", "帖子列表")
+    nodes = []
+
+    def walk(subtree: Dict, ancestors: List[str]):
+        """Emit a node for every category under ``subtree``, depth-first."""
+        for category_name, category in subtree.items():
+            if category_name in reserved_keys or not isinstance(category, dict):
+                continue
+
+            # Node sources come from the category's own feature list
+            sources = [
+                {
+                    "点的名称": feature.get("所属点", ""),
+                    "点的描述": feature.get("点描述", ""),
+                    "帖子ID": feature.get("帖子id", "")
+                }
+                for feature in category.get("特征列表", [])
+            ]
+
+            nodes.append({
+                "节点ID": build_node_id(dimension_name, "分类", category_name),
+                "节点名称": category_name,
+                "节点类型": "分类",
+                "节点层级": dimension_name,
+                "所属分类": list(ancestors),
+                "帖子数": len(category.get("帖子列表", [])),
+                "节点来源": sources
+            })
+
+            walk(category, ancestors + [category_name])
+
+    walk(pattern_data[dimension_key], [])
+    return nodes
+
+
+# ========== 标签节点提取 ==========
+
+def extract_tag_nodes_from_pattern(
+    pattern_data: Dict,
+    dimension_key: str,
+    dimension_name: str
+) -> List[Dict]:
+    """
+    Extract the tag nodes of one dimension from the pattern aggregation.
+
+    Every named entry of a ``特征列表`` is a tag. Tags with the same node ID are
+    merged into one node: their sources are accumulated, and the category
+    path recorded is the one where the tag was first seen.
+
+    Args:
+        pattern_data: Pattern aggregation data.
+        dimension_key: Key of the dimension in ``pattern_data``.
+        dimension_name: Dimension name.
+
+    Returns:
+        The tag node list; empty when the dimension key is absent.
+    """
+    if dimension_key not in pattern_data:
+        return []
+
+    reserved_keys = ("特征列表", "_meta", "帖子数", "特征数", "帖子列表")
+    tags_by_id = {}       # tag node ID -> node record (insertion-ordered)
+    post_ids_by_tag = {}  # tag node ID -> distinct non-empty post IDs
+
+    def walk(subtree: Dict, ancestors: List[str]):
+        """Record the tags of ``subtree``, then descend into its sub-categories."""
+        for feature in subtree.get("特征列表", []):
+            tag_name = feature.get("特征名称", "")
+            if not tag_name:
+                continue
+
+            tag_id = build_node_id(dimension_name, "标签", tag_name)
+            post_id = feature.get("帖子id", "")
+
+            if tag_id not in tags_by_id:
+                tags_by_id[tag_id] = {
+                    "节点ID": tag_id,
+                    "节点名称": tag_name,
+                    "节点类型": "标签",
+                    "节点层级": dimension_name,
+                    "所属分类": list(ancestors),
+                    "帖子数": 0,
+                    "节点来源": []
+                }
+                post_ids_by_tag[tag_id] = set()
+
+            tags_by_id[tag_id]["节点来源"].append({
+                "点的名称": feature.get("所属点", ""),
+                "点的描述": feature.get("点描述", ""),
+                "帖子ID": post_id
+            })
+            if post_id:
+                post_ids_by_tag[tag_id].add(post_id)
+
+        for child_name, child in subtree.items():
+            if child_name in reserved_keys or not isinstance(child, dict):
+                continue
+            walk(child, ancestors + [child_name])
+
+    walk(pattern_data[dimension_key], [])
+
+    # Fill in the post count of each merged tag
+    nodes = []
+    for tag_id, tag_info in tags_by_id.items():
+        tag_info["帖子数"] = len(post_ids_by_tag[tag_id])
+        nodes.append(tag_info)
+
+    return nodes
+
+
+# ========== 标签-分类边提取 ==========
+
+def extract_tag_category_edges_from_pattern(
+    pattern_data: Dict,
+    dimension_key: str,
+    dimension_name: str
+) -> List[Dict]:
+    """
+    Extract the tag/category edges of one dimension from the pattern aggregation.
+
+    Each named tag found in the ``特征列表`` of a category yields two edges: a
+    ``属于`` edge (tag -> category) and a ``包含`` edge (category -> tag). Tags
+    listed directly at the root of the dimension have no category and yield
+    no edge.
+
+    Args:
+        pattern_data: Pattern aggregation data.
+        dimension_key: Key of the dimension in ``pattern_data``.
+        dimension_name: Dimension name.
+
+    Returns:
+        The edge list without duplicates; empty when the dimension key is absent.
+    """
+    if dimension_key not in pattern_data:
+        return []
+
+    reserved_keys = ("特征列表", "_meta", "帖子数", "特征数", "帖子列表")
+    edges = []
+    seen_edges = set()  # (source ID, target ID, edge type) already emitted
+
+    def add_edge(source_id: str, target_id: str, edge_type: str):
+        """Append the edge unless an identical one was already added."""
+        edge_key = (source_id, target_id, edge_type)
+        if edge_key in seen_edges:
+            return
+        seen_edges.add(edge_key)
+        edges.append({
+            "源节点ID": source_id,
+            "目标节点ID": target_id,
+            "边类型": edge_type,
+            "边详情": {}
+        })
+
+    def walk(subtree: Dict, ancestors: List[str]):
+        """Link the tags of ``subtree`` to its category, then descend."""
+        current_category = ancestors[-1] if ancestors else None
+
+        if current_category and "特征列表" in subtree:
+            category_id = build_node_id(dimension_name, "分类", current_category)
+            for feature in subtree["特征列表"]:
+                tag_name = feature.get("特征名称", "")
+                if not tag_name:
+                    continue
+
+                tag_id = build_node_id(dimension_name, "标签", tag_name)
+                add_edge(tag_id, category_id, "属于")   # tag -> category
+                add_edge(category_id, tag_id, "包含")   # category -> tag
+
+        for child_name, child in subtree.items():
+            if child_name in reserved_keys or not isinstance(child, dict):
+                continue
+            walk(child, ancestors + [child_name])
+
+    walk(pattern_data[dimension_key], [])
+    return edges
+
+
+# ========== 标签-标签共现边提取 ==========
+
+def _collect_feature_names(items: Any) -> List[str]:
+    """Return the non-empty ``特征名称`` found in the ``提取的特征`` list of each item."""
+    if not isinstance(items, list):
+        return []
+
+    names = []
+    for item in items:
+        features = item.get("提取的特征") if isinstance(item, dict) else None
+        if not isinstance(features, list):
+            continue
+        for feature in features:
+            tag_name = feature.get("特征名称", "") if isinstance(feature, dict) else ""
+            if tag_name:
+                names.append(tag_name)
+    return names
+
+
+def extract_tags_from_post(post_data: Dict) -> Dict[str, List[str]]:
+    """
+    Extract all tags (feature names) from the deconstruction result of one post.
+
+    Tags are read from ``post_data["三点解构"]``: for 灵感点 from the sections
+    ``全新内容``, ``共性差异`` and ``共性内容`` (in that order), for 目的点 from
+    ``purposes`` and for 关键点 from ``key_points``. Missing or malformed
+    sections are skipped.
+
+    Args:
+        post_data: Deconstruction data of a post.
+
+    Returns:
+        Tags grouped by dimension: {"灵感点": [...], "目的点": [...], "关键点": [...]}.
+        Duplicates are kept, in order of appearance.
+    """
+    tags_by_dimension = {
+        "灵感点": [],
+        "目的点": [],
+        "关键点": []
+    }
+
+    three_points = post_data.get("三点解构")
+    if not isinstance(three_points, dict):
+        return tags_by_dimension
+
+    # Dimension -> keys of the item lists to scan, in scan order
+    sections_by_dimension = {
+        "灵感点": ("全新内容", "共性差异", "共性内容"),
+        "目的点": ("purposes",),
+        "关键点": ("key_points",)
+    }
+
+    for dimension, section_keys in sections_by_dimension.items():
+        dimension_data = three_points.get(dimension)
+        if not isinstance(dimension_data, dict):
+            continue
+        for section_key in section_keys:
+            tags_by_dimension[dimension].extend(
+                _collect_feature_names(dimension_data.get(section_key))
+            )
+
+    return tags_by_dimension
+
+
+def extract_tag_cooccurrence_edges(historical_posts_dir: Path, exclude_post_ids: Set[str] = None) -> List[Dict]:
+    """
+    从历史帖子解构结果中提取标签-标签共现边
+
+    Args:
+        historical_posts_dir: 历史帖子解构结果目录
+        exclude_post_ids: 要排除的帖子ID集合
+
+    Returns:
+        标签共现边列表
+    """
+    if exclude_post_ids is None:
+        exclude_post_ids = set()
+
+    # 存储每对标签的共现信息
+    # key: (tag1_id, tag2_id), value: {"共同帖子ID": set()}
+    cooccurrence_map = {}
+
+    if not historical_posts_dir.exists():
+        print(f"警告: 历史帖子目录不存在: {historical_posts_dir}")
+        return []
+
+    json_files = list(historical_posts_dir.glob("*.json"))
+    print(f"找到 {len(json_files)} 个历史帖子文件")
+
+    for file_path in json_files:
+        # 提取帖子ID
+        post_id = extract_post_id_from_filename(file_path.name)
+        if not post_id:
+            continue
+
+        # 跳过排除的帖子
+        if post_id in exclude_post_ids:
+            continue
+
+        try:
+            with open(file_path, "r", encoding="utf-8") as f:
+                post_data = json.load(f)
+
+            # 提取该帖子的所有标签
+            tags_by_dimension = extract_tags_from_post(post_data)
+
+            # 对每个维度内的标签两两组合,构建共现关系
+            for dimension, tags in tags_by_dimension.items():
+                unique_tags = list(set(tags))  # 去重
+                for i in range(len(unique_tags)):
+                    for j in range(i + 1, len(unique_tags)):
+                        tag1 = unique_tags[i]
+                        tag2 = unique_tags[j]
+
+                        # 构建节点ID
+                        tag1_id = build_node_id(dimension, "标签", tag1)
+                        tag2_id = build_node_id(dimension, "标签", tag2)
+
+                        # 确保顺序一致(按字典序)
+                        if tag1_id > tag2_id:
+                            tag1_id, tag2_id = tag2_id, tag1_id
+
+                        key = (tag1_id, tag2_id, dimension)
+
+                        if key not in cooccurrence_map:
+                            cooccurrence_map[key] = {"共同帖子ID": set()}
+
+                        cooccurrence_map[key]["共同帖子ID"].add(post_id)
+
+        except Exception as e:
+            print(f"  警告: 处理文件 {file_path.name} 时出错: {e}")
+
+    # 转换为边列表
+    edges = []
+    for (tag1_id, tag2_id, dimension), info in cooccurrence_map.items():
+        common_post_ids = list(info["共同帖子ID"])
+        edge = {
+            "源节点ID": tag1_id,
+            "目标节点ID": tag2_id,
+            "边类型": "标签共现",
+            "边详情": {
+                "共同帖子数": len(common_post_ids),
+                "共同帖子ID": common_post_ids
+            }
+        }
+        edges.append(edge)
+
+    return edges
+
+
+# ========== 分类-分类边提取 ==========
+
+def extract_category_edges_from_associations(associations_data: Dict) -> List[Dict]:
+    """
+    从dimension_associations_analysis.json中提取分类-分类边(共现)
+
+    Args:
+        associations_data: 关联分析数据
+
+    Returns:
+        边列表
+    """
+    edges = []
+
+    if "单维度关联分析" not in associations_data:
+        return edges
+
+    single_dim = associations_data["单维度关联分析"]
+
+    # 维度映射
+    dimension_map = {
+        "灵感点维度": "灵感点",
+        "目的点维度": "目的点",
+        "关键点维度": "关键点"
+    }
+
+    for dim_key, dim_data in single_dim.items():
+        if dim_key not in dimension_map:
+            continue
+
+        source_dimension = dimension_map[dim_key]
+
+        # 遍历该维度下的所有关联方向
+        for direction_key, direction_data in dim_data.items():
+            if direction_key == "说明":
+                continue
+
+            if "→" not in direction_key:
+                continue
+
+            # 遍历每个源分类
+            for source_path, source_info in direction_data.items():
+                source_name = get_last_segment(source_path)
+                source_node_id = build_node_id(source_dimension, "分类", source_name)
+
+                # 确定目标维度
+                for field_name, associations in source_info.items():
+                    if not field_name.startswith("与") or not field_name.endswith("的关联"):
+                        continue
+
+                    target_dimension = field_name[1:-3]
+
+                    if not isinstance(associations, list):
+                        continue
+
+                    for assoc in associations:
+                        target_path = assoc.get("目标分类", "")
+                        if not target_path:
+                            continue
+
+                        target_name = get_last_segment(target_path)
+                        target_node_id = build_node_id(target_dimension, "分类", target_name)
+
+                        edge = {
+                            "源节点ID": source_node_id,
+                            "目标节点ID": target_node_id,
+                            "边类型": "分类共现(跨点)",
+                            "边详情": {
+                                "Jaccard相似度": assoc.get("Jaccard相似度", 0),
+                                "重叠系数": assoc.get("重叠系数", 0),
+                                "共同帖子数": assoc.get("共同帖子数", 0),
+                                "共同帖子ID": assoc.get("共同帖子ID", [])
+                            }
+                        }
+                        edges.append(edge)
+
+    return edges
+
+
+# ========== 点内分类共现边提取 ==========
+
+def extract_intra_category_edges(intra_associations_data: Dict) -> List[Dict]:
+    """
+    从intra_dimension_associations_analysis.json中提取点内分类共现边
+
+    Args:
+        intra_associations_data: 点内关联分析数据
+
+    Returns:
+        边列表
+    """
+    edges = []
+    seen_edges = set()  # 避免重复边
+
+    if "叶子分类组合聚类" not in intra_associations_data:
+        return edges
+
+    clusters_by_dim = intra_associations_data["叶子分类组合聚类"]
+
+    for dimension, clusters in clusters_by_dim.items():
+        if dimension not in ("灵感点", "目的点", "关键点"):
+            continue
+
+        for cluster_key, cluster_data in clusters.items():
+            leaf_categories = cluster_data.get("叶子分类组合", [])
+            point_count = cluster_data.get("点数", 0)
+            point_details = cluster_data.get("点详情列表", [])
+
+            # 提取点名称列表
+            point_names = [p.get("点名称", "") for p in point_details if p.get("点名称")]
+
+            # 两两组合生成共现边
+            for i in range(len(leaf_categories)):
+                for j in range(i + 1, len(leaf_categories)):
+                    cat1 = leaf_categories[i]
+                    cat2 = leaf_categories[j]
+
+                    # 构建节点ID
+                    cat1_id = build_node_id(dimension, "分类", cat1)
+                    cat2_id = build_node_id(dimension, "分类", cat2)
+
+                    # 确保顺序一致(按字典序)
+                    if cat1_id > cat2_id:
+                        cat1_id, cat2_id = cat2_id, cat1_id
+
+                    edge_key = (cat1_id, cat2_id, dimension)
+
+                    if edge_key in seen_edges:
+                        # 已存在的边,累加点数和点名称
+                        for edge in edges:
+                            if (edge["源节点ID"] == cat1_id and
+                                edge["目标节点ID"] == cat2_id and
+                                edge["边类型"] == "分类共现(点内)"):
+                                edge["边详情"]["点数"] += point_count
+                                edge["边详情"]["关联点名称"].extend(point_names)
+                                break
+                    else:
+                        seen_edges.add(edge_key)
+                        edge = {
+                            "源节点ID": cat1_id,
+                            "目标节点ID": cat2_id,
+                            "边类型": "分类共现(点内)",
+                            "边详情": {
+                                "点数": point_count,
+                                "关联点名称": point_names.copy()
+                            }
+                        }
+                        edges.append(edge)
+
+    return edges
+
+
+# ========== 主函数 ==========
+
+def main():
+    # 使用路径配置
+    config = PathConfig()
+    config.ensure_dirs()
+
+    print(f"账号: {config.account_name}")
+    print(f"输出版本: {config.output_version}")
+    print(f"过滤模式: {config.filter_mode}")
+    print()
+
+    # 输入文件路径
+    pattern_file = config.pattern_cluster_file
+    associations_file = config.account_dir / "pattern相关文件/optimization/dimension_associations_analysis.json"
+    intra_associations_file = config.account_dir / "pattern相关文件/optimization/intra_dimension_associations_analysis.json"
+    current_posts_dir = config.current_posts_dir
+
+    # 输出文件路径
+    nodes_output_file = config.intermediate_dir / "节点列表.json"
+    edges_output_file = config.intermediate_dir / "边关系.json"
+
+    print(f"输入文件:")
+    print(f"  pattern聚合文件: {pattern_file}")
+    print(f"  跨点关联分析文件: {associations_file}")
+    print(f"  点内关联分析文件: {intra_associations_file}")
+    print(f"  当前帖子目录: {current_posts_dir}")
+    print(f"\n输出文件:")
+    print(f"  节点列表: {nodes_output_file}")
+    print(f"  边关系: {edges_output_file}")
+    print()
+
+    # 读取pattern聚合结果
+    print("正在读取pattern聚合结果...")
+    with open(pattern_file, "r", encoding="utf-8") as f:
+        pattern_data = json.load(f)
+
+    # 读取跨点关联分析结果
+    print("正在读取跨点关联分析结果...")
+    with open(associations_file, "r", encoding="utf-8") as f:
+        associations_data = json.load(f)
+
+    # 读取点内关联分析结果
+    print("正在读取点内关联分析结果...")
+    with open(intra_associations_file, "r", encoding="utf-8") as f:
+        intra_associations_data = json.load(f)
+
+    # ===== 提取节点 =====
+    print("\n" + "="*60)
+    print("正在提取节点...")
+
+    all_nodes = []
+
+    # 维度映射
+    dimension_mapping = {
+        "灵感点列表": "灵感点",
+        "目的点": "目的点",
+        "关键点列表": "关键点"
+    }
+
+    # 提取分类节点
+    print("\n提取分类节点:")
+    for dim_key, dim_name in dimension_mapping.items():
+        category_nodes = extract_category_nodes_from_pattern(pattern_data, dim_key, dim_name)
+        all_nodes.extend(category_nodes)
+        print(f"  {dim_name}: {len(category_nodes)} 个分类节点")
+
+    # 提取标签节点
+    print("\n提取标签节点:")
+    for dim_key, dim_name in dimension_mapping.items():
+        tag_nodes = extract_tag_nodes_from_pattern(pattern_data, dim_key, dim_name)
+        all_nodes.extend(tag_nodes)
+        print(f"  {dim_name}: {len(tag_nodes)} 个标签节点")
+
+    print(f"\n总计: {len(all_nodes)} 个节点")
+
+    # 统计节点类型
+    category_count = sum(1 for n in all_nodes if n["节点类型"] == "分类")
+    tag_count = sum(1 for n in all_nodes if n["节点类型"] == "标签")
+    print(f"  分类节点: {category_count}")
+    print(f"  标签节点: {tag_count}")
+
+    # ===== 提取边 =====
+    print("\n" + "="*60)
+    print("正在提取边...")
+
+    all_edges = []
+
+    # 提取分类-分类边(跨点共现)
+    print("\n提取分类-分类边(跨点共现):")
+    category_edges = extract_category_edges_from_associations(associations_data)
+    all_edges.extend(category_edges)
+    print(f"  分类共现(跨点)边: {len(category_edges)} 条")
+
+    # 提取分类-分类边(点内共现)
+    print("\n提取分类-分类边(点内共现):")
+    intra_category_edges = extract_intra_category_edges(intra_associations_data)
+    all_edges.extend(intra_category_edges)
+    print(f"  分类共现(点内)边: {len(intra_category_edges)} 条")
+
+    # 提取标签-分类边(属于/包含)
+    print("\n提取标签-分类边(属于/包含):")
+    belong_count = 0
+    contain_count = 0
+    for dim_key, dim_name in dimension_mapping.items():
+        tag_category_edges = extract_tag_category_edges_from_pattern(pattern_data, dim_key, dim_name)
+        all_edges.extend(tag_category_edges)
+        dim_belong = sum(1 for e in tag_category_edges if e["边类型"] == "属于")
+        dim_contain = sum(1 for e in tag_category_edges if e["边类型"] == "包含")
+        belong_count += dim_belong
+        contain_count += dim_contain
+        print(f"  {dim_name}: {dim_belong} 条属于边, {dim_contain} 条包含边")
+
+    # 提取标签-标签边(共现)- 需要在过滤之前先记录排除的帖子ID
+    # 这里先占位,过滤后再处理
+    tag_cooccurrence_edges_placeholder = True
+
+    print(f"\n边统计(标签共现待提取):")
+    print(f"  分类共现(跨点)边: {len(category_edges)}")
+    print(f"  分类共现(点内)边: {len(intra_category_edges)}")
+    print(f"  属于边: {belong_count}")
+    print(f"  包含边: {contain_count}")
+
+    # ===== 应用过滤 =====
+    exclude_post_ids = set()
+    filter_mode = config.filter_mode
+
+    if filter_mode == "exclude_current_posts":
+        print("\n" + "="*60)
+        print("应用过滤规则: 排除当前帖子ID")
+        exclude_post_ids = get_current_post_ids(current_posts_dir)
+
+        if exclude_post_ids:
+            # 过滤节点
+            nodes_before = len(all_nodes)
+            all_nodes = filter_nodes_by_post_ids(all_nodes, exclude_post_ids)
+            nodes_after = len(all_nodes)
+            print(f"\n节点过滤: {nodes_before} -> {nodes_after} (移除 {nodes_before - nodes_after} 个)")
+
+            # 过滤边
+            edges_before = len(all_edges)
+            all_edges = filter_edges_by_post_ids(all_edges, exclude_post_ids)
+            edges_after = len(all_edges)
+            print(f"边过滤: {edges_before} -> {edges_after} (移除 {edges_before - edges_after} 条)")
+    elif filter_mode == "none":
+        print("\n过滤模式: none,不应用任何过滤")
+    else:
+        print(f"\n警告: 未知的过滤模式 '{filter_mode}',不应用过滤")
+
+    # ===== 提取标签-标签共现边 =====
+    print("\n" + "="*60)
+    print("提取标签-标签共现边...")
+    historical_posts_dir = config.historical_posts_dir
+    print(f"历史帖子目录: {historical_posts_dir}")
+    tag_cooccurrence_edges = extract_tag_cooccurrence_edges(historical_posts_dir, exclude_post_ids)
+    all_edges.extend(tag_cooccurrence_edges)
+    print(f"  标签-标签共现边: {len(tag_cooccurrence_edges)} 条")
+
+    # 更新总计
+    print(f"\n总计: {len(all_edges)} 条边")
+    print(f"  分类共现(跨点)边: {len(category_edges)}")
+    print(f"  分类共现(点内)边: {len(intra_category_edges)}")
+    print(f"  标签共现边: {len(tag_cooccurrence_edges)}")
+    print(f"  属于边: {belong_count}")
+    print(f"  包含边: {contain_count}")
+
+    # ===== 获取帖子详情 =====
+    print("\n" + "="*60)
+    print("获取帖子详情...")
+
+    # 收集所有需要获取详情的帖子ID(从节点和边)
+    post_ids_from_nodes = collect_all_post_ids_from_nodes(all_nodes)
+    post_ids_from_edges = collect_all_post_ids_from_edges(all_edges)
+    all_post_ids = post_ids_from_nodes | post_ids_from_edges
+    print(f"节点中的帖子: {len(post_ids_from_nodes)} 个")
+    print(f"边中的帖子: {len(post_ids_from_edges)} 个")
+    print(f"合计(去重): {len(all_post_ids)} 个")
+
+    # 批量获取帖子详情
+    post_details = fetch_post_details(all_post_ids)
+
+    # ===== 保存结果 =====
+    print("\n" + "="*60)
+
+    # 输出文件路径
+    post_details_output_file = config.intermediate_dir / "帖子详情映射.json"
+
+    # 保存节点列表
+    nodes_output = {
+        "说明": {
+            "描述": "分类和标签节点列表",
+            "数据来源": ["过去帖子_pattern聚合结果.json"],
+            "过滤模式": filter_mode,
+            "过滤帖子数": len(exclude_post_ids) if exclude_post_ids else 0
+        },
+        "节点列表": all_nodes
+    }
+
+    print(f"正在保存节点列表到: {nodes_output_file}")
+    with open(nodes_output_file, "w", encoding="utf-8") as f:
+        json.dump(nodes_output, f, ensure_ascii=False, indent=2)
+
+    # 构建节点ID索引的边关系: 节点 -> 边类型 -> {目标节点: 完整边信息}
+    edges_by_node = {}  # key: 节点ID, value: {边类型: {目标节点ID: 完整边信息}}
+    for edge in all_edges:
+        source_id = edge["源节点ID"]
+        target_id = edge["目标节点ID"]
+        edge_type = edge["边类型"]
+
+        # 源节点 -> 目标节点
+        if source_id not in edges_by_node:
+            edges_by_node[source_id] = {}
+        if edge_type not in edges_by_node[source_id]:
+            edges_by_node[source_id][edge_type] = {}
+        edges_by_node[source_id][edge_type][target_id] = edge
+
+    # 保存边关系
+    edges_output = {
+        "说明": {
+            "描述": "分类和标签之间的边关系",
+            "数据来源": ["过去帖子_pattern聚合结果.json", "dimension_associations_analysis.json", "过去帖子_what解构结果目录"],
+            "过滤模式": filter_mode,
+            "过滤帖子数": len(exclude_post_ids) if exclude_post_ids else 0
+        },
+        "边列表": all_edges,
+        "节点边索引": edges_by_node
+    }
+
+    print(f"正在保存边关系到: {edges_output_file}")
+    with open(edges_output_file, "w", encoding="utf-8") as f:
+        json.dump(edges_output, f, ensure_ascii=False, indent=2)
+
+    # 保存帖子详情映射
+    post_details_output = {
+        "说明": {
+            "描述": "帖子ID到帖子详情的映射",
+            "帖子数": len(post_details)
+        },
+        "帖子详情": post_details
+    }
+
+    print(f"正在保存帖子详情映射到: {post_details_output_file}")
+    with open(post_details_output_file, "w", encoding="utf-8") as f:
+        json.dump(post_details_output, f, ensure_ascii=False, indent=2)
+
+    print("\n完成!")
+    print(f"\n输出文件:")
+    print(f"  节点列表: {len(all_nodes)} 个节点")
+    print(f"  边关系: {len(all_edges)} 条边")
+    print(f"  帖子详情映射: {len(post_details)} 个帖子")
+
+
+# Run the pipeline only when this file is executed directly as a script.
+if __name__ == "__main__":
+    main()

+ 1100 - 0
script/data_processing/visualize_match_graph.py

@@ -0,0 +1,1100 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+将匹配图谱数据可视化为交互式HTML文件
+
+输入:match_graph目录下的JSON文件
+输出:单个HTML文件,包含所有帖子的图谱,可通过Tab切换
+"""
+
+import json
+from pathlib import Path
+from typing import Dict, List
+import sys
+
+# 添加项目根目录到路径
+project_root = Path(__file__).parent.parent.parent
+sys.path.insert(0, str(project_root))
+
+from script.data_processing.path_config import PathConfig
+
+
+HTML_TEMPLATE = '''<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>匹配图谱可视化</title>
+    <script src="https://d3js.org/d3.v7.min.js"></script>
+    <style>
+        * {{
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }}
+        body {{
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+            background: #1a1a2e;
+            color: #eee;
+            overflow: hidden;
+        }}
+        #container {{
+            display: flex;
+            height: 100vh;
+            flex-direction: column;
+        }}
+
+        /* Tab样式 */
+        .tabs {{
+            display: flex;
+            background: #0f3460;
+            padding: 0 20px;
+            overflow-x: auto;
+            flex-shrink: 0;
+        }}
+        .tab {{
+            padding: 12px 20px;
+            cursor: pointer;
+            border-bottom: 3px solid transparent;
+            white-space: nowrap;
+            font-size: 13px;
+            color: #888;
+            transition: all 0.2s;
+        }}
+        .tab:hover {{
+            color: #fff;
+            background: rgba(255,255,255,0.05);
+        }}
+        .tab.active {{
+            color: #e94560;
+            border-bottom-color: #e94560;
+            background: rgba(233, 69, 96, 0.1);
+        }}
+
+        /* 主内容区 */
+        .main-content {{
+            display: flex;
+            flex: 1;
+            overflow: hidden;
+        }}
+        #graph {{
+            flex: 1;
+            position: relative;
+        }}
+        #sidebar {{
+            width: 280px;
+            background: #16213e;
+            padding: 15px;
+            overflow-y: auto;
+            border-left: 1px solid #0f3460;
+        }}
+        h1 {{
+            font-size: 15px;
+            margin-bottom: 10px;
+            color: #e94560;
+        }}
+        h2 {{
+            font-size: 12px;
+            margin: 10px 0 6px;
+            color: #0f9b8e;
+        }}
+        .legend {{
+            margin-top: 10px;
+        }}
+        .legend-grid {{
+            display: grid;
+            grid-template-columns: 1fr 1fr;
+            gap: 4px 8px;
+        }}
+        .legend-item {{
+            display: flex;
+            align-items: center;
+            font-size: 11px;
+        }}
+        .legend-color {{
+            width: 12px;
+            height: 12px;
+            border-radius: 50%;
+            margin-right: 6px;
+            flex-shrink: 0;
+        }}
+        .legend-line {{
+            width: 20px;
+            height: 3px;
+            margin-right: 6px;
+            flex-shrink: 0;
+        }}
+        .detail-panel {{
+            margin-top: 20px;
+            padding: 15px;
+            background: #0f3460;
+            border-radius: 8px;
+            display: none;
+        }}
+        .detail-panel.active {{
+            display: block;
+        }}
+        .detail-panel h3 {{
+            font-size: 14px;
+            margin-bottom: 10px;
+            color: #e94560;
+        }}
+        .detail-panel p {{
+            font-size: 12px;
+            line-height: 1.6;
+            color: #ccc;
+            margin: 5px 0;
+        }}
+        .detail-panel .label {{
+            color: #888;
+        }}
+        .detail-panel .close-btn {{
+            position: absolute;
+            top: 10px;
+            right: 10px;
+            background: none;
+            border: none;
+            color: #888;
+            cursor: pointer;
+            font-size: 16px;
+        }}
+        .detail-panel .close-btn:hover {{
+            color: #e94560;
+        }}
+        .detail-panel-wrapper {{
+            position: relative;
+        }}
+        .similarity-score {{
+            background: #e94560;
+            color: #fff;
+            padding: 2px 6px;
+            border-radius: 4px;
+            font-weight: bold;
+        }}
+        .edge-description {{
+            background: #1a1a2e;
+            padding: 10px;
+            border-radius: 4px;
+            margin-top: 8px;
+            font-size: 11px;
+            line-height: 1.5;
+        }}
+        svg {{
+            width: 100%;
+            height: 100%;
+        }}
+        .node {{
+            cursor: pointer;
+        }}
+        .node circle, .node rect {{
+            stroke-width: 3px;
+        }}
+        .node .post-node {{
+            stroke: #fff;
+            stroke-dasharray: 4,2;
+        }}
+        .node .persona-node {{
+            stroke: #fff;
+        }}
+        .node text {{
+            font-size: 11px;
+            fill: #fff;
+            pointer-events: none;
+        }}
+        .link {{
+            stroke-opacity: 0.7;
+        }}
+        .link-hitarea {{
+            stroke: transparent;
+            stroke-width: 15px;
+            cursor: pointer;
+            fill: none;
+        }}
+        .link-hitarea:hover + .link {{
+            stroke-opacity: 1;
+            stroke-width: 3px;
+        }}
+        .edge-label {{
+            font-size: 10px;
+            fill: #fff;
+            pointer-events: none;
+            text-anchor: middle;
+        }}
+        .edge-label-bg {{
+            fill: rgba(0,0,0,0.7);
+        }}
+        .link.match {{
+            stroke: #e94560;
+            stroke-dasharray: 5,5;
+        }}
+        .link.category-cross {{
+            stroke: #2ecc71;
+        }}
+        .link.category-intra {{
+            stroke: #27ae60;
+            stroke-dasharray: 3,3;
+        }}
+        .link.tag-cooccur {{
+            stroke: #f39c12;
+        }}
+        .link.belong {{
+            stroke: #9b59b6;
+        }}
+        .link.contain {{
+            stroke: #8e44ad;
+            stroke-dasharray: 2,2;
+        }}
+        /* 镜像边样式(虚线,颜色与原边相同) */
+        .link.mirror-category-cross {{
+            stroke: #2ecc71;
+            stroke-dasharray: 6,3;
+        }}
+        .link.mirror-category-intra {{
+            stroke: #27ae60;
+            stroke-dasharray: 6,3;
+        }}
+        .link.mirror-tag-cooccur {{
+            stroke: #f39c12;
+            stroke-dasharray: 6,3;
+        }}
+        .link.mirror-belong {{
+            stroke: #9b59b6;
+            stroke-dasharray: 6,3;
+        }}
+        .link.mirror-contain {{
+            stroke: #8e44ad;
+            stroke-dasharray: 6,3;
+        }}
+        .tooltip {{
+            position: absolute;
+            background: rgba(0,0,0,0.9);
+            color: #fff;
+            padding: 10px 15px;
+            border-radius: 6px;
+            font-size: 12px;
+            pointer-events: none;
+            max-width: 300px;
+            z-index: 1000;
+            display: none;
+        }}
+        .controls {{
+            position: absolute;
+            top: 20px;
+            left: 20px;
+            background: rgba(22, 33, 62, 0.9);
+            padding: 15px;
+            border-radius: 8px;
+            z-index: 100;
+        }}
+        .controls button {{
+            background: #0f3460;
+            color: #fff;
+            border: none;
+            padding: 8px 15px;
+            margin: 5px;
+            border-radius: 4px;
+            cursor: pointer;
+            font-size: 12px;
+        }}
+        .controls button:hover {{
+            background: #e94560;
+        }}
+    </style>
+</head>
+<body>
+    <div id="container">
+        <div class="tabs" id="tabs">
+            {tabs_html}
+        </div>
+        <div class="main-content">
+            <div id="graph">
+                <div class="controls">
+                    <button onclick="resetZoom()">重置视图</button>
+                    <button onclick="toggleLabels()">切换标签</button>
+                </div>
+                <div class="tooltip" id="tooltip"></div>
+            </div>
+            <div id="sidebar">
+                <h1>匹配图谱</h1>
+
+                <div class="detail-panel active" id="detailPanel">
+                    <h3 id="detailTitle">点击节点或边查看详情</h3>
+                    <div id="detailContent">
+                        <p style="color: #888; font-size: 11px;">点击图中的节点或边,这里会显示详细信息</p>
+                    </div>
+                </div>
+
+                <div class="legend">
+                    <h2>节点</h2>
+                    <div class="legend-grid">
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #666; border: 2px dashed #fff;"></div>
+                            <span>帖子(虚线)</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #666; border: 2px solid #fff;"></div>
+                            <span>人设(实线)</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #666; border-radius: 50%;"></div>
+                            <span>标签(圆)</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #666; border-radius: 2px;"></div>
+                            <span>分类(方)</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #666; opacity: 1;"></div>
+                            <span>直接匹配</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #666; opacity: 0.5;"></div>
+                            <span>扩展节点</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #f39c12;"></div>
+                            <span>灵感点</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #3498db;"></div>
+                            <span>目的点</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-color" style="background: #9b59b6;"></div>
+                            <span>关键点</span>
+                        </div>
+                    </div>
+                    <h2>边(人设/实线)</h2>
+                    <div class="legend-grid">
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: #e94560;"></div>
+                            <span>匹配</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: #2ecc71;"></div>
+                            <span>分类共现(跨)</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: #27ae60;"></div>
+                            <span>分类共现(内)</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: #f39c12;"></div>
+                            <span>标签共现</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: #9b59b6;"></div>
+                            <span>属于</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: #8e44ad;"></div>
+                            <span>包含</span>
+                        </div>
+                    </div>
+                    <h2>镜像边(帖子/虚线)</h2>
+                    <div class="legend-grid">
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: repeating-linear-gradient(90deg, #2ecc71, #2ecc71 6px, transparent 6px, transparent 9px);"></div>
+                            <span>分类共现</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: repeating-linear-gradient(90deg, #f39c12, #f39c12 6px, transparent 6px, transparent 9px);"></div>
+                            <span>标签共现</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: repeating-linear-gradient(90deg, #9b59b6, #9b59b6 6px, transparent 6px, transparent 9px);"></div>
+                            <span>属于</span>
+                        </div>
+                        <div class="legend-item">
+                            <div class="legend-line" style="background: repeating-linear-gradient(90deg, #8e44ad, #8e44ad 6px, transparent 6px, transparent 9px);"></div>
+                            <span>包含</span>
+                        </div>
+                    </div>
+                </div>
+            </div>
+        </div>
+    </div>
+
+    <script>
+        // 所有帖子的图谱数据
+        const allGraphData = {all_graph_data};
+
+        // 当前选中的帖子索引
+        let currentIndex = 0;
+        let simulation = null;
+        let svg = null;
+        let g = null;
+        let zoom = null;
+        let showLabels = true;
+
+        // 初始化
+        function init() {{
+            const container = document.getElementById("graph");
+            const width = container.clientWidth;
+            const height = container.clientHeight;
+
+            svg = d3.select("#graph")
+                .append("svg")
+                .attr("width", width)
+                .attr("height", height);
+
+            g = svg.append("g");
+
+            zoom = d3.zoom()
+                .scaleExtent([0.1, 4])
+                .on("zoom", (event) => {{
+                    g.attr("transform", event.transform);
+                }});
+
+            svg.call(zoom);
+
+            // 绑定Tab点击事件
+            document.querySelectorAll(".tab").forEach((tab, index) => {{
+                tab.addEventListener("click", () => switchTab(index));
+            }});
+
+            // 显示第一个帖子
+            switchTab(0);
+        }}
+
+        // 切换Tab
+        function switchTab(index) {{
+            currentIndex = index;
+
+            // 更新Tab样式
+            document.querySelectorAll(".tab").forEach((tab, i) => {{
+                tab.classList.toggle("active", i === index);
+            }});
+
+            // 更新图谱
+            renderGraph(allGraphData[index]);
+        }}
+
+        // 渲染图谱
+        function renderGraph(data) {{
+            // 清空现有图谱
+            g.selectAll("*").remove();
+            if (simulation) {{
+                simulation.stop();
+            }}
+
+            const container = document.getElementById("graph");
+            const width = container.clientWidth;
+            const height = container.clientHeight;
+
+            // 准备数据
+            const nodes = data.nodes.map(n => ({{
+                ...n,
+                id: n.节点ID,
+                source: n.节点ID.startsWith("帖子_") ? "帖子" : "人设",
+                level: n.节点层级
+            }}));
+
+            const links = data.edges.map(e => ({{
+                ...e,
+                source: e.源节点ID,
+                target: e.目标节点ID,
+                type: e.边类型
+            }}));
+
+            // 分离帖子节点和人设节点
+            const postNodes = nodes.filter(n => n.source === "帖子");
+            const personaNodes = nodes.filter(n => n.source === "人设");
+            const matchLinks = links.filter(l => l.type === "匹配");
+
+            // 构建帖子节点到人设节点的映射
+            const postToPersona = {{}};
+            const personaToPost = {{}};
+            matchLinks.forEach(l => {{
+                const sid = typeof l.source === "object" ? l.source.id : l.source;
+                const tid = typeof l.target === "object" ? l.target.id : l.target;
+                if (!postToPersona[sid]) postToPersona[sid] = [];
+                postToPersona[sid].push(tid);
+                if (!personaToPost[tid]) personaToPost[tid] = [];
+                personaToPost[tid].push(sid);
+            }});
+
+            // 找出所有连通分量
+            function findConnectedComponents(nodes, links) {{
+                const nodeIds = new Set(nodes.map(n => n.id));
+                const adj = {{}};
+                nodeIds.forEach(id => adj[id] = []);
+
+                links.forEach(l => {{
+                    const sid = typeof l.source === "object" ? l.source.id : l.source;
+                    const tid = typeof l.target === "object" ? l.target.id : l.target;
+                    if (nodeIds.has(sid) && nodeIds.has(tid)) {{
+                        adj[sid].push(tid);
+                        adj[tid].push(sid);
+                    }}
+                }});
+
+                const visited = new Set();
+                const components = [];
+
+                nodeIds.forEach(startId => {{
+                    if (visited.has(startId)) return;
+
+                    const component = [];
+                    const queue = [startId];
+
+                    while (queue.length > 0) {{
+                        const id = queue.shift();
+                        if (visited.has(id)) continue;
+                        visited.add(id);
+                        component.push(id);
+                        adj[id].forEach(neighbor => {{
+                            if (!visited.has(neighbor)) queue.push(neighbor);
+                        }});
+                    }}
+
+                    components.push(component);
+                }});
+
+                return components;
+            }}
+
+            // 按大小排序连通分量(大的在前)
+            const components = findConnectedComponents(nodes, links)
+                .sort((a, b) => b.length - a.length);
+            console.log(`找到 ${{components.length}} 个连通分量`);
+
+            // 为每个节点分配连通分量ID和分量内的X范围
+            const nodeToComponent = {{}};
+            const componentCenters = {{}};
+            const componentBounds = {{}};
+            const padding = 50;  // 分量之间的间距
+            const totalPadding = padding * (components.length - 1);
+            const availableWidth = width - totalPadding - 100;  // 留边距
+
+            // 根据分量大小分配宽度
+            const totalNodes = nodes.length;
+            let currentX = 50;  // 起始边距
+
+            components.forEach((comp, i) => {{
+                const compWidth = Math.max(150, (comp.length / totalNodes) * availableWidth);
+                const centerX = currentX + compWidth / 2;
+                componentCenters[i] = centerX;
+                componentBounds[i] = {{ start: currentX, end: currentX + compWidth, width: compWidth }};
+                comp.forEach(nodeId => {{
+                    nodeToComponent[nodeId] = i;
+                }});
+                currentX += compWidth + padding;
+            }});
+
+            // 使用重心法(Barycenter)减少边交叉
+            // 迭代优化:交替调整两层节点的顺序
+
+            const nodeTargetX = {{}};
+            const personaXMap = {{}};
+
+            // 对每个连通分量单独处理
+            components.forEach((comp, compIdx) => {{
+                const bounds = componentBounds[compIdx];
+                const compPostNodes = postNodes.filter(n => nodeToComponent[n.id] === compIdx);
+                const compPersonaNodes = personaNodes.filter(n => nodeToComponent[n.id] === compIdx);
+
+                if (compPostNodes.length === 0 || compPersonaNodes.length === 0) {{
+                    // 没有匹配关系的分量,均匀分布
+                    const spacing = bounds.width / (comp.length + 1);
+                    comp.forEach((nodeId, i) => {{
+                        const node = nodes.find(n => n.id === nodeId);
+                        if (node) {{
+                            node.x = bounds.start + spacing * (i + 1);
+                            nodeTargetX[nodeId] = node.x;
+                            if (node.source === "人设") personaXMap[nodeId] = node.x;
+                        }}
+                    }});
+                    return;
+                }}
+
+                // 初始化:给人设节点一个初始顺序
+                let personaOrder = compPersonaNodes.map((n, i) => ({{ node: n, order: i }}));
+
+                // 迭代优化(3轮)
+                for (let iter = 0; iter < 3; iter++) {{
+                    // 1. 根据人设节点位置,计算帖子节点的重心
+                    const postBarycenter = {{}};
+                    compPostNodes.forEach(pn => {{
+                        const matched = postToPersona[pn.id] || [];
+                        if (matched.length > 0) {{
+                            const avgOrder = matched.reduce((sum, pid) => {{
+                                const po = personaOrder.find(p => p.node.id === pid);
+                                return sum + (po ? po.order : 0);
+                            }}, 0) / matched.length;
+                            postBarycenter[pn.id] = avgOrder;
+                        }} else {{
+                            postBarycenter[pn.id] = 0;
+                        }}
+                    }});
+
+                    // 按重心排序帖子节点
+                    const sortedPosts = [...compPostNodes].sort((a, b) =>
+                        postBarycenter[a.id] - postBarycenter[b.id]
+                    );
+
+                    // 2. 根据帖子节点位置,重新计算人设节点的重心
+                    const personaBarycenter = {{}};
+                    compPersonaNodes.forEach(pn => {{
+                        const matched = personaToPost[pn.id] || [];
+                        if (matched.length > 0) {{
+                            const avgOrder = matched.reduce((sum, pid) => {{
+                                const idx = sortedPosts.findIndex(p => p.id === pid);
+                                return sum + (idx >= 0 ? idx : 0);
+                            }}, 0) / matched.length;
+                            personaBarycenter[pn.id] = avgOrder;
+                        }} else {{
+                            personaBarycenter[pn.id] = personaOrder.find(p => p.node.id === pn.id)?.order || 0;
+                        }}
+                    }});
+
+                    // 更新人设节点顺序
+                    personaOrder = compPersonaNodes
+                        .map(n => ({{ node: n, order: personaBarycenter[n.id] }}))
+                        .sort((a, b) => a.order - b.order)
+                        .map((item, i) => ({{ node: item.node, order: i }}));
+                }}
+
+                // 最终排序
+                const finalPersonaOrder = personaOrder.map(p => p.node);
+                const postBarycenter = {{}};
+                compPostNodes.forEach(pn => {{
+                    const matched = postToPersona[pn.id] || [];
+                    if (matched.length > 0) {{
+                        const avgOrder = matched.reduce((sum, pid) => {{
+                            const idx = finalPersonaOrder.findIndex(n => n.id === pid);
+                            return sum + (idx >= 0 ? idx : 0);
+                        }}, 0) / matched.length;
+                        postBarycenter[pn.id] = avgOrder;
+                    }} else {{
+                        postBarycenter[pn.id] = 0;
+                    }}
+                }});
+                const finalPostOrder = [...compPostNodes].sort((a, b) =>
+                    postBarycenter[a.id] - postBarycenter[b.id]
+                );
+
+                // 设置位置
+                const personaSpacing = bounds.width / (finalPersonaOrder.length + 1);
+                finalPersonaOrder.forEach((n, i) => {{
+                    n.x = bounds.start + personaSpacing * (i + 1);
+                    nodeTargetX[n.id] = n.x;
+                    personaXMap[n.id] = n.x;
+                }});
+
+                const postSpacing = bounds.width / (finalPostOrder.length + 1);
+                finalPostOrder.forEach((n, i) => {{
+                    // 帖子节点用重心位置(匹配人设的平均X)
+                    const matched = postToPersona[n.id] || [];
+                    if (matched.length > 0) {{
+                        const avgX = matched.reduce((sum, pid) => sum + (personaXMap[pid] || bounds.start + bounds.width/2), 0) / matched.length;
+                        n.x = avgX;
+                    }} else {{
+                        n.x = bounds.start + postSpacing * (i + 1);
+                    }}
+                    nodeTargetX[n.id] = n.x;
+                }});
+            }});
+
+            // 节点颜色
+            const levelColors = {{
+                "灵感点": "#f39c12",
+                "目的点": "#3498db",
+                "关键点": "#9b59b6"
+            }};
+
+            // 两层Y坐标(带倾斜:右边高,左边低)
+            const postBaseY = height * 0.25;      // 帖子节点基准Y
+            const personaBaseY = height * 0.7;    // 人设节点基准Y
+            const tiltAmount = height * 0.25;     // 倾斜幅度(约14度)
+
+            // 根据X位置计算Y(右边高,左边低)
+            function getTiltedY(baseY, x) {{
+                const tilt = tiltAmount * (0.5 - x / width);
+                return baseY + tilt;
+            }}
+
+            // 力导向模拟
+            simulation = d3.forceSimulation(nodes)
+                .force("link", d3.forceLink(links).id(d => d.id).distance(120).strength(0.1))
+                .force("charge", d3.forceManyBody().strength(-400))  // 更强的互斥
+                // X方向:拉向目标位置,但允许被推开
+                .force("x", d3.forceX(d => nodeTargetX[d.id] || width / 2).strength(0.15))
+                // Y方向力:带倾斜
+                .force("y", d3.forceY(d => {{
+                    const baseY = d.source === "帖子" ? postBaseY : personaBaseY;
+                    return getTiltedY(baseY, d.x || width / 2);
+                }}).strength(0.4))
+                .force("collision", d3.forceCollide().radius(50));  // 更大的碰撞半径
+
+            // 边类型到CSS类的映射
+            const edgeTypeClass = {{
+                "匹配": "match",
+                "分类共现(跨点)": "category-cross",
+                "分类共现(点内)": "category-intra",
+                "标签共现": "tag-cooccur",
+                "属于": "belong",
+                "包含": "contain",
+                // 镜像边(帖子节点之间,虚线)
+                "镜像_分类共现(跨点)": "mirror-category-cross",
+                "镜像_分类共现(点内)": "mirror-category-intra",
+                "镜像_标签共现": "mirror-tag-cooccur",
+                "镜像_属于": "mirror-belong",
+                "镜像_包含": "mirror-contain"
+            }};
+
+            // 创建边的容器
+            const linkGroup = g.append("g").attr("class", "links");
+
+            // 为每条边创建组
+            const linkG = linkGroup.selectAll("g")
+                .data(links)
+                .join("g")
+                .attr("class", "link-group");
+
+            // 绘制点击热区(透明宽线)
+            const linkHitarea = linkG.append("line")
+                .attr("class", "link-hitarea");
+
+            // 绘制可见的边
+            const link = linkG.append("line")
+                .attr("class", d => "link " + (edgeTypeClass[d.type] || "match"))
+                .attr("stroke-width", d => d.type === "匹配" ? 2.5 : 1.5);
+
+            // 为匹配边添加分数标签
+            const edgeLabels = linkG.filter(d => d.type === "匹配" && d.边详情 && d.边详情.相似度)
+                .append("g")
+                .attr("class", "edge-label-group");
+
+            edgeLabels.append("rect")
+                .attr("class", "edge-label-bg")
+                .attr("rx", 3)
+                .attr("ry", 3);
+
+            edgeLabels.append("text")
+                .attr("class", "edge-label")
+                .text(d => {{
+                    const score = d.边详情.相似度;
+                    return typeof score === "number" ? score.toFixed(2) : score;
+                }});
+
+            // 边的点击事件
+            linkHitarea.on("click", (event, d) => {{
+                event.stopPropagation();
+                showEdgeInfo(d);
+            }})
+            .on("mouseover", function(event, d) {{
+                d3.select(this.parentNode).select(".link")
+                    .attr("stroke-opacity", 1)
+                    .attr("stroke-width", 4);
+            }})
+            .on("mouseout", function(event, d) {{
+                d3.select(this.parentNode).select(".link")
+                    .attr("stroke-opacity", 0.7)
+                    .attr("stroke-width", d.type === "匹配" ? 2.5 : 1.5);
+            }});
+
+            // 绘制节点
+            const node = g.append("g")
+                .selectAll("g")
+                .data(nodes)
+                .join("g")
+                .attr("class", "node")
+                .call(d3.drag()
+                    .on("start", dragstarted)
+                    .on("drag", dragged)
+                    .on("end", dragended));
+
+            // 根据节点类型绘制不同形状:标签用圆形,分类用方形
+            // 扩展节点用较低透明度表示
+            node.each(function(d) {{
+                const el = d3.select(this);
+                const isExpanded = d.是否扩展 === true;
+                const size = d.source === "帖子" ? 12 : (isExpanded ? 8 : 10);
+                const fill = levelColors[d.level] || "#666";
+                const nodeClass = d.source === "帖子" ? "post-node" : "persona-node";
+                const opacity = isExpanded ? 0.5 : 1;
+
+                if (d.节点类型 === "分类") {{
+                    // 方形
+                    el.append("rect")
+                        .attr("width", size * 2)
+                        .attr("height", size * 2)
+                        .attr("x", -size)
+                        .attr("y", -size)
+                        .attr("fill", fill)
+                        .attr("class", nodeClass)
+                        .attr("rx", 3)
+                        .attr("opacity", opacity);
+                }} else {{
+                    // 圆形(标签)
+                    el.append("circle")
+                        .attr("r", size)
+                        .attr("fill", fill)
+                        .attr("class", nodeClass)
+                        .attr("opacity", opacity);
+                }}
+            }});
+
+            const labels = node.append("text")
+                .attr("dx", 15)
+                .attr("dy", 4)
+                .text(d => d.节点名称)
+                .style("display", showLabels ? "block" : "none");
+
+            // 工具提示
+            const tooltip = d3.select("#tooltip");
+
+            node.on("mouseover", (event, d) => {{
+                tooltip.style("display", "block")
+                    .html(`<strong>${{d.节点名称}}</strong><br/>类型: ${{d.节点类型}}<br/>层级: ${{d.节点层级}}`);
+            }})
+            .on("mousemove", (event) => {{
+                tooltip.style("left", (event.pageX + 15) + "px")
+                    .style("top", (event.pageY - 10) + "px");
+            }})
+            .on("mouseout", () => {{
+                tooltip.style("display", "none");
+            }})
+            .on("click", (event, d) => {{
+                showNodeInfo(d);
+            }});
+
+            // 更新位置
+            simulation.on("tick", () => {{
+                // 更新热区线
+                linkHitarea
+                    .attr("x1", d => d.source.x)
+                    .attr("y1", d => d.source.y)
+                    .attr("x2", d => d.target.x)
+                    .attr("y2", d => d.target.y);
+
+                // 更新可见边
+                link
+                    .attr("x1", d => d.source.x)
+                    .attr("y1", d => d.source.y)
+                    .attr("x2", d => d.target.x)
+                    .attr("y2", d => d.target.y);
+
+                // 更新边标签位置(放在边的中点)
+                edgeLabels.attr("transform", d => {{
+                    const midX = (d.source.x + d.target.x) / 2;
+                    const midY = (d.source.y + d.target.y) / 2;
+                    return `translate(${{midX}},${{midY}})`;
+                }});
+
+                // 更新标签背景大小
+                edgeLabels.each(function(d) {{
+                    const textEl = d3.select(this).select("text").node();
+                    if (textEl) {{
+                        const bbox = textEl.getBBox();
+                        d3.select(this).select("rect")
+                            .attr("x", bbox.x - 3)
+                            .attr("y", bbox.y - 1)
+                            .attr("width", bbox.width + 6)
+                            .attr("height", bbox.height + 2);
+                    }}
+                }});
+
+                node.attr("transform", d => `translate(${{d.x}},${{d.y}})`);
+            }});
+
+            // 拖拽函数
+            function dragstarted(event, d) {{
+                if (!event.active) simulation.alphaTarget(0.3).restart();
+                d.fx = d.x;
+                d.fy = d.y;
+            }}
+
+            function dragged(event, d) {{
+                d.fx = event.x;
+                d.fy = event.y;
+            }}
+
+            function dragended(event, d) {{
+                if (!event.active) simulation.alphaTarget(0);
+                d.fx = null;
+                d.fy = null;
+            }}
+        }}
+
+        // 控制函数
+        function resetZoom() {{
+            const container = document.getElementById("graph");
+            const width = container.clientWidth;
+            const height = container.clientHeight;
+            svg.transition().duration(750).call(
+                zoom.transform,
+                d3.zoomIdentity.translate(width/2, height/2).scale(1).translate(-width/2, -height/2)
+            );
+        }}
+
+        function toggleLabels() {{
+            showLabels = !showLabels;
+            g.selectAll(".node text").style("display", showLabels ? "block" : "none");
+        }}
+
+        function showNodeInfo(d) {{
+            const panel = document.getElementById("detailPanel");
+            panel.classList.add("active");
+            document.getElementById("detailTitle").textContent = d.source === "帖子" ? "📌 帖子节点" : "👤 人设节点";
+
+            let html = `
+                <p><span class="label">节点ID:</span> ${{d.节点ID}}</p>
+                <p><span class="label">名称:</span> <strong>${{d.节点名称}}</strong></p>
+                <p><span class="label">类型:</span> ${{d.节点类型}}</p>
+                <p><span class="label">层级:</span> ${{d.节点层级}}</p>
+            `;
+
+            if (d.权重) {{
+                html += `<p><span class="label">权重:</span> ${{d.权重}}</p>`;
+            }}
+            if (d.所属分类 && d.所属分类.length > 0) {{
+                html += `<p><span class="label">所属分类:</span> ${{d.所属分类.join(" > ")}}</p>`;
+            }}
+            if (d.帖子数) {{
+                html += `<p><span class="label">帖子数:</span> ${{d.帖子数}}</p>`;
+            }}
+            document.getElementById("detailContent").innerHTML = html;
+        }}
+
+        function showEdgeInfo(d) {{
+            const panel = document.getElementById("detailPanel");
+            panel.classList.add("active");
+
+            const sourceNode = typeof d.source === "object" ? d.source : {{ id: d.source }};
+            const targetNode = typeof d.target === "object" ? d.target : {{ id: d.target }};
+
+            // 判断是否为镜像边
+            const isMirror = d.type.startsWith("镜像_");
+            document.getElementById("detailTitle").textContent = isMirror ? "🪞 镜像边详情" : "🔗 边详情";
+
+            let html = `
+                <p><span class="label">边类型:</span> <strong>${{d.type}}</strong></p>
+                <p><span class="label">源节点:</span> ${{sourceNode.节点名称 || sourceNode.id}}</p>
+                <p><span class="label">目标节点:</span> ${{targetNode.节点名称 || targetNode.id}}</p>
+            `;
+
+            if (d.边详情) {{
+                if (d.边详情.相似度 !== undefined) {{
+                    const score = typeof d.边详情.相似度 === "number" ? d.边详情.相似度.toFixed(2) : d.边详情.相似度;
+                    html += `<p><span class="label">相似度:</span> <span class="similarity-score">${{score}}</span></p>`;
+                }}
+                if (d.边详情.说明) {{
+                    html += `<p><span class="label">说明:</span></p><div class="edge-description">${{d.边详情.说明}}</div>`;
+                }}
+                if (d.边详情.共现次数 !== undefined) {{
+                    html += `<p><span class="label">共现次数:</span> ${{d.边详情.共现次数}}</p>`;
+                }}
+                // 镜像边特有信息
+                if (d.边详情.原始边类型) {{
+                    html += `<p><span class="label">原始边类型:</span> ${{d.边详情.原始边类型}}</p>`;
+                }}
+                if (d.边详情.源人设节点) {{
+                    html += `<p><span class="label">源人设节点:</span> ${{d.边详情.源人设节点}}</p>`;
+                }}
+                if (d.边详情.目标人设节点) {{
+                    html += `<p><span class="label">目标人设节点:</span> ${{d.边详情.目标人设节点}}</p>`;
+                }}
+            }}
+
+            document.getElementById("detailContent").innerHTML = html;
+        }}
+
+        function closeDetailPanel() {{
+            document.getElementById("detailPanel").classList.remove("active");
+        }}
+
+        // 页面加载完成后初始化
+        window.addEventListener("load", init);
+        window.addEventListener("resize", () => {{
+            if (currentIndex >= 0) {{
+                renderGraph(allGraphData[currentIndex]);
+            }}
+        }});
+    </script>
+</body>
+</html>
+'''
+
+
+def generate_combined_html(all_graph_data: List[Dict], output_file: Path):
+    """
+    生成包含所有帖子图谱的HTML文件
+
+    Args:
+        all_graph_data: 所有帖子的图谱数据列表
+        output_file: 输出文件路径
+    """
+    # 生成Tab HTML
+    tabs_html = ""
+    for i, data in enumerate(all_graph_data):
+        post_title = data.get("postTitle", "")
+        # 使用帖子标题,如果太长则截断
+        if post_title:
+            tab_name = post_title[:15] + "..." if len(post_title) > 15 else post_title
+        else:
+            tab_name = f"帖子 {i+1}"
+        active_class = "active" if i == 0 else ""
+        tabs_html += f'<div class="tab {active_class}" data-index="{i}">{tab_name}</div>\n'
+
+    # 生成HTML
+    html_content = HTML_TEMPLATE.format(
+        tabs_html=tabs_html,
+        all_graph_data=json.dumps(all_graph_data, ensure_ascii=False)
+    )
+
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(html_content)
+
+
+def main():
+    # 使用路径配置
+    config = PathConfig()
+
+    print(f"账号: {config.account_name}")
+    print(f"输出版本: {config.output_version}")
+    print()
+
+    # 输入目录
+    match_graph_dir = config.intermediate_dir / "match_graph"
+
+    # 输出文件
+    output_file = config.intermediate_dir / "match_graph.html"
+
+    print(f"输入目录: {match_graph_dir}")
+    print(f"输出文件: {output_file}")
+    print()
+
+    # 读取所有匹配图谱文件
+    graph_files = sorted(match_graph_dir.glob("*_match_graph.json"))
+    print(f"找到 {len(graph_files)} 个匹配图谱文件")
+
+    all_graph_data = []
+    for i, graph_file in enumerate(graph_files, 1):
+        print(f"  [{i}/{len(graph_files)}] 读取: {graph_file.name}")
+
+        with open(graph_file, "r", encoding="utf-8") as f:
+            match_graph_data = json.load(f)
+
+        # 提取需要的数据
+        graph_data = {
+            "postId": match_graph_data["说明"]["帖子ID"],
+            "postTitle": match_graph_data["说明"].get("帖子标题", ""),
+            "stats": match_graph_data["说明"]["统计"],
+            "nodes": match_graph_data["节点列表"],
+            "edges": match_graph_data["边列表"]
+        }
+        all_graph_data.append(graph_data)
+
+    # 生成HTML
+    print("\n生成HTML文件...")
+    generate_combined_html(all_graph_data, output_file)
+
+    print("\n" + "="*60)
+    print("处理完成!")
+    print(f"输出文件: {output_file}")
+
+
+if __name__ == "__main__":  # Script entry point: runs on direct execution only, not on import.
+    main()