Bläddra i källkod

feat: 添加支撑边和关联边支持

适配新数据结构,从点的 支撑的ID 和 关联的ID 字段提取支撑边和关联边:
- build_post_graph.py: 新增支撑边和关联边提取逻辑,更新统计输出
- edgeStyle.js: 添加支撑(橙色)和关联(紫色)边类型的样式配置

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
yangxiaohui 1 dag sedan
förälder
incheckning
19a98bacec

+ 140 - 139
script/data_processing/build_post_graph.py

@@ -152,19 +152,26 @@ def create_edge(
 
 # ==================== 从帖子解构结果提取节点和匹配边 ====================
 
-def extract_points_tags_and_matches(filtered_data: Dict) -> tuple:
+def extract_tags_and_matches(filtered_data: Dict) -> tuple:
     """
-    从帖子解构结果中提取点节点、标签节点和匹配边
+    从帖子解构结果中提取标签节点、匹配边、支撑边和关联边(适配新结构)
+
+    新结构:解构结果 → 点列表 → 点 → 匹配人设结果
+    新结构的"点"对应旧结构的"标签"节点,直接挂在维度下
 
     Returns:
-        (点节点字典, 标签节点字典, 标签到点的映射, 匹配边字典)
+        (标签节点字典, 匹配边字典, 支撑边字典, 关联边字典)
     """
-    point_nodes = {}  # nodeId -> nodeData
     tag_nodes = {}    # nodeId -> nodeData
-    tag_to_point = {} # tagId -> [pointId, ...]
     match_edges = {}  # edgeId -> edgeData
+    support_edges = {}  # 支撑边
+    relation_edges = {}  # 关联边
+
+    # ID 到节点ID的映射(用于构建支撑边和关联边)
+    id_to_node_id = {}
 
-    how_result = filtered_data.get("how解构结果", {})
+    # 新结构使用 "解构结果"
+    result = filtered_data.get("解构结果", {})
 
     dimension_mapping = {
         "灵感点列表": "灵感点",
@@ -172,167 +179,158 @@ def extract_points_tags_and_matches(filtered_data: Dict) -> tuple:
         "关键点列表": "关键点"
     }
 
+    # 第一遍:创建节点并建立 ID 映射
     for list_key, dimension in dimension_mapping.items():
-        points = how_result.get(list_key, [])
+        points = result.get(list_key, [])
 
         for point in points:
-            point_name = point.get("名称", "")
-            point_desc = point.get("描述", "")
+            tag_name = point.get("名称", "")
+            tag_desc = point.get("描述", "")
+            point_id = point.get("ID", "")
 
-            if not point_name:
+            if not tag_name:
                 continue
 
-            # 创建点节点
-            point_id = build_node_id("帖子", dimension, "点", point_name)
-            point_nodes[point_id] = create_node(
+            # 新结构的"点"直接创建为"标签"节点
+            tag_id = build_node_id("帖子", dimension, "标签", tag_name)
+            tag_nodes[tag_id] = create_node(
                 domain="帖子",
                 dimension=dimension,
-                node_type="",
-                name=point_name,
+                node_type="标签",
+                name=tag_name,
                 detail={
-                    "description": point_desc
+                    "description": tag_desc,
+                    "pointId": point_id
                 }
             )
 
-            # 遍历how步骤列表,提取标签和匹配
-            how_steps = point.get("how步骤列表", [])
-
-            for step in how_steps:
-                step_name = step.get("步骤名称", "")
-                features = step.get("特征列表", [])
-
-                for feature in features:
-                    tag_name = feature.get("特征名称", "")
-                    weight = feature.get("权重", 1.0)
-
-                    if not tag_name:
-                        continue
-
-                    # 创建标签节点
-                    tag_id = build_node_id("帖子", dimension, "标签", tag_name)
-
-                    if tag_id not in tag_nodes:
-                        tag_nodes[tag_id] = create_node(
-                            domain="帖子",
-                            dimension=dimension,
-                            node_type="标签",
-                            name=tag_name,
-                            detail={
-                                "weight": weight,
-                                "stepName": step_name,
-                                "pointNames": [point_name]
-                            }
-                        )
-                    else:
-                        # 同一标签可能属于多个点
-                        if point_name not in tag_nodes[tag_id]["detail"]["pointNames"]:
-                            tag_nodes[tag_id]["detail"]["pointNames"].append(point_name)
-
-                    # 记录标签到点的映射
-                    if tag_id not in tag_to_point:
-                        tag_to_point[tag_id] = []
-                    if point_id not in tag_to_point[tag_id]:
-                        tag_to_point[tag_id].append(point_id)
-
-                    # 提取匹配边
-                    matches = feature.get("匹配结果", [])
-                    for match in matches:
-                        persona_name = match.get("人设特征名称", "")
-                        persona_dimension = match.get("人设特征层级", "")
-                        persona_type = match.get("特征类型", "标签")
-                        match_detail = match.get("匹配结果", {})
-                        similarity = match_detail.get("相似度", 0)
-
-                        if not persona_name or not persona_dimension:
-                            continue
-
-                        # 构建人设节点ID
-                        persona_id = build_node_id("人设", persona_dimension, persona_type, persona_name)
-
-                        # 创建双向匹配边
-                        # 帖子标签 -> 人设标签
-                        edge_id_1 = build_edge_id(tag_id, "匹配", persona_id)
-                        match_edges[edge_id_1] = create_edge(
-                            source=tag_id,
-                            target=persona_id,
-                            edge_type="匹配",
-                            score=similarity,
-                            detail={}
-                        )
+            # 建立 ID 映射
+            if point_id:
+                id_to_node_id[point_id] = tag_id
+
+            # 直接从点的 匹配人设结果 提取匹配边
+            matches = point.get("匹配人设结果", [])
+            for match in matches:
+                persona_name = match.get("人设特征名称", "")
+                persona_dimension = match.get("人设特征层级", "")
+                # 映射:源数据中 "点" → "标签"
+                persona_type = match.get("特征类型", "标签")
+                if persona_type == "点":
+                    persona_type = "标签"
+                similarity = match.get("相似度", 0)
+
+                if not persona_name or not persona_dimension:
+                    continue
+
+                # 构建人设节点ID
+                persona_id = build_node_id("人设", persona_dimension, persona_type, persona_name)
+
+                # 创建双向匹配边
+                # 帖子标签 -> 人设标签
+                edge_id_1 = build_edge_id(tag_id, "匹配", persona_id)
+                match_edges[edge_id_1] = create_edge(
+                    source=tag_id,
+                    target=persona_id,
+                    edge_type="匹配",
+                    score=similarity,
+                    detail={}
+                )
+
+                # 人设标签 -> 帖子标签
+                edge_id_2 = build_edge_id(persona_id, "匹配", tag_id)
+                match_edges[edge_id_2] = create_edge(
+                    source=persona_id,
+                    target=tag_id,
+                    edge_type="匹配",
+                    score=similarity,
+                    detail={}
+                )
+
+    # 第二遍:构建支撑边和关联边
+    for list_key, dimension in dimension_mapping.items():
+        points = result.get(list_key, [])
+
+        for point in points:
+            tag_name = point.get("名称", "")
+            point_id = point.get("ID", "")
 
-                        # 人设标签 -> 帖子标签
-                        edge_id_2 = build_edge_id(persona_id, "匹配", tag_id)
-                        match_edges[edge_id_2] = create_edge(
-                            source=persona_id,
-                            target=tag_id,
-                            edge_type="匹配",
-                            score=similarity,
+            if not tag_name or not point_id:
+                continue
+
+            tag_id = id_to_node_id.get(point_id)
+            if not tag_id:
+                continue
+
+            # 支撑边:当前点 -> 被支撑的点
+            support_ids = point.get("支撑的ID", [])
+            for target_point_id in support_ids:
+                target_node_id = id_to_node_id.get(target_point_id)
+                if target_node_id:
+                    edge_id = build_edge_id(tag_id, "支撑", target_node_id)
+                    support_edges[edge_id] = create_edge(
+                        source=tag_id,
+                        target=target_node_id,
+                        edge_type="支撑",
+                        score=1.0,
+                        detail={}
+                    )
+
+            # 关联边:当前点 <-> 关联的点(双向)
+            relation_ids = point.get("关联的ID", [])
+            for target_point_id in relation_ids:
+                target_node_id = id_to_node_id.get(target_point_id)
+                if target_node_id:
+                    # 去重:同一方向的关联边只创建一次(若双方互相声明关联,仍会生成两个方向的边)
+                    edge_id = build_edge_id(tag_id, "关联", target_node_id)
+                    if edge_id not in relation_edges:
+                        relation_edges[edge_id] = create_edge(
+                            source=tag_id,
+                            target=target_node_id,
+                            edge_type="关联",
+                            score=1.0,
                             detail={}
                         )
 
-    return point_nodes, tag_nodes, tag_to_point, match_edges
+    return tag_nodes, match_edges, support_edges, relation_edges
 
 
 # ==================== 构建边 ====================
 
 def build_belong_contain_edges(
-    point_nodes: Dict[str, Dict],
     tag_nodes: Dict[str, Dict],
-    tag_to_point: Dict[str, List[str]],
     dimension_node_ids: Dict[str, str]
 ) -> Dict[str, Dict]:
     """
-    构建属于/包含边
+    构建属于/包含边(新结构:标签直接挂维度下)
 
     Returns:
         边字典 { edgeId: edgeData }
     """
     edges = {}
 
-    # 1. 点 -> 维度(属于/包含)
-    for point_id, point_data in point_nodes.items():
-        dimension = point_data["dimension"]
+    # 标签 -> 维度(属于/包含)
+    for tag_id, tag_data in tag_nodes.items():
+        dimension = tag_data["dimension"]
         dim_node_id = dimension_node_ids[dimension]
 
-        # 属于边: -> 维度
-        edge_id = build_edge_id(point_id, "属于", dim_node_id)
+        # 属于边:标签 -> 维度
+        edge_id = build_edge_id(tag_id, "属于", dim_node_id)
         edges[edge_id] = create_edge(
-            source=point_id,
+            source=tag_id,
             target=dim_node_id,
             edge_type="属于",
             score=1.0
         )
 
-        # 包含边:维度 -> 
-        edge_id_contain = build_edge_id(dim_node_id, "包含", point_id)
+        # 包含边:维度 -> 标签
+        edge_id_contain = build_edge_id(dim_node_id, "包含", tag_id)
         edges[edge_id_contain] = create_edge(
             source=dim_node_id,
-            target=point_id,
+            target=tag_id,
             edge_type="包含",
             score=1.0
         )
 
-    # 2. 标签 -> 点(属于/包含)
-    for tag_id, point_ids in tag_to_point.items():
-        for point_id in point_ids:
-            # 属于边:标签 -> 点
-            edge_id = build_edge_id(tag_id, "属于", point_id)
-            edges[edge_id] = create_edge(
-                source=tag_id,
-                target=point_id,
-                edge_type="属于",
-                score=1.0
-            )
-
-            # 包含边:点 -> 标签
-            edge_id_contain = build_edge_id(point_id, "包含", tag_id)
-            edges[edge_id_contain] = create_edge(
-                source=point_id,
-                target=tag_id,
-                edge_type="包含",
-                score=1.0
-            )
-
     return edges
 
 
@@ -530,8 +528,8 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
     all_nodes = {}
     all_edges = {}
 
-    # 1. 提取点节点、标签节点和匹配边
-    point_nodes, tag_nodes, tag_to_point, match_edges = extract_points_tags_and_matches(filtered_data)
+    # 1. 提取标签节点、匹配边、支撑边和关联边(新结构:没有点层)
+    tag_nodes, match_edges, support_edges, relation_edges = extract_tags_and_matches(filtered_data)
 
     # 2. 添加根节点
     root_id = build_node_id("帖子", "帖子", "帖子", post_id)
@@ -580,36 +578,36 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
             score=1.0
         )
 
-    # 4. 添加点节点和标签节点
-    all_nodes.update(point_nodes)
+    # 4. 添加标签节点
     all_nodes.update(tag_nodes)
 
-    # 5. 构建属于/包含边
-    belong_contain_edges = build_belong_contain_edges(
-        point_nodes, tag_nodes, tag_to_point, dimension_node_ids
-    )
+    # 5. 构建属于/包含边(标签直接挂维度下)
+    belong_contain_edges = build_belong_contain_edges(tag_nodes, dimension_node_ids)
     all_edges.update(belong_contain_edges)
 
     # 6. 添加匹配边
     all_edges.update(match_edges)
 
-    # 7. 构建索引
+    # 7. 添加支撑边和关联边
+    all_edges.update(support_edges)
+    all_edges.update(relation_edges)
+
+    # 8. 构建索引
     index = build_index(all_edges)
 
-    # 8. 构建嵌套树
+    # 9. 构建嵌套树
     tree = build_nested_tree(all_nodes, all_edges, root_id)
 
     # 统计
-    point_count = len(point_nodes)
     tag_count = len(tag_nodes)
     match_count = len(match_edges) // 2  # 双向边,除以2得到实际匹配数
+    support_count = len(support_edges)
+    relation_count = len(relation_edges)
 
     dimension_stats = {}
     for dim in dimensions:
-        dim_points = sum(1 for n in point_nodes.values() if n["dimension"] == dim)
         dim_tags = sum(1 for n in tag_nodes.values() if n["dimension"] == dim)
         dimension_stats[dim] = {
-            "pointCount": dim_points,
             "tagCount": dim_tags
         }
 
@@ -623,9 +621,10 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
             "stats": {
                 "nodeCount": len(all_nodes),
                 "edgeCount": len(all_edges),
-                "pointCount": point_count,
                 "tagCount": tag_count,
                 "matchCount": match_count,
+                "supportCount": support_count,
+                "relationCount": relation_count,
                 "dimensions": dimension_stats
             }
         },
@@ -645,9 +644,10 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
         "postTitle": post_title,
         "nodeCount": len(all_nodes),
         "edgeCount": len(all_edges),
-        "pointCount": point_count,
         "tagCount": tag_count,
         "matchCount": match_count,
+        "supportCount": support_count,
+        "relationCount": relation_count,
         "outputFile": str(output_file)
     }
 
@@ -685,7 +685,7 @@ def main():
         result = process_single_post(filtered_file, output_dir)
         results.append(result)
         print(f"  节点: {result['nodeCount']}, 边: {result['edgeCount']}")
-        print(f"  点: {result['pointCount']}, 标签: {result['tagCount']}, 匹配: {result['matchCount']}")
+        print(f"  标签: {result['tagCount']}, 匹配: {result['matchCount']}, 支撑: {result['supportCount']}, 关联: {result['relationCount']}")
         print(f"  → {Path(result['outputFile']).name}")
         print()
 
@@ -695,9 +695,10 @@ def main():
     print(f"  帖子数: {len(results)}")
     print(f"  总节点数: {sum(r['nodeCount'] for r in results)}")
     print(f"  总边数: {sum(r['edgeCount'] for r in results)}")
-    print(f"  总点数: {sum(r['pointCount'] for r in results)}")
     print(f"  总标签数: {sum(r['tagCount'] for r in results)}")
     print(f"  总匹配数: {sum(r['matchCount'] for r in results)}")
+    print(f"  总支撑边: {sum(r['supportCount'] for r in results)}")
+    print(f"  总关联边: {sum(r['relationCount'] for r in results)}")
     print(f"\n输出目录: {output_dir}")
 
 

+ 62 - 86
script/data_processing/filter_how_results.py

@@ -3,10 +3,12 @@
 """
 How解构结果过滤脚本
 
-从 how 解构结果中过滤出高质量的匹配结果:
-1. 移除 what解构结果 字段
-2. 只保留相似度 >= 0.5 的 top1 匹配结果
-3. 保留特征即使其匹配结果为空
+从解构结果中过滤出高质量的匹配结果:
+1. 只保留相似度 >= 阈值的匹配结果
+2. 可选只保留 top N 匹配结果
+
+新数据结构:
+  解构结果 → 点列表 → 点 → 匹配人设结果[]
 """
 
 import json
@@ -23,94 +25,63 @@ sys.path.insert(0, str(project_root))
 from script.data_processing.path_config import PathConfig
 
 
-def filter_match_results(feature_list: List[Dict], threshold: float = 0.5) -> List[Dict]:
+def filter_point_matches(point: Dict, threshold: float = 0.5, top_n: int = None) -> Dict:
     """
-    过滤特征列表中的匹配结果
+    过滤单个点的匹配人设结果
 
     Args:
-        feature_list: 特征列表
+        point: 点数据
         threshold: 相似度阈值
+        top_n: 只保留前N个匹配(None表示不限制)
 
     Returns:
-        过滤后的特征列表
-    """
-    filtered_features = []
-
-    for feature in feature_list:
-        filtered_feature = {
-            "特征名称": feature.get("特征名称", ""),
-            "权重": feature.get("权重", 1.0),
-            "匹配结果": []
-        }
-
-        # 过滤匹配结果
-        match_results = feature.get("匹配结果", [])
-        for match in match_results:
-            similarity = match.get("匹配结果", {}).get("相似度", 0)
-            if similarity >= threshold:
-                filtered_feature["匹配结果"].append(match)
-
-        # 按相似度降序排序,只保留 top1
-        if filtered_feature["匹配结果"]:
-            filtered_feature["匹配结果"].sort(
-                key=lambda x: x.get("匹配结果", {}).get("相似度", 0),
-                reverse=True
-            )
-            # 只保留相似度最高的一个
-            filtered_feature["匹配结果"] = [filtered_feature["匹配结果"][0]]
-
-        # 保留特征即使匹配结果为空
-        filtered_features.append(filtered_feature)
-
-    return filtered_features
-
-
-def filter_how_steps(how_steps: List[Dict], threshold: float = 0.5) -> List[Dict]:
+        过滤后的点数据
     """
-    过滤 how 步骤列表
+    # 复制点的基本信息
+    filtered_point = {
+        "名称": point.get("名称", ""),
+        "描述": point.get("描述", ""),
+        "匹配人设结果": []
+    }
 
-    Args:
-        how_steps: how 步骤列表
-        threshold: 相似度阈值
+    # 保留其他字段(如ID、类型等)
+    for key in ["ID", "类型", "置信度", "支撑的ID", "关联的ID"]:
+        if key in point:
+            filtered_point[key] = point[key]
 
-    Returns:
-        过滤后的 how 步骤列表
-    """
-    filtered_steps = []
+    # 过滤匹配结果
+    matches = point.get("匹配人设结果", [])
+    filtered_matches = []
 
-    for step in how_steps:
-        filtered_step = {
-            "步骤名称": step.get("步骤名称", ""),
-            "特征列表": filter_match_results(step.get("特征列表", []), threshold)
-        }
-        filtered_steps.append(filtered_step)
+    for match in matches:
+        similarity = match.get("相似度", 0)
+        if similarity >= threshold:
+            filtered_matches.append(match)
 
-    return filtered_steps
+    # 按相似度降序排序
+    filtered_matches.sort(key=lambda x: x.get("相似度", 0), reverse=True)
 
+    # 只保留 top N
+    if top_n is not None and len(filtered_matches) > top_n:
+        filtered_matches = filtered_matches[:top_n]
 
-def filter_point_list(point_list: List[Dict], threshold: float = 0.5) -> List[Dict]:
+    filtered_point["匹配人设结果"] = filtered_matches
+    return filtered_point
+
+
+def filter_point_list(point_list: List[Dict], threshold: float = 0.5, top_n: int = None) -> List[Dict]:
     """
     过滤点列表(灵感点/关键点/目的点)
 
     Args:
         point_list: 点列表
         threshold: 相似度阈值
+        top_n: 只保留前N个匹配
 
     Returns:
         过滤后的点列表
     """
-    filtered_points = []
-
-    for point in point_list:
-        filtered_point = {
-            "名称": point.get("名称", ""),
-            "描述": point.get("描述", ""),
-            "特征列表": point.get("特征列表", []),
-            "how步骤列表": filter_how_steps(point.get("how步骤列表", []), threshold)
-        }
-        filtered_points.append(filtered_point)
-
-    return filtered_points
+    return [filter_point_matches(point, threshold, top_n) for point in point_list]
 
 
 def calculate_statistics(original_point_list: List[Dict], filtered_point_list: List[Dict]) -> Dict:
@@ -129,15 +100,11 @@ def calculate_statistics(original_point_list: List[Dict], filtered_point_list: L
 
     # 统计原始匹配数量
     for point in original_point_list:
-        for step in point.get("how步骤列表", []):
-            for feature in step.get("特征列表", []):
-                original_count += len(feature.get("匹配结果", []))
+        original_count += len(point.get("匹配人设结果", []))
 
     # 统计过滤后匹配数量
     for point in filtered_point_list:
-        for step in point.get("how步骤列表", []):
-            for feature in step.get("特征列表", []):
-                filtered_count += len(feature.get("匹配结果", []))
+        filtered_count += len(point.get("匹配人设结果", []))
 
     return {
         "原始匹配数": original_count,
@@ -148,7 +115,7 @@ def calculate_statistics(original_point_list: List[Dict], filtered_point_list: L
     }
 
 
-def process_single_file(input_file: Path, output_file: Path, threshold: float = 0.5) -> Dict:
+def process_single_file(input_file: Path, output_file: Path, threshold: float = 0.5, top_n: int = None) -> Dict:
     """
     处理单个文件
 
@@ -156,6 +123,7 @@ def process_single_file(input_file: Path, output_file: Path, threshold: float =
         input_file: 输入文件路径
         output_file: 输出文件路径
         threshold: 相似度阈值
+        top_n: 只保留前N个匹配
 
     Returns:
         统计信息
@@ -164,15 +132,15 @@ def process_single_file(input_file: Path, output_file: Path, threshold: float =
     with open(input_file, "r", encoding="utf-8") as f:
         data = json.load(f)
 
-    # 提取基本信息(移除 what解构结果)
+    # 提取基本信息
     filtered_data = {
         "帖子id": data.get("帖子id", ""),
         "帖子详情": data.get("帖子详情", {})
     }
 
-    # 处理 how解构结果
-    how_result = data.get("how解构结果", {})
-    filtered_how_result = {}
+    # 处理解构结果(新结构)
+    result = data.get("解构结果", {})
+    filtered_result = {}
 
     stats = {
         "灵感点": {"原始匹配数": 0, "过滤后匹配数": 0},
@@ -182,17 +150,17 @@ def process_single_file(input_file: Path, output_file: Path, threshold: float =
 
     for point_type in ["灵感点", "关键点", "目的点"]:
         point_list_key = f"{point_type}列表"
-        original_points = how_result.get(point_list_key, [])
+        original_points = result.get(point_list_key, [])
 
         if original_points:
-            filtered_points = filter_point_list(original_points, threshold)
-            filtered_how_result[point_list_key] = filtered_points
+            filtered_points = filter_point_list(original_points, threshold, top_n)
+            filtered_result[point_list_key] = filtered_points
 
             # 计算统计
             point_stats = calculate_statistics(original_points, filtered_points)
             stats[point_type] = point_stats
 
-    filtered_data["how解构结果"] = filtered_how_result
+    filtered_data["解构结果"] = filtered_result
 
     # 保存过滤后的文件
     output_file.parent.mkdir(parents=True, exist_ok=True)
@@ -211,13 +179,19 @@ def process_single_file(input_file: Path, output_file: Path, threshold: float =
 
 def main():
     """主函数"""
-    parser = argparse.ArgumentParser(description="过滤 how 解构结果,只保留高相似度的匹配")
+    parser = argparse.ArgumentParser(description="过滤解构结果,只保留高相似度的匹配")
     parser.add_argument(
         "--threshold",
         type=float,
         default=0.5,
         help="相似度阈值(默认 0.5)"
     )
+    parser.add_argument(
+        "--top-n",
+        type=int,
+        default=1,
+        help="每个点只保留前N个匹配(默认1)"
+    )
 
     args = parser.parse_args()
 
@@ -231,11 +205,13 @@ def main():
     input_dir = config.how_results_dir
     output_dir = config.intermediate_dir / "filtered_results"
     threshold = args.threshold
+    top_n = args.top_n
 
     print(f"账号: {config.account_name}")
     print(f"输入目录: {input_dir}")
     print(f"输出目录: {output_dir}")
     print(f"相似度阈值: {threshold}")
+    print(f"Top N: {top_n if top_n else '不限制'}")
     print()
 
     # 确保输出目录存在
@@ -260,7 +236,7 @@ def main():
         output_file = output_dir / f"{post_id}_filtered.json"
 
         # 处理文件
-        stats = process_single_file(input_file, output_file, threshold)
+        stats = process_single_file(input_file, output_file, threshold, top_n)
 
         total_original += stats["原始匹配数"]
         total_filtered += stats["过滤后匹配数"]

+ 18 - 21
script/data_processing/run_graph_pipeline.sh

@@ -8,13 +8,10 @@
 #   4. match_inspiration_features.py
 #
 # 本脚本执行:
-#   5. filter_how_results.py      - 过滤how解构结果
-#   6. extract_nodes_and_edges.py - 提取节点和边
-#   7. build_persona_graph.py     - 构建人设图谱
-#   8. build_match_graph.py       - 构建匹配图谱
-#   9. build_post_graph.py        - 构建帖子图谱
-#  10. visualize_match_graph.py   - 生成匹配图谱可视化HTML
-#  11. visualization/build.py     - 生成人设图谱可视化HTML
+#   1. filter_how_results.py      - 过滤how解构结果
+#   2. build_persona_graph.py     - 构建人设图谱
+#   3. build_post_graph.py        - 构建帖子图谱
+#   4. visualization/build.py     - 生成人设图谱可视化HTML
 #
 # 使用方式:
 #   ./run_graph_pipeline.sh              # 使用默认账号
@@ -47,7 +44,7 @@ run_step() {
     local step_name=$2
     local script_name=$3
 
-    print_step "$step_num/7" "$step_name"
+    print_step "$step_num/4" "$step_name"
 
     if python "script/data_processing/$script_name"; then
         print_success "$step_name 完成"
@@ -74,26 +71,26 @@ process_account() {
     # 设置环境变量
     export ACCOUNT_NAME="$account_name"
 
-    # 步骤5: 过滤how解构结果
+    # 步骤1: 过滤how解构结果
     run_step 1 "过滤how解构结果" "filter_how_results.py" || return 1
 
-    # 步骤6: 提取节点和边
-    run_step 2 "提取节点和边" "extract_nodes_and_edges.py" || return 1
+    # # 步骤2: 提取节点和边(不再需要)
+    # run_step 2 "提取节点和边" "extract_nodes_and_edges.py" || return 1
 
-    # 步骤7: 构建人设图谱
-    run_step 3 "构建人设图谱" "build_persona_graph.py" || return 1
+    # 步骤2: 构建人设图谱
+    run_step 2 "构建人设图谱" "build_persona_graph.py" || return 1
 
-    # 步骤8: 构建匹配图谱
-    run_step 4 "构建匹配图谱" "build_match_graph.py" || return 1
+    # # 步骤: 构建匹配图谱(不再需要)
+    # run_step 4 "构建匹配图谱" "build_match_graph.py" || return 1
 
-    # 步骤9: 构建帖子图谱
-    run_step 5 "构建帖子图谱" "build_post_graph.py" || return 1
+    # 步骤3: 构建帖子图谱
+    run_step 3 "构建帖子图谱" "build_post_graph.py" || return 1
 
-    # 步骤10: 生成匹配图谱可视化HTML
-    run_step 6 "生成匹配图谱可视化" "visualize_match_graph.py" || return 1
+    # # 步骤: 生成匹配图谱可视化HTML(不再需要)
+    # run_step 6 "生成匹配图谱可视化" "visualize_match_graph.py" || return 1
 
-    # 步骤11: 生成人设图谱可视化HTML
-    print_step "7/7" "生成人设图谱可视化"
+    # 步骤4: 生成人设图谱可视化HTML
+    print_step "4/4" "生成人设图谱可视化"
     if python "script/visualization/build.py"; then
         print_success "生成人设图谱可视化 完成"
         echo ""

+ 7 - 1
script/visualization/src/config/edgeStyle.js

@@ -8,7 +8,9 @@ export const edgeTypeColors = {
   '分类共现': '#f39c12',
   '匹配': '#e94560',
   '推导': '#00bcd4',  // 青色 - 推导关系
-  '组成': '#8bc34a'   // 浅绿色 - 组合成员
+  '组成': '#8bc34a',  // 浅绿色 - 组合成员
+  '支撑': '#ff9800',  // 橙色 - 支撑关系
+  '关联': '#9c27b0'   // 紫色 - 关联关系
 }
 
 // 获取边样式(统一入口)
@@ -35,6 +37,10 @@ export function getEdgeStyle(edge) {
     opacity = Math.max(0.4, score * 0.8)
   } else if (type === '组成') {
     opacity = 0.6
+  } else if (type === '支撑') {
+    opacity = 0.7
+  } else if (type === '关联') {
+    opacity = 0.6
   }
 
   return {