10 Komitmen 710226ad29 ... 7f86de3a26

Pembuat SHA1 Pesan Tanggal
  yangxiaohui 7f86de3a26 fix: 扩展层排除已在直接匹配层的节点 5 hari lalu
  yangxiaohui 4207fcbb3c feat: 优化边点击高亮,预计算路径节点 5 hari lalu
  yangxiaohui 7032231c79 feat: 优化hover交互,隐藏无关节点和边 5 hari lalu
  yangxiaohui d54ab25b6c feat: 优化关系图与人设树联动,修复多边显示 5 hari lalu
  yangxiaohui 80bd99c744 refactor: 提取相似度阈值为常量配置 5 hari lalu
  yangxiaohui 0e9eb86f42 feat: 优化匹配过滤和边类型区分 5 hari lalu
  yangxiaohui 42dee7df54 add 5 hari lalu
  yangxiaohui 03736ad062 feat: 调整布局,左列(帖子标签+人设匹配)居中显示 5 hari lalu
  yangxiaohui d821b5bac8 feat: 完善三区域联动交互 5 hari lalu
  yangxiaohui 94065cee12 feat: 优化布局,人设树和关系图分离 5 hari lalu

+ 47 - 13
script/data_processing/build_match_graph.py

@@ -151,13 +151,19 @@ def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
                             )
                             persona_node_ids.add(persona_node_id)
 
-                            # 创建匹配边
+                            # 创建匹配边(根据相似度区分类型)
+                            similarity = match_detail.get("相似度", 0)
+                            if similarity >= 0.8:
+                                edge_type = "匹配_相同"
+                            else:
+                                edge_type = "匹配_相似"
+
                             match_edge = {
                                 "源节点ID": tag_node_id,
                                 "目标节点ID": persona_node_id,
-                                "边类型": "匹配",
+                                "边类型": edge_type,
                                 "边详情": {
-                                    "相似度": match_detail.get("相似度", 0),
+                                    "相似度": similarity,
                                     "说明": match_detail.get("说明", "")
                                 }
                             }
@@ -278,7 +284,9 @@ def create_mirrored_post_edges(
                     "边详情": {
                         "原始边类型": edge_type,
                         "源人设节点": source_persona,
-                        "目标人设节点": target_persona
+                        "目标人设节点": target_persona,
+                        # 完整路径节点(用于前端高亮)
+                        "路径节点": [src_post, source_persona, target_persona, tgt_post]
                     }
                 }
                 post_edges.append(post_edge)
@@ -513,7 +521,7 @@ def process_filtered_result(
 
     # 分离帖子侧的边:属于边(标签→点)和匹配边(标签→人设)
     post_belong_edges = [e for e in post_edges_raw if e["边类型"] == "属于"]
-    match_edges = [e for e in post_edges_raw if e["边类型"] == "匹配"]
+    match_edges = [e for e in post_edges_raw if e["边类型"].startswith("匹配_")]
 
     # 统计帖子点节点和标签节点
     post_point_nodes = [n for n in post_nodes if n["节点类型"] == "点"]
@@ -534,12 +542,17 @@ def process_filtered_result(
     # 扩展人设节点一层,只对标签类型的节点通过"属于"边扩展到分类
     # 过滤出标签类型的人设节点(只有标签才能"属于"分类)
     tag_persona_ids = {pid for pid in persona_node_ids if "_标签_" in pid}
-    expanded_nodes, expanded_edges, _ = expand_one_layer(
+    expanded_nodes_raw, expanded_edges_raw, _ = expand_one_layer(
         tag_persona_ids, edges_data, nodes_data,
         edge_types=["属于"],
         direction="outgoing"  # 只向外扩展:标签->分类
     )
 
+    # 排除已经在第3层(直接匹配)中的节点,避免同一节点出现在两层
+    expanded_nodes = [n for n in expanded_nodes_raw if n["节点ID"] not in persona_node_ids]
+    expanded_edges = [e for e in expanded_edges_raw
+                      if e["目标节点ID"] not in persona_node_ids or e["源节点ID"] not in persona_node_ids]
+
     # 创建通过扩展节点的帖子镜像边(正确逻辑)
     # 逻辑:帖子->标签->分类,分类之间有边,则对应帖子产生二阶边
 
@@ -603,10 +616,10 @@ def process_filtered_result(
                             "边类型": f"二阶_{edge_type}",
                             "边详情": {
                                 "原始边类型": edge_type,
-                                "分类节点1": cat1,
-                                "分类节点2": cat2,
-                                "标签节点1": tag1,
-                                "标签节点2": tag2
+                                "源人设节点": cat1,  # 统一字段:指向产生关系的人设节点(分类)
+                                "目标人设节点": cat2,
+                                # 完整路径节点(用于前端高亮)
+                                "路径节点": [post1, tag1, cat1, cat2, tag2, post2]
                             }
                         })
 
@@ -614,8 +627,8 @@ def process_filtered_result(
     # 1. 找出产生了二阶帖子边的扩展节点(分类)
     useful_expanded_ids = set()
     for edge in post_edges_via_expanded:
-        cat1 = edge.get("边详情", {}).get("分类节点1")
-        cat2 = edge.get("边详情", {}).get("分类节点2")
+        cat1 = edge.get("边详情", {}).get("源人设节点")
+        cat2 = edge.get("边详情", {}).get("目标人设节点")
         if cat1:
             useful_expanded_ids.add(cat1)
         if cat2:
@@ -647,6 +660,26 @@ def process_filtered_result(
             unique_edges.append(edge)
     all_edges = unique_edges
 
+    # 构建人设边到镜像边的反向映射
+    # key: "源人设节点ID|目标人设节点ID" (排序后的)
+    # value: [{镜像边信息}, ...]
+    persona_edge_to_mirror_edges = {}
+    all_mirror_edges = post_edges + post_edges_via_expanded
+    for mirror_edge in all_mirror_edges:
+        detail = mirror_edge.get("边详情", {})
+        src_persona = detail.get("源人设节点")
+        tgt_persona = detail.get("目标人设节点")
+        if src_persona and tgt_persona:
+            # 使用排序后的key,确保 A|B 和 B|A 映射到同一个key
+            edge_key = "|".join(sorted([src_persona, tgt_persona]))
+            if edge_key not in persona_edge_to_mirror_edges:
+                persona_edge_to_mirror_edges[edge_key] = []
+            persona_edge_to_mirror_edges[edge_key].append({
+                "源节点ID": mirror_edge["源节点ID"],
+                "目标节点ID": mirror_edge["目标节点ID"],
+                "边类型": mirror_edge["边类型"]
+            })
+
     # 构建节点边索引
     edges_by_node = {}
     for edge in all_edges:
@@ -695,7 +728,8 @@ def process_filtered_result(
         "帖子镜像边列表(二阶)": post_edges_via_expanded,
         "节点列表": all_nodes,
         "边列表": all_edges,
-        "节点边索引": edges_by_node
+        "节点边索引": edges_by_node,
+        "人设边到镜像边映射": persona_edge_to_mirror_edges
     }
 
     # 保存输出文件

+ 47 - 18
script/data_processing/build_persona_tree.py

@@ -96,28 +96,11 @@ def build_persona_tree():
         name = n.get("节点名称", "")
         category_name_to_id[(level, name)] = n["节点ID"]
 
-    # 从分类的"所属分类"字段构建分类之间的层级边(统一用"属于")
-    for n in category_nodes:
-        level = n.get("节点层级", "")
-        parent_names = n.get("所属分类", [])
-        if parent_names:
-            parent_name = parent_names[-1]  # 取最后一个作为直接父分类
-            parent_id = category_name_to_id.get((level, parent_name))
-            if parent_id:
-                tree_edges.append({
-                    "源节点ID": n["节点ID"],
-                    "目标节点ID": parent_id,
-                    "边类型": "属于"
-                })
-
-    # 添加所有原始边(两端节点都在树中的,排除"包含"边因为与"属于"重复)
+    # 先添加所有原始边(两端节点都在树中的)
     for e in all_edges:
         src_id = e["源节点ID"]
         tgt_id = e["目标节点ID"]
         edge_type = e["边类型"]
-        # 跳过"包含"边(与"属于"是反向关系,保留"属于"即可)
-        if edge_type == "包含":
-            continue
         if src_id in node_ids and tgt_id in node_ids:
             tree_edges.append({
                 "源节点ID": src_id,
@@ -126,6 +109,27 @@ def build_persona_tree():
                 "边详情": e.get("边详情", {})
             })
 
+    # 从分类的"所属分类"字段补充分类之间的层级边(如果不存在)
+    for n in category_nodes:
+        level = n.get("节点层级", "")
+        parent_names = n.get("所属分类", [])
+        if parent_names:
+            parent_name = parent_names[-1]  # 取最后一个作为直接父分类
+            parent_id = category_name_to_id.get((level, parent_name))
+            if parent_id:
+                # 检查是否已存在属于边
+                edge_exists = any(
+                    e["源节点ID"] == n["节点ID"] and e["目标节点ID"] == parent_id
+                    and e["边类型"] == "属于"
+                    for e in tree_edges
+                )
+                if not edge_exists:
+                    tree_edges.append({
+                        "源节点ID": n["节点ID"],
+                        "目标节点ID": parent_id,
+                        "边类型": "属于"
+                    })
+
     # 从标签的"所属分类"字段补充标签->分类的边(如果不存在)
     for n in tag_nodes:
         level = n.get("节点层级", "")
@@ -148,6 +152,31 @@ def build_persona_tree():
                         "边详情": {}
                     })
 
+    # 为分类间的"属于"边生成反向的"包含"边
+    # 这样 父分类→子分类 也有边,查询"包含"时可以找到子分类
+    category_ids = set(n["节点ID"] for n in category_nodes)
+    contain_edges_to_add = []
+    for e in tree_edges:
+        if e["边类型"] == "属于":
+            src_id = e["源节点ID"]
+            tgt_id = e["目标节点ID"]
+            # 只为分类→分类的属于边生成反向包含边
+            if src_id in category_ids and tgt_id in category_ids:
+                # 检查是否已存在包含边
+                edge_exists = any(
+                    ex["源节点ID"] == tgt_id and ex["目标节点ID"] == src_id
+                    and ex["边类型"] == "包含"
+                    for ex in tree_edges
+                )
+                if not edge_exists:
+                    contain_edges_to_add.append({
+                        "源节点ID": tgt_id,
+                        "目标节点ID": src_id,
+                        "边类型": "包含",
+                        "边详情": {"说明": "分类层级关系(属于的反向)"}
+                    })
+    tree_edges.extend(contain_edges_to_add)
+
     # 统计各类型边
     tree_edge_counts = {}
     for e in tree_edges:

+ 28 - 2
script/data_processing/extract_nodes_and_edges.py

@@ -232,6 +232,29 @@ def extract_category_nodes_from_pattern(
     if dimension_key not in pattern_data:
         return nodes
 
+    def collect_sources_recursively(node: Dict) -> List[Dict]:
+        """递归收集节点及其所有子节点的特征来源"""
+        sources = []
+
+        # 收集当前节点的特征
+        if "特征列表" in node:
+            for feature in node["特征列表"]:
+                source = {
+                    "点的名称": feature.get("所属点", ""),
+                    "点的描述": feature.get("点描述", ""),
+                    "帖子ID": feature.get("帖子id", "")
+                }
+                sources.append(source)
+
+        # 递归收集子节点的特征
+        for key, value in node.items():
+            if key in ["特征列表", "_meta", "帖子数", "特征数", "帖子列表"]:
+                continue
+            if isinstance(value, dict):
+                sources.extend(collect_sources_recursively(value))
+
+        return sources
+
     def traverse_node(node: Dict, parent_categories: List[str]):
         """递归遍历节点"""
         for key, value in node.items():
@@ -245,7 +268,7 @@ def extract_category_nodes_from_pattern(
                 # 获取帖子列表
                 post_ids = value.get("帖子列表", [])
 
-                # 构建节点来源(从特征列表中获取)
+                # 构建节点来源(从特征列表中获取,如果没有则递归收集子分类的特征来源)
                 node_sources = []
                 if "特征列表" in value:
                     for feature in value["特征列表"]:
@@ -255,6 +278,9 @@ def extract_category_nodes_from_pattern(
                             "帖子ID": feature.get("帖子id", "")
                         }
                         node_sources.append(source)
+                else:
+                    # 没有直接特征,递归收集子分类的特征来源
+                    node_sources = collect_sources_recursively(value)
 
                 node_info = {
                     "节点ID": build_node_id(dimension_name, "分类", key),
@@ -262,7 +288,7 @@ def extract_category_nodes_from_pattern(
                     "节点类型": "分类",
                     "节点层级": dimension_name,
                     "所属分类": parent_categories.copy(),
-                    "帖子数": len(post_ids),
+                    "帖子数": len(post_ids) if post_ids else len(set(s.get("帖子ID", "") for s in node_sources if s.get("帖子ID"))),
                     "节点来源": node_sources
                 }
                 nodes.append(node_info)

+ 7 - 7
script/data_processing/filter_how_results.py

@@ -5,7 +5,7 @@ How解构结果过滤脚本
 
 从 how 解构结果中过滤出高质量的匹配结果:
 1. 移除 what解构结果 字段
-2. 只保留相似度 >= 0.6 的匹配结果
+2. 只保留相似度 >= 0.5 的 top1 匹配结果
 3. 保留特征即使其匹配结果为空
 """
 
@@ -23,7 +23,7 @@ sys.path.insert(0, str(project_root))
 from script.data_processing.path_config import PathConfig
 
 
-def filter_match_results(feature_list: List[Dict], threshold: float = 0.6) -> List[Dict]:
+def filter_match_results(feature_list: List[Dict], threshold: float = 0.5) -> List[Dict]:
     """
     过滤特征列表中的匹配结果
 
@@ -65,7 +65,7 @@ def filter_match_results(feature_list: List[Dict], threshold: float = 0.6) -> Li
     return filtered_features
 
 
-def filter_how_steps(how_steps: List[Dict], threshold: float = 0.6) -> List[Dict]:
+def filter_how_steps(how_steps: List[Dict], threshold: float = 0.5) -> List[Dict]:
     """
     过滤 how 步骤列表
 
@@ -88,7 +88,7 @@ def filter_how_steps(how_steps: List[Dict], threshold: float = 0.6) -> List[Dict
     return filtered_steps
 
 
-def filter_point_list(point_list: List[Dict], threshold: float = 0.6) -> List[Dict]:
+def filter_point_list(point_list: List[Dict], threshold: float = 0.5) -> List[Dict]:
     """
     过滤点列表(灵感点/关键点/目的点)
 
@@ -148,7 +148,7 @@ def calculate_statistics(original_point_list: List[Dict], filtered_point_list: L
     }
 
 
-def process_single_file(input_file: Path, output_file: Path, threshold: float = 0.6) -> Dict:
+def process_single_file(input_file: Path, output_file: Path, threshold: float = 0.5) -> Dict:
     """
     处理单个文件
 
@@ -215,8 +215,8 @@ def main():
     parser.add_argument(
         "--threshold",
         type=float,
-        default=0.6,
-        help="相似度阈值(默认 0.6)"
+        default=0.5,
+        help="相似度阈值(默认 0.5)"
     )
 
     args = parser.parse_args()

+ 55 - 94
script/data_processing/visualize_how_results.py

@@ -22,6 +22,31 @@ sys.path.insert(0, str(project_root))
 from script.data_processing.path_config import PathConfig
 
 
+# ============ 相似度阈值配置 ============
+SIMILARITY_THRESHOLD_SAME = 0.8      # >= 此值为"相同"
+SIMILARITY_THRESHOLD_SIMILAR = 0.6  # >= 此值为"相似",< SAME阈值
+# < SIMILAR阈值 为"无关"
+
+# 相似度对应的颜色
+SIMILARITY_COLOR_SAME = "#10b981"     # 绿色
+SIMILARITY_COLOR_SIMILAR = "#f59e0b"  # 橙色
+SIMILARITY_COLOR_UNRELATED = "#9ca3af"  # 灰色
+
+
+def get_similarity_status(similarity: float) -> tuple:
+    """根据相似度返回状态标签和颜色
+
+    Returns:
+        tuple: (label, color, css_class)
+    """
+    if similarity >= SIMILARITY_THRESHOLD_SAME:
+        return ("相同", SIMILARITY_COLOR_SAME, "same")
+    elif similarity >= SIMILARITY_THRESHOLD_SIMILAR:
+        return ("相似", SIMILARITY_COLOR_SIMILAR, "similar")
+    else:
+        return ("无关", SIMILARITY_COLOR_UNRELATED, "unrelated")
+
+
 # 注意:已改用基于相似度的显示方式,不再使用关系类型
 # def get_relation_color(relation: str) -> str:
 #     """根据关系类型返回对应的颜色"""
@@ -223,8 +248,7 @@ def generate_inspiration_detail_html(inspiration_point: Dict, feature_status_map
         features_html_list.append(
             f'<span class="feature-tag {status_class}">'
             f'<span class="feature-status-label">{status_label}</span> '
-            f'{html_module.escape(feature_name)} '
-            f'<span class="feature-weight">({weight})</span>'
+            f'{html_module.escape(feature_name)}'
             f'</span>'
         )
 
@@ -331,15 +355,7 @@ def generate_single_match_html(match: Dict, match_idx: int, post_idx: int, insp_
     explanation = match_result.get("说明", "")
 
     # 根据相似度确定颜色和标签
-    if similarity >= 0.9:
-        color = "#10b981"  # 绿色 - 相同
-        label = "相同"
-    elif similarity >= 0.8:
-        color = "#f59e0b"  # 橙色 - 相似
-        label = "相似"
-    else:
-        color = "#9ca3af"  # 灰色 - 无关
-        label = "无关"
+    label, color, _ = get_similarity_status(similarity)
 
     match_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-match-{match_idx}"
 
@@ -477,39 +493,32 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
 
     # 根据最高相似度确定状态
-    if max_similarity >= 0.9:
-        status = "相同"
-        status_class = "status-same"
-    elif max_similarity >= 0.8:
-        status = "相似"
-        status_class = "status-similar"
-    else:
-        status = "无关"
-        status_class = "status-unrelated"
+    status, _, status_suffix = get_similarity_status(max_similarity)
+    status_class = f"status-{status_suffix}"
 
     found_status_html = f'<span class="feature-match-status {status_class}">{status}</span>'
 
     # 统计相似度分布
-    similarity_ranges = {
-        "相同 (≥0.9)": 0,
-        "相似 (0.8-0.9)": 0,
-        "无关 (<0.8)": 0
-    }
+    same_label = f"相同 (≥{SIMILARITY_THRESHOLD_SAME})"
+    similar_label = f"相似 ({SIMILARITY_THRESHOLD_SIMILAR}-{SIMILARITY_THRESHOLD_SAME})"
+    unrelated_label = f"无关 (<{SIMILARITY_THRESHOLD_SIMILAR})"
+    similarity_ranges = {same_label: 0, similar_label: 0, unrelated_label: 0}
     for match in match_results:
         similarity = match.get("匹配结果", {}).get("相似度", 0)
-        if similarity >= 0.9:
-            similarity_ranges["相同 (≥0.9)"] += 1
-        elif similarity >= 0.8:
-            similarity_ranges["相似 (0.8-0.9)"] += 1
+        status_label, _, _ = get_similarity_status(similarity)
+        if status_label == "相同":
+            similarity_ranges[same_label] += 1
+        elif status_label == "相似":
+            similarity_ranges[similar_label] += 1
         else:
-            similarity_ranges["无关 (<0.8)"] += 1
+            similarity_ranges[unrelated_label] += 1
 
     # 生成统计信息
     stats_items = []
     range_colors = {
-        "相同 (≥0.9)": "#10b981",
-        "相似 (0.8-0.9)": "#f59e0b",
-        "无关 (<0.8)": "#9ca3af"
+        same_label: SIMILARITY_COLOR_SAME,
+        similar_label: SIMILARITY_COLOR_SIMILAR,
+        unrelated_label: SIMILARITY_COLOR_UNRELATED
     }
     for range_name, count in similarity_ranges.items():
         if count > 0:
@@ -561,15 +570,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         if top_match:
             top_persona_name = top_match.get("人设特征名称", "")
             top_feature_type = top_match.get("特征类型", "")
-            if max_similarity >= 0.9:
-                similarity_label = "相同"
-                similarity_color = "#10b981"
-            elif max_similarity >= 0.8:
-                similarity_label = "相似"
-                similarity_color = "#f59e0b"
-            else:
-                similarity_label = "无关"
-                similarity_color = "#9ca3af"
+            similarity_label, similarity_color, _ = get_similarity_status(max_similarity)
 
             top_match_html = f'''
             <div class="level-top-match">
@@ -584,12 +585,8 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         level_stats = {"相同": 0, "相似": 0, "无关": 0}
         for _, match in all_level_matches:
             similarity = match.get("匹配结果", {}).get("相似度", 0)
-            if similarity >= 0.9:
-                level_stats["相同"] += 1
-            elif similarity >= 0.8:
-                level_stats["相似"] += 1
-            else:
-                level_stats["无关"] += 1
+            stat_label, _, _ = get_similarity_status(similarity)
+            level_stats[stat_label] += 1
 
         # 生成统计标签
         level_stats_html = ""
@@ -640,15 +637,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
                 tag_persona_name = tag_top_match.get("人设特征名称", "")
                 tag_persona_level = tag_top_match.get("人设特征层级", "")
                 tag_feature_type = tag_top_match.get("特征类型", "")
-                if tag_max_similarity >= 0.9:
-                    tag_color = "#10b981"
-                    tag_label = "相同"
-                elif tag_max_similarity >= 0.8:
-                    tag_color = "#f59e0b"
-                    tag_label = "相似"
-                else:
-                    tag_color = "#9ca3af"
-                    tag_label = "无关"
+                tag_label, tag_color, _ = get_similarity_status(tag_max_similarity)
 
                 # 生成层级-类型标签
                 tag_combined = f"[{tag_persona_level}-{tag_feature_type}]" if tag_persona_level and tag_feature_type else ""
@@ -704,15 +693,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
                 cat_persona_name = cat_top_match.get("人设特征名称", "")
                 cat_persona_level = cat_top_match.get("人设特征层级", "")
                 cat_feature_type = cat_top_match.get("特征类型", "")
-                if cat_max_similarity >= 0.9:
-                    cat_color = "#10b981"
-                    cat_label = "相同"
-                elif cat_max_similarity >= 0.8:
-                    cat_color = "#f59e0b"
-                    cat_label = "相似"
-                else:
-                    cat_color = "#9ca3af"
-                    cat_label = "无关"
+                cat_label, cat_color, _ = get_similarity_status(cat_max_similarity)
 
                 # 生成层级-类型标签
                 cat_combined = f"[{cat_persona_level}-{cat_feature_type}]" if cat_persona_level and cat_feature_type else ""
@@ -766,15 +747,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         top_persona_name = overall_top_match.get("人设特征名称", "")
         top_feature_type = overall_top_match.get("特征类型", "")
         top_persona_level = overall_top_match.get("人设特征层级", "")
-        if overall_max_similarity >= 0.9:
-            top_color = "#10b981"
-            top_label = "相同"
-        elif overall_max_similarity >= 0.8:
-            top_color = "#f59e0b"
-            top_label = "相似"
-        else:
-            top_color = "#9ca3af"
-            top_label = "无关"
+        top_label, top_color, _ = get_similarity_status(overall_max_similarity)
 
         overall_top_html = f'''
         <div class="overall-top-match">
@@ -790,7 +763,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         <div class="match-section-header collapsible-header" onclick="toggleFeatureSection(event, '{section_id}')">
             <div class="header-left">
                 <span class="expand-icon" id="{section_id}-icon">▼</span>
-                <h4>{feature_number}. 匹配结果: {html_module.escape(feature_name)} <span class="feature-weight-display">(权重: {feature_weight})</span></h4>
+                <h4>{feature_number}. 匹配结果: {html_module.escape(feature_name)}</h4>
                 {found_status_html}
             </div>
             <div class="match-stats">{stats_html}</div>
@@ -947,12 +920,8 @@ def generate_post_content_html(post_data: Dict, post_idx: int, category_mapping:
                     max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
 
                 # 根据最高相似度确定状态
-                if max_similarity >= 0.9:
-                    feature_status_map[feature_name] = "相同"
-                elif max_similarity >= 0.8:
-                    feature_status_map[feature_name] = "相似"
-                else:
-                    feature_status_map[feature_name] = "无关"
+                status_label, _, _ = get_similarity_status(max_similarity)
+                feature_status_map[feature_name] = status_label
 
     # 1. 帖子详情
     post_detail_html = generate_post_detail_html(post_data, post_idx)
@@ -1077,12 +1046,8 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
                         max_similarity = 0.0
                         if match_results:
                             max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
-                        if max_similarity >= 0.9:
-                            feature_status_map[feature_name] = "相同"
-                        elif max_similarity >= 0.8:
-                            feature_status_map[feature_name] = "相似"
-                        else:
-                            feature_status_map[feature_name] = "无关"
+                        status_label, _, _ = get_similarity_status(max_similarity)
+                        feature_status_map[feature_name] = status_label
 
         # 生成点类型目录
         point_types = [
@@ -1239,12 +1204,8 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
                         max_similarity = 0.0
                         if match_results:
                             max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
-                        if max_similarity >= 0.9:
-                            point_feature_status_map[feature_name] = "相同"
-                        elif max_similarity >= 0.8:
-                            point_feature_status_map[feature_name] = "相似"
-                        else:
-                            point_feature_status_map[feature_name] = "无关"
+                        status_label, _, _ = get_similarity_status(max_similarity)
+                        point_feature_status_map[feature_name] = status_label
 
                 # 生成点的详情HTML,传入特征状态映射和点类型
                 point_detail_html = generate_inspiration_detail_html(point, point_feature_status_map, point_type)

File diff ditekan karena terlalu besar
+ 592 - 128
script/data_processing/visualize_match_graph.py


Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini