Explorar el Código

refactor: 提取相似度阈值为常量配置

- 去掉特征列表的权重展示
- 调整相似度阈值: 相同>=0.8, 相似>=0.6, 无关<0.6
- 新增 get_similarity_status() 函数统一处理阈值判断
- 所有硬编码阈值替换为常量引用,便于维护

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
yangxiaohui hace 5 días
padre
commit
80bd99c744
Se ha modificado 1 fichero con 55 adiciones y 94 borrados
  1. 55 94
      script/data_processing/visualize_how_results.py

+ 55 - 94
script/data_processing/visualize_how_results.py

@@ -22,6 +22,31 @@ sys.path.insert(0, str(project_root))
 from script.data_processing.path_config import PathConfig
 
 
+# ============ Similarity threshold configuration ============
+SIMILARITY_THRESHOLD_SAME = 0.8      # scores >= this value are labelled "相同" (same)
+SIMILARITY_THRESHOLD_SIMILAR = 0.6  # scores >= this value and < the SAME threshold are labelled "相似" (similar)
+# scores < the SIMILAR threshold are labelled "无关" (unrelated)
+
+# Colour used to render each similarity status
+SIMILARITY_COLOR_SAME = "#10b981"     # green
+SIMILARITY_COLOR_SIMILAR = "#f59e0b"  # orange
+SIMILARITY_COLOR_UNRELATED = "#9ca3af"  # grey
+
+
+def get_similarity_status(similarity: float) -> tuple:
+    """Return the status label, colour and CSS class key for a similarity score.
+
+    Returns:
+        tuple: (label, color, css_class), where label is "相同", "相似" or "无关",
+            color is the matching SIMILARITY_COLOR_* value, and css_class is "same", "similar" or "unrelated".
+    """
+    if similarity >= SIMILARITY_THRESHOLD_SAME:
+        return ("相同", SIMILARITY_COLOR_SAME, "same")
+    elif similarity >= SIMILARITY_THRESHOLD_SIMILAR:  # reached only when below the SAME threshold
+        return ("相似", SIMILARITY_COLOR_SIMILAR, "similar")
+    else:  # neither threshold was met
+        return ("无关", SIMILARITY_COLOR_UNRELATED, "unrelated")
+
+
 # 注意:已改用基于相似度的显示方式,不再使用关系类型
 # def get_relation_color(relation: str) -> str:
 #     """根据关系类型返回对应的颜色"""
@@ -223,8 +248,7 @@ def generate_inspiration_detail_html(inspiration_point: Dict, feature_status_map
         features_html_list.append(
             f'<span class="feature-tag {status_class}">'
             f'<span class="feature-status-label">{status_label}</span> '
-            f'{html_module.escape(feature_name)} '
-            f'<span class="feature-weight">({weight})</span>'
+            f'{html_module.escape(feature_name)}'
             f'</span>'
         )
 
@@ -331,15 +355,7 @@ def generate_single_match_html(match: Dict, match_idx: int, post_idx: int, insp_
     explanation = match_result.get("说明", "")
 
     # 根据相似度确定颜色和标签
-    if similarity >= 0.9:
-        color = "#10b981"  # 绿色 - 相同
-        label = "相同"
-    elif similarity >= 0.8:
-        color = "#f59e0b"  # 橙色 - 相似
-        label = "相似"
-    else:
-        color = "#9ca3af"  # 灰色 - 无关
-        label = "无关"
+    label, color, _ = get_similarity_status(similarity)
 
     match_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-match-{match_idx}"
 
@@ -477,39 +493,32 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
 
     # 根据最高相似度确定状态
-    if max_similarity >= 0.9:
-        status = "相同"
-        status_class = "status-same"
-    elif max_similarity >= 0.8:
-        status = "相似"
-        status_class = "status-similar"
-    else:
-        status = "无关"
-        status_class = "status-unrelated"
+    status, _, status_suffix = get_similarity_status(max_similarity)
+    status_class = f"status-{status_suffix}"
 
     found_status_html = f'<span class="feature-match-status {status_class}">{status}</span>'
 
     # 统计相似度分布
-    similarity_ranges = {
-        "相同 (≥0.9)": 0,
-        "相似 (0.8-0.9)": 0,
-        "无关 (<0.8)": 0
-    }
+    same_label = f"相同 (≥{SIMILARITY_THRESHOLD_SAME})"
+    similar_label = f"相似 ({SIMILARITY_THRESHOLD_SIMILAR}-{SIMILARITY_THRESHOLD_SAME})"
+    unrelated_label = f"无关 (<{SIMILARITY_THRESHOLD_SIMILAR})"
+    similarity_ranges = {same_label: 0, similar_label: 0, unrelated_label: 0}
     for match in match_results:
         similarity = match.get("匹配结果", {}).get("相似度", 0)
-        if similarity >= 0.9:
-            similarity_ranges["相同 (≥0.9)"] += 1
-        elif similarity >= 0.8:
-            similarity_ranges["相似 (0.8-0.9)"] += 1
+        status_label, _, _ = get_similarity_status(similarity)
+        if status_label == "相同":
+            similarity_ranges[same_label] += 1
+        elif status_label == "相似":
+            similarity_ranges[similar_label] += 1
         else:
-            similarity_ranges["无关 (<0.8)"] += 1
+            similarity_ranges[unrelated_label] += 1
 
     # 生成统计信息
     stats_items = []
     range_colors = {
-        "相同 (≥0.9)": "#10b981",
-        "相似 (0.8-0.9)": "#f59e0b",
-        "无关 (<0.8)": "#9ca3af"
+        same_label: SIMILARITY_COLOR_SAME,
+        similar_label: SIMILARITY_COLOR_SIMILAR,
+        unrelated_label: SIMILARITY_COLOR_UNRELATED
     }
     for range_name, count in similarity_ranges.items():
         if count > 0:
@@ -561,15 +570,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         if top_match:
             top_persona_name = top_match.get("人设特征名称", "")
             top_feature_type = top_match.get("特征类型", "")
-            if max_similarity >= 0.9:
-                similarity_label = "相同"
-                similarity_color = "#10b981"
-            elif max_similarity >= 0.8:
-                similarity_label = "相似"
-                similarity_color = "#f59e0b"
-            else:
-                similarity_label = "无关"
-                similarity_color = "#9ca3af"
+            similarity_label, similarity_color, _ = get_similarity_status(max_similarity)
 
             top_match_html = f'''
             <div class="level-top-match">
@@ -584,12 +585,8 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         level_stats = {"相同": 0, "相似": 0, "无关": 0}
         for _, match in all_level_matches:
             similarity = match.get("匹配结果", {}).get("相似度", 0)
-            if similarity >= 0.9:
-                level_stats["相同"] += 1
-            elif similarity >= 0.8:
-                level_stats["相似"] += 1
-            else:
-                level_stats["无关"] += 1
+            stat_label, _, _ = get_similarity_status(similarity)
+            level_stats[stat_label] += 1
 
         # 生成统计标签
         level_stats_html = ""
@@ -640,15 +637,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
                 tag_persona_name = tag_top_match.get("人设特征名称", "")
                 tag_persona_level = tag_top_match.get("人设特征层级", "")
                 tag_feature_type = tag_top_match.get("特征类型", "")
-                if tag_max_similarity >= 0.9:
-                    tag_color = "#10b981"
-                    tag_label = "相同"
-                elif tag_max_similarity >= 0.8:
-                    tag_color = "#f59e0b"
-                    tag_label = "相似"
-                else:
-                    tag_color = "#9ca3af"
-                    tag_label = "无关"
+                tag_label, tag_color, _ = get_similarity_status(tag_max_similarity)
 
                 # 生成层级-类型标签
                 tag_combined = f"[{tag_persona_level}-{tag_feature_type}]" if tag_persona_level and tag_feature_type else ""
@@ -704,15 +693,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
                 cat_persona_name = cat_top_match.get("人设特征名称", "")
                 cat_persona_level = cat_top_match.get("人设特征层级", "")
                 cat_feature_type = cat_top_match.get("特征类型", "")
-                if cat_max_similarity >= 0.9:
-                    cat_color = "#10b981"
-                    cat_label = "相同"
-                elif cat_max_similarity >= 0.8:
-                    cat_color = "#f59e0b"
-                    cat_label = "相似"
-                else:
-                    cat_color = "#9ca3af"
-                    cat_label = "无关"
+                cat_label, cat_color, _ = get_similarity_status(cat_max_similarity)
 
                 # 生成层级-类型标签
                 cat_combined = f"[{cat_persona_level}-{cat_feature_type}]" if cat_persona_level and cat_feature_type else ""
@@ -766,15 +747,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         top_persona_name = overall_top_match.get("人设特征名称", "")
         top_feature_type = overall_top_match.get("特征类型", "")
         top_persona_level = overall_top_match.get("人设特征层级", "")
-        if overall_max_similarity >= 0.9:
-            top_color = "#10b981"
-            top_label = "相同"
-        elif overall_max_similarity >= 0.8:
-            top_color = "#f59e0b"
-            top_label = "相似"
-        else:
-            top_color = "#9ca3af"
-            top_label = "无关"
+        top_label, top_color, _ = get_similarity_status(overall_max_similarity)
 
         overall_top_html = f'''
         <div class="overall-top-match">
@@ -790,7 +763,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         <div class="match-section-header collapsible-header" onclick="toggleFeatureSection(event, '{section_id}')">
             <div class="header-left">
                 <span class="expand-icon" id="{section_id}-icon">▼</span>
-                <h4>{feature_number}. 匹配结果: {html_module.escape(feature_name)} <span class="feature-weight-display">(权重: {feature_weight})</span></h4>
+                <h4>{feature_number}. 匹配结果: {html_module.escape(feature_name)}</h4>
                 {found_status_html}
             </div>
             <div class="match-stats">{stats_html}</div>
@@ -947,12 +920,8 @@ def generate_post_content_html(post_data: Dict, post_idx: int, category_mapping:
                     max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
 
                 # 根据最高相似度确定状态
-                if max_similarity >= 0.9:
-                    feature_status_map[feature_name] = "相同"
-                elif max_similarity >= 0.8:
-                    feature_status_map[feature_name] = "相似"
-                else:
-                    feature_status_map[feature_name] = "无关"
+                status_label, _, _ = get_similarity_status(max_similarity)
+                feature_status_map[feature_name] = status_label
 
     # 1. 帖子详情
     post_detail_html = generate_post_detail_html(post_data, post_idx)
@@ -1077,12 +1046,8 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
                         max_similarity = 0.0
                         if match_results:
                             max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
-                        if max_similarity >= 0.9:
-                            feature_status_map[feature_name] = "相同"
-                        elif max_similarity >= 0.8:
-                            feature_status_map[feature_name] = "相似"
-                        else:
-                            feature_status_map[feature_name] = "无关"
+                        status_label, _, _ = get_similarity_status(max_similarity)
+                        feature_status_map[feature_name] = status_label
 
         # 生成点类型目录
         point_types = [
@@ -1239,12 +1204,8 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
                         max_similarity = 0.0
                         if match_results:
                             max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
-                        if max_similarity >= 0.9:
-                            point_feature_status_map[feature_name] = "相同"
-                        elif max_similarity >= 0.8:
-                            point_feature_status_map[feature_name] = "相似"
-                        else:
-                            point_feature_status_map[feature_name] = "无关"
+                        status_label, _, _ = get_similarity_status(max_similarity)
+                        point_feature_status_map[feature_name] = status_label
 
                 # 生成点的详情HTML,传入特征状态映射和点类型
                 point_detail_html = generate_inspiration_detail_html(point, point_feature_status_map, point_type)