hace 5 días · 80bd99c744
--- a/script/data_processing/visualize_how_results.py
+++ b/script/data_processing/visualize_how_results.py
@@ -22,6 +22,31 @@ sys.path.insert(0, str(project_root))
 
				 from script.data_processing.path_config import PathConfig
			
 
				 
			
 
				 
			
 
				+# ============ 相似度阈值配置 ============
			
 
				+SIMILARITY_THRESHOLD_SAME = 0.8      # >= 此值为"相同"
			
 
				+SIMILARITY_THRESHOLD_SIMILAR = 0.6  # >= 此值为"相似"，< SAME阈值
			
 
				+# < SIMILAR阈值 为"无关"
			
 
				+
			
 
				+# 相似度对应的颜色
			
 
				+SIMILARITY_COLOR_SAME = "#10b981"     # 绿色
			
 
				+SIMILARITY_COLOR_SIMILAR = "#f59e0b"  # 橙色
			
 
				+SIMILARITY_COLOR_UNRELATED = "#9ca3af"  # 灰色
			
 
				+
			
 
				+
			
 
				+def get_similarity_status(similarity: float) -> tuple:
			
 
				+    """根据相似度返回状态标签和颜色
			
 
				+
			
 
				+    Returns:
			
 
				+        tuple: (label, color, css_class)
			
 
				+    """
			
 
				+    if similarity >= SIMILARITY_THRESHOLD_SAME:
			
 
				+        return ("相同", SIMILARITY_COLOR_SAME, "same")
			
 
				+    elif similarity >= SIMILARITY_THRESHOLD_SIMILAR:
			
 
				+        return ("相似", SIMILARITY_COLOR_SIMILAR, "similar")
			
 
				+    else:
			
 
				+        return ("无关", SIMILARITY_COLOR_UNRELATED, "unrelated")
			
 
				+
			
 
				+
			
 
				 # 注意：已改用基于相似度的显示方式，不再使用关系类型
			
 
				 # def get_relation_color(relation: str) -> str:
			
 
				 #     """根据关系类型返回对应的颜色"""
			
@@ -223,8 +248,7 @@ def generate_inspiration_detail_html(inspiration_point: Dict, feature_status_map
 
				         features_html_list.append(
			
 
				             f'<span class="feature-tag {status_class}">'
			
 
				             f'<span class="feature-status-label">{status_label}</span> '
			
 
				-            f'{html_module.escape(feature_name)} '
			
 
				-            f'<span class="feature-weight">({weight})</span>'
			
 
				+            f'{html_module.escape(feature_name)}'
			
 
				             f'</span>'
			
 
				         )
			
 
				 
			
@@ -331,15 +355,7 @@ def generate_single_match_html(match: Dict, match_idx: int, post_idx: int, insp_
 
				     explanation = match_result.get("说明", "")
			
 
				 
			
 
				     # 根据相似度确定颜色和标签
			
 
				-    if similarity >= 0.9:
			
 
				-        color = "#10b981"  # 绿色 - 相同
			
 
				-        label = "相同"
			
 
				-    elif similarity >= 0.8:
			
 
				-        color = "#f59e0b"  # 橙色 - 相似
			
 
				-        label = "相似"
			
 
				-    else:
			
 
				-        color = "#9ca3af"  # 灰色 - 无关
			
 
				-        label = "无关"
			
 
				+    label, color, _ = get_similarity_status(similarity)
			
 
				 
			
 
				     match_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-match-{match_idx}"
			
 
				 
			
@@ -477,39 +493,32 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				         max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
			
 
				 
			
 
				     # 根据最高相似度确定状态
			
 
				-    if max_similarity >= 0.9:
			
 
				-        status = "相同"
			
 
				-        status_class = "status-same"
			
 
				-    elif max_similarity >= 0.8:
			
 
				-        status = "相似"
			
 
				-        status_class = "status-similar"
			
 
				-    else:
			
 
				-        status = "无关"
			
 
				-        status_class = "status-unrelated"
			
 
				+    status, _, status_suffix = get_similarity_status(max_similarity)
			
 
				+    status_class = f"status-{status_suffix}"
			
 
				 
			
 
				     found_status_html = f'<span class="feature-match-status {status_class}">{status}</span>'
			
 
				 
			
 
				     # 统计相似度分布
			
 
				-    similarity_ranges = {
			
 
				-        "相同 (≥0.9)": 0,
			
 
				-        "相似 (0.8-0.9)": 0,
			
 
				-        "无关 (<0.8)": 0
			
 
				-    }
			
 
				+    same_label = f"相同 (≥{SIMILARITY_THRESHOLD_SAME})"
			
 
				+    similar_label = f"相似 ({SIMILARITY_THRESHOLD_SIMILAR}-{SIMILARITY_THRESHOLD_SAME})"
			
 
				+    unrelated_label = f"无关 (<{SIMILARITY_THRESHOLD_SIMILAR})"
			
 
				+    similarity_ranges = {same_label: 0, similar_label: 0, unrelated_label: 0}
			
 
				     for match in match_results:
			
 
				         similarity = match.get("匹配结果", {}).get("相似度", 0)
			
 
				-        if similarity >= 0.9:
			
 
				-            similarity_ranges["相同 (≥0.9)"] += 1
			
 
				-        elif similarity >= 0.8:
			
 
				-            similarity_ranges["相似 (0.8-0.9)"] += 1
			
 
				+        status_label, _, _ = get_similarity_status(similarity)
			
 
				+        if status_label == "相同":
			
 
				+            similarity_ranges[same_label] += 1
			
 
				+        elif status_label == "相似":
			
 
				+            similarity_ranges[similar_label] += 1
			
 
				         else:
			
 
				-            similarity_ranges["无关 (<0.8)"] += 1
			
 
				+            similarity_ranges[unrelated_label] += 1
			
 
				 
			
 
				     # 生成统计信息
			
 
				     stats_items = []
			
 
				     range_colors = {
			
 
				-        "相同 (≥0.9)": "#10b981",
			
 
				-        "相似 (0.8-0.9)": "#f59e0b",
			
 
				-        "无关 (<0.8)": "#9ca3af"
			
 
				+        same_label: SIMILARITY_COLOR_SAME,
			
 
				+        similar_label: SIMILARITY_COLOR_SIMILAR,
			
 
				+        unrelated_label: SIMILARITY_COLOR_UNRELATED
			
 
				     }
			
 
				     for range_name, count in similarity_ranges.items():
			
 
				         if count > 0:
			
@@ -561,15 +570,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				         if top_match:
			
 
				             top_persona_name = top_match.get("人设特征名称", "")
			
 
				             top_feature_type = top_match.get("特征类型", "")
			
 
				-            if max_similarity >= 0.9:
			
 
				-                similarity_label = "相同"
			
 
				-                similarity_color = "#10b981"
			
 
				-            elif max_similarity >= 0.8:
			
 
				-                similarity_label = "相似"
			
 
				-                similarity_color = "#f59e0b"
			
 
				-            else:
			
 
				-                similarity_label = "无关"
			
 
				-                similarity_color = "#9ca3af"
			
 
				+            similarity_label, similarity_color, _ = get_similarity_status(max_similarity)
			
 
				 
			
 
				             top_match_html = f'''
			
 
				             <div class="level-top-match">
			
@@ -584,12 +585,8 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				         level_stats = {"相同": 0, "相似": 0, "无关": 0}
			
 
				         for _, match in all_level_matches:
			
 
				             similarity = match.get("匹配结果", {}).get("相似度", 0)
			
 
				-            if similarity >= 0.9:
			
 
				-                level_stats["相同"] += 1
			
 
				-            elif similarity >= 0.8:
			
 
				-                level_stats["相似"] += 1
			
 
				-            else:
			
 
				-                level_stats["无关"] += 1
			
 
				+            stat_label, _, _ = get_similarity_status(similarity)
			
 
				+            level_stats[stat_label] += 1
			
 
				 
			
 
				         # 生成统计标签
			
 
				         level_stats_html = ""
			
@@ -640,15 +637,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				                 tag_persona_name = tag_top_match.get("人设特征名称", "")
			
 
				                 tag_persona_level = tag_top_match.get("人设特征层级", "")
			
 
				                 tag_feature_type = tag_top_match.get("特征类型", "")
			
 
				-                if tag_max_similarity >= 0.9:
			
 
				-                    tag_color = "#10b981"
			
 
				-                    tag_label = "相同"
			
 
				-                elif tag_max_similarity >= 0.8:
			
 
				-                    tag_color = "#f59e0b"
			
 
				-                    tag_label = "相似"
			
 
				-                else:
			
 
				-                    tag_color = "#9ca3af"
			
 
				-                    tag_label = "无关"
			
 
				+                tag_label, tag_color, _ = get_similarity_status(tag_max_similarity)
			
 
				 
			
 
				                 # 生成层级-类型标签
			
 
				                 tag_combined = f"[{tag_persona_level}-{tag_feature_type}]" if tag_persona_level and tag_feature_type else ""
			
@@ -704,15 +693,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				                 cat_persona_name = cat_top_match.get("人设特征名称", "")
			
 
				                 cat_persona_level = cat_top_match.get("人设特征层级", "")
			
 
				                 cat_feature_type = cat_top_match.get("特征类型", "")
			
 
				-                if cat_max_similarity >= 0.9:
			
 
				-                    cat_color = "#10b981"
			
 
				-                    cat_label = "相同"
			
 
				-                elif cat_max_similarity >= 0.8:
			
 
				-                    cat_color = "#f59e0b"
			
 
				-                    cat_label = "相似"
			
 
				-                else:
			
 
				-                    cat_color = "#9ca3af"
			
 
				-                    cat_label = "无关"
			
 
				+                cat_label, cat_color, _ = get_similarity_status(cat_max_similarity)
			
 
				 
			
 
				                 # 生成层级-类型标签
			
 
				                 cat_combined = f"[{cat_persona_level}-{cat_feature_type}]" if cat_persona_level and cat_feature_type else ""
			
@@ -766,15 +747,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				         top_persona_name = overall_top_match.get("人设特征名称", "")
			
 
				         top_feature_type = overall_top_match.get("特征类型", "")
			
 
				         top_persona_level = overall_top_match.get("人设特征层级", "")
			
 
				-        if overall_max_similarity >= 0.9:
			
 
				-            top_color = "#10b981"
			
 
				-            top_label = "相同"
			
 
				-        elif overall_max_similarity >= 0.8:
			
 
				-            top_color = "#f59e0b"
			
 
				-            top_label = "相似"
			
 
				-        else:
			
 
				-            top_color = "#9ca3af"
			
 
				-            top_label = "无关"
			
 
				+        top_label, top_color, _ = get_similarity_status(overall_max_similarity)
			
 
				 
			
 
				         overall_top_html = f'''
			
 
				         <div class="overall-top-match">
			
@@ -790,7 +763,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				         <div class="match-section-header collapsible-header" onclick="toggleFeatureSection(event, '{section_id}')">
			
 
				             <div class="header-left">
			
 
				                 <span class="expand-icon" id="{section_id}-icon">▼</span>
			
 
				-                <h4>{feature_number}. 匹配结果: {html_module.escape(feature_name)} <span class="feature-weight-display">(权重: {feature_weight})</span></h4>
			
 
				+                <h4>{feature_number}. 匹配结果: {html_module.escape(feature_name)}</h4>
			
 
				                 {found_status_html}
			
 
				             </div>
			
 
				             <div class="match-stats">{stats_html}</div>
			
@@ -947,12 +920,8 @@ def generate_post_content_html(post_data: Dict, post_idx: int, category_mapping:
 
				                     max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
			
 
				 
			
 
				                 # 根据最高相似度确定状态
			
 
				-                if max_similarity >= 0.9:
			
 
				-                    feature_status_map[feature_name] = "相同"
			
 
				-                elif max_similarity >= 0.8:
			
 
				-                    feature_status_map[feature_name] = "相似"
			
 
				-                else:
			
 
				-                    feature_status_map[feature_name] = "无关"
			
 
				+                status_label, _, _ = get_similarity_status(max_similarity)
			
 
				+                feature_status_map[feature_name] = status_label
			
 
				 
			
 
				     # 1. 帖子详情
			
 
				     post_detail_html = generate_post_detail_html(post_data, post_idx)
			
@@ -1077,12 +1046,8 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
 
				                         max_similarity = 0.0
			
 
				                         if match_results:
			
 
				                             max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
			
 
				-                        if max_similarity >= 0.9:
			
 
				-                            feature_status_map[feature_name] = "相同"
			
 
				-                        elif max_similarity >= 0.8:
			
 
				-                            feature_status_map[feature_name] = "相似"
			
 
				-                        else:
			
 
				-                            feature_status_map[feature_name] = "无关"
			
 
				+                        status_label, _, _ = get_similarity_status(max_similarity)
			
 
				+                        feature_status_map[feature_name] = status_label
			
 
				 
			
 
				         # 生成点类型目录
			
 
				         point_types = [
			
@@ -1239,12 +1204,8 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
 
				                         max_similarity = 0.0
			
 
				                         if match_results:
			
 
				                             max_similarity = max(match.get("匹配结果", {}).get("相似度", 0) for match in match_results)
			
 
				-                        if max_similarity >= 0.9:
			
 
				-                            point_feature_status_map[feature_name] = "相同"
			
 
				-                        elif max_similarity >= 0.8:
			
 
				-                            point_feature_status_map[feature_name] = "相似"
			
 
				-                        else:
			
 
				-                            point_feature_status_map[feature_name] = "无关"
			
 
				+                        status_label, _, _ = get_similarity_status(max_similarity)
			
 
				+                        point_feature_status_map[feature_name] = status_label
			
 
				 
			
 
				                 # 生成点的详情HTML，传入特征状态映射和点类型
			
 
				                 point_detail_html = generate_inspiration_detail_html(point, point_feature_status_map, point_type)