1 тиждень тому · 18695bc29a
--- a/script/data_processing/match_inspiration_features.py
+++ b/script/data_processing/match_inspiration_features.py
@@ -35,6 +35,7 @@ def get_semaphore():
 
				 async def match_single_pair(
			
 
				     feature_name: str,
			
 
				     persona_name: str,
			
 
				+    category_mapping: Dict = None,
			
 
				     model_name: str = None
			
 
				 ) -> Dict:
			
 
				     """
			
@@ -43,12 +44,15 @@ async def match_single_pair(
 
				     Args:
			
 
				         feature_name: 要匹配的特征名称
			
 
				         persona_name: 人设特征名称
			
 
				+        category_mapping: 特征分类映射字典
			
 
				         model_name: 使用的模型名称
			
 
				 
			
 
				     Returns:
			
 
				         单个匹配结果，格式：
			
 
				         {
			
 
				             "人设特征名称": "xxx",
			
 
				+            "特征类型": "标签",
			
 
				+            "特征分类": ["分类1", "分类2"],
			
 
				             "匹配结果": {
			
 
				                 "相似度": 0.75,
			
 
				                 "说明": "..."
			
@@ -63,8 +67,45 @@ async def match_single_pair(
 
				             phrase_b=persona_name,
			
 
				         )
			
 
				 
			
 
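				+        # Classify the persona feature as a tag ("标签") or a category ("分类"); anything not found as a tag, including when category_mapping is empty, keeps the "分类" default.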
			
 
				+        feature_type = "分类"  # 默认为分类
			
 
				+        categories = []
			
 
				+
			
 
				+        if category_mapping:
			
 
				+            # 先在标签特征中查找（灵感点、关键点、目的点）
			
 
				+            is_tag_feature = False
			
 
				+            for ft in ["灵感点", "关键点", "目的点"]:
			
 
				+                if ft in category_mapping:
			
 
				+                    type_mapping = category_mapping[ft]
			
 
				+                    if persona_name in type_mapping:
			
 
				+                        # 找到了，说明是标签特征
			
 
				+                        feature_type = "标签"
			
 
				+                        categories = type_mapping[persona_name].get("所属分类", [])
			
 
				+                        is_tag_feature = True
			
 
				+                        break
			
 
				+
			
 
				+            # 如果不是标签特征，检查是否是分类特征
			
 
				+            if not is_tag_feature:
			
 
				+                # 收集所有分类
			
 
				+                all_categories = set()
			
 
				+                for ft in ["灵感点", "关键点", "目的点"]:
			
 
				+                    if ft in category_mapping:
			
 
				+                        for fname, fdata in category_mapping[ft].items():
			
 
				+                            cats = fdata.get("所属分类", [])
			
 
				+                            all_categories.update(cats)
			
 
				+
			
 
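				+                # If the name is among the collected categories it is a category feature (this re-assigns the defaults set above, so the result is the same either way).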
			
 
				+                if persona_name in all_categories:
			
 
				+                    feature_type = "分类"
			
 
				+                    categories = []  # 分类特征本身没有所属分类
			
 
				+
			
 
				+        # 去重分类
			
 
				+        unique_categories = list(dict.fromkeys(categories))
			
 
				+
			
 
				         return {
			
 
				             "人设特征名称": persona_name,
			
 
				+            "特征类型": feature_type,
			
 
				+            "特征分类": unique_categories,
			
 
				             "匹配结果": similarity_result
			
 
				         }
			
 
				 
			
@@ -72,6 +113,7 @@ async def match_single_pair(
 
				 async def match_feature_with_persona(
			
 
				     feature_name: str,
			
 
				     persona_features: List[Dict],
			
 
				+    category_mapping: Dict = None,
			
 
				     model_name: str = None
			
 
				 ) -> List[Dict]:
			
 
				     """
			
@@ -80,6 +122,7 @@ async def match_feature_with_persona(
 
				     Args:
			
 
				         feature_name: 要匹配的特征名称
			
 
				         persona_features: 人设特征列表
			
 
				+        category_mapping: 特征分类映射字典
			
 
				         model_name: 使用的模型名称
			
 
				 
			
 
				     Returns:
			
@@ -87,7 +130,7 @@ async def match_feature_with_persona(
 
				     """
			
 
				     # 创建所有匹配任务
			
 
				     tasks = [
			
 
				-        match_single_pair(feature_name, persona_feature["特征名称"], model_name)
			
 
				+        match_single_pair(feature_name, persona_feature["特征名称"], category_mapping, model_name)
			
 
				         for persona_feature in persona_features
			
 
				     ]
			
 
				 
			
@@ -100,6 +143,7 @@ async def match_feature_with_persona(
 
				 async def match_single_feature(
			
 
				     feature_name: str,
			
 
				     persona_features: List[Dict],
			
 
				+    category_mapping: Dict = None,
			
 
				     model_name: str = None
			
 
				 ) -> Dict:
			
 
				     """
			
@@ -108,6 +152,7 @@ async def match_single_feature(
 
				     Args:
			
 
				         feature_name: 特征名称
			
 
				         persona_features: 人设特征列表
			
 
				+        category_mapping: 特征分类映射字典
			
 
				         model_name: 使用的模型名称
			
 
				 
			
 
				     Returns:
			
@@ -117,6 +162,7 @@ async def match_single_feature(
 
				     match_results = await match_feature_with_persona(
			
 
				         feature_name=feature_name,
			
 
				         persona_features=persona_features,
			
 
				+        category_mapping=category_mapping,
			
 
				         model_name=model_name
			
 
				     )
			
 
				 
			
@@ -129,6 +175,7 @@ async def match_single_feature(
 
				 async def process_single_inspiration_point(
			
 
				     inspiration_point: Dict,
			
 
				     persona_features: List[Dict],
			
 
				+    category_mapping: Dict = None,
			
 
				     model_name: str = None
			
 
				 ) -> Dict:
			
 
				     """
			
@@ -137,6 +184,7 @@ async def process_single_inspiration_point(
 
				     Args:
			
 
				         inspiration_point: 灵感点数据
			
 
				         persona_features: 人设灵感特征列表
			
 
				+        category_mapping: 特征分类映射字典
			
 
				         model_name: 使用的模型名称
			
 
				 
			
 
				     Returns:
			
@@ -150,7 +198,7 @@ async def process_single_inspiration_point(
 
				 
			
 
				     # 并发匹配所有特征
			
 
				     tasks = [
			
 
				-        match_single_feature(feature_name, persona_features, model_name)
			
 
				+        match_single_feature(feature_name, persona_features, category_mapping, model_name)
			
 
				         for feature_name in feature_list
			
 
				     ]
			
 
				     feature_match_results = await asyncio.gather(*tasks)
			
@@ -173,6 +221,7 @@ async def process_single_task(
 
				     task_index: int,
			
 
				     total_tasks: int,
			
 
				     persona_inspiration_features: List[Dict],
			
 
				+    category_mapping: Dict = None,
			
 
				     model_name: str = None
			
 
				 ) -> Dict:
			
 
				     """
			
@@ -183,6 +232,7 @@ async def process_single_task(
 
				         task_index: 任务索引（从1开始）
			
 
				         total_tasks: 总任务数
			
 
				         persona_inspiration_features: 人设灵感特征列表
			
 
				+        category_mapping: 特征分类映射字典
			
 
				         model_name: 使用的模型名称
			
 
				 
			
 
				     Returns:
			
@@ -202,6 +252,7 @@ async def process_single_task(
 
				         process_single_inspiration_point(
			
 
				             inspiration_point=inspiration_point,
			
 
				             persona_features=persona_inspiration_features,
			
 
				+            category_mapping=category_mapping,
			
 
				             model_name=model_name
			
 
				         )
			
 
				         for inspiration_point in inspiration_list
			
@@ -223,6 +274,7 @@ async def process_single_task(
 
				 async def process_task_list(
			
 
				     task_list: List[Dict],
			
 
				     persona_features_dict: Dict,
			
 
				+    category_mapping: Dict = None,
			
 
				     model_name: str = None
			
 
				 ) -> List[Dict]:
			
 
				     """
			
@@ -231,13 +283,33 @@ async def process_task_list(
 
				     Args:
			
 
				         task_list: 解构任务列表
			
 
				         persona_features_dict: 人设特征字典（包含灵感点、目的点、关键点）
			
 
				+        category_mapping: 特征分类映射字典
			
 
				         model_name: 使用的模型名称
			
 
				 
			
 
				     Returns:
			
 
				         包含 how 解构结果的任务列表
			
 
				     """
			
 
				+    # Tag features: only the "灵感点" entries of persona_features_dict are used here.
			
 
				     persona_inspiration_features = persona_features_dict.get("灵感点", [])
			
 
				-    print(f"人设灵感特征数量: {len(persona_inspiration_features)}")
			
 
				+    print(f"人设标签特征数量: {len(persona_inspiration_features)}")
			
 
				+
			
 
				+    # 从分类映射中提取所有唯一的分类作为分类特征（仅从灵感点中提取）
			
 
				+    category_features = []
			
 
				+    if category_mapping:
			
 
				+        all_categories = set()
			
 
				+        # 只从灵感点中提取分类
			
 
				+        if "灵感点" in category_mapping:
			
 
				+            for _, feature_data in category_mapping["灵感点"].items():
			
 
				+                categories = feature_data.get("所属分类", [])
			
 
				+                all_categories.update(categories)
			
 
				+
			
 
				+        # 转换为特征格式
			
 
				+        category_features = [{"特征名称": cat} for cat in sorted(all_categories)]
			
 
				+        print(f"人设分类特征数量: {len(category_features)}")
			
 
				+
			
 
				+    # 合并标签特征和分类特征
			
 
				+    all_features = persona_inspiration_features + category_features
			
 
				+    print(f"总特征数量（标签+分类）: {len(all_features)}")
			
 
				 
			
 
				     # 并发处理所有任务
			
 
				     tasks = [
			
@@ -245,7 +317,8 @@ async def process_task_list(
 
				             task=task,
			
 
				             task_index=i,
			
 
				             total_tasks=len(task_list),
			
 
				-            persona_inspiration_features=persona_inspiration_features,
			
 
				+            persona_inspiration_features=all_features,
			
 
				+            category_mapping=category_mapping,
			
 
				             model_name=model_name
			
 
				         )
			
 
				         for i, task in enumerate(task_list, 1)
			
@@ -264,6 +337,7 @@ async def main():
 
				 
			
 
				     task_list_file = data_dir / "当前帖子_解构任务列表.json"
			
 
				     persona_features_file = data_dir / "特征名称_帖子来源.json"
			
 
				+    category_mapping_file = data_dir / "特征名称_分类映射.json"
			
 
				     output_dir = data_dir / "当前帖子_how解构结果"
			
 
				 
			
 
				     # 创建输出目录
			
@@ -277,6 +351,10 @@ async def main():
 
				     with open(persona_features_file, "r", encoding="utf-8") as f:
			
 
				         persona_features_data = json.load(f)
			
 
				 
			
 
				+    print(f"读取特征分类映射: {category_mapping_file}")
			
 
				+    with open(category_mapping_file, "r", encoding="utf-8") as f:
			
 
				+        category_mapping = json.load(f)
			
 
				+
			
 
				     # 获取任务列表
			
 
				     task_list = task_list_data.get("解构任务列表", [])
			
 
				     print(f"\n总任务数: {len(task_list)}")
			
@@ -285,6 +363,7 @@ async def main():
 
				     updated_task_list = await process_task_list(
			
 
				         task_list=task_list,
			
 
				         persona_features_dict=persona_features_data,
			
 
				+        category_mapping=category_mapping,
			
 
				         model_name=None  # 使用默认模型
			
 
				     )
			
 
				 
			
--- a/script/data_processing/visualize_how_results.py
+++ b/script/data_processing/visualize_how_results.py
@@ -302,6 +302,8 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				     matches_html = ""
			
 
				     for i, match in enumerate(sorted_matches):
			
 
				         persona_name = match.get("人设特征名称", "")
			
 
				+        feature_type = match.get("特征类型", "")
			
 
				+        feature_categories = match.get("特征分类", [])
			
 
				         match_result = match.get("匹配结果", {})
			
 
				         similarity = match_result.get("相似度", 0.0)
			
 
				         explanation = match_result.get("说明", "")
			
@@ -319,6 +321,16 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				 
			
 
				         match_id = f"post-{post_idx}-insp-{insp_idx}-feat-{feature_idx}-match-{i}"
			
 
				 
			
 
				+        # 生成特征类型和分类标签
			
 
				+        type_badge_html = ""
			
 
				+        if feature_type:
			
 
				+            type_badge_html = f'<span class="feature-type-badge">{html_module.escape(feature_type)}</span>'
			
 
				+
			
 
				+        categories_badge_html = ""
			
 
				+        if feature_categories:
			
 
				+            categories_text = " / ".join(feature_categories)
			
 
				+            categories_badge_html = f'<span class="feature-category-badge">{html_module.escape(categories_text)}</span>'
			
 
				+
			
 
				         # 获取该人设特征的分类信息
			
 
				         # 需要在三个类型中查找该特征
			
 
				         categories_html = ""
			
@@ -366,6 +378,8 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
				                 <div class="match-header-left">
			
 
				                     <span class="expand-icon" id="{match_id}-icon">▶</span>
			
 
				                     <span class="persona-name">{categories_html} {html_module.escape(persona_name)}</span>
			
 
				+                    {type_badge_html}
			
 
				+                    {categories_badge_html}
			
 
				                     <span class="relation-badge" style="background: {color};">{label}</span>
			
 
				                     <span class="score-badge">相似度: {similarity:.2f}</span>
			
 
				                 </div>
			
@@ -1137,6 +1151,26 @@ def generate_combined_html(posts_data: List[Dict], category_mapping: Dict = None
 
				                 font-weight: 600;
			
 
				             }}
			
 
				 
			
 
				+            .feature-type-badge {{
			
 
				+                padding: 3px 8px;
			
 
				+                border-radius: 10px;
			
 
				+                background: #fef3c7;
			
 
				+                color: #92400e;
			
 
				+                font-size: 10px;
			
 
				+                font-weight: 600;
			
 
				+                border: 1px solid #fcd34d;
			
 
				+            }}
			
 
				+
			
 
				+            .feature-category-badge {{
			
 
				+                padding: 3px 8px;
			
 
				+                border-radius: 10px;
			
 
				+                background: #dbeafe;
			
 
				+                color: #1e40af;
			
 
				+                font-size: 10px;
			
 
				+                font-weight: 500;
			
 
				+                border: 1px solid #93c5fd;
			
 
				+            }}
			
 
				+
			
 
				             .match-content {{
			
 
				                 padding: 16px;
			
 
				                 background: #f9fafb;