Bladeren bron

feat: 添加特征权重支持

- extract_current_posts.py: 提取特征时包含权重信息
- match_inspiration_features.py: 适配新的特征结构,在匹配结果中保留权重
- visualize_how_results.py: 在可视化中显示特征权重(特征列表仍兼容旧的字符串格式,此时权重按 1.0 显示)
- 特征列表从字符串数组改为字典数组,每项包含"特征名称"和"权重"字段(缺少"权重"时默认为 1.0)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
yangxiaohui 1 week geleden
bovenliggende
commit
26267801d3

+ 8 - 4
script/data_processing/extract_current_posts.py

@@ -35,21 +35,25 @@ def get_post_detail(post_id: str) -> Optional[Dict]:
         return None
 
 
-def extract_features_from_point(point_data: Dict) -> List[str]:
+def extract_features_from_point(point_data: Dict) -> List[Dict]:
     """
-    从点数据中提取特征名称列表
+    从点数据中提取特征信息列表(包含名称和权重)
 
     Args:
         point_data: 点的数据(包含"提取的特征"字段)
 
     Returns:
-        特征名称列表
+        特征信息列表,每项包含 {"特征名称": str, "权重": float}
     """
     features = []
     if "提取的特征" in point_data and isinstance(point_data["提取的特征"], list):
         for feature in point_data["提取的特征"]:
             if "特征名称" in feature:
-                features.append(feature["特征名称"])
+                feature_item = {
+                    "特征名称": feature["特征名称"],
+                    "权重": feature.get("权重", 1.0)  # 默认权重为1.0
+                }
+                features.append(feature_item)
     return features
 
 

+ 9 - 5
script/data_processing/match_inspiration_features.py

@@ -141,7 +141,7 @@ async def match_feature_with_persona(
 
 
 async def match_single_feature(
-    feature_name: str,
+    feature_item: Dict,
     persona_features: List[Dict],
     category_mapping: Dict = None,
     model_name: str = None
@@ -150,7 +150,7 @@ async def match_single_feature(
     匹配单个特征与所有人设特征
 
     Args:
-        feature_name: 特征名称
+        feature_item: 特征信息(包含"特征名称"和"权重")
         persona_features: 人设特征列表
         category_mapping: 特征分类映射字典
         model_name: 使用的模型名称
@@ -158,7 +158,10 @@ async def match_single_feature(
     Returns:
         特征匹配结果
     """
-    print(f"    特征: {feature_name}")
+    feature_name = feature_item.get("特征名称", "")
+    feature_weight = feature_item.get("权重", 1.0)
+
+    print(f"    特征: {feature_name} (权重: {feature_weight})")
     match_results = await match_feature_with_persona(
         feature_name=feature_name,
         persona_features=persona_features,
@@ -168,6 +171,7 @@ async def match_single_feature(
 
     return {
         "特征名称": feature_name,
+        "权重": feature_weight,
         "匹配结果": match_results
     }
 
@@ -198,8 +202,8 @@ async def process_single_inspiration_point(
 
     # 并发匹配所有特征
     tasks = [
-        match_single_feature(feature_name, persona_features, category_mapping, model_name)
-        for feature_name in feature_list
+        match_single_feature(feature_item, persona_features, category_mapping, model_name)
+        for feature_item in feature_list
     ]
     feature_match_results = await asyncio.gather(*tasks)
 

+ 3 - 2
script/data_processing/visualize_how_results.py

@@ -189,7 +189,7 @@ def generate_inspiration_detail_html(inspiration_point: Dict) -> str:
     features = inspiration_point.get("特征列表", [])
 
     features_html = "".join([
-        f'<span class="feature-tag">{html_module.escape(f)}</span>'
+        f'<span class="feature-tag">{html_module.escape(f if isinstance(f, str) else f.get("特征名称", ""))} <span class="feature-weight">({f.get("权重", 1.0) if isinstance(f, dict) else 1.0})</span></span>'
         for f in features
     ])
 
@@ -262,6 +262,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
 
     feature_data = features[feature_idx]
     feature_name = feature_data.get("特征名称", "")
+    feature_weight = feature_data.get("权重", 1.0)
     match_results = feature_data.get("匹配结果", [])
 
     if category_mapping is None:
@@ -398,7 +399,7 @@ def generate_match_results_html(how_steps: List[Dict], feature_idx: int, insp_id
         <div class="match-section-header collapsible-header" onclick="toggleFeatureSection('{section_id}')">
             <div class="header-left">
                 <span class="expand-icon" id="{section_id}-icon">▼</span>
-                <h4>匹配结果: {html_module.escape(feature_name)}</h4>
+                <h4>匹配结果: {html_module.escape(feature_name)} <span class="feature-weight-display">(权重: {feature_weight})</span></h4>
             </div>
             <div class="match-stats">{stats_html}</div>
         </div>