há 2 semanas atrás · 7241759dd5
--- a/lib/hierarchical_match_analyzer.py
+++ b/lib/hierarchical_match_analyzer.py
@@ -0,0 +1,419 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+分层匹配分析模块
			
 
				+
			
 
				+实现特征组合的分层匹配逻辑：
			
 
				+1. 优先匹配灵感点标签（特征名称）
			
 
				+2. 无标签匹配时，匹配第一层分类
			
 
				+3. 仍无结果时，匹配第二层上位分类
			
 
				+4. 对每个候选进行推理难度打分
			
 
				+"""
			
 
				+
			
 
				+from typing import List, Dict, Optional
			
 
				+from agents import Agent, Runner, ModelSettings
			
 
				+from agents.tracing.create import custom_span
			
 
				+from lib.client import get_model
			
 
				+from lib.utils import parse_json_from_text
			
 
				+
			
 
				+
			
 
				+# ========== System Prompts ==========
			
 
				+
			
 
# System prompt for the tag-matching agent (text is sent to the model verbatim,
# so it must not be edited casually). It asks the model to judge whether any
# feature in the "current feature list" is semantically identical or very close
# to a persona feature tag. Similarity is scored 0-100 and a pair counts as a
# match at >= 80. The model must answer with strict JSON containing the keys
# "匹配成功" (bool), "匹配对" (list of pairs, sorted by similarity descending),
# "最佳匹配" (the highest-scoring pair, or null) and "说明" (explanation).
TAG_MATCH_SYSTEM_PROMPT = """
# 任务
判断"当前特征列表"中的特征，是否有与"人设特征标签"在语义上相同或高度接近的。

## 评分标准
- **相似度 ≥ 80**: 语义相同或高度接近，判定为匹配成功
- **相似度 < 80**: 不够接近，判定为匹配失败

## 输出格式（严格JSON）

```json
{
  "匹配成功": true/false,
  "匹配对": [
    {"当前特征": "...", "人设标签": "...", "相似度": 95}
  ],
  "最佳匹配": {"当前特征": "...", "人设标签": "...", "相似度": 95} or null,
  "说明": "匹配结果说明"
}
```

**要求**：
1. 逐一比较当前特征与人设标签
2. 找到所有相似度≥80的配对
3. 按相似度降序排列匹配对
4. 最佳匹配为相似度最高的配对
""".strip()
			
 
				+
			
 
				+
			
 
# System prompt for the category-matching agent (sent to the model verbatim).
# It asks the model to pick, from the candidate categories, the one closest to
# the current feature list and to rate the "reasoning difficulty" on a 0-10
# scale (0 = near-direct correspondence, 10 = very weak association).
# The prompt defines: reasoning-difficulty score = (10 - difficulty) / 10, and
# tells the model to report success only when that score is >= 0.5.
# Expected strict-JSON keys: "匹配成功", "最佳分类" (or null), "推理难度",
# "推理难度得分", "推理路径" and "说明".
CATEGORY_MATCH_SYSTEM_PROMPT = """
# 任务
为"当前特征列表"在"候选分类"中找到语义最接近的分类，并评估推理难度。

## 推理难度评估标准（0-10分）
- **0-2分**: 几乎直接对应，推理非常容易
- **3-4分**: 需要简单推理，难度较低
- **5-6分**: 需要中等程度的推理
- **7-8分**: 需要较复杂的推理
- **9-10分**: 推理非常困难，关联很弱

## 推理难度得分计算
```
推理难度得分 = (10 - 推理难度) / 10
```
例如：推理难度=3，则得分=(10-3)/10=0.7

## 输出格式（严格JSON）

```json
{
  "匹配成功": true/false,
  "最佳分类": "分类名称" or null,
  "推理难度": 3,
  "推理难度得分": 0.7,
  "推理路径": "从该分类如何推理到当前特征的说明",
  "说明": "为什么选择这个分类"
}
```

**要求**：
1. 判断当前特征整体的主题/领域
2. 在候选分类中找到最符合的分类
3. 评估推理难度（0-10）
4. 计算推理难度得分
5. 只有推理难度得分≥0.5时，判定为匹配成功
""".strip()
			
 
				+
			
 
				+
			
 
				+def create_tag_match_agent(model_name: str) -> Agent:
			
 
				+    """创建标签匹配的Agent"""
			
 
				+    return Agent(
			
 
				+        name="Tag Match Expert",
			
 
				+        instructions=TAG_MATCH_SYSTEM_PROMPT,
			
 
				+        model=get_model(model_name),
			
 
				+        model_settings=ModelSettings(
			
 
				+            temperature=0.0,
			
 
				+            max_tokens=65536,
			
 
				+        ),
			
 
				+        tools=[],
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def create_category_match_agent(model_name: str) -> Agent:
			
 
				+    """创建分类匹配的Agent"""
			
 
				+    return Agent(
			
 
				+        name="Category Match Expert",
			
 
				+        instructions=CATEGORY_MATCH_SYSTEM_PROMPT,
			
 
				+        model=get_model(model_name),
			
 
				+        model_settings=ModelSettings(
			
 
				+            temperature=0.0,
			
 
				+            max_tokens=65536,
			
 
				+        ),
			
 
				+        tools=[],
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+async def match_current_features_to_persona_tags(
			
 
				+    current_features: List[str],
			
 
				+    persona_combination: List[Dict],
			
 
				+    model_name: Optional[str] = None
			
 
				+) -> Dict:
			
 
				+    """
			
 
				+    第一层匹配: 将当前特征列表与人设组合的特征标签进行语义匹配
			
 
				+
			
 
				+    Args:
			
 
				+        current_features: 当前特征列表，如 ["立冬", "教资查分", "时间巧合"]
			
 
				+        persona_combination: 人设组合特征列表，如:
			
 
				+            [
			
 
				+                {"特征名称": "猫孩子", "所属分类": ["宠物亲子化", "宠物情感", "实质"]},
			
 
				+                {"特征名称": "被拿捏住的无奈感", "所属分类": ["宠物关系主导", "宠物情感", "实质"]}
			
 
				+            ]
			
 
				+        model_name: 模型名称
			
 
				+
			
 
				+    Returns:
			
 
				+        {
			
 
				+            "匹配成功": bool,
			
 
				+            "匹配的特征": str or None,
			
 
				+            "得分": 1 or 0,
			
 
				+            "详细结果": {...}
			
 
				+        }
			
 
				+    """
			
 
				+    if model_name is None:
			
 
				+        from lib.client import MODEL_NAME
			
 
				+        model_name = MODEL_NAME
			
 
				+
			
 
				+    persona_tags = [f["特征名称"] for f in persona_combination]
			
 
				+
			
 
				+    # 创建Agent
			
 
				+    agent = create_tag_match_agent(model_name)
			
 
				+
			
 
				+    # 构建任务描述
			
 
				+    task_description = f"""## 本次匹配任务
			
 
				+
			
 
				+<当前特征列表>
			
 
				+{', '.join(current_features)}
			
 
				+</当前特征列表>
			
 
				+
			
 
				+<人设特征标签>
			
 
				+{', '.join(persona_tags)}
			
 
				+</人设特征标签>
			
 
				+
			
 
				+请判断当前特征列表中是否有与人设标签语义相同或高度接近的（相似度≥80），输出JSON格式结果。
			
 
				+"""
			
 
				+
			
 
				+    messages = [{
			
 
				+        "role": "user",
			
 
				+        "content": [{"type": "input_text", "text": task_description}]
			
 
				+    }]
			
 
				+
			
 
				+    with custom_span(
			
 
				+        name=f"标签匹配: {current_features[:2]} vs {len(persona_tags)}个标签",
			
 
				+        data={
			
 
				+            "current_features": current_features,
			
 
				+            "persona_tags": persona_tags
			
 
				+        }
			
 
				+    ):
			
 
				+        result = await Runner.run(agent, input=messages)
			
 
				+
			
 
				+    # 解析响应
			
 
				+    parsed_result = parse_json_from_text(result.final_output)
			
 
				+
			
 
				+    if not parsed_result:
			
 
				+        return {
			
 
				+            "匹配成功": False,
			
 
				+            "匹配的特征": None,
			
 
				+            "得分": 0,
			
 
				+            "详细结果": {"说明": "解析失败"}
			
 
				+        }
			
 
				+
			
 
				+    # 转换为标准格式
			
 
				+    if parsed_result.get("匹配成功"):
			
 
				+        best_match = parsed_result.get("最佳匹配", {})
			
 
				+        return {
			
 
				+            "匹配成功": True,
			
 
				+            "匹配的特征": best_match.get("人设标签"),
			
 
				+            "得分": 1,
			
 
				+            "详细结果": parsed_result
			
 
				+        }
			
 
				+    else:
			
 
				+        return {
			
 
				+            "匹配成功": False,
			
 
				+            "匹配的特征": None,
			
 
				+            "得分": 0,
			
 
				+            "详细结果": parsed_result
			
 
				+        }
			
 
				+
			
 
				+
			
 
				+async def match_to_categories(
			
 
				+    current_features: List[str],
			
 
				+    persona_combination: List[Dict],
			
 
				+    layer: str,  # "first" or "second"
			
 
				+    model_name: Optional[str] = None
			
 
				+) -> Dict:
			
 
				+    """
			
 
				+    分类匹配（第一层或第二层）
			
 
				+
			
 
				+    Args:
			
 
				+        current_features: 当前特征列表
			
 
				+        persona_combination: 人设组合特征列表（带分类）
			
 
				+        layer: "first"=第一层分类, "second"=第二层上位分类
			
 
				+        model_name: 模型名称
			
 
				+
			
 
				+    Returns:
			
 
				+        {
			
 
				+            "匹配成功": bool,
			
 
				+            "匹配的分类": str or None,
			
 
				+            "推理难度得分": float (0-1),
			
 
				+            "详细结果": {...}
			
 
				+        }
			
 
				+    """
			
 
				+    if model_name is None:
			
 
				+        from lib.client import MODEL_NAME
			
 
				+        model_name = MODEL_NAME
			
 
				+
			
 
				+    # 收集分类
			
 
				+    all_categories = set()
			
 
				+    for feature in persona_combination:
			
 
				+        categories = feature.get("所属分类", [])
			
 
				+
			
 
				+        if layer == "first":
			
 
				+            # 第一层：过滤掉"实质"和"形式"
			
 
				+            filtered_cats = [c for c in categories if c not in ["实质", "形式"]]
			
 
				+            all_categories.update(filtered_cats)
			
 
				+        elif layer == "second":
			
 
				+            # 第二层：只保留"实质"和"形式"
			
 
				+            generic_cats = [c for c in categories if c in ["实质", "形式"]]
			
 
				+            all_categories.update(generic_cats)
			
 
				+
			
 
				+    if not all_categories:
			
 
				+        # 如果没有可用分类
			
 
				+        if layer == "first":
			
 
				+            # 降级使用所有分类
			
 
				+            for feature in persona_combination:
			
 
				+                all_categories.update(feature.get("所属分类", []))
			
 
				+        else:
			
 
				+            # 第二层没有分类，返回失败
			
 
				+            return {
			
 
				+                "匹配成功": False,
			
 
				+                "匹配的分类": None,
			
 
				+                "推理难度得分": 0,
			
 
				+                "详细结果": {"说明": "没有可用的上位分类"}
			
 
				+            }
			
 
				+
			
 
				+    categories_list = list(all_categories)
			
 
				+    persona_tags = [f["特征名称"] for f in persona_combination]
			
 
				+
			
 
				+    # 创建Agent
			
 
				+    agent = create_category_match_agent(model_name)
			
 
				+
			
 
				+    # 构建任务描述
			
 
				+    layer_desc = "第一层分类（具体领域分类）" if layer == "first" else "第二层上位分类（实质/形式）"
			
 
				+
			
 
				+    task_description = f"""## 本次匹配任务 - {layer_desc}
			
 
				+
			
 
				+<当前特征列表>
			
 
				+{', '.join(current_features)}
			
 
				+</当前特征列表>
			
 
				+
			
 
				+<候选分类>
			
 
				+{', '.join(categories_list)}
			
 
				+</候选分类>
			
 
				+
			
 
				+<人设组合特征>
			
 
				+{', '.join(persona_tags)}
			
 
				+</人设组合特征>
			
 
				+
			
 
				+请为当前特征列表在候选分类中找到最接近的分类，并评估推理难度（0-10），输出JSON格式结果。
			
 
				+"""
			
 
				+
			
 
				+    messages = [{
			
 
				+        "role": "user",
			
 
				+        "content": [{"type": "input_text", "text": task_description}]
			
 
				+    }]
			
 
				+
			
 
				+    layer_name = "第一层分类" if layer == "first" else "第二层上位分类"
			
 
				+    with custom_span(
			
 
				+        name=f"{layer_name}匹配: {current_features[:2]} vs {len(categories_list)}个分类",
			
 
				+        data={
			
 
				+            "current_features": current_features,
			
 
				+            "categories": categories_list,
			
 
				+            "layer": layer
			
 
				+        }
			
 
				+    ):
			
 
				+        result = await Runner.run(agent, input=messages)
			
 
				+
			
 
				+    # 解析响应
			
 
				+    parsed_result = parse_json_from_text(result.final_output)
			
 
				+
			
 
				+    if not parsed_result:
			
 
				+        return {
			
 
				+            "匹配成功": False,
			
 
				+            "匹配的分类": None,
			
 
				+            "推理难度得分": 0,
			
 
				+            "详细结果": {"说明": "解析失败"}
			
 
				+        }
			
 
				+
			
 
				+    return {
			
 
				+        "匹配成功": parsed_result.get("匹配成功", False),
			
 
				+        "匹配的分类": parsed_result.get("最佳分类"),
			
 
				+        "推理难度得分": parsed_result.get("推理难度得分", 0),
			
 
				+        "详细结果": parsed_result
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+async def hierarchical_match(
			
 
				+    current_features: List[str],
			
 
				+    persona_combination: List[Dict],
			
 
				+    model_name: Optional[str] = None
			
 
				+) -> Dict:
			
 
				+    """
			
 
				+    分层匹配主函数
			
 
				+
			
 
				+    依次尝试:
			
 
				+    1. 标签匹配（特征名称）
			
 
				+    2. 第一层分类匹配
			
 
				+    3. 第二层上位分类匹配
			
 
				+
			
 
				+    Args:
			
 
				+        current_features: 当前特征列表
			
 
				+        persona_combination: 人设组合特征列表（带分类）
			
 
				+        model_name: 模型名称
			
 
				+
			
 
				+    Returns:
			
 
				+        {
			
 
				+            "最终得分": float,  // 0-1
			
 
				+            "匹配层级": "标签匹配" | "第一层分类匹配" | "第二层上位分类匹配" | "无匹配",
			
 
				+            "匹配结果": str,  // 匹配到的标签/分类名称
			
 
				+            "分层结果": {
			
 
				+                "标签匹配": {...},
			
 
				+                "第一层分类匹配": {...},
			
 
				+                "第二层上位分类匹配": {...}
			
 
				+            },
			
 
				+            "综合说明": str
			
 
				+        }
			
 
				+    """
			
 
				+    # 第一层: 标签匹配
			
 
				+    tag_match = await match_current_features_to_persona_tags(
			
 
				+        current_features, persona_combination, model_name
			
 
				+    )
			
 
				+
			
 
				+    if tag_match["匹配成功"]:
			
 
				+        return {
			
 
				+            "最终得分": 1.0,
			
 
				+            "匹配层级": "标签匹配",
			
 
				+            "匹配结果": tag_match["匹配的特征"],
			
 
				+            "分层结果": {
			
 
				+                "标签匹配": tag_match
			
 
				+            },
			
 
				+            "综合说明": f"在标签层级找到完全匹配: {tag_match['匹配的特征']}"
			
 
				+        }
			
 
				+
			
 
				+    # 第二层: 第一层分类匹配
			
 
				+    first_cat_match = await match_to_categories(
			
 
				+        current_features, persona_combination, "first", model_name
			
 
				+    )
			
 
				+
			
 
				+    if first_cat_match["匹配成功"] and first_cat_match["推理难度得分"] >= 0.5:
			
 
				+        return {
			
 
				+            "最终得分": first_cat_match["推理难度得分"],
			
 
				+            "匹配层级": "第一层分类匹配",
			
 
				+            "匹配结果": first_cat_match["匹配的分类"],
			
 
				+            "分层结果": {
			
 
				+                "标签匹配": tag_match,
			
 
				+                "第一层分类匹配": first_cat_match
			
 
				+            },
			
 
				+            "综合说明": f"在第一层分类找到匹配: {first_cat_match['匹配的分类']}, 推理难度得分: {first_cat_match['推理难度得分']:.2f}"
			
 
				+        }
			
 
				+
			
 
				+    # 第三层: 第二层上位分类匹配
			
 
				+    second_cat_match = await match_to_categories(
			
 
				+        current_features, persona_combination, "second", model_name
			
 
				+    )
			
 
				+
			
 
				+    if second_cat_match["匹配成功"]:
			
 
				+        return {
			
 
				+            "最终得分": second_cat_match["推理难度得分"],
			
 
				+            "匹配层级": "第二层上位分类匹配",
			
 
				+            "匹配结果": second_cat_match["匹配的分类"],
			
 
				+            "分层结果": {
			
 
				+                "标签匹配": tag_match,
			
 
				+                "第一层分类匹配": first_cat_match,
			
 
				+                "第二层上位分类匹配": second_cat_match
			
 
				+            },
			
 
				+            "综合说明": f"在第二层上位分类找到匹配: {second_cat_match['匹配的分类']}, 推理难度得分: {second_cat_match['推理难度得分']:.2f}"
			
 
				+        }
			
 
				+
			
 
				+    # 无匹配
			
 
				+    return {
			
 
				+        "最终得分": 0,
			
 
				+        "匹配层级": "无匹配",
			
 
				+        "匹配结果": None,
			
 
				+        "分层结果": {
			
 
				+            "标签匹配": tag_match,
			
 
				+            "第一层分类匹配": first_cat_match,
			
 
				+            "第二层上位分类匹配": second_cat_match
			
 
				+        },
			
 
				+        "综合说明": "在所有层级都未找到合适的匹配"
			
 
				+    }
			
--- a/lib/unified_match_analyzer.py
+++ b/lib/unified_match_analyzer.py
@@ -0,0 +1,294 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+统一匹配分析模块 (v4 - 优化版)
			
 
				+
			
 
				+使用单个prompt同时完成标签匹配和分类匹配，一步到位。
			
 
				+输出格式：当前标签列表中每个标签的匹配结果。
			
 
				+"""
			
 
				+
			
 
				+from typing import List, Dict, Optional
			
 
				+from agents import Agent, Runner, ModelSettings
			
 
				+from agents.tracing.create import custom_span
			
 
				+from lib.client import get_model
			
 
				+from lib.utils import parse_json_from_text
			
 
				+
			
 
				+
			
 
				+# ========== System Prompt ==========
			
 
				+
			
 
# System prompt for the unified-match agent (sent to the model verbatim).
# For every tag in the "current tag list" the model must:
#   1. try tag matching (synonym relation) against each persona tag and stop
#      as soon as one matches;
#   2. otherwise try category matching ("is-a" relation) layer by layer, from
#      the most specific category to the most abstract, stopping at the first
#      layer that contains a match.
# The prompt stresses that "语义相似度" (semantic similarity, 0-1) is judged
# independently of the is-a decision, and that categories of an already
# tag-matched persona tag may not be reused by other current tags.
# Expected output: a strict JSON array with one object per current tag, each
# holding "当前标签", "匹配过程" (with "标签匹配" and "分类匹配_按层级") and
# "匹配结果" (with "匹配类型", "匹配到", "语义相似度").
UNIFIED_MATCH_SYSTEM_PROMPT = """
# 任务
对"当前标签列表"中的每个标签，与"人设标签组合"进行综合匹配分析。

## 输入说明
- **当前标签列表**: 需要匹配的标签列表
- **人设标签组合**: 包含标签名称及其分类的组合
  - 每个标签有：标签名称、所属分类(多层级，从具体到抽象)
  - 分类是树状结构，按数组顺序从具体到抽象排列

## 匹配策略

对当前标签列表中的**每个标签**：

**重要约束 - 分类排他性**：
- 如果某个人设标签已经被标签匹配，则该标签的所有所属分类都不能再被其他当前标签使用

**匹配优先级和提前终止**：
1. 优先进行标签匹配，如果匹配成功则立即停止，不再进行分类匹配
2. 如果标签匹配失败，则进行分类匹配
3. 分类匹配按层级从下到上（从具体到抽象），一旦某层匹配成功则立即停止，不再检查更抽象的层级

### 1. 标签匹配（同义关系）
- **逐个判断**每个人设标签
- **核心判断**: "A 和 B 是同一个东西吗？是同义词吗？"
- **输出**: 是否匹配(true/false)
- **严格要求**: 必须是同义词或几乎相同的表述才能匹配
- **如果匹配成功**: 立即返回结果，不再进行分类匹配

### 2. 分类匹配（从属关系）
- **仅在标签匹配全部失败时进行**
- **按层级从下到上**遍历分类（从具体到抽象）
- **每层判断所有分类**
- **核心判断**: "当前标签 本身就是 {分类} 的一种吗？"
- **输出**:
  - 该层候选分类：列出该层所有分类名称
  - 该层匹配结果：对该层每个分类逐个判断，输出分类名称、从属关系判断、是否有从属关系、相似度分析、语义相似度
- **严格要求**: 必须是直接从属关系，不能是间接关系或关联关系
- **禁止**:
  - ✗ "A 可能会有 B"（间接推理）
  - ✗ "A 与 B 有关"（关联不等于从属）
- **语义相似度计算规则**:
  - **重要**：语义相似度和从属关系是两个完全独立的维度！
    * 从属关系判断："A 本身就是 B 的一种吗？"（层级关系）
    * 语义相似度："A 和 B 这两个词本身像吗？"（词义距离）
  - **核心原则**：计算语义相似度时，**完全不考虑**从属关系的判断结果
  - **判断方法**：想象你不知道这两个词之间有任何关系，只是单独看这两个词的字面含义，它们像吗？
  - **禁止思路**：不要因为"A 是 B 的一种"就给高相似度
  - 计算标准：
    * 两个词几乎是同义词：0.8-1.0
    * 两个词意思比较接近：0.5-0.7
    * 两个词意思差距较大：0.2-0.4
    * 两个词意思完全不同：0.0-0.1
  - **相似度分析**：说明两个词本身的字面含义有多相似（30字以内），不要提及从属关系
- **如果某层匹配成功**: 立即返回该层的匹配结果，不再检查更抽象的层级

## 输出格式 (严格JSON数组)

```json
[
  {
    "当前标签": "<标签名称>",
    "匹配过程": {
      "标签匹配": [
        {
          "人设标签": "<标签名称>",
          "是否匹配": <true|false>
        }
      ],
      "分类匹配_按层级": [
        {
          "该层候选分类": ["<分类1>", "<分类2>", "..."],
          "该层匹配结果": [
            {
              "分类名称": "<分类1>",
              "从属关系判断": "<判断过程和理由>",
              "是否有从属关系": <true|false>,
              "相似度分析": "<两个词本身的相似度分析>",
              "语义相似度": <0到1之间的数值>
            },
            {
              "分类名称": "<分类2>",
              "从属关系判断": "<判断过程和理由>",
              "是否有从属关系": <true|false>,
              "相似度分析": "<两个词本身的相似度分析>",
              "语义相似度": <0到1之间的数值>
            }
          ]
        }
      ]
    },
    "匹配结果": {
      "匹配类型": "<标签匹配|分类匹配|无匹配>",
      "匹配到": "<标签或分类名称，无匹配时为null>",
      "语义相似度": <0到1之间的数值>
    }
  }
]
```

## 要求
1. **数组长度必须等于当前标签列表的长度**
2. **标签匹配**: 对人设组合中每个标签都要输出判断结果(true/false)
3. **提前终止**:
   - 如果标签匹配成功，则"分类匹配_按层级"为空数组[]，不进行分类匹配
   - 如果标签匹配失败，进行分类匹配：
     * 从第一层开始逐层判断，每层都输出到"分类匹配_按层级"数组
     * 每层的"该层匹配结果"数组长度必须等于"该层候选分类"数组长度，每个分类都要判断
     * 一旦某层有匹配成功的分类(是否有从属关系=true)，该层之后的层级不再输出
     * 例如：第2层匹配成功，则数组长度=2（包含第1层和第2层）
4. **匹配结果**:
   - 标签匹配成功时：匹配类型="标签匹配"，语义相似度=1.0
   - 分类匹配成功时：匹配类型="分类匹配"，语义相似度为该分类的语义相似度
   - 都不成功时：匹配类型="无匹配"，语义相似度=0
5. **严格遵守分类排他性约束**
""".strip()
			
 
				+
			
 
				+
			
 
				+def create_unified_match_agent(model_name: str) -> Agent:
			
 
				+    """创建统一匹配的Agent"""
			
 
				+    return Agent(
			
 
				+        name="Unified Match Expert",
			
 
				+        instructions=UNIFIED_MATCH_SYSTEM_PROMPT,
			
 
				+        model=get_model(model_name),
			
 
				+        model_settings=ModelSettings(
			
 
				+            temperature=0.0,
			
 
				+            max_tokens=65536,
			
 
				+        ),
			
 
				+        tools=[],
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+async def unified_match(
			
 
				+    current_tags: List[str],
			
 
				+    persona_combination: List[Dict],
			
 
				+    model_name: Optional[str] = None
			
 
				+) -> List[Dict]:
			
 
				+    """
			
 
				+    统一匹配函数 - 一次调用完成所有层级的匹配
			
 
				+
			
 
				+    返回当前标签列表中每个标签的匹配结果
			
 
				+
			
 
				+    Args:
			
 
				+        current_tags: 当前标签列表，如 ["立冬", "教资查分", "时间巧合"]
			
 
				+        persona_combination: 人设标签组合（带分类），如:
			
 
				+            [
			
 
				+                {"标签名称": "猫孩子", "所属分类": ["宠物亲子化", "宠物情感", "实质"]},
			
 
				+                {"标签名称": "被拿捏住的无奈感", "所属分类": ["宠物关系主导", "宠物情感", "实质"]}
			
 
				+            ]
			
 
				+        model_name: 模型名称
			
 
				+
			
 
				+    Returns:
			
 
				+        List[Dict]: 每个当前标签的匹配结果
			
 
				+        [
			
 
				+            {
			
 
				+                "当前标签": "立冬",
			
 
				+                "最终得分": 0.7,
			
 
				+                "匹配层级": "第一层分类匹配",
			
 
				+                "匹配到": "节气习俗",
			
 
				+                "匹配详情": {...},
			
 
				+                "综合说明": "..."
			
 
				+            },
			
 
				+            ...
			
 
				+        ]
			
 
				+    """
			
 
				+    if model_name is None:
			
 
				+        from lib.client import MODEL_NAME
			
 
				+        model_name = MODEL_NAME
			
 
				+
			
 
				+    # 提取人设标签和分类信息
			
 
				+    persona_tags = [f.get("特征名称", f.get("标签名称")) for f in persona_combination]
			
 
				+
			
 
				+    # 收集所有分类
			
 
				+    all_categories = set()
			
 
				+    for feature in persona_combination:
			
 
				+        categories = feature.get("所属分类", [])
			
 
				+        all_categories.update(categories)
			
 
				+
			
 
				+    # 创建Agent
			
 
				+    agent = create_unified_match_agent(model_name)
			
 
				+
			
 
				+    # 构建任务描述
			
 
				+    task_description = f"""## 本次匹配任务
			
 
				+
			
 
				+<当前标签列表>
			
 
				+{', '.join(current_tags)}
			
 
				+</当前标签列表>
			
 
				+
			
 
				+<人设标签组合>
			
 
				+{persona_combination}
			
 
				+</人设标签组合>
			
 
				+
			
 
				+**重要提醒**：
			
 
				+1. **标签匹配**: 对人设组合中每个"特征名称"逐个判断是否与当前标签同义(true/false)
			
 
				+2. **提前终止机制**:
			
 
				+   - 如果标签匹配成功，立即停止，"分类匹配_按层级"输出空数组[]
			
 
				+   - 如果标签匹配失败，进行分类匹配
			
 
				+3. **分类匹配**: 按层级（从具体到抽象）逐层判断
			
 
				+   - 分类在"所属分类"数组中的顺序就是从具体到抽象
			
 
				+   - 从第一层开始，判断该层所有分类
			
 
				+   - 在"分类匹配_按层级"数组中，按顺序输出每一层的判断结果
			
 
				+   - **重要**：每层的"该层匹配结果"必须对"该层候选分类"中的每个分类逐一判断
			
 
				+   - 一旦某层有匹配成功的分类(是否有从属关系=true)，该层后面不再输出更多层级
			
 
				+   - 示例：如果第2层匹配成功，则只输出第1层和第2层，不输出第3层及以后
			
 
				+4. **语义相似度（核心规则）**：
			
 
				+   - ⚠️ **严格要求**：语义相似度和从属关系是**完全独立**的两个维度！
			
 
				+   - 从属关系看层级：判断"A 是不是 B 的一种"
			
 
				+   - 语义相似度看词义：判断"A 和 B 这两个词本身像不像"
			
 
				+   - **禁止**：不要因为"是一种"就给高相似度！
			
 
				+5. **匹配结果**:
			
 
				+   - 标签匹配成功：匹配类型="标签匹配"，语义相似度=1.0
			
 
				+   - 分类匹配成功：匹配类型="分类匹配"，语义相似度为该分类的语义相似度
			
 
				+   - 都不成功：匹配类型="无匹配"，语义相似度=0
			
 
				+
			
 
				+请对当前标签列表中的**每个标签**（共{len(current_tags)}个）进行匹配评估。
			
 
				+输出JSON数组，长度必须等于{len(current_tags)}，顺序与当前标签列表一一对应。
			
 
				+"""
			
 
				+
			
 
				+    messages = [{
			
 
				+        "role": "user",
			
 
				+        "content": [{"type": "input_text", "text": task_description}]
			
 
				+    }]
			
 
				+
			
 
				+    with custom_span(
			
 
				+        name=f"统一匹配: 当前{len(current_tags)}个标签 vs 人设组合{persona_tags}",
			
 
				+        data={
			
 
				+            "当前标签列表": current_tags,
			
 
				+            "人设标签": persona_tags,
			
 
				+            "可用分类": list(all_categories)
			
 
				+        }
			
 
				+    ):
			
 
				+        result = await Runner.run(agent, input=messages)
			
 
				+
			
 
				+    # 解析响应
			
 
				+    parsed_result = parse_json_from_text(result.final_output)
			
 
				+
			
 
				+    if not parsed_result:
			
 
				+        # 解析失败，返回默认结果
			
 
				+        print("警告: JSON解析失败，返回默认结果")
			
 
				+        return [
			
 
				+            {
			
 
				+                "当前标签": tag,
			
 
				+                "匹配过程": {
			
 
				+                    "标签匹配": [],
			
 
				+                    "分类匹配_按层级": []
			
 
				+                },
			
 
				+                "匹配结果": {
			
 
				+                    "匹配类型": "无匹配",
			
 
				+                    "匹配到": None,
			
 
				+                    "语义相似度": 0
			
 
				+                }
			
 
				+            }
			
 
				+            for tag in current_tags
			
 
				+        ]
			
 
				+
			
 
				+    # 确保返回的是列表
			
 
				+    if not isinstance(parsed_result, list):
			
 
				+        print(f"警告: 返回结果不是列表，转换中: {type(parsed_result)}")
			
 
				+        parsed_result = [parsed_result]
			
 
				+
			
 
				+    # 验证结果数量
			
 
				+    if len(parsed_result) != len(current_tags):
			
 
				+        print(f"警告: 返回结果数量({len(parsed_result)})与当前标签数量({len(current_tags)})不匹配")
			
 
				+        # 补齐或截断
			
 
				+        while len(parsed_result) < len(current_tags):
			
 
				+            parsed_result.append({
			
 
				+                "当前标签": current_tags[len(parsed_result)],
			
 
				+                "最终得分": 0,
			
 
				+                "匹配层级": "无匹配",
			
 
				+                "匹配到": None,
			
 
				+                "匹配详情": {},
			
 
				+                "综合说明": "结果数量不匹配，自动补齐"
			
 
				+            })
			
 
				+        parsed_result = parsed_result[:len(current_tags)]
			
 
				+
			
 
				+    return parsed_result
			
--- a/script/data_processing/extract_feature_combinations_from_posts.py
+++ b/script/data_processing/extract_feature_combinations_from_posts.py
@@ -0,0 +1,384 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+从过去帖子_what解构结果目录中提取特征组合及其来源信息
			
 
				+特征组合格式: ['特征名称1', '特征名称2', ...]
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Optional
			
 
				+import re
			
 
				+import sys
			
 
				+
			
 
				+# 添加项目根目录到路径
			
 
				+project_root = Path(__file__).parent.parent.parent
			
 
				+sys.path.insert(0, str(project_root))
			
 
				+
			
 
				+from script.detail import get_xiaohongshu_detail
			
 
				+
			
 
				+
			
 
				def extract_post_id_from_filename(filename: str) -> str:
				    """Return the post ID encoded at the start of a file name.

				    The ID is the non-empty run of characters that precedes the first
				    underscore. An empty string is returned when the name contains no
				    underscore or begins with one.
				    """
				    prefix, underscore, _rest = filename.partition("_")
				    if underscore and prefix:
				        return prefix
				    return ""
			
 
				+
			
 
				+
			
 
				def get_post_detail(post_id: str) -> Optional[Dict]:
				    """
				    Fetch the detail record of one post.

				    Args:
				        post_id: ID of the post to look up.

				    Returns:
				        Whatever get_xiaohongshu_detail returns for the post, or None when
				        that call raises (the failure is reported on stdout).
				    """
				    try:
				        return get_xiaohongshu_detail(post_id)
				    except Exception as err:
				        # Best-effort lookup: report the problem and let the caller carry on.
				        print(f"  警告: 获取帖子 {post_id} 详情失败: {err}")
				    return None
			
 
				+
			
 
				+
			
 
				def extract_feature_combination_from_point(point_data: Dict, post_id: str, point_name: str, point_description: str) -> Optional[Dict]:
				    """
				    Build the feature-combination record for a single point.

				    Args:
				        point_data: Raw data of the point. Its "提取的特征" entry must be a
				            list; only dict entries that carry a "特征名称" key contribute.
				        post_id: ID of the post the point belongs to.
				        point_name: Name of the point.
				        point_description: Description of the point.

				    Returns:
				        A dict holding the feature names together with the point and post
				        metadata, or None when the point yields no feature names.
				    """
				    features = point_data.get("提取的特征") if isinstance(point_data, dict) else None
				    if not isinstance(features, list):
				        return None

				    # Only dict entries can carry a feature name. Without the isinstance
				    # guard a string entry passes the `in` test as a substring check and
				    # then fails on indexing, and a None entry raises straight away.
				    feature_names = [
				        feature["特征名称"]
				        for feature in features
				        if isinstance(feature, dict) and "特征名称" in feature
				    ]
				    if not feature_names:
				        return None

				    return {
				        "特征组合": feature_names,
				        "点的名称": point_name,
				        "点的描述": point_description,
				        "帖子id": post_id,
				    }
			
 
				+
			
 
				+
			
 
				def process_single_file(file_path: Path) -> Dict[str, List[Dict]]:
				    """
				    Extract every feature combination found in one JSON result file.

				    The file's "三点解构" object is scanned section by section: the
				    "全新内容", "共性差异" and "共性内容" lists under "灵感点", the "purposes"
				    list under "目的点" and the "key_points" list under "关键点".

				    Args:
				        file_path: Path of the JSON file; the post ID is taken from its name.

				    Returns:
				        A dict with the keys "灵感点", "目的点" and "关键点", each mapping to
				        the list of feature-combination records found for that point type.
				        Any error is printed and whatever was collected so far is returned.
				    """
				    result = {
				        "灵感点": [],
				        "目的点": [],
				        "关键点": []
				    }

				    # (result category and name key inside each item, list keys to scan)
				    sections = (
				        ("灵感点", ("全新内容", "共性差异", "共性内容")),
				        ("目的点", ("purposes",)),
				        ("关键点", ("key_points",)),
				    )

				    post_id = extract_post_id_from_filename(file_path.name)

				    try:
				        with open(file_path, "r", encoding="utf-8") as f:
				            data = json.load(f)

				        three_points = data.get("三点解构") if isinstance(data, dict) else None
				        if not isinstance(three_points, dict):
				            return result

				        for category, list_keys in sections:
				            section = three_points.get(category)
				            # A malformed section is skipped so that it cannot discard
				            # the sections that follow it.
				            if not isinstance(section, dict):
				                continue

				            for list_key in list_keys:
				                items = section.get(list_key)
				                if not isinstance(items, list):
				                    continue

				                for item in items:
				                    if not isinstance(item, dict):
				                        continue
				                    feature_combo = extract_feature_combination_from_point(
				                        item,
				                        post_id,
				                        item.get(category, ""),
				                        item.get("描述", ""),
				                    )
				                    if feature_combo:
				                        result[category].append(feature_combo)

				    except Exception as e:
				        # Deliberately best-effort: one unreadable file must not stop a batch.
				        print(f"处理文件 {file_path.name} 时出错: {e}")

				    return result
			
 
				+
			
 
				+
			
 
				def merge_results(all_results: List[Dict]) -> Dict:
				    """
				    Merge per-file extraction results, grouping sources by feature combination.

				    Args:
				        all_results: One result dict per file, each with the keys "灵感点",
				            "目的点" and "关键点".

				    Returns:
				        A dict with the same three keys. Each maps a tuple of sorted feature
				        names to the list of sources (point name, point description, post
				        ID) in which that combination occurred.
				    """
				    categories = ("灵感点", "目的点", "关键点")
				    grouped = {name: {} for name in categories}

				    for file_result in all_results:
				        for name in categories:
				            bucket = grouped[name]
				            for entry in file_result[name]:
				                # Lists are unhashable, so the sorted names become a tuple key.
				                key = tuple(sorted(entry["特征组合"]))
				                bucket.setdefault(key, []).append({
				                    "点的名称": entry["点的名称"],
				                    "点的描述": entry["点的描述"],
				                    "帖子id": entry["帖子id"],
				                })

				    return grouped
			
 
				+
			
 
				+
			
 
				def convert_to_array_format(merged_dict: Dict, fetch_details: bool = True, time_filter: Optional[str] = None) -> Dict:
				    """
				    Turn the merged dict into list form, optionally attaching post details.

				    Args:
				        merged_dict: Result keyed by point type, then by feature-combination
				            tuple, each holding a list of source dicts.
				        fetch_details: When True, look up each referenced post through
				            get_post_detail and attach the result as "帖子详情".
				        time_filter: Threshold in "YYYY-MM-DD HH:MM:SS" form. Only used when
				            fetch_details is True: a source is kept only if its post detail
				            was fetched and its 'publish_time' compares below the threshold
				            as a plain string.

				    Returns:
				        A dict with the keys "灵感点", "目的点" and "关键点", each a list of
				        {"特征组合": [...], "特征来源": [...]} entries. Combinations left
				        without any source are omitted.
				    """
				    categories = ("灵感点", "目的点", "关键点")
				    result = {name: [] for name in categories}
				    post_details = {}

				    if fetch_details:
				        # Every post referenced by any source, fetched once each.
				        post_ids = {
				            source["帖子id"]
				            for name in categories
				            for sources in merged_dict[name].values()
				            for source in sources
				        }

				        print(f"\n正在获取 {len(post_ids)} 个帖子的详情...")
				        for position, post_id in enumerate(post_ids, 1):
				            print(f"[{position}/{len(post_ids)}] 获取帖子 {post_id} 的详情...")
				            fetched = get_post_detail(post_id)
				            if fetched:
				                post_details[post_id] = fetched

				        print(f"成功获取 {len(post_details)} 个帖子详情")

				        if time_filter:
				            print(f"\n正在应用时间过滤 (< {time_filter})，避免使用晚于当前帖子的数据...")
				            kept = {}
				            dropped = 0
				            for post_id, detail in post_details.items():
				                # NOTE(review): a detail without 'publish_time' falls back to ''
				                # and therefore passes the filter - confirm this is intended.
				                publish_time = detail.get('publish_time', '')
				                if publish_time < time_filter:
				                    kept[post_id] = detail
				                    continue
				                dropped += 1
				                print(f"  ⚠️  过滤掉帖子 {post_id} (发布时间: {publish_time}，晚于阈值)")

				            print(f"过滤掉 {dropped} 个帖子（穿越），保留 {len(kept)} 个帖子")
				            post_details = kept

				    # With a time filter active, a source whose post has no retained detail
				    # (filtered out or never fetched) is dropped entirely.
				    detail_required = bool(fetch_details and time_filter)

				    for name in categories:
				        for combo_key, sources in merged_dict[name].items():
				            enhanced_sources = []
				            for source in sources:
				                detail = post_details.get(source["帖子id"]) if fetch_details else None
				                if detail is None and detail_required:
				                    continue

				                entry = source.copy()
				                if detail is not None:
				                    entry["帖子详情"] = detail
				                enhanced_sources.append(entry)

				            if not enhanced_sources:
				                continue
				            result[name].append({
				                "特征组合": list(combo_key),  # tuple key back to a list
				                "特征来源": enhanced_sources
				            })

				    return result
			
 
				+
			
 
				+
			
 
				def get_earliest_publish_time(current_posts_dir: Path) -> Optional[str]:
				    """
				    Find the earliest publish time among the posts of a directory.

				    Each "*.json" file name yields a post ID, whose detail is fetched through
				    get_post_detail. Publish times are compared as plain strings.

				    Args:
				        current_posts_dir: Directory holding the current posts' JSON files.

				    Returns:
				        The smallest non-empty 'publish_time' string found (documented format
				        "YYYY-MM-DD HH:MM:SS"), or None when the directory is missing, holds
				        no JSON file, or no usable publish time could be obtained.
				    """
				    if not current_posts_dir.exists():
				        print(f"警告: 当前帖子目录不存在: {current_posts_dir}")
				        return None

				    json_files = list(current_posts_dir.glob("*.json"))
				    if not json_files:
				        print(f"警告: 当前帖子目录为空: {current_posts_dir}")
				        return None

				    print("\n正在获取当前帖子的发布时间...")
				    print(f"找到 {len(json_files)} 个当前帖子")

				    earliest_time = None
				    for file_path in json_files:
				        post_id = extract_post_id_from_filename(file_path.name)
				        if not post_id:
				            continue

				        try:
				            detail = get_post_detail(post_id)
				        except Exception as e:
				            print(f"  警告: 获取帖子 {post_id} 发布时间失败: {e}")
				            continue

				        publish_time = detail.get('publish_time') if isinstance(detail, dict) else None
				        # An empty time would sort before every real timestamp and come back
				        # as a falsy threshold, which makes the caller skip its time filter;
				        # a non-string time would raise on comparison. Both are ignored.
				        if not isinstance(publish_time, str) or not publish_time:
				            continue

				        if earliest_time is None or publish_time < earliest_time:
				            earliest_time = publish_time
				            print(f"  更新最早时间: {publish_time} (帖子: {post_id})")

				    if earliest_time:
				        print(f"\n当前帖子最早发布时间: {earliest_time}")
				    else:
				        print("\n警告: 未能获取到任何当前帖子的发布时间")

				    return earliest_time
			
 
				+
			
 
				+
			
 
				def main():
				    """Extract feature combinations from past posts and save them as JSON.

				    Reads every "*.json" file of the past-post result directory, merges the
				    extracted combinations, attaches post details while filtering by the
				    earliest publish time of the current posts, prints per-category counts
				    and writes the result file.
				    """
				    # Default data location: <project root>/data/data_1118
				    data_dir = Path(__file__).parent.parent.parent / "data" / "data_1118"

				    input_dir = data_dir / "过去帖子_what解构结果"
				    current_posts_dir = data_dir / "当前帖子_what解构结果"
				    output_file = data_dir / "特征组合_帖子来源.json"

				    # Threshold used further down to drop posts that are not older
				    earliest_time = get_earliest_publish_time(current_posts_dir)

				    print(f"\n正在扫描目录: {input_dir}")
				    json_files = list(input_dir.glob("*.json"))
				    file_total = len(json_files)
				    print(f"找到 {file_total} 个JSON文件")

				    all_results = []
				    for position, file_path in enumerate(json_files, 1):
				        print(f"处理文件 [{position}/{file_total}]: {file_path.name}")
				        all_results.append(process_single_file(file_path))

				    print("\n正在合并结果...")
				    merged_result = merge_results(all_results)

				    print("正在转换为数组格式...")
				    final_result = convert_to_array_format(merged_result, fetch_details=True, time_filter=earliest_time)

				    # Summary of what was extracted
				    if earliest_time:
				        headline = f"\n提取统计 (已过滤掉发布时间 >= {earliest_time} 的帖子):"
				    else:
				        headline = "\n提取统计:"
				    print(headline)
				    for category in ("灵感点", "目的点", "关键点"):
				        entries = final_result[category]
				        source_count = sum(len(entry["特征来源"]) for entry in entries)
				        print(f"  {category}: {len(entries)} 个特征组合, {source_count} 个来源")

				    print(f"\n正在保存结果到: {output_file}")
				    with open(output_file, "w", encoding="utf-8") as handle:
				        json.dump(final_result, handle, ensure_ascii=False, indent=4)

				    print("完成!")


				if __name__ == "__main__":
				    main()
			
--- a/script/data_processing/match_inspiration_features_v3.py
+++ b/script/data_processing/match_inspiration_features_v3.py
@@ -0,0 +1,459 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+灵感点特征匹配脚本 v3（特征组合匹配版本）
			
 
				+
			
 
				+从解构任务列表中提取灵感点的特征列表，与人设历史的特征组合进行匹配。
			
 
				+匹配时考虑组合中每个特征的分类信息，使用待设计的组合匹配模块。
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import asyncio
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Optional
			
 
				+import sys
			
 
				+
			
 
				+# 添加项目根目录到路径
			
 
				+project_root = Path(__file__).parent.parent.parent
			
 
				+sys.path.insert(0, str(project_root))
			
 
				+
			
 
				+from agents import trace
			
 
				+from agents.tracing.create import custom_span
			
 
				+from lib.my_trace import set_trace
			
 
				+from lib.hierarchical_match_analyzer import hierarchical_match
			
 
				+
			
 
				# Upper bound on the number of match requests running at the same time
				MAX_CONCURRENT_REQUESTS = 20
				# Shared semaphore; stays None until get_semaphore() is first called
				semaphore = None


				def get_semaphore():
				    """Return the module-wide semaphore, creating it on the first call."""
				    global semaphore
				    if semaphore is not None:
				        return semaphore
				    semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
				    return semaphore
			
 
				+
			
 
				+
			
 
				def load_feature_categories(categories_file: Path) -> Dict:
				    """
				    Load the feature-to-category mapping from a JSON file.

				    Args:
				        categories_file: Path of the UTF-8 encoded JSON mapping file.

				    Returns:
				        The decoded JSON content.
				    """
				    with open(categories_file, encoding="utf-8") as handle:
				        mapping = json.load(handle)
				    return mapping
			
 
				+
			
 
				+
			
 
				def enrich_persona_combinations_with_categories(
				    persona_combinations: List[Dict],
				    feature_categories: Dict,
				    point_type: str
				) -> List[Dict]:
				    """
				    Attach category information to every feature of each persona combination.

				    Args:
				        persona_combinations: Combination dicts; "特征组合" holds the feature
				            names and "特征来源" the sources.
				        feature_categories: Mapping of point type -> feature name -> info
				            dict whose "所属分类" entry lists the categories.
				        point_type: Point type whose mapping is used ("灵感点", "目的点" or
				            "关键点").

				    Returns:
				        One dict per input combination with "特征组合" (features paired with
				        their categories, an empty list when unknown), "原始特征组合" (the
				        original name list) and "特征来源".
				    """
				    # Category mapping that applies to the requested point type
				    lookup = feature_categories.get(point_type, {})

				    def with_categories(name):
				        return {
				            "特征名称": name,
				            "所属分类": lookup.get(name, {}).get("所属分类", [])
				        }

				    enriched = []
				    for combo in persona_combinations:
				        names = combo.get("特征组合", [])
				        enriched.append({
				            "特征组合": [with_categories(name) for name in names],
				            "原始特征组合": names,
				            "特征来源": combo.get("特征来源", [])
				        })

				    return enriched
			
 
				+
			
 
				+
			
 
				async def match_feature_list_with_combination(
				    current_feature_list: List[str],
				    persona_combination: Dict,
				    model_name: Optional[str] = None
				) -> Dict:
				    """
				    Match the current point's features against one persona combination.

				    The matching itself is delegated to hierarchical_match and runs while
				    holding the shared semaphore, which bounds concurrent requests.

				    Args:
				        current_feature_list: Feature names of the current point, e.g.
				            ["立冬", "教资查分", "时间巧合"].
				        persona_combination: Enriched combination with the keys "特征组合"
				            (features with their categories), "原始特征组合" (plain feature
				            names) and "特征来源" (sources).
				        model_name: Model name forwarded to hierarchical_match.

				    Returns:
				        A dict with "人设特征组合" (the plain feature names), "匹配结果" (the
				        matcher's "最终得分", "匹配层级", "匹配结果" and "综合说明", plus its
				        "分层结果" under the key "分层详情") and "人设特征来源".
				    """
				    async with get_semaphore():
				        outcome = await hierarchical_match(
				            current_features=current_feature_list,
				            persona_combination=persona_combination["特征组合"],
				            model_name=model_name
				        )

				        copied_keys = ("最终得分", "匹配层级", "匹配结果", "综合说明")
				        return {
				            "人设特征组合": persona_combination["原始特征组合"],
				            "匹配结果": {
				                **{key: outcome[key] for key in copied_keys},
				                # The matcher's "分层结果" is exposed under a different key
				                "分层详情": outcome["分层结果"]
				            },
				            "人设特征来源": persona_combination["特征来源"]
				        }
			
 
				+
			
 
				+
			
 
				async def match_inspiration_point_with_combinations(
				    current_feature_list: List[str],
				    persona_combinations: List[Dict],
				    model_name: Optional[str] = None
				) -> List[Dict]:
				    """
				    Match the current point's features against every persona combination.

				    Args:
				        current_feature_list: Feature names of the current point.
				        persona_combinations: Persona combinations, already enriched with
				            category information.
				        model_name: Model name forwarded to each match.

				    Returns:
				        One match result per combination, ordered by descending
				        "最终得分".
				    """
				    print(f"      批量匹配: {current_feature_list} <-> {len(persona_combinations)}个人设特征组合")

				    def final_score(entry):
				        return entry["匹配结果"]["最终得分"]

				    # All combinations are matched concurrently
				    pending = [
				        match_feature_list_with_combination(
				            current_feature_list=current_feature_list,
				            persona_combination=combination,
				            model_name=model_name
				        )
				        for combination in persona_combinations
				    ]
				    gathered = await asyncio.gather(*pending)

				    return sorted(gathered, key=final_score, reverse=True)
			
 
				+
			
 
				+
			
 
				async def process_single_inspiration_point(
				    inspiration_point: Dict,
				    persona_combinations: List[Dict],
				    model_name: Optional[str] = None
				) -> Dict:
				    """
				    Match one inspiration point against all persona combinations.

				    Args:
				        inspiration_point: Inspiration point data; "名称" and "特征列表" are
				            read from it.
				        persona_combinations: Persona combinations, already enriched with
				            category information.
				        model_name: Model name forwarded to the matching.

				    Returns:
				        A shallow copy of the inspiration point with a one-element
				        "how步骤列表" that holds the match results.
				    """
				    point_name = inspiration_point.get("名称", "")
				    feature_list = inspiration_point.get("特征列表", [])

				    print(f"  处理灵感点: {point_name}")
				    print(f"    特征列表: {feature_list}")

				    span_data = {
				        "灵感点": point_name,
				        "特征列表": feature_list,
				        "人设组合数量": len(persona_combinations)
				    }
				    # The span marks the handling of this inspiration point in the trace
				    with custom_span(name=f"处理灵感点: {point_name}", data=span_data):
				        match_results = await match_inspiration_point_with_combinations(
				            current_feature_list=feature_list,
				            persona_combinations=persona_combinations,
				            model_name=model_name
				        )

				    updated_point = inspiration_point.copy()
				    updated_point["how步骤列表"] = [
				        {
				            "步骤名称": "灵感特征列表批量匹配人设特征组合",
				            "当前特征列表": feature_list,
				            "匹配结果": match_results
				        }
				    ]
				    return updated_point
			
 
				+
			
 
				+
			
 
				async def process_single_task(
				    task: Dict,
				    task_index: int,
				    total_tasks: int,
				    persona_combinations: List[Dict],
				    model_name: Optional[str] = None
				) -> Dict:
				    """
				    Process every inspiration point of one task.

				    Args:
				        task: Task data; "帖子id" and "what解构结果" -> "灵感点列表" are read.
				        task_index: Position of the task, used in the progress output only.
				        total_tasks: Total number of tasks, used in the progress output only.
				        persona_combinations: Persona combinations, already enriched with
				            category information.
				        model_name: Model name forwarded to the matching.

				    Returns:
				        A shallow copy of the task with "how解构结果" added, whose
				        "灵感点列表" holds the processed inspiration points.
				    """
				    post_id = task.get("帖子id", "")
				    print(f"\n处理任务 [{task_index}/{total_tasks}]: {post_id}")

				    inspiration_list = task.get("what解构结果", {}).get("灵感点列表", [])
				    print(f"  灵感点数量: {len(inspiration_list)}")

				    # All inspiration points of the task are processed concurrently
				    processed_points = await asyncio.gather(*(
				        process_single_inspiration_point(
				            inspiration_point=point,
				            persona_combinations=persona_combinations,
				            model_name=model_name
				        )
				        for point in inspiration_list
				    ))

				    updated_task = task.copy()
				    updated_task["how解构结果"] = {"灵感点列表": list(processed_points)}
				    return updated_task
			
 
				+
			
 
				+
			
 
				async def process_task_list(
				    task_list: List[Dict],
				    persona_combinations: List[Dict],
				    model_name: Optional[str] = None,
				    current_time: Optional[str] = None,
				    log_url: Optional[str] = None
				) -> List[Dict]:
				    """
				    Process the whole task list, running all tasks concurrently.

				    Args:
				        task_list: Tasks to process.
				        persona_combinations: Persona combinations, already enriched with
				            category information.
				        model_name: Model name forwarded to the matching.
				        current_time: Timestamp recorded in the trace span data.
				        log_url: Log link recorded in the trace span data.

				    Returns:
				        The processed tasks, in the order of task_list.
				    """
				    print(f"人设灵感特征组合数量: {len(persona_combinations)}")

				    task_total = len(task_list)
				    span_data = {
				        "任务总数": task_total,
				        "人设组合数量": len(persona_combinations),
				        "current_time": current_time,
				        "log_url": log_url
				    }

				    # One span covers the complete run over all tasks
				    with custom_span(name="特征组合批量匹配 v3 - 所有任务", data=span_data):
				        jobs = [
				            process_single_task(
				                task=task,
				                task_index=position,
				                total_tasks=task_total,
				                persona_combinations=persona_combinations,
				                model_name=model_name
				            )
				            for position, task in enumerate(task_list, 1)
				        ]
				        finished = await asyncio.gather(*jobs)

				    return list(finished)
			
 
				+
			
 
				+
			
 
				async def main(current_time: Optional[str] = None, log_url: Optional[str] = None):
				    """Run the v3 feature-combination matching for all current posts.

				    Loads the task list, the persona combinations and the category mapping
				    from the data directory, matches every inspiration point, writes one
				    result file per task and prints summary counts.

				    Args:
				        current_time: Timestamp supplied by the caller; stored in each
				            result's metadata.
				        log_url: Log link supplied by the caller; stored in each result's
				            metadata and printed at the end when set.
				    """
				    data_dir = Path(__file__).parent.parent.parent / "data" / "data_1118"

				    task_list_file = data_dir / "当前帖子_解构任务列表.json"
				    persona_combinations_file = data_dir / "特征组合_帖子来源.json"
				    feature_categories_file = data_dir / "特征名称_分类映射.json"
				    output_dir = data_dir / "当前帖子_how解构结果_v3"

				    output_dir.mkdir(parents=True, exist_ok=True)

				    # The model name becomes part of every output file name
				    from lib.client import MODEL_NAME
				    model_name_short = MODEL_NAME.replace("google/", "").replace("/", "_")

				    def read_json(path):
				        with open(path, "r", encoding="utf-8") as handle:
				            return json.load(handle)

				    print(f"读取解构任务列表: {task_list_file}")
				    task_list_data = read_json(task_list_file)

				    print(f"读取人设特征组合: {persona_combinations_file}")
				    persona_combinations_data = read_json(persona_combinations_file)

				    print(f"读取特征分类映射: {feature_categories_file}")
				    feature_categories = load_feature_categories(feature_categories_file)

				    task_list = task_list_data.get("解构任务列表", [])
				    print(f"\n总任务数: {len(task_list)}")
				    print(f"使用模型: {MODEL_NAME}\n")

				    # Only the inspiration-point combinations are enriched and matched
				    persona_inspiration_combinations = enrich_persona_combinations_with_categories(
				        persona_combinations=persona_combinations_data.get("灵感点", []),
				        feature_categories=feature_categories,
				        point_type="灵感点"
				    )

				    print(f"灵感点特征组合数量: {len(persona_inspiration_combinations)}")
				    print("示例组合 (前3个):")
				    for position, combo in enumerate(persona_inspiration_combinations[:3], 1):
				        print(f"  {position}. 原始组合: {combo['原始特征组合']}")
				        print(f"     带分类: {combo['特征组合']}")
				    print()

				    updated_task_list = await process_task_list(
				        task_list=task_list,
				        persona_combinations=persona_inspiration_combinations,
				        model_name=None,  # fall back to the default model
				        current_time=current_time,
				        log_url=log_url
				    )

				    # One output file per task
				    print(f"\n保存结果到: {output_dir}")
				    for task in updated_task_list:
				        post_id = task.get("帖子id", "unknown")
				        output_file = output_dir / f"{post_id}_how_v3_{model_name_short}.json"

				        task["元数据"] = {
				            "current_time": current_time,
				            "log_url": log_url,
				            "version": "v3_combination_match",
				            "model": MODEL_NAME,
				            "说明": "v3版本: 使用特征列表匹配人设特征组合（带分类信息）"
				        }

				        print(f"  保存: {output_file.name}")
				        with open(output_file, "w", encoding="utf-8") as handle:
				            json.dump(task, handle, ensure_ascii=False, indent=4)

				    print("\n完成!")

				    # Summary counts
				    all_points = [
				        point
				        for task in updated_task_list
				        for point in task["how解构结果"]["灵感点列表"]
				    ]
				    total_matches = sum(len(point["how步骤列表"][0]["匹配结果"]) for point in all_points)
				    print("\n统计:")
				    print(f"  处理的帖子数: {len(updated_task_list)}")
				    print(f"  处理的灵感点数: {len(all_points)}")
				    print(f"  生成的匹配结果数: {total_matches}")

				    if log_url:
				        print(f"\nTrace: {log_url}\n")


				if __name__ == "__main__":
				    # Set up tracing first
				    current_time, log_url = set_trace()

				    # The trace context wraps the complete run
				    with trace("灵感特征组合批量匹配 v3"):
				        asyncio.run(main(current_time, log_url))
			
--- a/script/data_processing/match_inspiration_features_v4.py
+++ b/script/data_processing/match_inspiration_features_v4.py
@@ -0,0 +1,435 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+灵感点特征匹配脚本 v4（统一匹配版本）
			
 
				+
			
 
				+使用单个prompt同时完成标签匹配和分类匹配，不分步骤执行。
			
 
				+一次LLM调用完成所有层级的评估。
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import asyncio
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Optional
			
 
				+import sys
			
 
				+
			
 
				+# 添加项目根目录到路径
			
 
				+project_root = Path(__file__).parent.parent.parent
			
 
				+sys.path.insert(0, str(project_root))
			
 
				+
			
 
				+from agents import trace
			
 
				+from agents.tracing.create import custom_span
			
 
				+from lib.my_trace import set_trace
			
 
				+from lib.unified_match_analyzer import unified_match
			
 
				+
			
 
				+# 全局并发限制
			
 
				+MAX_CONCURRENT_REQUESTS = 20
			
 
				+semaphore = None
			
 
				+
			
 
				+
			
 
				+def get_semaphore():
			
 
				+    """获取全局信号量"""
			
 
				+    global semaphore
			
 
				+    if semaphore is None:
			
 
				+        semaphore = asyncio.Semaphore(MAX_CONCURRENT_REQUESTS)
			
 
				+    return semaphore
			
 
				+
			
 
				+
			
 
				+def load_feature_categories(categories_file: Path) -> Dict:
			
 
				+    """加载特征分类映射"""
			
 
				+    with open(categories_file, "r", encoding="utf-8") as f:
			
 
				+        return json.load(f)
			
 
				+
			
 
				+
			
 
				+def enrich_persona_combinations_with_categories(
			
 
				+    persona_combinations: List[Dict],
			
 
				+    feature_categories: Dict,
			
 
				+    point_type: str
			
 
				+) -> List[Dict]:
			
 
				+    """为人设特征组合添加分类信息"""
			
 
				+    enriched_combinations = []
			
 
				+    type_categories = feature_categories.get(point_type, {})
			
 
				+
			
 
				+    for combo in persona_combinations:
			
 
				+        feature_list = combo.get("特征组合", [])
			
 
				+
			
 
				+        # 为每个特征添加分类信息
			
 
				+        enriched_features = []
			
 
				+        for feature_name in feature_list:
			
 
				+            categories = type_categories.get(feature_name, {}).get("所属分类", [])
			
 
				+            enriched_features.append({
			
 
				+                "特征名称": feature_name,
			
 
				+                "所属分类": categories
			
 
				+            })
			
 
				+
			
 
				+        enriched_combo = {
			
 
				+            "特征组合": enriched_features,
			
 
				+            "原始特征组合": feature_list,
			
 
				+            "特征来源": combo.get("特征来源", [])
			
 
				+        }
			
 
				+        enriched_combinations.append(enriched_combo)
			
 
				+
			
 
				+    return enriched_combinations
			
 
				+
			
 
				+
			
 
				+async def match_tag_list_with_combination(
			
 
				+    current_tag_list: List[str],
			
 
				+    persona_combination: Dict,
			
 
				+    model_name: Optional[str] = None
			
 
				+) -> Dict:
			
 
				+    """
			
 
				+    使用统一匹配将当前点的标签列表与一个人设历史组合进行匹配
			
 
				+
			
 
				+    一次LLM调用完成标签匹配和分类匹配的评估
			
 
				+
			
 
				+    Returns:
			
 
				+        {
			
 
				+            "人设标签组合": [...],
			
 
				+            "当前标签匹配结果": [
			
 
				+                {"当前标签": "立冬", "最终得分": 0.7, "匹配层级": "...", ...},
			
 
				+                {"当前标签": "教资查分", "最终得分": 0.6, ...},
			
 
				+                ...
			
 
				+            ],
			
 
				+            "人设标签来源": [...]
			
 
				+        }
			
 
				+    """
			
 
				+    sem = get_semaphore()
			
 
				+    async with sem:
			
 
				+        # 调用统一匹配模块（返回每个当前标签的匹配结果）
			
 
				+        tag_match_results = await unified_match(
			
 
				+            current_tags=current_tag_list,
			
 
				+            persona_combination=persona_combination["特征组合"],
			
 
				+            model_name=model_name
			
 
				+        )
			
 
				+
			
 
				+        # 构建返回结果
			
 
				+        result = {
			
 
				+            "人设标签组合": persona_combination["原始特征组合"],
			
 
				+            "当前标签匹配结果": tag_match_results,  # 每个当前标签的匹配结果
			
 
				+            "人设标签来源": persona_combination["特征来源"]
			
 
				+        }
			
 
				+
			
 
				+        return result
			
 
				+
			
 
				+
			
 
				+async def match_inspiration_point_with_combinations(
			
 
				+    current_feature_list: List[str],
			
 
				+    persona_combinations: List[Dict],
			
 
				+    model_name: Optional[str] = None
			
 
				+) -> List[Dict]:
			
 
				+    """将当前点的特征列表与所有人设特征组合进行匹配"""
			
 
				+    print(f"      批量匹配: 当前{len(current_feature_list)}个标签 {current_feature_list} vs {len(persona_combinations)}个人设组合")
			
 
				+
			
 
				+    # 并发匹配所有组合
			
 
				+    tasks = [
			
 
				+        match_tag_list_with_combination(
			
 
				+            current_tag_list=current_feature_list,
			
 
				+            persona_combination=combo,
			
 
				+            model_name=model_name
			
 
				+        )
			
 
				+        for combo in persona_combinations
			
 
				+    ]
			
 
				+
			
 
				+    match_results = await asyncio.gather(*tasks)
			
 
				+
			
 
				+    # 过滤和修复无效结果
			
 
				+    valid_results = []
			
 
				+    for result in match_results:
			
 
				+        # 确保result是dict
			
 
				+        if not isinstance(result, dict):
			
 
				+            print(f"警告: 跳过无效结果 (不是字典): {type(result)}")
			
 
				+            continue
			
 
				+
			
 
				+        # 确保有当前标签匹配结果字段
			
 
				+        tag_results = result.get("当前标签匹配结果")
			
 
				+        if tag_results is None:
			
 
				+            print(f"警告: 结果缺少当前标签匹配结果字段")
			
 
				+            continue
			
 
				+
			
 
				+        # 确保当前标签匹配结果是list
			
 
				+        if not isinstance(tag_results, list):
			
 
				+            print(f"警告: 当前标签匹配结果不是列表: {type(tag_results)}")
			
 
				+            continue
			
 
				+
			
 
				+        # 计算该人设组合的加权平均得分
			
 
				+        weighted_scores = []
			
 
				+        for tag_result in tag_results:
			
 
				+            if isinstance(tag_result, dict):
			
 
				+                match_result = tag_result.get("匹配结果", {})
			
 
				+                match_type = match_result.get("匹配类型")
			
 
				+                similarity = match_result.get("语义相似度", 0)
			
 
				+
			
 
				+                # 根据匹配类型设置权重
			
 
				+                if match_type == "标签匹配":
			
 
				+                    weight = 1.0
			
 
				+                elif match_type == "分类匹配":
			
 
				+                    weight = 0.5
			
 
				+                else:  # 无匹配
			
 
				+                    weight = 1.0  # 无匹配也使用1.0权重，因为相似度已经是0
			
 
				+
			
 
				+                weighted_score = similarity * weight
			
 
				+                weighted_scores.append(weighted_score)
			
 
				+
			
 
				+        avg_score = sum(weighted_scores) / len(weighted_scores) if weighted_scores else 0
			
 
				+        result["组合平均得分"] = avg_score
			
 
				+
			
 
				+        # 添加精简结果字段
			
 
				+        result["精简结果"] = {
			
 
				+            "人设标签组合": result.get("人设标签组合", []),
			
 
				+            "组合平均得分": avg_score,
			
 
				+            "各标签得分": [
			
 
				+                {
			
 
				+                    "标签": tag_res.get("当前标签"),
			
 
				+                    "原始相似度": tag_res.get("匹配结果", {}).get("语义相似度", 0),
			
 
				+                    "匹配类型": tag_res.get("匹配结果", {}).get("匹配类型"),
			
 
				+                    "权重": 1.0 if tag_res.get("匹配结果", {}).get("匹配类型") == "标签匹配" else 0.5 if tag_res.get("匹配结果", {}).get("匹配类型") == "分类匹配" else 1.0,
			
 
				+                    "加权得分": tag_res.get("匹配结果", {}).get("语义相似度", 0) * (1.0 if tag_res.get("匹配结果", {}).get("匹配类型") == "标签匹配" else 0.5 if tag_res.get("匹配结果", {}).get("匹配类型") == "分类匹配" else 1.0),
			
 
				+                    "匹配到": tag_res.get("匹配结果", {}).get("匹配到")
			
 
				+                }
			
 
				+                for tag_res in tag_results if isinstance(tag_res, dict)
			
 
				+            ]
			
 
				+        }
			
 
				+
			
 
				+        valid_results.append(result)
			
 
				+
			
 
				+    # 按组合平均得分降序排序
			
 
				+    valid_results.sort(
			
 
				+        key=lambda x: x.get("组合平均得分", 0),
			
 
				+        reverse=True
			
 
				+    )
			
 
				+
			
 
				+    return valid_results
			
 
				+
			
 
				+
			
 
				+async def process_single_inspiration_point(
			
 
				+    inspiration_point: Dict,
			
 
				+    persona_combinations: List[Dict],
			
 
				+    model_name: Optional[str] = None
			
 
				+) -> Dict:
			
 
				+    """处理单个灵感点的特征组合匹配"""
			
 
				+    point_name = inspiration_point.get("名称", "")
			
 
				+    feature_list = inspiration_point.get("特征列表", [])
			
 
				+
			
 
				+    print(f"  处理灵感点: {point_name}")
			
 
				+    print(f"    特征列表: {feature_list}")
			
 
				+
			
 
				+    with custom_span(
			
 
				+        name=f"处理灵感点: {point_name}",
			
 
				+        data={
			
 
				+            "灵感点": point_name,
			
 
				+            "特征列表": feature_list,
			
 
				+            "人设组合数量": len(persona_combinations)
			
 
				+        }
			
 
				+    ):
			
 
				+        # 将特征列表与所有人设组合进行匹配
			
 
				+        match_results = await match_inspiration_point_with_combinations(
			
 
				+            current_feature_list=feature_list,
			
 
				+            persona_combinations=persona_combinations,
			
 
				+            model_name=model_name
			
 
				+        )
			
 
				+
			
 
				+    # 构建完整版 how 步骤
			
 
				+    how_step = {
			
 
				+        "步骤名称": "灵感特征列表统一匹配人设特征组合 (v4)",
			
 
				+        "当前特征列表": feature_list,
			
 
				+        "匹配结果": match_results
			
 
				+    }
			
 
				+
			
 
				+    # 构建精简版 how 步骤（只包含精简结果）
			
 
				+    how_step_simplified = {
			
 
				+        "步骤名称": "灵感特征列表统一匹配人设特征组合 (v4) - 精简版",
			
 
				+        "当前特征列表": feature_list,
			
 
				+        "匹配结果": [
			
 
				+            match.get("精简结果", {})
			
 
				+            for match in match_results
			
 
				+        ]
			
 
				+    }
			
 
				+
			
 
				+    # 返回更新后的灵感点
			
 
				+    result = inspiration_point.copy()
			
 
				+    result["how步骤列表"] = [how_step]
			
 
				+    result["how步骤列表_精简版"] = [how_step_simplified]
			
 
				+
			
 
				+    return result
			
 
				+
			
 
				+
			
 
				+async def process_single_task(
			
 
				+    task: Dict,
			
 
				+    task_index: int,
			
 
				+    total_tasks: int,
			
 
				+    persona_combinations: List[Dict],
			
 
				+    model_name: Optional[str] = None
			
 
				+) -> Dict:
			
 
				+    """处理单个任务"""
			
 
				+    post_id = task.get("帖子id", "")
			
 
				+    print(f"\n处理任务 [{task_index}/{total_tasks}]: {post_id}")
			
 
				+
			
 
				+    what_result = task.get("what解构结果", {})
			
 
				+    inspiration_list = what_result.get("灵感点列表", [])
			
 
				+
			
 
				+    print(f"  灵感点数量: {len(inspiration_list)}")
			
 
				+
			
 
				+    # 并发处理所有灵感点
			
 
				+    tasks = [
			
 
				+        process_single_inspiration_point(
			
 
				+            inspiration_point=inspiration_point,
			
 
				+            persona_combinations=persona_combinations,
			
 
				+            model_name=model_name
			
 
				+        )
			
 
				+        for inspiration_point in inspiration_list
			
 
				+    ]
			
 
				+    updated_inspiration_list = await asyncio.gather(*tasks)
			
 
				+
			
 
				+    # 构建 how 解构结果
			
 
				+    how_result = {
			
 
				+        "灵感点列表": list(updated_inspiration_list)
			
 
				+    }
			
 
				+
			
 
				+    # 更新任务
			
 
				+    updated_task = task.copy()
			
 
				+    updated_task["how解构结果"] = how_result
			
 
				+
			
 
				+    return updated_task
			
 
				+
			
 
				+
			
 
				+async def process_task_list(
			
 
				+    task_list: List[Dict],
			
 
				+    persona_combinations: List[Dict],
			
 
				+    model_name: Optional[str] = None,
			
 
				+    current_time: Optional[str] = None,
			
 
				+    log_url: Optional[str] = None
			
 
				+) -> List[Dict]:
			
 
				+    """处理整个解构任务列表（并发执行）"""
			
 
				+    print(f"人设灵感特征组合数量: {len(persona_combinations)}")
			
 
				+
			
 
				+    with custom_span(
			
 
				+        name="统一匹配 v4 - 所有任务",
			
 
				+        data={
			
 
				+            "任务总数": len(task_list),
			
 
				+            "人设组合数量": len(persona_combinations),
			
 
				+            "current_time": current_time,
			
 
				+            "log_url": log_url
			
 
				+        }
			
 
				+    ):
			
 
				+        # 并发处理所有任务
			
 
				+        tasks = [
			
 
				+            process_single_task(
			
 
				+                task=task,
			
 
				+                task_index=i,
			
 
				+                total_tasks=len(task_list),
			
 
				+                persona_combinations=persona_combinations,
			
 
				+                model_name=model_name
			
 
				+            )
			
 
				+            for i, task in enumerate(task_list, 1)
			
 
				+        ]
			
 
				+        updated_task_list = await asyncio.gather(*tasks)
			
 
				+
			
 
				+    return list(updated_task_list)
			
 
				+
			
 
				+
			
 
				+async def main(current_time: Optional[str] = None, log_url: Optional[str] = None):
			
 
				+    """主函数"""
			
 
				+    # 输入输出路径
			
 
				+    script_dir = Path(__file__).parent
			
 
				+    project_root = script_dir.parent.parent
			
 
				+    data_dir = project_root / "data" / "data_1118"
			
 
				+
			
 
				+    task_list_file = data_dir / "当前帖子_解构任务列表.json"
			
 
				+    persona_combinations_file = data_dir / "特征组合_帖子来源.json"
			
 
				+    feature_categories_file = data_dir / "特征名称_分类映射.json"
			
 
				+    output_dir = data_dir / "当前帖子_how解构结果_v4"
			
 
				+
			
 
				+    # 创建输出目录
			
 
				+    output_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    # 获取模型名称
			
 
				+    from lib.client import MODEL_NAME
			
 
				+    model_name_short = MODEL_NAME.replace("google/", "").replace("/", "_")
			
 
				+
			
 
				+    print(f"读取解构任务列表: {task_list_file}")
			
 
				+    with open(task_list_file, "r", encoding="utf-8") as f:
			
 
				+        task_list_data = json.load(f)
			
 
				+
			
 
				+    print(f"读取人设特征组合: {persona_combinations_file}")
			
 
				+    with open(persona_combinations_file, "r", encoding="utf-8") as f:
			
 
				+        persona_combinations_data = json.load(f)
			
 
				+
			
 
				+    print(f"读取特征分类映射: {feature_categories_file}")
			
 
				+    feature_categories = load_feature_categories(feature_categories_file)
			
 
				+
			
 
				+    # 获取任务列表 - 处理所有帖子
			
 
				+    task_list = task_list_data.get("解构任务列表", [])
			
 
				+    print(f"\n总任务数: {len(task_list)}")
			
 
				+    print(f"使用模型: {MODEL_NAME}\n")
			
 
				+
			
 
				+    # 为人设特征组合添加分类信息（只处理灵感点）- 使用所有组合
			
 
				+    persona_inspiration_combinations_raw = persona_combinations_data.get("灵感点", [])
			
 
				+    persona_inspiration_combinations = enrich_persona_combinations_with_categories(
			
 
				+        persona_combinations=persona_inspiration_combinations_raw,
			
 
				+        feature_categories=feature_categories,
			
 
				+        point_type="灵感点"
			
 
				+    )
			
 
				+
			
 
				+    print(f"灵感点特征组合数量: {len(persona_inspiration_combinations)}")
			
 
				+    print(f"示例组合 (前2个):")
			
 
				+    for i, combo in enumerate(persona_inspiration_combinations[:2], 1):
			
 
				+        print(f"  {i}. 原始组合: {combo['原始特征组合']}")
			
 
				+        print(f"     带分类: {combo['特征组合'][:2]}...")  # 只显示前2个特征
			
 
				+    print()
			
 
				+
			
 
				+    # 处理任务列表
			
 
				+    updated_task_list = await process_task_list(
			
 
				+        task_list=task_list,
			
 
				+        persona_combinations=persona_inspiration_combinations,
			
 
				+        model_name=None,
			
 
				+        current_time=current_time,
			
 
				+        log_url=log_url
			
 
				+    )
			
 
				+
			
 
				+    # 分文件保存结果
			
 
				+    print(f"\n保存结果到: {output_dir}")
			
 
				+    for task in updated_task_list:
			
 
				+        post_id = task.get("帖子id", "unknown")
			
 
				+        output_file = output_dir / f"{post_id}_how_v4_{model_name_short}.json"
			
 
				+
			
 
				+        # 在每个任务中添加元数据
			
 
				+        task["元数据"] = {
			
 
				+            "current_time": current_time,
			
 
				+            "log_url": log_url,
			
 
				+            "version": "v4_unified_match",
			
 
				+            "model": MODEL_NAME,
			
 
				+            "说明": "v4版本: 使用单个prompt统一完成标签匹配和分类匹配"
			
 
				+        }
			
 
				+
			
 
				+        print(f"  保存: {output_file.name}")
			
 
				+        with open(output_file, "w", encoding="utf-8") as f:
			
 
				+            json.dump(task, f, ensure_ascii=False, indent=4)
			
 
				+
			
 
				+    print("\n完成!")
			
 
				+
			
 
				+    # 打印统计信息
			
 
				+    total_inspiration_points = sum(
			
 
				+        len(task["how解构结果"]["灵感点列表"])
			
 
				+        for task in updated_task_list
			
 
				+    )
			
 
				+    total_matches = sum(
			
 
				+        len(point["how步骤列表"][0]["匹配结果"])
			
 
				+        for task in updated_task_list
			
 
				+        for point in task["how解构结果"]["灵感点列表"]
			
 
				+    )
			
 
				+    print(f"\n统计:")
			
 
				+    print(f"  处理的帖子数: {len(updated_task_list)}")
			
 
				+    print(f"  处理的灵感点数: {total_inspiration_points}")
			
 
				+    print(f"  生成的匹配结果数: {total_matches}")
			
 
				+
			
 
				+    if log_url:
			
 
				+        print(f"\nTrace: {log_url}\n")
			
 
				+
			
 
				+
			
 
if __name__ == "__main__":
    # Set up tracing; the returned pair is unpacked into current_time and
    # log_url and forwarded to main() unchanged.
    current_time, log_url = set_trace()

    # Wrap the entire run in a single trace context.
    with trace("灵感特征统一匹配 v4"):
        asyncio.run(main(current_time, log_url))
			
--- a/test_hierarchical_match.py
+++ b/test_hierarchical_match.py
@@ -0,0 +1,110 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+测试分层匹配模块
			
 
				+"""
			
 
				+
			
 
				+import asyncio
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+import sys
			
 
				+
			
 
				+project_root = Path(__file__).parent
			
 
				+sys.path.insert(0, str(project_root))
			
 
				+
			
 
				+from lib.hierarchical_match_analyzer import hierarchical_match
			
 
				+
			
 
				+
			
 
				+async def test_hierarchical_match():
			
 
				+    """测试分层匹配功能"""
			
 
				+
			
 
				+    print("=== 测试分层匹配模块 ===\n")
			
 
				+
			
 
				+    # 测试案例1: 标签匹配（应该匹配成功）
			
 
				+    print("测试案例1: 标签完全匹配")
			
 
				+    print("-" * 50)
			
 
				+
			
 
				+    current_features_1 = ["猫咪", "拟人化", "搞笑"]
			
 
				+    persona_combo_1 = [
			
 
				+        {"特征名称": "猫咪", "所属分类": ["宠物情感", "实质"]},
			
 
				+        {"特征名称": "被拿捏住的无奈感", "所属分类": ["宠物关系主导", "宠物情感", "实质"]}
			
 
				+    ]
			
 
				+
			
 
				+    print(f"当前特征: {current_features_1}")
			
 
				+    print(f"人设组合: {[f['特征名称'] for f in persona_combo_1]}")
			
 
				+    print("\n正在匹配...")
			
 
				+
			
 
				+    result_1 = await hierarchical_match(current_features_1, persona_combo_1)
			
 
				+
			
 
				+    print(f"\n结果:")
			
 
				+    print(f"  匹配层级: {result_1['匹配层级']}")
			
 
				+    print(f"  匹配结果: {result_1['匹配结果']}")
			
 
				+    print(f"  最终得分: {result_1['最终得分']}")
			
 
				+    print(f"  说明: {result_1['综合说明']}")
			
 
				+
			
 
				+
			
 
				+    # 测试案例2: 第一层分类匹配
			
 
				+    print("\n\n测试案例2: 第一层分类匹配")
			
 
				+    print("-" * 50)
			
 
				+
			
 
				+    current_features_2 = ["立冬", "教资查分", "时间巧合"]
			
 
				+    persona_combo_2 = [
			
 
				+        {"特征名称": "节后返工场景", "所属分类": ["职场状态", "实质"]},
			
 
				+        {"特征名称": "打工", "所属分类": ["职场状态", "实质"]}
			
 
				+    ]
			
 
				+
			
 
				+    print(f"当前特征: {current_features_2}")
			
 
				+    print(f"人设组合: {[f['特征名称'] for f in persona_combo_2]}")
			
 
				+    print(f"人设分类: {list(set(c for f in persona_combo_2 for c in f['所属分类']))}")
			
 
				+    print("\n正在匹配...")
			
 
				+
			
 
				+    result_2 = await hierarchical_match(current_features_2, persona_combo_2)
			
 
				+
			
 
				+    print(f"\n结果:")
			
 
				+    print(f"  匹配层级: {result_2['匹配层级']}")
			
 
				+    print(f"  匹配结果: {result_2['匹配结果']}")
			
 
				+    print(f"  最终得分: {result_2['最终得分']}")
			
 
				+    print(f"  说明: {result_2['综合说明']}")
			
 
				+
			
 
				+
			
 
				+    # 测试案例3: 第二层上位分类匹配
			
 
				+    print("\n\n测试案例3: 第二层上位分类匹配（完全不相关）")
			
 
				+    print("-" * 50)
			
 
				+
			
 
				+    current_features_3 = ["科技产品", "功能介绍"]
			
 
				+    persona_combo_3 = [
			
 
				+        {"特征名称": "猫孩子", "所属分类": ["宠物亲子化", "宠物情感", "实质"]},
			
 
				+        {"特征名称": "被拿捏住的无奈感", "所属分类": ["宠物关系主导", "宠物情感", "实质"]}
			
 
				+    ]
			
 
				+
			
 
				+    print(f"当前特征: {current_features_3}")
			
 
				+    print(f"人设组合: {[f['特征名称'] for f in persona_combo_3]}")
			
 
				+    print(f"人设分类: {list(set(c for f in persona_combo_3 for c in f['所属分类']))}")
			
 
				+    print("\n正在匹配...")
			
 
				+
			
 
				+    result_3 = await hierarchical_match(current_features_3, persona_combo_3)
			
 
				+
			
 
				+    print(f"\n结果:")
			
 
				+    print(f"  匹配层级: {result_3['匹配层级']}")
			
 
				+    print(f"  匹配结果: {result_3['匹配结果']}")
			
 
				+    print(f"  最终得分: {result_3['最终得分']}")
			
 
				+    print(f"  说明: {result_3['综合说明']}")
			
 
				+
			
 
				+
			
 
				+    # 保存详细结果
			
 
				+    print("\n\n=== 保存详细结果 ===")
			
 
				+    output_file = Path("test_hierarchical_match_results.json")
			
 
				+    results = {
			
 
				+        "测试案例1_标签匹配": result_1,
			
 
				+        "测试案例2_第一层分类匹配": result_2,
			
 
				+        "测试案例3_第二层上位分类匹配": result_3
			
 
				+    }
			
 
				+
			
 
				+    with open(output_file, "w", encoding="utf-8") as f:
			
 
				+        json.dump(results, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    print(f"详细结果已保存到: {output_file}")
			
 
				+
			
 
				+
			
 
if __name__ == "__main__":
    # Run the async test driver to completion when executed as a script.
    asyncio.run(test_hierarchical_match())
			
--- a/test_hierarchical_match_results.json
+++ b/test_hierarchical_match_results.json
@@ -0,0 +1,121 @@
 
				+{
			
 
				+  "测试案例1_标签匹配": {
			
 
				+    "最终得分": 1.0,
			
 
				+    "匹配层级": "标签匹配",
			
 
				+    "匹配结果": "猫咪",
			
 
				+    "分层结果": {
			
 
				+      "标签匹配": {
			
 
				+        "匹配成功": true,
			
 
				+        "匹配的特征": "猫咪",
			
 
				+        "得分": 1,
			
 
				+        "详细结果": {
			
 
				+          "匹配成功": true,
			
 
				+          "匹配对": [
			
 
				+            {
			
 
				+              "当前特征": "猫咪",
			
 
				+              "人设标签": "猫咪",
			
 
				+              "相似度": 100
			
 
				+            }
			
 
				+          ],
			
 
				+          "最佳匹配": {
			
 
				+            "当前特征": "猫咪",
			
 
				+            "人设标签": "猫咪",
			
 
				+            "相似度": 100
			
 
				+          },
			
 
				+          "说明": "当前特征列表中的'猫咪'与人设标签中的'猫咪'完全一致，相似度为100%。其他特征没有达到80%的相似度。"
			
 
				+        }
			
 
				+      }
			
 
				+    },
			
 
				+    "综合说明": "在标签层级找到完全匹配: 猫咪"
			
 
				+  },
			
 
				+  "测试案例2_第一层分类匹配": {
			
 
				+    "最终得分": 0,
			
 
				+    "匹配层级": "无匹配",
			
 
				+    "匹配结果": null,
			
 
				+    "分层结果": {
			
 
				+      "标签匹配": {
			
 
				+        "匹配成功": false,
			
 
				+        "匹配的特征": null,
			
 
				+        "得分": 0,
			
 
				+        "详细结果": {
			
 
				+          "匹配成功": false,
			
 
				+          "匹配对": [],
			
 
				+          "最佳匹配": null,
			
 
				+          "说明": "当前特征列表中的'立冬'、'教资查分'、'时间巧合'与人设特征标签中的'节后返工场景'、'打工'在语义上均不相似，没有达到80%的相似度阈值。"
			
 
				+        }
			
 
				+      },
			
 
				+      "第一层分类匹配": {
			
 
				+        "匹配成功": false,
			
 
				+        "匹配的分类": null,
			
 
				+        "推理难度得分": 0.2,
			
 
				+        "详细结果": {
			
 
				+          "匹配成功": false,
			
 
				+          "最佳分类": null,
			
 
				+          "推理难度": 8,
			
 
				+          "推理难度得分": 0.2,
			
 
				+          "推理路径": "立冬和教资查分是具体事件，时间巧合是事件的属性。这些事件本身与'职场状态'没有直接关联。虽然教资查分可能与未来的职场有关，但当前特征列表并未明确指向职场状态，而是更侧重于事件本身及其时间上的巧合。",
			
 
				+          "说明": "当前特征列表中的'立冬'和'教资查分'是两个独立的事件，'时间巧合'是描述这两个事件之间关系的一个属性。这些特征本身并未直接描述或暗示任何'职场状态'。虽然教资查分可能与未来的职业发展有关，但从当前特征来看，其核心关注点并非职场状态，而是事件本身及其巧合性。因此，与'职场状态'的关联性非常弱，推理难度高，不匹配。"
			
 
				+        }
			
 
				+      },
			
 
				+      "第二层上位分类匹配": {
			
 
				+        "匹配成功": false,
			
 
				+        "匹配的分类": null,
			
 
				+        "推理难度得分": 0.2,
			
 
				+        "详细结果": {
			
 
				+          "匹配成功": false,
			
 
				+          "最佳分类": null,
			
 
				+          "推理难度": 8,
			
 
				+          "推理难度得分": 0.2,
			
 
				+          "推理路径": "当前特征列表中的'立冬'、'教资查分'、'时间巧合'描述的是具体的事件或现象，它们本身不直接指向事物的内在本质或核心内容。'实质'分类通常指事物的本质、核心、内在属性。这些特征更多是表象或时间上的关联，而非对事物本质的探讨。",
			
 
				+          "说明": "当前特征列表描述的是具体的事件和它们之间可能存在的巧合，这些都属于现象层面的描述。而'实质'是指事物的内在本质或核心内容。两者之间没有直接的语义关联，需要进行较复杂的抽象和概念转换才能尝试关联，因此推理难度较高，且不匹配。"
			
 
				+        }
			
 
				+      }
			
 
				+    },
			
 
				+    "综合说明": "在所有层级都未找到合适的匹配"
			
 
				+  },
			
 
				+  "测试案例3_第二层上位分类匹配": {
			
 
				+    "最终得分": 0,
			
 
				+    "匹配层级": "无匹配",
			
 
				+    "匹配结果": null,
			
 
				+    "分层结果": {
			
 
				+      "标签匹配": {
			
 
				+        "匹配成功": false,
			
 
				+        "匹配的特征": null,
			
 
				+        "得分": 0,
			
 
				+        "详细结果": {
			
 
				+          "匹配成功": false,
			
 
				+          "匹配对": [],
			
 
				+          "最佳匹配": null,
			
 
				+          "说明": "当前特征列表中的“科技产品”和“功能介绍”与人设特征标签中的“猫孩子”和“被拿捏住的无奈感”在语义上没有相似或高度接近的匹配项。它们描述的是完全不同的概念范畴。"
			
 
				+        }
			
 
				+      },
			
 
				+      "第一层分类匹配": {
			
 
				+        "匹配成功": false,
			
 
				+        "匹配的分类": null,
			
 
				+        "推理难度得分": 0.0,
			
 
				+        "详细结果": {
			
 
				+          "匹配成功": false,
			
 
				+          "最佳分类": null,
			
 
				+          "推理难度": 10,
			
 
				+          "推理难度得分": 0.0,
			
 
				+          "推理路径": "当前特征'科技产品, 功能介绍'与候选分类'宠物情感, 宠物关系主导, 宠物亲子化'以及人设组合特征'猫孩子, 被拿捏住的无奈感'在语义上完全不相关。前者属于科技产品领域，后者属于宠物情感和人设领域。",
			
 
				+          "说明": "当前特征描述的是科技产品及其功能，而候选分类和人设组合特征都围绕宠物情感和关系展开，两者之间不存在任何语义上的关联，因此无法匹配。"
			
 
				+        }
			
 
				+      },
			
 
				+      "第二层上位分类匹配": {
			
 
				+        "匹配成功": false,
			
 
				+        "匹配的分类": null,
			
 
				+        "推理难度得分": 0.3,
			
 
				+        "详细结果": {
			
 
				+          "匹配成功": false,
			
 
				+          "最佳分类": null,
			
 
				+          "推理难度": 7,
			
 
				+          "推理难度得分": 0.3,
			
 
				+          "推理路径": "科技产品和功能介绍是具体的事物和其属性，而“实质”是一个抽象的概念，指事物的内在本质。从具体的事物和属性推断到其内在本质，需要进行抽象和概括，但“科技产品, 功能介绍”本身并未直接指向“实质”这一概念，关联性较弱。",
			
 
				+          "说明": "“科技产品”和“功能介绍”是具体的事物和其表面的描述，而“实质”是指事物的内在本质或核心。这两者之间没有直接的语义关联，需要进行较复杂的抽象和推断才能建立联系，且这种联系并不强。因此，不认为匹配成功。"
			
 
				+        }
			
 
				+      }
			
 
				+    },
			
 
				+    "综合说明": "在所有层级都未找到合适的匹配"
			
 
				+  }
			
 
				+}