yangxiaohui há 2 semanas atrás
pai
commit
929d439ca4
3 ficheiros alterados com 40 adições e 28 exclusões
  1. 13 5
      extract_inspirations.py
  2. 9 8
      lib/data_loader.py
  3. 18 15
      step2_incremental_match.py

+ 13 - 5
extract_inspirations.py

@@ -86,16 +86,24 @@ def extract_inspirations_from_folder(folder_path: str) -> List[dict]:
 
 
 def save_inspirations(inspirations: List[dict], output_dir: str):
-    """保存灵感点列表
+    """保存灵感点列表(输出两个文件)
 
     Args:
         inspirations: 灵感点列表(包含 灵感点 和 meta 字段)
         output_dir: 输出目录
     """
-    output_file = os.path.join(output_dir, "灵感点.json")
-    with open(output_file, 'w', encoding='utf-8') as f:
+    # 1. 保存详细版本(包含 meta 信息)
+    detailed_file = os.path.join(output_dir, "灵感点_详细.json")
+    with open(detailed_file, 'w', encoding='utf-8') as f:
         json.dump(inspirations, f, ensure_ascii=False, indent=2)
-    print(f"\n✓ 灵感点列表已保存到: {output_file}")
+    print(f"\n✓ 详细灵感点列表已保存到: {detailed_file}")
+
+    # 2. 保存简化版本(仅灵感点名称列表)
+    simple_list = [item["灵感点"] for item in inspirations]
+    simple_file = os.path.join(output_dir, "灵感点.json")
+    with open(simple_file, 'w', encoding='utf-8') as f:
+        json.dump(simple_list, f, ensure_ascii=False, indent=2)
+    print(f"✓ 简化灵感点列表已保存到: {simple_file}")
 
 
 def main():
@@ -106,7 +114,7 @@ def main():
     if len(sys.argv) > 1:
         what_folder = sys.argv[1]
     else:
-        what_folder = "data/阿里多多酱/out/人设_v2/what解构结果"
+        what_folder = "data/阿里多多酱/out/人设_1110/what解构结果"
 
     print(f"{'=' * 80}")
     print(f"从 what 解构结果中提取灵感点")

+ 9 - 8
lib/data_loader.py

@@ -31,7 +31,7 @@ def load_persona_data(persona_dir: str) -> dict:
 
 
 def load_inspiration_list(persona_dir: str) -> List[str]:
-    """加载灵感点列表
+    """加载灵感点列表(简化版本,仅包含名称)
 
     Args:
         persona_dir: 人设目录路径
@@ -44,11 +44,12 @@ def load_inspiration_list(persona_dir: str) -> List[str]:
     """
     inspiration_list_path = os.path.join(persona_dir, "灵感点.json")
     try:
-        inspiration_data = read_json(inspiration_list_path)
-        if not isinstance(inspiration_data, list) or len(inspiration_data) == 0:
+        inspiration_list = read_json(inspiration_list_path)
+        if not isinstance(inspiration_list, list) or len(inspiration_list) == 0:
             print(f"❌ 灵感文件格式错误或为空: {inspiration_list_path}")
             sys.exit(1)
-        return [item["灵感点"] for item in inspiration_data]
+        # 直接返回字符串列表(简化版本)
+        return inspiration_list
     except FileNotFoundError:
         print(f"❌ 找不到灵感文件: {inspiration_list_path}")
         print("请先运行 extract_inspirations.py 生成灵感点文件")
@@ -67,15 +68,15 @@ def load_inspiration_data(persona_dir: str) -> List[dict]:
     Raises:
         SystemExit: 文件不存在或格式错误时退出
     """
-    inspiration_list_path = os.path.join(persona_dir, "灵感点.json")
+    inspiration_detail_path = os.path.join(persona_dir, "灵感点_详细.json")
     try:
-        inspiration_data = read_json(inspiration_list_path)
+        inspiration_data = read_json(inspiration_detail_path)
         if not isinstance(inspiration_data, list) or len(inspiration_data) == 0:
-            print(f"❌ 灵感文件格式错误或为空: {inspiration_list_path}")
+            print(f"❌ 灵感详细文件格式错误或为空: {inspiration_detail_path}")
             sys.exit(1)
         return inspiration_data
     except FileNotFoundError:
-        print(f"❌ 找不到灵感文件: {inspiration_list_path}")
+        print(f"❌ 找不到灵感详细文件: {inspiration_detail_path}")
         print("请先运行 extract_inspirations.py 生成灵感点文件")
         sys.exit(1)
 

+ 18 - 15
step2_incremental_match.py

@@ -12,8 +12,8 @@ from pathlib import Path
 from agents import trace
 from agents.tracing.create import custom_span
 from lib.my_trace import set_trace_smith as set_trace
-from lib.match_analyzer import match_batch
-from lib.data_loader import load_persona_data, load_inspiration_data, select_inspiration
+from lib.match_analyzer import match_single
+from lib.data_loader import load_persona_data, load_inspiration_list, select_inspiration
 
 # 模型配置
 MODEL_NAME = "google/gemini-2.5-pro"
@@ -135,19 +135,27 @@ async def process_step2_incremental_match(
             },
             "灵感": step1_inspiration,
             "输入信息": {
-                "B": [],
+                "B": "",  # 空字符串
                 "A": persona_system_text,
                 "B_Context": b_context,  # 使用统一构造的 context
                 "A_Context": ""
             },
             "step1_结果": step1_top1,
-            "匹配结果": []
+            "匹配结果": {
+                "score": 0.0,
+                "score说明": "无增量词",
+                "相同部分": {},
+                "增量部分": {}
+            }
         }
 
     print(f"\n开始增量词匹配分析: {step1_inspiration}")
     print(f"匹配要素: {matched_element}")
     print(f"增量词数量: {len(incremental_words)}, 模型: {MODEL_NAME}\n")
 
+    # 将增量词列表拼接成一个字符串(用换行符分隔)
+    b_content = "\n".join(incremental_words)
+
     # 使用 custom_span 标识整个流程
     with custom_span(
         name=f"Step2: 增量词匹配 - {step1_inspiration}",
@@ -159,18 +167,14 @@ async def process_step2_incremental_match(
             "步骤": "增量词在人设中的匹配分析"
         }
     ):
-        # 调用通用批量匹配模块
-        match_results = await match_batch(
-            b_items=incremental_words,
+        # 调用通用匹配模块(单次调用)
+        match_result = await match_single(
+            b_content=b_content,
             a_content=persona_system_text,
             model_name=MODEL_NAME,
             b_context=b_context
         )
 
-    # 按 score 降序排序
-    if isinstance(match_results, list):
-        match_results.sort(key=lambda x: x.get('score', 0), reverse=True)
-
     # 构建输出(使用统一构造的变量)
     return {
         "元数据": {
@@ -181,12 +185,12 @@ async def process_step2_incremental_match(
         },
         "灵感": step1_inspiration,
         "输入信息": {
-            "B": incremental_words,
+            "B": b_content,  # 拼接后的字符串
             "A": persona_system_text,
             "B_Context": b_context,  # 使用统一构造的 context
             "A_Context": ""
         },
-        "匹配结果": match_results,
+        "匹配结果": match_result,  # 单个匹配结果对象
         "step1_结果": step1_top1,
     }
 
@@ -205,8 +209,7 @@ async def main(current_time: str, log_url: str):
 
     # 加载数据
     persona_data = load_persona_data(persona_dir)
-    inspiration_data = load_inspiration_data(persona_dir)
-    inspiration_list = [item["灵感点"] for item in inspiration_data]
+    inspiration_list = load_inspiration_list(persona_dir)
     test_inspiration = select_inspiration(inspiration_arg, inspiration_list)
 
     # 查找并加载 step1 结果