فهرست منبع

feat: 添加搜索并匹配模式到主流程

新增功能:
- 添加 --search-and-match 参数支持搜索并匹配模式
- 集成 Step4 搜索结果匹配到主流程
- 自动跳过已有 Step4 结果(除非使用 --force)
- 显示匹配统计(高匹配数量和 Top1 分数)
- 自动按 Step1 分数降序处理灵感

技术实现:
- 导入 step4_search_result_match 模块
- 在搜索后执行 Step4 匹配分析
- 检查输出文件避免重复执行
- 修复 original_argv 未定义的 bug(将 sys.argv 的备份提前到 Step1 分支之外,跳过 Step1 时 Step4 仍可恢复原始参数)
- 添加 --search-only 与 --search-and-match 的互斥检查(同时使用时报错并退出)

使用示例:
  # 搜索并匹配模式,处理前10个高分灵感
  python run_inspiration_analysis.py --search-and-match --count 10

  # 强制重新执行
  python run_inspiration_analysis.py --search-and-match --count 1 --force

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
yangxiaohui 2 هفته پیش
والد
کامیت
0d29060c3f
1 فایلهای تغییر یافته به همراه 118 افزوده شده و 20 حذف شده
  1. 118 20
      run_inspiration_analysis.py

+ 118 - 20
run_inspiration_analysis.py

@@ -19,9 +19,10 @@ from lib.my_trace import set_trace_smith as set_trace
 from lib.data_loader import load_inspiration_list, select_inspiration
 from lib.utils import read_json
 
-# 导入 step1 和 step2 的 main 函数
+# 导入 step1, step2 和 step4 的 main 函数
 import step1_inspiration_match
 import step2_incremental_match
+import step4_search_result_match
 
 # 导入搜索功能
 from script.search import search_xiaohongshu
@@ -156,9 +157,10 @@ async def run_full_analysis(
     current_time: str = None,
     log_url: str = None,
     enable_step2: bool = False,
-    search_only: bool = False
+    search_only: bool = False,
+    search_and_match: bool = False
 ) -> dict:
-    """执行完整的灵感分析流程(Step1 + 搜索 + Step2)
+    """执行完整的灵感分析流程(Step1 + 搜索 + Step4匹配 + Step2)
 
     Args:
         persona_dir: 人设目录路径
@@ -169,22 +171,26 @@ async def run_full_analysis(
         log_url: 日志链接
         enable_step2: 是否执行 Step2(默认 False)
         search_only: 是否只执行搜索(跳过 Step1 和 Step2,默认 False)
+        search_and_match: 是否搜索并匹配模式(跳过 Step1 和 Step2,执行搜索和 Step4,默认 False)
 
     Returns:
         包含文件路径和状态的字典
     """
     print(f"\n{'=' * 80}")
-    print(f"开始{'仅搜索' if search_only else '完整分析'}流程: {inspiration}")
+    mode_desc = "仅搜索" if search_only else ("搜索并匹配" if search_and_match else "完整分析")
+    print(f"开始{mode_desc}流程: {inspiration}")
     print(f"{'=' * 80}\n")
 
+    # 保存原始 sys.argv
+    original_argv = sys.argv.copy()
+
     # ========== Step1: 灵感与人设匹配 ==========
-    if not search_only:
+    if not search_only and not search_and_match:
         print(f"{'─' * 80}")
         print(f"Step1: 灵感与人设匹配")
         print(f"{'─' * 80}\n")
 
         # 临时修改 sys.argv 来传递参数给 step1
-        original_argv = sys.argv.copy()
         sys.argv = [
             "step1_inspiration_match.py",
             persona_dir,
@@ -204,7 +210,8 @@ async def run_full_analysis(
         print(f"✓ Step1 完成,结果文件: {step1_file}\n")
     else:
         print(f"{'─' * 80}")
-        print(f"Step1: 跳过(仅搜索模式)")
+        mode_label = "搜索并匹配模式" if search_and_match else "仅搜索模式"
+        print(f"Step1: 跳过({mode_label})")
         print(f"{'─' * 80}\n")
 
         # 查找已有的 step1 输出文件
@@ -216,6 +223,7 @@ async def run_full_analysis(
             return {
                 "step1_file": None,
                 "search_file": None,
+                "step4_file": None,
                 "step2_file": None,
                 "summary_file": None,
                 "status": "step1_not_found"
@@ -272,12 +280,79 @@ async def run_full_analysis(
         search_file = None
         search_notes_count = 0
 
+    # ========== Step4: 搜索结果匹配 ==========
+    step4_file = None
+    step4_high_score_count = None
+    step4_top1_score = None
+
+    if search_and_match and 'search_file' in locals() and search_file:
+        from pathlib import Path
+
+        # 检查 step4 输出文件是否已存在
+        step4_dir = os.path.join(persona_dir, "how", "灵感点", inspiration, "search")
+        scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
+        step4_pattern = f"{scope_prefix}_step4_*.json"
+        step4_files = list(Path(step4_dir).glob(step4_pattern)) if os.path.exists(step4_dir) else []
+
+        step4_exists = len(step4_files) > 0
+
+        if step4_exists and not force:
+            print(f"\n{'─' * 80}")
+            print(f"Step4: 已跳过(结果文件已存在)")
+            print(f"{'─' * 80}\n")
+
+            step4_file = str(step4_files[0])
+            print(f"✓ 找到已有 Step4 结果: {step4_file}\n")
+
+            # 读取已有结果
+            step4_data = read_json(step4_file)
+            step4_results = step4_data.get("匹配结果列表", [])
+            step4_high_score_count = sum(1 for r in step4_results if r.get("匹配结果", {}).get("score", 0) >= 0.7)
+            step4_top1_score = step4_results[0].get("匹配结果", {}).get("score", 0) if step4_results else 0
+        else:
+            print(f"\n{'─' * 80}")
+            print(f"Step4: 搜索结果与灵感匹配")
+            print(f"{'─' * 80}\n")
+
+            # 临时修改 sys.argv 来传递参数给 step4
+            sys.argv = [
+                "step4_search_result_match.py",
+                persona_dir,
+                inspiration
+            ]
+            if max_tasks is not None:
+                sys.argv.append(str(max_tasks))
+
+            try:
+                # 调用 step4 的 main 函数
+                await step4_search_result_match.main(current_time, log_url, force=force)
+            finally:
+                # 恢复原始参数
+                sys.argv = original_argv
+
+            # 查找 step4 输出文件
+            step4_files = list(Path(step4_dir).glob(step4_pattern))
+
+            if step4_files:
+                step4_file = str(step4_files[0])
+                print(f"✓ Step4 完成,结果文件: {step4_file}\n")
+
+                # 读取 step4 结果
+                step4_data = read_json(step4_file)
+                step4_results = step4_data.get("匹配结果列表", [])
+                step4_high_score_count = sum(1 for r in step4_results if r.get("匹配结果", {}).get("score", 0) >= 0.7)
+                step4_top1_score = step4_results[0].get("匹配结果", {}).get("score", 0) if step4_results else 0
+    elif search_and_match:
+        print(f"\n{'─' * 80}")
+        print(f"Step4: 已跳过(搜索失败)")
+        print(f"{'─' * 80}\n")
+
     # ========== Step2: 增量词匹配 ==========
     step2_file = None
     step2_score = None
     step2_word_count = None
 
-    if enable_step2 and not search_only:
+    if enable_step2 and not search_only and not search_and_match:
         print(f"\n{'─' * 80}")
         print(f"Step2: 增量词在人设中的匹配")
         print(f"{'─' * 80}\n")
@@ -305,14 +380,14 @@ async def run_full_analysis(
         step2_score = step2_data.get("匹配结果", {}).get("score", 0)
         step2_b_content = step2_data.get("输入信息", {}).get("B", "")
         step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
-    elif not search_only:
+    elif not search_only and not search_and_match:
         print(f"\n{'─' * 80}")
         print(f"Step2: 已跳过(使用 --enable-step2 启用)")
         print(f"{'─' * 80}\n")
 
     # ========== 保存流程汇总 ==========
-    # search_only 模式不保存汇总文件
-    if not search_only:
+    # search_only 和 search_and_match 模式不保存汇总文件
+    if not search_only and not search_and_match:
         output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
         scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
 
@@ -358,13 +433,18 @@ async def run_full_analysis(
         summary_file = None
 
     print(f"{'=' * 80}")
-    print(f"{'仅搜索' if search_only else '完整流程'}执行完成")
+    mode_desc = "仅搜索" if search_only else ("搜索并匹配" if search_and_match else "完整流程")
+    print(f"{mode_desc}执行完成")
     print(f"{'=' * 80}")
     print(f"\n结果文件:")
-    if not search_only:
+    if not search_only and not search_and_match:
         print(f"  Step1:  {step1_file}")
     if 'search_file' in locals() and search_file:
         print(f"  搜索:   {search_file}")
+    if step4_file:
+        print(f"  Step4:  {step4_file}")
+        if step4_high_score_count is not None:
+            print(f"          (高匹配: {step4_high_score_count} 个, Top1 score: {step4_top1_score:.2f})")
     if enable_step2 and step2_file:
         print(f"  Step2:  {step2_file}")
     if summary_file:
@@ -372,8 +452,9 @@ async def run_full_analysis(
     print()
 
     return {
-        "step1_file": step1_file if not search_only else None,
+        "step1_file": step1_file if not search_only and not search_and_match else None,
         "search_file": search_file if 'search_file' in locals() else None,
+        "step4_file": step4_file,
         "step2_file": step2_file,
         "summary_file": summary_file,
         "status": "success"
@@ -403,6 +484,9 @@ async def main():
   # 仅搜索模式:基于已有 Step1 结果,按分数降序搜索前10个
   python run_inspiration_analysis.py --search-only --count 10
 
+  # 搜索并匹配模式:基于已有 Step1 结果,执行搜索和 Step4 匹配
+  python run_inspiration_analysis.py --search-and-match --count 10
+
   # 处理所有灵感,强制重新执行
   python run_inspiration_analysis.py --count all --force
 
@@ -460,6 +544,12 @@ async def main():
         help="仅执行搜索(跳过 Step1 和 Step2,基于已有 Step1 结果,自动按分数降序)"
     )
 
+    parser.add_argument(
+        "--search-and-match",
+        action="store_true",
+        help="搜索并匹配模式(跳过 Step1 和 Step2,执行搜索和 Step4 匹配,自动按分数降序)"
+    )
+
     args = parser.parse_args()
 
     persona_dir = args.dir
@@ -468,9 +558,15 @@ async def main():
     sort_by_score = args.sort_by_score
     enable_step2 = args.enable_step2
     search_only = args.search_only
+    search_and_match = args.search_and_match
 
-    # search_only 模式自动启用分数排序
-    if search_only:
+    # 互斥检查
+    if search_only and search_and_match:
+        print("❌ 错误: --search-only 和 --search-and-match 不能同时使用")
+        sys.exit(1)
+
+    # search_only 和 search_and_match 模式自动启用分数排序
+    if search_only or search_and_match:
         sort_by_score = True
         enable_step2 = False  # 搜索模式下强制禁用 step2
         if shuffle:
@@ -550,8 +646,8 @@ async def main():
         print(f"处理第 {i}/{len(inspirations_to_process)} 个灵感: {inspiration}")
         print(f"{'#' * 80}")
 
-        # search_only 模式不创建 trace
-        if search_only:
+        # search_only 和 search_and_match 模式不创建 trace
+        if search_only or search_and_match:
             result = await run_full_analysis(
                 persona_dir=persona_dir,
                 inspiration=inspiration,
@@ -560,7 +656,8 @@ async def main():
                 current_time=None,
                 log_url=None,
                 enable_step2=enable_step2,
-                search_only=search_only
+                search_only=search_only,
+                search_and_match=search_and_match
             )
         else:
             # 为每个灵感创建独立的 trace
@@ -575,7 +672,8 @@ async def main():
                     current_time=insp_time,
                     log_url=insp_log_url,
                     enable_step2=enable_step2,
-                    search_only=search_only
+                    search_only=search_only,
+                    search_and_match=search_and_match
                 )
 
             if insp_log_url: