Forráskód Böngészése

feat: 添加仅搜索模式和搜索关键词文件名修复

新增功能:
- 添加 --search-only 参数,支持基于已有 Step1 结果仅执行搜索
- search-only 模式自动按分数降序排序
- search-only 模式跳过 Step1 的执行(仅读取已有的 Step1 结果文件)和 Step2,不创建 trace,不保存汇总文件
- 轻量级输出,仅显示搜索结果文件路径

Bug 修复:
- 修复搜索关键词中包含特殊字符(/, \, :)导致文件保存失败的问题
- 生成搜索结果文件名时,关键词中的上述字符(/, \, :)自动替换为下划线

使用示例:
- python run_inspiration_analysis.py --search-only --count 10

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
yangxiaohui 2 hete
szülő
commit
ddae681f99
1 módosított fájl, 159 hozzáadás és 90 törlés
  1. run_inspiration_analysis.py (+159 −90)

+ 159 - 90
run_inspiration_analysis.py

@@ -155,7 +155,8 @@ async def run_full_analysis(
     force: bool = False,
     current_time: str = None,
     log_url: str = None,
-    enable_step2: bool = False
+    enable_step2: bool = False,
+    search_only: bool = False
 ) -> dict:
     """执行完整的灵感分析流程(Step1 + 搜索 + Step2)
 
@@ -167,38 +168,58 @@ async def run_full_analysis(
         current_time: 当前时间戳
         log_url: 日志链接
         enable_step2: 是否执行 Step2(默认 False)
+        search_only: 是否只执行搜索(跳过 Step1 和 Step2,默认 False)
 
     Returns:
         包含文件路径和状态的字典
     """
     print(f"\n{'=' * 80}")
-    print(f"开始完整分析流程: {inspiration}")
+    print(f"开始{'仅搜索' if search_only else '完整分析'}流程: {inspiration}")
     print(f"{'=' * 80}\n")
 
     # ========== Step1: 灵感与人设匹配 ==========
-    print(f"{'─' * 80}")
-    print(f"Step1: 灵感与人设匹配")
-    print(f"{'─' * 80}\n")
+    if not search_only:
+        print(f"{'─' * 80}")
+        print(f"Step1: 灵感与人设匹配")
+        print(f"{'─' * 80}\n")
 
-    # 临时修改 sys.argv 来传递参数给 step1
-    original_argv = sys.argv.copy()
-    sys.argv = [
-        "step1_inspiration_match.py",
-        persona_dir,
-        inspiration,
-        str(max_tasks) if max_tasks is not None else "all"
-    ]
+        # 临时修改 sys.argv 来传递参数给 step1
+        original_argv = sys.argv.copy()
+        sys.argv = [
+            "step1_inspiration_match.py",
+            persona_dir,
+            inspiration,
+            str(max_tasks) if max_tasks is not None else "all"
+        ]
 
-    try:
-        # 调用 step1 的 main 函数(通过参数传递 force)
-        await step1_inspiration_match.main(current_time, log_url, force=force)
-    finally:
-        # 恢复原始参数
-        sys.argv = original_argv
+        try:
+            # 调用 step1 的 main 函数(通过参数传递 force)
+            await step1_inspiration_match.main(current_time, log_url, force=force)
+        finally:
+            # 恢复原始参数
+            sys.argv = original_argv
+
+        # 查找 step1 输出文件
+        step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
+        print(f"✓ Step1 完成,结果文件: {step1_file}\n")
+    else:
+        print(f"{'─' * 80}")
+        print(f"Step1: 跳过(仅搜索模式)")
+        print(f"{'─' * 80}\n")
 
-    # 查找 step1 输出文件
-    step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
-    print(f"✓ Step1 完成,结果文件: {step1_file}\n")
+        # 查找已有的 step1 输出文件
+        try:
+            step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
+            print(f"✓ 找到已有 Step1 结果: {step1_file}\n")
+        except FileNotFoundError as e:
+            print(f"⚠️  {e}")
+            return {
+                "step1_file": None,
+                "search_file": None,
+                "step2_file": None,
+                "summary_file": None,
+                "status": "step1_not_found"
+            }
 
     # 读取 step1 结果
     step1_data = read_json(step1_file)
@@ -236,7 +257,9 @@ async def run_full_analysis(
         os.makedirs(search_dir, exist_ok=True)
 
         scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
-        search_filename = f"{scope_prefix}_search_{search_keyword[:20]}.json"  # 截取关键词前20字符避免文件名过长
+        # 清理文件名中的非法字符
+        safe_keyword = search_keyword[:20].replace('/', '_').replace('\\', '_').replace(':', '_')
+        search_filename = f"{scope_prefix}_search_{safe_keyword}.json"
         search_file = os.path.join(search_dir, search_filename)
 
         with open(search_file, 'w', encoding='utf-8') as f:
@@ -254,7 +277,7 @@ async def run_full_analysis(
     step2_score = None
     step2_word_count = None
 
-    if enable_step2:
+    if enable_step2 and not search_only:
         print(f"\n{'─' * 80}")
         print(f"Step2: 增量词在人设中的匹配")
         print(f"{'─' * 80}\n")
@@ -282,67 +305,74 @@ async def run_full_analysis(
         step2_score = step2_data.get("匹配结果", {}).get("score", 0)
         step2_b_content = step2_data.get("输入信息", {}).get("B", "")
         step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
-    else:
+    elif not search_only:
         print(f"\n{'─' * 80}")
         print(f"Step2: 已跳过(使用 --enable-step2 启用)")
         print(f"{'─' * 80}\n")
 
     # ========== 保存流程汇总 ==========
-    output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
-    scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
+    # search_only 模式不保存汇总文件
+    if not search_only:
+        output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
+        scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
 
-    # 从 step1 文件名提取模型名称
-    step1_filename = os.path.basename(step1_file)
-    model_short = step1_filename.split("_")[-1].replace(".json", "")
-
-    summary_filename = f"{scope_prefix}_summary_完整流程_{model_short}.json"
-    summary_file = os.path.join(output_dir, summary_filename)
-
-    # 构建流程描述
-    workflow = "Step1 + 搜索"
-    if enable_step2:
-        workflow += " + Step2"
-
-    summary = {
-        "元数据": {
-            "current_time": current_time,
-            "log_url": log_url,
-            "流程": workflow,
-            "step1_model": step1_data.get("元数据", {}).get("model", ""),
-            "step2_model": step2_data.get("元数据", {}).get("model", "") if enable_step2 and 'step2_data' in locals() else None
-        },
-        "灵感": inspiration,
-        "文件路径": {
-            "step1": step1_file,
-            "search": search_file if 'search_file' in locals() else None,
-            "step2": step2_file
-        },
-        "关键指标": {
-            "step1_top1_score": step1_score,
-            "step1_top1_匹配要素": step1_element,
-            "search_keyword": search_keyword if 'search_keyword' in locals() else None,
-            "search_notes_count": search_notes_count if 'search_notes_count' in locals() else 0,
-            "step2_增量词数量": step2_word_count,
-            "step2_score": step2_score
+        # 从 step1 文件名提取模型名称
+        step1_filename = os.path.basename(step1_file)
+        model_short = step1_filename.split("_")[-1].replace(".json", "")
+
+        summary_filename = f"{scope_prefix}_summary_完整流程_{model_short}.json"
+        summary_file = os.path.join(output_dir, summary_filename)
+
+        # 构建流程描述
+        workflow = "Step1 + 搜索"
+        if enable_step2:
+            workflow += " + Step2"
+
+        summary = {
+            "元数据": {
+                "current_time": current_time,
+                "log_url": log_url,
+                "流程": workflow,
+                "step1_model": step1_data.get("元数据", {}).get("model", ""),
+                "step2_model": step2_data.get("元数据", {}).get("model", "") if enable_step2 and 'step2_data' in locals() else None
+            },
+            "灵感": inspiration,
+            "文件路径": {
+                "step1": step1_file,
+                "search": search_file if 'search_file' in locals() else None,
+                "step2": step2_file
+            },
+            "关键指标": {
+                "step1_top1_score": step1_score,
+                "step1_top1_匹配要素": step1_element,
+                "search_keyword": search_keyword if 'search_keyword' in locals() else None,
+                "search_notes_count": search_notes_count if 'search_notes_count' in locals() else 0,
+                "step2_增量词数量": step2_word_count,
+                "step2_score": step2_score
+            }
         }
-    }
 
-    with open(summary_file, 'w', encoding='utf-8') as f:
-        json.dump(summary, f, ensure_ascii=False, indent=2)
+        with open(summary_file, 'w', encoding='utf-8') as f:
+            json.dump(summary, f, ensure_ascii=False, indent=2)
+    else:
+        summary_file = None
 
     print(f"{'=' * 80}")
-    print(f"完整流程执行完成")
+    print(f"{'仅搜索' if search_only else '完整流程'}执行完成")
     print(f"{'=' * 80}")
     print(f"\n结果文件:")
-    print(f"  Step1:  {step1_file}")
+    if not search_only:
+        print(f"  Step1:  {step1_file}")
     if 'search_file' in locals() and search_file:
         print(f"  搜索:   {search_file}")
     if enable_step2 and step2_file:
         print(f"  Step2:  {step2_file}")
-    print(f"  汇总:   {summary_file}\n")
+    if summary_file:
+        print(f"  汇总:   {summary_file}")
+    print()
 
     return {
-        "step1_file": step1_file,
+        "step1_file": step1_file if not search_only else None,
         "search_file": search_file if 'search_file' in locals() else None,
         "step2_file": step2_file,
         "summary_file": summary_file,
@@ -370,6 +400,9 @@ async def main():
   # 按 Step1 分数排序,处理前10个高分灵感
   python run_inspiration_analysis.py --count 10 --sort-by-score
 
+  # 仅搜索模式:基于已有 Step1 结果,按分数降序搜索前10个
+  python run_inspiration_analysis.py --search-only --count 10
+
   # 处理所有灵感,强制重新执行
   python run_inspiration_analysis.py --count all --force
 
@@ -421,6 +454,12 @@ async def main():
         help="启用 Step2 增量词匹配(默认关闭)"
     )
 
+    parser.add_argument(
+        "--search-only",
+        action="store_true",
+        help="仅执行搜索(跳过 Step1 和 Step2,基于已有 Step1 结果,自动按分数降序)"
+    )
+
     args = parser.parse_args()
 
     persona_dir = args.dir
@@ -428,14 +467,26 @@ async def main():
     shuffle = args.shuffle
     sort_by_score = args.sort_by_score
     enable_step2 = args.enable_step2
+    search_only = args.search_only
+
+    # search_only 模式自动启用分数排序
+    if search_only:
+        sort_by_score = True
+        enable_step2 = False  # 搜索模式下强制禁用 step2
+        if shuffle:
+            print("⚠️  警告: --search-only 模式会自动按分数排序,忽略 --shuffle 参数")
+            shuffle = False
 
     # 处理 max_tasks
     max_tasks = None if args.max_tasks == "all" else int(args.max_tasks)
 
     # 动态流程名称
-    workflow_name = "Step1 + 搜索"
-    if enable_step2:
-        workflow_name += " + Step2"
+    if search_only:
+        workflow_name = "仅搜索"
+    else:
+        workflow_name = "Step1 + 搜索"
+        if enable_step2:
+            workflow_name += " + Step2"
 
     print(f"{'=' * 80}")
     print(f"灵感分析主流程 ({workflow_name})")
@@ -456,19 +507,23 @@ async def main():
     if max_tasks:
         print(f"Step1 任务数限制: {max_tasks}")
 
-    if force:
-        print(f"强制模式: 重新执行所有步骤")
+    if search_only:
+        print(f"搜索模式: 仅搜索(跳过 Step1 和 Step2)")
+        print(f"分数排序: 根据已有 Step1 结果按分数降序处理")
+    else:
+        if force:
+            print(f"强制模式: 重新执行所有步骤")
 
-    if shuffle:
-        print(f"随机模式: 随机选择灵感")
+        if shuffle:
+            print(f"随机模式: 随机选择灵感")
 
-    if sort_by_score:
-        print(f"分数排序: 根据 Step1 结果按分数降序处理")
+        if sort_by_score:
+            print(f"分数排序: 根据 Step1 结果按分数降序处理")
 
-    if enable_step2:
-        print(f"Step2: 启用增量词匹配")
-    else:
-        print(f"Step2: 已关闭(使用 --enable-step2 启用)")
+        if enable_step2:
+            print(f"Step2: 启用增量词匹配")
+        else:
+            print(f"Step2: 已关闭(使用 --enable-step2 启用)")
 
     # 选择要处理的灵感列表
     if sort_by_score:
@@ -495,25 +550,39 @@ async def main():
         print(f"处理第 {i}/{len(inspirations_to_process)} 个灵感: {inspiration}")
         print(f"{'#' * 80}")
 
-        # 为每个灵感创建独立的 trace
-        insp_time, insp_log_url = set_trace()
-
-        with trace(f"灵感分析: {inspiration}"):
+        # search_only 模式不创建 trace
+        if search_only:
             result = await run_full_analysis(
                 persona_dir=persona_dir,
                 inspiration=inspiration,
                 max_tasks=max_tasks,
                 force=force,
-                current_time=insp_time,
-                log_url=insp_log_url,
-                enable_step2=enable_step2
+                current_time=None,
+                log_url=None,
+                enable_step2=enable_step2,
+                search_only=search_only
             )
+        else:
+            # 为每个灵感创建独立的 trace
+            insp_time, insp_log_url = set_trace()
+
+            with trace(f"灵感分析: {inspiration}"):
+                result = await run_full_analysis(
+                    persona_dir=persona_dir,
+                    inspiration=inspiration,
+                    max_tasks=max_tasks,
+                    force=force,
+                    current_time=insp_time,
+                    log_url=insp_log_url,
+                    enable_step2=enable_step2,
+                    search_only=search_only
+                )
+
+            if insp_log_url:
+                print(f"本次 Trace: {insp_log_url}")
 
         results.append(result)
 
-        if insp_log_url:
-            print(f"本次 Trace: {insp_log_url}")
-
     # 输出最终汇总
     print(f"\n{'=' * 80}")
     print(f"批量处理完成")