2 weeks ago · 0d29060c3f
--- a/run_inspiration_analysis.py
+++ b/run_inspiration_analysis.py
@@ -19,9 +19,10 @@ from lib.my_trace import set_trace_smith as set_trace
 
															 from lib.data_loader import load_inspiration_list, select_inspiration
														
 
															 from lib.utils import read_json
														
 
															-# 导入 step1 和 step2 的 main 函数
														
 
															+# 导入 step1, step2 和 step4 的 main 函数
														
 
															 import step1_inspiration_match
														
 
															 import step2_incremental_match
														
 
															+import step4_search_result_match
														
 
															 # 导入搜索功能
														
 
															 from script.search import search_xiaohongshu
														
@@ -156,9 +157,10 @@ async def run_full_analysis(
 
															     current_time: str = None,
														
 
															     log_url: str = None,
														
 
															     enable_step2: bool = False,
														
 
															-    search_only: bool = False
														
 
															+    search_only: bool = False,
														
 
															+    search_and_match: bool = False
														
 
															 ) -> dict:
														
 
															-    """执行完整的灵感分析流程（Step1 + 搜索 + Step2）
														
 
															+    """执行完整的灵感分析流程（Step1 + 搜索 + Step4匹配 + Step2）
														
 
															     Args:
														
 
															         persona_dir: 人设目录路径
														
@@ -169,22 +171,26 @@ async def run_full_analysis(
 
															         log_url: 日志链接
														
 
															         enable_step2: 是否执行 Step2（默认 False）
														
 
															         search_only: 是否只执行搜索（跳过 Step1 和 Step2，默认 False）
														
 
															+        search_and_match: 是否搜索并匹配模式（跳过 Step1 和 Step2，执行搜索和 Step4，默认 False）
														
 
															     Returns:
														
 
															         包含文件路径和状态的字典
														
 
															     """
														
 
															     print(f"\n{'=' * 80}")
														
 
															-    print(f"开始{'仅搜索' if search_only else '完整分析'}流程: {inspiration}")
														
 
															+    mode_desc = "仅搜索" if search_only else ("搜索并匹配" if search_and_match else "完整分析")
														
 
															+    print(f"开始{mode_desc}流程: {inspiration}")
														
 
															     print(f"{'=' * 80}\n")
														
 
															+    # 保存原始 sys.argv
														
 
															+    original_argv = sys.argv.copy()
														
 
															+
														
 
															     # ========== Step1: 灵感与人设匹配 ==========
														
 
															-    if not search_only:
														
 
															+    if not search_only and not search_and_match:
														
 
															         print(f"{'─' * 80}")
														
 
															         print(f"Step1: 灵感与人设匹配")
														
 
															         print(f"{'─' * 80}\n")
														
 
															         # 临时修改 sys.argv 来传递参数给 step1
														
 
															-        original_argv = sys.argv.copy()
														
 
															         sys.argv = [
														
 
															             "step1_inspiration_match.py",
														
 
															             persona_dir,
														
@@ -204,7 +210,8 @@ async def run_full_analysis(
 
															         print(f"✓ Step1 完成，结果文件: {step1_file}\n")
														
 
															     else:
														
 
															         print(f"{'─' * 80}")
														
 
															-        print(f"Step1: 跳过（仅搜索模式）")
														
 
															+        mode_label = "搜索并匹配模式" if search_and_match else "仅搜索模式"
														
 
															+        print(f"Step1: 跳过（{mode_label}）")
														
 
															         print(f"{'─' * 80}\n")
														
 
															         # 查找已有的 step1 输出文件
														
@@ -216,6 +223,7 @@ async def run_full_analysis(
 
															             return {
														
 
															                 "step1_file": None,
														
 
															                 "search_file": None,
														
 
															+                "step4_file": None,
														
 
															                 "step2_file": None,
														
 
															                 "summary_file": None,
														
 
															                 "status": "step1_not_found"
														
@@ -272,12 +280,79 @@ async def run_full_analysis(
 
															         search_file = None
														
 
															         search_notes_count = 0
														
 
															+    # ========== Step4: 搜索结果匹配 ==========
														
 
															+    step4_file = None
														
 
															+    step4_high_score_count = None
														
 
															+    step4_top1_score = None
														
 
															+
														
 
															+    if search_and_match and 'search_file' in locals() and search_file:
														
 
															+        from pathlib import Path
														
 
															+
														
 
															+        # 检查 step4 输出文件是否已存在
														
 
															+        step4_dir = os.path.join(persona_dir, "how", "灵感点", inspiration, "search")
														
 
															+        scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
														
 
															+        step4_pattern = f"{scope_prefix}_step4_*.json"
														
 
															+        step4_files = list(Path(step4_dir).glob(step4_pattern)) if os.path.exists(step4_dir) else []
														
 
															+
														
 
															+        step4_exists = len(step4_files) > 0
														
 
															+
														
 
															+        if step4_exists and not force:
														
 
															+            print(f"\n{'─' * 80}")
														
 
															+            print(f"Step4: 已跳过（结果文件已存在）")
														
 
															+            print(f"{'─' * 80}\n")
														
 
															+
														
 
															+            step4_file = str(step4_files[0])
														
 
															+            print(f"✓ 找到已有 Step4 结果: {step4_file}\n")
														
 
															+
														
 
															+            # 读取已有结果
														
 
															+            step4_data = read_json(step4_file)
														
 
															+            step4_results = step4_data.get("匹配结果列表", [])
														
 
															+            step4_high_score_count = sum(1 for r in step4_results if r.get("匹配结果", {}).get("score", 0) >= 0.7)
														
 
															+            step4_top1_score = step4_results[0].get("匹配结果", {}).get("score", 0) if step4_results else 0
														
 
															+        else:
														
 
															+            print(f"\n{'─' * 80}")
														
 
															+            print(f"Step4: 搜索结果与灵感匹配")
														
 
															+            print(f"{'─' * 80}\n")
														
 
															+
														
 
															+            # 临时修改 sys.argv 来传递参数给 step4
														
 
															+            sys.argv = [
														
 
															+                "step4_search_result_match.py",
														
 
															+                persona_dir,
														
 
															+                inspiration
														
 
															+            ]
														
 
															+            if max_tasks is not None:
														
 
															+                sys.argv.append(str(max_tasks))
														
 
															+
														
 
															+            try:
														
 
															+                # 调用 step4 的 main 函数
														
 
															+                await step4_search_result_match.main(current_time, log_url, force=force)
														
 
															+            finally:
														
 
															+                # 恢复原始参数
														
 
															+                sys.argv = original_argv
														
 
															+
														
 
															+            # 查找 step4 输出文件
														
 
															+            step4_files = list(Path(step4_dir).glob(step4_pattern))
														
 
															+
														
 
															+            if step4_files:
														
 
															+                step4_file = str(step4_files[0])
														
 
															+                print(f"✓ Step4 完成，结果文件: {step4_file}\n")
														
 
															+
														
 
															+                # 读取 step4 结果
														
 
															+                step4_data = read_json(step4_file)
														
 
															+                step4_results = step4_data.get("匹配结果列表", [])
														
 
															+                step4_high_score_count = sum(1 for r in step4_results if r.get("匹配结果", {}).get("score", 0) >= 0.7)
														
 
															+                step4_top1_score = step4_results[0].get("匹配结果", {}).get("score", 0) if step4_results else 0
														
 
															+    elif search_and_match:
														
 
															+        print(f"\n{'─' * 80}")
														
 
															+        print(f"Step4: 已跳过（搜索失败）")
														
 
															+        print(f"{'─' * 80}\n")
														
 
															+
														
 
															     # ========== Step2: 增量词匹配 ==========
														
 
															     step2_file = None
														
 
															     step2_score = None
														
 
															     step2_word_count = None
														
 
															-    if enable_step2 and not search_only:
														
 
															+    if enable_step2 and not search_only and not search_and_match:
														
 
															         print(f"\n{'─' * 80}")
														
 
															         print(f"Step2: 增量词在人设中的匹配")
														
 
															         print(f"{'─' * 80}\n")
														
@@ -305,14 +380,14 @@ async def run_full_analysis(
 
															         step2_score = step2_data.get("匹配结果", {}).get("score", 0)
														
 
															         step2_b_content = step2_data.get("输入信息", {}).get("B", "")
														
 
															         step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
														
 
															-    elif not search_only:
														
 
															+    elif not search_only and not search_and_match:
														
 
															         print(f"\n{'─' * 80}")
														
 
															         print(f"Step2: 已跳过（使用 --enable-step2 启用）")
														
 
															         print(f"{'─' * 80}\n")
														
 
															     # ========== 保存流程汇总 ==========
														
 
															-    # search_only 模式不保存汇总文件
														
 
															-    if not search_only:
														
 
															+    # search_only 和 search_and_match 模式不保存汇总文件
														
 
															+    if not search_only and not search_and_match:
														
 
															         output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
														
 
															         scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
														
@@ -358,13 +433,18 @@ async def run_full_analysis(
 
															         summary_file = None
														
 
															     print(f"{'=' * 80}")
														
 
															-    print(f"{'仅搜索' if search_only else '完整流程'}执行完成")
														
 
															+    mode_desc = "仅搜索" if search_only else ("搜索并匹配" if search_and_match else "完整流程")
														
 
															+    print(f"{mode_desc}执行完成")
														
 
															     print(f"{'=' * 80}")
														
 
															     print(f"\n结果文件:")
														
 
															-    if not search_only:
														
 
															+    if not search_only and not search_and_match:
														
 
															         print(f"  Step1:  {step1_file}")
														
 
															     if 'search_file' in locals() and search_file:
														
 
															         print(f"  搜索:   {search_file}")
														
 
															+    if step4_file:
														
 
															+        print(f"  Step4:  {step4_file}")
														
 
															+        if step4_high_score_count is not None:
														
 
															+            print(f"          (高匹配: {step4_high_score_count} 个, Top1 score: {step4_top1_score:.2f})")
														
 
															     if enable_step2 and step2_file:
														
 
															         print(f"  Step2:  {step2_file}")
														
 
															     if summary_file:
														
@@ -372,8 +452,9 @@ async def run_full_analysis(
 
															     print()
														
 
															     return {
														
 
															-        "step1_file": step1_file if not search_only else None,
														
 
															+        "step1_file": step1_file if not search_only and not search_and_match else None,
														
 
															         "search_file": search_file if 'search_file' in locals() else None,
														
 
															+        "step4_file": step4_file,
														
 
															         "step2_file": step2_file,
														
 
															         "summary_file": summary_file,
														
 
															         "status": "success"
														
@@ -403,6 +484,9 @@ async def main():
 
															   # 仅搜索模式：基于已有 Step1 结果，按分数降序搜索前10个
														
 
															   python run_inspiration_analysis.py --search-only --count 10
														
 
															+  # 搜索并匹配模式：基于已有 Step1 结果，执行搜索和 Step4 匹配
														
 
															+  python run_inspiration_analysis.py --search-and-match --count 10
														
 
															+
														
 
															   # 处理所有灵感，强制重新执行
														
 
															   python run_inspiration_analysis.py --count all --force
														
@@ -460,6 +544,12 @@ async def main():
 
															         help="仅执行搜索（跳过 Step1 和 Step2，基于已有 Step1 结果，自动按分数降序）"
														
 
															     )
														
 
															+    parser.add_argument(
														
 
															+        "--search-and-match",
														
 
															+        action="store_true",
														
 
															+        help="搜索并匹配模式（跳过 Step1 和 Step2，执行搜索和 Step4 匹配，自动按分数降序）"
														
 
															+    )
														
 
															+
														
 
															     args = parser.parse_args()
														
 
															     persona_dir = args.dir
														
@@ -468,9 +558,15 @@ async def main():
 
															     sort_by_score = args.sort_by_score
														
 
															     enable_step2 = args.enable_step2
														
 
															     search_only = args.search_only
														
 
															+    search_and_match = args.search_and_match
														
 
															-    # search_only 模式自动启用分数排序
														
 
															-    if search_only:
														
 
															+    # 互斥检查
														
 
															+    if search_only and search_and_match:
														
 
															+        print("❌ 错误: --search-only 和 --search-and-match 不能同时使用")
														
 
															+        sys.exit(1)
														
 
															+
														
 
															+    # search_only 和 search_and_match 模式自动启用分数排序
														
 
															+    if search_only or search_and_match:
														
 
															         sort_by_score = True
														
 
															         enable_step2 = False  # 搜索模式下强制禁用 step2
														
 
															         if shuffle:
														
@@ -550,8 +646,8 @@ async def main():
 
															         print(f"处理第 {i}/{len(inspirations_to_process)} 个灵感: {inspiration}")
														
 
															         print(f"{'#' * 80}")
														
 
															-        # search_only 模式不创建 trace
														
 
															-        if search_only:
														
 
															+        # search_only 和 search_and_match 模式不创建 trace
														
 
															+        if search_only or search_and_match:
														
 
															             result = await run_full_analysis(
														
 
															                 persona_dir=persona_dir,
														
 
															                 inspiration=inspiration,
														
@@ -560,7 +656,8 @@ async def main():
 
															                 current_time=None,
														
 
															                 log_url=None,
														
 
															                 enable_step2=enable_step2,
														
 
															-                search_only=search_only
														
 
															+                search_only=search_only,
														
 
															+                search_and_match=search_and_match
														
 
															             )
														
 
															         else:
														
 
															             # 为每个灵感创建独立的 trace
														
@@ -575,7 +672,8 @@ async def main():
 
															                     current_time=insp_time,
														
 
															                     log_url=insp_log_url,
														
 
															                     enable_step2=enable_step2,
														
 
															-                    search_only=search_only
														
 
															+                    search_only=search_only,
														
 
															+                    search_and_match=search_and_match
														
 
															                 )
														
 
															             if insp_log_url: