2 هفته پیش · 0d29060c3f
--- a/run_inspiration_analysis.py
+++ b/run_inspiration_analysis.py
@@ -19,9 +19,10 @@ from lib.my_trace import set_trace_smith as set_trace
 
				 from lib.data_loader import load_inspiration_list, select_inspiration
			
 
				 from lib.utils import read_json
			
 
				 
			
 
				-# 导入 step1 和 step2 的 main 函数
			
 
				+# 导入 step1, step2 和 step4 的 main 函数
			
 
				 import step1_inspiration_match
			
 
				 import step2_incremental_match
			
 
				+import step4_search_result_match
			
 
				 
			
 
				 # 导入搜索功能
			
 
				 from script.search import search_xiaohongshu
			
@@ -156,9 +157,10 @@ async def run_full_analysis(
 
				     current_time: str = None,
			
 
				     log_url: str = None,
			
 
				     enable_step2: bool = False,
			
 
				-    search_only: bool = False
			
 
				+    search_only: bool = False,
			
 
				+    search_and_match: bool = False
			
 
				 ) -> dict:
			
 
				-    """执行完整的灵感分析流程（Step1 + 搜索 + Step2）
			
 
				+    """执行完整的灵感分析流程（Step1 + 搜索 + Step4匹配 + Step2）
			
 
				 
			
 
				     Args:
			
 
				         persona_dir: 人设目录路径
			
@@ -169,22 +171,26 @@ async def run_full_analysis(
 
				         log_url: 日志链接
			
 
				         enable_step2: 是否执行 Step2（默认 False）
			
 
				         search_only: 是否只执行搜索（跳过 Step1 和 Step2，默认 False）
			
 
				+        search_and_match: 是否搜索并匹配模式（跳过 Step1 和 Step2，执行搜索和 Step4，默认 False）
			
 
				 
			
 
				     Returns:
			
 
				         包含文件路径和状态的字典
			
 
				     """
			
 
				     print(f"\n{'=' * 80}")
			
 
				-    print(f"开始{'仅搜索' if search_only else '完整分析'}流程: {inspiration}")
			
 
				+    mode_desc = "仅搜索" if search_only else ("搜索并匹配" if search_and_match else "完整分析")
			
 
				+    print(f"开始{mode_desc}流程: {inspiration}")
			
 
				     print(f"{'=' * 80}\n")
			
 
				 
			
 
				+    # 保存原始 sys.argv
			
 
				+    original_argv = sys.argv.copy()
			
 
				+
			
 
				     # ========== Step1: 灵感与人设匹配 ==========
			
 
				-    if not search_only:
			
 
				+    if not search_only and not search_and_match:
			
 
				         print(f"{'─' * 80}")
			
 
				         print(f"Step1: 灵感与人设匹配")
			
 
				         print(f"{'─' * 80}\n")
			
 
				 
			
 
				         # 临时修改 sys.argv 来传递参数给 step1
			
 
				-        original_argv = sys.argv.copy()
			
 
				         sys.argv = [
			
 
				             "step1_inspiration_match.py",
			
 
				             persona_dir,
			
@@ -204,7 +210,8 @@ async def run_full_analysis(
 
				         print(f"✓ Step1 完成，结果文件: {step1_file}\n")
			
 
				     else:
			
 
				         print(f"{'─' * 80}")
			
 
				-        print(f"Step1: 跳过（仅搜索模式）")
			
 
				+        mode_label = "搜索并匹配模式" if search_and_match else "仅搜索模式"
			
 
				+        print(f"Step1: 跳过（{mode_label}）")
			
 
				         print(f"{'─' * 80}\n")
			
 
				 
			
 
				         # 查找已有的 step1 输出文件
			
@@ -216,6 +223,7 @@ async def run_full_analysis(
 
				             return {
			
 
				                 "step1_file": None,
			
 
				                 "search_file": None,
			
 
				+                "step4_file": None,
			
 
				                 "step2_file": None,
			
 
				                 "summary_file": None,
			
 
				                 "status": "step1_not_found"
			
@@ -272,12 +280,79 @@ async def run_full_analysis(
 
				         search_file = None
			
 
				         search_notes_count = 0
			
 
				 
			
 
				+    # ========== Step4: 搜索结果匹配 ==========
			
 
				+    step4_file = None
			
 
				+    step4_high_score_count = None
			
 
				+    step4_top1_score = None
			
 
				+
			
 
				+    if search_and_match and 'search_file' in locals() and search_file:
			
 
				+        from pathlib import Path
			
 
				+
			
 
				+        # 检查 step4 输出文件是否已存在
			
 
				+        step4_dir = os.path.join(persona_dir, "how", "灵感点", inspiration, "search")
			
 
				+        scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
			
 
				+        step4_pattern = f"{scope_prefix}_step4_*.json"
			
 
				+        step4_files = list(Path(step4_dir).glob(step4_pattern)) if os.path.exists(step4_dir) else []
			
 
				+
			
 
				+        step4_exists = len(step4_files) > 0
			
 
				+
			
 
				+        if step4_exists and not force:
			
 
				+            print(f"\n{'─' * 80}")
			
 
				+            print(f"Step4: 已跳过（结果文件已存在）")
			
 
				+            print(f"{'─' * 80}\n")
			
 
				+
			
 
				+            step4_file = str(step4_files[0])
			
 
				+            print(f"✓ 找到已有 Step4 结果: {step4_file}\n")
			
 
				+
			
 
				+            # 读取已有结果
			
 
				+            step4_data = read_json(step4_file)
			
 
				+            step4_results = step4_data.get("匹配结果列表", [])
			
 
				+            step4_high_score_count = sum(1 for r in step4_results if r.get("匹配结果", {}).get("score", 0) >= 0.7)
			
 
				+            step4_top1_score = step4_results[0].get("匹配结果", {}).get("score", 0) if step4_results else 0
			
 
				+        else:
			
 
				+            print(f"\n{'─' * 80}")
			
 
				+            print(f"Step4: 搜索结果与灵感匹配")
			
 
				+            print(f"{'─' * 80}\n")
			
 
				+
			
 
				+            # 临时修改 sys.argv 来传递参数给 step4
			
 
				+            sys.argv = [
			
 
				+                "step4_search_result_match.py",
			
 
				+                persona_dir,
			
 
				+                inspiration
			
 
				+            ]
			
 
				+            if max_tasks is not None:
			
 
				+                sys.argv.append(str(max_tasks))
			
 
				+
			
 
				+            try:
			
 
				+                # 调用 step4 的 main 函数
			
 
				+                await step4_search_result_match.main(current_time, log_url, force=force)
			
 
				+            finally:
			
 
				+                # 恢复原始参数
			
 
				+                sys.argv = original_argv
			
 
				+
			
 
				+            # 查找 step4 输出文件
			
 
				+            step4_files = list(Path(step4_dir).glob(step4_pattern))
			
 
				+
			
 
				+            if step4_files:
			
 
				+                step4_file = str(step4_files[0])
			
 
				+                print(f"✓ Step4 完成，结果文件: {step4_file}\n")
			
 
				+
			
 
				+                # 读取 step4 结果
			
 
				+                step4_data = read_json(step4_file)
			
 
				+                step4_results = step4_data.get("匹配结果列表", [])
			
 
				+                step4_high_score_count = sum(1 for r in step4_results if r.get("匹配结果", {}).get("score", 0) >= 0.7)
			
 
				+                step4_top1_score = step4_results[0].get("匹配结果", {}).get("score", 0) if step4_results else 0
			
 
				+    elif search_and_match:
			
 
				+        print(f"\n{'─' * 80}")
			
 
				+        print(f"Step4: 已跳过（搜索失败）")
			
 
				+        print(f"{'─' * 80}\n")
			
 
				+
			
 
				     # ========== Step2: 增量词匹配 ==========
			
 
				     step2_file = None
			
 
				     step2_score = None
			
 
				     step2_word_count = None
			
 
				 
			
 
				-    if enable_step2 and not search_only:
			
 
				+    if enable_step2 and not search_only and not search_and_match:
			
 
				         print(f"\n{'─' * 80}")
			
 
				         print(f"Step2: 增量词在人设中的匹配")
			
 
				         print(f"{'─' * 80}\n")
			
@@ -305,14 +380,14 @@ async def run_full_analysis(
 
				         step2_score = step2_data.get("匹配结果", {}).get("score", 0)
			
 
				         step2_b_content = step2_data.get("输入信息", {}).get("B", "")
			
 
				         step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
			
 
				-    elif not search_only:
			
 
				+    elif not search_only and not search_and_match:
			
 
				         print(f"\n{'─' * 80}")
			
 
				         print(f"Step2: 已跳过（使用 --enable-step2 启用）")
			
 
				         print(f"{'─' * 80}\n")
			
 
				 
			
 
				     # ========== 保存流程汇总 ==========
			
 
				-    # search_only 模式不保存汇总文件
			
 
				-    if not search_only:
			
 
				+    # search_only 和 search_and_match 模式不保存汇总文件
			
 
				+    if not search_only and not search_and_match:
			
 
				         output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
			
 
				         scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
			
 
				 
			
@@ -358,13 +433,18 @@ async def run_full_analysis(
 
				         summary_file = None
			
 
				 
			
 
				     print(f"{'=' * 80}")
			
 
				-    print(f"{'仅搜索' if search_only else '完整流程'}执行完成")
			
 
				+    mode_desc = "仅搜索" if search_only else ("搜索并匹配" if search_and_match else "完整流程")
			
 
				+    print(f"{mode_desc}执行完成")
			
 
				     print(f"{'=' * 80}")
			
 
				     print(f"\n结果文件:")
			
 
				-    if not search_only:
			
 
				+    if not search_only and not search_and_match:
			
 
				         print(f"  Step1:  {step1_file}")
			
 
				     if 'search_file' in locals() and search_file:
			
 
				         print(f"  搜索:   {search_file}")
			
 
				+    if step4_file:
			
 
				+        print(f"  Step4:  {step4_file}")
			
 
				+        if step4_high_score_count is not None:
			
 
				+            print(f"          (高匹配: {step4_high_score_count} 个, Top1 score: {step4_top1_score:.2f})")
			
 
				     if enable_step2 and step2_file:
			
 
				         print(f"  Step2:  {step2_file}")
			
 
				     if summary_file:
			
@@ -372,8 +452,9 @@ async def run_full_analysis(
 
				     print()
			
 
				 
			
 
				     return {
			
 
				-        "step1_file": step1_file if not search_only else None,
			
 
				+        "step1_file": step1_file if not search_only and not search_and_match else None,
			
 
				         "search_file": search_file if 'search_file' in locals() else None,
			
 
				+        "step4_file": step4_file,
			
 
				         "step2_file": step2_file,
			
 
				         "summary_file": summary_file,
			
 
				         "status": "success"
			
@@ -403,6 +484,9 @@ async def main():
 
				   # 仅搜索模式：基于已有 Step1 结果，按分数降序搜索前10个
			
 
				   python run_inspiration_analysis.py --search-only --count 10
			
 
				 
			
 
				+  # 搜索并匹配模式：基于已有 Step1 结果，执行搜索和 Step4 匹配
			
 
				+  python run_inspiration_analysis.py --search-and-match --count 10
			
 
				+
			
 
				   # 处理所有灵感，强制重新执行
			
 
				   python run_inspiration_analysis.py --count all --force
			
 
				 
			
@@ -460,6 +544,12 @@ async def main():
 
				         help="仅执行搜索（跳过 Step1 和 Step2，基于已有 Step1 结果，自动按分数降序）"
			
 
				     )
			
 
				 
			
 
				+    parser.add_argument(
			
 
				+        "--search-and-match",
			
 
				+        action="store_true",
			
 
				+        help="搜索并匹配模式（跳过 Step1 和 Step2，执行搜索和 Step4 匹配，自动按分数降序）"
			
 
				+    )
			
 
				+
			
 
				     args = parser.parse_args()
			
 
				 
			
 
				     persona_dir = args.dir
			
@@ -468,9 +558,15 @@ async def main():
 
				     sort_by_score = args.sort_by_score
			
 
				     enable_step2 = args.enable_step2
			
 
				     search_only = args.search_only
			
 
				+    search_and_match = args.search_and_match
			
 
				 
			
 
				-    # search_only 模式自动启用分数排序
			
 
				-    if search_only:
			
 
				+    # 互斥检查
			
 
				+    if search_only and search_and_match:
			
 
				+        print("❌ 错误: --search-only 和 --search-and-match 不能同时使用")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+    # search_only 和 search_and_match 模式自动启用分数排序
			
 
				+    if search_only or search_and_match:
			
 
				         sort_by_score = True
			
 
				         enable_step2 = False  # 搜索模式下强制禁用 step2
			
 
				         if shuffle:
			
@@ -550,8 +646,8 @@ async def main():
 
				         print(f"处理第 {i}/{len(inspirations_to_process)} 个灵感: {inspiration}")
			
 
				         print(f"{'#' * 80}")
			
 
				 
			
 
				-        # search_only 模式不创建 trace
			
 
				-        if search_only:
			
 
				+        # search_only 和 search_and_match 模式不创建 trace
			
 
				+        if search_only or search_and_match:
			
 
				             result = await run_full_analysis(
			
 
				                 persona_dir=persona_dir,
			
 
				                 inspiration=inspiration,
			
@@ -560,7 +656,8 @@ async def main():
 
				                 current_time=None,
			
 
				                 log_url=None,
			
 
				                 enable_step2=enable_step2,
			
 
				-                search_only=search_only
			
 
				+                search_only=search_only,
			
 
				+                search_and_match=search_and_match
			
 
				             )
			
 
				         else:
			
 
				             # 为每个灵感创建独立的 trace
			
@@ -575,7 +672,8 @@ async def main():
 
				                     current_time=insp_time,
			
 
				                     log_url=insp_log_url,
			
 
				                     enable_step2=enable_step2,
			
 
				-                    search_only=search_only
			
 
				+                    search_only=search_only,
			
 
				+                    search_and_match=search_and_match
			
 
				                 )
			
 
				 
			
 
				             if insp_log_url: