run_inspiration_analysis.py 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758
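"""
Main pipeline script: chains Step1, the Xiaohongshu search, Step4 and Step2.

Runs the inspiration-analysis flow:
1. Step1: match the inspiration against the persona (calls step1 main, which saves its own result)
2. Step1.5: search Xiaohongshu for the Top1 matched element (via search_xiaohongshu)
3. Step4: match the search results against the inspiration (only in --search-and-match mode)
4. Step2: match incremental words against the persona (calls step2 main; only with --enable-step2)
5. Write the pipeline summary file (full mode only)
"""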
  9. import os
  10. import sys
  11. import json
  12. import asyncio
  13. import random
  14. import argparse
  15. from agents import trace
  16. from lib.my_trace import set_trace_smith as set_trace
  17. from lib.data_loader import load_inspiration_list, select_inspiration
  18. from lib.utils import read_json
  19. # 导入 step1, step2 和 step4 的 main 函数
  20. import step1_inspiration_match
  21. import step2_incremental_match
  22. import step4_search_result_match
  23. # 导入搜索功能
  24. from script.search import search_xiaohongshu
  25. def find_step1_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
  26. """查找 step1 输出文件
  27. Args:
  28. persona_dir: 人设目录
  29. inspiration: 灵感点名称
  30. max_tasks: 任务数限制
  31. Returns:
  32. step1 文件路径
  33. """
  34. from pathlib import Path
  35. step1_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  36. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  37. step1_pattern = f"{scope_prefix}_step1_*.json"
  38. step1_files = list(Path(step1_dir).glob(step1_pattern))
  39. if not step1_files:
  40. raise FileNotFoundError(f"找不到 step1 输出文件: {step1_dir}/{step1_pattern}")
  41. return str(step1_files[0])
  42. def find_step2_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
  43. """查找 step2 输出文件
  44. Args:
  45. persona_dir: 人设目录
  46. inspiration: 灵感点名称
  47. max_tasks: 任务数限制
  48. Returns:
  49. step2 文件路径
  50. """
  51. from pathlib import Path
  52. step2_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  53. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  54. step2_pattern = f"{scope_prefix}_step2_*.json"
  55. step2_files = list(Path(step2_dir).glob(step2_pattern))
  56. if not step2_files:
  57. raise FileNotFoundError(f"找不到 step2 输出文件: {step2_dir}/{step2_pattern}")
  58. return str(step2_files[0])
  59. def get_inspiration_score(persona_dir: str, inspiration: str, max_tasks: int = None) -> float:
  60. """获取灵感的 Step1 Top1 分数
  61. Args:
  62. persona_dir: 人设目录
  63. inspiration: 灵感点名称
  64. max_tasks: 任务数限制
  65. Returns:
  66. Step1 Top1 的 score,如果文件不存在返回 -1
  67. """
  68. try:
  69. step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
  70. step1_data = read_json(step1_file)
  71. results = step1_data.get("匹配结果列表", [])
  72. if results:
  73. return results[0].get('匹配结果', {}).get('score', 0)
  74. return 0
  75. except (FileNotFoundError, Exception):
  76. return -1
  77. def sort_inspirations_by_score(
  78. persona_dir: str,
  79. inspiration_list: list,
  80. max_tasks: int = None
  81. ) -> list:
  82. """根据 Step1 结果分数对灵感列表排序
  83. Args:
  84. persona_dir: 人设目录
  85. inspiration_list: 灵感列表
  86. max_tasks: 任务数限制
  87. Returns:
  88. 排序后的灵感列表(按分数降序)
  89. """
  90. print(f"\n{'─' * 80}")
  91. print(f"正在读取现有 Step1 结果文件...")
  92. print(f"{'─' * 80}")
  93. inspiration_scores = []
  94. for inspiration in inspiration_list:
  95. score = get_inspiration_score(persona_dir, inspiration, max_tasks)
  96. inspiration_scores.append({
  97. "inspiration": inspiration,
  98. "score": score,
  99. "has_result": score >= 0
  100. })
  101. # 统计
  102. has_result_count = sum(1 for item in inspiration_scores if item["has_result"])
  103. print(f"找到 {has_result_count}/{len(inspiration_list)} 个灵感的 Step1 结果")
  104. # 排序:有结果的按分数降序,无结果的放最后(保持原顺序)
  105. sorted_items = sorted(
  106. inspiration_scores,
  107. key=lambda x: (x["has_result"], x["score"]),
  108. reverse=True
  109. )
  110. # 显示排序结果(前10个)
  111. print(f"\n排序后的灵感列表(前10个):")
  112. for i, item in enumerate(sorted_items[:10], 1):
  113. status = f"score={item['score']:.2f}" if item['has_result'] else "无结果"
  114. print(f" {i}. [{status}] {item['inspiration']}")
  115. if len(sorted_items) > 10:
  116. print(f" ... 还有 {len(sorted_items) - 10} 个")
  117. return [item["inspiration"] for item in sorted_items]
  118. async def run_full_analysis(
  119. persona_dir: str,
  120. inspiration: str,
  121. max_tasks: int = None,
  122. force: bool = False,
  123. current_time: str = None,
  124. log_url: str = None,
  125. enable_step2: bool = False,
  126. search_only: bool = False,
  127. search_and_match: bool = False
  128. ) -> dict:
  129. """执行完整的灵感分析流程(Step1 + 搜索 + Step4匹配 + Step2)
  130. Args:
  131. persona_dir: 人设目录路径
  132. inspiration: 灵感点文本
  133. max_tasks: step1 最大任务数(None 表示不限制)
  134. force: 是否强制重新执行(跳过文件存在检查)
  135. current_time: 当前时间戳
  136. log_url: 日志链接
  137. enable_step2: 是否执行 Step2(默认 False)
  138. search_only: 是否只执行搜索(跳过 Step1 和 Step2,默认 False)
  139. search_and_match: 是否搜索并匹配模式(跳过 Step1 和 Step2,执行搜索和 Step4,默认 False)
  140. Returns:
  141. 包含文件路径和状态的字典
  142. """
  143. print(f"\n{'=' * 80}")
  144. mode_desc = "仅搜索" if search_only else ("搜索并匹配" if search_and_match else "完整分析")
  145. print(f"开始{mode_desc}流程: {inspiration}")
  146. print(f"{'=' * 80}\n")
  147. # 保存原始 sys.argv
  148. original_argv = sys.argv.copy()
  149. # ========== Step1: 灵感与人设匹配 ==========
  150. if not search_only and not search_and_match:
  151. print(f"{'─' * 80}")
  152. print(f"Step1: 灵感与人设匹配")
  153. print(f"{'─' * 80}\n")
  154. # 临时修改 sys.argv 来传递参数给 step1
  155. sys.argv = [
  156. "step1_inspiration_match.py",
  157. persona_dir,
  158. inspiration,
  159. str(max_tasks) if max_tasks is not None else "all"
  160. ]
  161. try:
  162. # 调用 step1 的 main 函数(通过参数传递 force)
  163. await step1_inspiration_match.main(current_time, log_url, force=force)
  164. finally:
  165. # 恢复原始参数
  166. sys.argv = original_argv
  167. # 查找 step1 输出文件
  168. step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
  169. print(f"✓ Step1 完成,结果文件: {step1_file}\n")
  170. else:
  171. print(f"{'─' * 80}")
  172. mode_label = "搜索并匹配模式" if search_and_match else "仅搜索模式"
  173. print(f"Step1: 跳过({mode_label})")
  174. print(f"{'─' * 80}\n")
  175. # 查找已有的 step1 输出文件
  176. try:
  177. step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
  178. print(f"✓ 找到已有 Step1 结果: {step1_file}\n")
  179. except FileNotFoundError as e:
  180. print(f"⚠️ {e}")
  181. return {
  182. "step1_file": None,
  183. "search_file": None,
  184. "step4_file": None,
  185. "step2_file": None,
  186. "summary_file": None,
  187. "status": "step1_not_found"
  188. }
  189. # 读取 step1 结果
  190. step1_data = read_json(step1_file)
  191. step1_results = step1_data.get("匹配结果列表", [])
  192. if not step1_results:
  193. print("⚠️ Step1 结果为空,终止流程")
  194. return {
  195. "step1_file": step1_file,
  196. "step2_file": None,
  197. "summary_file": None,
  198. "status": "step1_empty"
  199. }
  200. step1_top1 = step1_results[0]
  201. step1_score = step1_top1.get('匹配结果', {}).get('score', 0)
  202. step1_element = step1_top1.get("业务信息", {}).get("匹配要素", "")
  203. print(f"Top1 匹配要素: {step1_element}, score: {step1_score:.2f}")
  204. # ========== Step1.5: 小红书搜索 ==========
  205. print(f"\n{'─' * 80}")
  206. print(f"Step1.5: 基于 Top1 匹配要素进行小红书搜索")
  207. print(f"{'─' * 80}\n")
  208. search_keyword = step1_element
  209. print(f"搜索关键词: {search_keyword}")
  210. # 执行搜索
  211. try:
  212. search_result = search_xiaohongshu(search_keyword)
  213. search_notes_count = len(search_result.get('notes', []))
  214. print(f"✓ 搜索完成,找到 {search_notes_count} 条笔记")
  215. # 保存搜索结果
  216. search_dir = os.path.join(persona_dir, "how", "灵感点", inspiration, "search")
  217. os.makedirs(search_dir, exist_ok=True)
  218. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  219. # 清理文件名中的非法字符
  220. safe_keyword = search_keyword[:20].replace('/', '_').replace('\\', '_').replace(':', '_')
  221. search_filename = f"{scope_prefix}_search_{safe_keyword}.json"
  222. search_file = os.path.join(search_dir, search_filename)
  223. with open(search_file, 'w', encoding='utf-8') as f:
  224. json.dump(search_result, f, ensure_ascii=False, indent=2)
  225. print(f"✓ 搜索结果已保存: {search_file}\n")
  226. except Exception as e:
  227. print(f"⚠️ 搜索失败: {e}")
  228. search_file = None
  229. search_notes_count = 0
  230. # ========== Step4: 搜索结果匹配 ==========
  231. step4_file = None
  232. step4_high_score_count = None
  233. step4_top1_score = None
  234. if search_and_match and 'search_file' in locals() and search_file:
  235. from pathlib import Path
  236. # 检查 step4 输出文件是否已存在
  237. step4_dir = os.path.join(persona_dir, "how", "灵感点", inspiration, "search")
  238. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  239. step4_pattern = f"{scope_prefix}_step4_*.json"
  240. step4_files = list(Path(step4_dir).glob(step4_pattern)) if os.path.exists(step4_dir) else []
  241. step4_exists = len(step4_files) > 0
  242. if step4_exists and not force:
  243. print(f"\n{'─' * 80}")
  244. print(f"Step4: 已跳过(结果文件已存在)")
  245. print(f"{'─' * 80}\n")
  246. step4_file = str(step4_files[0])
  247. print(f"✓ 找到已有 Step4 结果: {step4_file}\n")
  248. # 读取已有结果
  249. step4_data = read_json(step4_file)
  250. step4_results = step4_data.get("匹配结果列表", [])
  251. step4_high_score_count = sum(1 for r in step4_results if r.get("匹配结果", {}).get("score", 0) >= 0.7)
  252. step4_top1_score = step4_results[0].get("匹配结果", {}).get("score", 0) if step4_results else 0
  253. else:
  254. print(f"\n{'─' * 80}")
  255. print(f"Step4: 搜索结果与灵感匹配")
  256. print(f"{'─' * 80}\n")
  257. # 如果 force=True,删除已有的 step4 文件
  258. if force and step4_exists:
  259. for f in step4_files:
  260. os.remove(f)
  261. print(f"✓ 已删除旧文件: {f}")
  262. # 临时修改 sys.argv 来传递参数给 step4
  263. sys.argv = [
  264. "step4_search_result_match.py",
  265. persona_dir,
  266. inspiration
  267. ]
  268. if max_tasks is not None:
  269. sys.argv.append(str(max_tasks))
  270. try:
  271. # 调用 step4 的 main 函数
  272. await step4_search_result_match.main(current_time, log_url, force=force)
  273. finally:
  274. # 恢复原始参数
  275. sys.argv = original_argv
  276. # 查找 step4 输出文件
  277. step4_files = list(Path(step4_dir).glob(step4_pattern))
  278. if step4_files:
  279. step4_file = str(step4_files[0])
  280. print(f"✓ Step4 完成,结果文件: {step4_file}\n")
  281. # 读取 step4 结果
  282. step4_data = read_json(step4_file)
  283. step4_results = step4_data.get("匹配结果列表", [])
  284. step4_high_score_count = sum(1 for r in step4_results if r.get("匹配结果", {}).get("score", 0) >= 0.7)
  285. step4_top1_score = step4_results[0].get("匹配结果", {}).get("score", 0) if step4_results else 0
  286. elif search_and_match:
  287. print(f"\n{'─' * 80}")
  288. print(f"Step4: 已跳过(搜索失败)")
  289. print(f"{'─' * 80}\n")
  290. # ========== Step2: 增量词匹配 ==========
  291. step2_file = None
  292. step2_score = None
  293. step2_word_count = None
  294. if enable_step2 and not search_only and not search_and_match:
  295. print(f"\n{'─' * 80}")
  296. print(f"Step2: 增量词在人设中的匹配")
  297. print(f"{'─' * 80}\n")
  298. # 临时修改 sys.argv 来传递参数给 step2
  299. sys.argv = [
  300. "step2_incremental_match.py",
  301. persona_dir,
  302. inspiration
  303. ]
  304. try:
  305. # 调用 step2 的 main 函数(通过参数传递 force)
  306. await step2_incremental_match.main(current_time, log_url, force=force)
  307. finally:
  308. # 恢复原始参数
  309. sys.argv = original_argv
  310. # 查找 step2 输出文件
  311. step2_file = find_step2_output(persona_dir, inspiration, max_tasks)
  312. print(f"✓ Step2 完成,结果文件: {step2_file}\n")
  313. # 读取 step2 结果
  314. step2_data = read_json(step2_file)
  315. step2_score = step2_data.get("匹配结果", {}).get("score", 0)
  316. step2_b_content = step2_data.get("输入信息", {}).get("B", "")
  317. step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
  318. elif not search_only and not search_and_match:
  319. print(f"\n{'─' * 80}")
  320. print(f"Step2: 已跳过(使用 --enable-step2 启用)")
  321. print(f"{'─' * 80}\n")
  322. # ========== 保存流程汇总 ==========
  323. # search_only 和 search_and_match 模式不保存汇总文件
  324. if not search_only and not search_and_match:
  325. output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  326. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  327. # 从 step1 文件名提取模型名称
  328. step1_filename = os.path.basename(step1_file)
  329. model_short = step1_filename.split("_")[-1].replace(".json", "")
  330. summary_filename = f"{scope_prefix}_summary_完整流程_{model_short}.json"
  331. summary_file = os.path.join(output_dir, summary_filename)
  332. # 构建流程描述
  333. workflow = "Step1 + 搜索"
  334. if enable_step2:
  335. workflow += " + Step2"
  336. summary = {
  337. "元数据": {
  338. "current_time": current_time,
  339. "log_url": log_url,
  340. "流程": workflow,
  341. "step1_model": step1_data.get("元数据", {}).get("model", ""),
  342. "step2_model": step2_data.get("元数据", {}).get("model", "") if enable_step2 and 'step2_data' in locals() else None
  343. },
  344. "灵感": inspiration,
  345. "文件路径": {
  346. "step1": step1_file,
  347. "search": search_file if 'search_file' in locals() else None,
  348. "step2": step2_file
  349. },
  350. "关键指标": {
  351. "step1_top1_score": step1_score,
  352. "step1_top1_匹配要素": step1_element,
  353. "search_keyword": search_keyword if 'search_keyword' in locals() else None,
  354. "search_notes_count": search_notes_count if 'search_notes_count' in locals() else 0,
  355. "step2_增量词数量": step2_word_count,
  356. "step2_score": step2_score
  357. }
  358. }
  359. with open(summary_file, 'w', encoding='utf-8') as f:
  360. json.dump(summary, f, ensure_ascii=False, indent=2)
  361. else:
  362. summary_file = None
  363. print(f"{'=' * 80}")
  364. mode_desc = "仅搜索" if search_only else ("搜索并匹配" if search_and_match else "完整流程")
  365. print(f"{mode_desc}执行完成")
  366. print(f"{'=' * 80}")
  367. print(f"\n结果文件:")
  368. if not search_only and not search_and_match:
  369. print(f" Step1: {step1_file}")
  370. if 'search_file' in locals() and search_file:
  371. print(f" 搜索: {search_file}")
  372. if step4_file:
  373. print(f" Step4: {step4_file}")
  374. if step4_high_score_count is not None:
  375. print(f" (高匹配: {step4_high_score_count} 个, Top1 score: {step4_top1_score:.2f})")
  376. if enable_step2 and step2_file:
  377. print(f" Step2: {step2_file}")
  378. if summary_file:
  379. print(f" 汇总: {summary_file}")
  380. print()
  381. return {
  382. "step1_file": step1_file if not search_only and not search_and_match else None,
  383. "search_file": search_file if 'search_file' in locals() else None,
  384. "step4_file": step4_file,
  385. "step2_file": step2_file,
  386. "summary_file": summary_file,
  387. "status": "success"
  388. }
  389. async def main():
  390. """主函数"""
  391. # 解析命令行参数
  392. parser = argparse.ArgumentParser(
  393. description="灵感分析主流程 (Step1 + 搜索 + Step2)",
  394. formatter_class=argparse.RawDescriptionHelpFormatter,
  395. epilog="""
  396. 使用示例:
  397. # 处理指定的灵感(通过名称)
  398. python run_inspiration_analysis.py --inspiration "内容植入品牌推广"
  399. # 处理指定的灵感(通过索引,0表示第1个)
  400. python run_inspiration_analysis.py --inspiration 0
  401. # 处理第1个灵感(Step1 + 搜索,默认不执行 Step2)
  402. python run_inspiration_analysis.py --dir data/阿里多多酱/out/人设_1110 --count 1
  403. # 启用 Step2 完整流程(Step1 + 搜索 + Step2)
  404. python run_inspiration_analysis.py --count 1 --enable-step2
  405. # 随机处理5个灵感
  406. python run_inspiration_analysis.py --count 5 --shuffle
  407. # 按 Step1 分数排序,处理前10个高分灵感
  408. python run_inspiration_analysis.py --count 10 --sort-by-score
  409. # 仅搜索模式:基于已有 Step1 结果,按分数降序搜索前10个
  410. python run_inspiration_analysis.py --search-only --count 10
  411. # 搜索并匹配模式:基于已有 Step1 结果,执行搜索和 Step4 匹配
  412. python run_inspiration_analysis.py --search-and-match --count 10
  413. # 从第11个开始,处理10个灵感
  414. python run_inspiration_analysis.py --search-and-match --start 11 --count 10
  415. # 处理所有灵感,强制重新执行
  416. python run_inspiration_analysis.py --count all --force
  417. # 处理前10个灵感,step1只处理前20个任务
  418. python run_inspiration_analysis.py --count 10 --max-tasks 20
  419. """
  420. )
  421. parser.add_argument(
  422. "--dir",
  423. default="data/阿里多多酱/out/人设_1110",
  424. help="人设目录路径 (默认: data/阿里多多酱/out/人设_1110)"
  425. )
  426. parser.add_argument(
  427. "--inspiration",
  428. type=str,
  429. default=None,
  430. help="指定要处理的灵感(可以是灵感名称或索引,如 '内容植入品牌推广' 或 '0')"
  431. )
  432. parser.add_argument(
  433. "--count",
  434. default="1",
  435. help="处理的灵感数量,可以是数字或 'all' (默认: 1, 当指定 --inspiration 时忽略)"
  436. )
  437. parser.add_argument(
  438. "--start",
  439. type=int,
  440. default=1,
  441. help="起始位置(从1开始),与 --count 配合使用 (默认: 1)"
  442. )
  443. parser.add_argument(
  444. "--max-tasks",
  445. type=str,
  446. default="all",
  447. help="Step1 处理的最大任务数,可以是数字或 'all' (默认: all)"
  448. )
  449. parser.add_argument(
  450. "--force",
  451. action="store_true",
  452. help="强制重新执行,覆盖已存在的文件"
  453. )
  454. parser.add_argument(
  455. "--shuffle",
  456. action="store_true",
  457. help="随机选择灵感,而不是按顺序"
  458. )
  459. parser.add_argument(
  460. "--sort-by-score",
  461. action="store_true",
  462. help="根据 Step1 结果分数排序(降序),优先处理高分灵感"
  463. )
  464. parser.add_argument(
  465. "--enable-step2",
  466. action="store_true",
  467. help="启用 Step2 增量词匹配(默认关闭)"
  468. )
  469. parser.add_argument(
  470. "--search-only",
  471. action="store_true",
  472. help="仅执行搜索(跳过 Step1 和 Step2,基于已有 Step1 结果,自动按分数降序)"
  473. )
  474. parser.add_argument(
  475. "--search-and-match",
  476. action="store_true",
  477. help="搜索并匹配模式(跳过 Step1 和 Step2,执行搜索和 Step4 匹配,自动按分数降序)"
  478. )
  479. args = parser.parse_args()
  480. persona_dir = args.dir
  481. force = args.force
  482. shuffle = args.shuffle
  483. sort_by_score = args.sort_by_score
  484. enable_step2 = args.enable_step2
  485. search_only = args.search_only
  486. search_and_match = args.search_and_match
  487. start_index = args.start
  488. # 互斥检查
  489. if search_only and search_and_match:
  490. print("❌ 错误: --search-only 和 --search-and-match 不能同时使用")
  491. sys.exit(1)
  492. # search_only 和 search_and_match 模式自动启用分数排序
  493. if search_only or search_and_match:
  494. sort_by_score = True
  495. enable_step2 = False # 搜索模式下强制禁用 step2
  496. if shuffle:
  497. print("⚠️ 警告: --search-only 模式会自动按分数排序,忽略 --shuffle 参数")
  498. shuffle = False
  499. # 处理 max_tasks
  500. max_tasks = None if args.max_tasks == "all" else int(args.max_tasks)
  501. # 动态流程名称
  502. if search_only:
  503. workflow_name = "仅搜索"
  504. else:
  505. workflow_name = "Step1 + 搜索"
  506. if enable_step2:
  507. workflow_name += " + Step2"
  508. print(f"{'=' * 80}")
  509. print(f"灵感分析主流程 ({workflow_name})")
  510. print(f"{'=' * 80}")
  511. print(f"人设目录: {persona_dir}")
  512. # 加载灵感列表
  513. inspiration_list = load_inspiration_list(persona_dir)
  514. # 处理 --inspiration 参数(优先级最高)
  515. if args.inspiration is not None:
  516. from lib.data_loader import select_inspiration
  517. selected_inspiration = select_inspiration(args.inspiration, inspiration_list)
  518. inspirations_to_process = [selected_inspiration]
  519. print(f"处理灵感: {selected_inspiration}")
  520. # 跳过后续的选择逻辑
  521. inspiration_count = 1
  522. skip_selection = True
  523. else:
  524. skip_selection = False
  525. # 确定要处理的灵感数量
  526. if args.count == "all":
  527. inspiration_count = len(inspiration_list) - (start_index - 1)
  528. print(f"处理灵感: 从第 {start_index} 个到最后 (共 {inspiration_count} 个)")
  529. else:
  530. inspiration_count = int(args.count)
  531. if start_index > 1:
  532. print(f"处理灵感: 从第 {start_index} 个开始,共 {inspiration_count} 个")
  533. else:
  534. print(f"处理灵感: 前 {inspiration_count} 个")
  535. if max_tasks:
  536. print(f"Step1 任务数限制: {max_tasks}")
  537. if search_only:
  538. print(f"搜索模式: 仅搜索(跳过 Step1 和 Step2)")
  539. print(f"分数排序: 根据已有 Step1 结果按分数降序处理")
  540. else:
  541. if force:
  542. print(f"强制模式: 重新执行所有步骤")
  543. if shuffle:
  544. print(f"随机模式: 随机选择灵感")
  545. if sort_by_score:
  546. print(f"分数排序: 根据 Step1 结果按分数降序处理")
  547. if enable_step2:
  548. print(f"Step2: 启用增量词匹配")
  549. else:
  550. print(f"Step2: 已关闭(使用 --enable-step2 启用)")
  551. # 选择要处理的灵感列表(如果没有指定 --inspiration)
  552. if not skip_selection:
  553. # 验证 start_index
  554. if start_index < 1:
  555. print(f"❌ 错误: --start 必须 >= 1")
  556. sys.exit(1)
  557. if sort_by_score:
  558. # 根据 Step1 结果分数排序
  559. sorted_list = sort_inspirations_by_score(persona_dir, inspiration_list, max_tasks)
  560. # 应用 start 和 count(start 从 1 开始,转换为 0 索引)
  561. start_idx = start_index - 1
  562. end_idx = start_idx + inspiration_count
  563. inspirations_to_process = sorted_list[start_idx:end_idx]
  564. elif shuffle:
  565. # 随机打乱灵感列表后选择
  566. shuffled_list = inspiration_list.copy()
  567. random.shuffle(shuffled_list)
  568. # 应用 start 和 count
  569. start_idx = start_index - 1
  570. end_idx = start_idx + inspiration_count
  571. inspirations_to_process = shuffled_list[start_idx:end_idx]
  572. else:
  573. # 按顺序选择,应用 start 和 count
  574. start_idx = start_index - 1
  575. end_idx = start_idx + inspiration_count
  576. inspirations_to_process = inspiration_list[start_idx:end_idx]
  577. print(f"\n将处理以下灵感:")
  578. for i, insp in enumerate(inspirations_to_process, 1):
  579. print(f" {i}. {insp}")
  580. # 批量执行流程
  581. results = []
  582. for i, inspiration in enumerate(inspirations_to_process, 1):
  583. print(f"\n{'#' * 80}")
  584. print(f"处理第 {i}/{len(inspirations_to_process)} 个灵感: {inspiration}")
  585. print(f"{'#' * 80}")
  586. # search_only 模式不创建 trace,search_and_match 需要 trace
  587. if search_only:
  588. result = await run_full_analysis(
  589. persona_dir=persona_dir,
  590. inspiration=inspiration,
  591. max_tasks=max_tasks,
  592. force=force,
  593. current_time=None,
  594. log_url=None,
  595. enable_step2=enable_step2,
  596. search_only=search_only,
  597. search_and_match=search_and_match
  598. )
  599. else:
  600. # 为每个灵感创建独立的 trace
  601. insp_time, insp_log_url = set_trace()
  602. with trace(f"灵感分析: {inspiration}"):
  603. result = await run_full_analysis(
  604. persona_dir=persona_dir,
  605. inspiration=inspiration,
  606. max_tasks=max_tasks,
  607. force=force,
  608. current_time=insp_time,
  609. log_url=insp_log_url,
  610. enable_step2=enable_step2,
  611. search_only=search_only,
  612. search_and_match=search_and_match
  613. )
  614. if insp_log_url:
  615. print(f"本次 Trace: {insp_log_url}")
  616. results.append(result)
  617. # 输出最终汇总
  618. print(f"\n{'=' * 80}")
  619. print(f"批量处理完成")
  620. print(f"{'=' * 80}")
  621. success_count = sum(1 for r in results if r["status"] == "success")
  622. print(f"\n成功: {success_count}/{len(results)}")
  623. for i, (insp, result) in enumerate(zip(inspirations_to_process, results), 1):
  624. status_icon = "✓" if result["status"] == "success" else "✗"
  625. print(f" {status_icon} [{i}] {insp}")
if __name__ == "__main__":
    # No trace is set up for the overall run; main() sets one up per inspiration.
    asyncio.run(main())