"""
Main pipeline script: chains Step1 and Step2.

Runs the complete inspiration analysis flow:
1. Step1: match the inspiration against the persona (calls step1 main, which saves its result automatically)
2. Step2: match the incremental words within the persona (calls step2 main, which saves its result automatically)
3. Generate the pipeline summary file
"""
import argparse
import asyncio
import json
import os
import random
import sys

from agents import trace

from lib.my_trace import set_trace_smith as set_trace
from lib.data_loader import load_inspiration_list, select_inspiration
from lib.utils import read_json

# Step1 / Step2 modules; this script awaits their main() functions.
import step1_inspiration_match
import step2_incremental_match
  21. def find_step1_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
  22. """查找 step1 输出文件
  23. Args:
  24. persona_dir: 人设目录
  25. inspiration: 灵感点名称
  26. max_tasks: 任务数限制
  27. Returns:
  28. step1 文件路径
  29. """
  30. from pathlib import Path
  31. step1_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  32. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  33. step1_pattern = f"{scope_prefix}_step1_*.json"
  34. step1_files = list(Path(step1_dir).glob(step1_pattern))
  35. if not step1_files:
  36. raise FileNotFoundError(f"找不到 step1 输出文件: {step1_dir}/{step1_pattern}")
  37. return str(step1_files[0])
  38. def find_step2_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
  39. """查找 step2 输出文件
  40. Args:
  41. persona_dir: 人设目录
  42. inspiration: 灵感点名称
  43. max_tasks: 任务数限制
  44. Returns:
  45. step2 文件路径
  46. """
  47. from pathlib import Path
  48. step2_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  49. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  50. step2_pattern = f"{scope_prefix}_step2_*.json"
  51. step2_files = list(Path(step2_dir).glob(step2_pattern))
  52. if not step2_files:
  53. raise FileNotFoundError(f"找不到 step2 输出文件: {step2_dir}/{step2_pattern}")
  54. return str(step2_files[0])
  55. async def run_full_analysis(
  56. persona_dir: str,
  57. inspiration: str,
  58. max_tasks: int = None,
  59. force: bool = False,
  60. current_time: str = None,
  61. log_url: str = None,
  62. step1_only: bool = False
  63. ) -> dict:
  64. """执行完整的灵感分析流程(Step1 + Step2)
  65. Args:
  66. persona_dir: 人设目录路径
  67. inspiration: 灵感点文本
  68. max_tasks: step1 最大任务数(None 表示不限制)
  69. force: 是否强制重新执行(跳过文件存在检查)
  70. current_time: 当前时间戳
  71. log_url: 日志链接
  72. step1_only: 是否只执行 Step1,跳过 Step2
  73. Returns:
  74. 包含文件路径和状态的字典
  75. """
  76. print(f"\n{'=' * 80}")
  77. print(f"开始完整分析流程: {inspiration}")
  78. print(f"{'=' * 80}\n")
  79. # ========== Step1: 灵感与人设匹配 ==========
  80. print(f"{'─' * 80}")
  81. print(f"Step1: 灵感与人设匹配")
  82. print(f"{'─' * 80}\n")
  83. # 临时修改 sys.argv 来传递参数给 step1
  84. original_argv = sys.argv.copy()
  85. sys.argv = [
  86. "step1_inspiration_match.py",
  87. persona_dir,
  88. inspiration,
  89. str(max_tasks) if max_tasks is not None else "all"
  90. ]
  91. try:
  92. # 调用 step1 的 main 函数(通过参数传递 force)
  93. await step1_inspiration_match.main(current_time, log_url, force=force)
  94. finally:
  95. # 恢复原始参数
  96. sys.argv = original_argv
  97. # 查找 step1 输出文件
  98. step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
  99. print(f"✓ Step1 完成,结果文件: {step1_file}\n")
  100. # 读取 step1 结果
  101. step1_data = read_json(step1_file)
  102. step1_results = step1_data.get("匹配结果列表", [])
  103. if not step1_results:
  104. print("⚠️ Step1 结果为空,终止流程")
  105. return {
  106. "step1_file": step1_file,
  107. "step2_file": None,
  108. "summary_file": None,
  109. "status": "step1_empty"
  110. }
  111. step1_top1 = step1_results[0]
  112. step1_score = step1_top1.get('匹配结果', {}).get('score', 0)
  113. step1_element = step1_top1.get("业务信息", {}).get("匹配要素", "")
  114. print(f"Top1 匹配要素: {step1_element}, score: {step1_score:.2f}")
  115. # 如果只执行 Step1,直接返回
  116. if step1_only:
  117. print(f"\n{'=' * 80}")
  118. print(f"Step1 执行完成(跳过 Step2)")
  119. print(f"{'=' * 80}")
  120. print(f"\n结果文件:")
  121. print(f" Step1: {step1_file}\n")
  122. return {
  123. "step1_file": step1_file,
  124. "step2_file": None,
  125. "summary_file": None,
  126. "status": "step1_only"
  127. }
  128. # ========== Step2: 增量词匹配 ==========
  129. print(f"\n{'─' * 80}")
  130. print(f"Step2: 增量词在人设中的匹配")
  131. print(f"{'─' * 80}\n")
  132. # 临时修改 sys.argv 来传递参数给 step2
  133. sys.argv = [
  134. "step2_incremental_match.py",
  135. persona_dir,
  136. inspiration
  137. ]
  138. try:
  139. # 调用 step2 的 main 函数(通过参数传递 force)
  140. await step2_incremental_match.main(current_time, log_url, force=force)
  141. finally:
  142. # 恢复原始参数
  143. sys.argv = original_argv
  144. # 查找 step2 输出文件
  145. step2_file = find_step2_output(persona_dir, inspiration, max_tasks)
  146. print(f"✓ Step2 完成,结果文件: {step2_file}\n")
  147. # 读取 step2 结果
  148. step2_data = read_json(step2_file)
  149. step2_score = step2_data.get("匹配结果", {}).get("score", 0)
  150. step2_b_content = step2_data.get("输入信息", {}).get("B", "")
  151. step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
  152. # ========== 保存流程汇总 ==========
  153. output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  154. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  155. # 从 step1 文件名提取模型名称
  156. step1_filename = os.path.basename(step1_file)
  157. model_short = step1_filename.split("_")[-1].replace(".json", "")
  158. summary_filename = f"{scope_prefix}_summary_完整流程_{model_short}.json"
  159. summary_file = os.path.join(output_dir, summary_filename)
  160. summary = {
  161. "元数据": {
  162. "current_time": current_time,
  163. "log_url": log_url,
  164. "流程": "Step1 + Step2 完整分析",
  165. "step1_model": step1_data.get("元数据", {}).get("model", ""),
  166. "step2_model": step2_data.get("元数据", {}).get("model", "")
  167. },
  168. "灵感": inspiration,
  169. "文件路径": {
  170. "step1": step1_file,
  171. "step2": step2_file
  172. },
  173. "关键指标": {
  174. "step1_top1_score": step1_score,
  175. "step1_top1_匹配要素": step1_element,
  176. "step2_增量词数量": step2_word_count,
  177. "step2_score": step2_score
  178. }
  179. }
  180. with open(summary_file, 'w', encoding='utf-8') as f:
  181. json.dump(summary, f, ensure_ascii=False, indent=2)
  182. print(f"{'=' * 80}")
  183. print(f"完整流程执行完成")
  184. print(f"{'=' * 80}")
  185. print(f"\n结果文件:")
  186. print(f" Step1: {step1_file}")
  187. print(f" Step2: {step2_file}")
  188. print(f" 汇总: {summary_file}\n")
  189. return {
  190. "step1_file": step1_file,
  191. "step2_file": step2_file,
  192. "summary_file": summary_file,
  193. "status": "success"
  194. }
  195. async def main():
  196. """主函数"""
  197. # 解析命令行参数
  198. parser = argparse.ArgumentParser(
  199. description="灵感分析主流程 (Step1 + Step2)",
  200. formatter_class=argparse.RawDescriptionHelpFormatter,
  201. epilog="""
  202. 使用示例:
  203. # 处理第1个灵感
  204. python run_inspiration_analysis.py --dir data/阿里多多酱/out/人设_1110 --count 1
  205. # 随机处理5个灵感
  206. python run_inspiration_analysis.py --count 5 --shuffle
  207. # 处理所有灵感,强制重新执行
  208. python run_inspiration_analysis.py --count all --force
  209. # 处理前10个灵感,step1只处理前20个任务
  210. python run_inspiration_analysis.py --count 10 --max-tasks 20
  211. # 只执行 Step1,跳过 Step2
  212. python run_inspiration_analysis.py --count 5 --step1-only
  213. """
  214. )
  215. parser.add_argument(
  216. "--dir",
  217. default="data/阿里多多酱/out/人设_1110",
  218. help="人设目录路径 (默认: data/阿里多多酱/out/人设_1110)"
  219. )
  220. parser.add_argument(
  221. "--count",
  222. default="1",
  223. help="处理的灵感数量,可以是数字或 'all' (默认: 1)"
  224. )
  225. parser.add_argument(
  226. "--max-tasks",
  227. type=str,
  228. default="all",
  229. help="Step1 处理的最大任务数,可以是数字或 'all' (默认: all)"
  230. )
  231. parser.add_argument(
  232. "--force",
  233. action="store_true",
  234. help="强制重新执行,覆盖已存在的文件"
  235. )
  236. parser.add_argument(
  237. "--shuffle",
  238. action="store_true",
  239. help="随机选择灵感,而不是按顺序"
  240. )
  241. parser.add_argument(
  242. "--step1-only",
  243. action="store_true",
  244. help="只执行 Step1,跳过 Step2"
  245. )
  246. args = parser.parse_args()
  247. persona_dir = args.dir
  248. force = args.force
  249. shuffle = args.shuffle
  250. step1_only = args.step1_only
  251. # 处理 max_tasks
  252. max_tasks = None if args.max_tasks == "all" else int(args.max_tasks)
  253. print(f"{'=' * 80}")
  254. print(f"灵感分析主流程 (Step1 + Step2)")
  255. print(f"{'=' * 80}")
  256. print(f"人设目录: {persona_dir}")
  257. # 加载灵感列表
  258. inspiration_list = load_inspiration_list(persona_dir)
  259. # 确定要处理的灵感数量
  260. if args.count == "all":
  261. inspiration_count = len(inspiration_list)
  262. print(f"处理灵感: 全部 ({inspiration_count} 个)")
  263. else:
  264. inspiration_count = int(args.count)
  265. print(f"处理灵感: 前 {inspiration_count} 个")
  266. if max_tasks:
  267. print(f"Step1 任务数限制: {max_tasks}")
  268. if force:
  269. print(f"强制模式: 重新执行所有步骤")
  270. if shuffle:
  271. print(f"随机模式: 随机选择灵感")
  272. if step1_only:
  273. print(f"Step1 Only: 只执行 Step1,跳过 Step2")
  274. # 选择要处理的灵感列表
  275. if shuffle:
  276. # 随机打乱灵感列表后选择
  277. shuffled_list = inspiration_list.copy()
  278. random.shuffle(shuffled_list)
  279. inspirations_to_process = shuffled_list[:inspiration_count]
  280. else:
  281. # 按顺序选择前 N 个
  282. inspirations_to_process = inspiration_list[:inspiration_count]
  283. print(f"\n将处理以下灵感:")
  284. for i, insp in enumerate(inspirations_to_process, 1):
  285. print(f" {i}. {insp}")
  286. # 批量执行流程
  287. results = []
  288. for i, inspiration in enumerate(inspirations_to_process, 1):
  289. print(f"\n{'#' * 80}")
  290. print(f"处理第 {i}/{len(inspirations_to_process)} 个灵感: {inspiration}")
  291. print(f"{'#' * 80}")
  292. # 为每个灵感创建独立的 trace
  293. insp_time, insp_log_url = set_trace()
  294. with trace(f"灵感分析: {inspiration}"):
  295. result = await run_full_analysis(
  296. persona_dir=persona_dir,
  297. inspiration=inspiration,
  298. max_tasks=max_tasks,
  299. force=force,
  300. current_time=insp_time,
  301. log_url=insp_log_url,
  302. step1_only=step1_only
  303. )
  304. results.append(result)
  305. if insp_log_url:
  306. print(f"本次 Trace: {insp_log_url}")
  307. # 输出最终汇总
  308. print(f"\n{'=' * 80}")
  309. print(f"批量处理完成")
  310. print(f"{'=' * 80}")
  311. success_count = sum(1 for r in results if r["status"] == "success")
  312. print(f"\n成功: {success_count}/{len(results)}")
  313. for i, (insp, result) in enumerate(zip(inspirations_to_process, results), 1):
  314. status_icon = "✓" if result["status"] == "success" else "✗"
  315. print(f" {status_icon} [{i}] {insp}")
  316. if __name__ == "__main__":
  317. # 主流程不设置 trace,由每个灵感独立设置
  318. asyncio.run(main())