run_inspiration_analysis.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. """
  2. 主流程脚本:串联 Step1 和 Step2
  3. 执行完整的灵感分析流程:
  4. 1. Step1: 灵感与人设匹配(调用 step1 main,自动保存结果)
  5. 2. Step2: 增量词在人设中的匹配(调用 step2 main,自动保存结果)
  6. 3. 生成流程汇总文件
  7. """
  8. import os
  9. import sys
  10. import json
  11. import asyncio
  12. from agents import trace
  13. from lib.my_trace import set_trace_smith as set_trace
  14. from lib.data_loader import load_inspiration_list, select_inspiration
  15. from lib.utils import read_json
  16. # 导入 step1 和 step2 的 main 函数
  17. import step1_inspiration_match
  18. import step2_incremental_match
  19. def find_step1_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
  20. """查找 step1 输出文件
  21. Args:
  22. persona_dir: 人设目录
  23. inspiration: 灵感点名称
  24. max_tasks: 任务数限制
  25. Returns:
  26. step1 文件路径
  27. """
  28. from pathlib import Path
  29. step1_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  30. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  31. step1_pattern = f"{scope_prefix}_step1_*.json"
  32. step1_files = list(Path(step1_dir).glob(step1_pattern))
  33. if not step1_files:
  34. raise FileNotFoundError(f"找不到 step1 输出文件: {step1_dir}/{step1_pattern}")
  35. return str(step1_files[0])
  36. def find_step2_output(persona_dir: str, inspiration: str, max_tasks: int = None) -> str:
  37. """查找 step2 输出文件
  38. Args:
  39. persona_dir: 人设目录
  40. inspiration: 灵感点名称
  41. max_tasks: 任务数限制
  42. Returns:
  43. step2 文件路径
  44. """
  45. from pathlib import Path
  46. step2_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  47. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  48. step2_pattern = f"{scope_prefix}_step2_*.json"
  49. step2_files = list(Path(step2_dir).glob(step2_pattern))
  50. if not step2_files:
  51. raise FileNotFoundError(f"找不到 step2 输出文件: {step2_dir}/{step2_pattern}")
  52. return str(step2_files[0])
  53. async def run_full_analysis(
  54. persona_dir: str,
  55. inspiration: str,
  56. max_tasks: int = None,
  57. current_time: str = None,
  58. log_url: str = None
  59. ) -> dict:
  60. """执行完整的灵感分析流程(Step1 + Step2)
  61. Args:
  62. persona_dir: 人设目录路径
  63. inspiration: 灵感点文本
  64. max_tasks: step1 最大任务数(None 表示不限制)
  65. current_time: 当前时间戳
  66. log_url: 日志链接
  67. Returns:
  68. 包含文件路径和状态的字典
  69. """
  70. print(f"\n{'=' * 80}")
  71. print(f"开始完整分析流程: {inspiration}")
  72. print(f"{'=' * 80}\n")
  73. # ========== Step1: 灵感与人设匹配 ==========
  74. print(f"{'─' * 80}")
  75. print(f"Step1: 灵感与人设匹配")
  76. print(f"{'─' * 80}\n")
  77. # 临时修改 sys.argv 来传递参数给 step1
  78. original_argv = sys.argv.copy()
  79. sys.argv = [
  80. "step1_inspiration_match.py",
  81. persona_dir,
  82. inspiration,
  83. str(max_tasks) if max_tasks is not None else "all"
  84. ]
  85. try:
  86. # 调用 step1 的 main 函数
  87. await step1_inspiration_match.main(current_time, log_url)
  88. finally:
  89. # 恢复原始参数
  90. sys.argv = original_argv
  91. # 查找 step1 输出文件
  92. step1_file = find_step1_output(persona_dir, inspiration, max_tasks)
  93. print(f"✓ Step1 完成,结果文件: {step1_file}\n")
  94. # 读取 step1 结果
  95. step1_data = read_json(step1_file)
  96. step1_results = step1_data.get("匹配结果列表", [])
  97. if not step1_results:
  98. print("⚠️ Step1 结果为空,终止流程")
  99. return {
  100. "step1_file": step1_file,
  101. "step2_file": None,
  102. "summary_file": None,
  103. "status": "step1_empty"
  104. }
  105. step1_top1 = step1_results[0]
  106. step1_score = step1_top1.get('匹配结果', {}).get('score', 0)
  107. step1_element = step1_top1.get("业务信息", {}).get("匹配要素", "")
  108. print(f"Top1 匹配要素: {step1_element}, score: {step1_score:.2f}")
  109. # ========== Step2: 增量词匹配 ==========
  110. print(f"\n{'─' * 80}")
  111. print(f"Step2: 增量词在人设中的匹配")
  112. print(f"{'─' * 80}\n")
  113. # 临时修改 sys.argv 来传递参数给 step2
  114. sys.argv = [
  115. "step2_incremental_match.py",
  116. persona_dir,
  117. inspiration
  118. ]
  119. try:
  120. # 调用 step2 的 main 函数
  121. await step2_incremental_match.main(current_time, log_url)
  122. finally:
  123. # 恢复原始参数
  124. sys.argv = original_argv
  125. # 查找 step2 输出文件
  126. step2_file = find_step2_output(persona_dir, inspiration, max_tasks)
  127. print(f"✓ Step2 完成,结果文件: {step2_file}\n")
  128. # 读取 step2 结果
  129. step2_data = read_json(step2_file)
  130. step2_score = step2_data.get("匹配结果", {}).get("score", 0)
  131. step2_b_content = step2_data.get("输入信息", {}).get("B", "")
  132. step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
  133. # ========== 保存流程汇总 ==========
  134. output_dir = os.path.join(persona_dir, "how", "灵感点", inspiration)
  135. scope_prefix = f"top{max_tasks}" if max_tasks is not None else "all"
  136. # 从 step1 文件名提取模型名称
  137. step1_filename = os.path.basename(step1_file)
  138. model_short = step1_filename.split("_")[-1].replace(".json", "")
  139. summary_filename = f"{scope_prefix}_summary_完整流程_{model_short}.json"
  140. summary_file = os.path.join(output_dir, summary_filename)
  141. summary = {
  142. "元数据": {
  143. "current_time": current_time,
  144. "log_url": log_url,
  145. "流程": "Step1 + Step2 完整分析",
  146. "step1_model": step1_data.get("元数据", {}).get("model", ""),
  147. "step2_model": step2_data.get("元数据", {}).get("model", "")
  148. },
  149. "灵感": inspiration,
  150. "文件路径": {
  151. "step1": step1_file,
  152. "step2": step2_file
  153. },
  154. "关键指标": {
  155. "step1_top1_score": step1_score,
  156. "step1_top1_匹配要素": step1_element,
  157. "step2_增量词数量": step2_word_count,
  158. "step2_score": step2_score
  159. }
  160. }
  161. with open(summary_file, 'w', encoding='utf-8') as f:
  162. json.dump(summary, f, ensure_ascii=False, indent=2)
  163. print(f"{'=' * 80}")
  164. print(f"完整流程执行完成")
  165. print(f"{'=' * 80}")
  166. print(f"\n结果文件:")
  167. print(f" Step1: {step1_file}")
  168. print(f" Step2: {step2_file}")
  169. print(f" 汇总: {summary_file}\n")
  170. return {
  171. "step1_file": step1_file,
  172. "step2_file": step2_file,
  173. "summary_file": summary_file,
  174. "status": "success"
  175. }
  176. async def main(current_time: str, log_url: str):
  177. """主函数"""
  178. # 解析命令行参数
  179. persona_dir = sys.argv[1] if len(sys.argv) > 1 else "data/阿里多多酱/out/人设_1110"
  180. # 第二个参数:灵感数量限制,默认为 1(处理第一个灵感)
  181. # 可以是数字(如 1, 5, 10)或 "all"(所有灵感)
  182. inspiration_count_arg = sys.argv[2] if len(sys.argv) > 2 else "1"
  183. # 第三个参数:step1 任务数限制,默认为 None(所有任务)
  184. max_tasks = None if len(sys.argv) > 3 and sys.argv[3] == "all" else (
  185. int(sys.argv[3]) if len(sys.argv) > 3 else None
  186. )
  187. print(f"{'=' * 80}")
  188. print(f"灵感分析主流程 (Step1 + Step2)")
  189. print(f"{'=' * 80}")
  190. print(f"人设目录: {persona_dir}")
  191. # 加载灵感列表
  192. inspiration_list = load_inspiration_list(persona_dir)
  193. # 确定要处理的灵感数量
  194. if inspiration_count_arg == "all":
  195. inspiration_count = len(inspiration_list)
  196. print(f"处理灵感: 全部 ({inspiration_count} 个)")
  197. else:
  198. inspiration_count = int(inspiration_count_arg)
  199. print(f"处理灵感: 前 {inspiration_count} 个")
  200. if max_tasks:
  201. print(f"Step1 任务数限制: {max_tasks}")
  202. # 选择要处理的灵感列表
  203. inspirations_to_process = inspiration_list[:inspiration_count]
  204. print(f"\n将处理以下灵感:")
  205. for i, insp in enumerate(inspirations_to_process, 1):
  206. print(f" {i}. {insp}")
  207. # 批量执行流程
  208. results = []
  209. for i, inspiration in enumerate(inspirations_to_process, 1):
  210. print(f"\n{'#' * 80}")
  211. print(f"处理第 {i}/{len(inspirations_to_process)} 个灵感")
  212. print(f"{'#' * 80}")
  213. result = await run_full_analysis(
  214. persona_dir=persona_dir,
  215. inspiration=inspiration,
  216. max_tasks=max_tasks,
  217. current_time=current_time,
  218. log_url=log_url
  219. )
  220. results.append(result)
  221. # 输出最终汇总
  222. print(f"\n{'=' * 80}")
  223. print(f"批量处理完成")
  224. print(f"{'=' * 80}")
  225. success_count = sum(1 for r in results if r["status"] == "success")
  226. print(f"\n成功: {success_count}/{len(results)}")
  227. for i, (insp, result) in enumerate(zip(inspirations_to_process, results), 1):
  228. status_icon = "✓" if result["status"] == "success" else "✗"
  229. print(f" {status_icon} [{i}] {insp}")
  230. if log_url:
  231. print(f"\nTrace: {log_url}")
  232. if __name__ == "__main__":
  233. # 设置 trace
  234. current_time, log_url = set_trace()
  235. # 使用 trace 上下文包裹整个执行流程
  236. with trace("灵感分析完整流程 (Step1+Step2)"):
  237. asyncio.run(main(current_time, log_url))