# analyze_inspiration_results.py
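"""
Analyze inspiration-point matching results.

Reads the step1 and step2 results of every inspiration under the
how/灵感点 directory, sorts them by step1 score and then by step2 score,
and writes a summary report.
"""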
  6. import os
  7. import json
  8. import argparse
  9. from pathlib import Path
  10. from typing import List, Dict
  11. def collect_inspiration_results(persona_dir: str) -> List[Dict]:
  12. """收集所有灵感的匹配结果
  13. Args:
  14. persona_dir: 人设目录路径
  15. Returns:
  16. 结果列表,每项包含灵感名称、step1 结果、step2 结果
  17. """
  18. inspiration_base_dir = os.path.join(persona_dir, "how", "灵感点")
  19. if not os.path.exists(inspiration_base_dir):
  20. print(f"❌ 目录不存在: {inspiration_base_dir}")
  21. return []
  22. results = []
  23. # 遍历所有灵感目录
  24. for inspiration_name in os.listdir(inspiration_base_dir):
  25. inspiration_dir = os.path.join(inspiration_base_dir, inspiration_name)
  26. # 跳过非目录
  27. if not os.path.isdir(inspiration_dir):
  28. continue
  29. # 查找 step1 文件
  30. step1_files = list(Path(inspiration_dir).glob("*_step1_*.json"))
  31. if not step1_files:
  32. continue
  33. step1_file = str(step1_files[0])
  34. # 查找 step2 文件
  35. step2_files = list(Path(inspiration_dir).glob("*_step2_*.json"))
  36. step2_file = str(step2_files[0]) if step2_files else None
  37. # 读取 step1 结果
  38. try:
  39. with open(step1_file, 'r', encoding='utf-8') as f:
  40. step1_data = json.load(f)
  41. except Exception as e:
  42. print(f"⚠️ 读取 step1 失败: {inspiration_name}, {e}")
  43. continue
  44. # 提取 step1 top1 信息(输入信息 + 完整匹配结果)
  45. step1_results = step1_data.get("匹配结果列表", [])
  46. if not step1_results:
  47. continue
  48. step1_top1 = step1_results[0]
  49. step1_input_info = step1_top1.get("输入信息", {})
  50. step1_match_result = step1_top1.get("匹配结果", {})
  51. step1_score = step1_match_result.get("score", 0)
  52. step1_element = step1_top1.get("业务信息", {}).get("匹配要素", "")
  53. # 读取 step2 结果(如果存在,包含输入信息 + 完整匹配结果)
  54. step2_input_info = None
  55. step2_match_result = None
  56. step2_score = None
  57. step2_word_count = 0
  58. if step2_file:
  59. try:
  60. with open(step2_file, 'r', encoding='utf-8') as f:
  61. step2_data = json.load(f)
  62. step2_input_info = step2_data.get("输入信息", {})
  63. step2_match_result = step2_data.get("匹配结果", {})
  64. step2_score = step2_match_result.get("score", 0)
  65. step2_b_content = step2_input_info.get("B", "")
  66. step2_word_count = len(step2_b_content.split("\n")) if step2_b_content else 0
  67. except Exception as e:
  68. print(f"⚠️ 读取 step2 失败: {inspiration_name}, {e}")
  69. # 构建结果项(包含输入信息和完整匹配结果)
  70. result_item = {
  71. "灵感": inspiration_name,
  72. "step1": {
  73. "输入信息": step1_input_info,
  74. "匹配结果": step1_match_result,
  75. "匹配要素": step1_element
  76. },
  77. "step2": {
  78. "输入信息": step2_input_info,
  79. "匹配结果": step2_match_result,
  80. "增量词数量": step2_word_count
  81. } if step2_file else None,
  82. "文件信息": {
  83. "step1": os.path.basename(step1_file),
  84. "step2": os.path.basename(step2_file) if step2_file else None
  85. }
  86. }
  87. results.append(result_item)
  88. return results
  89. def main():
  90. """主函数"""
  91. parser = argparse.ArgumentParser(
  92. description="分析灵感点匹配结果",
  93. formatter_class=argparse.RawDescriptionHelpFormatter,
  94. epilog="""
  95. 使用示例:
  96. # 分析默认目录
  97. python analyze_inspiration_results.py
  98. # 指定人设目录
  99. python analyze_inspiration_results.py --dir data/阿里多多酱/out/人设_1110
  100. # 指定输出文件
  101. python analyze_inspiration_results.py --output analysis_report.json
  102. """
  103. )
  104. parser.add_argument(
  105. "--dir",
  106. default="data/阿里多多酱/out/人设_1110",
  107. help="人设目录路径 (默认: data/阿里多多酱/out/人设_1110)"
  108. )
  109. parser.add_argument(
  110. "--output",
  111. default=None,
  112. help="输出文件路径 (默认: 在人设目录下的 how/灵感匹配分析.json)"
  113. )
  114. args = parser.parse_args()
  115. persona_dir = args.dir
  116. print(f"{'=' * 80}")
  117. print(f"灵感点匹配结果分析")
  118. print(f"{'=' * 80}")
  119. print(f"人设目录: {persona_dir}\n")
  120. # 收集结果
  121. results = collect_inspiration_results(persona_dir)
  122. if not results:
  123. print("❌ 未找到任何灵感结果")
  124. return
  125. print(f"找到 {len(results)} 个灵感的匹配结果\n")
  126. # 排序:先按 step1 score 降序,再按 step2 score 降序
  127. # step2 score 为 None 的排在最后
  128. def sort_key(item):
  129. step1_score = item["step1"]["匹配结果"].get("score", 0)
  130. step2_score = item["step2"]["匹配结果"].get("score", 0) if item["step2"] else -1
  131. return (-step1_score, -step2_score)
  132. results.sort(key=sort_key)
  133. # 构建输出
  134. output_data = {
  135. "元数据": {
  136. "人设目录": persona_dir,
  137. "灵感总数": len(results),
  138. "排序规则": "先按 step1 score 降序,再按 step2 score 降序"
  139. },
  140. "排序结果": results
  141. }
  142. # 输出统计信息
  143. has_step2 = sum(1 for r in results if r["step2"] is not None)
  144. print(f"统计信息:")
  145. print(f" 总灵感数: {len(results)}")
  146. print(f" 完成 step2: {has_step2}")
  147. print(f" 仅 step1: {len(results) - has_step2}")
  148. # Top 5 预览
  149. print(f"\nTop 5 灵感 (按排序规则):")
  150. for i, item in enumerate(results[:5], 1):
  151. step1_score = item["step1"]["匹配结果"].get("score", 0)
  152. step2_score = item["step2"]["匹配结果"].get("score", 0) if item["step2"] else None
  153. step2_info = f", step2: {step2_score:.2f}" if step2_score is not None else ""
  154. print(f" {i}. {item['灵感']}")
  155. print(f" step1: {step1_score:.2f} → {item['step1']['匹配要素']}{step2_info}")
  156. # 保存结果
  157. if args.output:
  158. output_file = args.output
  159. else:
  160. output_file = os.path.join(persona_dir, "how", "灵感匹配分析.json")
  161. os.makedirs(os.path.dirname(output_file), exist_ok=True)
  162. with open(output_file, 'w', encoding='utf-8') as f:
  163. json.dump(output_data, f, ensure_ascii=False, indent=2)
  164. print(f"\n完成!分析结果已保存到: {output_file}\n")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()