# extract_inspirations.py
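"""
Extract the list of inspiration points from "what" deconstruction results.

Reads every JSON file in the given folder, extracts the inspiration points,
and saves them next to that folder (i.e. in its parent directory).
"""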
  5. import json
  6. import os
  7. from pathlib import Path
  8. from typing import List
  9. from lib.utils import read_json
  10. def extract_inspirations_from_file(file_path: str) -> List[dict]:
  11. """从单个 what 解构文件中提取所有灵感点
  12. Args:
  13. file_path: JSON 文件路径
  14. Returns:
  15. 灵感点列表,每个元素包含 灵感点 和 meta 字段
  16. """
  17. # 从文件名提取 note_id(第一个下划线之前的部分)
  18. filename = os.path.basename(file_path)
  19. note_id = filename.split('_')[0]
  20. try:
  21. data = read_json(file_path)
  22. except Exception as e:
  23. print(f"⚠️ 读取文件失败: {file_path} - {e}")
  24. return []
  25. inspirations = []
  26. # 提取灵感点
  27. san_dian = data.get("三点解构", {})
  28. ling_gan_dian = san_dian.get("灵感点", {})
  29. # 三个类别:全新内容、共性差异、共性内容
  30. for category in ["全新内容", "共性差异", "共性内容"]:
  31. items = ling_gan_dian.get(category, [])
  32. for item in items:
  33. inspiration_text = item.get("灵感点", "")
  34. if inspiration_text:
  35. # 构建 meta 字段:原有字段 + note_id + category + what文件路径,但排除"灵感点"字段
  36. meta = {k: v for k, v in item.items() if k != "灵感点"}
  37. meta["note_id"] = note_id
  38. meta["category"] = category
  39. meta["what_file"] = file_path
  40. inspirations.append({
  41. "灵感点": inspiration_text,
  42. "meta": meta
  43. })
  44. return inspirations
  45. def extract_inspirations_from_folder(folder_path: str) -> List[dict]:
  46. """从文件夹中提取所有灵感点
  47. Args:
  48. folder_path: what 解构结果文件夹路径
  49. Returns:
  50. 灵感点列表(保留所有,不去重)
  51. """
  52. folder = Path(folder_path)
  53. if not folder.exists():
  54. raise FileNotFoundError(f"文件夹不存在: {folder_path}")
  55. # 收集所有 JSON 文件
  56. json_files = sorted(list(folder.glob("*.json")))
  57. print(f"\n找到 {len(json_files)} 个 JSON 文件")
  58. # 提取所有灵感点
  59. all_inspirations = []
  60. for json_file in json_files:
  61. inspirations = extract_inspirations_from_file(str(json_file))
  62. all_inspirations.extend(inspirations)
  63. if inspirations:
  64. print(f" ✓ {json_file.name}: {len(inspirations)} 个灵感点")
  65. print(f"\n总计提取: {len(all_inspirations)} 个灵感点")
  66. return all_inspirations
  67. def save_inspirations(inspirations: List[dict], output_dir: str):
  68. """保存灵感点列表
  69. Args:
  70. inspirations: 灵感点列表(包含 灵感点 和 meta 字段)
  71. output_dir: 输出目录
  72. """
  73. output_file = os.path.join(output_dir, "灵感点.json")
  74. with open(output_file, 'w', encoding='utf-8') as f:
  75. json.dump(inspirations, f, ensure_ascii=False, indent=2)
  76. print(f"\n✓ 灵感点列表已保存到: {output_file}")
  77. def main():
  78. """主函数"""
  79. import sys
  80. # 命令行参数:what 解构结果文件夹路径
  81. if len(sys.argv) > 1:
  82. what_folder = sys.argv[1]
  83. else:
  84. what_folder = "data/阿里多多酱/out/人设_v2/what解构结果"
  85. print(f"{'=' * 80}")
  86. print(f"从 what 解构结果中提取灵感点")
  87. print(f"{'=' * 80}")
  88. print(f"输入文件夹: {what_folder}")
  89. try:
  90. # 提取灵感点
  91. inspirations = extract_inspirations_from_folder(what_folder)
  92. # 确定输出目录(输入文件夹的父目录,即同级目录)
  93. what_folder_path = Path(what_folder)
  94. output_dir = what_folder_path.parent # data/阿里多多酱/out/人设_v2
  95. # 保存结果
  96. save_inspirations(inspirations, str(output_dir))
  97. # 显示前10个灵感点
  98. print(f"\n{'=' * 80}")
  99. print(f"灵感点预览(前10个):")
  100. print(f"{'=' * 80}")
  101. for i, item in enumerate(inspirations[:10], 1):
  102. print(f"{i}. {item['灵感点']}")
  103. if len(inspirations) > 10:
  104. print(f"... 还有 {len(inspirations) - 10} 个")
  105. except Exception as e:
  106. print(f"\n❌ 错误: {e}")
  107. import traceback
  108. traceback.print_exc()
  109. if __name__ == "__main__":
  110. main()