data_loader.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
"""
General-purpose data loading module.

Provides the data-loading functions commonly used across the project.
"""
  5. import os
  6. import sys
  7. from typing import List
  8. from lib.utils import read_json
  9. def load_persona_data(persona_dir: str) -> dict:
  10. """加载人设数据
  11. Args:
  12. persona_dir: 人设目录路径
  13. Returns:
  14. 人设数据字典
  15. Raises:
  16. SystemExit: 文件不存在时退出
  17. """
  18. persona_data_path = os.path.join(persona_dir, "人设.json")
  19. try:
  20. return read_json(persona_data_path)
  21. except FileNotFoundError:
  22. print(f"❌ 找不到人设数据文件: {persona_data_path}")
  23. print(f"请检查路径是否正确: {persona_dir}")
  24. sys.exit(1)
  25. def load_inspiration_list(persona_dir: str) -> List[str]:
  26. """加载灵感点列表
  27. Args:
  28. persona_dir: 人设目录路径
  29. Returns:
  30. 灵感点文本列表
  31. Raises:
  32. SystemExit: 文件不存在或格式错误时退出
  33. """
  34. inspiration_list_path = os.path.join(persona_dir, "灵感点.json")
  35. try:
  36. inspiration_data = read_json(inspiration_list_path)
  37. if not isinstance(inspiration_data, list) or len(inspiration_data) == 0:
  38. print(f"❌ 灵感文件格式错误或为空: {inspiration_list_path}")
  39. sys.exit(1)
  40. return [item["灵感点"] for item in inspiration_data]
  41. except FileNotFoundError:
  42. print(f"❌ 找不到灵感文件: {inspiration_list_path}")
  43. print("请先运行 extract_inspirations.py 生成灵感点文件")
  44. sys.exit(1)
  45. def load_inspiration_data(persona_dir: str) -> List[dict]:
  46. """加载完整的灵感点数据(包含 meta 信息)
  47. Args:
  48. persona_dir: 人设目录路径
  49. Returns:
  50. 灵感点数据列表,每项包含 {"灵感点": str, "meta": dict}
  51. Raises:
  52. SystemExit: 文件不存在或格式错误时退出
  53. """
  54. inspiration_list_path = os.path.join(persona_dir, "灵感点.json")
  55. try:
  56. inspiration_data = read_json(inspiration_list_path)
  57. if not isinstance(inspiration_data, list) or len(inspiration_data) == 0:
  58. print(f"❌ 灵感文件格式错误或为空: {inspiration_list_path}")
  59. sys.exit(1)
  60. return inspiration_data
  61. except FileNotFoundError:
  62. print(f"❌ 找不到灵感文件: {inspiration_list_path}")
  63. print("请先运行 extract_inspirations.py 生成灵感点文件")
  64. sys.exit(1)
  65. def select_inspiration(inspiration_arg: str, inspiration_list: List[str]) -> str:
  66. """根据参数选择灵感
  67. Args:
  68. inspiration_arg: 灵感参数(数字索引或灵感名称)
  69. inspiration_list: 灵感点文本列表
  70. Returns:
  71. 选中的灵感点文本
  72. Raises:
  73. SystemExit: 选择失败时退出
  74. """
  75. try:
  76. # 尝试作为索引解析
  77. inspiration_index = int(inspiration_arg)
  78. if 0 <= inspiration_index < len(inspiration_list):
  79. inspiration = inspiration_list[inspiration_index]
  80. print(f"使用灵感[{inspiration_index}]: {inspiration}")
  81. return inspiration
  82. else:
  83. print(f"❌ 灵感索引超出范围: {inspiration_index} (有效范围: 0-{len(inspiration_list)-1})")
  84. except ValueError:
  85. # 不是数字,当作灵感名称
  86. if inspiration_arg in inspiration_list:
  87. print(f"使用灵感: {inspiration_arg}")
  88. return inspiration_arg
  89. else:
  90. print(f"❌ 找不到灵感: {inspiration_arg}")
  91. # 显示可用灵感列表后退出
  92. print(f"可用灵感列表:")
  93. for i, insp in enumerate(inspiration_list[:10]):
  94. print(f" {i}: {insp}")
  95. if len(inspiration_list) > 10:
  96. print(f" ... 还有 {len(inspiration_list) - 10} 个")
  97. sys.exit(1)
  98. def load_step1_result(persona_dir: str, inspiration: str, model_name: str, scope: str = "all") -> dict:
  99. """加载 step1 匹配结果
  100. Args:
  101. persona_dir: 人设目录路径
  102. inspiration: 灵感点名称
  103. model_name: 模型名称(如 "google/gemini-2.5-pro")
  104. scope: 范围标识("all" 或 "top10" 等)
  105. Returns:
  106. step1 结果字典
  107. Raises:
  108. SystemExit: 文件不存在时退出
  109. """
  110. # 提取模型简称
  111. model_name_short = model_name.replace("google/", "").replace("/", "_")
  112. # 构建文件路径
  113. step1_file = os.path.join(
  114. persona_dir,
  115. "how",
  116. "灵感点",
  117. inspiration,
  118. f"{scope}_step1_灵感人设匹配_{model_name_short}.json"
  119. )
  120. try:
  121. return read_json(step1_file)
  122. except FileNotFoundError:
  123. print(f"❌ 找不到 step1 结果文件: {step1_file}")
  124. print(f"请先运行 step1_inspiration_match.py 生成结果")
  125. sys.exit(1)