data_loader.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. """
  2. 通用数据加载模块
  3. 提供项目中常用的数据加载函数
  4. """
  5. import os
  6. import sys
  7. from typing import List
  8. from lib.utils import read_json
  9. def load_persona_data(persona_dir: str) -> dict:
  10. """加载人设数据
  11. Args:
  12. persona_dir: 人设目录路径
  13. Returns:
  14. 人设数据字典
  15. Raises:
  16. SystemExit: 文件不存在时退出
  17. """
  18. persona_data_path = os.path.join(persona_dir, "人设.json")
  19. try:
  20. return read_json(persona_data_path)
  21. except FileNotFoundError:
  22. print(f"❌ 找不到人设数据文件: {persona_data_path}")
  23. print(f"请检查路径是否正确: {persona_dir}")
  24. sys.exit(1)
  25. def load_inspiration_list(persona_dir: str) -> List[str]:
  26. """加载灵感点列表(简化版本,仅包含名称)
  27. Args:
  28. persona_dir: 人设目录路径
  29. Returns:
  30. 灵感点文本列表
  31. Raises:
  32. SystemExit: 文件不存在或格式错误时退出
  33. """
  34. inspiration_list_path = os.path.join(persona_dir, "灵感点.json")
  35. try:
  36. inspiration_list = read_json(inspiration_list_path)
  37. if not isinstance(inspiration_list, list) or len(inspiration_list) == 0:
  38. print(f"❌ 灵感文件格式错误或为空: {inspiration_list_path}")
  39. sys.exit(1)
  40. # 直接返回字符串列表(简化版本)
  41. return inspiration_list
  42. except FileNotFoundError:
  43. print(f"❌ 找不到灵感文件: {inspiration_list_path}")
  44. print("请先运行 extract_inspirations.py 生成灵感点文件")
  45. sys.exit(1)
  46. def load_inspiration_data(persona_dir: str) -> List[dict]:
  47. """加载完整的灵感点数据(包含 meta 信息)
  48. Args:
  49. persona_dir: 人设目录路径
  50. Returns:
  51. 灵感点数据列表,每项包含 {"灵感点": str, "meta": dict}
  52. Raises:
  53. SystemExit: 文件不存在或格式错误时退出
  54. """
  55. inspiration_detail_path = os.path.join(persona_dir, "灵感点_详细.json")
  56. try:
  57. inspiration_data = read_json(inspiration_detail_path)
  58. if not isinstance(inspiration_data, list) or len(inspiration_data) == 0:
  59. print(f"❌ 灵感详细文件格式错误或为空: {inspiration_detail_path}")
  60. sys.exit(1)
  61. return inspiration_data
  62. except FileNotFoundError:
  63. print(f"❌ 找不到灵感详细文件: {inspiration_detail_path}")
  64. print("请先运行 extract_inspirations.py 生成灵感点文件")
  65. sys.exit(1)
  66. def select_inspiration(inspiration_arg: str, inspiration_list: List[str]) -> str:
  67. """根据参数选择灵感
  68. Args:
  69. inspiration_arg: 灵感参数(数字索引或灵感名称)
  70. inspiration_list: 灵感点文本列表
  71. Returns:
  72. 选中的灵感点文本
  73. Raises:
  74. SystemExit: 选择失败时退出
  75. """
  76. try:
  77. # 尝试作为索引解析
  78. inspiration_index = int(inspiration_arg)
  79. if 0 <= inspiration_index < len(inspiration_list):
  80. inspiration = inspiration_list[inspiration_index]
  81. print(f"使用灵感[{inspiration_index}]: {inspiration}")
  82. return inspiration
  83. else:
  84. print(f"❌ 灵感索引超出范围: {inspiration_index} (有效范围: 0-{len(inspiration_list)-1})")
  85. except ValueError:
  86. # 不是数字,当作灵感名称
  87. if inspiration_arg in inspiration_list:
  88. print(f"使用灵感: {inspiration_arg}")
  89. return inspiration_arg
  90. else:
  91. print(f"❌ 找不到灵感: {inspiration_arg}")
  92. # 显示可用灵感列表后退出
  93. print(f"可用灵感列表:")
  94. for i, insp in enumerate(inspiration_list[:10]):
  95. print(f" {i}: {insp}")
  96. if len(inspiration_list) > 10:
  97. print(f" ... 还有 {len(inspiration_list) - 10} 个")
  98. sys.exit(1)
  99. def load_step1_result(persona_dir: str, inspiration: str, model_name: str, scope: str = "all") -> dict:
  100. """加载 step1 匹配结果
  101. Args:
  102. persona_dir: 人设目录路径
  103. inspiration: 灵感点名称
  104. model_name: 模型名称(如 "google/gemini-2.5-pro")
  105. scope: 范围标识("all" 或 "top10" 等)
  106. Returns:
  107. step1 结果字典
  108. Raises:
  109. SystemExit: 文件不存在时退出
  110. """
  111. # 提取模型简称
  112. model_name_short = model_name.replace("google/", "").replace("/", "_")
  113. # 构建文件路径
  114. step1_file = os.path.join(
  115. persona_dir,
  116. "how",
  117. "灵感点",
  118. inspiration,
  119. f"{scope}_step1_灵感人设匹配_{model_name_short}.json"
  120. )
  121. try:
  122. return read_json(step1_file)
  123. except FileNotFoundError:
  124. print(f"❌ 找不到 step1 结果文件: {step1_file}")
  125. print(f"请先运行 step1_inspiration_match.py 生成结果")
  126. sys.exit(1)