extract_simple_tree_node.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
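"""
Extract node names and their `_ratio` values from the persona-tree JSON files
under 「处理后数据/tree」 and write them as indented plain text that is easy for a
large language model to read.

Output: input/{account_name}/处理后数据/simple_tree/simple_tree.txt
"""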
  5. from __future__ import annotations
  6. import argparse
  7. import json
  8. import sys
  9. from pathlib import Path
  10. from typing import Any
  11. def _format_ratio_paren(ratio: Any) -> str:
  12. """概率两位小数,拼在节点名后的括号内;无 _ratio 时为 —。"""
  13. if ratio is None:
  14. return "—"
  15. try:
  16. return f"{float(ratio):.3f}"
  17. except (TypeError, ValueError):
  18. return str(ratio)
  19. def _walk_node(name: str, node: dict[str, Any], depth: int, lines: list[str]) -> None:
  20. if not isinstance(node, dict):
  21. return
  22. indent = " " * depth
  23. r = node.get("_ratio")
  24. lines.append(f"{indent}{name}({_format_ratio_paren(r)})")
  25. children = node.get("children")
  26. if not isinstance(children, dict):
  27. return
  28. for child_name, child in children.items():
  29. if isinstance(child, dict):
  30. _walk_node(str(child_name), child, depth + 1, lines)
  31. def _tree_json_to_lines(data: Any) -> list[str]:
  32. lines: list[str] = []
  33. if not isinstance(data, dict):
  34. return lines
  35. for root_name, root_node in data.items():
  36. if isinstance(root_node, dict):
  37. _walk_node(str(root_name), root_node, 0, lines)
  38. return lines
  39. def extract_simple_tree_for_account(account_name: str) -> Path:
  40. base = Path(__file__).resolve().parents[1]
  41. tree_dir = base / "input" / account_name / "处理后数据" / "tree"
  42. out_dir = base / "input" / account_name / "处理后数据" / "simple_tree"
  43. out_file = out_dir / "simple_tree.txt"
  44. if not tree_dir.is_dir():
  45. raise FileNotFoundError(f"目录不存在: {tree_dir}")
  46. json_paths = sorted(tree_dir.glob("*.json"))
  47. if not json_paths:
  48. raise FileNotFoundError(f"未找到 JSON 文件: {tree_dir}")
  49. out_dir.mkdir(parents=True, exist_ok=True)
  50. chunks: list[str] = []
  51. chunks.append("说明: 每行「节点名(概率)」;概率表示该节点在账号下出现的频率;缩进表示层级;root节点无概率用 — 表示。\n分三颗树:\n实质-内容的核心主题/对象\n形式-内容的呈现形式\n意图-内容的目标/用户意图\n")
  52. for jp in json_paths:
  53. chunks.append("\n" + "=" * 72 + "\n")
  54. with jp.open(encoding="utf-8") as f:
  55. data = json.load(f)
  56. lines = _tree_json_to_lines(data)
  57. chunks.append("\n".join(lines))
  58. chunks.append("\n")
  59. text = "".join(chunks)
  60. out_file.write_text(text, encoding="utf-8")
  61. return out_file
  62. def main(account_name) -> None:
  63. # p = argparse.ArgumentParser(description="人设树节点简化:节点名与概率 节点(0.00)")
  64. # p.add_argument("account_name", help="账号目录名,对应 input/{account_name}/")
  65. # args = p.parse_args()
  66. try:
  67. out = extract_simple_tree_for_account(account_name)
  68. print(f"已写入: {out}")
  69. except FileNotFoundError as e:
  70. print(str(e), file=sys.stderr)
  71. sys.exit(1)
# Script entry point: when executed directly, run the export for a single
# hard-coded account directory name.
if __name__ == "__main__":
    main(account_name="家有大志")