| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- #!/usr/bin/env python3
- """
- 人设树 JSON 精简处理:去掉指定字段,输出到目标目录。
- 进一步精简建议(可选):
- --minify 单行 JSON,去掉缩进与多余空白(体积可降约 40%+,推荐)
- --round N 数值保留 N 位小数(默认不做舍入;例如 --round 4,可再减小体积)
- --short-keys 用短键名(t/n/w/r/c/lc/ch),体积最小,但读取时需用 expand_keys(KEY_MAP_INV)还原
- """
- import argparse
- import json
- from pathlib import Path
# Keys that are deleted from every dict in the tree before it is written out.
FIELDS_TO_REMOVE = {
    "_post_ids",
    "_child_categories_relation",
    "_child_categories_relation_detail",
}

# Long key -> short key. Only applied when --short-keys is given.
KEY_MAP = {
    "_type": "t",
    "_post_count": "n",
    "_persona_weight_score": "w",
    "_ratio": "r",
    "_is_constant": "c",
    "_is_local_constant": "lc",
    "children": "ch",
}

# Short key -> long key, derived from KEY_MAP so the two can never drift apart.
KEY_MAP_INV = dict(zip(KEY_MAP.values(), KEY_MAP.keys()))

# Both directories are resolved relative to the location of this script.
INPUT_DIR = Path(__file__).resolve().parent.joinpath("input/家有大志/原始数据/tree")
OUTPUT_DIR = Path(__file__).resolve().parent.joinpath("input/家有大志/tree")
def strip_fields(obj, *, fields=None):
    """Recursively delete unwanted keys from a JSON-like structure, in place.

    Dicts and lists are walked and modified in place; any other value is
    left untouched. The value stored under a deleted key is not visited.

    Args:
        obj: A parsed JSON value (dict, list, or scalar).
        fields: Collection of key names to delete. When omitted, the
            module-level ``FIELDS_TO_REMOVE`` is used.

    Returns:
        The same ``obj`` that was passed in.
    """
    if fields is None:
        fields = FIELDS_TO_REMOVE
    if isinstance(obj, dict):
        # Snapshot the keys first: entries are deleted while looping.
        for key in list(obj):
            if key in fields:
                del obj[key]
            else:
                strip_fields(obj[key], fields=fields)
    elif isinstance(obj, list):
        for item in obj:
            strip_fields(item, fields=fields)
    return obj
def round_floats(obj, ndigits: int):
    """Round every float found in a JSON-like structure to ``ndigits`` places.

    Dicts and lists are updated in place and the same container object is
    returned. A bare float is returned as a new rounded value. Any other
    value (int, bool, str, None, ...) is returned unchanged.
    """
    if isinstance(obj, float):
        return round(obj, ndigits)
    if isinstance(obj, dict):
        # Only existing keys are reassigned, so iterating the dict is safe.
        for name in obj:
            obj[name] = round_floats(obj[name], ndigits)
    elif isinstance(obj, list):
        # Slice assignment keeps the identity of the list object.
        obj[:] = [round_floats(element, ndigits) for element in obj]
    return obj
def abbreviate_keys(obj):
    """Return a copy of ``obj`` with known long keys replaced by short ones.

    Dict keys found in ``KEY_MAP`` are swapped for their short form; all
    other keys are kept as they are. Dicts and lists are rebuilt
    recursively; any other value is returned unchanged.
    """
    if isinstance(obj, list):
        return [abbreviate_keys(element) for element in obj]
    if isinstance(obj, dict):
        return {
            KEY_MAP.get(name, name): abbreviate_keys(value)
            for name, value in obj.items()
        }
    return obj
def expand_keys(obj):
    """Return a copy of ``obj`` with short keys restored to their long names.

    Intended for reading files written with ``--short-keys``. Dict keys found
    in ``KEY_MAP_INV`` are replaced by the long form; all other keys are kept.
    Dicts and lists are rebuilt recursively; any other value is returned
    unchanged.
    """
    if isinstance(obj, list):
        return [expand_keys(element) for element in obj]
    if not isinstance(obj, dict):
        return obj
    restored = {}
    for short_name, value in obj.items():
        restored[KEY_MAP_INV.get(short_name, short_name)] = expand_keys(value)
    return restored
def process_tree_json(
    in_path: Path,
    out_path: Path,
    *,
    minify: bool = False,
    round_ndigits: int | None = None,
    short_keys: bool = False,
) -> None:
    """Read one tree JSON file, slim it down, and write it to ``out_path``.

    Args:
        in_path: JSON file to read (UTF-8).
        out_path: Destination file; missing parent directories are created.
        minify: Write compact single-line JSON followed by a newline instead
            of 2-space indented JSON.
        round_ndigits: When not None, round every float to this many places.
        short_keys: Replace known long keys with their short form via
            ``abbreviate_keys``.
    """
    with open(in_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    strip_fields(data)
    if round_ndigits is not None:
        # Keep the return value: a document whose top level is a bare float
        # is returned rounded rather than mutated in place.
        data = round_floats(data, round_ndigits)
    if short_keys:
        data = abbreviate_keys(data)

    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(
            data,
            f,
            ensure_ascii=False,
            indent=None if minify else 2,
            # With indent set, separators=None selects (",", ": "), which
            # avoids a trailing space after every comma at end of line.
            separators=(",", ":") if minify else None,
        )
        if minify:
            f.write("\n")
def main():
    """Parse the command line and slim every ``*.json`` file in INPUT_DIR.

    Each input file is written to OUTPUT_DIR under the same file name, and a
    one-line summary with the resulting size is printed per file. Both
    directories are created if they do not exist yet.
    """
    cli = argparse.ArgumentParser(description="人设树 JSON 精简")
    cli.add_argument("--minify", action="store_true", help="单行 JSON,减小体积")
    cli.add_argument("--round", type=int, default=None, metavar="N", help="数值保留 N 位小数")
    cli.add_argument("--short-keys", action="store_true", help="使用短键名(读取时需还原)")
    opts = cli.parse_args()

    for directory in (INPUT_DIR, OUTPUT_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    # The same options apply to every file, so build them once.
    options = {
        "minify": opts.minify,
        "round_ndigits": opts.round,
        "short_keys": opts.short_keys,
    }
    # Sorted so files are processed and reported in a stable order.
    for source in sorted(INPUT_DIR.glob("*.json")):
        target = OUTPUT_DIR / source.name
        process_tree_json(source, target, **options)
        written = target.stat().st_size
        print(f"已处理: {source.name} -> {target} ({written:,} B)")


if __name__ == "__main__":
    main()
|