#!/usr/bin/env python3 """ 人设树 JSON 精简处理:去掉指定字段,输出到目标目录。 进一步精简建议(可选): --minify 单行 JSON,去掉缩进与多余空白(体积可降约 40%+,推荐) --round N 数值保留 N 位小数(默认 4,可再减小体积) --short-keys 用短键名(t/n/w/r/c/ch),体积最小,但读取时需配合 KEY_MAP 还原 """ import argparse import json from pathlib import Path # 需要移除的字段 FIELDS_TO_REMOVE = {"_post_ids", "_child_categories_relation", "_child_categories_relation_detail"} # 短键名映射(仅当 --short-keys 时使用) KEY_MAP = { "_type": "t", "_post_count": "n", "_persona_weight_score": "w", "_ratio": "r", "_is_constant": "c", "_is_local_constant": "lc", "children": "ch", } KEY_MAP_INV = {v: k for k, v in KEY_MAP.items()} INPUT_DIR = Path(__file__).resolve().parent / "input/家有大志/原始数据/tree" OUTPUT_DIR = Path(__file__).resolve().parent / "input/家有大志/tree" def strip_fields(obj): """递归移除指定字段。""" if isinstance(obj, dict): for key in list(obj.keys()): if key in FIELDS_TO_REMOVE: del obj[key] else: strip_fields(obj[key]) elif isinstance(obj, list): for item in obj: strip_fields(item) return obj def round_floats(obj, ndigits: int): """递归将浮点数四舍五入到 ndigits 位。""" if isinstance(obj, dict): for k, v in obj.items(): obj[k] = round_floats(v, ndigits) elif isinstance(obj, list): for i, v in enumerate(obj): obj[i] = round_floats(v, ndigits) elif isinstance(obj, float): return round(obj, ndigits) return obj def abbreviate_keys(obj): """递归将已知长键名替换为短键名(仅处理 KEY_MAP 中的键)。""" if isinstance(obj, dict): new_obj = {} for k, v in obj.items(): new_key = KEY_MAP.get(k, k) new_obj[new_key] = abbreviate_keys(v) return new_obj if isinstance(obj, list): return [abbreviate_keys(x) for x in obj] return obj def expand_keys(obj): """递归将短键名还原为长键名(读取 --short-keys 输出的文件时使用)。""" if isinstance(obj, dict): new_obj = {} for k, v in obj.items(): new_key = KEY_MAP_INV.get(k, k) new_obj[new_key] = expand_keys(v) return new_obj if isinstance(obj, list): return [expand_keys(x) for x in obj] return obj def process_tree_json( in_path: Path, out_path: Path, *, minify: bool = False, round_ndigits: int | None = None, short_keys: bool = False, ) -> None: """读取一个树 JSON,精简后写入 out_path。""" with open(in_path, "r", encoding="utf-8") as f: data = json.load(f) strip_fields(data) if round_ndigits is not None: round_floats(data, round_ndigits) if short_keys: data = abbreviate_keys(data) out_path.parent.mkdir(parents=True, exist_ok=True) with open(out_path, "w", encoding="utf-8") as f: json.dump( data, f, ensure_ascii=False, indent=None if minify else 2, separators=(",", ":") if minify else (", ", ": "), ) if minify: f.write("\n") def main(): parser = argparse.ArgumentParser(description="人设树 JSON 精简") parser.add_argument("--minify", action="store_true", help="单行 JSON,减小体积") parser.add_argument("--round", type=int, default=None, metavar="N", help="数值保留 N 位小数") parser.add_argument("--short-keys", action="store_true", help="使用短键名(读取时需还原)") args = parser.parse_args() INPUT_DIR.mkdir(parents=True, exist_ok=True) OUTPUT_DIR.mkdir(parents=True, exist_ok=True) for in_file in sorted(INPUT_DIR.glob("*.json")): out_file = OUTPUT_DIR / in_file.name process_tree_json( in_file, out_file, minify=args.minify, round_ndigits=args.round, short_keys=args.short_keys, ) size = out_file.stat().st_size print(f"已处理: {in_file.name} -> {out_file} ({size:,} B)") if __name__ == "__main__": main()