"""Check (or rewrite) config JSON files into the canonical byte form.

Grown from the M1 byte-equal feasibility spike (2026-06-09). The V2 plan
(`tech_documents/工程落地/06_V2阶段开发计划.md`, V2-M1) locks the converter to a
single canonical formatter so Excel->JSON output is byte-equal to the runtime
JSON and `policy_bundle_hash` (`policy_json.py`) stays stable.

Canonical form = `json.dumps(data, indent=2, ensure_ascii=False)` + trailing "\n",
preserving key insertion order (no sort).

Spike findings this encodes:
- `douyin_rule_packs.v1.json` (the only file hashed by policy_json) already
  round-trips to exact bytes under this formatter.
- `douyin_walk_strategy.v1.json` is hand-mixed (leaf objects collapsed to single
  lines); it is NOT hashed and no test pins its bytes, so `--write` re-normalizes
  it once to the canonical form with zero runtime impact.

Usage:
    python scripts/check_config_json_canonical.py            # --check (default)
    python scripts/check_config_json_canonical.py --write    # rewrite in place
    python scripts/check_config_json_canonical.py --check path/to/other.json
Exit code: 0 = all canonical (or written), 1 = drift found in --check mode.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path
from typing import Any

# Parent of the directory that contains this (symlink-resolved) file.
# Presumably the repository root, given the `scripts/...` invocation shown in
# the module docstring -- confirm if the script is ever relocated.
ROOT = Path(__file__).resolve().parents[1]

# Default config JSONs the V2 converter governs. These are relative paths;
# main() joins every non-absolute target onto ROOT before touching it.
DEFAULT_TARGETS = [
    Path("product_documents/规则包/douyin_rule_packs.v1.json"),
    Path("product_documents/抖音游走策略/douyin_walk_strategy.v1.json"),
]


def canonical_dumps(data: Any) -> str:
    """Render *data* as canonical JSON text.

    The canonical text is two-space indented, keeps non-ASCII characters
    literal (no ``\\uXXXX`` escaping), preserves the key order of the input
    mappings (no sorting), and ends with exactly one newline.

    Args:
        data: Any JSON-serializable value.

    Returns:
        The canonical text, terminated by ``"\\n"``.
    """
    body = json.dumps(data, ensure_ascii=False, indent=2)
    return f"{body}\n"


def _first_diff(a: bytes, b: bytes) -> dict[str, Any] | None:
    """Locate the first byte at which a file departs from its canonical form.

    Args:
        a: The bytes actually found in the file.
        b: The canonical bytes the file is expected to contain.

    Returns:
        ``None`` when the inputs are identical. Otherwise a dict with
        ``byte_offset`` (index of the first difference) and either

        * ``expected`` / ``actual``: excerpts of the canonical bytes and the
          file bytes around the mismatch (up to 40 bytes on each side),
          decoded as UTF-8 with replacement characters, or
        * ``note``: a length summary, when one input is a strict prefix of
          the other.
    """
    for i, (file_byte, canonical_byte) in enumerate(zip(a, b)):
        if file_byte != canonical_byte:
            lo = max(0, i - 40)
            hi = i + 40
            # The window may cut a multi-byte UTF-8 sequence in half, hence
            # errors="replace" rather than a strict decode.
            return {
                "byte_offset": i,
                # "expected" is the canonical rendering, "actual" is what is
                # on disk; the original had these two labels swapped.
                "expected": b[lo:hi].decode("utf-8", "replace"),
                "actual": a[lo:hi].decode("utf-8", "replace"),
            }
    if len(a) != len(b):
        shared = min(len(a), len(b))
        return {"byte_offset": shared, "note": f"length differs: canonical={len(b)} file={len(a)}"}
    return None


def _check_one(path: Path) -> dict[str, Any]:
    """Compare one JSON file against its canonical byte form.

    Args:
        path: Path of an existing file to inspect.

    Returns:
        A finding dict. It always carries ``config_path`` (relative to ROOT
        when the file lives under it, otherwise the path as given),
        ``canonical`` (bool) and ``file_bytes``. When the file parses it also
        carries ``canonical_bytes`` and, on drift, ``first_diff``. When the
        file cannot be decoded or parsed it carries ``error`` and ``detail``
        instead, so one broken file is reported rather than aborting the run.

    Raises:
        OSError: If the file cannot be read.
    """
    raw = path.read_bytes()

    # relative_to() raises ValueError for paths outside ROOT (for example an
    # absolute path given on the command line); fall back to the raw path.
    try:
        config_path = str(path.relative_to(ROOT))
    except ValueError:
        config_path = str(path)

    try:
        canonical = canonical_dumps(json.loads(raw.decode("utf-8"))).encode("utf-8")
    except (UnicodeError, json.JSONDecodeError) as exc:
        # Covers invalid UTF-8, a leading BOM, malformed JSON, and values that
        # cannot be re-encoded as UTF-8 (for example lone surrogates).
        return {
            "config_path": config_path,
            "canonical": False,
            "file_bytes": len(raw),
            "error": "invalid_json",
            "detail": f"{type(exc).__name__}: {exc}",
        }

    ok = raw == canonical
    finding: dict[str, Any] = {
        "config_path": config_path,
        "canonical": ok,
        "file_bytes": len(raw),
        "canonical_bytes": len(canonical),
    }
    if not ok:
        finding["first_diff"] = _first_diff(raw, canonical)
    return finding


def main() -> int:
    """Run the check or rewrite over every target and print a JSON report.

    Targets come from the command line, falling back to DEFAULT_TARGETS;
    relative targets are resolved against ROOT.

    Returns:
        Process exit code. In check mode: 0 when every target is canonical,
        1 otherwise (including missing targets). In write mode: 0 when every
        target was processed, 1 when any target was missing or unparseable.
    """
    args = _parse_args()
    targets = [p if p.is_absolute() else ROOT / p for p in (args.paths or DEFAULT_TARGETS)]

    findings: list[dict[str, Any]] = []
    rewritten: list[dict[str, Any]] = []
    for path in targets:
        if not path.exists():
            # Report the miss in whichever list the active mode prints;
            # previously a missing target in write mode vanished silently.
            if args.write:
                rewritten.append({"config_path": str(path), "changed": False, "error": "not_found"})
            else:
                findings.append({"config_path": str(path), "canonical": False, "error": "not_found"})
            continue

        if not args.write:
            findings.append(_check_one(path))
            continue

        # relative_to() raises ValueError for targets outside ROOT.
        try:
            shown = str(path.relative_to(ROOT))
        except ValueError:
            shown = str(path)

        before = path.read_bytes()
        try:
            canonical = canonical_dumps(json.loads(before.decode("utf-8"))).encode("utf-8")
        except (UnicodeError, json.JSONDecodeError) as exc:
            rewritten.append(
                {
                    "config_path": shown,
                    "changed": False,
                    "error": "invalid_json",
                    "detail": f"{type(exc).__name__}: {exc}",
                }
            )
            continue

        changed = before != canonical
        if changed:
            # write_bytes, not write_text: text mode translates "\n" to the
            # platform line separator, which would break byte-equality on
            # Windows. Skipping unchanged files also leaves their mtime alone.
            path.write_bytes(canonical)
        rewritten.append({"config_path": shown, "changed": changed})

    if args.write:
        result = {"mode": "write", "rewritten": rewritten}
        print(json.dumps(result, ensure_ascii=False, indent=2))
        return 1 if any("error" in entry for entry in rewritten) else 0

    status = "fail" if any(not f.get("canonical") for f in findings) else "pass"
    print(json.dumps({"mode": "check", "status": status, "findings": findings}, ensure_ascii=False, indent=2))
    return 1 if status == "fail" else 0


def _parse_args() -> argparse.Namespace:
    """Parse the command line (``sys.argv``).

    Returns:
        A namespace with ``paths`` (list of ``Path``, empty when none were
        given), ``write`` (bool) and ``check`` (bool). ``--check`` and
        ``--write`` are mutually exclusive; with neither flag the caller is
        expected to treat the run as a check.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        # Keep the module docstring's line breaks (usage examples) intact
        # instead of letting argparse re-wrap them into one paragraph.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("paths", nargs="*", type=Path, help="JSON files (default: the two config JSONs)")
    mode = parser.add_mutually_exclusive_group()
    # The module docstring advertises `--check`; accept it explicitly so the
    # documented invocation does not fail with "unrecognized arguments".
    mode.add_argument(
        "--check",
        action="store_true",
        help="verify files are canonical without modifying them (default)",
    )
    mode.add_argument("--write", action="store_true", help="rewrite files in canonical form instead of checking")
    return parser.parse_args()


if __name__ == "__main__":
    sys.exit(main())