# lisihan / content-find-agent-new
"""Centralized config file read / parse / hash (V2-M1B).

Pure refactor: `policy_json` and `walk_strategy_json` previously inlined
read_text + json.loads + sha256. This module concentrates those so M1C's
converter and validators share one read path. Behavior is unchanged — hashes
are still taken over the RAW on-disk text (never re-serialized), so
`policy_bundle_hash` stays byte-identical.
"""

from __future__ import annotations

import hashlib
import json
from pathlib import Path
from typing import Any


def read_text(path: Path) -> str:
    """Return the full contents of *path*, decoded as UTF-8.

    *path* is wrapped in ``Path`` first, so a plain string path is accepted
    as well.
    """
    with Path(path).open("r", encoding="utf-8") as handle:
        return handle.read()


# Cache validated by mtime + size: within a single run, configs such as
# walk_policy get read 3+ times (walk_engine + every recall), so an unchanged
# file is not re-read from disk or re-parsed. Callers agree not to mutate the
# returned parsed object in place (all config in the repo is consumed
# read-only).
# Layout: str(path) -> ((st_mtime_ns, st_size), parsed, raw_text).
_CACHE: dict[str, tuple[tuple[int, int], Any, str]] = {}


def load_json(path: Path) -> tuple[Any, str]:
    """Load a JSON file and return ``(parsed, raw_text)``.

    ``raw_text`` is the text as read from disk (never re-serialized), so
    callers can hash it directly. Results are memoized in ``_CACHE`` per path
    string and reused while the file's ``(st_mtime_ns, st_size)`` is unchanged.
    On a cache hit the same parsed object is returned again, so callers must
    not mutate it in place.

    Errors from ``stat()``, the file read, or ``json.loads`` propagate to the
    caller; nothing is cached in that case.
    """
    target = Path(path)
    # Stat before reading: if the file is rewritten between the two steps, the
    # stored fingerprint is the older one and the next call re-reads.
    info = target.stat()
    fingerprint = (info.st_mtime_ns, info.st_size)
    cache_key = str(target)

    entry = _CACHE.get(cache_key)
    if entry is not None:
        cached_fingerprint, cached_parsed, cached_raw = entry
        if cached_fingerprint == fingerprint:
            return cached_parsed, cached_raw

    raw_text = read_text(target)
    document = json.loads(raw_text)
    _CACHE[cache_key] = (fingerprint, document, raw_text)
    return document, raw_text


def sha256_text(text: str) -> str:
    """Return the lowercase hex SHA-256 digest of *text* encoded as UTF-8."""
    hasher = hashlib.sha256()
    hasher.update(text.encode("utf-8"))
    return hasher.hexdigest()