from __future__ import annotations import json import re from dataclasses import dataclass from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] MANIFEST_PATH = ROOT / "tech_documents/命名规范/naming_standards_manifest.json" REPORT_JSON = ROOT / "tech_documents/命名规范/naming_standards_report.json" REPORT_MD = ROOT / "tech_documents/命名规范/naming_standards_report.md" SCAN_ROOTS = [ "product_documents", "tech_documents", "content_agent", "tests", "sql", "scripts", ] TEXT_EXTENSIONS = { ".md", ".json", ".py", ".sql", ".toml", ".txt", } @dataclass(frozen=True) class Finding: term: str path: str line_number: int line: str reason: str def main() -> None: manifest = json.loads(MANIFEST_PATH.read_text(encoding="utf-8")) banned_terms = manifest["banned_terms"] exception_paths = manifest["allowed_exception_paths"] violations: list[Finding] = [] allowed_exceptions: list[Finding] = [] for path in _iter_scan_files(): rel_path = path.relative_to(ROOT).as_posix() text = path.read_text(encoding="utf-8", errors="ignore") for line_number, line in enumerate(text.splitlines(), start=1): for term in banned_terms: if not _line_contains_term(line, term): continue if _is_allowed_exception(rel_path, line, exception_paths, term): allowed_exceptions.append( Finding(term, rel_path, line_number, line.strip(), "allowed_exception") ) else: violations.append( Finding(term, rel_path, line_number, line.strip(), "violation") ) payload = { "status": "fail" if violations else "pass", "violation_count": len(violations), "allowed_exception_count": len(allowed_exceptions), "violations": [finding.__dict__ for finding in violations], "allowed_exceptions": [finding.__dict__ for finding in allowed_exceptions], } REPORT_JSON.write_text( json.dumps(payload, ensure_ascii=False, indent=2) + "\n", encoding="utf-8", ) REPORT_MD.write_text(_render_markdown(payload), encoding="utf-8") print(json.dumps({"status": payload["status"], "violations": len(violations)}, ensure_ascii=False)) if violations: raise SystemExit(1) def _iter_scan_files() -> list[Path]: paths: list[Path] = [] for root in SCAN_ROOTS: base = ROOT / root if not base.exists(): continue for path in base.rglob("*"): if not path.is_file(): continue if "__pycache__" in path.parts: continue if path.name in { "naming_standards_report.json", "naming_standards_report.md", }: continue if path.suffix not in TEXT_EXTENSIONS: continue paths.append(path) return sorted(paths) def _line_contains_term(line: str, term: str) -> bool: if term.endswith(".jsonl"): return term in line return re.search( rf"(? bool: if any(rel_path.startswith(path) or rel_path == path for path in exception_paths): return True legacy_runtime_aliases = { "queries.jsonl", "candidate_pool.jsonl", "media_assets.jsonl", "source_edges.jsonl", "trace_events.jsonl", } if rel_path == "tech_documents/数据库字段总览/content_agent_schema_registry.json" and any( alias in line for alias in legacy_runtime_aliases ): return True if rel_path in {"scripts/validate_schema_registry.py", "scripts/check_naming_standards.py"} and any( alias in line for alias in legacy_runtime_aliases ): return True if rel_path == "scripts/check_naming_standards.py" and term == "evidence_refs": return True if "platform_raw_payload" in line: return True if "source_post_id" in line or "matched_post_ids" in line or "video_ids" in line: return True if term == "evidence_refs" and rel_path in { "tech_documents/数据接口与来源/01_DemandAgent输入合同.md", "tech_documents/Pattern回扣与分类树/00_全链路说明.md", "tech_documents/Pattern回扣与分类树/02_前置坑与FAQ.md", }: return True return False def _render_markdown(payload: dict[str, Any]) -> str: lines = [ "# 命名规范检查报告", "", f"- 状态:`{payload['status']}`", f"- 违规数量:`{payload['violation_count']}`", f"- 允许例外数量:`{payload['allowed_exception_count']}`", "", ] if payload["violations"]: lines.extend(["## 违规残留", ""]) for finding in payload["violations"][:200]: lines.append( f"- `{finding['term']}` at `{finding['path']}:{finding['line_number']}`: {finding['line']}" ) if len(payload["violations"]) > 200: lines.append(f"- 其余 {len(payload['violations']) - 200} 条见 JSON 报告。") lines.append("") if payload["allowed_exceptions"]: lines.extend(["## 允许例外", ""]) for finding in payload["allowed_exceptions"][:100]: lines.append( f"- `{finding['term']}` at `{finding['path']}:{finding['line_number']}`" ) if len(payload["allowed_exceptions"]) > 100: lines.append(f"- 其余 {len(payload['allowed_exceptions']) - 100} 条见 JSON 报告。") lines.append("") return "\n".join(lines) + "\n" if __name__ == "__main__": main()