| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179 |
- from __future__ import annotations
- import json
- import re
- from dataclasses import dataclass
- from pathlib import Path
- from typing import Any
# Repository root: the parent of the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]

# Manifest that is read for the rules, and the two report files that are written.
MANIFEST_PATH = ROOT / "tech_documents/命名规范/naming_standards_manifest.json"
REPORT_JSON = ROOT / "tech_documents/命名规范/naming_standards_report.json"
REPORT_MD = ROOT / "tech_documents/命名规范/naming_standards_report.md"

# Directories (relative to ROOT) that are walked recursively.
SCAN_ROOTS = [
    "product_documents", "tech_documents", "content_agent",
    "tests", "sql", "scripts",
]

# Only files whose suffix is listed here are scanned.
TEXT_EXTENSIONS = {".md", ".json", ".py", ".sql", ".toml", ".txt"}
@dataclass(frozen=True)
class Finding:
    """Immutable record of a single banned-term hit on one line of one file."""

    # The banned term that matched.
    term: str
    # POSIX-style path of the file, relative to the repository root.
    path: str
    # 1-based line number of the hit within the file.
    line_number: int
    # The matching line with surrounding whitespace stripped.
    line: str
    # Classification of the hit: "violation" or "allowed_exception".
    reason: str
def main() -> None:
    """Scan the repository for banned terms and write the naming reports.

    Loads the banned terms and exception paths from the manifest, checks every
    line of every scanned file against every banned term, and sorts each hit
    into either the violations or the allowed exceptions. The result is written
    as a JSON report and a Markdown report, and a one-line JSON summary is
    printed.

    Raises:
        SystemExit: With exit code 1 when at least one violation was found.
    """
    manifest = json.loads(MANIFEST_PATH.read_text(encoding="utf-8"))
    banned_terms = manifest["banned_terms"]
    exception_paths = manifest["allowed_exception_paths"]

    violations: list[Finding] = []
    allowed_exceptions: list[Finding] = []

    for scan_path in _iter_scan_files():
        rel_path = scan_path.relative_to(ROOT).as_posix()
        # Undecodable bytes are dropped so one odd file cannot abort the scan.
        content = scan_path.read_text(encoding="utf-8", errors="ignore")
        for line_number, line in enumerate(content.splitlines(), start=1):
            for term in banned_terms:
                if not _line_contains_term(line, term):
                    continue
                if _is_allowed_exception(rel_path, line, exception_paths, term):
                    bucket, reason = allowed_exceptions, "allowed_exception"
                else:
                    bucket, reason = violations, "violation"
                bucket.append(Finding(term, rel_path, line_number, line.strip(), reason))

    status = "fail" if violations else "pass"
    payload = {
        "status": status,
        "violation_count": len(violations),
        "allowed_exception_count": len(allowed_exceptions),
        "violations": [vars(finding) for finding in violations],
        "allowed_exceptions": [vars(finding) for finding in allowed_exceptions],
    }

    report_text = json.dumps(payload, ensure_ascii=False, indent=2) + "\n"
    REPORT_JSON.write_text(report_text, encoding="utf-8")
    REPORT_MD.write_text(_render_markdown(payload), encoding="utf-8")

    summary = {"status": payload["status"], "violations": len(violations)}
    print(json.dumps(summary, ensure_ascii=False))

    # A non-zero exit code lets callers detect a failed check.
    if violations:
        raise SystemExit(1)
def _iter_scan_files() -> list[Path]:
    """Collect every file that should be scanned, in sorted order.

    Walks each existing directory named in SCAN_ROOTS recursively and keeps
    regular files that are not under a ``__pycache__`` directory, are not one
    of the generated report files, and have a suffix in TEXT_EXTENSIONS.

    Returns:
        The selected paths, sorted.
    """
    # The reports are produced by this script; scanning them would re-report
    # the findings they quote.
    generated_reports = {
        "naming_standards_report.json",
        "naming_standards_report.md",
    }
    collected: list[Path] = []
    for root_name in SCAN_ROOTS:
        base = ROOT / root_name
        if base.exists():
            collected.extend(
                candidate
                for candidate in base.rglob("*")
                if candidate.is_file()
                and "__pycache__" not in candidate.parts
                and candidate.name not in generated_reports
                and candidate.suffix in TEXT_EXTENSIONS
            )
    collected.sort()
    return collected
- def _line_contains_term(line: str, term: str) -> bool:
- if term.endswith(".jsonl"):
- return term in line
- return re.search(
- rf"(?<![A-Za-z0-9_]){re.escape(term)}(?![A-Za-z0-9_])",
- line,
- ) is not None
- def _is_allowed_exception(rel_path: str, line: str, exception_paths: list[str], term: str = "") -> bool:
- if any(rel_path.startswith(path) or rel_path == path for path in exception_paths):
- return True
- legacy_runtime_aliases = {
- "queries.jsonl",
- "candidate_pool.jsonl",
- "media_assets.jsonl",
- "source_edges.jsonl",
- "trace_events.jsonl",
- }
- if rel_path == "tech_documents/数据库字段总览/content_agent_schema_registry.json" and any(
- alias in line for alias in legacy_runtime_aliases
- ):
- return True
- if rel_path in {"scripts/validate_schema_registry.py", "scripts/check_naming_standards.py"} and any(
- alias in line for alias in legacy_runtime_aliases
- ):
- return True
- if rel_path == "scripts/check_naming_standards.py" and term == "evidence_refs":
- return True
- if "platform_raw_payload" in line:
- return True
- if "source_post_id" in line or "matched_post_ids" in line or "video_ids" in line:
- return True
- if term == "evidence_refs" and rel_path in {
- "tech_documents/数据接口与来源/01_DemandAgent输入合同.md",
- "tech_documents/Pattern回扣与分类树/00_全链路说明.md",
- "tech_documents/Pattern回扣与分类树/02_前置坑与FAQ.md",
- }:
- return True
- return False
- def _render_markdown(payload: dict[str, Any]) -> str:
- lines = [
- "# 命名规范检查报告",
- "",
- f"- 状态:`{payload['status']}`",
- f"- 违规数量:`{payload['violation_count']}`",
- f"- 允许例外数量:`{payload['allowed_exception_count']}`",
- "",
- ]
- if payload["violations"]:
- lines.extend(["## 违规残留", ""])
- for finding in payload["violations"][:200]:
- lines.append(
- f"- `{finding['term']}` at `{finding['path']}:{finding['line_number']}`: {finding['line']}"
- )
- if len(payload["violations"]) > 200:
- lines.append(f"- 其余 {len(payload['violations']) - 200} 条见 JSON 报告。")
- lines.append("")
- if payload["allowed_exceptions"]:
- lines.extend(["## 允许例外", ""])
- for finding in payload["allowed_exceptions"][:100]:
- lines.append(
- f"- `{finding['term']}` at `{finding['path']}:{finding['line_number']}`"
- )
- if len(payload["allowed_exceptions"]) > 100:
- lines.append(f"- 其余 {len(payload['allowed_exceptions']) - 100} 条见 JSON 报告。")
- lines.append("")
- return "\n".join(lines) + "\n"
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|