add_excel_field_comment_row.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. """Insert a Chinese business-comment row (row 2) under the header of every config
  2. sheet in the rule-pack and walk-strategy workbooks.
  3. Why this is safe for loading / code-reading / JSON conversion:
  4. - The runtime never reads these .xlsx files; it reads the JSON exports only.
  5. - The inserted comment row puts a sentinel prefix "备注:" in the FIRST column,
  6. so any future Excel->JSON converter can skip it with a one-line guard:
  7. if str(row[0]).startswith("备注"): continue
  8. - The row is styled (light fill + grey italic) so humans also see it is not data.
  9. Sources of the Chinese meanings:
  10. - Rule-pack workbook: its own built-in `字段注释` dictionary sheet.
  11. - Walk-strategy workbook: tech_documents/游走策略/02_游走策略表字段说明.md (encoded below).
  12. Idempotent: re-running detects an existing "备注" row and skips that sheet.
  13. """
  14. from __future__ import annotations
  15. from pathlib import Path
  16. from openpyxl import load_workbook
  17. from openpyxl.styles import Font, PatternFill
  18. from openpyxl.utils import get_column_letter
  19. ROOT = Path(__file__).resolve().parent.parent
  20. RULE_PACK_XLSX = ROOT / "tech_documents/规则包映射/规则包映射配置表.xlsx"
  21. WALK_XLSX = ROOT / "tech_documents/游走策略/游走策略配置表.xlsx"
  22. SENTINEL = "备注:"
  23. COMMENT_FILL = PatternFill(start_color="FFF2F2F2", end_color="FFF2F2F2", fill_type="solid")
  24. COMMENT_FONT = Font(italic=True, color="FF7F7F7F")
  25. # Walk-strategy meanings, per sheet, from 02_游走策略表字段说明.md.
  26. # V3 清理: 仅保留 3 个仍存在的 sheet(其余 10 个僵尸 sheet 已随 JSON 段删除)。
  27. WALK_MEANINGS: dict[str, dict[str, str]] = {
  28. "walk_edge_catalog": {
  29. "edge_id": "边 ID", "edge_type": "边类型", "from_node_type": "起点节点类型",
  30. "to_node_type": "终点节点类型", "edge_label": "产品可读名称", "enabled": "是否启用",
  31. "runtime_stage": "所属运行阶段", "creates_new_node": "是否产生新节点",
  32. "can_loop": "是否允许循环或多轮", "priority": "边优先级,数字越小越先执行",
  33. "notes": "备注", "注释": "中文业务注释",
  34. },
  35. "walk_rule_pack_binding": {
  36. "binding_id": "绑定 ID", "edge_id": "被绑定的边",
  37. "target_entity": "规则判断对象,如 Content、Author、Hashtag、Path、Budget",
  38. "rule_pack_id": "P5 规则包 ID", "rule_pack_version": "P5 规则包版本",
  39. "required": "是否必须通过该规则判断",
  40. "dispatch_policy": "调用方式,如 required / optional / advisory",
  41. "notes": "备注", "注释": "中文业务注释",
  42. },
  43. "walk_fact_contract": {
  44. "runtime_file": "runtime 文件名", "db_table": "对应 DB 表", "required_fields": "必填字段",
  45. "json_fields": "JSON 字段", "unique_key": "唯一键", "notes": "备注", "注释": "中文业务注释",
  46. },
  47. }
  48. # Simple meanings for the rule-pack dictionary sheet itself (it has no self-entry).
  49. DICT_SHEET_NAME = "字段注释"
  50. DICT_SHEET_MEANINGS = {
  51. "sheet_name": "配置表 sheet 名", "field_name": "字段英文名",
  52. "字段中文名": "字段中文名", "注释": "字段业务含义说明",
  53. }
  54. def build_rule_pack_meanings(wb) -> dict[str, dict[str, str]]:
  55. """Read the workbook's own 字段注释 sheet into {sheet: {field: note}}."""
  56. meanings: dict[str, dict[str, str]] = {}
  57. ws = wb[DICT_SHEET_NAME]
  58. for sheet_name, field_name, _cn, note in ws.iter_rows(min_row=2, values_only=True):
  59. if not sheet_name or not field_name:
  60. continue
  61. meanings.setdefault(sheet_name, {})[field_name] = note or field_name
  62. return meanings
  63. def annotate_sheet(ws, meanings_for_sheet: dict[str, str]) -> str:
  64. headers = [c.value for c in ws[1]]
  65. if not headers or all(h is None for h in headers):
  66. return "skip(空表)"
  67. # Idempotency guard: already annotated?
  68. if ws.max_row >= 2 and str(ws.cell(row=2, column=1).value or "").startswith(SENTINEL.rstrip(":")):
  69. return "skip(已存在备注行)"
  70. ws.insert_rows(2)
  71. for col_idx, header in enumerate(headers, start=1):
  72. if header is None:
  73. continue
  74. meaning = meanings_for_sheet.get(str(header), str(header))
  75. text = (SENTINEL + meaning) if col_idx == 1 else meaning
  76. cell = ws.cell(row=2, column=col_idx, value=text)
  77. cell.fill = COMMENT_FILL
  78. cell.font = COMMENT_FONT
  79. # Keep the autofilter dropdown anchored on the header row across full range.
  80. last_col = get_column_letter(ws.max_column)
  81. ws.auto_filter.ref = f"A1:{last_col}{ws.max_row}"
  82. return f"ok({len(headers)}列)"
  83. def process(path: Path, meanings_by_sheet, skip_sheets: set[str]) -> None:
  84. wb = load_workbook(path)
  85. print(f"\n=== {path.name} ===")
  86. for ws in wb.worksheets:
  87. if ws.title in skip_sheets:
  88. print(f" - {ws.title:32s} skip(字典/说明 sheet)")
  89. continue
  90. m = meanings_by_sheet.get(ws.title, {})
  91. status = annotate_sheet(ws, m)
  92. print(f" - {ws.title:32s} {status}")
  93. wb.save(path)
  94. print(f" saved -> {path}")
  95. def main() -> None:
  96. # Rule-pack workbook: meanings from its own 字段注释 sheet; annotate that sheet too.
  97. wb_rp = load_workbook(RULE_PACK_XLSX)
  98. rp_meanings = build_rule_pack_meanings(wb_rp)
  99. rp_meanings[DICT_SHEET_NAME] = DICT_SHEET_MEANINGS
  100. process(RULE_PACK_XLSX, rp_meanings, skip_sheets=set())
  101. # Walk-strategy workbook: meanings encoded from the field-spec doc.
  102. process(WALK_XLSX, WALK_MEANINGS, skip_sheets=set())
  103. if __name__ == "__main__":
  104. main()