lisihan
/
content-find-agent-new


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
							"""Insert a Chinese business-comment row (row 2) under the header of every config
sheet in the rule-pack and walk-strategy workbooks.

Why this is safe for loading / code-reading / JSON conversion:
- The runtime never reads these .xlsx files; it reads the JSON exports only.
- The inserted comment row puts a sentinel prefix "备注：" in the FIRST column,
  so any future Excel->JSON converter can skip it with a one-line guard:
      if str(row[0]).startswith("备注"): continue
- The row is styled (light fill + grey italic) so humans also see it is not data.

Sources of the Chinese meanings:
- Rule-pack workbook: its own built-in `字段注释` dictionary sheet.
- Walk-strategy workbook: tech_documents/游走策略/02_游走策略表字段说明.md (encoded below).

Idempotent: re-running detects an existing "备注" row and skips that sheet.
"""

from __future__ import annotations

from pathlib import Path

from openpyxl import load_workbook
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import get_column_letter

ROOT = Path(__file__).resolve().parent.parent
RULE_PACK_XLSX = ROOT / "tech_documents/规则包映射/规则包映射配置表.xlsx"
WALK_XLSX = ROOT / "tech_documents/游走策略/游走策略配置表.xlsx"

SENTINEL = "备注："
COMMENT_FILL = PatternFill(start_color="FFF2F2F2", end_color="FFF2F2F2", fill_type="solid")
COMMENT_FONT = Font(italic=True, color="FF7F7F7F")

# Walk-strategy meanings, per sheet, from 02_游走策略表字段说明.md.
# V3 清理: 仅保留 3 个仍存在的 sheet(其余 10 个僵尸 sheet 已随 JSON 段删除)。
WALK_MEANINGS: dict[str, dict[str, str]] = {
    "walk_edge_catalog": {
        "edge_id": "边 ID", "edge_type": "边类型", "from_node_type": "起点节点类型",
        "to_node_type": "终点节点类型", "edge_label": "产品可读名称", "enabled": "是否启用",
        "runtime_stage": "所属运行阶段", "creates_new_node": "是否产生新节点",
        "can_loop": "是否允许循环或多轮", "priority": "边优先级，数字越小越先执行",
        "notes": "备注", "注释": "中文业务注释",
    },
    "walk_rule_pack_binding": {
        "binding_id": "绑定 ID", "edge_id": "被绑定的边",
        "target_entity": "规则判断对象，如 Content、Author、Hashtag、Path、Budget",
        "rule_pack_id": "P5 规则包 ID", "rule_pack_version": "P5 规则包版本",
        "required": "是否必须通过该规则判断",
        "dispatch_policy": "调用方式，如 required / optional / advisory",
        "notes": "备注", "注释": "中文业务注释",
    },
    "walk_fact_contract": {
        "runtime_file": "runtime 文件名", "db_table": "对应 DB 表", "required_fields": "必填字段",
        "json_fields": "JSON 字段", "unique_key": "唯一键", "notes": "备注", "注释": "中文业务注释",
    },
}

# Simple meanings for the rule-pack dictionary sheet itself (it has no self-entry).
DICT_SHEET_NAME = "字段注释"
DICT_SHEET_MEANINGS = {
    "sheet_name": "配置表 sheet 名", "field_name": "字段英文名",
    "字段中文名": "字段中文名", "注释": "字段业务含义说明",
}


def build_rule_pack_meanings(wb) -> dict[str, dict[str, str]]:
    """Read the workbook's own 字段注释 sheet into {sheet: {field: note}}."""
    meanings: dict[str, dict[str, str]] = {}
    ws = wb[DICT_SHEET_NAME]
    for sheet_name, field_name, _cn, note in ws.iter_rows(min_row=2, values_only=True):
        if not sheet_name or not field_name:
            continue
        meanings.setdefault(sheet_name, {})[field_name] = note or field_name
    return meanings


def annotate_sheet(ws, meanings_for_sheet: dict[str, str]) -> str:
    headers = [c.value for c in ws[1]]
    if not headers or all(h is None for h in headers):
        return "skip(空表)"
    # Idempotency guard: already annotated?
    if ws.max_row >= 2 and str(ws.cell(row=2, column=1).value or "").startswith(SENTINEL.rstrip("：")):
        return "skip(已存在备注行)"

    ws.insert_rows(2)
    for col_idx, header in enumerate(headers, start=1):
        if header is None:
            continue
        meaning = meanings_for_sheet.get(str(header), str(header))
        text = (SENTINEL + meaning) if col_idx == 1 else meaning
        cell = ws.cell(row=2, column=col_idx, value=text)
        cell.fill = COMMENT_FILL
        cell.font = COMMENT_FONT
    # Keep the autofilter dropdown anchored on the header row across full range.
    last_col = get_column_letter(ws.max_column)
    ws.auto_filter.ref = f"A1:{last_col}{ws.max_row}"
    return f"ok({len(headers)}列)"


def process(path: Path, meanings_by_sheet, skip_sheets: set[str]) -> None:
    wb = load_workbook(path)
    print(f"\n=== {path.name} ===")
    for ws in wb.worksheets:
        if ws.title in skip_sheets:
            print(f"  - {ws.title:32s} skip(字典/说明 sheet)")
            continue
        m = meanings_by_sheet.get(ws.title, {})
        status = annotate_sheet(ws, m)
        print(f"  - {ws.title:32s} {status}")
    wb.save(path)
    print(f"  saved -> {path}")


def main() -> None:
    # Rule-pack workbook: meanings from its own 字段注释 sheet; annotate that sheet too.
    wb_rp = load_workbook(RULE_PACK_XLSX)
    rp_meanings = build_rule_pack_meanings(wb_rp)
    rp_meanings[DICT_SHEET_NAME] = DICT_SHEET_MEANINGS
    process(RULE_PACK_XLSX, rp_meanings, skip_sheets=set())

    # Walk-strategy workbook: meanings encoded from the field-spec doc.
    process(WALK_XLSX, WALK_MEANINGS, skip_sheets=set())


if __name__ == "__main__":
    main()