|
|
@@ -0,0 +1,185 @@
|
|
|
+"""Insert a Chinese business-comment row (row 2) under the header of every config
|
|
|
+sheet in the rule-pack and walk-strategy workbooks.
|
|
|
+
|
|
|
+Why this is safe for loading / code-reading / JSON conversion:
|
|
|
+- The runtime never reads these .xlsx files; it reads the JSON exports only.
|
|
|
+- The inserted comment row puts a sentinel prefix "备注:" in the FIRST column,
|
|
|
+ so any future Excel->JSON converter can skip it with a one-line guard:
|
|
|
+ if str(row[0]).startswith("备注"): continue
|
|
|
+- The row is styled (light fill + grey italic) so humans also see it is not data.
|
|
|
+
|
|
|
+Sources of the Chinese meanings:
|
|
|
+- Rule-pack workbook: its own built-in `字段注释` dictionary sheet.
|
|
|
+- Walk-strategy workbook: tech_documents/游走策略/02_游走策略表字段说明.md (encoded below).
|
|
|
+
|
|
|
+Idempotent: re-running detects an existing "备注" row and skips that sheet.
|
|
|
+"""
|
|
|
+
|
|
|
+from __future__ import annotations
|
|
|
+
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
+from openpyxl import load_workbook
|
|
|
+from openpyxl.styles import Font, PatternFill
|
|
|
+from openpyxl.utils import get_column_letter
|
|
|
+
|
|
|
+ROOT = Path(__file__).resolve().parent.parent
|
|
|
+RULE_PACK_XLSX = ROOT / "tech_documents/规则包映射/规则包映射配置表.xlsx"
|
|
|
+WALK_XLSX = ROOT / "tech_documents/游走策略/游走策略配置表.xlsx"
|
|
|
+
|
|
|
+SENTINEL = "备注:"
|
|
|
+COMMENT_FILL = PatternFill(start_color="FFF2F2F2", end_color="FFF2F2F2", fill_type="solid")
|
|
|
+COMMENT_FONT = Font(italic=True, color="FF7F7F7F")
|
|
|
+
|
|
|
+# Walk-strategy meanings, per sheet, from 02_游走策略表字段说明.md.
|
|
|
+WALK_MEANINGS: dict[str, dict[str, str]] = {
|
|
|
+ "walk_strategy_meta": {
|
|
|
+ "strategy_id": "游走策略 ID", "strategy_version": "游走策略版本",
|
|
|
+ "platform": "平台,V1 为 douyin", "status": "策略状态,如 active / draft / deprecated",
|
|
|
+ "owner": "业务负责人", "source_json_path": "对应 runtime JSON 路径",
|
|
|
+ "description": "策略说明", "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_node_catalog": {
|
|
|
+ "node_type": "节点类型,如 query、video、author、hashtag", "node_label": "产品可读名称",
|
|
|
+ "node_category": "节点类别,如 input、platform_result、signal、asset",
|
|
|
+ "id_field": "识别该节点的字段", "can_expand": "是否可继续扩展",
|
|
|
+ "is_signal_node": "是否只是信号节点,不直接沉淀资产", "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_edge_catalog": {
|
|
|
+ "edge_id": "边 ID", "edge_type": "边类型", "from_node_type": "起点节点类型",
|
|
|
+ "to_node_type": "终点节点类型", "edge_label": "产品可读名称", "enabled": "是否启用",
|
|
|
+ "runtime_stage": "所属运行阶段", "creates_new_node": "是否产生新节点",
|
|
|
+ "can_loop": "是否允许循环或多轮", "priority": "边优先级,数字越小越先执行",
|
|
|
+ "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_edge_trigger_rules": {
|
|
|
+ "trigger_id": "触发规则 ID", "edge_id": "适用边",
|
|
|
+ "input_level": "判断粒度,如 query、content、author、tag",
|
|
|
+ "field_path": "用来判断的字段路径", "operator": "判断操作符", "expected_value": "期望值",
|
|
|
+ "required": "是否必需满足", "failure_policy": "不满足时的处理方式",
|
|
|
+ "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_rule_pack_binding": {
|
|
|
+ "binding_id": "绑定 ID", "edge_id": "被绑定的边",
|
|
|
+ "target_entity": "规则判断对象,如 Content、Author、Hashtag、Path、Budget",
|
|
|
+ "rule_pack_id": "P5 规则包 ID", "rule_pack_version": "P5 规则包版本",
|
|
|
+ "required": "是否必须通过该规则判断",
|
|
|
+ "dispatch_policy": "调用方式,如 required / optional / advisory",
|
|
|
+ "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_budget_policy": {
|
|
|
+ "budget_id": "预算规则 ID", "edge_id": "适用边",
|
|
|
+ "max_total_actions": "单 run 最多执行次数", "max_per_query": "每条 query 最多执行次数",
|
|
|
+ "max_per_content": "每条内容最多执行次数", "max_pages": "最大页数",
|
|
|
+ "max_depth": "最大游走深度", "max_tag_hops": "最大 tag 扩散跳数",
|
|
|
+ "budget_tier": "预算档位", "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_stop_policy": {
|
|
|
+ "stop_policy_id": "停止规则 ID", "edge_id": "适用边", "condition_label": "停止条件名称",
|
|
|
+ "field_path": "判断字段路径", "operator": "判断操作符", "expected_value": "期望值",
|
|
|
+ "stop_action": "停止后的动作", "reason_code": "停止原因码", "priority": "停止规则优先级",
|
|
|
+ "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_retry_policy": {
|
|
|
+ "retry_policy_id": "重试规则 ID", "edge_id": "适用边", "operation": "重试的外部操作",
|
|
|
+ "max_retries": "最大重试次数", "retry_interval_seconds": "重试间隔秒数",
|
|
|
+ "on_final_failure": "最终失败后的处理", "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_action_catalog": {
|
|
|
+ "walk_action": "游走动作枚举", "action_label": "产品可读名称", "edge_id": "适用边",
|
|
|
+ "runtime_effect": "运行影响", "is_terminal": "是否终止路径",
|
|
|
+ "writes_next_hop": "是否写入下一跳节点", "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_source_path_mapping": {
|
|
|
+ "mapping_id": "来源路径映射 ID", "edge_id": "适用边", "source_path_type": "来源路径类型",
|
|
|
+ "from_node_type": "起点节点类型", "to_node_type": "终点节点类型",
|
|
|
+ "origin_path_template": "来源路径模板", "source_evidence_ref_template": "来源证据引用模板",
|
|
|
+ "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_event_mapping": {
|
|
|
+ "mapping_id": "事件映射 ID", "edge_id": "适用边", "event_type": "运行事件类型",
|
|
|
+ "success_status": "成功状态", "failure_status": "失败状态",
|
|
|
+ "input_ref_template": "输入引用模板", "output_ref_template": "输出引用模板",
|
|
|
+ "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_query_generation_policy": {
|
|
|
+ "policy_id": "query 生成规则 ID", "edge_id": "适用边",
|
|
|
+ "query_generation_method": "query 生成方式", "source_field": "生成 query 的来源字段",
|
|
|
+ "dedup_policy": "去重策略", "max_queries": "最大生成 query 数",
|
|
|
+ "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+ "walk_fact_contract": {
|
|
|
+ "runtime_file": "runtime 文件名", "db_table": "对应 DB 表", "required_fields": "必填字段",
|
|
|
+ "json_fields": "JSON 字段", "unique_key": "唯一键", "notes": "备注", "注释": "中文业务注释",
|
|
|
+ },
|
|
|
+}
|
|
|
+
|
|
|
+# Simple meanings for the rule-pack dictionary sheet itself (it has no self-entry).
|
|
|
+DICT_SHEET_NAME = "字段注释"
|
|
|
+DICT_SHEET_MEANINGS = {
|
|
|
+ "sheet_name": "配置表 sheet 名", "field_name": "字段英文名",
|
|
|
+ "字段中文名": "字段中文名", "注释": "字段业务含义说明",
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+def build_rule_pack_meanings(wb) -> dict[str, dict[str, str]]:
|
|
|
+ """Read the workbook's own 字段注释 sheet into {sheet: {field: note}}."""
|
|
|
+ meanings: dict[str, dict[str, str]] = {}
|
|
|
+ ws = wb[DICT_SHEET_NAME]
|
|
|
+ for sheet_name, field_name, _cn, note in ws.iter_rows(min_row=2, values_only=True):
|
|
|
+ if not sheet_name or not field_name:
|
|
|
+ continue
|
|
|
+ meanings.setdefault(sheet_name, {})[field_name] = note or field_name
|
|
|
+ return meanings
|
|
|
+
|
|
|
+
|
|
|
+def annotate_sheet(ws, meanings_for_sheet: dict[str, str]) -> str:
|
|
|
+ headers = [c.value for c in ws[1]]
|
|
|
+ if not headers or all(h is None for h in headers):
|
|
|
+ return "skip(空表)"
|
|
|
+ # Idempotency guard: already annotated?
|
|
|
+ if ws.max_row >= 2 and str(ws.cell(row=2, column=1).value or "").startswith(SENTINEL.rstrip(":")):
|
|
|
+ return "skip(已存在备注行)"
|
|
|
+
|
|
|
+ ws.insert_rows(2)
|
|
|
+ for col_idx, header in enumerate(headers, start=1):
|
|
|
+ if header is None:
|
|
|
+ continue
|
|
|
+ meaning = meanings_for_sheet.get(str(header), str(header))
|
|
|
+ text = (SENTINEL + meaning) if col_idx == 1 else meaning
|
|
|
+ cell = ws.cell(row=2, column=col_idx, value=text)
|
|
|
+ cell.fill = COMMENT_FILL
|
|
|
+ cell.font = COMMENT_FONT
|
|
|
+ # Keep the autofilter dropdown anchored on the header row across full range.
|
|
|
+ last_col = get_column_letter(ws.max_column)
|
|
|
+ ws.auto_filter.ref = f"A1:{last_col}{ws.max_row}"
|
|
|
+ return f"ok({len(headers)}列)"
|
|
|
+
|
|
|
+
|
|
|
+def process(path: Path, meanings_by_sheet, skip_sheets: set[str]) -> None:
|
|
|
+ wb = load_workbook(path)
|
|
|
+ print(f"\n=== {path.name} ===")
|
|
|
+ for ws in wb.worksheets:
|
|
|
+ if ws.title in skip_sheets:
|
|
|
+ print(f" - {ws.title:32s} skip(字典/说明 sheet)")
|
|
|
+ continue
|
|
|
+ m = meanings_by_sheet.get(ws.title, {})
|
|
|
+ status = annotate_sheet(ws, m)
|
|
|
+ print(f" - {ws.title:32s} {status}")
|
|
|
+ wb.save(path)
|
|
|
+ print(f" saved -> {path}")
|
|
|
+
|
|
|
+
|
|
|
+def main() -> None:
|
|
|
+ # Rule-pack workbook: meanings from its own 字段注释 sheet; annotate that sheet too.
|
|
|
+ wb_rp = load_workbook(RULE_PACK_XLSX)
|
|
|
+ rp_meanings = build_rule_pack_meanings(wb_rp)
|
|
|
+ rp_meanings[DICT_SHEET_NAME] = DICT_SHEET_MEANINGS
|
|
|
+ process(RULE_PACK_XLSX, rp_meanings, skip_sheets=set())
|
|
|
+
|
|
|
+ # Walk-strategy workbook: meanings encoded from the field-spec doc.
|
|
|
+ process(WALK_XLSX, WALK_MEANINGS, skip_sheets=set())
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == "__main__":
|
|
|
+ main()
|