| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403 |
- #!/usr/bin/env python3
- """将 run_log 文本渲染为可折叠 HTML 页面。
- 直接在脚本内修改 INPUT_LOG_PATH / OUTPUT_HTML_PATH 后运行:
- python examples/piaoquan_needs/render_log_html.py
- """
- from __future__ import annotations
- import html
- import os
- from dataclasses import dataclass, field
- from pathlib import Path
- from dotenv import load_dotenv
- # Load the .env file located next to this script (independent of the directory you run it from).
- load_dotenv(dotenv_path=Path(__file__).resolve().parent / ".env", override=False)
@dataclass
class Node:
    """A node of the parsed log tree.

    The root node has ``title=None``; each ``[FOLD:...]`` section becomes a
    nested node whose ``title`` is the text between ``[FOLD:`` and ``]``.
    """

    # Fold title; ``None`` marks a non-fold node such as the root.
    title: str | None = None
    # Ordered content: raw text lines interleaved with nested fold nodes.
    entries: list[str | "Node"] = field(default_factory=list)

    @property
    def is_fold(self) -> bool:
        """Return True when this node has a title, i.e. is a fold section."""
        return self.title is not None


def parse_log(content: str) -> Node:
    """Parse run-log text into a tree of ``Node`` objects.

    A line whose stripped form is ``[FOLD:<title>]`` opens a nested fold and a
    line whose stripped form is ``[/FOLD]`` closes the innermost open fold.
    Every other line is stored verbatim in the innermost open node.

    Tolerated malformations:
      * A surplus ``[/FOLD]`` (nothing open) is ignored instead of being
        treated as body text.
      * A fold that is never closed keeps all content collected so far and is
        implicitly closed at end of input.

    Args:
        content: Full text of the log.

    Returns:
        The root node (``title is None``) holding the parsed tree.
    """
    root = Node(title=None)
    # Stack of currently open nodes; the root is never popped.
    stack: list[Node] = [root]

    for raw_line in content.splitlines():
        tag = raw_line.strip()

        if tag.startswith("[FOLD:") and tag.endswith("]"):
            node = Node(title=tag[len("[FOLD:"):-1])
            # The node is attached to its parent right here, when it opens.
            stack[-1].entries.append(node)
            stack.append(node)
        elif tag == "[/FOLD]":
            if len(stack) > 1:
                stack.pop()
        else:
            stack[-1].entries.append(raw_line)

    # Unclosed folds need no extra handling: each one was attached to its
    # parent when it was opened, so its content is already in the tree.
    # Appending it again here would render every unclosed fold twice.
    return root
# Title prefixes / keywords that mark a fold block as collapsed by default.
DEFAULT_COLLAPSE_PREFIXES = ["🔧", "📥", "📤"]
DEFAULT_COLLAPSE_KEYWORDS = ["调用参数", "返回内容"]
# Short functional summary per tool name (static mapping, used when
# displaying tool-call fold titles in the rendered log).
TOOL_DESCRIPTION_MAP: dict[str, str] = {
    "think_and_plan": "系统化记录思考、计划与下一步行动(只记录不获取新信息)。",
    "douyin_search": "通过关键词在抖音上搜索视频内容。",
    "douyin_search_tikhub": "通过关键词在抖音上搜索视频内容(Tikhub 接口)。",
    "douyin_user_videos": "通过账号/作者 sec_uid 获取其历史作品列表。",
    "get_content_fans_portrait": "获取视频点赞用户画像(热点宝),判断 metadata.has_portrait。",
    "get_account_fans_portrait": "获取作者粉丝画像(热点宝),用于内容画像缺失兜底。",
    "store_results_mysql": "将 output.json 写入 MySQL(作者表与内容表)。",
    "create_crawler_plan_by_douyin_content_id": "为入选视频生成 AIGC 爬取计划。",
    "create_crawler_plan_by_douyin_account_id": "为入选账号生成 AIGC 爬取计划。",
}
# =========================
# Runtime configuration (read from environment variables / .env by default)
# =========================
# Log file, or directory containing log files, to render.
INPUT_LOG_PATH = os.getenv("INPUT_LOG_PATH", ".cache/input_log")
# None (env var unset or empty) means: write next to the input file, same name with a .html suffix.
OUTPUT_HTML_PATH: str | None = os.getenv("OUTPUT_HTML_PATH") or None
# Whether every [FOLD] block starts collapsed.
COLLAPSE_ALL_FOLDS = False
# Fold blocks whose title matches one of these prefixes / keywords start collapsed.
COLLAPSE_PREFIXES = DEFAULT_COLLAPSE_PREFIXES
COLLAPSE_KEYWORDS = DEFAULT_COLLAPSE_KEYWORDS
def resolve_config_path(path_str: str) -> Path:
    """Resolve a configured path, whether run from the project root or the script dir.

    ``~`` is expanded first. An absolute path is returned resolved. A relative
    path is tried against, in order: the current working directory, the
    directory containing this script, and the directory two levels above the
    script. The first candidate that exists is returned.

    Args:
        path_str: Path string taken from the configuration.

    Returns:
        The resolved path. When no candidate exists, the last candidate (two
        levels above the script) is returned so error messages stay stable.
    """
    expanded = Path(path_str).expanduser()
    if expanded.is_absolute():
        return expanded.resolve()

    def search_bases():
        # Yielded lazily so later bases are only computed when needed.
        yield Path.cwd()
        here = Path(__file__).resolve().parent
        yield here
        yield here.parent.parent

    candidate = expanded
    for base in search_bases():
        candidate = (base / expanded).resolve()
        if candidate.exists():
            break
    # Either the first existing candidate (loop broke) or the last one tried.
    return candidate
def should_collapse(
    title: str,
    collapse_prefixes: list[str],
    collapse_keywords: list[str],
    collapse_all: bool,
) -> bool:
    """Decide whether a fold with the given title starts collapsed.

    Args:
        title: Fold title as written in the log.
        collapse_prefixes: The fold collapses if the title starts with any of these.
        collapse_keywords: The fold collapses if the title contains any of these.
        collapse_all: When true, every fold collapses regardless of its title.

    Returns:
        True if the fold should be rendered collapsed, otherwise False.
    """
    if collapse_all:
        return True
    # str.startswith accepts a tuple and checks all prefixes in one call.
    if title.startswith(tuple(collapse_prefixes)):
        return True
    for keyword in collapse_keywords:
        if keyword in title:
            return True
    return False
def render_text_block(lines: list[str]) -> str:
    """Render consecutive plain log lines as one escaped ``<pre>`` element.

    Leading and trailing blank (whitespace-only) lines are dropped, and each
    run of blank lines inside the block is reduced to a single empty line.

    Args:
        lines: Raw log lines; the list is not modified.

    Returns:
        ``<pre class="log-text">…</pre>`` with HTML-escaped content, or an
        empty string when nothing but blank lines remains.
    """
    first, last = 0, len(lines)
    while first < last and not lines[first].strip():
        first += 1
    while last > first and not lines[last - 1].strip():
        last -= 1
    if first == last:
        return ""

    kept: list[str] = []
    previous_blank = False
    for text in lines[first:last]:
        blank = not text.strip()
        if blank and previous_blank:
            # Second or later blank line of a run: skip it.
            continue
        # Whitespace-only lines are normalised to a truly empty line.
        kept.append("" if blank else text)
        previous_blank = blank

    joined = "\n".join(kept)
    return f'<pre class="log-text">{html.escape(joined)}</pre>'
def enrich_fold_title(title: str) -> str:
    """Append the tool's functional description to a tool-call fold title.

    Only titles starting with ``"🔧 "`` are considered. The remainder of the
    title, stripped, is looked up in ``TOOL_DESCRIPTION_MAP``.

    Args:
        title: Fold title as written in the log.

    Returns:
        ``"🔧 <tool>(<description>)"`` when a non-empty description is found,
        otherwise the title unchanged.
    """
    marker = "🔧 "
    if title.startswith(marker):
        name = title[len(marker):].strip()
        summary = TOOL_DESCRIPTION_MAP.get(name)
        if summary:
            return f"{marker}{name}({summary})"
    return title
def render_node(
    node: Node,
    collapse_prefixes: list[str],
    collapse_keywords: list[str],
    collapse_all: bool,
) -> str:
    """Recursively render a node's entries as an HTML fragment.

    Consecutive text lines are grouped into one text block. Each fold child
    becomes a ``<details>`` element whose ``<summary>`` is the escaped
    (possibly enriched) title and whose body is the rendered child.

    Args:
        node: Node whose ``entries`` are rendered.
        collapse_prefixes: Title prefixes that make a fold start collapsed.
        collapse_keywords: Title keywords that make a fold start collapsed.
        collapse_all: Collapse every fold regardless of title.

    Returns:
        The concatenated HTML for all entries of ``node``.
    """
    chunks: list[str] = []
    pending_lines: list[str] = []

    def emit_pending() -> None:
        # Turn the buffered text lines into one block, then reset the buffer.
        if pending_lines:
            chunks.append(render_text_block(pending_lines))
            pending_lines.clear()

    for item in node.entries:
        if isinstance(item, str):
            pending_lines.append(item)
        elif item.is_fold:
            # Text seen before this fold must appear before it in the output.
            emit_pending()
            heading = item.title or ""
            if should_collapse(
                title=heading,
                collapse_prefixes=collapse_prefixes,
                collapse_keywords=collapse_keywords,
                collapse_all=collapse_all,
            ):
                css_class, open_attr = "fold tool-fold", ""
            else:
                css_class, open_attr = "fold normal-fold", " open"
            summary_html = html.escape(enrich_fold_title(heading))
            children_html = render_node(
                item,
                collapse_prefixes=collapse_prefixes,
                collapse_keywords=collapse_keywords,
                collapse_all=collapse_all,
            )
            chunks.append(
                f'<details class="{css_class}"{open_attr}>'
                f"<summary>{summary_html}</summary>"
                f"{children_html}"
                "</details>"
            )

    emit_pending()
    return "".join(chunks)
def build_html(body: str, source_name: str) -> str:
    """Wrap a rendered log body in the complete standalone HTML page.

    The page embeds its own CSS and a small script that wires the two header
    buttons to open / close every ``details.tool-fold`` element.

    Args:
        body: HTML fragment inserted verbatim (not escaped) into the content
            container.
        source_name: Name shown in the page title and header; it is
            HTML-escaped before insertion.

    Returns:
        The full HTML document as a string.
    """
    # Literal braces of the CSS / JS are doubled because this is an f-string.
    return f"""<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Run Log 可视化 - {html.escape(source_name)}</title>
<style>
:root {{
--bg: #0b1020;
--panel: #131a2a;
--text: #e8edf7;
--muted: #98a2b3;
--accent: #6ea8fe;
--border: #263146;
}}
* {{
box-sizing: border-box;
}}
body {{
margin: 0;
background: var(--bg);
color: var(--text);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif;
}}
.wrap {{
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}}
.header {{
margin-bottom: 14px;
display: flex;
align-items: center;
gap: 10px;
flex-wrap: wrap;
}}
.title {{
font-size: 18px;
font-weight: 700;
}}
.source {{
color: var(--muted);
font-size: 13px;
}}
button {{
border: 1px solid var(--border);
background: var(--panel);
color: var(--text);
padding: 6px 10px;
border-radius: 8px;
cursor: pointer;
}}
button:hover {{
border-color: var(--accent);
color: var(--accent);
}}
.content {{
background: var(--panel);
border: 1px solid var(--border);
border-radius: 10px;
padding: 10px;
}}
details {{
margin: 6px 0;
border: 1px solid var(--border);
border-radius: 8px;
background: rgba(255, 255, 255, 0.01);
}}
details > summary {{
cursor: pointer;
padding: 8px 10px;
font-size: 13px;
list-style: none;
user-select: none;
color: #cdd6e5;
}}
details > summary::-webkit-details-marker {{
display: none;
}}
details > summary::before {{
content: "▶";
display: inline-block;
margin-right: 6px;
transform: rotate(0deg);
transition: transform 120ms ease;
color: var(--muted);
}}
details[open] > summary::before {{
transform: rotate(90deg);
}}
.tool-fold > summary {{
color: #f6cf76;
}}
.log-text {{
margin: 0;
padding: 10px;
border-top: 1px dashed var(--border);
color: var(--text);
white-space: pre-wrap;
word-break: break-word;
line-height: 1.4;
font-size: 13px;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
}}
</style>
</head>
<body>
<div class="wrap">
<div class="header">
<div class="title">Run Log 可视化</div>
<div class="source">{html.escape(source_name)}</div>
<button id="expand-tools">展开全部工具调用</button>
<button id="collapse-tools">折叠全部工具调用</button>
</div>
<div class="content">{body}</div>
</div>
<script>
const toolFolds = Array.from(document.querySelectorAll("details.tool-fold"));
document.getElementById("expand-tools").addEventListener("click", () => {{
toolFolds.forEach((el) => (el.open = true));
}});
document.getElementById("collapse-tools").addEventListener("click", () => {{
toolFolds.forEach((el) => (el.open = false));
}});
</script>
</body>
</html>
"""
def generate_html(
    input_path: Path,
    output_path: Path,
    collapse_prefixes: list[str],
    collapse_keywords: list[str],
    collapse_all: bool = False,
) -> None:
    """Read a log file, render it, and write the resulting HTML page.

    Args:
        input_path: Log file to read (UTF-8).
        output_path: Destination HTML file (UTF-8); missing parent
            directories are created.
        collapse_prefixes: Title prefixes that make a fold start collapsed.
        collapse_keywords: Title keywords that make a fold start collapsed.
        collapse_all: Collapse every fold regardless of title.
    """
    log_tree = parse_log(input_path.read_text(encoding="utf-8"))
    page = build_html(
        body=render_node(
            log_tree,
            collapse_prefixes=collapse_prefixes,
            collapse_keywords=collapse_keywords,
            collapse_all=collapse_all,
        ),
        source_name=input_path.name,
    )

    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)
    output_path.write_text(page, encoding="utf-8")
def main() -> None:
    """Resolve the configured input/output paths and generate the HTML page.

    ``INPUT_LOG_PATH`` may be a file or a directory. For a directory, the most
    recently modified ``run_log_*.txt`` is rendered, falling back to the most
    recently modified ``*.txt``. Without ``OUTPUT_HTML_PATH`` the page is
    written next to the input file with a ``.html`` suffix.

    Raises:
        FileNotFoundError: If the input path does not exist, or the directory
            contains no matching log file.
    """
    input_base = resolve_config_path(INPUT_LOG_PATH)

    def newest(pattern: str):
        # Most recently modified match, or None when nothing matches.
        return max(
            input_base.glob(pattern),
            key=lambda p: p.stat().st_mtime,
            default=None,
        )

    if input_base.is_file():
        input_path = input_base
    elif input_base.is_dir():
        # Prefer the newest run_log_*.txt, then any *.txt.
        input_path = newest("run_log_*.txt")
        if input_path is None:
            input_path = newest("*.txt")
        if input_path is None:
            raise FileNotFoundError(f"目录下未找到可渲染日志文件: {input_base}")
    else:
        raise FileNotFoundError(f"输入日志路径不存在: {input_base}")

    output_path = (
        resolve_config_path(OUTPUT_HTML_PATH)
        if OUTPUT_HTML_PATH
        else input_path.with_suffix(".html")
    )

    generate_html(
        input_path=input_path,
        output_path=output_path,
        collapse_prefixes=COLLAPSE_PREFIXES,
        collapse_keywords=COLLAPSE_KEYWORDS,
        collapse_all=COLLAPSE_ALL_FOLDS,
    )
    print(f"HTML 已生成: {output_path}")
- if __name__ == "__main__":
- main()
|