|
|
@@ -0,0 +1,402 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+"""将 run_log 文本渲染为可折叠 HTML 页面。
|
|
|
+
|
|
|
+直接在脚本内修改 INPUT_LOG_PATH / OUTPUT_HTML_PATH 后运行:
|
|
|
+ python examples/piaoquan_needs/render_log_html.py
|
|
|
+"""
|
|
|
+
|
|
|
+from __future__ import annotations
|
|
|
+
|
|
|
+import html
|
|
|
+import os
|
|
|
+from dataclasses import dataclass, field
|
|
|
+from pathlib import Path
|
|
|
+from dotenv import load_dotenv
|
|
|
+
|
|
|
# Load the .env file that sits in the same directory as this script, so the
# lookup does not depend on the directory the script is launched from.
# override=False is passed so that, per python-dotenv's documented behaviour,
# variables already present in the process environment are left untouched.
load_dotenv(dotenv_path=Path(__file__).resolve().parent / ".env", override=False)
|
|
|
+
|
|
|
+
|
|
|
@dataclass
class Node:
    """One node of the parsed log tree.

    A node holds an optional fold title and an ordered list of children,
    where each child is either a raw log line or a nested ``Node``.
    """

    # Fold title; None marks a node that is not a fold (e.g. the tree root).
    title: str | None = None
    # Children in log order: plain text lines and nested nodes.
    entries: list[str | "Node"] = field(default_factory=list)

    @property
    def is_fold(self) -> bool:
        """Return True when this node carries a title (``title is not None``)."""
        return self.title is not None
|
|
|
+
|
|
|
+
|
|
|
def parse_log(content: str) -> Node:
    """Parse run-log text into a tree of ``Node`` objects.

    A line whose stripped form is ``[FOLD:<title>]`` opens a nested fold and a
    line whose stripped form is ``[/FOLD]`` closes the innermost open fold.
    Every other line is stored verbatim (without its line terminator) in the
    fold that is currently open.

    Tolerated malformations:
      * a surplus ``[/FOLD]`` with no open fold is ignored;
      * a fold that is never closed keeps everything collected so far.

    Args:
        content: Full text of the log.

    Returns:
        The root node (``title`` is None) holding the parsed tree.
    """
    fold_open = "[FOLD:"
    fold_close = "[/FOLD]"

    root = Node(title=None)
    # Stack of currently open nodes; the root is never popped.
    stack: list[Node] = [root]

    # splitlines() already strips line terminators, so no extra rstrip is needed.
    for line in content.splitlines():
        tag = line.strip()
        if tag.startswith(fold_open) and tag.endswith("]"):
            node = Node(title=tag[len(fold_open):-1])
            # The fold is attached to its parent as soon as it is opened ...
            stack[-1].entries.append(node)
            stack.append(node)
        elif tag == fold_close:
            # Ignore a stray closing marker instead of treating it as text.
            if len(stack) > 1:
                stack.pop()
        else:
            stack[-1].entries.append(line)

    # ... so folds still open at end of input are already part of the tree.
    # Appending them to their parent again (as the previous implementation
    # did) would render every unclosed fold twice.
    return root
|
|
|
+
|
|
|
+
|
|
|
# Fold titles that start with one of these prefixes, or contain one of these
# keywords, are rendered collapsed by default.
DEFAULT_COLLAPSE_PREFIXES = ["🔧", "📥", "📤"]
DEFAULT_COLLAPSE_KEYWORDS = ["调用参数", "返回内容"]

# Short description per tool name (static mapping, used only when rendering
# the log to decorate tool-call fold titles).
TOOL_DESCRIPTION_MAP: dict[str, str] = {
    "think_and_plan": "系统化记录思考、计划与下一步行动(只记录不获取新信息)。",
    "douyin_search": "通过关键词在抖音上搜索视频内容。",
    "douyin_user_videos": "通过账号/作者 sec_uid 获取其历史作品列表。",
    "get_content_fans_portrait": "获取视频点赞用户画像(热点宝),判断 metadata.has_portrait。",
    "get_account_fans_portrait": "获取作者粉丝画像(热点宝),用于内容画像缺失兜底。",
    "store_results_mysql": "将 output.json 写入 MySQL(作者表与内容表)。",
    "create_crawler_plan_by_douyin_content_id": "为入选视频生成 AIGC 爬取计划。",
    "create_crawler_plan_by_douyin_account_id": "为入选账号生成 AIGC 爬取计划。",
}

# =========================
# Run configuration (read from the environment / .env by default)
# =========================
# An unset OR empty INPUT_LOG_PATH falls back to the default. An empty value
# would otherwise be interpreted as the current working directory.
INPUT_LOG_PATH = os.getenv("INPUT_LOG_PATH") or ".cache/input_log"
# None (unset or empty) means: write next to the input file, with a .html suffix.
OUTPUT_HTML_PATH: str | None = os.getenv("OUTPUT_HTML_PATH") or None
# Whether every [FOLD] block is collapsed by default.
COLLAPSE_ALL_FOLDS = False
# Folds matching these prefixes/keywords are collapsed by default. Copies are
# taken so that editing the active settings never mutates the DEFAULT_* lists.
COLLAPSE_PREFIXES = list(DEFAULT_COLLAPSE_PREFIXES)
COLLAPSE_KEYWORDS = list(DEFAULT_COLLAPSE_KEYWORDS)
|
|
|
+
|
|
|
+
|
|
|
def resolve_config_path(path_str: str) -> Path:
    """Turn a configured path string into an absolute, resolved ``Path``.

    ``~`` is expanded first. An absolute path is returned resolved as is. A
    relative path is tried against, in order: the current working directory,
    the directory containing this script, and the directory two levels above
    the script. The first candidate that exists wins.

    When no candidate exists, the last one (two levels above the script) is
    returned so that error messages built from it are stable regardless of
    where the script was started.
    """
    configured = Path(path_str).expanduser()
    if configured.is_absolute():
        return configured.resolve()

    here = Path(__file__).resolve().parent
    search_roots = (Path.cwd(), here, here.parent.parent)

    candidate = configured
    for root in search_roots:
        candidate = (root / configured).resolve()
        if candidate.exists():
            return candidate

    # Nothing exists: fall back to the candidate built from the last root.
    return candidate
|
|
|
+
|
|
|
+
|
|
|
def should_collapse(
    title: str,
    collapse_prefixes: list[str],
    collapse_keywords: list[str],
    collapse_all: bool,
) -> bool:
    """Decide whether a fold with the given title starts out collapsed.

    Returns True when ``collapse_all`` is set, when ``title`` starts with any
    entry of ``collapse_prefixes``, or when ``title`` contains any entry of
    ``collapse_keywords``; otherwise False.
    """
    # str.startswith accepts a tuple and is False for an empty tuple.
    if collapse_all or title.startswith(tuple(collapse_prefixes)):
        return True
    for keyword in collapse_keywords:
        if keyword in title:
            return True
    return False
|
|
|
+
|
|
|
+
|
|
|
def render_text_block(lines: list[str]) -> str:
    """Render consecutive log lines as one escaped ``<pre>`` element.

    Blank (empty or whitespace-only) lines at the start and end are dropped,
    and each run of blank lines in between is reduced to a single empty line.
    The remaining text is HTML-escaped.

    Args:
        lines: Raw log lines without line terminators. The list is not modified.

    Returns:
        ``<pre class="log-text">...</pre>``, or an empty string when there is
        no non-blank line to show.
    """
    # Find the non-blank window with two indices instead of repeatedly calling
    # list.pop(0), which is quadratic for long runs of leading blank lines.
    start, end = 0, len(lines)
    while start < end and not lines[start].strip():
        start += 1
    while end > start and not lines[end - 1].strip():
        end -= 1
    if start == end:
        return ""

    compact: list[str] = []
    previous_blank = False
    for line in lines[start:end]:
        blank = not line.strip()
        if blank and previous_blank:
            # Second or later blank line of a run: skip it.
            continue
        # Whitespace-only lines are normalised to a truly empty line.
        compact.append("" if blank else line)
        previous_blank = blank

    escaped = html.escape("\n".join(compact))
    return f'<pre class="log-text">{escaped}</pre>'
|
|
|
+
|
|
|
+
|
|
|
def enrich_fold_title(title: str) -> str:
    """Append the tool's description to a tool-call fold title.

    Only titles that begin with the wrench marker followed by a space are
    considered. The text after the marker is stripped and looked up in
    ``TOOL_DESCRIPTION_MAP``; when a non-empty description is found it is added
    in parentheses, otherwise the title is returned unchanged.
    """
    marker = "🔧 "
    head, found, rest = title.partition(marker)
    # An empty head together with a found marker means the title starts with it.
    if head or not found:
        return title

    name = rest.strip()
    summary = TOOL_DESCRIPTION_MAP.get(name)
    return f"{marker}{name}({summary})" if summary else title
|
|
|
+
|
|
|
+
|
|
|
def render_node(
    node: Node,
    collapse_prefixes: list[str],
    collapse_keywords: list[str],
    collapse_all: bool,
) -> str:
    """Render the children of ``node`` as an HTML fragment.

    Consecutive text lines are grouped into one block via
    ``render_text_block``. Each child fold becomes a ``<details>`` element
    whose ``<summary>`` is the escaped, enriched title and whose content is
    the recursively rendered child. Folds selected by ``should_collapse`` get
    the class ``fold tool-fold`` and start closed; all others get
    ``fold normal-fold`` and start open. Child nodes without a title produce
    no output.
    """
    fragments: list[str] = []
    pending_lines: list[str] = []

    for item in node.entries:
        if isinstance(item, str):
            pending_lines.append(item)
            continue
        if not item.is_fold:
            continue

        # Emit the text collected so far so the output keeps the log order.
        if pending_lines:
            fragments.append(render_text_block(pending_lines))
            pending_lines = []

        fold_title = item.title or ""
        collapsed = should_collapse(
            title=fold_title,
            collapse_prefixes=collapse_prefixes,
            collapse_keywords=collapse_keywords,
            collapse_all=collapse_all,
        )
        if collapsed:
            css_class, open_attr = "fold tool-fold", ""
        else:
            css_class, open_attr = "fold normal-fold", " open"

        summary = html.escape(enrich_fold_title(fold_title))
        nested = render_node(
            item,
            collapse_prefixes=collapse_prefixes,
            collapse_keywords=collapse_keywords,
            collapse_all=collapse_all,
        )
        fragments.append(
            f'<details class="{css_class}"{open_attr}>'
            f'<summary>{summary}</summary>'
            f"{nested}"
            "</details>"
        )

    # Trailing text after the last fold.
    if pending_lines:
        fragments.append(render_text_block(pending_lines))

    return "".join(fragments)
|
|
|
+
|
|
|
+
|
|
|
def build_html(body: str, source_name: str) -> str:
    """Wrap a rendered log fragment in a complete HTML document.

    Args:
        body: HTML fragment placed verbatim (not escaped) inside the
            ``content`` container.
        source_name: Name shown in the page title and header; it is
            HTML-escaped before insertion.

    Returns:
        The full page, including inline CSS and the script that wires the
        two buttons to open/close every ``details.tool-fold`` element.
    """
    # Escape once and reuse for both the <title> and the header line.
    safe_source = html.escape(source_name)
    return f"""<!doctype html>
<html lang="zh-CN">
<head>
 <meta charset="UTF-8" />
 <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 <title>Run Log 可视化 - {safe_source}</title>
 <style>
 :root {{
 --bg: #0b1020;
 --panel: #131a2a;
 --text: #e8edf7;
 --muted: #98a2b3;
 --accent: #6ea8fe;
 --border: #263146;
 }}
 * {{
 box-sizing: border-box;
 }}
 body {{
 margin: 0;
 background: var(--bg);
 color: var(--text);
 font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif;
 }}
 .wrap {{
 max-width: 1200px;
 margin: 0 auto;
 padding: 20px;
 }}
 .header {{
 margin-bottom: 14px;
 display: flex;
 align-items: center;
 gap: 10px;
 flex-wrap: wrap;
 }}
 .title {{
 font-size: 18px;
 font-weight: 700;
 }}
 .source {{
 color: var(--muted);
 font-size: 13px;
 }}
 button {{
 border: 1px solid var(--border);
 background: var(--panel);
 color: var(--text);
 padding: 6px 10px;
 border-radius: 8px;
 cursor: pointer;
 }}
 button:hover {{
 border-color: var(--accent);
 color: var(--accent);
 }}
 .content {{
 background: var(--panel);
 border: 1px solid var(--border);
 border-radius: 10px;
 padding: 10px;
 }}
 details {{
 margin: 6px 0;
 border: 1px solid var(--border);
 border-radius: 8px;
 background: rgba(255, 255, 255, 0.01);
 }}
 details > summary {{
 cursor: pointer;
 padding: 8px 10px;
 font-size: 13px;
 list-style: none;
 user-select: none;
 color: #cdd6e5;
 }}
 details > summary::-webkit-details-marker {{
 display: none;
 }}
 details > summary::before {{
 content: "▶";
 display: inline-block;
 margin-right: 6px;
 transform: rotate(0deg);
 transition: transform 120ms ease;
 color: var(--muted);
 }}
 details[open] > summary::before {{
 transform: rotate(90deg);
 }}
 .tool-fold > summary {{
 color: #f6cf76;
 }}
 .log-text {{
 margin: 0;
 padding: 10px;
 border-top: 1px dashed var(--border);
 color: var(--text);
 white-space: pre-wrap;
 word-break: break-word;
 line-height: 1.4;
 font-size: 13px;
 font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
 }}
 </style>
</head>
<body>
 <div class="wrap">
 <div class="header">
 <div class="title">Run Log 可视化</div>
 <div class="source">{safe_source}</div>
 <button id="expand-tools">展开全部工具调用</button>
 <button id="collapse-tools">折叠全部工具调用</button>
 </div>
 <div class="content">{body}</div>
 </div>
 <script>
 const toolFolds = Array.from(document.querySelectorAll("details.tool-fold"));
 document.getElementById("expand-tools").addEventListener("click", () => {{
 toolFolds.forEach((el) => (el.open = true));
 }});
 document.getElementById("collapse-tools").addEventListener("click", () => {{
 toolFolds.forEach((el) => (el.open = false));
 }});
 </script>
</body>
</html>
"""
|
|
|
+
|
|
|
+
|
|
|
def generate_html(
    input_path: Path,
    output_path: Path,
    collapse_prefixes: list[str],
    collapse_keywords: list[str],
    collapse_all: bool = False,
) -> None:
    """Read a log file, render it and write the resulting HTML page.

    The input is read as UTF-8, parsed with ``parse_log``, rendered with
    ``render_node`` and wrapped by ``build_html`` using the input file's name
    as the displayed source. Missing parent directories of ``output_path`` are
    created, and the page is written as UTF-8.
    """
    log_tree = parse_log(input_path.read_text(encoding="utf-8"))
    page = build_html(
        body=render_node(
            log_tree,
            collapse_prefixes=collapse_prefixes,
            collapse_keywords=collapse_keywords,
            collapse_all=collapse_all,
        ),
        source_name=input_path.name,
    )

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(page, encoding="utf-8")
|
|
|
+
|
|
|
+
|
|
|
def main() -> None:
    """Resolve the configured input/output paths and generate the HTML page.

    ``INPUT_LOG_PATH`` may point to a file or a directory. For a directory the
    most recently modified ``run_log_*.txt`` is used; if there is none, the
    most recently modified ``*.txt``. The output goes to ``OUTPUT_HTML_PATH``
    when set, otherwise next to the input file with a ``.html`` suffix.

    Raises:
        FileNotFoundError: The input path does not exist, or the directory
            contains no matching log file.
    """
    input_base = resolve_config_path(INPUT_LOG_PATH)

    if input_base.is_file():
        input_path = input_base
    elif input_base.is_dir():
        # Prefer the newest run_log_*.txt, then fall back to any *.txt.
        input_path = None
        for pattern in ("run_log_*.txt", "*.txt"):
            input_path = max(
                input_base.glob(pattern),
                key=lambda p: p.stat().st_mtime,
                default=None,
            )
            if input_path is not None:
                break
        if input_path is None:
            raise FileNotFoundError(f"目录下未找到可渲染日志文件: {input_base}")
    else:
        raise FileNotFoundError(f"输入日志路径不存在: {input_base}")

    output_path = (
        resolve_config_path(OUTPUT_HTML_PATH)
        if OUTPUT_HTML_PATH
        else input_path.with_suffix(".html")
    )

    generate_html(
        input_path=input_path,
        output_path=output_path,
        collapse_prefixes=COLLAPSE_PREFIXES,
        collapse_keywords=COLLAPSE_KEYWORDS,
        collapse_all=COLLAPSE_ALL_FOLDS,
    )
    print(f"HTML 已生成: {output_path}")
|
|
|
+
|
|
|
+
|
|
|
# Run the renderer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|