render_log_html.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. #!/usr/bin/env python3
  2. """将 run_log 文本渲染为可折叠 HTML 页面。
  3. 直接在脚本内修改 INPUT_LOG_PATH / OUTPUT_HTML_PATH 后运行:
  4. python examples/piaoquan_needs/render_log_html.py
  5. """
  6. from __future__ import annotations
  7. import html
  8. import os
  9. from dataclasses import dataclass, field
  10. from pathlib import Path
  11. from dotenv import load_dotenv
# Load the ".env" file that sits in the same directory as this script.
# The path is derived from __file__, so it does not depend on the directory
# the script is launched from. override=False is passed to load_dotenv.
load_dotenv(dotenv_path=Path(__file__).resolve().parent / ".env", override=False)
  14. @dataclass
  15. class Node:
  16. title: str | None = None
  17. entries: list[str | "Node"] = field(default_factory=list)
  18. @property
  19. def is_fold(self) -> bool:
  20. return self.title is not None
  21. def parse_log(content: str) -> Node:
  22. root = Node(title=None)
  23. stack: list[Node] = [root]
  24. for raw_line in content.splitlines():
  25. line = raw_line.rstrip("\n")
  26. tag = line.strip()
  27. if tag.startswith("[FOLD:") and tag.endswith("]"):
  28. title = tag[len("[FOLD:") : -1]
  29. node = Node(title=title)
  30. stack[-1].entries.append(node)
  31. stack.append(node)
  32. continue
  33. if tag == "[/FOLD]":
  34. # 容错:遇到多余的 [/FOLD] 时,忽略而不是把它当作正文
  35. if len(stack) > 1:
  36. stack.pop()
  37. continue
  38. stack[-1].entries.append(line)
  39. while len(stack) > 1:
  40. unclosed = stack.pop()
  41. # 容错: 遇到缺失 [/FOLD] 时,保留原有内容,不丢日志
  42. stack[-1].entries.append(unclosed)
  43. return root
DEFAULT_COLLAPSE_PREFIXES = ["🔧", "📥", "📤"]
DEFAULT_COLLAPSE_KEYWORDS = ["调用参数", "返回内容"]
# Static mapping from tool name to a one-line description of the tool.
# It is used only to annotate tool-call fold titles in the rendered log.
TOOL_DESCRIPTION_MAP: dict[str, str] = {
    "think_and_plan": "系统化记录思考、计划与下一步行动(只记录不获取新信息)。",
    "douyin_search": "通过关键词在抖音上搜索视频内容。",
    "douyin_user_videos": "通过账号/作者 sec_uid 获取其历史作品列表。",
    "get_content_fans_portrait": "获取视频点赞用户画像(热点宝),判断 metadata.has_portrait。",
    "get_account_fans_portrait": "获取作者粉丝画像(热点宝),用于内容画像缺失兜底。",
    "store_results_mysql": "将 output.json 写入 MySQL(作者表与内容表)。",
    "create_crawler_plan_by_douyin_content_id": "为入选视频生成 AIGC 爬取计划。",
    "create_crawler_plan_by_douyin_account_id": "为入选账号生成 AIGC 爬取计划。",
}
# =========================
# Runtime configuration (read from environment variables by default)
# =========================
INPUT_LOG_PATH = os.getenv("INPUT_LOG_PATH", ".cache/input_log")
# None when the variable is unset or empty; main() then writes the HTML next
# to the input file, using the same name with a .html suffix.
OUTPUT_HTML_PATH: str | None = os.getenv("OUTPUT_HTML_PATH") or None
# Whether every [FOLD] block starts collapsed.
COLLAPSE_ALL_FOLDS = False
# Folds whose title starts with one of these prefixes, or contains one of
# these keywords, start collapsed.
COLLAPSE_PREFIXES = DEFAULT_COLLAPSE_PREFIXES
COLLAPSE_KEYWORDS = DEFAULT_COLLAPSE_KEYWORDS
  68. def resolve_config_path(path_str: str) -> Path:
  69. """解析配置中的路径,兼容从项目根目录或脚本目录运行。"""
  70. raw = Path(path_str).expanduser()
  71. if raw.is_absolute():
  72. return raw.resolve()
  73. cwd_candidate = (Path.cwd() / raw).resolve()
  74. if cwd_candidate.exists():
  75. return cwd_candidate
  76. script_dir = Path(__file__).resolve().parent
  77. script_candidate = (script_dir / raw).resolve()
  78. if script_candidate.exists():
  79. return script_candidate
  80. project_root = script_dir.parent.parent
  81. project_candidate = (project_root / raw).resolve()
  82. if project_candidate.exists():
  83. return project_candidate
  84. # 如果都不存在,返回项目根拼接结果,便于报错信息更稳定
  85. return project_candidate
  86. def should_collapse(
  87. title: str,
  88. collapse_prefixes: list[str],
  89. collapse_keywords: list[str],
  90. collapse_all: bool,
  91. ) -> bool:
  92. if collapse_all:
  93. return True
  94. if any(title.startswith(prefix) for prefix in collapse_prefixes):
  95. return True
  96. return any(keyword in title for keyword in collapse_keywords)
  97. def render_text_block(lines: list[str]) -> str:
  98. if not lines:
  99. return ""
  100. normalized = lines[:]
  101. while normalized and normalized[0].strip() == "":
  102. normalized.pop(0)
  103. while normalized and normalized[-1].strip() == "":
  104. normalized.pop()
  105. if not normalized:
  106. return ""
  107. compact: list[str] = []
  108. empty_streak = 0
  109. for line in normalized:
  110. if line.strip() == "":
  111. empty_streak += 1
  112. if empty_streak <= 1:
  113. compact.append("")
  114. else:
  115. empty_streak = 0
  116. compact.append(line)
  117. escaped = html.escape("\n".join(compact))
  118. return f'<pre class="log-text">{escaped}</pre>'
  119. def enrich_fold_title(title: str) -> str:
  120. """为工具调用标题附加工具功能描述。"""
  121. tool_prefix = "🔧 "
  122. if not title.startswith(tool_prefix):
  123. return title
  124. tool_name = title[len(tool_prefix):].strip()
  125. description = TOOL_DESCRIPTION_MAP.get(tool_name)
  126. if not description:
  127. return title
  128. return f"{tool_prefix}{tool_name}({description})"
  129. def render_node(
  130. node: Node,
  131. collapse_prefixes: list[str],
  132. collapse_keywords: list[str],
  133. collapse_all: bool,
  134. ) -> str:
  135. parts: list[str] = []
  136. text_buffer: list[str] = []
  137. def flush_text_buffer() -> None:
  138. if text_buffer:
  139. parts.append(render_text_block(text_buffer))
  140. text_buffer.clear()
  141. for entry in node.entries:
  142. if isinstance(entry, str):
  143. text_buffer.append(entry)
  144. continue
  145. child = entry
  146. if child.is_fold:
  147. flush_text_buffer()
  148. title = child.title or ""
  149. is_collapsed = should_collapse(
  150. title=title,
  151. collapse_prefixes=collapse_prefixes,
  152. collapse_keywords=collapse_keywords,
  153. collapse_all=collapse_all,
  154. )
  155. folded_class = "fold tool-fold" if is_collapsed else "fold normal-fold"
  156. open_attr = "" if is_collapsed else " open"
  157. display_title = enrich_fold_title(title)
  158. inner = render_node(
  159. child,
  160. collapse_prefixes=collapse_prefixes,
  161. collapse_keywords=collapse_keywords,
  162. collapse_all=collapse_all,
  163. )
  164. parts.append(
  165. f'<details class="{folded_class}"{open_attr}>'
  166. f'<summary>{html.escape(display_title)}</summary>'
  167. f"{inner}"
  168. "</details>"
  169. )
  170. flush_text_buffer()
  171. return "".join(parts)
def build_html(body: str, source_name: str) -> str:
    """Wrap a rendered log fragment in the complete HTML page.

    The page contains inline CSS, a header showing ``source_name`` with two
    buttons, and an inline script. The script collects every
    ``details.tool-fold`` element once and lets the buttons open or close
    all of them.

    Args:
        body: HTML fragment placed inside the ``.content`` container. It is
            inserted verbatim, without escaping.
        source_name: Name shown in the page title and header; it is
            HTML-escaped before insertion.

    Returns:
        The full HTML document as a string.
    """
    return f"""<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Run Log 可视化 - {html.escape(source_name)}</title>
<style>
:root {{
--bg: #0b1020;
--panel: #131a2a;
--text: #e8edf7;
--muted: #98a2b3;
--accent: #6ea8fe;
--border: #263146;
}}
* {{
box-sizing: border-box;
}}
body {{
margin: 0;
background: var(--bg);
color: var(--text);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif;
}}
.wrap {{
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}}
.header {{
margin-bottom: 14px;
display: flex;
align-items: center;
gap: 10px;
flex-wrap: wrap;
}}
.title {{
font-size: 18px;
font-weight: 700;
}}
.source {{
color: var(--muted);
font-size: 13px;
}}
button {{
border: 1px solid var(--border);
background: var(--panel);
color: var(--text);
padding: 6px 10px;
border-radius: 8px;
cursor: pointer;
}}
button:hover {{
border-color: var(--accent);
color: var(--accent);
}}
.content {{
background: var(--panel);
border: 1px solid var(--border);
border-radius: 10px;
padding: 10px;
}}
details {{
margin: 6px 0;
border: 1px solid var(--border);
border-radius: 8px;
background: rgba(255, 255, 255, 0.01);
}}
details > summary {{
cursor: pointer;
padding: 8px 10px;
font-size: 13px;
list-style: none;
user-select: none;
color: #cdd6e5;
}}
details > summary::-webkit-details-marker {{
display: none;
}}
details > summary::before {{
content: "▶";
display: inline-block;
margin-right: 6px;
transform: rotate(0deg);
transition: transform 120ms ease;
color: var(--muted);
}}
details[open] > summary::before {{
transform: rotate(90deg);
}}
.tool-fold > summary {{
color: #f6cf76;
}}
.log-text {{
margin: 0;
padding: 10px;
border-top: 1px dashed var(--border);
color: var(--text);
white-space: pre-wrap;
word-break: break-word;
line-height: 1.4;
font-size: 13px;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
}}
</style>
</head>
<body>
<div class="wrap">
<div class="header">
<div class="title">Run Log 可视化</div>
<div class="source">{html.escape(source_name)}</div>
<button id="expand-tools">展开全部工具调用</button>
<button id="collapse-tools">折叠全部工具调用</button>
</div>
<div class="content">{body}</div>
</div>
<script>
const toolFolds = Array.from(document.querySelectorAll("details.tool-fold"));
document.getElementById("expand-tools").addEventListener("click", () => {{
toolFolds.forEach((el) => (el.open = true));
}});
document.getElementById("collapse-tools").addEventListener("click", () => {{
toolFolds.forEach((el) => (el.open = false));
}});
</script>
</body>
</html>
"""
  301. def generate_html(
  302. input_path: Path,
  303. output_path: Path,
  304. collapse_prefixes: list[str],
  305. collapse_keywords: list[str],
  306. collapse_all: bool = False,
  307. ) -> None:
  308. content = input_path.read_text(encoding="utf-8")
  309. tree = parse_log(content)
  310. body = render_node(
  311. tree,
  312. collapse_prefixes=collapse_prefixes,
  313. collapse_keywords=collapse_keywords,
  314. collapse_all=collapse_all,
  315. )
  316. html_content = build_html(body=body, source_name=input_path.name)
  317. output_path.parent.mkdir(parents=True, exist_ok=True)
  318. output_path.write_text(html_content, encoding="utf-8")
  319. def main() -> None:
  320. input_base = resolve_config_path(INPUT_LOG_PATH)
  321. if input_base.is_file():
  322. input_path = input_base
  323. elif input_base.is_dir():
  324. # 优先渲染最新 run_log_*.txt,其次渲染任意 *.txt
  325. candidates = sorted(
  326. input_base.glob("run_log_*.txt"),
  327. key=lambda p: p.stat().st_mtime,
  328. reverse=True,
  329. )
  330. if not candidates:
  331. candidates = sorted(
  332. input_base.glob("*.txt"),
  333. key=lambda p: p.stat().st_mtime,
  334. reverse=True,
  335. )
  336. if not candidates:
  337. raise FileNotFoundError(f"目录下未找到可渲染日志文件: {input_base}")
  338. input_path = candidates[0]
  339. else:
  340. raise FileNotFoundError(f"输入日志路径不存在: {input_base}")
  341. if OUTPUT_HTML_PATH:
  342. output_path = resolve_config_path(OUTPUT_HTML_PATH)
  343. else:
  344. output_path = input_path.with_suffix(".html")
  345. generate_html(
  346. input_path=input_path,
  347. output_path=output_path,
  348. collapse_prefixes=COLLAPSE_PREFIXES,
  349. collapse_keywords=COLLAPSE_KEYWORDS,
  350. collapse_all=COLLAPSE_ALL_FOLDS,
  351. )
  352. print(f"HTML 已生成: {output_path}")
  353. if __name__ == "__main__":
  354. main()