render_log_html.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. #!/usr/bin/env python3
  2. """将 run_log 文本渲染为可折叠 HTML 页面。
  3. 直接在脚本内修改 INPUT_LOG_PATH / OUTPUT_HTML_PATH 后运行:
  4. python examples/piaoquan_needs/render_log_html.py
  5. """
  6. from __future__ import annotations
  7. import html
  8. import logging
  9. import os
  10. from dataclasses import dataclass, field
  11. from pathlib import Path
  12. from dotenv import load_dotenv
# Load the ".env" file that sits in the same directory as this script.
# The path is built from __file__, so it does not depend on the directory
# the script is launched from. override=False is python-dotenv's switch for
# keeping variables that are already set in the process environment.
load_dotenv(dotenv_path=Path(__file__).resolve().parent / ".env", override=False)
  15. @dataclass
  16. class Node:
  17. title: str | None = None
  18. entries: list[str | "Node"] = field(default_factory=list)
  19. @property
  20. def is_fold(self) -> bool:
  21. return self.title is not None
  22. def parse_log(content: str) -> Node:
  23. root = Node(title=None)
  24. stack: list[Node] = [root]
  25. for raw_line in content.splitlines():
  26. line = raw_line.rstrip("\n")
  27. tag = line.strip()
  28. if tag.startswith("[FOLD:") and tag.endswith("]"):
  29. title = tag[len("[FOLD:") : -1]
  30. node = Node(title=title)
  31. stack[-1].entries.append(node)
  32. stack.append(node)
  33. continue
  34. if tag == "[/FOLD]":
  35. # 容错:遇到多余的 [/FOLD] 时,忽略而不是把它当作正文
  36. if len(stack) > 1:
  37. stack.pop()
  38. continue
  39. stack[-1].entries.append(line)
  40. while len(stack) > 1:
  41. unclosed = stack.pop()
  42. # 容错: 遇到缺失 [/FOLD] 时,保留原有内容,不丢日志
  43. stack[-1].entries.append(unclosed)
  44. return root
# Fold titles starting with one of these prefixes are collapsed by default.
DEFAULT_COLLAPSE_PREFIXES = ["🔧", "📥", "📤"]
# Fold titles containing one of these keywords are collapsed by default.
DEFAULT_COLLAPSE_KEYWORDS = ["调用参数", "返回内容"]
# Tool summaries (static mapping, used when visualising the log).
# Keys are tool names; values are the descriptions appended to fold titles.
TOOL_DESCRIPTION_MAP: dict[str, str] = {
    "think_and_plan": "系统化记录思考、计划与下一步行动(只记录不获取新信息)。",
    "douyin_search": "通过关键词在抖音上搜索视频内容。",
    "douyin_search_tikhub": "通过关键词在抖音上搜索视频内容(Tikhub 接口)。",
    "douyin_user_videos": "通过账号/作者 sec_uid 获取其历史作品列表。",
    "get_content_fans_portrait": "获取视频点赞用户画像(热点宝),判断 metadata.has_portrait。",
    "get_account_fans_portrait": "获取作者粉丝画像(热点宝),用于内容画像缺失兜底。",
    "store_results_mysql": "将 output.json 写入 MySQL(作者表与内容表)。",
    "create_crawler_plan_by_douyin_content_id": "为入选视频生成 AIGC 爬取计划。",
    "create_crawler_plan_by_douyin_account_id": "为入选账号生成 AIGC 爬取计划。",
}
# =========================
# Runtime configuration (read from the environment / .env by default)
# =========================
# Log file or directory to render; falls back to ".cache/input_log".
INPUT_LOG_PATH = os.getenv("INPUT_LOG_PATH", ".cache/input_log")
# When None (unset or empty), the HTML is written next to the input file,
# using the same name with a .html suffix.
OUTPUT_HTML_PATH: str | None = os.getenv("OUTPUT_HTML_PATH") or None
# Whether every [FOLD] block is collapsed by default.
COLLAPSE_ALL_FOLDS = False
# Fold blocks matching these prefixes/keywords are collapsed by default.
# NOTE: these names alias the DEFAULT_* lists above (same list objects).
COLLAPSE_PREFIXES = DEFAULT_COLLAPSE_PREFIXES
COLLAPSE_KEYWORDS = DEFAULT_COLLAPSE_KEYWORDS
logger = logging.getLogger(__name__)
  71. def resolve_config_path(path_str: str) -> Path:
  72. """解析配置中的路径,兼容从项目根目录或脚本目录运行。"""
  73. raw = Path(path_str).expanduser()
  74. if raw.is_absolute():
  75. return raw.resolve()
  76. cwd_candidate = (Path.cwd() / raw).resolve()
  77. if cwd_candidate.exists():
  78. return cwd_candidate
  79. script_dir = Path(__file__).resolve().parent
  80. script_candidate = (script_dir / raw).resolve()
  81. if script_candidate.exists():
  82. return script_candidate
  83. project_root = script_dir.parent.parent
  84. project_candidate = (project_root / raw).resolve()
  85. if project_candidate.exists():
  86. return project_candidate
  87. # 如果都不存在,返回项目根拼接结果,便于报错信息更稳定
  88. return project_candidate
  89. def should_collapse(
  90. title: str,
  91. collapse_prefixes: list[str],
  92. collapse_keywords: list[str],
  93. collapse_all: bool,
  94. ) -> bool:
  95. if collapse_all:
  96. return True
  97. if any(title.startswith(prefix) for prefix in collapse_prefixes):
  98. return True
  99. return any(keyword in title for keyword in collapse_keywords)
  100. def render_text_block(lines: list[str]) -> str:
  101. if not lines:
  102. return ""
  103. normalized = lines[:]
  104. while normalized and normalized[0].strip() == "":
  105. normalized.pop(0)
  106. while normalized and normalized[-1].strip() == "":
  107. normalized.pop()
  108. if not normalized:
  109. return ""
  110. compact: list[str] = []
  111. empty_streak = 0
  112. for line in normalized:
  113. if line.strip() == "":
  114. empty_streak += 1
  115. if empty_streak <= 1:
  116. compact.append("")
  117. else:
  118. empty_streak = 0
  119. compact.append(line)
  120. escaped = html.escape("\n".join(compact))
  121. return f'<pre class="log-text">{escaped}</pre>'
  122. def enrich_fold_title(title: str) -> str:
  123. """为工具调用标题附加工具功能描述。"""
  124. tool_prefix = "🔧 "
  125. if not title.startswith(tool_prefix):
  126. return title
  127. tool_name = title[len(tool_prefix):].strip()
  128. description = TOOL_DESCRIPTION_MAP.get(tool_name)
  129. if not description:
  130. return title
  131. return f"{tool_prefix}{tool_name}({description})"
  132. def render_node(
  133. node: Node,
  134. collapse_prefixes: list[str],
  135. collapse_keywords: list[str],
  136. collapse_all: bool,
  137. ) -> str:
  138. parts: list[str] = []
  139. text_buffer: list[str] = []
  140. def flush_text_buffer() -> None:
  141. if text_buffer:
  142. parts.append(render_text_block(text_buffer))
  143. text_buffer.clear()
  144. for entry in node.entries:
  145. if isinstance(entry, str):
  146. text_buffer.append(entry)
  147. continue
  148. child = entry
  149. if child.is_fold:
  150. flush_text_buffer()
  151. title = child.title or ""
  152. is_collapsed = should_collapse(
  153. title=title,
  154. collapse_prefixes=collapse_prefixes,
  155. collapse_keywords=collapse_keywords,
  156. collapse_all=collapse_all,
  157. )
  158. folded_class = "fold tool-fold" if is_collapsed else "fold normal-fold"
  159. open_attr = "" if is_collapsed else " open"
  160. display_title = enrich_fold_title(title)
  161. inner = render_node(
  162. child,
  163. collapse_prefixes=collapse_prefixes,
  164. collapse_keywords=collapse_keywords,
  165. collapse_all=collapse_all,
  166. )
  167. parts.append(
  168. f'<details class="{folded_class}"{open_attr}>'
  169. f'<summary>{html.escape(display_title)}</summary>'
  170. f"{inner}"
  171. "</details>"
  172. )
  173. flush_text_buffer()
  174. return "".join(parts)
def build_html(body: str, source_name: str) -> str:
    """Wrap a rendered log fragment in a complete, self-contained HTML page.

    Args:
        body: HTML fragment inserted verbatim into the ``.content``
            container. It is NOT escaped here, so it must already be markup.
        source_name: Label shown in the page ``<title>`` and in the header;
            it is HTML-escaped before insertion.

    Returns:
        The full HTML document as a string. It carries inline CSS and a small
        inline script that wires the ``#expand-tools`` / ``#collapse-tools``
        buttons to open or close every ``details.tool-fold`` element.
    """
    # Literal CSS/JS braces are doubled ({{ }}) because the template is an f-string.
    return f"""<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Run Log 可视化 - {html.escape(source_name)}</title>
<style>
:root {{
--bg: #0b1020;
--panel: #131a2a;
--text: #e8edf7;
--muted: #98a2b3;
--accent: #6ea8fe;
--border: #263146;
}}
* {{
box-sizing: border-box;
}}
body {{
margin: 0;
background: var(--bg);
color: var(--text);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif;
}}
.wrap {{
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}}
.header {{
margin-bottom: 14px;
display: flex;
align-items: center;
gap: 10px;
flex-wrap: wrap;
}}
.title {{
font-size: 18px;
font-weight: 700;
}}
.source {{
color: var(--muted);
font-size: 13px;
}}
button {{
border: 1px solid var(--border);
background: var(--panel);
color: var(--text);
padding: 6px 10px;
border-radius: 8px;
cursor: pointer;
}}
button:hover {{
border-color: var(--accent);
color: var(--accent);
}}
.content {{
background: var(--panel);
border: 1px solid var(--border);
border-radius: 10px;
padding: 10px;
}}
details {{
margin: 6px 0;
border: 1px solid var(--border);
border-radius: 8px;
background: rgba(255, 255, 255, 0.01);
}}
details > summary {{
cursor: pointer;
padding: 8px 10px;
font-size: 13px;
list-style: none;
user-select: none;
color: #cdd6e5;
}}
details > summary::-webkit-details-marker {{
display: none;
}}
details > summary::before {{
content: "▶";
display: inline-block;
margin-right: 6px;
transform: rotate(0deg);
transition: transform 120ms ease;
color: var(--muted);
}}
details[open] > summary::before {{
transform: rotate(90deg);
}}
.tool-fold > summary {{
color: #f6cf76;
}}
.log-text {{
margin: 0;
padding: 10px;
border-top: 1px dashed var(--border);
color: var(--text);
white-space: pre-wrap;
word-break: break-word;
line-height: 1.4;
font-size: 13px;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
}}
</style>
</head>
<body>
<div class="wrap">
<div class="header">
<div class="title">Run Log 可视化</div>
<div class="source">{html.escape(source_name)}</div>
<button id="expand-tools">展开全部工具调用</button>
<button id="collapse-tools">折叠全部工具调用</button>
</div>
<div class="content">{body}</div>
</div>
<script>
const toolFolds = Array.from(document.querySelectorAll("details.tool-fold"));
document.getElementById("expand-tools").addEventListener("click", () => {{
toolFolds.forEach((el) => (el.open = true));
}});
document.getElementById("collapse-tools").addEventListener("click", () => {{
toolFolds.forEach((el) => (el.open = false));
}});
</script>
</body>
</html>
"""
  304. def generate_html(
  305. input_path: Path,
  306. output_path: Path,
  307. collapse_prefixes: list[str],
  308. collapse_keywords: list[str],
  309. collapse_all: bool = False,
  310. ) -> None:
  311. content = input_path.read_text(encoding="utf-8")
  312. tree = parse_log(content)
  313. body = render_node(
  314. tree,
  315. collapse_prefixes=collapse_prefixes,
  316. collapse_keywords=collapse_keywords,
  317. collapse_all=collapse_all,
  318. )
  319. html_content = build_html(body=body, source_name=input_path.name)
  320. output_path.parent.mkdir(parents=True, exist_ok=True)
  321. output_path.write_text(html_content, encoding="utf-8")
  322. def render_log_html_and_upload(*, trace_id: str, log_file_path: Path) -> str | None:
  323. """
  324. 将 log.txt 渲染为 HTML 并上传 OSS。
  325. - 生成文件:与 log.txt 同目录的 log.html
  326. - 上传:使用 utils/oss_upload.upload_html_to_oss
  327. Returns:
  328. 上传成功返回公网 URL;失败返回 None(不抛出异常,便于上层不影响主流程)
  329. """
  330. tid = (trace_id or "").strip()
  331. if not tid:
  332. return None
  333. if not log_file_path.exists():
  334. return None
  335. html_path = log_file_path.with_name("log.html")
  336. try:
  337. generate_html(
  338. input_path=log_file_path,
  339. output_path=html_path,
  340. collapse_prefixes=COLLAPSE_PREFIXES,
  341. collapse_keywords=COLLAPSE_KEYWORDS,
  342. collapse_all=COLLAPSE_ALL_FOLDS,
  343. )
  344. except Exception as e:
  345. logger.warning("render log.html failed: trace_id=%s err=%s", tid, e)
  346. return None
  347. try:
  348. from utils.oss_upload import upload_html_to_oss
  349. url = upload_html_to_oss(html_path, task_id=tid)
  350. # 回写 MySQL:demand_find_content_result.web_html_url
  351. try:
  352. from db import update_web_html_url
  353. update_web_html_url(trace_id=tid, web_html_url=url)
  354. except Exception as e:
  355. logger.warning("update web_html_url failed: trace_id=%s err=%s", tid, e)
  356. return url
  357. except Exception as e:
  358. logger.warning("upload log.html failed: trace_id=%s err=%s", tid, e)
  359. return None
  360. def main() -> None:
  361. input_base = resolve_config_path(INPUT_LOG_PATH)
  362. if input_base.is_file():
  363. input_path = input_base
  364. elif input_base.is_dir():
  365. # 优先渲染最新 run_log_*.txt,其次渲染任意 *.txt
  366. candidates = sorted(
  367. input_base.glob("run_log_*.txt"),
  368. key=lambda p: p.stat().st_mtime,
  369. reverse=True,
  370. )
  371. if not candidates:
  372. candidates = sorted(
  373. input_base.glob("*.txt"),
  374. key=lambda p: p.stat().st_mtime,
  375. reverse=True,
  376. )
  377. if not candidates:
  378. raise FileNotFoundError(f"目录下未找到可渲染日志文件: {input_base}")
  379. input_path = candidates[0]
  380. else:
  381. raise FileNotFoundError(f"输入日志路径不存在: {input_base}")
  382. if OUTPUT_HTML_PATH:
  383. output_path = resolve_config_path(OUTPUT_HTML_PATH)
  384. else:
  385. output_path = input_path.with_suffix(".html")
  386. generate_html(
  387. input_path=input_path,
  388. output_path=output_path,
  389. collapse_prefixes=COLLAPSE_PREFIXES,
  390. collapse_keywords=COLLAPSE_KEYWORDS,
  391. collapse_all=COLLAPSE_ALL_FOLDS,
  392. )
  393. print(f"HTML 已生成: {output_path}")
  394. if __name__ == "__main__":
  395. main()