render_log_html.py 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757
  1. #!/usr/bin/env python3
  2. """将 run_log 文本渲染为可折叠 HTML 页面。
  3. 直接在脚本内修改 INPUT_LOG_PATH / OUTPUT_HTML_PATH 后运行:
  4. python examples/piaoquan_needs/render_log_html.py
  5. """
  6. from __future__ import annotations
  7. import argparse
  8. import json
  9. import html
  10. import logging
  11. import os
  12. from dataclasses import dataclass, field
  13. from pathlib import Path
  14. from dotenv import load_dotenv
# Load the .env file that sits next to this script, so the lookup does not
# depend on the directory the script is launched from. override=False keeps
# any variable that is already set in the process environment.
load_dotenv(dotenv_path=Path(__file__).resolve().parent / ".env", override=False)
  17. @dataclass
  18. class Node:
  19. title: str | None = None
  20. entries: list[str | "Node"] = field(default_factory=list)
  21. @property
  22. def is_fold(self) -> bool:
  23. return self.title is not None
  24. def parse_log(content: str) -> Node:
  25. root = Node(title=None)
  26. stack: list[Node] = [root]
  27. for raw_line in content.splitlines():
  28. line = raw_line.rstrip("\n")
  29. tag = line.strip()
  30. if tag.startswith("[FOLD:") and tag.endswith("]"):
  31. title = tag[len("[FOLD:") : -1]
  32. node = Node(title=title)
  33. stack[-1].entries.append(node)
  34. stack.append(node)
  35. continue
  36. if tag == "[/FOLD]":
  37. # 容错:遇到多余的 [/FOLD] 时,忽略而不是把它当作正文
  38. if len(stack) > 1:
  39. stack.pop()
  40. continue
  41. stack[-1].entries.append(line)
  42. while len(stack) > 1:
  43. unclosed = stack.pop()
  44. # 容错: 遇到缺失 [/FOLD] 时,保留原有内容,不丢日志
  45. stack[-1].entries.append(unclosed)
  46. return root
DEFAULT_COLLAPSE_PREFIXES = ["🔧", "📥", "📤"]
DEFAULT_COLLAPSE_KEYWORDS = ["调用参数", "返回内容"]
# Tool summaries (static mapping, used when visualizing the log).
TOOL_DESCRIPTION_MAP: dict[str, str] = {
    "think_and_plan": "系统化记录思考、计划与下一步行动(只记录不获取新信息)。",
    "douyin_search": "通过关键词在抖音上搜索视频内容。",
    "douyin_search_tikhub": "通过关键词在抖音上搜索视频内容(Tikhub 接口)。",
    "douyin_user_videos": "通过账号/作者 sec_uid 获取其历史作品列表。",
    "get_content_fans_portrait": "获取视频点赞用户画像(热点宝),判断 metadata.has_portrait。",
    "get_account_fans_portrait": "获取作者粉丝画像(热点宝),用于内容画像缺失兜底。",
    "store_results_mysql": "将 output.json 写入 MySQL(作者表与内容表)。",
    "create_crawler_plan_by_douyin_content_id": "为入选视频生成 AIGC 爬取计划。",
    "create_crawler_plan_by_douyin_account_id": "为入选账号生成 AIGC 爬取计划。",
}
# =========================
# Runtime configuration (read from the environment / .env by default)
# =========================
INPUT_LOG_PATH = os.getenv("INPUT_LOG_PATH", ".cache/input_log")
# When None, the HTML is written next to the input file with a .html suffix.
OUTPUT_HTML_PATH: str | None = os.getenv("OUTPUT_HTML_PATH") or None
# Output directory for run artifacts (the standard content_finder output directory).
OUTPUT_DIR = os.getenv("OUTPUT_DIR", ".cache/output")
# Data sources for the summary table shown at the top (optional). When unset,
# process_trace.json / output.json next to the input log are used.
PROCESS_TRACE_PATH: str | None = os.getenv("PROCESS_TRACE_PATH") or None
OUTPUT_JSON_PATH: str | None = os.getenv("OUTPUT_JSON_PATH") or None
# When PROCESS_TRACE_PATH / OUTPUT_JSON_PATH are not given explicitly and the
# files do not exist next to the input log, the run directory for this
# trace_id is tried instead (by default .cache/output/{trace_id}/...).
TRACE_ID: str | None = os.getenv("TRACE_ID") or None
# Whether every [FOLD] block is collapsed by default.
COLLAPSE_ALL_FOLDS = False
# Fold blocks whose title matches one of these prefixes/keywords start collapsed.
COLLAPSE_PREFIXES = DEFAULT_COLLAPSE_PREFIXES
COLLAPSE_KEYWORDS = DEFAULT_COLLAPSE_KEYWORDS
logger = logging.getLogger(__name__)
  80. def resolve_config_path(path_str: str) -> Path:
  81. """解析配置中的路径,兼容从项目根目录或脚本目录运行。"""
  82. raw = Path(path_str).expanduser()
  83. if raw.is_absolute():
  84. return raw.resolve()
  85. cwd_candidate = (Path.cwd() / raw).resolve()
  86. if cwd_candidate.exists():
  87. return cwd_candidate
  88. script_dir = Path(__file__).resolve().parent
  89. script_candidate = (script_dir / raw).resolve()
  90. if script_candidate.exists():
  91. return script_candidate
  92. project_root = script_dir.parent.parent
  93. project_candidate = (project_root / raw).resolve()
  94. if project_candidate.exists():
  95. return project_candidate
  96. # 如果都不存在,返回项目根拼接结果,便于报错信息更稳定
  97. return project_candidate
  98. def should_collapse(
  99. title: str,
  100. collapse_prefixes: list[str],
  101. collapse_keywords: list[str],
  102. collapse_all: bool,
  103. ) -> bool:
  104. if collapse_all:
  105. return True
  106. if any(title.startswith(prefix) for prefix in collapse_prefixes):
  107. return True
  108. return any(keyword in title for keyword in collapse_keywords)
  109. def render_text_block(lines: list[str]) -> str:
  110. if not lines:
  111. return ""
  112. normalized = lines[:]
  113. while normalized and normalized[0].strip() == "":
  114. normalized.pop(0)
  115. while normalized and normalized[-1].strip() == "":
  116. normalized.pop()
  117. if not normalized:
  118. return ""
  119. compact: list[str] = []
  120. empty_streak = 0
  121. for line in normalized:
  122. if line.strip() == "":
  123. empty_streak += 1
  124. if empty_streak <= 1:
  125. compact.append("")
  126. else:
  127. empty_streak = 0
  128. compact.append(line)
  129. escaped = html.escape("\n".join(compact))
  130. return f'<pre class="log-text">{escaped}</pre>'
  131. def enrich_fold_title(title: str) -> str:
  132. """为工具调用标题附加工具功能描述。"""
  133. tool_prefix = "🔧 "
  134. if not title.startswith(tool_prefix):
  135. return title
  136. tool_name = title[len(tool_prefix):].strip()
  137. description = TOOL_DESCRIPTION_MAP.get(tool_name)
  138. if not description:
  139. return title
  140. return f"{tool_prefix}{tool_name}({description})"
  141. def render_node(
  142. node: Node,
  143. collapse_prefixes: list[str],
  144. collapse_keywords: list[str],
  145. collapse_all: bool,
  146. ) -> str:
  147. parts: list[str] = []
  148. text_buffer: list[str] = []
  149. def flush_text_buffer() -> None:
  150. if text_buffer:
  151. parts.append(render_text_block(text_buffer))
  152. text_buffer.clear()
  153. for entry in node.entries:
  154. if isinstance(entry, str):
  155. text_buffer.append(entry)
  156. continue
  157. child = entry
  158. if child.is_fold:
  159. flush_text_buffer()
  160. title = child.title or ""
  161. is_collapsed = should_collapse(
  162. title=title,
  163. collapse_prefixes=collapse_prefixes,
  164. collapse_keywords=collapse_keywords,
  165. collapse_all=collapse_all,
  166. )
  167. folded_class = "fold tool-fold" if is_collapsed else "fold normal-fold"
  168. open_attr = "" if is_collapsed else " open"
  169. display_title = enrich_fold_title(title)
  170. inner = render_node(
  171. child,
  172. collapse_prefixes=collapse_prefixes,
  173. collapse_keywords=collapse_keywords,
  174. collapse_all=collapse_all,
  175. )
  176. parts.append(
  177. f'<details class="{folded_class}"{open_attr}>'
  178. f'<summary>{html.escape(display_title)}</summary>'
  179. f"{inner}"
  180. "</details>"
  181. )
  182. flush_text_buffer()
  183. return "".join(parts)
  184. def _safe_str(v: object) -> str:
  185. if v is None:
  186. return ""
  187. if isinstance(v, (str, int, float, bool)):
  188. return str(v)
  189. return json.dumps(v, ensure_ascii=False)
  190. def _truncate(s: str, max_len: int) -> str:
  191. s = s or ""
  192. if len(s) <= max_len:
  193. return s
  194. return s[: max(0, max_len - 1)] + "…"
  195. def _read_json_file(path: Path) -> dict:
  196. return json.loads(path.read_text(encoding="utf-8"))
  197. def _build_aweme_id_to_video_url(output_json_path: Path) -> dict[str, str]:
  198. """
  199. 从 output.json 的 contents[] 构建 {aweme_id: video_url} 映射。
  200. 约定:
  201. - output.json 中每条 content 都包含 aweme_id 与 video_url(字符串)
  202. """
  203. data = _read_json_file(output_json_path)
  204. contents = data.get("contents") or []
  205. if not isinstance(contents, list):
  206. return {}
  207. mapping: dict[str, str] = {}
  208. for item in contents:
  209. if not isinstance(item, dict):
  210. continue
  211. aweme_id = _safe_str(item.get("aweme_id")).strip()
  212. video_url = _safe_str(item.get("video_url")).strip()
  213. if aweme_id and video_url:
  214. mapping[aweme_id] = video_url
  215. return mapping
  216. def _build_process_trace_table_html(*, process_trace_path: Path, output_json_path: Path) -> str:
  217. """
  218. 生成置顶摘要表格。
  219. 数据来源:
  220. - process_trace.json: rows[]
  221. - output.json: contents[],按 aweme_id 补齐 video_url
  222. """
  223. if not process_trace_path.exists() or not output_json_path.exists():
  224. return ""
  225. try:
  226. trace_data = _read_json_file(process_trace_path)
  227. except Exception as e:
  228. logger.warning("read process_trace.json failed: path=%s err=%s", process_trace_path, e)
  229. return ""
  230. rows = trace_data.get("rows") or []
  231. if not isinstance(rows, list) or not rows:
  232. return ""
  233. aweme_to_url: dict[str, str] = {}
  234. try:
  235. aweme_to_url = _build_aweme_id_to_video_url(output_json_path)
  236. except Exception as e:
  237. logger.warning("read output.json failed: path=%s err=%s", output_json_path, e)
  238. headers: list[tuple[str, str]] = [
  239. ("input_features", "特征"),
  240. ("aweme_id", "视频id"),
  241. ("title", "标题"),
  242. ("video_url", "视频链接"),
  243. ("author_nickname", "作者"),
  244. ("from_case_point", "参考点"),
  245. ("strategy_type", "策略"),
  246. ("channel", "渠道"),
  247. ("search_keyword", "搜索词"),
  248. ("decision_basis", "依据"),
  249. ("decision_notes", "理由"),
  250. ]
  251. def td(text: str, *, muted: bool = False, title: str | None = None) -> str:
  252. klass = "cell muted" if muted else "cell"
  253. title_attr = f' title="{html.escape(title)}"' if title else ""
  254. return f'<td class="{klass}"{title_attr}>{html.escape(text)}</td>'
  255. body_parts: list[str] = []
  256. for r in rows:
  257. if not isinstance(r, dict):
  258. continue
  259. aweme_id = _safe_str(r.get("aweme_id")).strip()
  260. video_url = aweme_to_url.get(aweme_id, "")
  261. values: dict[str, str] = {
  262. "strategy_type": _safe_str(r.get("strategy_type")),
  263. "from_case_point": _safe_str(r.get("from_case_point")),
  264. "search_keyword": _safe_str(r.get("search_keyword")),
  265. "aweme_id": aweme_id,
  266. "title": _safe_str(r.get("title")),
  267. "author_nickname": _safe_str(r.get("author_nickname")),
  268. "channel": _safe_str(r.get("channel")),
  269. "decision_basis": _safe_str(r.get("decision_basis")),
  270. "decision_notes": _safe_str(r.get("decision_notes")),
  271. "input_features": _safe_str(r.get("input_features")),
  272. "video_url": video_url,
  273. }
  274. tds: list[str] = []
  275. for key, _label in headers:
  276. val = values.get(key, "")
  277. if key == "decision_notes":
  278. full = val
  279. val = _truncate(val, 80)
  280. tds.append(td(val, title=full))
  281. continue
  282. if key == "title":
  283. full = val
  284. val = _truncate(val, 60)
  285. tds.append(td(val, title=full))
  286. continue
  287. if key == "video_url":
  288. if video_url:
  289. safe_url = html.escape(video_url, quote=True)
  290. tds.append(
  291. '<td class="cell link-cell">'
  292. f'<a class="link" href="{safe_url}" target="_blank" rel="noreferrer">打开</a>'
  293. "</td>"
  294. )
  295. else:
  296. tds.append(td("", muted=True))
  297. continue
  298. tds.append(td(val))
  299. body_parts.append("<tr>" + "".join(tds) + "</tr>")
  300. if not body_parts:
  301. return ""
  302. thead = "".join(f"<th>{html.escape(label)}</th>" for _key, label in headers)
  303. return (
  304. '<div class="summary-panel">'
  305. '<div class="summary-title">过程追踪摘要</div>'
  306. f'<div class="summary-meta">{html.escape(process_trace_path.name)}</div>'
  307. '<div class="table-wrap">'
  308. '<table class="summary-table">'
  309. f"<thead><tr>{thead}</tr></thead>"
  310. f"<tbody>{''.join(body_parts)}</tbody>"
  311. "</table>"
  312. "</div>"
  313. "</div>"
  314. )
def build_html(body: str, source_name: str, *, summary_table_html: str = "") -> str:
    """Wrap the rendered log in a complete, self-contained HTML document.

    Args:
        body: HTML for the log content; inserted as-is (not escaped here).
        source_name: Name shown in the page title and header; HTML-escaped.
        summary_table_html: Optional HTML placed above the log content;
            inserted as-is (not escaped here).

    Returns:
        The full HTML page as a string, including inline CSS and a small
        script that wires the two header buttons to open or close every
        ``details.tool-fold`` element.
    """
    # Literal CSS/JS braces are doubled because the template is an f-string.
    return f"""<!doctype html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Run Log 可视化 - {html.escape(source_name)}</title>
<style>
:root {{
--bg: #0b1020;
--panel: #131a2a;
--text: #e8edf7;
--muted: #98a2b3;
--accent: #6ea8fe;
--border: #263146;
}}
* {{
box-sizing: border-box;
}}
body {{
margin: 0;
background: var(--bg);
color: var(--text);
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, "Noto Sans", sans-serif;
}}
.wrap {{
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}}
.header {{
margin-bottom: 14px;
display: flex;
align-items: center;
gap: 10px;
flex-wrap: wrap;
}}
.title {{
font-size: 18px;
font-weight: 700;
}}
.source {{
color: var(--muted);
font-size: 13px;
}}
button {{
border: 1px solid var(--border);
background: var(--panel);
color: var(--text);
padding: 6px 10px;
border-radius: 8px;
cursor: pointer;
}}
button:hover {{
border-color: var(--accent);
color: var(--accent);
}}
.content {{
background: var(--panel);
border: 1px solid var(--border);
border-radius: 10px;
padding: 10px;
}}
.summary-panel {{
background: rgba(255, 255, 255, 0.02);
border: 1px solid var(--border);
border-radius: 10px;
padding: 12px;
margin-bottom: 12px;
}}
.summary-title {{
font-size: 14px;
font-weight: 700;
margin-bottom: 2px;
letter-spacing: 0.2px;
}}
.summary-meta {{
color: var(--muted);
font-size: 12px;
margin-bottom: 10px;
}}
.table-wrap {{
overflow: auto;
border: 1px solid var(--border);
border-radius: 10px;
background: rgba(0, 0, 0, 0.12);
}}
table.summary-table {{
border-collapse: separate;
border-spacing: 0;
width: 100%;
min-width: 1100px;
font-size: 12px;
}}
table.summary-table thead th {{
position: sticky;
top: 0;
z-index: 1;
text-align: left;
padding: 10px 10px;
background: #111a2b;
border-bottom: 1px solid var(--border);
color: #cdd6e5;
white-space: nowrap;
}}
table.summary-table tbody tr {{
border-bottom: 1px solid rgba(255, 255, 255, 0.04);
}}
table.summary-table tbody tr:nth-child(2n) {{
background: rgba(255, 255, 255, 0.02);
}}
table.summary-table td.cell {{
padding: 9px 10px;
vertical-align: top;
border-bottom: 1px solid rgba(255, 255, 255, 0.04);
color: var(--text);
line-height: 1.35;
}}
table.summary-table td.muted {{
color: var(--muted);
}}
td.link-cell {{
white-space: nowrap;
}}
a.link {{
color: var(--accent);
text-decoration: none;
border: 1px solid rgba(110, 168, 254, 0.35);
padding: 2px 8px;
border-radius: 999px;
display: inline-block;
background: rgba(110, 168, 254, 0.08);
}}
a.link:hover {{
border-color: var(--accent);
background: rgba(110, 168, 254, 0.14);
}}
details {{
margin: 6px 0;
border: 1px solid var(--border);
border-radius: 8px;
background: rgba(255, 255, 255, 0.01);
}}
details > summary {{
cursor: pointer;
padding: 8px 10px;
font-size: 13px;
list-style: none;
user-select: none;
color: #cdd6e5;
}}
details > summary::-webkit-details-marker {{
display: none;
}}
details > summary::before {{
content: "▶";
display: inline-block;
margin-right: 6px;
transform: rotate(0deg);
transition: transform 120ms ease;
color: var(--muted);
}}
details[open] > summary::before {{
transform: rotate(90deg);
}}
.tool-fold > summary {{
color: #f6cf76;
}}
.log-text {{
margin: 0;
padding: 10px;
border-top: 1px dashed var(--border);
color: var(--text);
white-space: pre-wrap;
word-break: break-word;
line-height: 1.4;
font-size: 13px;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
}}
</style>
</head>
<body>
<div class="wrap">
<div class="header">
<div class="title">Run Log 可视化</div>
<div class="source">{html.escape(source_name)}</div>
<button id="expand-tools">展开全部工具调用</button>
<button id="collapse-tools">折叠全部工具调用</button>
</div>
{summary_table_html}
<div class="content">{body}</div>
</div>
<script>
const toolFolds = Array.from(document.querySelectorAll("details.tool-fold"));
document.getElementById("expand-tools").addEventListener("click", () => {{
toolFolds.forEach((el) => (el.open = true));
}});
document.getElementById("collapse-tools").addEventListener("click", () => {{
toolFolds.forEach((el) => (el.open = false));
}});
</script>
</body>
</html>
"""
  519. def generate_html(
  520. input_path: Path,
  521. output_path: Path,
  522. collapse_prefixes: list[str],
  523. collapse_keywords: list[str],
  524. collapse_all: bool = False,
  525. ) -> None:
  526. content = input_path.read_text(encoding="utf-8")
  527. tree = parse_log(content)
  528. body = render_node(
  529. tree,
  530. collapse_prefixes=collapse_prefixes,
  531. collapse_keywords=collapse_keywords,
  532. collapse_all=collapse_all,
  533. )
  534. if PROCESS_TRACE_PATH:
  535. process_trace_path = resolve_config_path(PROCESS_TRACE_PATH)
  536. else:
  537. process_trace_path = input_path.with_name("process_trace.json")
  538. if OUTPUT_JSON_PATH:
  539. output_json_path = resolve_config_path(OUTPUT_JSON_PATH)
  540. else:
  541. output_json_path = input_path.with_name("output.json")
  542. if TRACE_ID and (not process_trace_path.exists() or not output_json_path.exists()):
  543. trace_dir = resolve_config_path(f".cache/output/{TRACE_ID}")
  544. if not process_trace_path.exists():
  545. process_trace_path = trace_dir / "process_trace.json"
  546. if not output_json_path.exists():
  547. output_json_path = trace_dir / "output.json"
  548. summary_table_html = _build_process_trace_table_html(
  549. process_trace_path=process_trace_path,
  550. output_json_path=output_json_path,
  551. )
  552. html_content = build_html(body=body, source_name=input_path.name, summary_table_html=summary_table_html)
  553. output_path.parent.mkdir(parents=True, exist_ok=True)
  554. output_path.write_text(html_content, encoding="utf-8")
  555. def render_log_html_and_upload(*, trace_id: str, log_file_path: Path) -> str | None:
  556. """
  557. 将 log.txt 渲染为 HTML 并上传 OSS。
  558. - 生成文件:与 log.txt 同目录的 log.html
  559. - 上传:使用 utils/oss_upload.upload_html_to_oss
  560. Returns:
  561. 上传成功返回公网 URL;失败返回 None(不抛出异常,便于上层不影响主流程)
  562. """
  563. tid = (trace_id or "").strip()
  564. if not tid:
  565. return None
  566. if not log_file_path.exists():
  567. return None
  568. html_path = log_file_path.with_name("log.html")
  569. try:
  570. generate_html(
  571. input_path=log_file_path,
  572. output_path=html_path,
  573. collapse_prefixes=COLLAPSE_PREFIXES,
  574. collapse_keywords=COLLAPSE_KEYWORDS,
  575. collapse_all=COLLAPSE_ALL_FOLDS,
  576. )
  577. except Exception as e:
  578. logger.warning("render log.html failed: trace_id=%s err=%s", tid, e)
  579. return None
  580. try:
  581. from utils.oss_upload import upload_html_to_oss
  582. url = upload_html_to_oss(html_path, task_id=tid)
  583. # 回写 MySQL:demand_find_content_result.web_html_url
  584. try:
  585. from db import update_web_html_url
  586. update_web_html_url(trace_id=tid, web_html_url=url)
  587. except Exception as e:
  588. logger.warning("update web_html_url failed: trace_id=%s err=%s", tid, e)
  589. return url
  590. except Exception as e:
  591. logger.warning("upload log.html failed: trace_id=%s err=%s", tid, e)
  592. return None
  593. def _resolve_input_log_path_from_trace_id(*, trace_id: str, output_dir: Path) -> Path:
  594. tid = (trace_id or "").strip()
  595. if not tid:
  596. raise ValueError("trace_id is required")
  597. run_dir = (output_dir / tid).resolve()
  598. if not run_dir.exists():
  599. raise FileNotFoundError(f"OUTPUT_DIR 下未找到 trace_id 目录: {run_dir}")
  600. log_path = run_dir / "log.txt"
  601. if log_path.exists():
  602. return log_path
  603. # 兼容:部分任务可能用 run_log_*.txt 命名
  604. candidates = sorted(
  605. run_dir.glob("run_log_*.txt"),
  606. key=lambda p: p.stat().st_mtime,
  607. reverse=True,
  608. )
  609. if not candidates:
  610. candidates = sorted(
  611. run_dir.glob("*.txt"),
  612. key=lambda p: p.stat().st_mtime,
  613. reverse=True,
  614. )
  615. if not candidates:
  616. raise FileNotFoundError(f"trace_id 目录下未找到可渲染日志文件: {run_dir}")
  617. return candidates[0]
  618. def _resolve_input_log_path_from_input_base(input_base: Path) -> Path:
  619. if input_base.is_file():
  620. return input_base
  621. if input_base.is_dir():
  622. # 优先渲染最新 run_log_*.txt,其次渲染任意 *.txt
  623. candidates = sorted(
  624. input_base.glob("run_log_*.txt"),
  625. key=lambda p: p.stat().st_mtime,
  626. reverse=True,
  627. )
  628. if not candidates:
  629. candidates = sorted(
  630. input_base.glob("*.txt"),
  631. key=lambda p: p.stat().st_mtime,
  632. reverse=True,
  633. )
  634. if not candidates:
  635. raise FileNotFoundError(f"目录下未找到可渲染日志文件: {input_base}")
  636. return candidates[0]
  637. raise FileNotFoundError(f"输入日志路径不存在: {input_base}")
  638. def main(argv: list[str] | None = None) -> None:
  639. parser = argparse.ArgumentParser(description="Render run log text to collapsible HTML.")
  640. parser.add_argument("--trace-id", dest="trace_id", default="", help="trace_id in OUTPUT_DIR/<trace_id>/")
  641. parser.add_argument("trace_id_pos", nargs="?", default="", help="trace_id (positional), same as --trace-id")
  642. args = parser.parse_args(argv)
  643. trace_id = ((args.trace_id or "").strip() or (args.trace_id_pos or "").strip())
  644. if trace_id:
  645. output_dir = resolve_config_path(OUTPUT_DIR)
  646. input_path = _resolve_input_log_path_from_trace_id(trace_id=trace_id, output_dir=output_dir)
  647. output_path = input_path.with_name("log.html")
  648. else:
  649. input_base = resolve_config_path(INPUT_LOG_PATH)
  650. input_path = _resolve_input_log_path_from_input_base(input_base)
  651. if OUTPUT_HTML_PATH:
  652. output_path = resolve_config_path(OUTPUT_HTML_PATH)
  653. else:
  654. output_path = input_path.with_suffix(".html")
  655. generate_html(
  656. input_path=input_path,
  657. output_path=output_path,
  658. collapse_prefixes=COLLAPSE_PREFIXES,
  659. collapse_keywords=COLLAPSE_KEYWORDS,
  660. collapse_all=COLLAPSE_ALL_FOLDS,
  661. )
  662. print(f"HTML 已生成: {output_path}")
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()