#!/usr/bin/env python3 """将 run_log 文本渲染为可折叠 HTML 页面。 直接在脚本内修改 INPUT_LOG_PATH / OUTPUT_HTML_PATH 后运行: python examples/piaoquan_needs/render_log_html.py """ from __future__ import annotations import html import os from dataclasses import dataclass, field from pathlib import Path from dotenv import load_dotenv # 加载 examples/content_finder/.env(不依赖你从哪个目录运行) load_dotenv(dotenv_path=Path(__file__).resolve().parent / ".env", override=False) @dataclass class Node: title: str | None = None entries: list[str | "Node"] = field(default_factory=list) @property def is_fold(self) -> bool: return self.title is not None def parse_log(content: str) -> Node: root = Node(title=None) stack: list[Node] = [root] for raw_line in content.splitlines(): line = raw_line.rstrip("\n") tag = line.strip() if tag.startswith("[FOLD:") and tag.endswith("]"): title = tag[len("[FOLD:") : -1] node = Node(title=title) stack[-1].entries.append(node) stack.append(node) continue if tag == "[/FOLD]": # 容错:遇到多余的 [/FOLD] 时,忽略而不是把它当作正文 if len(stack) > 1: stack.pop() continue stack[-1].entries.append(line) while len(stack) > 1: unclosed = stack.pop() # 容错: 遇到缺失 [/FOLD] 时,保留原有内容,不丢日志 stack[-1].entries.append(unclosed) return root DEFAULT_COLLAPSE_PREFIXES = ["🔧", "📥", "📤"] DEFAULT_COLLAPSE_KEYWORDS = ["调用参数", "返回内容"] # 工具功能摘要(静态映射,用于日志可视化展示) TOOL_DESCRIPTION_MAP: dict[str, str] = { "think_and_plan": "系统化记录思考、计划与下一步行动(只记录不获取新信息)。", "douyin_search": "通过关键词在抖音上搜索视频内容。", "douyin_search_tikhub": "通过关键词在抖音上搜索视频内容(Tikhub 接口)。", "douyin_user_videos": "通过账号/作者 sec_uid 获取其历史作品列表。", "get_content_fans_portrait": "获取视频点赞用户画像(热点宝),判断 metadata.has_portrait。", "get_account_fans_portrait": "获取作者粉丝画像(热点宝),用于内容画像缺失兜底。", "store_results_mysql": "将 output.json 写入 MySQL(作者表与内容表)。", "create_crawler_plan_by_douyin_content_id": "为入选视频生成 AIGC 爬取计划。", "create_crawler_plan_by_douyin_account_id": "为入选账号生成 AIGC 爬取计划。", } # ========================= # 运行配置(默认从 .env 读取) # ========================= INPUT_LOG_PATH = os.getenv("INPUT_LOG_PATH", ".cache/input_log") # 设为 None 则默认生成到输入文件同名 .html OUTPUT_HTML_PATH: str | None = os.getenv("OUTPUT_HTML_PATH") or None # 是否默认折叠所有 [FOLD] 块 COLLAPSE_ALL_FOLDS = False # 命中这些前缀/关键词的折叠块默认收起 COLLAPSE_PREFIXES = DEFAULT_COLLAPSE_PREFIXES COLLAPSE_KEYWORDS = DEFAULT_COLLAPSE_KEYWORDS def resolve_config_path(path_str: str) -> Path: """解析配置中的路径,兼容从项目根目录或脚本目录运行。""" raw = Path(path_str).expanduser() if raw.is_absolute(): return raw.resolve() cwd_candidate = (Path.cwd() / raw).resolve() if cwd_candidate.exists(): return cwd_candidate script_dir = Path(__file__).resolve().parent script_candidate = (script_dir / raw).resolve() if script_candidate.exists(): return script_candidate project_root = script_dir.parent.parent project_candidate = (project_root / raw).resolve() if project_candidate.exists(): return project_candidate # 如果都不存在,返回项目根拼接结果,便于报错信息更稳定 return project_candidate def should_collapse( title: str, collapse_prefixes: list[str], collapse_keywords: list[str], collapse_all: bool, ) -> bool: if collapse_all: return True if any(title.startswith(prefix) for prefix in collapse_prefixes): return True return any(keyword in title for keyword in collapse_keywords) def render_text_block(lines: list[str]) -> str: if not lines: return "" normalized = lines[:] while normalized and normalized[0].strip() == "": normalized.pop(0) while normalized and normalized[-1].strip() == "": normalized.pop() if not normalized: return "" compact: list[str] = [] empty_streak = 0 for line in normalized: if line.strip() == "": empty_streak += 1 if empty_streak <= 1: compact.append("") else: empty_streak = 0 compact.append(line) escaped = html.escape("\n".join(compact)) return f'
{escaped}
' def enrich_fold_title(title: str) -> str: """为工具调用标题附加工具功能描述。""" tool_prefix = "🔧 " if not title.startswith(tool_prefix): return title tool_name = title[len(tool_prefix):].strip() description = TOOL_DESCRIPTION_MAP.get(tool_name) if not description: return title return f"{tool_prefix}{tool_name}({description})" def render_node( node: Node, collapse_prefixes: list[str], collapse_keywords: list[str], collapse_all: bool, ) -> str: parts: list[str] = [] text_buffer: list[str] = [] def flush_text_buffer() -> None: if text_buffer: parts.append(render_text_block(text_buffer)) text_buffer.clear() for entry in node.entries: if isinstance(entry, str): text_buffer.append(entry) continue child = entry if child.is_fold: flush_text_buffer() title = child.title or "" is_collapsed = should_collapse( title=title, collapse_prefixes=collapse_prefixes, collapse_keywords=collapse_keywords, collapse_all=collapse_all, ) folded_class = "fold tool-fold" if is_collapsed else "fold normal-fold" open_attr = "" if is_collapsed else " open" display_title = enrich_fold_title(title) inner = render_node( child, collapse_prefixes=collapse_prefixes, collapse_keywords=collapse_keywords, collapse_all=collapse_all, ) parts.append( f'
' f'{html.escape(display_title)}' f"{inner}" "
" ) flush_text_buffer() return "".join(parts) def build_html(body: str, source_name: str) -> str: return f""" Run Log 可视化 - {html.escape(source_name)}
Run Log 可视化
{html.escape(source_name)}
{body}
""" def generate_html( input_path: Path, output_path: Path, collapse_prefixes: list[str], collapse_keywords: list[str], collapse_all: bool = False, ) -> None: content = input_path.read_text(encoding="utf-8") tree = parse_log(content) body = render_node( tree, collapse_prefixes=collapse_prefixes, collapse_keywords=collapse_keywords, collapse_all=collapse_all, ) html_content = build_html(body=body, source_name=input_path.name) output_path.parent.mkdir(parents=True, exist_ok=True) output_path.write_text(html_content, encoding="utf-8") def main() -> None: input_base = resolve_config_path(INPUT_LOG_PATH) if input_base.is_file(): input_path = input_base elif input_base.is_dir(): # 优先渲染最新 run_log_*.txt,其次渲染任意 *.txt candidates = sorted( input_base.glob("run_log_*.txt"), key=lambda p: p.stat().st_mtime, reverse=True, ) if not candidates: candidates = sorted( input_base.glob("*.txt"), key=lambda p: p.stat().st_mtime, reverse=True, ) if not candidates: raise FileNotFoundError(f"目录下未找到可渲染日志文件: {input_base}") input_path = candidates[0] else: raise FileNotFoundError(f"输入日志路径不存在: {input_base}") if OUTPUT_HTML_PATH: output_path = resolve_config_path(OUTPUT_HTML_PATH) else: output_path = input_path.with_suffix(".html") generate_html( input_path=input_path, output_path=output_path, collapse_prefixes=COLLAPSE_PREFIXES, collapse_keywords=COLLAPSE_KEYWORDS, collapse_all=COLLAPSE_ALL_FOLDS, ) print(f"HTML 已生成: {output_path}") if __name__ == "__main__": main()