|
@@ -7,6 +7,8 @@
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
+import argparse
|
|
|
|
|
+import json
|
|
|
import html
|
|
import html
|
|
|
import logging
|
|
import logging
|
|
|
import os
|
|
import os
|
|
@@ -77,6 +79,13 @@ TOOL_DESCRIPTION_MAP: dict[str, str] = {
|
|
|
INPUT_LOG_PATH = os.getenv("INPUT_LOG_PATH", ".cache/input_log")
|
|
INPUT_LOG_PATH = os.getenv("INPUT_LOG_PATH", ".cache/input_log")
|
|
|
# 设为 None 则默认生成到输入文件同名 .html
|
|
# 设为 None 则默认生成到输入文件同名 .html
|
|
|
OUTPUT_HTML_PATH: str | None = os.getenv("OUTPUT_HTML_PATH") or None
|
|
OUTPUT_HTML_PATH: str | None = os.getenv("OUTPUT_HTML_PATH") or None
|
|
|
|
|
+# 产物输出目录(content_finder 的标准 output 目录)
|
|
|
|
|
+OUTPUT_DIR = os.getenv("OUTPUT_DIR", ".cache/output")
|
|
|
|
|
+# 置顶摘要表格数据源(可选)。不填则默认取 input_log 同目录下的 process_trace.json / output.json
|
|
|
|
|
+PROCESS_TRACE_PATH: str | None = os.getenv("PROCESS_TRACE_PATH") or None
|
|
|
|
|
+OUTPUT_JSON_PATH: str | None = os.getenv("OUTPUT_JSON_PATH") or None
|
|
|
|
|
+# 如果未显式指定 PROCESS_TRACE_PATH/OUTPUT_JSON_PATH,且同目录不存在文件,则尝试从该 trace_id 推导 .cache/output/{trace_id}/...
|
|
|
|
|
+TRACE_ID: str | None = os.getenv("TRACE_ID") or None
|
|
|
# 是否默认折叠所有 [FOLD] 块
|
|
# 是否默认折叠所有 [FOLD] 块
|
|
|
COLLAPSE_ALL_FOLDS = False
|
|
COLLAPSE_ALL_FOLDS = False
|
|
|
# 命中这些前缀/关键词的折叠块默认收起
|
|
# 命中这些前缀/关键词的折叠块默认收起
|
|
@@ -213,7 +222,162 @@ def render_node(
|
|
|
return "".join(parts)
|
|
return "".join(parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
-def build_html(body: str, source_name: str) -> str:
|
|
|
|
|
|
|
def _safe_str(v: object) -> str:
    """Convert an arbitrary value to display text without raising.

    Args:
        v: Any value, typically a field read from parsed JSON.

    Returns:
        "" for None; ``str(v)`` for str/int/float/bool; otherwise the JSON
        encoding of ``v`` (non-ASCII characters kept as-is). If ``v`` cannot
        be JSON-encoded, ``str(v)`` is returned instead.
    """
    if v is None:
        return ""
    if isinstance(v, (str, int, float, bool)):
        return str(v)
    try:
        return json.dumps(v, ensure_ascii=False)
    except (TypeError, ValueError):
        # TypeError: type json cannot encode; ValueError: circular reference.
        # A display helper should degrade to str() rather than abort rendering.
        return str(v)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _truncate(s: str, max_len: int) -> str:
    """Shorten ``s`` to at most ``max_len`` characters, marking cuts with "…".

    Args:
        s: Text to shorten; a falsy value (None or "") is treated as "".
        max_len: Maximum length of the returned string, ellipsis included.

    Returns:
        ``s`` unchanged when it already fits; otherwise its first
        ``max_len - 1`` characters followed by "…". For ``max_len <= 0``
        nothing fits, so "" is returned.
    """
    s = s or ""
    if len(s) <= max_len:
        return s
    if max_len <= 0:
        # Even the ellipsis alone would exceed the requested limit.
        return ""
    return s[: max_len - 1] + "…"
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _read_json_file(path: Path) -> dict:
    """Read a UTF-8 encoded JSON file whose top-level value is an object.

    Args:
        path: File to read.

    Returns:
        The parsed top-level JSON object as a dict.

    Raises:
        OSError: The file cannot be read.
        ValueError: The content is not valid UTF-8 / JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or the top-level
            value is not a JSON object.
    """
    data = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        # Fail here with a clear message instead of letting callers hit an
        # AttributeError on ``.get`` later.
        raise ValueError(
            f"expected a JSON object at top level of {path}, got {type(data).__name__}"
        )
    return data
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _build_aweme_id_to_video_url(output_json_path: Path) -> dict[str, str]:
    """Build an ``{aweme_id: video_url}`` lookup from ``contents[]`` in output.json.

    Entries that are not dicts, or whose ``aweme_id`` / ``video_url`` is empty
    after stripping whitespace, are skipped. When an ``aweme_id`` occurs more
    than once, the last entry wins.

    Args:
        output_json_path: Path to output.json.

    Returns:
        The mapping, or an empty dict when ``contents`` is missing, empty or
        not a list.
    """
    entries = _read_json_file(output_json_path).get("contents") or []
    if not isinstance(entries, list):
        return {}

    # Normalise both fields first, then keep only pairs where both are non-empty.
    pairs = (
        (
            _safe_str(entry.get("aweme_id")).strip(),
            _safe_str(entry.get("video_url")).strip(),
        )
        for entry in entries
        if isinstance(entry, dict)
    )
    return {vid: url for vid, url in pairs if vid and url}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def _build_process_trace_table_html(*, process_trace_path: Path, output_json_path: Path) -> str:
    """Build the summary table panel shown above the rendered log.

    Data sources:
    - process_trace.json: each dict in ``rows[]`` becomes one table row.
    - output.json: ``contents[]`` supplies ``video_url``, joined on ``aweme_id``.

    Args:
        process_trace_path: Path to process_trace.json.
        output_json_path: Path to output.json.

    Returns:
        HTML for the summary panel, or "" when either file does not exist,
        process_trace.json cannot be read or is not a JSON object, or it
        yields no usable rows. A failure to read output.json is logged and
        the table is rendered without video links.
    """
    if not process_trace_path.exists() or not output_json_path.exists():
        return ""

    try:
        trace_data = _read_json_file(process_trace_path)
    except Exception as e:
        logger.warning("read process_trace.json failed: path=%s err=%s", process_trace_path, e)
        return ""

    # Valid JSON whose top level is a list/scalar has no ``.get``; treat it
    # like any other unreadable trace file instead of raising AttributeError.
    if not isinstance(trace_data, dict):
        logger.warning(
            "process_trace.json top-level value is not an object: path=%s type=%s",
            process_trace_path,
            type(trace_data).__name__,
        )
        return ""

    rows = trace_data.get("rows") or []
    if not isinstance(rows, list) or not rows:
        return ""

    aweme_to_url: dict[str, str] = {}
    try:
        aweme_to_url = _build_aweme_id_to_video_url(output_json_path)
    except Exception as e:
        # Best effort: the table is still useful without video links.
        logger.warning("read output.json failed: path=%s err=%s", output_json_path, e)

    # (row key, column label) in display order.
    headers: list[tuple[str, str]] = [
        ("input_features", "特征"),
        ("aweme_id", "视频id"),
        ("title", "标题"),
        ("video_url", "视频链接"),
        ("author_nickname", "作者"),
        ("from_case_point", "参考点"),
        ("strategy_type", "策略"),
        ("channel", "渠道"),
        ("search_keyword", "搜索词"),
        ("decision_basis", "依据"),
        ("decision_notes", "理由"),
    ]

    # Columns whose text is shortened in the cell; the full text goes into
    # the cell's ``title`` attribute (hover tooltip).
    truncate_limits: dict[str, int] = {"decision_notes": 80, "title": 60}

    def td(text: str, *, muted: bool = False, title: str | None = None) -> str:
        """Render one escaped ``<td>`` cell, optionally muted and/or with a tooltip."""
        klass = "cell muted" if muted else "cell"
        title_attr = f' title="{html.escape(title)}"' if title else ""
        return f'<td class="{klass}"{title_attr}>{html.escape(text)}</td>'

    body_parts: list[str] = []
    for r in rows:
        if not isinstance(r, dict):
            continue
        aweme_id = _safe_str(r.get("aweme_id")).strip()
        video_url = aweme_to_url.get(aweme_id, "")

        tds: list[str] = []
        for key, _label in headers:
            if key == "video_url":
                if video_url:
                    # NOTE(review): the URL is attribute-escaped but its scheme
                    # is not checked; confirm output.json is a trusted source.
                    safe_url = html.escape(video_url, quote=True)
                    tds.append(
                        '<td class="cell link-cell">'
                        f'<a class="link" href="{safe_url}" target="_blank" rel="noreferrer">打开</a>'
                        "</td>"
                    )
                else:
                    tds.append(td("", muted=True))
                continue

            val = aweme_id if key == "aweme_id" else _safe_str(r.get(key))
            limit = truncate_limits.get(key)
            if limit is None:
                tds.append(td(val))
            else:
                tds.append(td(_truncate(val, limit), title=val))

        body_parts.append("<tr>" + "".join(tds) + "</tr>")

    if not body_parts:
        return ""

    thead = "".join(f"<th>{html.escape(label)}</th>" for _key, label in headers)
    return (
        '<div class="summary-panel">'
        '<div class="summary-title">过程追踪摘要</div>'
        f'<div class="summary-meta">{html.escape(process_trace_path.name)}</div>'
        '<div class="table-wrap">'
        '<table class="summary-table">'
        f"<thead><tr>{thead}</tr></thead>"
        f"<tbody>{''.join(body_parts)}</tbody>"
        "</table>"
        "</div>"
        "</div>"
    )
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def build_html(body: str, source_name: str, *, summary_table_html: str = "") -> str:
|
|
|
return f"""<!doctype html>
|
|
return f"""<!doctype html>
|
|
|
<html lang="zh-CN">
|
|
<html lang="zh-CN">
|
|
|
<head>
|
|
<head>
|
|
@@ -276,6 +440,80 @@ def build_html(body: str, source_name: str) -> str:
|
|
|
border-radius: 10px;
|
|
border-radius: 10px;
|
|
|
padding: 10px;
|
|
padding: 10px;
|
|
|
}}
|
|
}}
|
|
|
|
|
+ .summary-panel {{
|
|
|
|
|
+ background: rgba(255, 255, 255, 0.02);
|
|
|
|
|
+ border: 1px solid var(--border);
|
|
|
|
|
+ border-radius: 10px;
|
|
|
|
|
+ padding: 12px;
|
|
|
|
|
+ margin-bottom: 12px;
|
|
|
|
|
+ }}
|
|
|
|
|
+ .summary-title {{
|
|
|
|
|
+ font-size: 14px;
|
|
|
|
|
+ font-weight: 700;
|
|
|
|
|
+ margin-bottom: 2px;
|
|
|
|
|
+ letter-spacing: 0.2px;
|
|
|
|
|
+ }}
|
|
|
|
|
+ .summary-meta {{
|
|
|
|
|
+ color: var(--muted);
|
|
|
|
|
+ font-size: 12px;
|
|
|
|
|
+ margin-bottom: 10px;
|
|
|
|
|
+ }}
|
|
|
|
|
+ .table-wrap {{
|
|
|
|
|
+ overflow: auto;
|
|
|
|
|
+ border: 1px solid var(--border);
|
|
|
|
|
+ border-radius: 10px;
|
|
|
|
|
+ background: rgba(0, 0, 0, 0.12);
|
|
|
|
|
+ }}
|
|
|
|
|
+ table.summary-table {{
|
|
|
|
|
+ border-collapse: separate;
|
|
|
|
|
+ border-spacing: 0;
|
|
|
|
|
+ width: 100%;
|
|
|
|
|
+ min-width: 1100px;
|
|
|
|
|
+ font-size: 12px;
|
|
|
|
|
+ }}
|
|
|
|
|
+ table.summary-table thead th {{
|
|
|
|
|
+ position: sticky;
|
|
|
|
|
+ top: 0;
|
|
|
|
|
+ z-index: 1;
|
|
|
|
|
+ text-align: left;
|
|
|
|
|
+ padding: 10px 10px;
|
|
|
|
|
+ background: #111a2b;
|
|
|
|
|
+ border-bottom: 1px solid var(--border);
|
|
|
|
|
+ color: #cdd6e5;
|
|
|
|
|
+ white-space: nowrap;
|
|
|
|
|
+ }}
|
|
|
|
|
+ table.summary-table tbody tr {{
|
|
|
|
|
+ border-bottom: 1px solid rgba(255, 255, 255, 0.04);
|
|
|
|
|
+ }}
|
|
|
|
|
+ table.summary-table tbody tr:nth-child(2n) {{
|
|
|
|
|
+ background: rgba(255, 255, 255, 0.02);
|
|
|
|
|
+ }}
|
|
|
|
|
+ table.summary-table td.cell {{
|
|
|
|
|
+ padding: 9px 10px;
|
|
|
|
|
+ vertical-align: top;
|
|
|
|
|
+ border-bottom: 1px solid rgba(255, 255, 255, 0.04);
|
|
|
|
|
+ color: var(--text);
|
|
|
|
|
+ line-height: 1.35;
|
|
|
|
|
+ }}
|
|
|
|
|
+ table.summary-table td.muted {{
|
|
|
|
|
+ color: var(--muted);
|
|
|
|
|
+ }}
|
|
|
|
|
+ td.link-cell {{
|
|
|
|
|
+ white-space: nowrap;
|
|
|
|
|
+ }}
|
|
|
|
|
+ a.link {{
|
|
|
|
|
+ color: var(--accent);
|
|
|
|
|
+ text-decoration: none;
|
|
|
|
|
+ border: 1px solid rgba(110, 168, 254, 0.35);
|
|
|
|
|
+ padding: 2px 8px;
|
|
|
|
|
+ border-radius: 999px;
|
|
|
|
|
+ display: inline-block;
|
|
|
|
|
+ background: rgba(110, 168, 254, 0.08);
|
|
|
|
|
+ }}
|
|
|
|
|
+ a.link:hover {{
|
|
|
|
|
+ border-color: var(--accent);
|
|
|
|
|
+ background: rgba(110, 168, 254, 0.14);
|
|
|
|
|
+ }}
|
|
|
details {{
|
|
details {{
|
|
|
margin: 6px 0;
|
|
margin: 6px 0;
|
|
|
border: 1px solid var(--border);
|
|
border: 1px solid var(--border);
|
|
@@ -328,6 +566,7 @@ def build_html(body: str, source_name: str) -> str:
|
|
|
<button id="expand-tools">展开全部工具调用</button>
|
|
<button id="expand-tools">展开全部工具调用</button>
|
|
|
<button id="collapse-tools">折叠全部工具调用</button>
|
|
<button id="collapse-tools">折叠全部工具调用</button>
|
|
|
</div>
|
|
</div>
|
|
|
|
|
+ {summary_table_html}
|
|
|
<div class="content">{body}</div>
|
|
<div class="content">{body}</div>
|
|
|
</div>
|
|
</div>
|
|
|
<script>
|
|
<script>
|
|
@@ -359,7 +598,28 @@ def generate_html(
|
|
|
collapse_keywords=collapse_keywords,
|
|
collapse_keywords=collapse_keywords,
|
|
|
collapse_all=collapse_all,
|
|
collapse_all=collapse_all,
|
|
|
)
|
|
)
|
|
|
- html_content = build_html(body=body, source_name=input_path.name)
|
|
|
|
|
|
|
+ if PROCESS_TRACE_PATH:
|
|
|
|
|
+ process_trace_path = resolve_config_path(PROCESS_TRACE_PATH)
|
|
|
|
|
+ else:
|
|
|
|
|
+ process_trace_path = input_path.with_name("process_trace.json")
|
|
|
|
|
+
|
|
|
|
|
+ if OUTPUT_JSON_PATH:
|
|
|
|
|
+ output_json_path = resolve_config_path(OUTPUT_JSON_PATH)
|
|
|
|
|
+ else:
|
|
|
|
|
+ output_json_path = input_path.with_name("output.json")
|
|
|
|
|
+
|
|
|
|
|
+ if TRACE_ID and (not process_trace_path.exists() or not output_json_path.exists()):
|
|
|
|
|
+ trace_dir = resolve_config_path(f".cache/output/{TRACE_ID}")
|
|
|
|
|
+ if not process_trace_path.exists():
|
|
|
|
|
+ process_trace_path = trace_dir / "process_trace.json"
|
|
|
|
|
+ if not output_json_path.exists():
|
|
|
|
|
+ output_json_path = trace_dir / "output.json"
|
|
|
|
|
+
|
|
|
|
|
+ summary_table_html = _build_process_trace_table_html(
|
|
|
|
|
+ process_trace_path=process_trace_path,
|
|
|
|
|
+ output_json_path=output_json_path,
|
|
|
|
|
+ )
|
|
|
|
|
+ html_content = build_html(body=body, source_name=input_path.name, summary_table_html=summary_table_html)
|
|
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
output_path.write_text(html_content, encoding="utf-8")
|
|
output_path.write_text(html_content, encoding="utf-8")
|
|
|
|
|
|
|
@@ -412,11 +672,40 @@ def render_log_html_and_upload(*, trace_id: str, log_file_path: Path) -> str | N
|
|
|
return None
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
-def main() -> None:
|
|
|
|
|
- input_base = resolve_config_path(INPUT_LOG_PATH)
|
|
|
|
|
|
|
def _resolve_input_log_path_from_trace_id(*, trace_id: str, output_dir: Path) -> Path:
    """Locate the log file to render for the run stored in ``output_dir/<trace_id>/``.

    Lookup order inside the run directory:
    1. ``log.txt``
    2. the most recently modified ``run_log_*.txt`` (some tasks use this naming)
    3. the most recently modified ``*.txt``

    Only regular files are considered; directories whose names happen to
    match are ignored.

    Args:
        trace_id: Run identifier; surrounding whitespace is stripped.
        output_dir: Directory that contains one sub-directory per trace_id.

    Returns:
        Path of the selected log file.

    Raises:
        ValueError: ``trace_id`` is empty or whitespace-only.
        FileNotFoundError: The run directory does not exist (or is not a
            directory), or it contains no candidate log file.
    """
    tid = (trace_id or "").strip()
    if not tid:
        raise ValueError("trace_id is required")

    run_dir = (output_dir / tid).resolve()
    if not run_dir.is_dir():
        raise FileNotFoundError(f"OUTPUT_DIR 下未找到 trace_id 目录: {run_dir}")

    log_path = run_dir / "log.txt"
    if log_path.is_file():
        return log_path

    # Fall back from the specific naming scheme to any text file; within a
    # pattern the newest file by modification time wins.
    for pattern in ("run_log_*.txt", "*.txt"):
        candidates = [p for p in run_dir.glob(pattern) if p.is_file()]
        if candidates:
            return max(candidates, key=lambda p: p.stat().st_mtime)

    raise FileNotFoundError(f"trace_id 目录下未找到可渲染日志文件: {run_dir}")
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def _resolve_input_log_path_from_input_base(input_base: Path) -> Path:
|
|
|
if input_base.is_file():
|
|
if input_base.is_file():
|
|
|
- input_path = input_base
|
|
|
|
|
- elif input_base.is_dir():
|
|
|
|
|
|
|
+ return input_base
|
|
|
|
|
+ if input_base.is_dir():
|
|
|
# 优先渲染最新 run_log_*.txt,其次渲染任意 *.txt
|
|
# 优先渲染最新 run_log_*.txt,其次渲染任意 *.txt
|
|
|
candidates = sorted(
|
|
candidates = sorted(
|
|
|
input_base.glob("run_log_*.txt"),
|
|
input_base.glob("run_log_*.txt"),
|
|
@@ -431,14 +720,28 @@ def main() -> None:
|
|
|
)
|
|
)
|
|
|
if not candidates:
|
|
if not candidates:
|
|
|
raise FileNotFoundError(f"目录下未找到可渲染日志文件: {input_base}")
|
|
raise FileNotFoundError(f"目录下未找到可渲染日志文件: {input_base}")
|
|
|
- input_path = candidates[0]
|
|
|
|
|
- else:
|
|
|
|
|
- raise FileNotFoundError(f"输入日志路径不存在: {input_base}")
|
|
|
|
|
|
|
+ return candidates[0]
|
|
|
|
|
+ raise FileNotFoundError(f"输入日志路径不存在: {input_base}")
|
|
|
|
|
+
|
|
|
|
|
|
|
|
- if OUTPUT_HTML_PATH:
|
|
|
|
|
- output_path = resolve_config_path(OUTPUT_HTML_PATH)
|
|
|
|
|
|
|
+def main(argv: list[str] | None = None) -> None:
|
|
|
|
|
+ parser = argparse.ArgumentParser(description="Render run log text to collapsible HTML.")
|
|
|
|
|
+ parser.add_argument("--trace-id", dest="trace_id", default="", help="trace_id in OUTPUT_DIR/<trace_id>/")
|
|
|
|
|
+ parser.add_argument("trace_id_pos", nargs="?", default="", help="trace_id (positional), same as --trace-id")
|
|
|
|
|
+ args = parser.parse_args(argv)
|
|
|
|
|
+
|
|
|
|
|
+ trace_id = ((args.trace_id or "").strip() or (args.trace_id_pos or "").strip())
|
|
|
|
|
+ if trace_id:
|
|
|
|
|
+ output_dir = resolve_config_path(OUTPUT_DIR)
|
|
|
|
|
+ input_path = _resolve_input_log_path_from_trace_id(trace_id=trace_id, output_dir=output_dir)
|
|
|
|
|
+ output_path = input_path.with_name("log.html")
|
|
|
else:
|
|
else:
|
|
|
- output_path = input_path.with_suffix(".html")
|
|
|
|
|
|
|
+ input_base = resolve_config_path(INPUT_LOG_PATH)
|
|
|
|
|
+ input_path = _resolve_input_log_path_from_input_base(input_base)
|
|
|
|
|
+ if OUTPUT_HTML_PATH:
|
|
|
|
|
+ output_path = resolve_config_path(OUTPUT_HTML_PATH)
|
|
|
|
|
+ else:
|
|
|
|
|
+ output_path = input_path.with_suffix(".html")
|
|
|
|
|
|
|
|
generate_html(
|
|
generate_html(
|
|
|
input_path=input_path,
|
|
input_path=input_path,
|