tool_logging.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129
  1. """工具调用日志的通用封装。"""
  2. from __future__ import annotations
  3. import json
  4. from typing import Any, Dict
  5. from .log_capture import log, log_fold
  6. def _pretty_json_if_possible(text: str) -> str:
  7. """如果文本是合法 JSON,则返回带缩进的可读格式;否则原样返回。"""
  8. raw = (text or "").strip()
  9. if not raw:
  10. return text
  11. if not (raw.startswith("{") or raw.startswith("[")):
  12. return text
  13. try:
  14. parsed = json.loads(raw)
  15. except Exception:
  16. return text
  17. return json.dumps(parsed, ensure_ascii=False, indent=2)
  18. def _truncate_deep(obj: Any, str_limit: int = 2000) -> Any:
  19. """递归遍历对象,对超长字符串做截断,其余结构原样保留。"""
  20. if isinstance(obj, str):
  21. return obj if len(obj) <= str_limit else obj[:str_limit] + f"...(truncated, total {len(obj)} chars)"
  22. if isinstance(obj, dict):
  23. return {k: _truncate_deep(v, str_limit) for k, v in obj.items()}
  24. if isinstance(obj, list):
  25. return [_truncate_deep(item, str_limit) for item in obj]
  26. return obj
  27. def _structure_metadata(md: Dict[str, Any], body_limit: int = 200) -> Dict[str, Any]:
  28. """对 metadata 做结构化精简,剥离 raw_data / 完整正文等大字段。
  29. - 含 article_info 的结果:提取标题、统计、正文预览,丢弃 raw HTML / 图片列表。
  30. - 含 account_info 的结果:保留账号关键字段。
  31. - 含 search_results 的结果:每条只保留标题和 URL。
  32. - 其他情况:递归截断超长字符串。
  33. """
  34. # --- 文章详情 ---
  35. article_info = md.get("article_info")
  36. if isinstance(article_info, dict):
  37. body = str(article_info.get("body_text", "") or "")
  38. body_preview = body[:body_limit] + "..." if len(body) > body_limit else body
  39. # 去掉图片标记行
  40. body_preview = "\n".join(
  41. line for line in body_preview.splitlines()
  42. if not line.strip().startswith("[image:")
  43. )
  44. images = article_info.get("image_url_list") or []
  45. return {
  46. "article_info": {
  47. "title": article_info.get("title", ""),
  48. "content_link": article_info.get("content_link", ""),
  49. "publish_timestamp": article_info.get("publish_timestamp"),
  50. "statistics": {
  51. "view_count": article_info.get("view_count"),
  52. "like_count": article_info.get("like_count"),
  53. "share_count": article_info.get("share_count"),
  54. "looking_count": article_info.get("looking_count"),
  55. "comment_count": article_info.get("comment_count"),
  56. "collect_count": article_info.get("collect_count"),
  57. },
  58. "is_original": article_info.get("is_original", False),
  59. "image_count": len(images),
  60. "body_length": len(body),
  61. "body_preview": body_preview,
  62. }
  63. }
  64. # --- 账号信息 ---
  65. account_info = md.get("account_info")
  66. if isinstance(account_info, dict):
  67. return {
  68. "account_info": {
  69. "account_name": account_info.get("account_name", ""),
  70. "wx_gh": account_info.get("wx_gh", ""),
  71. "channel_account_id": account_info.get("channel_account_id", ""),
  72. }
  73. }
  74. # --- 搜索结果列表 ---
  75. search_results = md.get("search_results")
  76. if isinstance(search_results, list):
  77. brief = [
  78. {"title": item.get("title", ""), "url": item.get("url", "")}
  79. for item in search_results[:20]
  80. ]
  81. return {"search_results": brief, "total": len(search_results)}
  82. # --- 兜底:递归截断 ---
  83. return _truncate_deep(md)
  84. def format_tool_result_for_log(result: Any) -> str:
  85. """将 ToolResult 或普通字符串格式化为可写入日志的文本。
  86. 对文章详情类结果,输出结构化摘要(标题/统计/正文预览),
  87. 剥离 raw_data 和完整正文,避免日志被大段内容淹没。
  88. """
  89. if result is None:
  90. return ""
  91. if isinstance(result, str):
  92. s = result
  93. return s if len(s) <= 8000 else s[:8000] + "\n...(truncated)"
  94. title = getattr(result, "title", "") or ""
  95. output = getattr(result, "output", None) or ""
  96. err = getattr(result, "error", None)
  97. payload: Dict[str, Any] = {"title": title, "output": output}
  98. if err:
  99. payload["error"] = err
  100. md = getattr(result, "metadata", None)
  101. if isinstance(md, dict) and md:
  102. payload["metadata"] = _structure_metadata(md)
  103. return json.dumps(payload, ensure_ascii=False)
  104. def log_tool_call(tool_name: str, params: Dict[str, Any], result: str) -> None:
  105. """以折叠块结构化输出工具调用参数与返回内容。"""
  106. with log_fold(f"🔧 {tool_name}"):
  107. with log_fold("📥 调用参数"):
  108. log(json.dumps(params, ensure_ascii=False, indent=2))
  109. with log_fold("📤 返回内容"):
  110. log(_pretty_json_if_possible(result))