extraction_review.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. """
  2. 提取审核交互式 CLI
  3. 用途
  4. ----
  5. 反思侧分支产出的知识条目默认写为 cognition_log: type="extraction_pending",
  6. 不会直接上传到 KnowHub。本 CLI 提供人工审核 + 批量提交入口。
  7. 两种入口(共享同一核心逻辑,见 agent/trace/extraction_review.py):
  8. - 独立脚本:python -m agent.cli.extraction_review --trace <TRACE_ID> [--list|--list-all|--review|--commit]
  9. - interactive.py 菜单项 8/9(见 agent/cli/interactive.py)
  10. 用法示例
  11. --------
  12. # 查看当前 trace 的所有未审核条目
  13. python -m agent.cli.extraction_review --trace abc-123 --list
  14. # 交互式逐条审核
  15. python -m agent.cli.extraction_review --trace abc-123 --review
  16. # 把已 approved 的条目批量提交到 KnowHub
  17. python -m agent.cli.extraction_review --trace abc-123 --commit
  18. # 一条龙:review 完直接 commit
  19. python -m agent.cli.extraction_review --trace abc-123
  20. """
  21. from __future__ import annotations
  22. import argparse
  23. import asyncio
  24. import json
  25. import sys
  26. from pathlib import Path
  27. from typing import List, Optional
  28. from agent.trace.store import FileSystemTraceStore
  29. from agent.trace.extraction_review import (
  30. PendingExtraction,
  31. CommitReport,
  32. list_pending,
  33. review_one,
  34. commit_approved,
  35. )
  36. # ===== 打印工具 =====
# Horizontal rule (60 box-drawing characters) printed above and below each payload.
_SEP = "─" * 60
  38. def _format_payload(payload: dict, max_content: int = 400) -> str:
  39. task = payload.get("task", "")
  40. content = payload.get("content", "")
  41. types = payload.get("types", [])
  42. tags = payload.get("tags", {})
  43. score = payload.get("score", 0)
  44. resource_ids = payload.get("resource_ids", [])
  45. if len(content) > max_content:
  46. content = content[:max_content] + "…(truncated)"
  47. lines = [
  48. f"task: {task}",
  49. f"types: {types} score: {score}",
  50. ]
  51. if tags:
  52. lines.append(f"tags: {tags}")
  53. if resource_ids:
  54. lines.append(f"resources: {resource_ids}")
  55. lines.append("")
  56. lines.append(content)
  57. return "\n".join(lines)
  58. def _print_pending(p: PendingExtraction, index: int, total: int) -> None:
  59. state = ""
  60. if p.committed:
  61. state = " [已提交]"
  62. elif p.reviewed:
  63. state = f" [已审核: {p.decision}]"
  64. print()
  65. print(f"[{index}/{total}] {p.extraction_id}{state}")
  66. print(_SEP)
  67. print(_format_payload(p.payload))
  68. print(_SEP)
  69. def _print_report(report: CommitReport) -> None:
  70. print()
  71. print("=" * 60)
  72. print("提交结果")
  73. print("=" * 60)
  74. print(f"✅ 成功: {len(report.committed)}")
  75. for eid, kid in zip(report.committed, report.knowledge_ids):
  76. print(f" - {eid} → knowledge_id={kid}")
  77. if report.failed:
  78. print(f"❌ 失败: {len(report.failed)}")
  79. for item in report.failed:
  80. print(f" - {item['extraction_id']}: {item['error']}")
  81. if report.skipped:
  82. print(f"⏭ 跳过: {len(report.skipped)}(未 approved 或已提交)")
  83. print("=" * 60)
  84. # ===== 交互式编辑 =====
  85. def _prompt_edit(payload: dict) -> Optional[dict]:
  86. """进入交互式文本编辑模式,返回修改后的 payload(None 表示取消)。
  87. 初版只支持改 task/content/score/tags(最常用字段)。
  88. """
  89. print("\n编辑模式(空行回车保留原值)")
  90. task = input(f"task [{payload.get('task', '')[:50]}]: ").strip()
  91. content_default = payload.get("content", "")
  92. print(f"content 当前:\n{content_default}\n")
  93. print("输入新 content(单行回车保留原值;多行请在末尾输入 `.` 单独成行结束):")
  94. content = _read_multiline_or_keep(content_default)
  95. score_raw = input(f"score [{payload.get('score', 3)}]: ").strip()
  96. tags_raw = input(f"tags JSON [{json.dumps(payload.get('tags', {}), ensure_ascii=False)}]: ").strip()
  97. new_payload = dict(payload)
  98. if task:
  99. new_payload["task"] = task
  100. if content is not None:
  101. new_payload["content"] = content
  102. if score_raw:
  103. try:
  104. new_payload["score"] = int(score_raw)
  105. except ValueError:
  106. print(f"⚠ score 不是整数,保留原值 {payload.get('score', 3)}")
  107. if tags_raw:
  108. try:
  109. new_payload["tags"] = json.loads(tags_raw)
  110. except json.JSONDecodeError as e:
  111. print(f"⚠ tags 不是合法 JSON({e}),保留原值")
  112. confirm = input("\n保存修改?[y/N]: ").strip().lower()
  113. if confirm != "y":
  114. return None
  115. return new_payload
  116. def _read_multiline_or_keep(default: str) -> Optional[str]:
  117. """单行输入则直接返回(空行表示保留默认);
  118. 如果输入 `<<` 则进入多行模式,直到 `.` 单独成行结束。"""
  119. first = input("> ")
  120. if not first.strip():
  121. return None
  122. if first.strip() != "<<":
  123. return first
  124. lines = []
  125. while True:
  126. line = input()
  127. if line.strip() == ".":
  128. break
  129. lines.append(line)
  130. return "\n".join(lines)
  131. # ===== 三种命令 =====
  132. async def cmd_list(store: FileSystemTraceStore, trace_id: str, show_all: bool) -> int:
  133. pendings = await list_pending(store, trace_id, include_reviewed=show_all)
  134. if not pendings:
  135. msg = "没有" + ("任何提取记录" if show_all else "待审核的提取条目")
  136. print(f"trace {trace_id}: {msg}")
  137. return 0
  138. print(f"trace {trace_id}: 共 {len(pendings)} 条{'' if show_all else '待审核'}")
  139. for i, p in enumerate(pendings, 1):
  140. _print_pending(p, i, len(pendings))
  141. return 0
  142. async def cmd_review(store: FileSystemTraceStore, trace_id: str) -> int:
  143. pendings = await list_pending(store, trace_id, include_reviewed=False)
  144. if not pendings:
  145. print(f"trace {trace_id}: 没有待审核的提取条目")
  146. return 0
  147. print(f"trace {trace_id}: 开始审核 {len(pendings)} 条")
  148. for i, p in enumerate(pendings, 1):
  149. _print_pending(p, i, len(pendings))
  150. while True:
  151. choice = input("[a]pprove / [e]dit / [d]iscard / [s]kip / [q]uit: ").strip().lower()
  152. if choice in ("a", "approve"):
  153. await review_one(store, trace_id, p.extraction_id, "approve")
  154. print(f"✓ {p.extraction_id} approved")
  155. break
  156. elif choice in ("d", "discard"):
  157. await review_one(store, trace_id, p.extraction_id, "discard")
  158. print(f"✗ {p.extraction_id} discarded")
  159. break
  160. elif choice in ("s", "skip"):
  161. print(f"⏭ {p.extraction_id} skipped(保留为 pending)")
  162. break
  163. elif choice in ("q", "quit"):
  164. print("退出审核")
  165. return 0
  166. elif choice in ("e", "edit"):
  167. edited = _prompt_edit(p.payload)
  168. if edited is None:
  169. print("取消编辑,请重选")
  170. continue
  171. await review_one(store, trace_id, p.extraction_id, "edit", edited_payload=edited)
  172. print(f"✎ {p.extraction_id} edited & approved")
  173. break
  174. else:
  175. print("无效选项,请输入 a/e/d/s/q")
  176. return 0
  177. async def cmd_commit(store: FileSystemTraceStore, trace_id: str) -> int:
  178. report = await commit_approved(store, trace_id)
  179. _print_report(report)
  180. return 0 if not report.failed else 1
  181. # ===== argparse 入口 =====
  182. def build_parser() -> argparse.ArgumentParser:
  183. p = argparse.ArgumentParser(
  184. prog="python -m agent.cli.extraction_review",
  185. description="审核并提交反思侧分支暂存的待审核知识条目。",
  186. )
  187. p.add_argument("--trace", required=True, help="Trace ID")
  188. p.add_argument("--base-path", default=".trace", help="TraceStore 根目录(默认 .trace)")
  189. group = p.add_mutually_exclusive_group()
  190. group.add_argument("--list", action="store_true", help="仅列出未审核条目")
  191. group.add_argument("--list-all", action="store_true", help="列出全部条目(含已审核/已提交)")
  192. group.add_argument("--review", action="store_true", help="进入交互式审核(不自动 commit)")
  193. group.add_argument("--commit", action="store_true", help="仅批量提交已 approved 的条目")
  194. return p
  195. async def _main_async(args: argparse.Namespace) -> int:
  196. if not Path(args.base_path).exists():
  197. print(f"❌ TraceStore 根目录不存在: {args.base_path}", file=sys.stderr)
  198. return 2
  199. store = FileSystemTraceStore(base_path=args.base_path)
  200. if args.list or args.list_all:
  201. return await cmd_list(store, args.trace, show_all=args.list_all)
  202. if args.review:
  203. return await cmd_review(store, args.trace)
  204. if args.commit:
  205. return await cmd_commit(store, args.trace)
  206. # 默认:review 完紧接着 commit
  207. rc = await cmd_review(store, args.trace)
  208. if rc != 0:
  209. return rc
  210. print()
  211. confirm = input("现在把已 approved 的条目提交到 KnowHub?[Y/n]: ").strip().lower()
  212. if confirm in ("", "y", "yes"):
  213. return await cmd_commit(store, args.trace)
  214. print("未提交。需要时运行 `--commit` 子命令。")
  215. return 0
  216. def main() -> int:
  217. args = build_parser().parse_args()
  218. return asyncio.run(_main_async(args))
# Script entry point: hand main()'s return value to sys.exit as the process exit status.
if __name__ == "__main__":
    sys.exit(main())