knowhub.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. #!/usr/bin/env python
  2. """
  3. KnowHub 自包含 CLI
  4. ===================
  5. 三种查询/写入 KnowHub 的方式,都是直接 HTTP 调用,不依赖 cyber-agent 包。
  6. 1. `ask` —— 深度回顾:POST /api/agent,agent_type=remote_librarian(默认)或 remote_research
  7. 远端 Librarian Agent 会规划、检索、整合 → 带引用的自然语言回答
  8. 2. `search` —— 快速检索:GET /api/knowledge/search
  9. 语义搜索 + LLM 精排,返回结构化知识条目
  10. 3. `save` —— 保存知识:POST /api/knowledge
  11. 把单条知识写库(异步校验入库)
  12. ## 用法
  13. python knowhub.py ask --query="..."
  14. python knowhub.py ask --query="..." --deep # 走 remote_research
  15. python knowhub.py ask --query="..." --continue_from=SUB_TRACE_ID
  16. python knowhub.py search --query="..." [--top_k=5] [--min_score=3]
  17. [--types=strategy,tool] [--owner=...]
  18. [--capability_id=CAP-001]
  19. [--tool_id=...] [--requirement_id=...]
  20. python knowhub.py save --task="..." --content="..." --types=strategy
  21. [--score=4] [--source_name=...] [--source_urls=u1,u2]
  22. [--tags='{"project":"xyz"}']
  23. [--capability_ids=CAP-001,CAP-002]
  24. ## 环境变量(可选)
  25. KNOWHUB_API KnowHub 服务器地址 默认 http://43.106.118.91:9999
  26. KNOWHUB_OWNER 默认所有者(save 使用;search 默认不过滤,需显式传 --owner) 默认 sunlit.howard@gmail.com
  27. .env 文件:在本 skill 目录下放 `.env`,本脚本会自动读取(仅 2 个变量,纯文本解析)。
  28. ## 返回
  29. stdout 输出 JSON:
  30. - ask: {"mode":"remote", "agent_type":..., "sub_trace_id":..., "status":..., "summary":..., "stats":..., "error":?}
  31. - search: {"query":..., "count":N, "results":[...]}
  32. - save: {"knowledge_id":..., "status":"..."}
  33. 退出码:成功 0,失败 1。
  34. """
  35. import argparse
  36. import asyncio
  37. import json
  38. import os
  39. import sys
  40. from pathlib import Path
  41. from typing import Any, Dict, List, Optional
  42. import httpx
  43. # ── 默认配置 ────────────────────────────────────────
# Fallback values used when the corresponding environment variables are unset.
DEFAULT_KNOWHUB_API = "http://43.106.118.91:9999"
DEFAULT_OWNER = "sunlit.howard@gmail.com"
# Scopes attached to saved knowledge when the caller supplies none.
DEFAULT_SCOPES = ["org:cybertogether"]
# Per-request timeouts handed to httpx.AsyncClient (seconds).
ASK_TIMEOUT = 600.0  # Librarian agent planning + multi-round retrieval may take several minutes
SEARCH_TIMEOUT = 60.0
SAVE_TIMEOUT = 30.0
  50. # ── .env 读取(超简版,仅认 KEY=VALUE 格式,不依赖 python-dotenv) ──
  51. def _load_local_env() -> None:
  52. """从本脚本同目录的 .env 加载 KNOWHUB_API / KNOWHUB_OWNER。现有 env 优先。"""
  53. env_file = Path(__file__).resolve().parent / ".env"
  54. if not env_file.exists():
  55. return
  56. for line in env_file.read_text(encoding="utf-8").splitlines():
  57. line = line.strip()
  58. if not line or line.startswith("#") or "=" not in line:
  59. continue
  60. key, val = line.split("=", 1)
  61. key, val = key.strip(), val.strip().strip('"').strip("'")
  62. if key and key not in os.environ:
  63. os.environ[key] = val
# Populate os.environ from the local .env before the settings below are read.
_load_local_env()
# Server base URL; trailing slashes are removed so endpoint paths can be appended directly.
KNOWHUB_API = os.getenv("KNOWHUB_API", DEFAULT_KNOWHUB_API).rstrip("/")
# Default owner, taken from the environment or falling back to DEFAULT_OWNER.
KNOWHUB_OWNER = os.getenv("KNOWHUB_OWNER", DEFAULT_OWNER)
  67. # ── 模式实现 ────────────────────────────────────────
  68. async def ask(
  69. query: str,
  70. deep: bool = False,
  71. continue_from: Optional[str] = None,
  72. skills: Optional[List[str]] = None,
  73. ) -> Dict[str, Any]:
  74. """
  75. 深度回顾:调用远端 Librarian / Research Agent。
  76. deep=False → remote_librarian(整合已有知识库的回答)
  77. deep=True → remote_research(全网调研 + 入库,较慢)
  78. """
  79. agent_type = "remote_research" if deep else "remote_librarian"
  80. payload = {
  81. "agent_type": agent_type,
  82. "task": query,
  83. "messages": None,
  84. "continue_from": continue_from,
  85. "skills": skills,
  86. }
  87. try:
  88. async with httpx.AsyncClient(timeout=ASK_TIMEOUT) as client:
  89. r = await client.post(f"{KNOWHUB_API}/api/agent", json=payload)
  90. r.raise_for_status()
  91. result = r.json()
  92. return {
  93. "mode": "remote",
  94. "agent_type": agent_type,
  95. "sub_trace_id": result.get("sub_trace_id"),
  96. "status": result.get("status", "completed"),
  97. "summary": result.get("summary", ""),
  98. "stats": result.get("stats", {}),
  99. "error": result.get("error"),
  100. }
  101. except httpx.HTTPStatusError as e:
  102. return {
  103. "mode": "remote", "agent_type": agent_type, "status": "failed",
  104. "error": f"HTTP {e.response.status_code}: {e.response.text[:200]}",
  105. }
  106. except Exception as e:
  107. return {
  108. "mode": "remote", "agent_type": agent_type, "status": "failed",
  109. "error": f"{type(e).__name__}: {e}",
  110. }
  111. async def search(
  112. query: str,
  113. top_k: int = 5,
  114. min_score: int = 3,
  115. types: Optional[List[str]] = None,
  116. owner: Optional[str] = None,
  117. requirement_id: Optional[str] = None,
  118. capability_id: Optional[str] = None,
  119. tool_id: Optional[str] = None,
  120. ) -> Dict[str, Any]:
  121. """快速检索:调 /api/knowledge/search。"""
  122. params: Dict[str, Any] = {"q": query, "top_k": top_k, "min_score": min_score}
  123. if types:
  124. params["types"] = ",".join(types)
  125. if owner: # 显式覆盖才用;None 时不过滤(全库搜)
  126. params["owner"] = owner
  127. if requirement_id:
  128. params["requirement_id"] = requirement_id
  129. if capability_id:
  130. params["capability_id"] = capability_id
  131. if tool_id:
  132. params["tool_id"] = tool_id
  133. try:
  134. async with httpx.AsyncClient(timeout=SEARCH_TIMEOUT) as client:
  135. r = await client.get(f"{KNOWHUB_API}/api/knowledge/search", params=params)
  136. r.raise_for_status()
  137. data = r.json()
  138. return {
  139. "query": query,
  140. "count": data.get("count", 0),
  141. "results": data.get("results", []),
  142. }
  143. except httpx.HTTPStatusError as e:
  144. return {"query": query, "count": 0, "results": [],
  145. "error": f"HTTP {e.response.status_code}: {e.response.text[:200]}"}
  146. except Exception as e:
  147. return {"query": query, "count": 0, "results": [],
  148. "error": f"{type(e).__name__}: {e}"}
  149. async def save(
  150. task: str,
  151. content: str,
  152. types: List[str],
  153. tags: Optional[Dict[str, str]] = None,
  154. scopes: Optional[List[str]] = None,
  155. owner: Optional[str] = None,
  156. resource_ids: Optional[List[str]] = None,
  157. source_name: str = "",
  158. source_category: str = "exp",
  159. source_urls: Optional[List[str]] = None,
  160. agent_id: str = "knowhub_cli",
  161. submitted_by: str = "",
  162. score: int = 3,
  163. message_id: str = "",
  164. capability_ids: Optional[List[str]] = None,
  165. tool_ids: Optional[List[str]] = None,
  166. ) -> Dict[str, Any]:
  167. """保存知识:POST /api/knowledge。"""
  168. payload = {
  169. "message_id": message_id,
  170. "types": types,
  171. "task": task,
  172. "tags": tags or {},
  173. "scopes": scopes or DEFAULT_SCOPES,
  174. "owner": owner or KNOWHUB_OWNER,
  175. "content": content,
  176. "resource_ids": resource_ids or [],
  177. "source": {
  178. "name": source_name,
  179. "category": source_category,
  180. "urls": source_urls or [],
  181. "agent_id": agent_id,
  182. "submitted_by": submitted_by or KNOWHUB_OWNER,
  183. },
  184. "eval": {"score": score, "helpful": 1, "harmful": 0, "confidence": 0.5},
  185. "capability_ids": capability_ids or [],
  186. "tool_ids": tool_ids or [],
  187. }
  188. try:
  189. async with httpx.AsyncClient(timeout=SAVE_TIMEOUT) as client:
  190. r = await client.post(f"{KNOWHUB_API}/api/knowledge", json=payload)
  191. r.raise_for_status()
  192. data = r.json()
  193. return {
  194. "knowledge_id": data.get("knowledge_id"),
  195. "status": data.get("status", "submitted"),
  196. "owner": payload["owner"],
  197. }
  198. except httpx.HTTPStatusError as e:
  199. return {"knowledge_id": None, "status": "failed",
  200. "error": f"HTTP {e.response.status_code}: {e.response.text[:200]}"}
  201. except Exception as e:
  202. return {"knowledge_id": None, "status": "failed",
  203. "error": f"{type(e).__name__}: {e}"}
  204. # ── 参数工具 ────────────────────────────────────────
  205. def _split_csv(val: Optional[str]) -> Optional[List[str]]:
  206. """'a,b,c' → ['a','b','c'];None → None;空串 → None。"""
  207. if not val:
  208. return None
  209. parts = [x.strip() for x in val.split(",") if x.strip()]
  210. return parts or None
  211. def _parse_json_maybe(val: Optional[str]) -> Optional[Any]:
  212. """把字符串按 JSON 解析;解析失败则原样返回字符串。"""
  213. if val is None:
  214. return None
  215. try:
  216. return json.loads(val)
  217. except (json.JSONDecodeError, ValueError):
  218. return val
  219. # ── CLI ───────────────────────────────────────────
  220. def _build_parser() -> argparse.ArgumentParser:
  221. parser = argparse.ArgumentParser(
  222. description="KnowHub CLI - ask / search / save 知识库",
  223. formatter_class=argparse.RawDescriptionHelpFormatter,
  224. epilog=f"默认 API: {KNOWHUB_API} 默认 owner: {KNOWHUB_OWNER}",
  225. )
  226. sub = parser.add_subparsers(dest="cmd", required=True, metavar="{ask,search,save}")
  227. # ask
  228. p_ask = sub.add_parser("ask", help="深度回顾(远端 Librarian Agent)")
  229. p_ask.add_argument("--query", required=True)
  230. p_ask.add_argument("--deep", action="store_true", help="改走 remote_research,全网调研 + 入库")
  231. p_ask.add_argument("--continue_from", help="已有 sub_trace_id,传入则复用 Librarian 上下文")
  232. p_ask.add_argument("--skills", help="逗号分隔的 skill 名单(可选,由服务器白名单过滤)")
  233. # search
  234. p_s = sub.add_parser("search", help="快速检索(语义搜索 + 精排)")
  235. p_s.add_argument("--query", required=True)
  236. p_s.add_argument("--top_k", type=int, default=5)
  237. p_s.add_argument("--min_score", type=int, default=3)
  238. p_s.add_argument("--types", help="逗号分隔(user_profile/strategy/tool/usecase/definition/plan)")
  239. p_s.add_argument("--owner", help=f"覆盖默认 owner(默认不过滤,用 --owner={KNOWHUB_OWNER} 限定自己的)")
  240. p_s.add_argument("--requirement_id")
  241. p_s.add_argument("--capability_id")
  242. p_s.add_argument("--tool_id")
  243. # save
  244. p_sv = sub.add_parser("save", help="保存知识到 KnowHub")
  245. p_sv.add_argument("--task", required=True, help="任务描述:在什么情景下 + 要完成什么目标")
  246. p_sv.add_argument("--content", required=True, help="知识的核心内容")
  247. p_sv.add_argument("--types", required=True, help="逗号分隔的类型")
  248. p_sv.add_argument("--tags", help="JSON 字符串,如 '{\"project\":\"xyz\"}'")
  249. p_sv.add_argument("--scopes", help=f"逗号分隔(默认 {','.join(DEFAULT_SCOPES)})")
  250. p_sv.add_argument("--owner", help=f"覆盖默认 owner(默认 {KNOWHUB_OWNER})")
  251. p_sv.add_argument("--score", type=int, default=3, help="1-5,默认 3")
  252. p_sv.add_argument("--source_name", default="")
  253. p_sv.add_argument("--source_category", default="exp", help="paper/exp/skill/book")
  254. p_sv.add_argument("--source_urls", help="逗号分隔 URL 列表")
  255. p_sv.add_argument("--agent_id", default="knowhub_cli")
  256. p_sv.add_argument("--submitted_by", default="")
  257. p_sv.add_argument("--capability_ids", help="逗号分隔的能力 ID")
  258. p_sv.add_argument("--tool_ids", help="逗号分隔的工具 ID")
  259. p_sv.add_argument("--resource_ids", help="逗号分隔的资源 ID")
  260. p_sv.add_argument("--message_id", default="")
  261. return parser
  262. async def _dispatch(args) -> Dict[str, Any]:
  263. if args.cmd == "ask":
  264. return await ask(
  265. query=args.query,
  266. deep=args.deep,
  267. continue_from=args.continue_from,
  268. skills=_split_csv(args.skills),
  269. )
  270. if args.cmd == "search":
  271. return await search(
  272. query=args.query,
  273. top_k=args.top_k,
  274. min_score=args.min_score,
  275. types=_split_csv(args.types),
  276. owner=args.owner,
  277. requirement_id=args.requirement_id,
  278. capability_id=args.capability_id,
  279. tool_id=args.tool_id,
  280. )
  281. if args.cmd == "save":
  282. tags_val = _parse_json_maybe(args.tags) if args.tags else None
  283. return await save(
  284. task=args.task,
  285. content=args.content,
  286. types=_split_csv(args.types) or [],
  287. tags=tags_val if isinstance(tags_val, dict) else None,
  288. scopes=_split_csv(args.scopes),
  289. owner=args.owner,
  290. resource_ids=_split_csv(args.resource_ids),
  291. source_name=args.source_name,
  292. source_category=args.source_category,
  293. source_urls=_split_csv(args.source_urls),
  294. agent_id=args.agent_id,
  295. submitted_by=args.submitted_by,
  296. score=args.score,
  297. message_id=args.message_id,
  298. capability_ids=_split_csv(args.capability_ids),
  299. tool_ids=_split_csv(args.tool_ids),
  300. )
  301. raise ValueError(f"未知命令: {args.cmd}")
  302. def main() -> int:
  303. args = _build_parser().parse_args()
  304. result = asyncio.run(_dispatch(args))
  305. print(json.dumps(result, ensure_ascii=False, indent=2))
  306. # 退出码:任何 status=failed 或 error 字段非空 → 1
  307. if result.get("status") == "failed" or result.get("error"):
  308. return 1
  309. return 0
# Script entry point: run the CLI and use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())