# knowledge.py (17 KB)
"""
Knowledge management tools - KnowHub API wrappers.

Every tool in this module calls the KnowHub Server through its HTTP API.
"""
import os
import logging
import httpx
from typing import List, Dict, Optional, Any
from agent.tools import tool, ToolResult, ToolContext
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Base URL of the KnowHub Server API; taken from the KNOWHUB_API environment
# variable, defaulting to "http://localhost:8000".
KNOWHUB_API = os.getenv("KNOWHUB_API", "http://localhost:8000")
  13. @tool(hidden_params=["context"])
  14. async def knowledge_search(
  15. query: str,
  16. top_k: int = 5,
  17. min_score: int = 3,
  18. types: Optional[List[str]] = None,
  19. owner: Optional[str] = None,
  20. context: Optional[ToolContext] = None,
  21. ) -> ToolResult:
  22. """
  23. 检索知识(两阶段:语义路由 + 质量精排)
  24. Args:
  25. query: 搜索查询(任务描述)
  26. top_k: 返回数量(默认 5)
  27. min_score: 最低评分过滤(默认 3)
  28. types: 按类型过滤(user_profile/strategy/tool/usecase/definition/plan)
  29. owner: 按所有者过滤(可选)
  30. context: 工具上下文
  31. Returns:
  32. 相关知识列表
  33. """
  34. try:
  35. params = {
  36. "q": query,
  37. "top_k": top_k,
  38. "min_score": min_score,
  39. }
  40. if types:
  41. params["types"] = ",".join(types)
  42. if owner:
  43. params["owner"] = owner
  44. async with httpx.AsyncClient(timeout=60.0) as client:
  45. response = await client.get(f"{KNOWHUB_API}/api/knowledge/search", params=params)
  46. response.raise_for_status()
  47. data = response.json()
  48. results = data.get("results", [])
  49. count = data.get("count", 0)
  50. if not results:
  51. return ToolResult(
  52. title="🔍 未找到相关知识",
  53. output=f"查询: {query}\n\n知识库中暂无相关的高质量知识。",
  54. long_term_memory=f"知识检索: 未找到相关知识 - {query[:50]}"
  55. )
  56. # 格式化输出
  57. output_lines = [f"查询: {query}\n", f"找到 {count} 条相关知识:\n"]
  58. for idx, item in enumerate(results, 1):
  59. eval_data = item.get("eval", {})
  60. score = eval_data.get("score", 3)
  61. output_lines.append(f"\n### {idx}. [{item['id']}] (⭐ {score})")
  62. output_lines.append(f"**任务**: {item['task'][:150]}...")
  63. output_lines.append(f"**内容**: {item['content'][:200]}...")
  64. return ToolResult(
  65. title="✅ 知识检索成功",
  66. output="\n".join(output_lines),
  67. long_term_memory=f"知识检索: 找到 {count} 条相关知识 - {query[:50]}",
  68. metadata={
  69. "count": count,
  70. "knowledge_ids": [item["id"] for item in results],
  71. "items": results
  72. }
  73. )
  74. except Exception as e:
  75. logger.error(f"知识检索失败: {e}")
  76. return ToolResult(
  77. title="❌ 检索失败",
  78. output=f"错误: {str(e)}",
  79. error=str(e)
  80. )
  81. @tool(
  82. hidden_params=["context", "owner"],
  83. inject_params={
  84. "owner": lambda ctx: ctx.get("knowledge_config", {}).get("owner") if ctx else None,
  85. "tags": lambda ctx, args: {
  86. **ctx.get("knowledge_config", {}).get("default_tags", {}),
  87. **(args.get("tags") or {})
  88. } if ctx else args.get("tags"),
  89. "scopes": lambda ctx, args: (args.get("scopes") or []) + (ctx.get("knowledge_config", {}).get("default_scopes") or []) if ctx else args.get("scopes"),
  90. }
  91. )
  92. async def knowledge_save(
  93. task: str,
  94. content: str,
  95. types: List[str],
  96. tags: Optional[Dict[str, str]] = None,
  97. scopes: Optional[List[str]] = None,
  98. owner: Optional[str] = None,
  99. resource_ids: Optional[List[str]] = None,
  100. source_name: str = "",
  101. source_category: str = "exp",
  102. urls: List[str] = None,
  103. agent_id: str = "research_agent",
  104. submitted_by: str = "",
  105. score: int = 3,
  106. message_id: str = "",
  107. context: Optional[ToolContext] = None,
  108. ) -> ToolResult:
  109. """
  110. 保存新知识
  111. Args:
  112. task: 任务描述(在什么情景下 + 要完成什么目标)
  113. content: 核心内容
  114. types: 知识类型标签,可选:user_profile, strategy, tool, usecase, definition, plan
  115. tags: 业务标签(JSON 对象)
  116. scopes: 可见范围(默认 ["org:cybertogether"])
  117. owner: 所有者(默认 agent:{agent_id})
  118. resource_ids: 关联的资源 ID 列表(可选)
  119. source_name: 来源名称
  120. source_category: 来源类别(paper/exp/skill/book)
  121. urls: 参考来源链接列表
  122. agent_id: 执行此调研的 agent ID
  123. submitted_by: 提交者
  124. score: 初始评分 1-5(默认 3)
  125. message_id: 来源 Message ID
  126. context: 工具上下文
  127. Returns:
  128. 保存结果
  129. """
  130. try:
  131. # 设置默认值(在 agent 代码中,不是服务器端)
  132. if scopes is None:
  133. scopes = ["org:cybertogether"]
  134. if owner is None:
  135. owner = f"agent:{agent_id}"
  136. payload = {
  137. "message_id": message_id,
  138. "types": types,
  139. "task": task,
  140. "tags": tags or {},
  141. "scopes": scopes,
  142. "owner": owner,
  143. "content": content,
  144. "resource_ids": resource_ids or [],
  145. "source": {
  146. "name": source_name,
  147. "category": source_category,
  148. "urls": urls or [],
  149. "agent_id": agent_id,
  150. "submitted_by": submitted_by,
  151. },
  152. "eval": {
  153. "score": score,
  154. "helpful": 1,
  155. "harmful": 0,
  156. "confidence": 0.5,
  157. }
  158. }
  159. async with httpx.AsyncClient(timeout=30.0) as client:
  160. response = await client.post(f"{KNOWHUB_API}/api/knowledge", json=payload)
  161. response.raise_for_status()
  162. data = response.json()
  163. knowledge_id = data.get("knowledge_id", "unknown")
  164. return ToolResult(
  165. title="✅ 知识已保存",
  166. output=f"知识 ID: {knowledge_id}\n\n任务:\n{task[:100]}...",
  167. long_term_memory=f"保存知识: {knowledge_id} - {task[:50]}",
  168. metadata={"knowledge_id": knowledge_id}
  169. )
  170. except Exception as e:
  171. logger.error(f"保存知识失败: {e}")
  172. return ToolResult(
  173. title="❌ 保存失败",
  174. output=f"错误: {str(e)}",
  175. error=str(e)
  176. )
  177. @tool(hidden_params=["context"])
  178. async def knowledge_update(
  179. knowledge_id: str,
  180. add_helpful_case: Optional[Dict] = None,
  181. add_harmful_case: Optional[Dict] = None,
  182. update_score: Optional[int] = None,
  183. evolve_feedback: Optional[str] = None,
  184. context: Optional[ToolContext] = None,
  185. ) -> ToolResult:
  186. """
  187. 更新已有知识的评估反馈
  188. Args:
  189. knowledge_id: 知识 ID
  190. add_helpful_case: 添加好用的案例
  191. add_harmful_case: 添加不好用的案例
  192. update_score: 更新评分(1-5)
  193. evolve_feedback: 经验进化反馈(触发 LLM 重写)
  194. context: 工具上下文
  195. Returns:
  196. 更新结果
  197. """
  198. try:
  199. payload = {}
  200. if add_helpful_case:
  201. payload["add_helpful_case"] = add_helpful_case
  202. if add_harmful_case:
  203. payload["add_harmful_case"] = add_harmful_case
  204. if update_score is not None:
  205. payload["update_score"] = update_score
  206. if evolve_feedback:
  207. payload["evolve_feedback"] = evolve_feedback
  208. if not payload:
  209. return ToolResult(
  210. title="⚠️ 无更新",
  211. output="未指定任何更新内容",
  212. long_term_memory="尝试更新知识但未指定更新内容"
  213. )
  214. async with httpx.AsyncClient(timeout=60.0) as client:
  215. response = await client.put(f"{KNOWHUB_API}/api/knowledge/{knowledge_id}", json=payload)
  216. response.raise_for_status()
  217. summary = []
  218. if add_helpful_case:
  219. summary.append("添加 helpful 案例")
  220. if add_harmful_case:
  221. summary.append("添加 harmful 案例")
  222. if update_score is not None:
  223. summary.append(f"更新评分: {update_score}")
  224. if evolve_feedback:
  225. summary.append("知识进化: 基于反馈重写内容")
  226. return ToolResult(
  227. title="✅ 知识已更新",
  228. output=f"知识 ID: {knowledge_id}\n\n更新内容:\n" + "\n".join(f"- {s}" for s in summary),
  229. long_term_memory=f"更新知识: {knowledge_id}"
  230. )
  231. except Exception as e:
  232. logger.error(f"更新知识失败: {e}")
  233. return ToolResult(
  234. title="❌ 更新失败",
  235. output=f"错误: {str(e)}",
  236. error=str(e)
  237. )
  238. @tool(hidden_params=["context"])
  239. async def knowledge_batch_update(
  240. feedback_list: List[Dict[str, Any]],
  241. context: Optional[ToolContext] = None,
  242. ) -> ToolResult:
  243. """
  244. 批量反馈知识的有效性
  245. Args:
  246. feedback_list: 评价列表,每个元素包含:
  247. - knowledge_id: (str) 知识 ID
  248. - is_effective: (bool) 是否有效
  249. - feedback: (str, optional) 改进建议,若有效且有建议则触发知识进化
  250. context: 工具上下文
  251. Returns:
  252. 批量更新结果
  253. """
  254. try:
  255. if not feedback_list:
  256. return ToolResult(
  257. title="⚠️ 反馈列表为空",
  258. output="未提供任何反馈",
  259. long_term_memory="批量更新知识: 反馈列表为空"
  260. )
  261. payload = {"feedback_list": feedback_list}
  262. async with httpx.AsyncClient(timeout=120.0) as client:
  263. response = await client.post(f"{KNOWHUB_API}/api/knowledge/batch_update", json=payload)
  264. response.raise_for_status()
  265. data = response.json()
  266. updated = data.get("updated", 0)
  267. return ToolResult(
  268. title="✅ 批量更新完成",
  269. output=f"成功更新 {updated} 条知识",
  270. long_term_memory=f"批量更新知识: 成功 {updated} 条"
  271. )
  272. except Exception as e:
  273. logger.error(f"列出知识失败: {e}")
  274. return ToolResult(
  275. title="❌ 列表失败",
  276. output=f"错误: {str(e)}",
  277. error=str(e)
  278. )
  279. @tool(hidden_params=["context"])
  280. async def knowledge_list(
  281. limit: int = 10,
  282. types: Optional[List[str]] = None,
  283. scopes: Optional[List[str]] = None,
  284. context: Optional[ToolContext] = None,
  285. ) -> ToolResult:
  286. """
  287. 列出已保存的知识
  288. Args:
  289. limit: 返回数量限制(默认 10)
  290. types: 按类型过滤(可选)
  291. scopes: 按范围过滤(可选)
  292. context: 工具上下文
  293. Returns:
  294. 知识列表
  295. """
  296. try:
  297. params = {"limit": limit}
  298. if types:
  299. params["types"] = ",".join(types)
  300. if scopes:
  301. params["scopes"] = ",".join(scopes)
  302. async with httpx.AsyncClient(timeout=30.0) as client:
  303. response = await client.get(f"{KNOWHUB_API}/api/knowledge", params=params)
  304. response.raise_for_status()
  305. data = response.json()
  306. results = data.get("results", [])
  307. count = data.get("count", 0)
  308. if not results:
  309. return ToolResult(
  310. title="📂 知识库为空",
  311. output="还没有保存任何知识",
  312. long_term_memory="知识库为空"
  313. )
  314. output_lines = [f"共找到 {count} 条知识:\n"]
  315. for item in results:
  316. eval_data = item.get("eval", {})
  317. score = eval_data.get("score", 3)
  318. output_lines.append(f"- [{item['id']}] (⭐{score}) {item['task'][:60]}...")
  319. return ToolResult(
  320. title="📚 知识列表",
  321. output="\n".join(output_lines),
  322. long_term_memory=f"列出 {count} 条知识"
  323. )
  324. except Exception as e:
  325. logger.error(f"列出知识失败: {e}")
  326. return ToolResult(
  327. title="❌ 列表失败",
  328. output=f"错误: {str(e)}",
  329. error=str(e)
  330. )
  331. @tool(hidden_params=["context"])
  332. async def knowledge_slim(
  333. model: str = "google/gemini-2.0-flash-001",
  334. context: Optional[ToolContext] = None,
  335. ) -> ToolResult:
  336. """
  337. 知识库瘦身:调用顶级大模型,将知识库中语义相似的知识合并精简
  338. Args:
  339. model: 使用的模型(默认 gemini-2.0-flash-001)
  340. context: 工具上下文
  341. Returns:
  342. 瘦身结果报告
  343. """
  344. try:
  345. async with httpx.AsyncClient(timeout=300.0) as client:
  346. response = await client.post(f"{KNOWHUB_API}/api/knowledge/slim", params={"model": model})
  347. response.raise_for_status()
  348. data = response.json()
  349. before = data.get("before", 0)
  350. after = data.get("after", 0)
  351. report = data.get("report", "")
  352. result = f"瘦身完成:{before} → {after} 条知识"
  353. if report:
  354. result += f"\n{report}"
  355. return ToolResult(
  356. title="✅ 知识库瘦身完成",
  357. output=result,
  358. long_term_memory=f"知识库瘦身: {before} → {after} 条"
  359. )
  360. except Exception as e:
  361. logger.error(f"知识库瘦身失败: {e}")
  362. return ToolResult(
  363. title="❌ 瘦身失败",
  364. output=f"错误: {str(e)}",
  365. error=str(e)
  366. )
# ==================== Resource management tools ====================
  368. @tool(hidden_params=["context"])
  369. async def resource_save(
  370. resource_id: str,
  371. title: str,
  372. body: str,
  373. content_type: str = "text",
  374. secure_body: str = "",
  375. metadata: Optional[Dict[str, Any]] = None,
  376. submitted_by: str = "",
  377. context: Optional[ToolContext] = None,
  378. ) -> ToolResult:
  379. """
  380. 保存资源(代码片段、凭证、Cookie 等)
  381. Args:
  382. resource_id: 资源 ID(层级路径,如 "code/selenium/login" 或 "credentials/website_a")
  383. title: 资源标题
  384. body: 公开内容(明文存储,可搜索)
  385. content_type: 内容类型(text/code/credential/cookie)
  386. secure_body: 敏感内容(加密存储,需要组织密钥访问)
  387. metadata: 元数据(如 {"language": "python", "acquired_at": "2026-03-06T10:00:00Z"})
  388. submitted_by: 提交者
  389. context: 工具上下文
  390. Returns:
  391. 保存结果
  392. """
  393. try:
  394. payload = {
  395. "id": resource_id,
  396. "title": title,
  397. "body": body,
  398. "secure_body": secure_body,
  399. "content_type": content_type,
  400. "metadata": metadata or {},
  401. "submitted_by": submitted_by,
  402. }
  403. async with httpx.AsyncClient(timeout=30.0) as client:
  404. response = await client.post(f"{KNOWHUB_API}/api/resource", json=payload)
  405. response.raise_for_status()
  406. data = response.json()
  407. return ToolResult(
  408. title="✅ 资源已保存",
  409. output=f"资源 ID: {resource_id}\n类型: {content_type}\n标题: {title}",
  410. long_term_memory=f"保存资源: {resource_id} ({content_type})",
  411. metadata={"resource_id": resource_id}
  412. )
  413. except Exception as e:
  414. logger.error(f"保存资源失败: {e}")
  415. return ToolResult(
  416. title="❌ 保存失败",
  417. output=f"错误: {str(e)}",
  418. error=str(e)
  419. )
  420. @tool(hidden_params=["context"])
  421. async def resource_get(
  422. resource_id: str,
  423. org_key: Optional[str] = None,
  424. context: Optional[ToolContext] = None,
  425. ) -> ToolResult:
  426. """
  427. 获取资源内容
  428. Args:
  429. resource_id: 资源 ID(层级路径)
  430. org_key: 组织密钥(用于解密敏感内容,可选)
  431. context: 工具上下文
  432. Returns:
  433. 资源内容
  434. """
  435. try:
  436. headers = {}
  437. if org_key:
  438. headers["X-Org-Key"] = org_key
  439. async with httpx.AsyncClient(timeout=30.0) as client:
  440. response = await client.get(
  441. f"{KNOWHUB_API}/api/resource/{resource_id}",
  442. headers=headers
  443. )
  444. response.raise_for_status()
  445. data = response.json()
  446. output = f"资源 ID: {data['id']}\n"
  447. output += f"标题: {data['title']}\n"
  448. output += f"类型: {data['content_type']}\n"
  449. output += f"\n公开内容:\n{data['body']}\n"
  450. if data.get('secure_body'):
  451. output += f"\n敏感内容:\n{data['secure_body']}\n"
  452. return ToolResult(
  453. title=f"📦 {data['title']}",
  454. output=output,
  455. metadata=data
  456. )
  457. except Exception as e:
  458. logger.error(f"获取资源失败: {e}")
  459. return ToolResult(
  460. title="❌ 获取失败",
  461. output=f"错误: {str(e)}",
  462. error=str(e)
  463. )