|
|
@@ -47,7 +47,7 @@ async def save_knowledge(
|
|
|
Args:
|
|
|
scenario: 任务描述(在什么情景下 + 要完成什么目标 + 得到能达成一个什么结果)
|
|
|
content: 核心内容
|
|
|
- tags_type: 知识类型标签,可选:tool, usercase, definition, plan
|
|
|
+ tags_type: 知识类型标签,可选:tool, usercase, definition, plan, strategy
|
|
|
urls: 参考来源链接列表(论文/GitHub/博客等)
|
|
|
agent_id: 执行此调研的 agent ID
|
|
|
score: 初始评分 1-5(默认 3)
|
|
|
@@ -120,6 +120,7 @@ async def update_knowledge(
|
|
|
add_helpful_case: Optional[Dict[str, str]] = None,
|
|
|
add_harmful_case: Optional[Dict[str, str]] = None,
|
|
|
update_score: Optional[int] = None,
|
|
|
+ evolve_feedback: Optional[str] = None,
|
|
|
) -> ToolResult:
|
|
|
"""
|
|
|
更新已有的原子知识的评估反馈
|
|
|
@@ -129,19 +130,28 @@ async def update_knowledge(
|
|
|
add_helpful_case: 添加好用的案例 {"case_id": "...", "scenario": "...", "result": "...", "timestamp": "..."}
|
|
|
add_harmful_case: 添加不好用的案例 {"case_id": "...", "scenario": "...", "result": "...", "timestamp": "..."}
|
|
|
update_score: 更新评分(1-5)
|
|
|
+ evolve_feedback: 经验进化反馈(当提供时,会使用 LLM 重写知识内容)
|
|
|
|
|
|
Returns:
|
|
|
更新结果
|
|
|
"""
|
|
|
try:
|
|
|
- # 查找文件
|
|
|
+ # 查找文件(支持 JSON 和 MD 格式)
|
|
|
knowledge_dir = Path(".cache/knowledge_atoms")
|
|
|
- file_path = knowledge_dir / f"{knowledge_id}.md"
|
|
|
-
|
|
|
- if not file_path.exists():
|
|
|
+ json_path = knowledge_dir / f"{knowledge_id}.json"
|
|
|
+ md_path = knowledge_dir / f"{knowledge_id}.md"
|
|
|
+
|
|
|
+ file_path = None
|
|
|
+ if json_path.exists():
|
|
|
+ file_path = json_path
|
|
|
+ is_json = True
|
|
|
+ elif md_path.exists():
|
|
|
+ file_path = md_path
|
|
|
+ is_json = False
|
|
|
+ else:
|
|
|
return ToolResult(
|
|
|
title="❌ 文件不存在",
|
|
|
- output=f"未找到知识文件: {file_path}",
|
|
|
+ output=f"未找到知识文件: {knowledge_id}",
|
|
|
error="文件不存在"
|
|
|
)
|
|
|
|
|
|
@@ -149,78 +159,50 @@ async def update_knowledge(
|
|
|
with open(file_path, "r", encoding="utf-8") as f:
|
|
|
content = f.read()
|
|
|
|
|
|
+ # 解析数据
|
|
|
+ if is_json:
|
|
|
+ data = json.loads(content)
|
|
|
+ else:
|
|
|
+ # 解析 YAML frontmatter
|
|
|
+ yaml_match = re.search(r'^---\n(.*?)\n---', content, re.DOTALL)
|
|
|
+ if not yaml_match:
|
|
|
+ return ToolResult(
|
|
|
+ title="❌ 格式错误",
|
|
|
+ output=f"无法解析知识文件格式: {file_path}",
|
|
|
+ error="格式错误"
|
|
|
+ )
|
|
|
+ data = yaml.safe_load(yaml_match.group(1))
|
|
|
+
|
|
|
# 更新内容
|
|
|
updated = False
|
|
|
- import re
|
|
|
+ summary = []
|
|
|
|
|
|
if add_helpful_case:
|
|
|
- # 增加 helpful 计数
|
|
|
- helpful_match = re.search(r"helpful: (\d+)", content)
|
|
|
- current_helpful = int(helpful_match.group(1)) if helpful_match else 0
|
|
|
- content = re.sub(
|
|
|
- r"helpful: \d+",
|
|
|
- f"helpful: {current_helpful + 1}",
|
|
|
- content
|
|
|
- )
|
|
|
-
|
|
|
- # 添加案例到 helpful_history
|
|
|
- case_yaml = f""" - case_id: {add_helpful_case.get('case_id', 'unknown')}
|
|
|
- scenario: "{add_helpful_case.get('scenario', '')}"
|
|
|
- result: "{add_helpful_case.get('result', '')}"
|
|
|
- timestamp: {add_helpful_case.get('timestamp', datetime.now().isoformat())}"""
|
|
|
-
|
|
|
- if "helpful_history: []" in content:
|
|
|
- content = content.replace(
|
|
|
- "helpful_history: []",
|
|
|
- f"helpful_history:\n{case_yaml}"
|
|
|
- )
|
|
|
- else:
|
|
|
- # 在 helpful_history 后追加
|
|
|
- content = re.sub(
|
|
|
- r"(helpful_history:.*?)(\n harmful)",
|
|
|
- f"\\1\n{case_yaml}\\2",
|
|
|
- content,
|
|
|
- flags=re.DOTALL
|
|
|
- )
|
|
|
+ data["eval"]["helpful"] += 1
|
|
|
+ data["eval"]["helpful_history"].append(add_helpful_case)
|
|
|
+ data["metrics"]["helpful"] += 1
|
|
|
+ summary.append(f"添加 helpful 案例: {add_helpful_case.get('case_id')}")
|
|
|
updated = True
|
|
|
|
|
|
if add_harmful_case:
|
|
|
- # 增加 harmful 计数
|
|
|
- harmful_match = re.search(r"harmful: (\d+)", content)
|
|
|
- current_harmful = int(harmful_match.group(1)) if harmful_match else 0
|
|
|
- content = re.sub(
|
|
|
- r"harmful: \d+",
|
|
|
- f"harmful: {current_harmful + 1}",
|
|
|
- content
|
|
|
- )
|
|
|
-
|
|
|
- # 添加案例到 harmful_history
|
|
|
- case_yaml = f""" - case_id: {add_harmful_case.get('case_id', 'unknown')}
|
|
|
- scenario: "{add_harmful_case.get('scenario', '')}"
|
|
|
- result: "{add_harmful_case.get('result', '')}"
|
|
|
- timestamp: {add_harmful_case.get('timestamp', datetime.now().isoformat())}"""
|
|
|
-
|
|
|
- if "harmful_history: []" in content:
|
|
|
- content = content.replace(
|
|
|
- "harmful_history: []",
|
|
|
- f"harmful_history:\n{case_yaml}"
|
|
|
- )
|
|
|
- else:
|
|
|
- # 在 harmful_history 后追加
|
|
|
- content = re.sub(
|
|
|
- r"(harmful_history:.*?)(\nmetrics)",
|
|
|
- f"\\1\n{case_yaml}\\2",
|
|
|
- content,
|
|
|
- flags=re.DOTALL
|
|
|
- )
|
|
|
+ data["eval"]["harmful"] += 1
|
|
|
+ data["eval"]["harmful_history"].append(add_harmful_case)
|
|
|
+ data["metrics"]["harmful"] += 1
|
|
|
+ summary.append(f"添加 harmful 案例: {add_harmful_case.get('case_id')}")
|
|
|
updated = True
|
|
|
|
|
|
if update_score is not None:
|
|
|
- content = re.sub(
|
|
|
- r"score: \d+",
|
|
|
- f"score: {update_score}",
|
|
|
- content
|
|
|
- )
|
|
|
+ data["eval"]["score"] = update_score
|
|
|
+ summary.append(f"更新评分: {update_score}")
|
|
|
+ updated = True
|
|
|
+
|
|
|
+ # 经验进化机制
|
|
|
+ if evolve_feedback:
|
|
|
+ old_content = data.get("content", "")
|
|
|
+ evolved_content = await _evolve_knowledge_with_llm(old_content, evolve_feedback)
|
|
|
+ data["content"] = evolved_content
|
|
|
+ data["metrics"]["helpful"] += 1
|
|
|
+ summary.append(f"知识进化: 基于反馈重写内容")
|
|
|
updated = True
|
|
|
|
|
|
if not updated:
|
|
|
@@ -230,17 +212,18 @@ async def update_knowledge(
|
|
|
long_term_memory="尝试更新原子知识但未指定更新内容"
|
|
|
)
|
|
|
|
|
|
- # 保存更新
|
|
|
- with open(file_path, "w", encoding="utf-8") as f:
|
|
|
- f.write(content)
|
|
|
+ # 更新时间戳
|
|
|
+ data["updated_at"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
|
|
- summary = []
|
|
|
- if add_helpful_case:
|
|
|
- summary.append(f"添加 helpful 案例: {add_helpful_case.get('case_id')}")
|
|
|
- if add_harmful_case:
|
|
|
- summary.append(f"添加 harmful 案例: {add_harmful_case.get('case_id')}")
|
|
|
- if update_score:
|
|
|
- summary.append(f"更新评分: {update_score}")
|
|
|
+ # 保存更新
|
|
|
+ if is_json:
|
|
|
+ with open(file_path, "w", encoding="utf-8") as f:
|
|
|
+ json.dump(data, f, ensure_ascii=False, indent=2)
|
|
|
+ else:
|
|
|
+ # 重新生成 YAML frontmatter
|
|
|
+ meta_str = yaml.dump(data, allow_unicode=True).strip()
|
|
|
+ with open(file_path, "w", encoding="utf-8") as f:
|
|
|
+ f.write(f"---\n{meta_str}\n---\n")
|
|
|
|
|
|
return ToolResult(
|
|
|
title="✅ 原子知识已更新",
|
|
|
@@ -374,13 +357,111 @@ async def _route_knowledge_by_llm(query_text: str, metadata_list: List[Dict], k:
|
|
|
return []
|
|
|
|
|
|
|
|
|
-async def _get_structured_knowledge(query_text: str, top_k: int = 5, min_score: int = 3) -> List[Dict]:
|
|
|
async def _evolve_knowledge_with_llm(old_content: str, feedback: str) -> str:
    """
    Rewrite a knowledge entry with an LLM based on real-world feedback
    (the "experience evolution" mechanism).

    Falls back to appending the raw feedback to the original content when the
    LLM call fails or returns an implausibly short result, so the feedback is
    never lost.
    """
    rewrite_prompt = f"""你是一个 AI Agent 知识库管理员。请根据反馈建议,对现有的知识内容进行重写进化。

【原知识内容】:
{old_content}

【实战反馈建议】:
{feedback}

【重写要求】:
1. 融合知识:将反馈中的避坑指南、新参数或修正后的选择逻辑融入原知识,使其更具通用性和准确性。
2. 保持结构:如果原内容有特定格式(如 Markdown、代码示例等),请保持该格式。
3. 语言:简洁直接,使用中文。
4. 禁止:严禁输出任何开场白、解释语或额外的 Markdown 标题,直接返回重写后的正文。
"""
    try:
        llm_response = await openrouter_llm_call(
            messages=[{"role": "user", "content": rewrite_prompt}],
            model="google/gemini-2.0-flash-001"
        )

        rewritten = llm_response.get("content", "").strip()

        # Sanity check: treat an empty or near-empty reply as a failure so we
        # drop into the append-mode fallback below instead of clobbering the
        # original content.
        if len(rewritten) < 5:
            raise ValueError("LLM output too short")

        return rewritten

    except Exception as e:
        logger.warning(f"知识进化失败,采用追加模式回退: {e}")
        day = datetime.now().strftime('%Y-%m-%d')
        return f"{old_content}\n\n---\n[Update {day}]: {feedback}"
|
|
|
+
|
|
|
+
|
|
|
+async def _route_knowledge_by_llm(query_text: str, metadata_list: List[Dict], k: int = 5) -> List[str]:
|
|
|
+ """
|
|
|
+ 第一阶段:语义路由。
|
|
|
+ 让 LLM 挑选出 2*k 个语义相关的 ID。
|
|
|
+ """
|
|
|
+ if not metadata_list:
|
|
|
+ return []
|
|
|
+
|
|
|
+ # 扩大筛选范围到 2*k
|
|
|
+ routing_k = k * 2
|
|
|
+
|
|
|
+ routing_data = [
|
|
|
+ {
|
|
|
+ "id": m["id"],
|
|
|
+ "tags": m["tags"],
|
|
|
+ "scenario": m["scenario"][:100] # 只取前100字符
|
|
|
+ } for m in metadata_list
|
|
|
+ ]
|
|
|
+
|
|
|
+ prompt = f"""
|
|
|
+你是一个知识检索专家。根据用户的当前任务需求,从下列原子知识元数据中挑选出最相关的最多 {routing_k} 个知识 ID。
|
|
|
+任务需求:"{query_text}"
|
|
|
+
|
|
|
+可选知识列表:
|
|
|
+{json.dumps(routing_data, ensure_ascii=False, indent=1)}
|
|
|
+
|
|
|
+请直接输出 ID 列表,用逗号分隔(例如: research-20260302-001, research-20260302-002)。若无相关项请输出 "None"。
|
|
|
+"""
|
|
|
+
|
|
|
+ try:
|
|
|
+ print(f"\n[Step 1: 知识语义路由] 任务: '{query_text}' | 候选总数: {len(metadata_list)} | 目标提取数: {routing_k}")
|
|
|
+
|
|
|
+ response = await openrouter_llm_call(
|
|
|
+ messages=[{"role": "user", "content": prompt}],
|
|
|
+ model="google/gemini-2.0-flash-001"
|
|
|
+ )
|
|
|
+
|
|
|
+ content = response.get("content", "").strip()
|
|
|
+ selected_ids = [idx.strip() for idx in re.split(r'[,\s]+', content) if idx.strip().startswith("research-")]
|
|
|
+
|
|
|
+ print(f"[Step 1: 知识语义路由] LLM 初选 ID ({len(selected_ids)}个): {selected_ids}")
|
|
|
+ return selected_ids
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"LLM 知识路由失败: {e}")
|
|
|
+ return []
|
|
|
+
|
|
|
+
|
|
|
+async def _get_structured_knowledge(
|
|
|
+ query_text: str,
|
|
|
+ top_k: int = 5,
|
|
|
+ min_score: int = 3,
|
|
|
+ context: Optional[Any] = None,
|
|
|
+ tags_filter: Optional[List[str]] = None
|
|
|
+) -> List[Dict]:
|
|
|
"""
|
|
|
- 语义检索原子知识
|
|
|
+ 语义检索原子知识(包括经验)
|
|
|
|
|
|
1. 解析知识库文件(支持 JSON 和 YAML 格式)
|
|
|
2. 语义路由:提取 2*k 个 ID
|
|
|
3. 质量精排:基于评分筛选出最终的 k 个
|
|
|
+
|
|
|
+ Args:
|
|
|
+ query_text: 查询文本
|
|
|
+ top_k: 返回数量
|
|
|
+ min_score: 最低评分过滤
|
|
|
+ context: 上下文(兼容 experience 接口)
|
|
|
+ tags_filter: 标签过滤(如 ["strategy"] 只返回经验)
|
|
|
"""
|
|
|
knowledge_dir = Path(".cache/knowledge_atoms")
|
|
|
|
|
|
@@ -431,9 +512,19 @@ async def _get_structured_knowledge(query_text: str, top_k: int = 5, min_score:
|
|
|
scenario = metadata.get("scenario", "").strip()
|
|
|
content_text = metadata.get("content", "").strip()
|
|
|
|
|
|
+ # 标签过滤
|
|
|
+ tags = metadata.get("tags", {})
|
|
|
+ if tags_filter:
|
|
|
+ # 检查 tags.type 是否包含任何过滤标签
|
|
|
+ tag_types = tags.get("type", [])
|
|
|
+ if isinstance(tag_types, str):
|
|
|
+ tag_types = [tag_types]
|
|
|
+ if not any(tag in tag_types for tag in tags_filter):
|
|
|
+ continue # 跳过不匹配的标签
|
|
|
+
|
|
|
meta_item = {
|
|
|
"id": kid,
|
|
|
- "tags": metadata.get("tags", {}),
|
|
|
+ "tags": tags,
|
|
|
"scenario": scenario,
|
|
|
"score": metadata.get("eval", {}).get("score", 3),
|
|
|
"helpful": metadata.get("metrics", {}).get("helpful", 0),
|
|
|
@@ -557,3 +648,467 @@ async def search_knowledge(
|
|
|
output=f"错误: {str(e)}",
|
|
|
error=str(e)
|
|
|
)
|
|
|
+
|
|
|
+
|
|
|
+# ===== 批量更新功能(类似经验机制)=====
|
|
|
+
|
|
|
+async def _batch_update_knowledge(
|
|
|
+ update_map: Dict[str, Dict[str, Any]],
|
|
|
+ context: Optional[Any] = None
|
|
|
+) -> int:
|
|
|
+ """
|
|
|
+ 内部函数:批量更新知识(兼容 experience 接口)
|
|
|
+
|
|
|
+ Args:
|
|
|
+ update_map: 更新映射 {knowledge_id: {"action": "helpful/harmful/evolve", "feedback": "..."}}
|
|
|
+ context: 上下文(兼容 experience 接口)
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ 成功更新的数量
|
|
|
+ """
|
|
|
+ if not update_map:
|
|
|
+ return 0
|
|
|
+
|
|
|
+ knowledge_dir = Path(".cache/knowledge_atoms")
|
|
|
+ if not knowledge_dir.exists():
|
|
|
+ return 0
|
|
|
+
|
|
|
+ success_count = 0
|
|
|
+ evolution_tasks = []
|
|
|
+ evolution_registry = {} # task_idx -> (file_path, data)
|
|
|
+
|
|
|
+ for knowledge_id, instr in update_map.items():
|
|
|
+ try:
|
|
|
+ # 查找文件
|
|
|
+ json_path = knowledge_dir / f"{knowledge_id}.json"
|
|
|
+ md_path = knowledge_dir / f"{knowledge_id}.md"
|
|
|
+
|
|
|
+ file_path = None
|
|
|
+ is_json = False
|
|
|
+ if json_path.exists():
|
|
|
+ file_path = json_path
|
|
|
+ is_json = True
|
|
|
+ elif md_path.exists():
|
|
|
+ file_path = md_path
|
|
|
+ is_json = False
|
|
|
+ else:
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 读取并解析
|
|
|
+ with open(file_path, "r", encoding="utf-8") as f:
|
|
|
+ content = f.read()
|
|
|
+
|
|
|
+ if is_json:
|
|
|
+ data = json.loads(content)
|
|
|
+ else:
|
|
|
+ yaml_match = re.search(r'^---\n(.*?)\n---', content, re.DOTALL)
|
|
|
+ if not yaml_match:
|
|
|
+ continue
|
|
|
+ data = yaml.safe_load(yaml_match.group(1))
|
|
|
+
|
|
|
+ # 更新 metrics
|
|
|
+ action = instr.get("action")
|
|
|
+ feedback = instr.get("feedback", "")
|
|
|
+
|
|
|
+ # 处理 mixed 中间态
|
|
|
+ if action == "mixed":
|
|
|
+ data["metrics"]["helpful"] = data.get("metrics", {}).get("helpful", 0) + 1
|
|
|
+ action = "evolve"
|
|
|
+
|
|
|
+ if action == "helpful":
|
|
|
+ data["metrics"]["helpful"] = data.get("metrics", {}).get("helpful", 0) + 1
|
|
|
+ elif action == "harmful":
|
|
|
+ data["metrics"]["harmful"] = data.get("metrics", {}).get("harmful", 0) + 1
|
|
|
+ elif action == "evolve" and feedback:
|
|
|
+ # 注册进化任务
|
|
|
+ old_content = data.get("content", "")
|
|
|
+ task = _evolve_knowledge_with_llm(old_content, feedback)
|
|
|
+ evolution_tasks.append(task)
|
|
|
+ evolution_registry[len(evolution_tasks) - 1] = (file_path, data, is_json)
|
|
|
+ data["metrics"]["helpful"] = data.get("metrics", {}).get("helpful", 0) + 1
|
|
|
+
|
|
|
+ data["updated_at"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
|
|
+
|
|
|
+ # 如果不需要进化,直接保存
|
|
|
+ if action != "evolve" or not feedback:
|
|
|
+ if is_json:
|
|
|
+ with open(file_path, "w", encoding="utf-8") as f:
|
|
|
+ json.dump(data, f, ensure_ascii=False, indent=2)
|
|
|
+ else:
|
|
|
+ meta_str = yaml.dump(data, allow_unicode=True).strip()
|
|
|
+ with open(file_path, "w", encoding="utf-8") as f:
|
|
|
+ f.write(f"---\n{meta_str}\n---\n")
|
|
|
+ success_count += 1
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"更新知识失败 {knowledge_id}: {e}")
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 并发进化
|
|
|
+ if evolution_tasks:
|
|
|
+ import asyncio
|
|
|
+ print(f"🧬 并发处理 {len(evolution_tasks)} 条知识进化...")
|
|
|
+ evolved_results = await asyncio.gather(*evolution_tasks)
|
|
|
+
|
|
|
+ # 回填进化结果
|
|
|
+ for task_idx, (file_path, data, is_json) in evolution_registry.items():
|
|
|
+ data["content"] = evolved_results[task_idx].strip()
|
|
|
+
|
|
|
+ if is_json:
|
|
|
+ with open(file_path, "w", encoding="utf-8") as f:
|
|
|
+ json.dump(data, f, ensure_ascii=False, indent=2)
|
|
|
+ else:
|
|
|
+ meta_str = yaml.dump(data, allow_unicode=True).strip()
|
|
|
+ with open(file_path, "w", encoding="utf-8") as f:
|
|
|
+ f.write(f"---\n{meta_str}\n---\n")
|
|
|
+ success_count += 1
|
|
|
+
|
|
|
+ return success_count
|
|
|
+
|
|
|
+
|
|
|
@tool()
async def batch_update_knowledge(
    feedback_list: List[Dict[str, Any]],
    context: Optional[ToolContext] = None,
) -> ToolResult:
    """
    Batch-report the effectiveness of knowledge entries (experience-style feedback).

    Args:
        feedback_list: list of feedback items, each containing:
            - knowledge_id: (str) knowledge ID
            - is_effective: (bool) whether the knowledge was effective
            - feedback: (str, optional) improvement suggestion; when present on
              an effective entry it triggers an LLM-based knowledge evolution

    Returns:
        Batch update result.
    """
    try:
        if not feedback_list:
            return ToolResult(
                title="⚠️ 反馈列表为空",
                output="未提供任何反馈",
                long_term_memory="批量更新知识: 反馈列表为空"
            )

        knowledge_dir = Path(".cache/knowledge_atoms")
        if not knowledge_dir.exists():
            return ToolResult(
                title="❌ 知识库不存在",
                output="知识库目录不存在",
                error="知识库不存在"
            )

        success_count = 0
        failed_items = []

        for item in feedback_list:
            knowledge_id = item.get("knowledge_id")
            is_effective = item.get("is_effective")
            feedback = item.get("feedback", "")

            if not knowledge_id:
                failed_items.append({"id": "unknown", "reason": "缺少 knowledge_id"})
                continue

            try:
                # Locate the entry file: JSON preferred, Markdown as fallback.
                json_path = knowledge_dir / f"{knowledge_id}.json"
                md_path = knowledge_dir / f"{knowledge_id}.md"

                file_path = None
                is_json = False
                if json_path.exists():
                    file_path = json_path
                    is_json = True
                elif md_path.exists():
                    file_path = md_path
                    is_json = False
                else:
                    failed_items.append({"id": knowledge_id, "reason": "文件不存在"})
                    continue

                # Read and parse the entry.
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()

                if is_json:
                    data = json.loads(content)
                else:
                    yaml_match = re.search(r'^---\n(.*?)\n---', content, re.DOTALL)
                    if not yaml_match:
                        failed_items.append({"id": knowledge_id, "reason": "格式错误"})
                        continue
                    data = yaml.safe_load(yaml_match.group(1))

                # setdefault guards entries whose "metrics" section is missing:
                # the previous `data["metrics"][k] = data.get("metrics", {})...`
                # form raised KeyError on the assignment in that case.
                metrics = data.setdefault("metrics", {})
                if is_effective:
                    metrics["helpful"] = metrics.get("helpful", 0) + 1
                    # A suggestion on an effective entry triggers an LLM
                    # rewrite (knowledge evolution). Evolutions here run
                    # sequentially, one await per item.
                    if feedback:
                        old_content = data.get("content", "")
                        data["content"] = await _evolve_knowledge_with_llm(old_content, feedback)
                else:
                    metrics["harmful"] = metrics.get("harmful", 0) + 1

                data["updated_at"] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

                # Persist in the original on-disk format.
                if is_json:
                    with open(file_path, "w", encoding="utf-8") as f:
                        json.dump(data, f, ensure_ascii=False, indent=2)
                else:
                    meta_str = yaml.dump(data, allow_unicode=True).strip()
                    with open(file_path, "w", encoding="utf-8") as f:
                        f.write(f"---\n{meta_str}\n---\n")

                success_count += 1

            except Exception as e:
                failed_items.append({"id": knowledge_id, "reason": str(e)})
                continue

        output_lines = [f"成功更新 {success_count} 条知识"]
        if failed_items:
            output_lines.append(f"\n失败 {len(failed_items)} 条:")
            for item in failed_items:
                output_lines.append(f"  - {item['id']}: {item['reason']}")

        return ToolResult(
            title="✅ 批量更新完成",
            output="\n".join(output_lines),
            long_term_memory=f"批量更新知识: 成功 {success_count} 条,失败 {len(failed_items)} 条"
        )

    except Exception as e:
        logger.error(f"批量更新知识失败: {e}")
        return ToolResult(
            title="❌ 批量更新失败",
            output=f"错误: {str(e)}",
            error=str(e)
        )
|
|
|
+
|
|
|
+
|
|
|
+# ===== 知识库瘦身功能(类似经验机制)=====
|
|
|
+
|
|
|
@tool()
async def slim_knowledge(
    model: str = "anthropic/claude-sonnet-4.5",
    context: Optional[ToolContext] = None,
) -> ToolResult:
    """
    Slim down the knowledge base: ask a top-tier LLM to merge semantically
    similar entries into fewer, more general ones.

    Args:
        model: model to use (default claude-sonnet-4.5)
        context: tool context

    Returns:
        Slimming result report.
    """
    try:
        knowledge_dir = Path(".cache/knowledge_atoms")

        if not knowledge_dir.exists():
            return ToolResult(
                title="📂 知识库不存在",
                output="知识库目录不存在,无需瘦身",
                long_term_memory="知识库瘦身: 目录不存在"
            )

        # Collect every entry file, both storage formats.
        json_files = list(knowledge_dir.glob("*.json"))
        md_files = list(knowledge_dir.glob("*.md"))
        files = json_files + md_files

        if len(files) < 2:
            return ToolResult(
                title="📂 知识库过小",
                output=f"知识库仅有 {len(files)} 条,无需瘦身",
                long_term_memory=f"知识库瘦身: 仅有 {len(files)} 条"
            )

        # Parse every entry; unparseable files are skipped (logged), not fatal.
        parsed = []
        for file_path in files:
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()

                if file_path.suffix == ".json":
                    data = json.loads(content)
                else:
                    # Markdown entries store metadata in a YAML frontmatter block.
                    yaml_match = re.search(r'^---\n(.*?)\n---', content, re.DOTALL)
                    if not yaml_match:
                        continue
                    data = yaml.safe_load(yaml_match.group(1))

                parsed.append({
                    "file_path": file_path,
                    "data": data,
                    "is_json": file_path.suffix == ".json"
                })
            except Exception as e:
                logger.error(f"解析文件失败 {file_path}: {e}")
                continue

        if len(parsed) < 2:
            return ToolResult(
                title="📂 有效知识过少",
                output=f"有效知识仅有 {len(parsed)} 条,无需瘦身",
                long_term_memory=f"知识库瘦身: 有效知识 {len(parsed)} 条"
            )

        # Build the catalog text sent to the LLM; content is truncated to
        # 200 chars per entry to bound prompt size.
        entries_text = ""
        for p in parsed:
            data = p["data"]
            entries_text += f"[ID: {data.get('id')}] [Tags: {data.get('tags', {})}] "
            entries_text += f"[Metrics: {data.get('metrics', {})}] [Score: {data.get('eval', {}).get('score', 3)}]\n"
            entries_text += f"Scenario: {data.get('scenario', 'N/A')}\n"
            entries_text += f"Content: {data.get('content', '')[:200]}...\n\n"

        prompt = f"""你是一个 AI Agent 知识库管理员。以下是当前知识库的全部条目,请执行瘦身操作:

【任务】:
1. 识别语义高度相似或重复的知识,将它们合并为一条更精炼、更通用的知识。
2. 合并时保留 helpful 最高的那条的 ID 和 metrics(metrics 中 helpful/harmful 取各条之和)。
3. 对于独立的、无重复的知识,保持原样不动。
4. 保持原有的知识结构和格式。

【当前知识库】:
{entries_text}

【输出格式要求】:
严格按以下格式输出每条知识,条目之间用 === 分隔:
ID: <保留的id>
TAGS: <yaml格式的tags>
METRICS: <yaml格式的metrics>
SCORE: <评分>
SCENARIO: <场景描述>
CONTENT: <合并后的知识内容>
===

最后一行输出合并报告,格式:
REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。

禁止输出任何开场白或解释。"""

        print(f"\n[知识瘦身] 正在调用 {model} 分析 {len(parsed)} 条知识...")
        response = await openrouter_llm_call(
            messages=[{"role": "user", "content": prompt}],
            model=model
        )
        content = response.get("content", "").strip()
        if not content:
            return ToolResult(
                title="❌ 大模型返回为空",
                output="大模型返回为空,瘦身失败",
                error="大模型返回为空"
            )

        # Parse the LLM's structured output: "==="-separated entry blocks
        # plus one trailing REPORT block.
        report_line = ""
        new_entries = []
        blocks = [b.strip() for b in content.split("===") if b.strip()]

        for block in blocks:
            if block.startswith("REPORT:"):
                report_line = block
                continue

            # Line-oriented state machine: field-prefix lines switch fields;
            # unprefixed lines continue the current multi-line field
            # (SCENARIO or CONTENT only).
            lines = block.split("\n")
            kid, tags, metrics, score, scenario, content_lines = None, {}, {}, 3, "", []
            current_field = None

            for line in lines:
                if line.startswith("ID:"):
                    kid = line[3:].strip()
                    current_field = None
                elif line.startswith("TAGS:"):
                    try:
                        tags = yaml.safe_load(line[5:].strip()) or {}
                    except Exception:
                        tags = {}
                    current_field = None
                elif line.startswith("METRICS:"):
                    try:
                        metrics = yaml.safe_load(line[8:].strip()) or {}
                    except Exception:
                        metrics = {"helpful": 0, "harmful": 0}
                    current_field = None
                elif line.startswith("SCORE:"):
                    try:
                        score = int(line[6:].strip())
                    except Exception:
                        score = 3  # fall back to the neutral default score
                    current_field = None
                elif line.startswith("SCENARIO:"):
                    scenario = line[9:].strip()
                    current_field = "scenario"
                elif line.startswith("CONTENT:"):
                    content_lines.append(line[8:].strip())
                    current_field = "content"
                elif current_field == "scenario":
                    scenario += "\n" + line
                elif current_field == "content":
                    content_lines.append(line)

            # Only keep blocks with both an ID and some content; eval history
            # is deliberately reset for merged entries.
            if kid and content_lines:
                new_data = {
                    "id": kid,
                    "tags": tags,
                    "scenario": scenario,
                    "content": "\n".join(content_lines).strip(),
                    "metrics": metrics,
                    "eval": {
                        "score": score,
                        "helpful": 0,
                        "harmful": 0,
                        "helpful_history": [],
                        "harmful_history": []
                    },
                    "updated_at": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                }
                new_entries.append(new_data)

        if not new_entries:
            return ToolResult(
                title="❌ 解析失败",
                output="解析大模型输出失败,知识库未修改",
                error="解析失败"
            )

        # Delete the old files.
        # NOTE(review): old files are removed BEFORE the new ones are written;
        # a crash or write failure between the two loops loses data — consider
        # writing new entries first, then pruning obsolete files. TODO confirm.
        for p in parsed:
            try:
                p["file_path"].unlink()
            except Exception as e:
                logger.error(f"删除旧文件失败 {p['file_path']}: {e}")

        # Write the new entries (normalized to JSON format).
        for data in new_entries:
            file_path = knowledge_dir / f"{data['id']}.json"
            with open(file_path, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=2)

        result = f"瘦身完成:{len(parsed)} → {len(new_entries)} 条知识"
        if report_line:
            result += f"\n{report_line}"

        print(f"[知识瘦身] {result}")
        return ToolResult(
            title="✅ 知识库瘦身完成",
            output=result,
            long_term_memory=f"知识库瘦身: {len(parsed)} → {len(new_entries)} 条"
        )

    except Exception as e:
        logger.error(f"知识库瘦身失败: {e}")
        return ToolResult(
            title="❌ 瘦身失败",
            output=f"错误: {str(e)}",
            error=str(e)
        )
|
|
|
+
|