|
|
@@ -1,17 +1,19 @@
|
|
|
"""
|
|
|
Context 压缩 — 两级压缩策略
|
|
|
|
|
|
-Level 1: GoalTree 过滤(确定性,零成本)
|
|
|
- - 跳过 completed/abandoned goals 的消息(信息已在 GoalTree summary 中)
|
|
|
- - 始终保留:system prompt、第一条 user message、当前 focus goal 的消息
|
|
|
+Level 1: Goal 完成压缩(确定性,零 LLM 成本)
|
|
|
+ - 对 completed/abandoned goals:保留 goal 工具消息,移除非 goal 工具消息
|
|
|
+ - 三种模式:none / on_complete / on_overflow
|
|
|
|
|
|
Level 2: LLM 总结(仅在 Level 1 后仍超限时触发)
|
|
|
- - 在消息列表末尾追加压缩 prompt → 主模型回复 → summary 存为新消息
|
|
|
- - summary 的 parent_sequence 跳过被压缩的范围
|
|
|
+ - 通过侧分支多轮 agent 模式压缩
|
|
|
+ - 压缩后重建 history 为:system prompt + 第一条 user message + summary
|
|
|
|
|
|
-压缩不修改存储:原始消息永远保留在 messages/,通过 parent_sequence 树结构实现跳过。
|
|
|
+压缩不修改存储:原始消息永远保留在 messages/,纯内存操作。
|
|
|
"""
|
|
|
|
|
|
+import copy
|
|
|
+import json
|
|
|
import logging
|
|
|
from dataclasses import dataclass
|
|
|
from typing import List, Dict, Any, Optional, Set
|
|
|
@@ -19,7 +21,6 @@ from typing import List, Dict, Any, Optional, Set
|
|
|
from .goal_models import GoalTree
|
|
|
from .models import Message
|
|
|
from agent.core.prompts import (
|
|
|
- COMPRESSION_EVAL_PROMPT_TEMPLATE,
|
|
|
REFLECT_PROMPT,
|
|
|
build_compression_eval_prompt,
|
|
|
)
|
|
|
@@ -99,84 +100,112 @@ class CompressionConfig:
|
|
|
return int(window * self.threshold_ratio)
|
|
|
|
|
|
|
|
|
-# ===== Level 1: GoalTree 过滤 =====
|
|
|
+# ===== Level 1: Goal 完成压缩 =====
|
|
|
|
|
|
def _decode_goal_call(tool_call: Any) -> Optional[Dict[str, Any]]:
    """Return the decoded arguments of a ``goal`` tool call.

    Returns ``None`` when ``tool_call`` is not a call to the ``goal`` tool
    (including entries that are not dicts or have no usable ``function``
    payload). For a ``goal`` call the result is always a dict; arguments that
    are missing, are not valid JSON, or decode to something other than a JSON
    object are reported as ``{}`` instead of raising.
    """
    if not isinstance(tool_call, dict):
        return None
    function = tool_call.get("function")
    if not isinstance(function, dict) or function.get("name", "") != "goal":
        return None

    raw_args = function.get("arguments", "{}")
    if isinstance(raw_args, str):
        try:
            raw_args = json.loads(raw_args)
        except json.JSONDecodeError:
            return {}
    # json.loads("null") / "[]" / "1" yield non-dict values; treat them as
    # "no arguments" so the caller can safely use .get().
    return raw_args if isinstance(raw_args, dict) else {}


def compress_completed_goals(
    messages: List[Message],
    goal_tree: Optional[GoalTree],
) -> List[Message]:
    """
    Level 1 compression: strip non-goal tool traffic of finished goals.

    For every goal whose status is "completed" or "abandoned":
      - keep:    assistant messages that contain at least one ``goal`` tool
                 call, together with their tool results
      - remove:  assistant messages without any ``goal`` tool call (including
                 plain-text assistant messages) and the tool results that
                 answer their tool calls
      - replace: the content of the tool result answering a ``goal(done=...)``
                 call with a short placeholder
      - messages whose goal_id is not a finished goal (including goal_id None)
        are left untouched

    An assistant message that mixes ``goal`` and other tool calls is kept
    whole, and so are all of its tool results.

    Works purely in memory: Message objects from the input are never mutated
    (a replaced tool result is a shallow copy) and nothing is persisted.

    Args:
        messages: Ordered list of Message objects on the main path.
        goal_tree: GoalTree instance, or None.

    Returns:
        The compressed message list. The input list object itself is returned
        when there is nothing to compress.
    """
    if not goal_tree or not goal_tree.goals:
        return messages

    # IDs of goals whose execution details are no longer needed.
    completed_ids: Set[str] = {
        g.id for g in goal_tree.goals
        if g.status in ("completed", "abandoned")
    }
    if not completed_ids:
        return messages

    # Pass 1: scan assistant messages and classify their tool_call ids.
    remove_seqs: Set[int] = set()    # sequences of assistant messages to drop
    remove_tc_ids: Set[str] = set()  # tool_call_ids whose tool result is dropped
    done_tc_ids: Set[str] = set()    # tool_call_ids of goal(done=...) calls

    for msg in messages:
        if msg.goal_id not in completed_ids or msg.role != "assistant":
            continue

        content = msg.content
        tc_list = content.get("tool_calls", []) if isinstance(content, dict) else []

        if not tc_list:
            # Plain-text assistant message (no tool calls): drop it.
            remove_seqs.add(msg.sequence)
            continue

        has_goal_call = False
        for tc in tc_list:
            args = _decode_goal_call(tc)
            if args is None:
                continue  # not a goal call
            has_goal_call = True
            if args.get("done") is not None:
                tc_id = tc.get("id")
                if tc_id:
                    done_tc_ids.add(tc_id)

        if not has_goal_call:
            # No goal call at all: drop the assistant message and every tool
            # result that answers one of its calls.
            remove_seqs.add(msg.sequence)
            for tc in tc_list:
                tc_id = tc.get("id") if isinstance(tc, dict) else None
                if tc_id:
                    remove_tc_ids.add(tc_id)

    # Nothing to compress.
    if not remove_seqs and not done_tc_ids:
        return messages

    # Pass 2: build the result list.
    result: List[Message] = []
    for msg in messages:
        if msg.sequence in remove_seqs:
            continue

        if msg.role == "tool":
            if msg.tool_call_id in remove_tc_ids:
                continue
            if msg.tool_call_id in done_tc_ids:
                # Shallow-copy so the caller's Message keeps its original content.
                modified = copy.copy(msg)
                modified.content = {"tool_name": "goal", "result": "具体执行过程已清理"}
                result.append(modified)
                continue

        result.append(msg)

    return result
|
|
|
|
|
|
|
|
|
# ===== Token 估算 =====
|