|
@@ -1101,18 +1101,15 @@ class AgentRunner:
|
|
|
# 构建 LLM messages(注入上下文)
|
|
# 构建 LLM messages(注入上下文)
|
|
|
llm_messages = list(history)
|
|
llm_messages = list(history)
|
|
|
|
|
|
|
|
- # 收集需要持久化的 system 消息
|
|
|
|
|
- system_messages_to_persist = []
|
|
|
|
|
-
|
|
|
|
|
- # 研究流程引导(仅在启用且处于研究阶段时)
|
|
|
|
|
- research_state = self._get_research_state(trace_id)
|
|
|
|
|
- if research_state and research_state["stage"] != "execution":
|
|
|
|
|
- research_guide = self._build_research_guide(research_state)
|
|
|
|
|
- if research_guide:
|
|
|
|
|
- system_msg = {"role": "system", "content": research_guide}
|
|
|
|
|
- llm_messages.append(system_msg)
|
|
|
|
|
- system_messages_to_persist.append(("研究流程引导", system_msg))
|
|
|
|
|
|
|
+ # 先对历史消息应用 Prompt Caching(在注入动态内容之前)
|
|
|
|
|
+ # 这样可以确保历史消息的缓存点固定,不受动态注入影响
|
|
|
|
|
+ llm_messages = self._add_cache_control(
|
|
|
|
|
+ llm_messages,
|
|
|
|
|
+ config.model,
|
|
|
|
|
+ config.enable_prompt_caching
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
|
|
+ # 然后追加动态注入的内容(不影响已缓存的历史消息)
|
|
|
# 周期性注入 GoalTree + Collaborators
|
|
# 周期性注入 GoalTree + Collaborators
|
|
|
if iteration % CONTEXT_INJECTION_INTERVAL == 0:
|
|
if iteration % CONTEXT_INJECTION_INTERVAL == 0:
|
|
|
context_injection = self._build_context_injection(trace, goal_tree)
|
|
context_injection = self._build_context_injection(trace, goal_tree)
|
|
@@ -1149,6 +1146,7 @@ class AgentRunner:
|
|
|
logger.warning("经验检索失败: %s", e)
|
|
logger.warning("经验检索失败: %s", e)
|
|
|
_cached_exp_text = ""
|
|
_cached_exp_text = ""
|
|
|
|
|
|
|
|
|
|
+ # 经验注入:goal切换时注入相关历史经验
|
|
|
if _cached_exp_text:
|
|
if _cached_exp_text:
|
|
|
system_msg = {"role": "system", "content": _cached_exp_text}
|
|
system_msg = {"role": "system", "content": _cached_exp_text}
|
|
|
llm_messages.append(system_msg)
|
|
llm_messages.append(system_msg)
|
|
@@ -1175,12 +1173,6 @@ class AgentRunner:
|
|
|
head_seq = sequence
|
|
head_seq = sequence
|
|
|
sequence += 1
|
|
sequence += 1
|
|
|
|
|
|
|
|
- # 应用 Prompt Caching(不修改原始 history,只在发送给 LLM 时添加缓存标记)
|
|
|
|
|
- llm_messages = self._add_cache_control(
|
|
|
|
|
- llm_messages,
|
|
|
|
|
- config.model,
|
|
|
|
|
- config.enable_prompt_caching
|
|
|
|
|
- )
|
|
|
|
|
|
|
|
|
|
# 调用 LLM
|
|
# 调用 LLM
|
|
|
result = await self.llm_call(
|
|
result = await self.llm_call(
|
|
@@ -1931,9 +1923,10 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
|
|
"""
|
|
"""
|
|
|
为支持的模型添加 Prompt Caching 标记
|
|
为支持的模型添加 Prompt Caching 标记
|
|
|
|
|
|
|
|
- 策略:
|
|
|
|
|
- 1. system message 添加缓存(如果存在且足够长)
|
|
|
|
|
- 2. 倒数第 3-5 条 user/assistant 消息添加缓存点
|
|
|
|
|
|
|
+ 策略:固定位置 + 延迟查找
|
|
|
|
|
+ 1. system message 添加缓存(如果足够长)
|
|
|
|
|
+ 2. 固定位置缓存点(20, 40, 60, 80),确保每个缓存点间隔 >= 1024 tokens
|
|
|
|
|
+ 3. 最多使用 4 个缓存点(含 system)
|
|
|
|
|
|
|
|
Args:
|
|
Args:
|
|
|
messages: 原始消息列表
|
|
messages: 原始消息列表
|
|
@@ -1955,62 +1948,99 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
|
|
|
messages = copy.deepcopy(messages)
|
|
messages = copy.deepcopy(messages)
|
|
|
|
|
|
|
|
# 策略 1: 为 system message 添加缓存
|
|
# 策略 1: 为 system message 添加缓存
|
|
|
|
|
+ system_cached = False
|
|
|
for msg in messages:
|
|
for msg in messages:
|
|
|
if msg.get("role") == "system":
|
|
if msg.get("role") == "system":
|
|
|
content = msg.get("content", "")
|
|
content = msg.get("content", "")
|
|
|
- # 只有足够长的 system prompt 才值得缓存(>1024 tokens 约 4000 字符)
|
|
|
|
|
if isinstance(content, str) and len(content) > 1000:
|
|
if isinstance(content, str) and len(content) > 1000:
|
|
|
- # Anthropic API 格式:在 content 的最后一个 block 添加 cache_control
|
|
|
|
|
- # 如果 content 是 string,需要转换为 list 格式
|
|
|
|
|
- msg["content"] = [
|
|
|
|
|
- {
|
|
|
|
|
- "type": "text",
|
|
|
|
|
- "text": content,
|
|
|
|
|
- "cache_control": {"type": "ephemeral"}
|
|
|
|
|
- }
|
|
|
|
|
- ]
|
|
|
|
|
|
|
+ msg["content"] = [{
|
|
|
|
|
+ "type": "text",
|
|
|
|
|
+ "text": content,
|
|
|
|
|
+ "cache_control": {"type": "ephemeral"}
|
|
|
|
|
+ }]
|
|
|
|
|
+ system_cached = True
|
|
|
logger.debug(f"[Cache] 为 system message 添加缓存标记 (len={len(content)})")
|
|
logger.debug(f"[Cache] 为 system message 添加缓存标记 (len={len(content)})")
|
|
|
break
|
|
break
|
|
|
|
|
|
|
|
- # 策略 2: 为倒数第 3-5 条消息添加缓存点
|
|
|
|
|
- # 这样可以缓存大部分历史对话,只有最新的几条消息是新的
|
|
|
|
|
|
|
+ # 策略 2: 固定位置缓存点
|
|
|
|
|
+ CACHE_INTERVAL = 20
|
|
|
|
|
+ MAX_POINTS = 3 if system_cached else 4
|
|
|
|
|
+ MIN_TOKENS = 1024
|
|
|
|
|
+ AVG_TOKENS_PER_MSG = 70
|
|
|
|
|
+
|
|
|
|
|
+ total_msgs = len(messages)
|
|
|
|
|
+ if total_msgs == 0:
|
|
|
|
|
+ return messages
|
|
|
|
|
+
|
|
|
cache_positions = []
|
|
cache_positions = []
|
|
|
- user_assistant_msgs = [
|
|
|
|
|
- (i, msg) for i, msg in enumerate(messages)
|
|
|
|
|
- if msg.get("role") in ("user", "assistant")
|
|
|
|
|
- ]
|
|
|
|
|
-
|
|
|
|
|
- if len(user_assistant_msgs) >= 5:
|
|
|
|
|
- # 在倒数第 5 条添加缓存点
|
|
|
|
|
- cache_positions.append(user_assistant_msgs[-5][0])
|
|
|
|
|
- elif len(user_assistant_msgs) >= 3:
|
|
|
|
|
- # 在倒数第 3 条添加缓存点
|
|
|
|
|
- cache_positions.append(user_assistant_msgs[-3][0])
|
|
|
|
|
|
|
+ last_cache_pos = 0
|
|
|
|
|
+
|
|
|
|
|
+ for i in range(1, MAX_POINTS + 1):
|
|
|
|
|
+ target_pos = i * CACHE_INTERVAL - 1 # 19, 39, 59, 79
|
|
|
|
|
|
|
|
|
|
+ if target_pos >= total_msgs:
|
|
|
|
|
+ break
|
|
|
|
|
+
|
|
|
|
|
+ # 从目标位置开始查找合适的 user/assistant 消息
|
|
|
|
|
+ for j in range(target_pos, total_msgs):
|
|
|
|
|
+ msg = messages[j]
|
|
|
|
|
+
|
|
|
|
|
+ if msg.get("role") not in ("user", "assistant"):
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ content = msg.get("content", "")
|
|
|
|
|
+ if not content:
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ # 检查 content 是否非空
|
|
|
|
|
+ is_valid = False
|
|
|
|
|
+ if isinstance(content, str):
|
|
|
|
|
+ is_valid = len(content) > 0
|
|
|
|
|
+ elif isinstance(content, list):
|
|
|
|
|
+ is_valid = any(
|
|
|
|
|
+ isinstance(block, dict) and
|
|
|
|
|
+ block.get("type") == "text" and
|
|
|
|
|
+ len(block.get("text", "")) > 0
|
|
|
|
|
+ for block in content
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if not is_valid:
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ # 检查 token 距离
|
|
|
|
|
+ msg_count = j - last_cache_pos
|
|
|
|
|
+ estimated_tokens = msg_count * AVG_TOKENS_PER_MSG
|
|
|
|
|
+
|
|
|
|
|
+ if estimated_tokens >= MIN_TOKENS:
|
|
|
|
|
+ cache_positions.append(j)
|
|
|
|
|
+ last_cache_pos = j
|
|
|
|
|
+ logger.debug(f"[Cache] 在位置 {j} 添加缓存点 (估算 {estimated_tokens} tokens)")
|
|
|
|
|
+ break
|
|
|
|
|
+
|
|
|
|
|
+ # 应用缓存标记
|
|
|
for idx in cache_positions:
|
|
for idx in cache_positions:
|
|
|
msg = messages[idx]
|
|
msg = messages[idx]
|
|
|
content = msg.get("content", "")
|
|
content = msg.get("content", "")
|
|
|
|
|
|
|
|
- # 处理 string content
|
|
|
|
|
if isinstance(content, str):
|
|
if isinstance(content, str):
|
|
|
- msg["content"] = [
|
|
|
|
|
- {
|
|
|
|
|
- "type": "text",
|
|
|
|
|
- "text": content,
|
|
|
|
|
- "cache_control": {"type": "ephemeral"}
|
|
|
|
|
- }
|
|
|
|
|
- ]
|
|
|
|
|
|
|
+ msg["content"] = [{
|
|
|
|
|
+ "type": "text",
|
|
|
|
|
+ "text": content,
|
|
|
|
|
+ "cache_control": {"type": "ephemeral"}
|
|
|
|
|
+ }]
|
|
|
logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 添加缓存标记")
|
|
logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 添加缓存标记")
|
|
|
-
|
|
|
|
|
- # 处理 list content(多模态消息)
|
|
|
|
|
- elif isinstance(content, list) and len(content) > 0:
|
|
|
|
|
|
|
+ elif isinstance(content, list):
|
|
|
# 在最后一个 text block 添加 cache_control
|
|
# 在最后一个 text block 添加 cache_control
|
|
|
- for i in range(len(content) - 1, -1, -1):
|
|
|
|
|
- if isinstance(content[i], dict) and content[i].get("type") == "text":
|
|
|
|
|
- content[i]["cache_control"] = {"type": "ephemeral"}
|
|
|
|
|
- logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 的 content[{i}] 添加缓存标记")
|
|
|
|
|
|
|
+ for block in reversed(content):
|
|
|
|
|
+ if isinstance(block, dict) and block.get("type") == "text":
|
|
|
|
|
+ block["cache_control"] = {"type": "ephemeral"}
|
|
|
|
|
+ logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 添加缓存标记")
|
|
|
break
|
|
break
|
|
|
|
|
|
|
|
|
|
+ logger.debug(
|
|
|
|
|
+ f"[Cache] 总消息: {total_msgs}, "
|
|
|
|
|
+ f"缓存点: {len(cache_positions)} at {cache_positions}"
|
|
|
|
|
+ )
|
|
|
return messages
|
|
return messages
|
|
|
|
|
|
|
|
def _get_tool_schemas(self, tools: Optional[List[str]]) -> List[Dict]:
|
|
def _get_tool_schemas(self, tools: Optional[List[str]]) -> List[Dict]:
|