| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225 |
#!/usr/bin/env python3
"""Test the simplified cache-point placement logic."""
def _last_nonempty_text_block(content):
    """Return the last ``{"type": "text"}`` dict in *content* whose text is non-empty.

    Returns ``None`` when *content* holds no such block. Non-dict entries,
    non-text blocks and text blocks whose ``text`` is empty or ``None`` are
    ignored.
    """
    for block in reversed(content):
        if (
            isinstance(block, dict)
            and block.get("type") == "text"
            and block.get("text")
        ):
            return block
    return None


def add_cache_control_simplified(messages, system_cached=False):
    """Add ephemeral ``cache_control`` markers to a copy of *messages* (stateless).

    Target positions are every 20th message (indices 19, 39, 59, 79). For each
    target the list is scanned forward from that index for the first message
    that

    * has role ``"user"`` or ``"assistant"``,
    * has non-empty content (a non-empty string, or a list containing at
      least one text block with non-empty text), and
    * lies far enough from the previous cache point (or from index 0 for the
      first one) that the estimate ``distance * 70`` reaches 1024 tokens.

    String content is rewritten as a single text block carrying the marker.
    For list content the marker goes on the last *non-empty* text block, so
    the block that made the message eligible is the one that gets marked
    (never a trailing empty text block).

    Args:
        messages: List of message dicts with ``role`` and ``content`` keys.
            The input is deep-copied and never modified.
        system_cached: When true, at most 3 cache points are placed instead
            of 4.

    Returns:
        A ``(messages_copy, cache_positions)`` tuple, where *cache_positions*
        lists the indices that received a marker, in ascending order.

    Side effects:
        Prints one line per cache point that is placed.
    """
    import copy

    CACHE_INTERVAL = 20
    MAX_POINTS = 3 if system_cached else 4
    MIN_TOKENS = 1024
    AVG_TOKENS_PER_MSG = 70

    # Work on a private copy so the caller's messages stay untouched.
    messages = copy.deepcopy(messages)
    total_msgs = len(messages)
    if total_msgs == 0:
        return messages, []

    cache_positions = []
    last_cache_pos = 0
    for i in range(1, MAX_POINTS + 1):
        target_pos = i * CACHE_INTERVAL - 1  # 19, 39, 59, 79
        if target_pos >= total_msgs:
            break

        # Scan forward from the target for the first eligible message.
        for j in range(target_pos, total_msgs):
            msg = messages[j]
            if msg.get("role") not in ("user", "assistant"):
                continue

            content = msg.get("content", "")
            if isinstance(content, str):
                if not content:
                    continue
                anchor = None
            elif isinstance(content, list):
                anchor = _last_nonempty_text_block(content)
                if anchor is None:
                    continue
            else:
                # Neither text nor a block list: nothing to attach a marker to.
                continue

            # Estimate the tokens between the previous cache point and here.
            estimated_tokens = (j - last_cache_pos) * AVG_TOKENS_PER_MSG
            if estimated_tokens < MIN_TOKENS:
                continue

            cache_positions.append(j)
            last_cache_pos = j
            print(f" ✓ 目标位置 {target_pos} -> message[{j}] (估算 {estimated_tokens} tokens)")

            marker = {"type": "ephemeral"}
            if anchor is None:
                # Plain string content: wrap it in a single marked text block.
                msg["content"] = [{
                    "type": "text",
                    "text": content,
                    "cache_control": marker,
                }]
            else:
                anchor["cache_control"] = marker
            break

    return messages, cache_positions
def _banner(title):
    """Print *title* framed above and below by a 70-character rule."""
    rule = "=" * 70
    print(rule)
    print(title)
    print(rule)


def _dialogue(start, stop, prefix="msg"):
    """Build messages numbered ``start`` to ``stop - 1``.

    Even numbers become user turns and odd numbers assistant turns; each
    message's content is ``"<prefix> <number>"``.
    """
    roles = ("user", "assistant")
    return [
        {"role": roles[n % 2], "content": f"{prefix} {n}"}
        for n in range(start, stop)
    ]


# --- Scenario 1: the conversation grows step by step (simulated agent loop) ---
_banner("测试场景1:消息逐条增长(模拟 Agent Loop)")
print()

# Iteration 1: two messages only.
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hello"},
]
print("迭代 1 (2 条消息):")
_, positions = add_cache_control_simplified(messages)
print(f" 缓存点位置: {positions}")
print()

# Each row: (first new number, stop number, heading, note appended to the result line).
_growth_steps = [
    # Iteration 2: 10 messages.
    (2, 10, "迭代 2 (10 条消息):", ""),
    # Iteration 3: 25 messages (the first cache point is expected here).
    (10, 25, "迭代 3 (25 条消息):", ""),
    # Iteration 4: 35 messages (positions expected to match iteration 3).
    (25, 35, "迭代 4 (35 条消息):", " ← 应该和迭代3相同"),
    # Iteration 5: 50 messages (a second cache point is expected).
    (35, 50, "迭代 5 (50 条消息):", ""),
]
for first, stop, heading, note in _growth_steps:
    messages.extend(_dialogue(first, stop))
    print(heading)
    _, positions = add_cache_control_simplified(messages)
    print(f" 缓存点位置: {positions}{note}")
    print()

# --- Scenario 2: index 19 is a tool message, so the search must move past it ---
_banner("测试场景2:第19条是tool消息(应该跳过,在后面找user/assistant)")
print()

messages2 = _dialogue(0, 19)
messages2 += [
    {"role": "tool", "content": "tool result"},  # index 19: tool message
    {"role": "assistant", "content": ""},  # index 20: empty content
    {"role": "user", "content": "msg 21"},  # index 21: non-empty user message
]
for line in (
    "消息结构:",
    " [0-18]: user/assistant",
    " [19]: tool (应该跳过)",
    " [20]: assistant 空content (应该跳过)",
    " [21]: user 非空 (应该在这里创建缓存点)",
):
    print(line)
print()
_, positions = add_cache_control_simplified(messages2)
print(f" 缓存点位置: {positions}")
print()

# --- Scenario 3: regrowth after compression (simulated level-2 compression) ---
_banner("测试场景3:压缩后重新增长(模拟 Level 2 压缩)")
print()

# Before compression: 50 messages.
messages3 = _dialogue(0, 50)
print("压缩前 (50 条消息):")
_, positions_before = add_cache_control_simplified(messages3)
print(f" 缓存点位置: {positions_before}")
print()

# After compression: only the system prompt and a summary remain.
messages3_compressed = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "## 对话历史摘要\n\n这是压缩后的摘要..."},
]
print("压缩后 (2 条消息):")
_, positions_after = add_cache_control_simplified(messages3_compressed)
print(f" 缓存点位置: {positions_after} ← 应该为空")
print()

# Grow the compressed conversation back to 30 messages.
messages3_compressed.extend(_dialogue(2, 30, prefix="new msg"))
print("重新增长 (30 条消息):")
_, positions_regrow = add_cache_control_simplified(messages3_compressed)
print(f" 缓存点位置: {positions_regrow} ← 自动重建缓存点")
print()

# --- Scenario 4: cache-point positions stay stable as messages are appended ---
_banner("测试场景4:验证缓存点位置稳定性")
print()

messages4 = _dialogue(0, 25)
print("第1次调用 (25 条消息):")
result1, pos1 = add_cache_control_simplified(messages4)
print(f" 缓存点位置: {pos1}")

# Report every message in the result that carries a cache marker.
# NOTE: has_cache is recorded here but nothing below reads it.
has_cache = False
for idx, entry in enumerate(result1):
    blocks = entry.get("content")
    if not isinstance(blocks, list):
        continue
    for blk in blocks:
        if isinstance(blk, dict) and blk.get("cache_control"):
            has_cache = True
            print(f" message[{idx}] 有缓存标记 ✓")
print()

# Append more messages and run again.
messages4.extend(_dialogue(25, 35))
print("第2次调用 (35 条消息):")
result2, pos2 = add_cache_control_simplified(messages4)
print(f" 缓存点位置: {pos2}")

# The positions from the first call must be a prefix of the second call's.
prefix_unchanged = pos2[:len(pos1)] == pos1
if not prefix_unchanged:
    print(" ✗ 缓存点位置不稳定!")
else:
    print(f" ✓ 缓存点位置稳定(前 {len(pos1)} 个位置相同)")
print()

_banner("测试完成")
|