# debug_cache.py (2.8 KB)
  1. """
  2. 调试脚本:检查缓存失效的原因
  3. 分析trace中的消息,看看为什么缓存点没有生效
  4. """
  5. import json
  6. from pathlib import Path
  7. import hashlib
  8. trace_dir = Path("/Users/elksmmx/Desktop/agent 2.10/Agent/examples/find knowledge/.trace/7ff963bc-3106-47fc-9725-b712f3e0d7d5/messages")
  9. print("=== 分析缓存失效原因 ===\n")
  10. # 读取前几次assistant消息,看看它们发送的消息列表是否一致
  11. assistant_calls = []
  12. for i in range(1, 101):
  13. fname = trace_dir / f"7ff963bc-3106-47fc-9725-b712f3e0d7d5-{i:04d}.json"
  14. if not fname.exists():
  15. continue
  16. with open(fname) as f:
  17. data = json.load(f)
  18. if data.get('role') == 'assistant':
  19. assistant_calls.append({
  20. 'seq': i,
  21. 'cache_read': data.get('cache_read_tokens', 0),
  22. 'cache_create': data.get('cache_creation_tokens', 0),
  23. })
  24. print("=== Assistant消息的缓存情况 ===")
  25. for call in assistant_calls[:20]: # 只看前20次
  26. seq = call['seq']
  27. cache_read = call['cache_read']
  28. cache_create = call['cache_create']
  29. if cache_create > 0:
  30. print(f"seq={seq:03d} 🆕 创建缓存 {cache_create}")
  31. elif cache_read > 0:
  32. print(f"seq={seq:03d} ✅ 命中缓存 {cache_read}")
  33. else:
  34. print(f"seq={seq:03d} ❌ 缓存失效")
  35. print("\n=== 关键发现 ===")
  36. print(f"总共{len(assistant_calls)}次assistant调用")
  37. print(f"创建缓存: {sum(1 for c in assistant_calls if c['cache_create'] > 0)}次")
  38. print(f"命中缓存: {sum(1 for c in assistant_calls if c['cache_read'] > 0)}次")
  39. print(f"缓存失效: {sum(1 for c in assistant_calls if c['cache_read'] == 0 and c['cache_create'] == 0)}次")
  40. # 分析缓存创建的token数
  41. cache_creates = [c['cache_create'] for c in assistant_calls if c['cache_create'] > 0]
  42. if cache_creates:
  43. print(f"\n缓存创建的token数: {cache_creates}")
  44. print(f"所有缓存大小都是 {cache_creates[0]} tokens,说明只缓存了system prompt")
  45. # 分析缓存命中的token数
  46. cache_reads = [c['cache_read'] for c in assistant_calls if c['cache_read'] > 0]
  47. if cache_reads:
  48. unique_reads = set(cache_reads)
  49. print(f"\n缓存命中的token数: {unique_reads}")
  50. if len(unique_reads) == 1:
  51. print(f"所有缓存命中都是 {list(unique_reads)[0]} tokens,说明只命中了system prompt")
  52. print("\n=== 结论 ===")
  53. print("虽然我们在第20、40、60条消息上设置了缓存点,")
  54. print("但Anthropic API只创建了system prompt的缓存。")
  55. print("这说明后续消息的缓存点没有生效。")
  56. print("\n可能的原因:")
  57. print("1. 消息内容在每次调用时发生了变化")
  58. print("2. 消息格式转换(str->list)导致Anthropic认为是不同的消息")
  59. print("3. Anthropic的缓存机制有其他限制(如最小缓存长度)")