| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133 |
- """
- 分析缓存命中情况的脚本
- """
- import json
- import sys
- from pathlib import Path
def _percent(part, whole):
    """Return ``part`` as a percentage of ``whole``; 0.0 when ``whole`` is 0."""
    return part / whole * 100 if whole else 0.0


def analyze_cache(trace_dir) -> None:
    """Print a cache-hit report for the trace stored under ``trace_dir``.

    Reads ``<trace_dir>/events.jsonl`` (one JSON object per line), looks at
    every ``message_added`` event and, for assistant messages, collects the
    ``prompt_tokens``, ``cache_creation_tokens`` and ``cache_read_tokens``
    fields. It then prints overall totals, a per-message table and a short
    cache-miss summary to stdout.

    Args:
        trace_dir: Directory containing ``events.jsonl``; anything accepted
            by ``pathlib.Path``.

    Returns:
        None. If the events file does not exist, an error line is printed
        and the function returns without reporting.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a ``message_added`` event has no ``message``, or an
            assistant message has no ``sequence``.
    """
    events_file = Path(trace_dir) / "events.jsonl"
    if not events_file.exists():
        print(f"错误: 找不到 {events_file}")
        return

    print("=" * 80)
    print("缓存命中情况详细分析")
    print("=" * 80)
    print()

    # Counters for the summary section.
    total_messages = 0
    tool_messages = 0
    cache_events = []  # one entry per assistant message

    # Explicit encoding: the trace may hold non-ASCII text and the platform
    # default is not guaranteed to be UTF-8.
    with open(events_file, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            event = json.loads(line)
            if event.get("event") != "message_added":
                continue

            msg = event["message"]
            role = msg.get("role")
            total_messages += 1

            if role == "assistant":
                # ``or 0`` also covers an explicit JSON null for a counter.
                cache_events.append({
                    "seq": msg["sequence"],
                    "prompt": msg.get("prompt_tokens") or 0,
                    "creation": msg.get("cache_creation_tokens") or 0,
                    "read": msg.get("cache_read_tokens") or 0,
                })
            elif role == "tool":
                tool_messages += 1

    assistant_messages = len(cache_events)
    total_prompt_tokens = sum(e["prompt"] for e in cache_events)
    total_cache_creation = sum(e["creation"] for e in cache_events)
    total_cache_read = sum(e["read"] for e in cache_events)

    # Summary. Percentages go through _percent so that a trace without any
    # prompt tokens reports 0.0% instead of raising ZeroDivisionError.
    creation_pct = _percent(total_cache_creation, total_prompt_tokens)
    read_pct = _percent(total_cache_read, total_prompt_tokens)

    print(f"总消息数: {total_messages}")
    print(f" - Assistant: {assistant_messages}")
    print(f" - Tool: {tool_messages}")
    print(f" - 其他: {total_messages - assistant_messages - tool_messages}")
    print()
    print("Token 统计:")
    print(f" - Total prompt tokens: {total_prompt_tokens:,}")
    print(f" - Cache creation: {total_cache_creation:,} ({creation_pct:.1f}%)")
    print(f" - Cache read: {total_cache_read:,} ({read_pct:.1f}%)")
    print()

    # Overall hit rate is only meaningful when prompt tokens were recorded.
    if total_prompt_tokens > 0:
        print(f"缓存命中率: {read_pct:.1f}%")
        print()

    # Per-message table.
    print("=" * 80)
    print("详细缓存事件")
    print("=" * 80)
    print()
    print(f"{'Seq':>4} | {'Prompt Tokens':>14} | {'Cache Creation':>15} | {'Cache Read':>11} | {'Hit Rate':>9}")
    print("-" * 80)

    for entry in cache_events:
        seq = entry["seq"]
        prompt = entry["prompt"]
        creation = entry["creation"]
        read = entry["read"]

        rate = f"{read/prompt*100:.1f}%" if prompt > 0 else "0%"

        # Tag each row: cache written, cache missed, or cache hit.
        marker = ""
        if creation > 0:
            marker = " 🔵 创建"
        elif read == 0:
            marker = " ❌ 失效"
        elif read > 0:
            marker = " ✅ 命中"

        print(f"{seq:4d} | {prompt:14,d} | {creation:15,d} | {read:11,d} | {rate:>9s}{marker}")

    print()

    # Miss summary: a miss is a message that neither read nor created cache.
    print("=" * 80)
    print("缓存失效分析")
    print("=" * 80)
    print()

    miss_count = sum(1 for e in cache_events if e["read"] == 0 and e["creation"] == 0)
    if miss_count > 0:
        print(f"发现 {miss_count} 次缓存失效")
        print("可能的原因:")
        print(" 1. Level 2 压缩导致消息序列改变")
        print(" 2. 缓存过期(5 分钟)")
        print(" 3. 消息内容发生变化")
    else:
        print("没有发现缓存失效")

    print()
if __name__ == "__main__":
    # Script entry point: expects the trace directory as the first argument.
    cli_args = sys.argv[1:]
    if not cli_args:
        # No directory supplied: show usage and exit with a failure status.
        print("用法: python3 analyze.py <trace-directory>")
        print("示例: python3 analyze.py .trace/abc123...")
        sys.exit(1)

    analyze_cache(cli_args[0])
|