store.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571
  1. """
  2. FileSystem Trace Store - 文件系统存储实现
  3. 用于跨进程数据共享,数据持久化到 .trace/ 目录
  4. 目录结构:
  5. .trace/{trace_id}/
  6. ├── meta.json # Trace 元数据
  7. ├── goal.json # GoalTree(扁平 JSON,通过 parent_id 构建层级)
  8. ├── messages/ # Messages(每条独立文件)
  9. │ ├── {message_id}.json
  10. │ └── ...
  11. └── events.jsonl # 事件流(WebSocket 续传)
  12. Sub-Trace 是完全独立的 Trace,有自己的目录:
  13. .trace/{parent_id}@{mode}-{timestamp}-{seq}/
  14. ├── meta.json # parent_trace_id 指向父 Trace
  15. ├── goal.json
  16. ├── messages/
  17. └── events.jsonl
  18. """
  19. import json
  20. import os
  21. from pathlib import Path
  22. from typing import Dict, List, Optional, Any
  23. from datetime import datetime
  24. from .models import Trace, Message
  25. from .goal_models import GoalTree, Goal, GoalStats
  26. class FileSystemTraceStore:
  27. """文件系统 Trace 存储"""
  28. def __init__(self, base_path: str = ".trace"):
  29. self.base_path = Path(base_path)
  30. self.base_path.mkdir(exist_ok=True)
  31. def _get_trace_dir(self, trace_id: str) -> Path:
  32. """获取 trace 目录"""
  33. return self.base_path / trace_id
  34. def _get_meta_file(self, trace_id: str) -> Path:
  35. """获取 meta.json 文件路径"""
  36. return self._get_trace_dir(trace_id) / "meta.json"
  37. def _get_goal_file(self, trace_id: str) -> Path:
  38. """获取 goal.json 文件路径"""
  39. return self._get_trace_dir(trace_id) / "goal.json"
  40. def _get_messages_dir(self, trace_id: str) -> Path:
  41. """获取 messages 目录"""
  42. return self._get_trace_dir(trace_id) / "messages"
  43. def _get_message_file(self, trace_id: str, message_id: str) -> Path:
  44. """获取 message 文件路径"""
  45. return self._get_messages_dir(trace_id) / f"{message_id}.json"
  46. def _get_events_file(self, trace_id: str) -> Path:
  47. """获取 events.jsonl 文件路径"""
  48. return self._get_trace_dir(trace_id) / "events.jsonl"
  49. # ===== Trace 操作 =====
  50. async def create_trace(self, trace: Trace) -> str:
  51. """创建新的 Trace"""
  52. trace_dir = self._get_trace_dir(trace.trace_id)
  53. trace_dir.mkdir(exist_ok=True)
  54. # 创建 messages 目录
  55. messages_dir = self._get_messages_dir(trace.trace_id)
  56. messages_dir.mkdir(exist_ok=True)
  57. # 写入 meta.json
  58. meta_file = self._get_meta_file(trace.trace_id)
  59. meta_file.write_text(json.dumps(trace.to_dict(), indent=2, ensure_ascii=False))
  60. # 创建空的 events.jsonl
  61. events_file = self._get_events_file(trace.trace_id)
  62. events_file.touch()
  63. return trace.trace_id
  64. async def get_trace(self, trace_id: str) -> Optional[Trace]:
  65. """获取 Trace"""
  66. meta_file = self._get_meta_file(trace_id)
  67. if not meta_file.exists():
  68. return None
  69. data = json.loads(meta_file.read_text())
  70. # 解析 datetime 字段
  71. if data.get("created_at"):
  72. data["created_at"] = datetime.fromisoformat(data["created_at"])
  73. if data.get("completed_at"):
  74. data["completed_at"] = datetime.fromisoformat(data["completed_at"])
  75. return Trace(**data)
  76. async def update_trace(self, trace_id: str, **updates) -> None:
  77. """更新 Trace"""
  78. trace = await self.get_trace(trace_id)
  79. if not trace:
  80. return
  81. # 更新字段
  82. for key, value in updates.items():
  83. if hasattr(trace, key):
  84. setattr(trace, key, value)
  85. # 写回文件
  86. meta_file = self._get_meta_file(trace_id)
  87. meta_file.write_text(json.dumps(trace.to_dict(), indent=2, ensure_ascii=False))
  88. async def list_traces(
  89. self,
  90. mode: Optional[str] = None,
  91. agent_type: Optional[str] = None,
  92. uid: Optional[str] = None,
  93. status: Optional[str] = None,
  94. limit: int = 50
  95. ) -> List[Trace]:
  96. """列出 Traces"""
  97. traces = []
  98. if not self.base_path.exists():
  99. return []
  100. for trace_dir in self.base_path.iterdir():
  101. if not trace_dir.is_dir():
  102. continue
  103. meta_file = trace_dir / "meta.json"
  104. if not meta_file.exists():
  105. continue
  106. try:
  107. data = json.loads(meta_file.read_text())
  108. # 过滤
  109. if mode and data.get("mode") != mode:
  110. continue
  111. if agent_type and data.get("agent_type") != agent_type:
  112. continue
  113. if uid and data.get("uid") != uid:
  114. continue
  115. if status and data.get("status") != status:
  116. continue
  117. # 解析 datetime
  118. if data.get("created_at"):
  119. data["created_at"] = datetime.fromisoformat(data["created_at"])
  120. if data.get("completed_at"):
  121. data["completed_at"] = datetime.fromisoformat(data["completed_at"])
  122. traces.append(Trace(**data))
  123. except Exception:
  124. continue
  125. # 排序(最新的在前)
  126. traces.sort(key=lambda t: t.created_at, reverse=True)
  127. return traces[:limit]
  128. # ===== GoalTree 操作 =====
  129. async def get_goal_tree(self, trace_id: str) -> Optional[GoalTree]:
  130. """获取 GoalTree"""
  131. goal_file = self._get_goal_file(trace_id)
  132. if not goal_file.exists():
  133. return None
  134. try:
  135. data = json.loads(goal_file.read_text())
  136. return GoalTree.from_dict(data)
  137. except Exception:
  138. return None
  139. async def update_goal_tree(self, trace_id: str, tree: GoalTree) -> None:
  140. """更新完整 GoalTree"""
  141. goal_file = self._get_goal_file(trace_id)
  142. goal_file.write_text(json.dumps(tree.to_dict(), indent=2, ensure_ascii=False))
  143. async def add_goal(self, trace_id: str, goal: Goal) -> None:
  144. """添加 Goal 到 GoalTree"""
  145. tree = await self.get_goal_tree(trace_id)
  146. if not tree:
  147. return
  148. tree.goals.append(goal)
  149. await self.update_goal_tree(trace_id, tree)
  150. # 推送 goal_added 事件
  151. event_data = {
  152. "goal": goal.to_dict(),
  153. "parent_id": goal.parent_id
  154. }
  155. await self.append_event(trace_id, "goal_added", event_data)
  156. print(f"[DEBUG] Pushed goal_added event: goal_id={goal.id}, parent_id={goal.parent_id}")
  157. async def update_goal(self, trace_id: str, goal_id: str, **updates) -> None:
  158. """更新 Goal 字段"""
  159. tree = await self.get_goal_tree(trace_id)
  160. if not tree:
  161. return
  162. goal = tree.find(goal_id)
  163. if not goal:
  164. return
  165. # 更新字段
  166. for key, value in updates.items():
  167. if hasattr(goal, key):
  168. # 特殊处理 stats 字段(可能是 dict)
  169. if key in ["self_stats", "cumulative_stats"] and isinstance(value, dict):
  170. value = GoalStats.from_dict(value)
  171. setattr(goal, key, value)
  172. await self.update_goal_tree(trace_id, tree)
  173. # 推送 goal_updated 事件
  174. # 如果状态变为 completed,检查是否需要级联完成父 Goal
  175. affected_goals = [{"goal_id": goal_id, "updates": updates}]
  176. if updates.get("status") == "completed":
  177. # 检查级联完成:如果所有兄弟 Goal 都完成,父 Goal 也完成
  178. cascade_completed = await self._check_cascade_completion(trace_id, goal)
  179. affected_goals.extend(cascade_completed)
  180. await self.append_event(trace_id, "goal_updated", {
  181. "goal_id": goal_id,
  182. "updates": updates,
  183. "affected_goals": affected_goals
  184. })
  185. print(f"[DEBUG] Pushed goal_updated event: goal_id={goal_id}, updates={updates}, affected={len(affected_goals)}")
  186. async def _check_cascade_completion(
  187. self,
  188. trace_id: str,
  189. completed_goal: Goal
  190. ) -> List[Dict[str, Any]]:
  191. """
  192. 检查级联完成:如果一个 Goal 的所有子 Goal 都完成,则自动完成父 Goal
  193. Args:
  194. trace_id: Trace ID
  195. completed_goal: 刚完成的 Goal
  196. Returns:
  197. 受影响的父 Goals 列表(自动完成的)
  198. """
  199. if not completed_goal.parent_id:
  200. return []
  201. tree = await self.get_goal_tree(trace_id)
  202. if not tree:
  203. return []
  204. affected = []
  205. parent = tree.find(completed_goal.parent_id)
  206. if not parent:
  207. return []
  208. # 获取父 Goal 的所有子 Goal
  209. children = tree.get_children(parent.id)
  210. # 检查是否所有子 Goal 都已完成(排除 abandoned)
  211. all_completed = all(
  212. child.status in ["completed", "abandoned"]
  213. for child in children
  214. )
  215. if all_completed and parent.status != "completed":
  216. # 自动完成父 Goal
  217. parent.status = "completed"
  218. if not parent.summary:
  219. # 生成自动摘要
  220. completed_count = sum(1 for c in children if c.status == "completed")
  221. parent.summary = f"所有子目标已完成 ({completed_count}/{len(children)})"
  222. await self.update_goal_tree(trace_id, tree)
  223. affected.append({
  224. "goal_id": parent.id,
  225. "status": "completed",
  226. "summary": parent.summary,
  227. "cumulative_stats": parent.cumulative_stats.to_dict()
  228. })
  229. # 递归检查祖父 Goal
  230. grandparent_affected = await self._check_cascade_completion(trace_id, parent)
  231. affected.extend(grandparent_affected)
  232. return affected
  233. # ===== Message 操作 =====
  234. async def add_message(self, message: Message) -> str:
  235. """
  236. 添加 Message
  237. 自动更新关联 Goal 的 stats(self_stats 和祖先的 cumulative_stats)
  238. """
  239. trace_id = message.trace_id
  240. # 1. 写入 message 文件
  241. messages_dir = self._get_messages_dir(trace_id)
  242. message_file = messages_dir / f"{message.message_id}.json"
  243. message_file.write_text(json.dumps(message.to_dict(), indent=2, ensure_ascii=False), encoding="utf-8")
  244. # 2. 更新 trace 统计
  245. trace = await self.get_trace(trace_id)
  246. if trace:
  247. trace.total_messages += 1
  248. trace.last_sequence = max(trace.last_sequence, message.sequence)
  249. # 累计 tokens(完整版)
  250. if message.prompt_tokens:
  251. trace.total_prompt_tokens += message.prompt_tokens
  252. if message.completion_tokens:
  253. trace.total_completion_tokens += message.completion_tokens
  254. if message.reasoning_tokens:
  255. trace.total_reasoning_tokens += message.reasoning_tokens
  256. if message.cache_creation_tokens:
  257. trace.total_cache_creation_tokens += message.cache_creation_tokens
  258. if message.cache_read_tokens:
  259. trace.total_cache_read_tokens += message.cache_read_tokens
  260. # 向后兼容:也更新 total_tokens
  261. if message.tokens:
  262. trace.total_tokens += message.tokens
  263. elif message.prompt_tokens or message.completion_tokens:
  264. trace.total_tokens += (message.prompt_tokens or 0) + (message.completion_tokens or 0)
  265. if message.cost:
  266. trace.total_cost += message.cost
  267. if message.duration_ms:
  268. trace.total_duration_ms += message.duration_ms
  269. # 更新 Trace
  270. await self.update_trace(
  271. trace_id,
  272. total_messages=trace.total_messages,
  273. last_sequence=trace.last_sequence,
  274. total_tokens=trace.total_tokens,
  275. total_prompt_tokens=trace.total_prompt_tokens,
  276. total_completion_tokens=trace.total_completion_tokens,
  277. total_reasoning_tokens=trace.total_reasoning_tokens,
  278. total_cache_creation_tokens=trace.total_cache_creation_tokens,
  279. total_cache_read_tokens=trace.total_cache_read_tokens,
  280. total_cost=trace.total_cost,
  281. total_duration_ms=trace.total_duration_ms
  282. )
  283. # 3. 更新 Goal stats
  284. await self._update_goal_stats(trace_id, message)
  285. # 4. 追加 message_added 事件
  286. affected_goals = await self._get_affected_goals(trace_id, message)
  287. await self.append_event(trace_id, "message_added", {
  288. "message": message.to_dict(),
  289. "affected_goals": affected_goals
  290. })
  291. return message.message_id
  292. async def _update_goal_stats(self, trace_id: str, message: Message) -> None:
  293. """更新 Goal 的 self_stats 和祖先的 cumulative_stats"""
  294. tree = await self.get_goal_tree(trace_id)
  295. if not tree:
  296. return
  297. # 找到关联的 Goal
  298. goal = tree.find(message.goal_id)
  299. if not goal:
  300. return
  301. # 更新自身 self_stats
  302. goal.self_stats.message_count += 1
  303. if message.tokens:
  304. goal.self_stats.total_tokens += message.tokens
  305. if message.cost:
  306. goal.self_stats.total_cost += message.cost
  307. # TODO: 更新 preview(工具调用摘要)
  308. # 更新自身 cumulative_stats
  309. goal.cumulative_stats.message_count += 1
  310. if message.tokens:
  311. goal.cumulative_stats.total_tokens += message.tokens
  312. if message.cost:
  313. goal.cumulative_stats.total_cost += message.cost
  314. # 沿祖先链向上更新 cumulative_stats
  315. current_goal = goal
  316. while current_goal.parent_id:
  317. parent = tree.find(current_goal.parent_id)
  318. if not parent:
  319. break
  320. parent.cumulative_stats.message_count += 1
  321. if message.tokens:
  322. parent.cumulative_stats.total_tokens += message.tokens
  323. if message.cost:
  324. parent.cumulative_stats.total_cost += message.cost
  325. current_goal = parent
  326. # 保存更新后的 tree
  327. await self.update_goal_tree(trace_id, tree)
  328. async def _get_affected_goals(self, trace_id: str, message: Message) -> List[Dict[str, Any]]:
  329. """获取受影响的 Goals(自身 + 所有祖先)"""
  330. tree = await self.get_goal_tree(trace_id)
  331. if not tree:
  332. return []
  333. goal = tree.find(message.goal_id)
  334. if not goal:
  335. return []
  336. affected = []
  337. # 添加自身(包含 self_stats 和 cumulative_stats)
  338. affected.append({
  339. "goal_id": goal.id,
  340. "self_stats": goal.self_stats.to_dict(),
  341. "cumulative_stats": goal.cumulative_stats.to_dict()
  342. })
  343. # 添加所有祖先(仅 cumulative_stats)
  344. current_goal = goal
  345. while current_goal.parent_id:
  346. parent = tree.find(current_goal.parent_id)
  347. if not parent:
  348. break
  349. affected.append({
  350. "goal_id": parent.id,
  351. "cumulative_stats": parent.cumulative_stats.to_dict()
  352. })
  353. current_goal = parent
  354. return affected
  355. return affected
  356. async def get_message(self, message_id: str) -> Optional[Message]:
  357. """获取 Message(扫描所有 trace)"""
  358. for trace_dir in self.base_path.iterdir():
  359. if not trace_dir.is_dir():
  360. continue
  361. # 检查 messages 目录
  362. message_file = trace_dir / "messages" / f"{message_id}.json"
  363. if message_file.exists():
  364. try:
  365. data = json.loads(message_file.read_text())
  366. return Message.from_dict(data)
  367. except Exception:
  368. pass
  369. return None
  370. async def get_trace_messages(
  371. self,
  372. trace_id: str
  373. ) -> List[Message]:
  374. """获取 Trace 的所有 Messages"""
  375. messages_dir = self._get_messages_dir(trace_id)
  376. if not messages_dir.exists():
  377. return []
  378. messages = []
  379. for message_file in messages_dir.glob("*.json"):
  380. try:
  381. data = json.loads(message_file.read_text())
  382. messages.append(Message.from_dict(data))
  383. except Exception:
  384. continue
  385. # 按 sequence 排序
  386. messages.sort(key=lambda m: m.sequence)
  387. return messages
  388. async def get_messages_by_goal(
  389. self,
  390. trace_id: str,
  391. goal_id: str
  392. ) -> List[Message]:
  393. """获取指定 Goal 关联的所有 Messages"""
  394. all_messages = await self.get_trace_messages(trace_id)
  395. return [m for m in all_messages if m.goal_id == goal_id]
  396. async def update_message(self, message_id: str, **updates) -> None:
  397. """更新 Message 字段"""
  398. message = await self.get_message(message_id)
  399. if not message:
  400. return
  401. # 更新字段
  402. for key, value in updates.items():
  403. if hasattr(message, key):
  404. setattr(message, key, value)
  405. # 确定文件路径
  406. messages_dir = self._get_messages_dir(message.trace_id)
  407. message_file = messages_dir / f"{message_id}.json"
  408. message_file.write_text(json.dumps(message.to_dict(), indent=2, ensure_ascii=False))
  409. # ===== 事件流操作(用于 WebSocket 断线续传)=====
  410. async def get_events(
  411. self,
  412. trace_id: str,
  413. since_event_id: int = 0
  414. ) -> List[Dict[str, Any]]:
  415. """获取事件流"""
  416. events_file = self._get_events_file(trace_id)
  417. if not events_file.exists():
  418. return []
  419. events = []
  420. with events_file.open('r') as f:
  421. for line in f:
  422. try:
  423. event = json.loads(line.strip())
  424. if event.get("event_id", 0) > since_event_id:
  425. events.append(event)
  426. except Exception:
  427. continue
  428. return events
  429. async def append_event(
  430. self,
  431. trace_id: str,
  432. event_type: str,
  433. payload: Dict[str, Any]
  434. ) -> int:
  435. """追加事件,返回 event_id"""
  436. # 获取 trace 并递增 event_id
  437. trace = await self.get_trace(trace_id)
  438. if not trace:
  439. return 0
  440. trace.last_event_id += 1
  441. event_id = trace.last_event_id
  442. # 更新 trace 的 last_event_id
  443. await self.update_trace(trace_id, last_event_id=event_id)
  444. # 创建事件
  445. event = {
  446. "event_id": event_id,
  447. "event": event_type,
  448. "ts": datetime.now().isoformat(),
  449. **payload
  450. }
  451. # 追加到 events.jsonl
  452. events_file = self._get_events_file(trace_id)
  453. with events_file.open('a', encoding='utf-8') as f:
  454. f.write(json.dumps(event, ensure_ascii=False) + '\n')
  455. return event_id