| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330 |
- """
- 关键点检索工具 - 根据输入的点在图数据库中查找所有关联的点
- 用于 Agent 执行时自主调取关联关键点数据。
- """
- import json
- import os
- from pathlib import Path
- from typing import Any, Dict, List, Optional
- from agent.tools import tool, ToolResult
# Location of the point-type-only graph data file. The GRAPH_DATA_PATH
# environment variable, when set, overrides the default, which lives under
# the parent of this file's directory.
GRAPH_DATA_PATH = os.environ.get(
    "GRAPH_DATA_PATH",
    str(
        Path(__file__).parent.parent
        / "data" / "library" / "item_graph"
        / "item_graph_point_type_only_all_levels.json"
    ),
)

# Location of the full graph data file (the one that also contains edges).
# The GRAPH_FULL_DATA_PATH environment variable overrides the default.
GRAPH_FULL_DATA_PATH = os.environ.get(
    "GRAPH_FULL_DATA_PATH",
    str(
        Path(__file__).parent.parent
        / "data" / "library" / "item_graph"
        / "item_graph_full_all_levels.json"
    ),
)

# In-memory copies of the parsed graph files so each file is read only once;
# None means "not loaded yet".
_graph_cache: Optional[Dict[str, Any]] = None
_graph_full_cache: Optional[Dict[str, Any]] = None
def _load_graph() -> Dict[str, Any]:
    """Return the parsed graph data from ``GRAPH_DATA_PATH``, loading it on first use.

    The parsed JSON is kept in the module-level ``_graph_cache``; while that
    cache is not None, later calls return it without reading the file again.

    Returns:
        The object decoded from the JSON file.
    """
    global _graph_cache
    if _graph_cache is not None:
        return _graph_cache

    # First call (or nothing cached yet): read and decode the UTF-8 JSON file.
    _graph_cache = json.loads(Path(GRAPH_DATA_PATH).read_text(encoding='utf-8'))
    return _graph_cache
def _load_graph_full() -> Dict[str, Any]:
    """Return the parsed full graph data (edges included), loading it on first use.

    The file at ``GRAPH_FULL_DATA_PATH`` is decoded once and kept in the
    module-level ``_graph_full_cache``; while that cache is not None, later
    calls return it without reading the file again.

    Returns:
        The object decoded from the JSON file.
    """
    global _graph_full_cache
    if _graph_full_cache is not None:
        return _graph_full_cache

    # First call (or nothing cached yet): read and decode the UTF-8 JSON file.
    _graph_full_cache = json.loads(Path(GRAPH_FULL_DATA_PATH).read_text(encoding='utf-8'))
    return _graph_full_cache
def _remove_post_ids_from_edges(edges: Dict[str, Any]) -> Dict[str, Any]:
    """Return a copy of ``edges`` whose dict-valued entries lack the ``_post_ids`` key.

    Only the top level of each edge entry is filtered; nested dicts are left
    untouched. Entries whose value is not a dict are carried over unchanged.
    The input mapping itself is never modified.

    Args:
        edges: Mapping from edge name to edge data.

    Returns:
        A new mapping without ``_post_ids`` fields. A falsy input (empty
        dict, None) is returned as-is.
    """
    if not edges:
        return edges

    def without_post_ids(edge_data: Any) -> Any:
        # Non-dict edge payloads have no fields to strip.
        if not isinstance(edge_data, dict):
            return edge_data
        return {
            field: value
            for field, value in edge_data.items()
            if field != "_post_ids"
        }

    return {
        edge_name: without_post_ids(edge_data)
        for edge_name, edge_data in edges.items()
    }
def _search_points_by_element_from_full(
    element_value: str,
    element_type: str,
    top_k: int = 10
) -> Dict[str, Any]:
    """Find points in the full graph whose elements contain a given value.

    A point matches when ``element_value`` is a member of its
    ``meta["elements"]`` container and its ``meta["dimension"]`` equals
    ``element_type``.

    Args:
        element_value: Element to look for, e.g. "标准化" or "懒人妻子".
        element_type: Dimension the point must have: "实质", "形式" or "意图".
        top_k: Maximum number of points to return, ordered by
            ``frequency_in_posts`` descending. Values below 1 return no
            points.

    Returns:
        If nothing matches, a dict with ``found`` False, the query values and
        a ``message``. Otherwise a dict with ``found`` True, the query values,
        ``total_matched_count`` (number of matches before truncation),
        ``returned_count`` (number of points actually returned) and
        ``matched_points``. Each returned point has the same fields as the
        path lookup result and carries its edges with ``_post_ids`` removed.
    """
    graph = _load_graph_full()

    # First pass only identifies matches; edge cleaning is deferred until the
    # candidate list has been cut down to top_k.
    candidates = []
    for point_name, point_data in graph.items():
        meta = point_data.get("meta", {})
        # A null "elements" entry is treated as "no elements" instead of
        # raising on the membership test.
        elements = meta.get("elements") or {}
        if element_value in elements and meta.get("dimension") == element_type:
            candidates.append((point_name, point_data, meta, elements))

    if not candidates:
        return {
            "found": False,
            "element_value": element_value,
            "element_type": element_type,
            "message": f"未找到匹配的点: element_value={element_value}, element_type={element_type}"
        }

    # Record the real number of matches BEFORE truncating to top_k.
    total_matched_count = len(candidates)

    # Highest frequency first; a missing or null frequency sorts as 0 so that
    # None never gets compared with an int.
    candidates.sort(
        key=lambda item: item[2].get("frequency_in_posts") or 0,
        reverse=True,
    )
    # Clamp so a negative top_k yields nothing rather than slicing from the end.
    selected = candidates[:max(top_k, 0)]

    matched_points = []
    for point_name, point_data, meta, elements in selected:
        cleaned_edges = _remove_post_ids_from_edges(point_data.get("edges", {}))
        matched_points.append({
            "point": point_name,
            "point_type": meta.get("point_type"),
            "dimension": meta.get("dimension"),
            "point_path": meta.get("path"),
            "frequency_in_posts": meta.get("frequency_in_posts", 0),
            "elements": elements,
            "edge_count": len(cleaned_edges),
            "edges": cleaned_edges
        })

    return {
        "found": True,
        "element_value": element_value,
        "element_type": element_type,
        "total_matched_count": total_matched_count,
        "returned_count": len(matched_points),
        "matched_points": matched_points
    }
def _search_point_by_path_from_full(path: str) -> Dict[str, Any]:
    """Look up a single point in the full graph by its path key.

    Args:
        path: Full path of the point, e.g.
            "关键点_形式_架构>逻辑>逻辑架构>组织逻辑>框架规划>结构设计".

    Returns:
        If the path is not a key of the graph, a dict with ``found`` False,
        the ``path`` and a ``message``. Otherwise a dict with ``found`` True,
        the ``path``, the point's metadata fields, and its edges with
        ``_post_ids`` removed (plus their count).
    """
    try:
        node = _load_graph_full()[path]
    except KeyError:
        return {
            "found": False,
            "path": path,
            "message": f"未找到路径: {path}"
        }

    info = node.get("meta", {})
    # Strip the _post_ids field from every edge before returning.
    edges_without_ids = _remove_post_ids_from_edges(node.get("edges", {}))

    result: Dict[str, Any] = {"found": True, "path": path}
    result["point_type"] = info.get("point_type")
    result["dimension"] = info.get("dimension")
    result["point_path"] = info.get("path")
    result["frequency_in_posts"] = info.get("frequency_in_posts")
    result["elements"] = info.get("elements", {})
    result["edge_count"] = len(edges_without_ids)
    result["edges"] = edges_without_ids
    return result
@tool(
    description="根据元素值和类型在完整图数据库中查找匹配的点,返回包含边信息的完整数据。",
    display={
        "zh": {
            "name": "元素类型完整检索",
            "params": {
                "element_value": "元素值",
                "element_type": "元素类型(实质/形式/意图)",
                "top_k": "返回数量(默认10)",
            },
        },
    },
)
async def search_point_by_element_from_full_all_levels(
    element_value: str,
    element_type: str,
    top_k: int = 10
) -> ToolResult:
    """Search the full graph for points by element value and type, edges included.

    Args:
        element_value: Element name, e.g. "标准化" or "懒人妻子".
        element_type: Element type, one of "实质" / "形式" / "意图".
        top_k: Return the first K points, default 10.

    Returns:
        ToolResult: On success, the matched points (with edges) as a JSON
        string in ``output``. When nothing matches, a JSON message in
        ``output``. On invalid input, a missing data file or any other
        exception, an empty ``output`` with ``error`` set.
    """
    def failed(reason: str) -> ToolResult:
        # All failure paths share the same title and an empty output.
        return ToolResult(
            title="元素类型检索失败",
            output="",
            error=reason,
        )

    if not element_value:
        return failed("请提供元素值")

    allowed_types = ("实质", "形式", "意图")
    if element_type not in allowed_types:
        return failed(f"元素类型必须是 '实质'、'形式' 或 '意图',当前值: {element_type}")

    try:
        result = _search_points_by_element_from_full(element_value, element_type, top_k)
    except FileNotFoundError:
        return failed(f"图数据文件不存在: {GRAPH_FULL_DATA_PATH}")
    except Exception as exc:
        # Tool boundary: report any other failure to the caller as an error result.
        return failed(f"检索异常: {exc!s}")

    if not result["found"]:
        not_found_payload = {
            "message": result["message"],
            "element_value": element_value,
            "element_type": element_type
        }
        return ToolResult(
            title="元素类型检索",
            output=json.dumps(not_found_payload, ensure_ascii=False, indent=2),
        )

    # Emit only the reporting fields, in a fixed order.
    reported_fields = (
        "element_value",
        "element_type",
        "total_matched_count",
        "returned_count",
        "matched_points",
    )
    output = json.dumps(
        {field: result[field] for field in reported_fields},
        ensure_ascii=False,
        indent=2
    )
    returned_count = result["returned_count"]

    return ToolResult(
        title=f"元素类型检索 - {element_value} ({element_type})",
        output=output,
        long_term_memory=f"检索到 {returned_count} 个匹配点,元素值: {element_value}, 类型: {element_type}",
    )
@tool(
    description="根据完整路径在完整图数据库中查找点,返回包含边信息的完整数据。",
    display={
        "zh": {
            "name": "路径完整检索",
            "params": {
                "path": "点的完整路径",
            },
        },
    },
)
async def search_point_by_path_from_full_all_levels(path: str) -> ToolResult:
    """Look up one point in the full graph by its full path, edges included.

    Args:
        path: Full path of the point, e.g.
            "关键点_形式_架构>逻辑>逻辑架构>组织逻辑>框架规划>结构设计".

    Returns:
        ToolResult: On success, the point's data (with edges) as a JSON
        string in ``output``. When the path is unknown, a JSON message in
        ``output``. On an empty path, a missing data file or any other
        exception, an empty ``output`` with ``error`` set.
    """
    def failed(reason: str) -> ToolResult:
        # All failure paths share the same title and an empty output.
        return ToolResult(
            title="路径检索失败",
            output="",
            error=reason,
        )

    if not path:
        return failed("请提供路径")

    try:
        result = _search_point_by_path_from_full(path)
    except FileNotFoundError:
        return failed(f"图数据文件不存在: {GRAPH_FULL_DATA_PATH}")
    except Exception as exc:
        # Tool boundary: report any other failure to the caller as an error result.
        return failed(f"检索异常: {exc!s}")

    if not result["found"]:
        not_found_payload = {"message": result["message"], "path": path}
        return ToolResult(
            title="路径检索",
            output=json.dumps(not_found_payload, ensure_ascii=False, indent=2),
        )

    # Emit only the reporting fields, in a fixed order.
    reported_fields = (
        "path",
        "point_type",
        "dimension",
        "point_path",
        "frequency_in_posts",
        "elements",
        "edge_count",
        "edges",
    )
    output = json.dumps(
        {field: result[field] for field in reported_fields},
        ensure_ascii=False,
        indent=2
    )
    edge_count = result["edge_count"]

    return ToolResult(
        title=f"路径检索 - {path}",
        output=output,
        long_term_memory=f"检索到路径 {path} 的完整信息,包含 {edge_count} 条边",
    )
|