""" Schema Generator - 从函数签名自动生成 OpenAI Tool Schema 职责: 1. 解析函数签名(参数、类型注解、默认值) 2. 解析 docstring(Google 风格) 3. 生成 OpenAI Tool Calling 格式的 JSON Schema 从 Resonote/llm/tools/schema.py 抽取 """ import inspect import logging from typing import Any, Dict, List, Literal, Optional, Union, get_args, get_origin logger = logging.getLogger(__name__) # 尝试导入 docstring_parser,如果不可用则提供降级方案 try: from docstring_parser import parse as parse_docstring HAS_DOCSTRING_PARSER = True except ImportError: HAS_DOCSTRING_PARSER = False logger.warning("docstring_parser not installed, using fallback docstring parsing") def _simple_parse_docstring(docstring: str) -> tuple[str, Dict[str, str]]: """简单的 docstring 解析(降级方案)""" if not docstring: return "", {} lines = docstring.strip().split("\n") description = lines[0] if lines else "" param_descriptions = {} # 简单解析 Args: 部分 in_args = False for line in lines[1:]: line = line.strip() if line.lower().startswith("args:"): in_args = True continue if line.lower().startswith(("returns:", "raises:", "example:")): in_args = False continue if in_args and ":" in line: parts = line.split(":", 1) param_name = parts[0].strip() param_desc = parts[1].strip() if len(parts) > 1 else "" param_descriptions[param_name] = param_desc return description, param_descriptions class SchemaGenerator: """从函数生成 OpenAI Tool Schema""" # Python 类型到 JSON Schema 类型的映射 TYPE_MAP = { str: "string", int: "integer", float: "number", bool: "boolean", list: "array", dict: "object", List: "array", Dict: "object", } @classmethod def generate(cls, func: callable, hidden_params: Optional[List[str]] = None) -> Dict[str, Any]: """ 从函数生成 OpenAI Tool Schema Args: func: 要生成 Schema 的函数 hidden_params: 隐藏参数列表(不生成 schema) Returns: OpenAI Tool Schema(JSON 格式) """ hidden_params = hidden_params or [] # 解析函数签名 sig = inspect.signature(func) func_name = func.__name__ # 解析 docstring if HAS_DOCSTRING_PARSER: doc = parse_docstring(func.__doc__ or "") func_description = doc.short_description or doc.long_description or f"Call {func_name}" param_descriptions = {p.arg_name: p.description for p in doc.params if p.description} else: func_description, param_descriptions = _simple_parse_docstring(func.__doc__ or "") if not func_description: func_description = f"Call {func_name}" # 生成参数 Schema properties = {} required = [] for param_name, param in sig.parameters.items(): # 跳过特殊参数 if param_name in ["self", "cls", "kwargs"]: continue # 跳过隐藏参数 if param_name in hidden_params: continue # 获取类型注解 param_type = param.annotation if param.annotation != inspect.Parameter.empty else str # 生成参数 Schema param_schema = cls._type_to_schema(param_type) # 添加描述 if param_name in param_descriptions: param_schema["description"] = param_descriptions[param_name] # 添加默认值 if param.default != inspect.Parameter.empty: param_schema["default"] = param.default else: required.append(param_name) properties[param_name] = param_schema # 构建完整的 Schema schema = { "type": "function", "function": { "name": func_name, "description": func_description, "parameters": { "type": "object", "properties": properties, "required": required } } } return schema @classmethod def _type_to_schema(cls, python_type: Any) -> Dict[str, Any]: """将 Python 类型转换为 JSON Schema""" if python_type is Any: return {} origin = get_origin(python_type) args = get_args(python_type) # 处理 Literal[...] if origin is Literal: values = list(args) if all(isinstance(v, str) for v in values): return {"type": "string", "enum": values} elif all(isinstance(v, int) for v in values): return {"type": "integer", "enum": values} return {"enum": values} # 处理 Union[T, ...] 和 Optional[T] if origin is Union: if len(args) == 2 and type(None) in args: # Optional[T] = Union[T, None] inner = args[0] if args[1] is type(None) else args[1] return cls._type_to_schema(inner) non_none = [a for a in args if a is not type(None)] return {"oneOf": [cls._type_to_schema(a) for a in non_none]} # 处理 List[T] if origin is list or origin is List: if args: item_type = args[0] return { "type": "array", "items": cls._type_to_schema(item_type) } return {"type": "array"} # 处理 Dict[K, V] if origin is dict or origin is Dict: return {"type": "object"} # 处理基础类型 if python_type in cls.TYPE_MAP: return {"type": cls.TYPE_MAP[python_type]} # 检查是否是 Protocol(如 ToolContext) # Protocol 类型用于依赖注入,不应出现在 schema 中 type_name = getattr(python_type, "__name__", str(python_type)) if "Protocol" in str(type(python_type)) or type_name in ("ToolContext",): logger.debug(f"Skipping Protocol type {python_type} (used for dependency injection)") return {} # 默认为 string logger.debug(f"Unknown type {python_type}, defaulting to string") return {"type": "string"}