| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176 |
- """
- Schema Generator - 从函数签名自动生成 OpenAI Tool Schema
- 职责:
- 1. 解析函数签名(参数、类型注解、默认值)
- 2. 解析 docstring(Google 风格)
- 3. 生成 OpenAI Tool Calling 格式的 JSON Schema
- 从 Resonote/llm/tools/schema.py 抽取
- """
- import inspect
- import logging
- from typing import Any, Dict, List, Optional, get_args, get_origin
- logger = logging.getLogger(__name__)
- # 尝试导入 docstring_parser,如果不可用则提供降级方案
- try:
- from docstring_parser import parse as parse_docstring
- HAS_DOCSTRING_PARSER = True
- except ImportError:
- HAS_DOCSTRING_PARSER = False
- logger.warning("docstring_parser not installed, using fallback docstring parsing")
- def _simple_parse_docstring(docstring: str) -> tuple[str, Dict[str, str]]:
- """简单的 docstring 解析(降级方案)"""
- if not docstring:
- return "", {}
- lines = docstring.strip().split("\n")
- description = lines[0] if lines else ""
- param_descriptions = {}
- # 简单解析 Args: 部分
- in_args = False
- for line in lines[1:]:
- line = line.strip()
- if line.lower().startswith("args:"):
- in_args = True
- continue
- if line.lower().startswith(("returns:", "raises:", "example:")):
- in_args = False
- continue
- if in_args and ":" in line:
- parts = line.split(":", 1)
- param_name = parts[0].strip()
- param_desc = parts[1].strip() if len(parts) > 1 else ""
- param_descriptions[param_name] = param_desc
- return description, param_descriptions
- class SchemaGenerator:
- """从函数生成 OpenAI Tool Schema"""
- # Python 类型到 JSON Schema 类型的映射
- TYPE_MAP = {
- str: "string",
- int: "integer",
- float: "number",
- bool: "boolean",
- list: "array",
- dict: "object",
- List: "array",
- Dict: "object",
- }
- @classmethod
- def generate(cls, func: callable) -> Dict[str, Any]:
- """
- 从函数生成 OpenAI Tool Schema
- Args:
- func: 要生成 Schema 的函数
- Returns:
- OpenAI Tool Schema(JSON 格式)
- """
- # 解析函数签名
- sig = inspect.signature(func)
- func_name = func.__name__
- # 解析 docstring
- if HAS_DOCSTRING_PARSER:
- doc = parse_docstring(func.__doc__ or "")
- func_description = doc.short_description or doc.long_description or f"Call {func_name}"
- param_descriptions = {p.arg_name: p.description for p in doc.params if p.description}
- else:
- func_description, param_descriptions = _simple_parse_docstring(func.__doc__ or "")
- if not func_description:
- func_description = f"Call {func_name}"
- # 生成参数 Schema
- properties = {}
- required = []
- for param_name, param in sig.parameters.items():
- # 跳过特殊参数
- if param_name in ["self", "cls", "kwargs", "context"]:
- continue
- # 跳过 uid(由框架自动注入)
- if param_name == "uid":
- continue
- # 获取类型注解
- param_type = param.annotation if param.annotation != inspect.Parameter.empty else str
- # 生成参数 Schema
- param_schema = cls._type_to_schema(param_type)
- # 添加描述
- if param_name in param_descriptions:
- param_schema["description"] = param_descriptions[param_name]
- # 添加默认值
- if param.default != inspect.Parameter.empty:
- param_schema["default"] = param.default
- else:
- required.append(param_name)
- properties[param_name] = param_schema
- # 构建完整的 Schema
- schema = {
- "type": "function",
- "function": {
- "name": func_name,
- "description": func_description,
- "parameters": {
- "type": "object",
- "properties": properties,
- "required": required
- }
- }
- }
- return schema
- @classmethod
- def _type_to_schema(cls, python_type: Any) -> Dict[str, Any]:
- """将 Python 类型转换为 JSON Schema"""
- # 处理 Optional[T]
- origin = get_origin(python_type)
- args = get_args(python_type)
- if origin is Optional.__class__ or (origin and str(origin) == "typing.Union"):
- # Optional[T] = Union[T, None]
- if len(args) == 2 and type(None) in args:
- inner_type = args[0] if args[1] is type(None) else args[1]
- schema = cls._type_to_schema(inner_type)
- return schema
- # 处理 List[T]
- if origin is list or origin is List:
- if args:
- item_type = args[0]
- return {
- "type": "array",
- "items": cls._type_to_schema(item_type)
- }
- return {"type": "array"}
- # 处理 Dict[K, V]
- if origin is dict or origin is Dict:
- return {"type": "object"}
- # 处理基础类型
- if python_type in cls.TYPE_MAP:
- return {"type": cls.TYPE_MAP[python_type]}
- # 默认为 string
- logger.warning(f"Unknown type {python_type}, defaulting to string")
- return {"type": "string"}
|