| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402 |
- """
Skill Loader - load Skills from Markdown files.

Two file formats are supported.

Format 1 - YAML frontmatter (recommended):

    ---
    name: skill-name
    description: Skill description
    category: category-name
    scope: agent:*
    parent: parent-id
    ---

    ## When to use
    - Use case 1
    - Use case 2

    ## Guidelines
    - Guideline 1
    - Guideline 2

Format 2 - inline metadata (kept for backward compatibility):

    # Skill Name

    > category: web-automation
    > scope: agent:*

    ## Description
    ...

    ## Guidelines
    ...
- """
- import os
- import re
- from pathlib import Path
- from typing import List, Dict, Optional
- import logging
- from agent.skill.models import Skill
- logger = logging.getLogger(__name__)
class SkillLoader:
    """Load ``Skill`` objects from the Markdown files of one directory.

    Two file layouts are understood:

    * Frontmatter format (recommended): a block of ``key: value`` lines
      delimited by ``---`` lines, followed by optional ``## When to use``
      and ``## Guidelines`` sections.
    * Inline format (backward compatible): a ``# Title`` line, metadata
      written as ``> key: value`` blockquote lines, then ``## Description``
      and ``## Guidelines`` sections.
    """

    # A Markdown list item: bullet ("-", "*", "+") or ordered ("1.", "1)"),
    # followed by whitespace. Group 1 is the item text.
    _LIST_ITEM_RE = re.compile(r"^(?:[-*+]|\d+[.)])\s+(.*)$")

    def __init__(self, skills_dir: str):
        """Remember the skills directory and warn when it does not exist.

        Args:
            skills_dir: Path of the directory that holds the ``.md`` files.
        """
        self.skills_dir = Path(skills_dir)
        if not self.skills_dir.exists():
            logger.warning(f"Skills 目录不存在: {skills_dir}")

    def load_all(self) -> List["Skill"]:
        """Load every ``*.md`` file directly inside the skills directory.

        A file that fails to load is logged and skipped; it never aborts the
        whole run.

        Returns:
            The loaded skills, ordered by file path. Empty when the
            directory is missing.
        """
        if not self.skills_dir.is_dir():
            return []

        skills = []
        # glob() yields entries in filesystem-dependent order; sorting makes
        # the resulting skill order reproducible across machines.
        for md_file in sorted(self.skills_dir.glob("*.md")):
            try:
                skill = self.load_file(md_file)
                if skill:
                    skills.append(skill)
                    logger.info(f"成功加载 skill: {skill.name} from {md_file.name}")
            except Exception as e:
                # Per-file boundary: keep the traceback, continue with the rest.
                logger.exception(f"加载 skill 失败 {md_file}: {e}")
        return skills

    def load_file(self, file_path: Path) -> Optional["Skill"]:
        """Load a single Markdown file as a Skill.

        Args:
            file_path: Path of the Markdown file.

        Returns:
            The parsed Skill, or ``None`` when the file does not exist or
            cannot be parsed.
        """
        if not file_path.exists():
            logger.warning(f"文件不存在: {file_path}")
            return None

        # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which
        # would otherwise hide the "---" / "# " marker on the first line.
        content = file_path.read_text(encoding="utf-8-sig")
        return self.parse_markdown(content, file_path.stem)

    def parse_markdown(self, content: str, filename: str) -> Optional["Skill"]:
        """Parse Markdown text into a Skill, auto-detecting the format.

        Content whose first non-blank text starts with ``---`` is treated as
        the frontmatter format; everything else as the inline format.

        Args:
            content: Markdown text.
            filename: File name without extension; used to derive a skill
                name when the document does not provide one.

        Returns:
            The parsed Skill, or ``None`` when parsing fails.
        """
        # str.strip() does not remove a BOM, so drop it explicitly for
        # callers that pass text they decoded themselves.
        text = content.lstrip("\ufeff")
        if text.strip().startswith("---"):
            return self._parse_frontmatter_format(text, filename)
        return self._parse_inline_format(text, filename)

    def _parse_frontmatter_format(self, content: str, filename: str) -> Optional["Skill"]:
        """Parse the frontmatter format.

        Recognised keys are ``name``, ``description``, ``category``,
        ``scope`` and ``parent``. Missing or empty values fall back to a
        name derived from ``filename``, ``""``, ``"general"``, ``"agent:*"``
        and ``None`` respectively. Items of a ``## When to use`` section are
        appended to the description.

        Returns:
            The parsed Skill, or ``None`` when the opening ``---`` is missing.
        """
        # Leading blank lines (and a BOM) are ignored so that this check
        # agrees with the detection done in parse_markdown.
        lines = content.lstrip("\ufeff").lstrip().split("\n")
        if lines[0].strip() != "---":
            logger.warning("格式错误:缺少开始的 ---")
            return None

        end = self._find_frontmatter_end(lines)
        if end == len(lines):
            # Best effort: every line is treated as frontmatter and the body
            # is empty, but make the malformed file visible in the logs.
            logger.warning(f"格式错误:{filename} 缺少结束的 ---")

        frontmatter = self._parse_frontmatter_fields(lines[1:end])
        body_lines = lines[end + 1:]

        description = frontmatter.get("description", "")
        when_to_use = self._extract_list_items(body_lines, "When to use")
        if when_to_use:
            bullet_text = "\n".join(f"- {item}" for item in when_to_use)
            description += "\n\n适用场景:\n" + bullet_text

        return Skill.create(
            scope=frontmatter.get("scope") or "agent:*",
            name=frontmatter.get("name") or self._filename_to_title(filename),
            description=description.strip(),
            category=frontmatter.get("category") or "general",
            content="\n".join(body_lines).strip(),  # Markdown without frontmatter
            guidelines=self._extract_list_items(body_lines, "Guidelines"),
            parent_id=frontmatter.get("parent") or None,
        )

    def _find_frontmatter_end(self, lines: List[str]) -> int:
        """Return the index of the closing ``---`` line.

        ``lines[0]`` is assumed to be the opening delimiter and is skipped.
        Returns ``len(lines)`` when there is no closing delimiter.
        """
        for index, line in enumerate(lines[1:], start=1):
            if line.strip() == "---":
                return index
        return len(lines)

    def _parse_frontmatter_fields(self, lines: List[str]) -> Dict[str, str]:
        """Turn ``key: value`` lines into a dict.

        Only the first ``:`` separates key from value, so values such as
        ``agent:*`` survive. Lines without ``:`` and ``#`` comment lines are
        ignored; one pair of matching surrounding quotes is removed from a
        value.
        """
        fields = {}
        for raw_line in lines:
            line = raw_line.strip()
            if ":" not in line or line.startswith("#"):
                continue
            key, _, value = line.partition(":")
            fields[key.strip()] = self._unquote(value.strip())
        return fields

    @staticmethod
    def _unquote(value: str) -> str:
        """Strip one pair of matching single or double quotes, if present."""
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            return value[1:-1]
        return value

    def _parse_inline_format(self, content: str, filename: str) -> Optional["Skill"]:
        """Parse the inline-metadata format (backward compatible).

        The name comes from the first ``# `` line (or from ``filename``),
        metadata from ``> key: value`` lines, the description from the
        ``## Description`` section and guidelines from ``## Guidelines``.
        Empty ``category``/``scope``/``parent`` values fall back to
        ``"general"``, ``"agent:*"`` and ``None``.
        """
        lines = content.split("\n")
        metadata = self._extract_metadata(lines)
        description = self._extract_section(lines, "Description") or ""

        return Skill.create(
            scope=metadata.get("scope") or "agent:*",
            name=self._extract_title(lines) or self._filename_to_title(filename),
            description=description.strip(),
            category=metadata.get("category") or "general",
            content=self._strip_inline_header(lines),  # Markdown without header
            guidelines=self._extract_list_items(lines, "Guidelines"),
            parent_id=metadata.get("parent") or None,
        )

    def _strip_inline_header(self, lines: List[str]) -> str:
        """Return the document text without its title and metadata header.

        The header is everything before the first ``##`` heading. Inside it,
        ``# `` title lines, ``>`` metadata lines and the blank line that
        follows the metadata are removed. From the first ``##`` heading on,
        every line is kept verbatim, so ``# comment`` lines in code fences
        and blockquotes in the body are preserved.
        """
        kept = []
        in_header = True
        after_metadata = False
        for line in lines:
            stripped = line.strip()
            if in_header:
                if stripped.startswith("##"):
                    in_header = False
                elif stripped.startswith("# "):
                    continue
                elif stripped.startswith(">"):
                    after_metadata = True
                    continue
                elif after_metadata and not stripped:
                    after_metadata = False
                    continue
            kept.append(line)
        return "\n".join(kept).strip()

    def _extract_title(self, lines: List[str]) -> Optional[str]:
        """Return the text of the first ``# `` heading, or ``None``."""
        for line in lines:
            stripped = line.strip()
            if stripped.startswith("# "):
                return stripped[2:].strip()
        return None

    def _filename_to_title(self, filename: str) -> str:
        """Convert a kebab-case file name to a title (``a-b`` -> ``A B``)."""
        return " ".join(word.capitalize() for word in filename.split("-"))

    def _extract_metadata(self, lines: List[str]) -> Dict[str, str]:
        """Collect ``> key: value`` lines into a dict.

        Every line starting with ``>`` is inspected; only the first ``:``
        separates key from value. A later line with the same key overrides
        an earlier one.

        Example:
            > category: web-automation
            > scope: agent:*
        """
        metadata = {}
        for line in lines:
            stripped = line.strip()
            if not stripped.startswith(">"):
                continue
            entry = stripped[1:].strip()  # drop the ">" marker
            if ":" in entry:
                key, _, value = entry.partition(":")
                metadata[key.strip()] = value.strip()
        return metadata

    def _extract_section(self, lines: List[str], section_name: str) -> Optional[str]:
        """Return the text of one ``## `` section.

        A section starts at a ``## `` heading that contains ``section_name``
        (case-insensitive substring match) and ends at the next line that
        starts with ``##`` or at the end of the input.

        Args:
            lines: Lines of the document.
            section_name: Section name to look for, e.g. ``"Description"``.

        Returns:
            The stripped section text, or ``None`` when the section was not
            found or has no lines.
        """
        wanted = section_name.lower()
        in_section = False
        section_lines = []

        for line in lines:
            stripped = line.strip()
            if stripped.startswith("## ") and wanted in stripped.lower():
                in_section = True
                continue
            if in_section:
                if stripped.startswith("##"):
                    break  # next heading ends the section
                section_lines.append(line)

        return "\n".join(section_lines).strip() if section_lines else None

    def _extract_list_items(self, lines: List[str], section_name: str) -> List[str]:
        """Return the list items found in one section.

        Bullet items (``-``, ``*``, ``+``) and ordered items (``1.``, ``1)``)
        are recognised; all other lines of the section are ignored.

        Args:
            lines: Lines of the document.
            section_name: Section name, e.g. ``"Guidelines"``.

        Returns:
            The item texts without their list markers; empty when the
            section is missing or contains no list items.
        """
        section_content = self._extract_section(lines, section_name)
        if not section_content:
            return []

        items = []
        for line in section_content.split("\n"):
            match = self._LIST_ITEM_RE.match(line.strip())
            if match:
                items.append(match.group(1).strip())
        return items
- # 便捷函数
def load_skills_from_dir(skills_dir: Optional[str] = None) -> List[Skill]:
    """Load the built-in skills and, optionally, user-defined skills.

    Load order:
        1. The ``skills`` directory located next to this module is loaded
           whenever it exists.
        2. When ``skills_dir`` is given, the skills of that directory are
           appended after the built-in ones.

    Args:
        skills_dir: Directory with user-defined skills (optional).

    Returns:
        Built-in skills followed by user-defined skills.
    """
    collected: List[Skill] = []

    # Step 1: skills shipped alongside this module.
    bundled_dir = Path(__file__).parent.joinpath("skills")
    if bundled_dir.exists():
        bundled = SkillLoader(str(bundled_dir)).load_all()
        collected += bundled
        logger.info(f"加载了 {len(bundled)} 个内置 skills")

    # Step 2: nothing more to do without a user directory.
    if not skills_dir:
        return collected

    extra = SkillLoader(skills_dir).load_all()
    collected += extra
    logger.info(f"加载了 {len(extra)} 个自定义 skills")
    return collected
|