skill_loader.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
"""
Skill Loader - load Skills from Markdown files.

Two formats are supported.

Format 1 - YAML frontmatter (recommended):

    ---
    name: skill-name
    description: Skill description
    category: category-name
    scope: agent:*
    parent: parent-id
    ---

    ## When to use
    - Use case 1
    - Use case 2

    ## Guidelines
    - Guideline 1
    - Guideline 2

Format 2 - inline metadata (backward compatible):

    # Skill Name

    > category: web-automation
    > scope: agent:*

    ## Description
    ...

    ## Guidelines
    ...
"""
  27. import os
  28. import re
  29. import os
  30. from pathlib import Path
  31. from typing import List, Dict, Optional
  32. import logging
  33. from agent.skill.models import Skill
  34. logger = logging.getLogger(__name__)
  35. class SkillLoader:
  36. """从 Markdown 文件加载 Skills"""
  37. def __init__(self, skills_dir: str):
  38. """
  39. 初始化 SkillLoader
  40. Args:
  41. skills_dir: skills 目录路径
  42. """
  43. self.skills_dir = Path(skills_dir)
  44. if not self.skills_dir.exists():
  45. logger.warning(f"Skills 目录不存在: {skills_dir}")
  46. def load_all(self) -> List[Skill]:
  47. """
  48. 加载目录下所有 .md 文件
  49. Returns:
  50. Skill 列表
  51. """
  52. if not self.skills_dir.exists():
  53. return []
  54. skills = []
  55. for md_file in self.skills_dir.glob("*.md"):
  56. try:
  57. skill = self.load_file(md_file)
  58. if skill:
  59. skills.append(skill)
  60. logger.info(f"成功加载 skill: {skill.name} from {md_file.name}")
  61. except Exception as e:
  62. logger.error(f"加载 skill 失败 {md_file}: {e}")
  63. return skills
  64. def load_file(self, file_path: Path) -> Optional[Skill]:
  65. """
  66. 从单个 Markdown 文件加载 Skill
  67. Args:
  68. file_path: Markdown 文件路径
  69. Returns:
  70. Skill 对象,解析失败返回 None
  71. """
  72. if not file_path.exists():
  73. logger.warning(f"文件不存在: {file_path}")
  74. return None
  75. with open(file_path, "r", encoding="utf-8") as f:
  76. content = f.read()
  77. return self.parse_markdown(content, file_path.stem)
  78. def parse_markdown(self, content: str, filename: str) -> Optional[Skill]:
  79. """
  80. 解析 Markdown 内容为 Skill
  81. 支持两种格式:
  82. 格式1 - YAML Frontmatter(推荐):
  83. ---
  84. name: skill-name
  85. description: Skill description
  86. category: category-name
  87. scope: agent:*
  88. ---
  89. ## When to use
  90. - Use case 1
  91. ## Guidelines
  92. - Guideline 1
  93. 格式2 - 行内元数据(向后兼容):
  94. # Skill Name
  95. > category: web-automation
  96. > scope: agent:*
  97. ## Description
  98. 描述内容...
  99. ## Guidelines
  100. - 指导原则1
  101. Args:
  102. content: Markdown 内容
  103. filename: 文件名(不含扩展名)
  104. Returns:
  105. Skill 对象
  106. """
  107. # 检测格式:是否有 YAML frontmatter
  108. if content.strip().startswith("---"):
  109. return self._parse_frontmatter_format(content, filename)
  110. else:
  111. return self._parse_inline_format(content, filename)
  112. def _parse_frontmatter_format(self, content: str, filename: str) -> Optional[Skill]:
  113. """
  114. 解析 YAML frontmatter 格式
  115. ---
  116. name: skill-name
  117. description: Skill description
  118. category: category-name
  119. scope: agent:*
  120. parent: parent-id
  121. ---
  122. ## When to use
  123. ...
  124. ## Guidelines
  125. ...
  126. """
  127. lines = content.split("\n")
  128. # 提取 YAML frontmatter
  129. if not lines[0].strip() == "---":
  130. logger.warning("格式错误:缺少开始的 ---")
  131. return None
  132. frontmatter = {}
  133. i = 1
  134. while i < len(lines):
  135. line = lines[i].strip()
  136. if line == "---":
  137. break
  138. if ":" in line:
  139. key, value = line.split(":", 1)
  140. frontmatter[key.strip()] = value.strip()
  141. i += 1
  142. # 提取元数据
  143. name = frontmatter.get("name") or self._filename_to_title(filename)
  144. description = frontmatter.get("description", "")
  145. category = frontmatter.get("category", "general")
  146. scope = frontmatter.get("scope", "agent:*")
  147. parent_id = frontmatter.get("parent")
  148. # 提取章节内容(从 frontmatter 之后开始)
  149. remaining_content = "\n".join(lines[i+1:])
  150. remaining_lines = remaining_content.split("\n")
  151. # 提取 "When to use" 章节(可选)
  152. when_to_use = self._extract_list_items(remaining_lines, "When to use")
  153. if when_to_use:
  154. # 将 "When to use" 添加到描述中
  155. description += "\n\n适用场景:\n" + "\n".join(f"- {item}" for item in when_to_use)
  156. # 提取 Guidelines
  157. guidelines = self._extract_list_items(remaining_lines, "Guidelines")
  158. # 保存完整的内容(去掉 frontmatter)
  159. content = remaining_content.strip()
  160. # 创建 Skill
  161. return Skill.create(
  162. scope=scope,
  163. name=name,
  164. description=description.strip(),
  165. category=category,
  166. content=content, # 完整的 Markdown 内容
  167. guidelines=guidelines,
  168. parent_id=parent_id,
  169. )
  170. def _parse_inline_format(self, content: str, filename: str) -> Optional[Skill]:
  171. """
  172. 解析行内元数据格式(向后兼容)
  173. # Skill Name
  174. > category: web-automation
  175. > scope: agent:*
  176. ## Description
  177. ...
  178. ## Guidelines
  179. ...
  180. """
  181. lines = content.split("\n")
  182. # 提取标题作为 name
  183. name = self._extract_title(lines) or self._filename_to_title(filename)
  184. # 提取元数据
  185. metadata = self._extract_metadata(lines)
  186. category = metadata.get("category", "general")
  187. scope = metadata.get("scope", "agent:*")
  188. parent_id = metadata.get("parent")
  189. # 提取描述
  190. description = self._extract_section(lines, "Description") or ""
  191. # 提取指导原则
  192. guidelines = self._extract_list_items(lines, "Guidelines")
  193. # 提取完整内容(去掉元数据行和标题行)
  194. content_lines = []
  195. skip_metadata = False
  196. for line in lines:
  197. stripped = line.strip()
  198. # 跳过标题
  199. if stripped.startswith("# "):
  200. continue
  201. # 跳过元数据
  202. if stripped.startswith(">"):
  203. skip_metadata = True
  204. continue
  205. # 如果之前是元数据,跳过后续的空行
  206. if skip_metadata and not stripped:
  207. skip_metadata = False
  208. continue
  209. content_lines.append(line)
  210. content = "\n".join(content_lines).strip()
  211. # 创建 Skill
  212. return Skill.create(
  213. scope=scope,
  214. name=name,
  215. description=description.strip(),
  216. category=category,
  217. content=content, # 完整的 Markdown 内容
  218. guidelines=guidelines,
  219. parent_id=parent_id,
  220. )
  221. def _extract_title(self, lines: List[str]) -> Optional[str]:
  222. """提取 # 标题"""
  223. for line in lines:
  224. line = line.strip()
  225. if line.startswith("# "):
  226. return line[2:].strip()
  227. return None
  228. def _filename_to_title(self, filename: str) -> str:
  229. """将文件名转换为标题(kebab-case -> Title Case)"""
  230. return " ".join(word.capitalize() for word in filename.split("-"))
  231. def _extract_metadata(self, lines: List[str]) -> Dict[str, str]:
  232. """
  233. 提取元数据块(> key: value)
  234. Example:
  235. > category: web-automation
  236. > scope: agent:*
  237. """
  238. metadata = {}
  239. for line in lines:
  240. line = line.strip()
  241. if line.startswith(">"):
  242. # 去掉 > 符号
  243. content = line[1:].strip()
  244. # 分割 key: value
  245. if ":" in content:
  246. key, value = content.split(":", 1)
  247. metadata[key.strip()] = value.strip()
  248. return metadata
  249. def _extract_section(self, lines: List[str], section_name: str) -> Optional[str]:
  250. """
  251. 提取指定章节的内容
  252. Args:
  253. lines: 文件行列表
  254. section_name: 章节名称(如 "Description")
  255. Returns:
  256. 章节内容(纯文本)
  257. """
  258. in_section = False
  259. section_lines = []
  260. for line in lines:
  261. stripped = line.strip()
  262. # 遇到目标章节
  263. if stripped.startswith("## ") and section_name.lower() in stripped.lower():
  264. in_section = True
  265. continue
  266. # 遇到下一个章节,结束
  267. if in_section and stripped.startswith("##"):
  268. break
  269. # 收集章节内容
  270. if in_section:
  271. section_lines.append(line)
  272. return "\n".join(section_lines).strip() if section_lines else None
  273. def _extract_list_items(self, lines: List[str], section_name: str) -> List[str]:
  274. """
  275. 提取指定章节的列表项
  276. Args:
  277. lines: 文件行列表
  278. section_name: 章节名称(如 "Guidelines")
  279. Returns:
  280. 列表项数组
  281. """
  282. section_content = self._extract_section(lines, section_name)
  283. if not section_content:
  284. return []
  285. items = []
  286. for line in section_content.split("\n"):
  287. line = line.strip()
  288. # 匹配列表项(- item 或 * item)
  289. if line.startswith("- ") or line.startswith("* "):
  290. items.append(line[2:].strip())
  291. return items
  292. # 便捷函数
  293. def load_skills_from_dir(skills_dir: Optional[str] = None) -> List[Skill]:
  294. """
  295. 从目录加载所有 Skills
  296. 加载优先级:
  297. 1. 始终加载内置 skills(agent/skills/)
  298. 2. 如果指定了 skills_dir,额外加载该目录的 skills
  299. Args:
  300. skills_dir: 用户自定义 skills 目录路径(可选)
  301. Returns:
  302. Skill 列表(内置 + 自定义)
  303. """
  304. all_skills = []
  305. disable_builtin = os.getenv("AGENT_DISABLE_BUILTIN_SKILLS", "").lower() in {"1", "true", "yes", "on"}
  306. # 1. 加载内置 skills(agent/skill/skills/)
  307. if disable_builtin:
  308. logger.info("已禁用内置 skills 加载 (AGENT_DISABLE_BUILTIN_SKILLS)")
  309. else:
  310. builtin_skills_dir = Path(__file__).parent / "skills"
  311. if builtin_skills_dir.exists():
  312. loader = SkillLoader(str(builtin_skills_dir))
  313. builtin_skills = loader.load_all()
  314. all_skills.extend(builtin_skills)
  315. logger.info(f"加载了 {len(builtin_skills)} 个内置 skills")
  316. # 2. 加载用户自定义 skills(如果提供)
  317. if skills_dir:
  318. loader = SkillLoader(skills_dir)
  319. custom_skills = loader.load_all()
  320. all_skills.extend(custom_skills)
  321. logger.info(f"加载了 {len(custom_skills)} 个自定义 skills")
  322. return all_skills