# skill_loader.py
  1. """
  2. Skill Loader - 从 Markdown 文件加载 Skills
  3. 支持两种格式:
  4. 格式1 - YAML Frontmatter(推荐):
  5. ---
  6. name: skill-name
  7. description: Skill description
  8. category: category-name
  9. scope: agent:*
  10. parent: parent-id
  11. ---
  12. ## When to use
  13. - Use case 1
  14. - Use case 2
  15. ## Guidelines
  16. - Guideline 1
  17. - Guideline 2
  18. 格式2 - 行内元数据(向后兼容):
  19. # Skill Name
  20. > category: web-automation
  21. > scope: agent:*
  22. ## Description
  23. ...
  24. ## Guidelines
  25. ...
  26. """
  27. import os
  28. import re
  29. from pathlib import Path
  30. from typing import List, Dict, Optional
  31. import logging
  32. from agent.models.memory import Skill
  33. logger = logging.getLogger(__name__)
  34. class SkillLoader:
  35. """从 Markdown 文件加载 Skills"""
  36. def __init__(self, skills_dir: str):
  37. """
  38. 初始化 SkillLoader
  39. Args:
  40. skills_dir: skills 目录路径
  41. """
  42. self.skills_dir = Path(skills_dir)
  43. if not self.skills_dir.exists():
  44. logger.warning(f"Skills 目录不存在: {skills_dir}")
  45. def load_all(self) -> List[Skill]:
  46. """
  47. 加载目录下所有 .md 文件
  48. Returns:
  49. Skill 列表
  50. """
  51. if not self.skills_dir.exists():
  52. return []
  53. skills = []
  54. for md_file in self.skills_dir.glob("*.md"):
  55. try:
  56. skill = self.load_file(md_file)
  57. if skill:
  58. skills.append(skill)
  59. logger.info(f"成功加载 skill: {skill.name} from {md_file.name}")
  60. except Exception as e:
  61. logger.error(f"加载 skill 失败 {md_file}: {e}")
  62. return skills
  63. def load_file(self, file_path: Path) -> Optional[Skill]:
  64. """
  65. 从单个 Markdown 文件加载 Skill
  66. Args:
  67. file_path: Markdown 文件路径
  68. Returns:
  69. Skill 对象,解析失败返回 None
  70. """
  71. if not file_path.exists():
  72. logger.warning(f"文件不存在: {file_path}")
  73. return None
  74. with open(file_path, "r", encoding="utf-8") as f:
  75. content = f.read()
  76. return self.parse_markdown(content, file_path.stem)
  77. def parse_markdown(self, content: str, filename: str) -> Optional[Skill]:
  78. """
  79. 解析 Markdown 内容为 Skill
  80. 支持两种格式:
  81. 格式1 - YAML Frontmatter(推荐):
  82. ---
  83. name: skill-name
  84. description: Skill description
  85. category: category-name
  86. scope: agent:*
  87. ---
  88. ## When to use
  89. - Use case 1
  90. ## Guidelines
  91. - Guideline 1
  92. 格式2 - 行内元数据(向后兼容):
  93. # Skill Name
  94. > category: web-automation
  95. > scope: agent:*
  96. ## Description
  97. 描述内容...
  98. ## Guidelines
  99. - 指导原则1
  100. Args:
  101. content: Markdown 内容
  102. filename: 文件名(不含扩展名)
  103. Returns:
  104. Skill 对象
  105. """
  106. # 检测格式:是否有 YAML frontmatter
  107. if content.strip().startswith("---"):
  108. return self._parse_frontmatter_format(content, filename)
  109. else:
  110. return self._parse_inline_format(content, filename)
  111. def _parse_frontmatter_format(self, content: str, filename: str) -> Optional[Skill]:
  112. """
  113. 解析 YAML frontmatter 格式
  114. ---
  115. name: skill-name
  116. description: Skill description
  117. category: category-name
  118. scope: agent:*
  119. parent: parent-id
  120. ---
  121. ## When to use
  122. ...
  123. ## Guidelines
  124. ...
  125. """
  126. lines = content.split("\n")
  127. # 提取 YAML frontmatter
  128. if not lines[0].strip() == "---":
  129. logger.warning("格式错误:缺少开始的 ---")
  130. return None
  131. frontmatter = {}
  132. i = 1
  133. while i < len(lines):
  134. line = lines[i].strip()
  135. if line == "---":
  136. break
  137. if ":" in line:
  138. key, value = line.split(":", 1)
  139. frontmatter[key.strip()] = value.strip()
  140. i += 1
  141. # 提取元数据
  142. name = frontmatter.get("name") or self._filename_to_title(filename)
  143. description = frontmatter.get("description", "")
  144. category = frontmatter.get("category", "general")
  145. scope = frontmatter.get("scope", "agent:*")
  146. parent_id = frontmatter.get("parent")
  147. # 提取章节内容(从 frontmatter 之后开始)
  148. remaining_content = "\n".join(lines[i+1:])
  149. remaining_lines = remaining_content.split("\n")
  150. # 提取 "When to use" 章节(可选)
  151. when_to_use = self._extract_list_items(remaining_lines, "When to use")
  152. if when_to_use:
  153. # 将 "When to use" 添加到描述中
  154. description += "\n\n适用场景:\n" + "\n".join(f"- {item}" for item in when_to_use)
  155. # 提取 Guidelines
  156. guidelines = self._extract_list_items(remaining_lines, "Guidelines")
  157. # 创建 Skill
  158. return Skill.create(
  159. scope=scope,
  160. name=name,
  161. description=description.strip(),
  162. category=category,
  163. guidelines=guidelines,
  164. parent_id=parent_id,
  165. )
  166. def _parse_inline_format(self, content: str, filename: str) -> Optional[Skill]:
  167. """
  168. 解析行内元数据格式(向后兼容)
  169. # Skill Name
  170. > category: web-automation
  171. > scope: agent:*
  172. ## Description
  173. ...
  174. ## Guidelines
  175. ...
  176. """
  177. lines = content.split("\n")
  178. # 提取标题作为 name
  179. name = self._extract_title(lines) or self._filename_to_title(filename)
  180. # 提取元数据
  181. metadata = self._extract_metadata(lines)
  182. category = metadata.get("category", "general")
  183. scope = metadata.get("scope", "agent:*")
  184. parent_id = metadata.get("parent")
  185. # 提取描述
  186. description = self._extract_section(lines, "Description") or ""
  187. # 提取指导原则
  188. guidelines = self._extract_list_items(lines, "Guidelines")
  189. # 创建 Skill
  190. return Skill.create(
  191. scope=scope,
  192. name=name,
  193. description=description.strip(),
  194. category=category,
  195. guidelines=guidelines,
  196. parent_id=parent_id,
  197. )
  198. def _extract_title(self, lines: List[str]) -> Optional[str]:
  199. """提取 # 标题"""
  200. for line in lines:
  201. line = line.strip()
  202. if line.startswith("# "):
  203. return line[2:].strip()
  204. return None
  205. def _filename_to_title(self, filename: str) -> str:
  206. """将文件名转换为标题(kebab-case -> Title Case)"""
  207. return " ".join(word.capitalize() for word in filename.split("-"))
  208. def _extract_metadata(self, lines: List[str]) -> Dict[str, str]:
  209. """
  210. 提取元数据块(> key: value)
  211. Example:
  212. > category: web-automation
  213. > scope: agent:*
  214. """
  215. metadata = {}
  216. for line in lines:
  217. line = line.strip()
  218. if line.startswith(">"):
  219. # 去掉 > 符号
  220. content = line[1:].strip()
  221. # 分割 key: value
  222. if ":" in content:
  223. key, value = content.split(":", 1)
  224. metadata[key.strip()] = value.strip()
  225. return metadata
  226. def _extract_section(self, lines: List[str], section_name: str) -> Optional[str]:
  227. """
  228. 提取指定章节的内容
  229. Args:
  230. lines: 文件行列表
  231. section_name: 章节名称(如 "Description")
  232. Returns:
  233. 章节内容(纯文本)
  234. """
  235. in_section = False
  236. section_lines = []
  237. for line in lines:
  238. stripped = line.strip()
  239. # 遇到目标章节
  240. if stripped.startswith("## ") and section_name.lower() in stripped.lower():
  241. in_section = True
  242. continue
  243. # 遇到下一个章节,结束
  244. if in_section and stripped.startswith("##"):
  245. break
  246. # 收集章节内容
  247. if in_section:
  248. section_lines.append(line)
  249. return "\n".join(section_lines).strip() if section_lines else None
  250. def _extract_list_items(self, lines: List[str], section_name: str) -> List[str]:
  251. """
  252. 提取指定章节的列表项
  253. Args:
  254. lines: 文件行列表
  255. section_name: 章节名称(如 "Guidelines")
  256. Returns:
  257. 列表项数组
  258. """
  259. section_content = self._extract_section(lines, section_name)
  260. if not section_content:
  261. return []
  262. items = []
  263. for line in section_content.split("\n"):
  264. line = line.strip()
  265. # 匹配列表项(- item 或 * item)
  266. if line.startswith("- ") or line.startswith("* "):
  267. items.append(line[2:].strip())
  268. return items
  269. # 便捷函数
  270. def load_skills_from_dir(skills_dir: str) -> List[Skill]:
  271. """
  272. 从目录加载所有 Skills
  273. Args:
  274. skills_dir: skills 目录路径
  275. Returns:
  276. Skill 列表
  277. """
  278. loader = SkillLoader(skills_dir)
  279. return loader.load_all()