skill_loader.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. """
  2. Skill Loader - 从 Markdown 文件加载 Skills
  3. 支持两种格式:
  4. 格式1 - YAML Frontmatter(推荐):
  5. ---
  6. name: skill-name
  7. description: Skill description
  8. category: category-name
  9. scope: agent:*
  10. parent: parent-id
  11. ---
  12. ## When to use
  13. - Use case 1
  14. - Use case 2
  15. ## Guidelines
  16. - Guideline 1
  17. - Guideline 2
  18. 格式2 - 行内元数据(向后兼容):
  19. # Skill Name
  20. > category: web-automation
  21. > scope: agent:*
  22. ## Description
  23. ...
  24. ## Guidelines
  25. ...
  26. """
  27. import os
  28. import re
  29. from pathlib import Path
  30. from typing import List, Dict, Optional
  31. import logging
  32. from agent.models.memory import Skill
  33. logger = logging.getLogger(__name__)
  class SkillLoader:
      """Load ``Skill`` objects from Markdown files.

      Two layouts are recognised:

      * Front matter (recommended): a block of ``key: value`` lines between two
        ``---`` lines (``name``, ``description``, ``category``, ``scope``,
        ``parent``), followed by the Markdown body with optional
        ``## When to use`` and ``## Guidelines`` sections.
      * Inline metadata (legacy): a ``# Title`` line, optional ``> key: value``
        lines, then ``## Description`` / ``## Guidelines`` sections.

      The front matter parser only understands flat, single-line
      ``key: value`` pairs; it is not a general YAML parser.
      """

      # One Markdown list item: "-", "*" or "+" bullets, or "1." / "1)" ordered
      # markers, followed by whitespace and the item text.
      _LIST_ITEM_RE = re.compile(r"^(?:[-*+]|\d+[.)])\s+(.*)$")

      def __init__(self, skills_dir: str):
          """Remember the skills directory and warn if it does not exist.

          Args:
              skills_dir: Path of the directory that holds the ``.md`` files.
          """
          self.skills_dir = Path(skills_dir)
          if not self.skills_dir.exists():
              logger.warning(f"Skills 目录不存在: {skills_dir}")

      def load_all(self) -> List["Skill"]:
          """Load every ``*.md`` file directly inside the skills directory.

          Files that raise while loading are logged and skipped, so one bad
          file does not prevent the others from loading.

          Returns:
              The loaded skills, ordered by file path. An empty list when the
              skills directory is missing or is not a directory.
          """
          if not self.skills_dir.is_dir():
              return []

          skills = []
          # Sorted so the result does not depend on directory enumeration order.
          for md_file in sorted(self.skills_dir.glob("*.md")):
              try:
                  skill = self.load_file(md_file)
              except Exception as e:
                  # Best effort: record the failure (with traceback) and go on.
                  logger.error(f"加载 skill 失败 {md_file}: {e}", exc_info=True)
                  continue
              if skill:
                  skills.append(skill)
                  logger.info(f"成功加载 skill: {skill.name} from {md_file.name}")
          return skills

      def load_file(self, file_path: Path) -> Optional["Skill"]:
          """Load one skill from a Markdown file.

          Args:
              file_path: Path of the Markdown file (a ``Path`` or a plain string).

          Returns:
              The parsed skill, or None when the path is not an existing file
              or the content cannot be parsed.
          """
          file_path = Path(file_path)
          # is_file() also rejects directories, which would fail on read.
          if not file_path.is_file():
              logger.warning(f"文件不存在: {file_path}")
              return None

          # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which
          # would otherwise hide the opening "---" from format detection.
          content = file_path.read_text(encoding="utf-8-sig")
          return self.parse_markdown(content, file_path.stem)

      def parse_markdown(self, content: str, filename: str) -> Optional["Skill"]:
          """Parse Markdown text into a skill, choosing the parser by layout.

          Text whose first non-blank characters are ``---`` is handled as the
          front matter layout; anything else as the inline-metadata layout.

          Args:
              content: Markdown text.
              filename: File name without extension; used to derive a name
                  when the text does not provide one.

          Returns:
              The parsed skill, or None when the front matter is malformed.
          """
          # str.strip() does not remove a BOM, so drop it explicitly first.
          text = content.lstrip("\ufeff")
          if text.strip().startswith("---"):
              return self._parse_frontmatter_format(text, filename)
          return self._parse_inline_format(text, filename)

      def _parse_frontmatter_format(self, content: str, filename: str) -> Optional["Skill"]:
          """Parse the front matter layout.

          Expected shape::

              ---
              name: skill-name
              description: Skill description
              category: category-name
              scope: agent:*
              parent: parent-id
              ---
              ## When to use
              ...
              ## Guidelines
              ...

          Missing or empty ``name`` falls back to the file name, ``category``
          to ``"general"``, ``scope`` to ``"agent:*"`` and ``parent`` to None.
          Items of an optional ``## When to use`` section are appended to the
          description.

          Returns:
              The parsed skill, or None when the opening or closing ``---``
              line is missing.
          """
          # Skip a BOM and leading blank lines so this check agrees with the
          # detection in parse_markdown, which looks at the stripped text.
          lines = content.lstrip("\ufeff").lstrip().split("\n")
          if lines[0].strip() != "---":
              logger.warning("格式错误:缺少开始的 ---")
              return None

          end = self._find_frontmatter_end(lines)
          if end is None:
              # Without a closing delimiter the whole body would be read as
              # metadata, so treat it as a format error instead.
              logger.warning("格式错误:缺少结束的 ---")
              return None

          frontmatter = self._parse_frontmatter_lines(lines[1:end])
          body_lines = lines[end + 1:]

          # "or" (rather than a .get default) also covers keys with empty values.
          name = frontmatter.get("name") or self._filename_to_title(filename)
          description = frontmatter.get("description", "")
          category = frontmatter.get("category") or "general"
          scope = frontmatter.get("scope") or "agent:*"
          parent_id = frontmatter.get("parent") or None

          when_to_use = self._extract_list_items(body_lines, "When to use")
          if when_to_use:
              description += "\n\n适用场景:\n" + "\n".join(f"- {item}" for item in when_to_use)

          guidelines = self._extract_list_items(body_lines, "Guidelines")

          return Skill.create(
              scope=scope,
              name=name,
              description=description.strip(),
              category=category,
              content="\n".join(body_lines).strip(),  # Markdown body without the front matter
              guidelines=guidelines,
              parent_id=parent_id,
          )

      def _find_frontmatter_end(self, lines: List[str]) -> Optional[int]:
          """Return the index of the closing ``---`` line, or None if absent.

          ``lines[0]`` is taken to be the opening delimiter and is not examined.
          """
          for idx in range(1, len(lines)):
              if lines[idx].strip() == "---":
                  return idx
          return None

      def _parse_frontmatter_lines(self, lines: List[str]) -> Dict[str, str]:
          """Parse flat ``key: value`` lines into a dict.

          Blank lines, ``#`` comment lines and lines without ``:`` are ignored.
          Only the first ``:`` separates key from value, so values such as
          ``agent:*`` stay intact. One pair of matching surrounding quotes is
          removed from a value.
          """
          frontmatter = {}
          for raw in lines:
              line = raw.strip()
              if not line or line.startswith("#"):
                  continue
              key, sep, value = line.partition(":")
              if not sep:
                  continue
              value = value.strip()
              if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                  value = value[1:-1]
              frontmatter[key.strip()] = value
          return frontmatter

      def _parse_inline_format(self, content: str, filename: str) -> Optional["Skill"]:
          """Parse the legacy inline-metadata layout.

          Expected shape::

              # Skill Name
              > category: web-automation
              > scope: agent:*
              ## Description
              ...
              ## Guidelines
              ...

          The title and the ``> key: value`` metadata are read only from the
          header, i.e. the lines before the first ``##`` heading, so ``#``
          comments in code samples and blockquotes in the body are not
          mistaken for them.
          """
          lines = content.split("\n")
          header = lines[:self._inline_header_length(lines)]

          name = self._extract_title(header) or self._filename_to_title(filename)

          metadata = self._extract_metadata(header)
          # "or" also covers metadata keys that are present but empty.
          category = metadata.get("category") or "general"
          scope = metadata.get("scope") or "agent:*"
          parent_id = metadata.get("parent") or None

          description = self._extract_section(lines, "Description") or ""
          guidelines = self._extract_list_items(lines, "Guidelines")

          return Skill.create(
              scope=scope,
              name=name,
              description=description.strip(),
              category=category,
              content=self._strip_inline_header(lines),  # Markdown without title/metadata
              guidelines=guidelines,
              parent_id=parent_id,
          )

      def _inline_header_length(self, lines: List[str]) -> int:
          """Return how many leading lines come before the first ``##`` heading.

          Returns ``len(lines)`` when there is no such heading.
          """
          return next(
              (idx for idx, line in enumerate(lines) if line.strip().startswith("##")),
              len(lines),
          )

      def _strip_inline_header(self, lines: List[str]) -> str:
          """Return the document text without its title and metadata lines.

          Within the header (the lines before the first ``##`` heading) the
          first ``# `` line and every ``>`` line are removed. Everything from
          the first ``##`` heading onwards is kept untouched. The result is
          stripped of surrounding whitespace.
          """
          header_len = self._inline_header_length(lines)
          kept = []
          title_seen = False
          for line in lines[:header_len]:
              stripped = line.strip()
              if not title_seen and stripped.startswith("# "):
                  title_seen = True
                  continue
              if stripped.startswith(">"):
                  continue
              kept.append(line)
          kept.extend(lines[header_len:])
          return "\n".join(kept).strip()

      def _extract_title(self, lines: List[str]) -> Optional[str]:
          """Return the text of the first ``# `` heading in *lines*, or None."""
          for line in lines:
              stripped = line.strip()
              if stripped.startswith("# "):
                  return stripped[2:].strip()
          return None

      def _filename_to_title(self, filename: str) -> str:
          """Turn a kebab-case file name into a title, e.g. ``web-search`` -> ``Web Search``."""
          return " ".join(word.capitalize() for word in filename.split("-"))

      def _extract_metadata(self, lines: List[str]) -> Dict[str, str]:
          """Collect ``> key: value`` lines into a dict.

          Example input lines::

              > category: web-automation
              > scope: agent:*

          Lines starting with ``>`` that contain no ``:`` are ignored. Only the
          first ``:`` separates key from value.
          """
          metadata = {}
          for line in lines:
              stripped = line.strip()
              if not stripped.startswith(">"):
                  continue
              key, sep, value = stripped[1:].strip().partition(":")
              if sep:
                  metadata[key.strip()] = value.strip()
          return metadata

      def _extract_section(self, lines: List[str], section_name: str) -> Optional[str]:
          """Return the text under the ``## `` heading that names *section_name*.

          The heading matches when its line starts with ``## `` and contains
          *section_name*, ignoring case. The section runs until the next
          level-2 heading; ``###`` and deeper headings stay part of the section.

          Args:
              lines: Lines of the document.
              section_name: Section to look for, e.g. ``"Description"``.

          Returns:
              The stripped section text, or None when no line was collected.
          """
          wanted = section_name.lower()
          in_section = False
          section_lines = []

          for line in lines:
              stripped = line.strip()

              if stripped.startswith("## ") and wanted in stripped.lower():
                  in_section = True
                  continue

              if not in_section:
                  continue

              # Only a sibling "##" heading closes the section; sub-headings
              # ("###" and deeper) belong to it.
              if stripped.startswith("##") and not stripped.startswith("###"):
                  break

              section_lines.append(line)

          return "\n".join(section_lines).strip() if section_lines else None

      def _extract_list_items(self, lines: List[str], section_name: str) -> List[str]:
          """Return the list items found in the named section.

          Recognises ``-``, ``*`` and ``+`` bullets as well as ordered items
          written as ``1.`` or ``1)``. Other lines in the section are ignored.

          Args:
              lines: Lines of the document.
              section_name: Section to look for, e.g. ``"Guidelines"``.

          Returns:
              The item texts without their markers; empty when the section is
              missing or has no list items.
          """
          section_content = self._extract_section(lines, section_name)
          if not section_content:
              return []

          items = []
          for line in section_content.split("\n"):
              match = self._LIST_ITEM_RE.match(line.strip())
              if match:
                  items.append(match.group(1).strip())
          return items
  291. # 便捷函数
  def load_skills_from_dir(skills_dir: str) -> List[Skill]:
      """Load every skill found in a directory.

      Convenience wrapper that builds a ``SkillLoader`` for *skills_dir* and
      returns the result of its ``load_all()``.

      Args:
          skills_dir: Path of the skills directory.

      Returns:
          The list of loaded skills.
      """
      return SkillLoader(skills_dir).load_all()