youtube.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. """
  2. YouTube 平台实现
  3. 后端:crawler.aiddit.com/crawler/youtube
  4. """
  5. import json
  6. from typing import Any, Dict, List, Optional
  7. import httpx
  8. from agent.tools.models import ToolResult
  9. from agent.tools.utils.image import build_image_grid, encode_base64, load_images
  10. from agent.tools.builtin.content.registry import (
  11. PlatformDef, ParamSpec, register_platform,
  12. )
# Base URL of the crawler backend. search() and detail() append
# "/youtube/keyword", "/youtube/detail" and "/youtube/captions" to it.
# NOTE(review): plain http — confirm the backend is only reached over a
# trusted network.
CRAWLER_BASE_URL = "http://crawler.aiddit.com/crawler"
# Passed as `timeout=` to the httpx.AsyncClient instances created in
# search() and detail() (httpx interprets a bare number as seconds).
DEFAULT_TIMEOUT = 60.0
  15. # ── 搜索 ──
  16. async def search(
  17. platform_id: str,
  18. keyword: str,
  19. max_count: int = 20,
  20. cursor: str = "",
  21. extras: Optional[Dict[str, Any]] = None,
  22. ) -> ToolResult:
  23. try:
  24. async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
  25. response = await client.post(
  26. f"{CRAWLER_BASE_URL}/youtube/keyword",
  27. json={"keyword": keyword},
  28. )
  29. response.raise_for_status()
  30. data = response.json()
  31. if data.get("code") != 0:
  32. return ToolResult(title="YouTube 搜索失败", output="", error=data.get("msg", "未知错误"))
  33. result_data = data.get("data", {})
  34. videos = result_data.get("data", []) if isinstance(result_data, dict) else []
  35. # 概览
  36. summary_list = []
  37. for idx, video in enumerate(videos[:max_count], 1):
  38. summary_list.append({
  39. "index": idx,
  40. "title": video.get("title", ""),
  41. "author": video.get("author", ""),
  42. "video_id": video.get("video_id", ""),
  43. })
  44. # 拼图
  45. images = []
  46. collage_b64 = await _build_video_collage(videos[:max_count])
  47. if collage_b64:
  48. images.append({"type": "base64", "media_type": "image/png", "data": collage_b64})
  49. return ToolResult(
  50. title=f"YouTube: {keyword}",
  51. output=json.dumps({"data": summary_list}, ensure_ascii=False, indent=2),
  52. long_term_memory=f"Searched YouTube for '{keyword}', {len(videos)} results.",
  53. images=images,
  54. metadata={"posts": videos[:max_count]},
  55. )
  56. except Exception as e:
  57. return ToolResult(title="YouTube 搜索异常", output="", error=str(e))
  58. # ── 详情 ──
  59. async def detail(post: Dict[str, Any], extras: Optional[Dict[str, Any]] = None) -> ToolResult:
  60. """
  61. YouTube 详情:需要额外 HTTP 调用获取字幕/下载等。
  62. post 来自搜索缓存,extras 支持 include_captions / download_video。
  63. """
  64. extras = extras or {}
  65. content_id = post.get("video_id", "")
  66. include_captions = extras.get("include_captions", True)
  67. download_video = extras.get("download_video", False)
  68. try:
  69. async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
  70. resp = await client.post(
  71. f"{CRAWLER_BASE_URL}/youtube/detail",
  72. json={"content_id": content_id},
  73. )
  74. resp.raise_for_status()
  75. detail_data = resp.json()
  76. if detail_data.get("code") != 0:
  77. return ToolResult(title="详情获取失败", output="", error=detail_data.get("msg", "未知错误"))
  78. result_data = detail_data.get("data", {})
  79. video_info = result_data.get("data", {}) if isinstance(result_data, dict) else {}
  80. # 字幕
  81. captions_text = None
  82. if include_captions or download_video:
  83. try:
  84. async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
  85. cap_resp = await client.post(
  86. f"{CRAWLER_BASE_URL}/youtube/captions",
  87. json={"content_id": content_id},
  88. )
  89. cap_resp.raise_for_status()
  90. cap_data = cap_resp.json()
  91. if cap_data.get("code") == 0:
  92. inner = cap_data.get("data", {})
  93. if isinstance(inner, dict):
  94. inner2 = inner.get("data", {})
  95. if isinstance(inner2, dict):
  96. captions_text = inner2.get("content")
  97. except Exception:
  98. pass
  99. # 下载
  100. video_path = None
  101. video_outline = None
  102. if download_video:
  103. import asyncio
  104. from agent.tools.builtin.content.media import download_youtube_video, parse_srt_to_outline
  105. video_path = await asyncio.to_thread(download_youtube_video, content_id)
  106. if captions_text:
  107. video_outline = parse_srt_to_outline(captions_text)
  108. output_data = {
  109. "video_id": content_id,
  110. "title": video_info.get("title", ""),
  111. "channel": video_info.get("channel_account_name", ""),
  112. "description": video_info.get("body_text", ""),
  113. "like_count": video_info.get("like_count"),
  114. "comment_count": video_info.get("comment_count"),
  115. "content_link": video_info.get("content_link", ""),
  116. "captions": captions_text,
  117. }
  118. if download_video:
  119. output_data["video_path"] = video_path
  120. output_data["video_outline"] = video_outline
  121. return ToolResult(
  122. title=f"YouTube 详情: {video_info.get('title', content_id)}",
  123. output=json.dumps(output_data, ensure_ascii=False, indent=2),
  124. long_term_memory=f"YouTube detail for {content_id}" + (" with captions" if captions_text else ""),
  125. )
  126. except Exception as e:
  127. return ToolResult(title="YouTube 详情异常", output="", error=str(e))
  128. # ── 拼图 ──
  129. async def _build_video_collage(videos: List[Dict[str, Any]]) -> Optional[str]:
  130. urls, titles = [], []
  131. for video in videos:
  132. thumb = None
  133. if "thumbnails" in video and isinstance(video["thumbnails"], list) and video["thumbnails"]:
  134. thumb = video["thumbnails"][0].get("url")
  135. elif "thumbnail" in video:
  136. thumb = video.get("thumbnail")
  137. elif "cover_url" in video:
  138. thumb = video.get("cover_url")
  139. if thumb:
  140. urls.append(thumb)
  141. titles.append(video.get("title", ""))
  142. if not urls:
  143. return None
  144. loaded = await load_images(urls)
  145. valid_images, valid_labels = [], []
  146. for (_, img), title in zip(loaded, titles):
  147. if img is not None:
  148. valid_images.append(img)
  149. valid_labels.append(title)
  150. if not valid_images:
  151. return None
  152. grid = build_image_grid(images=valid_images, labels=valid_labels)
  153. b64, _ = encode_base64(grid, format="PNG")
  154. return b64
  155. # ── 注册 ──
  156. _YOUTUBE = PlatformDef(
  157. id="youtube",
  158. name="YouTube",
  159. aliases=["yt", "油管"],
  160. detail_extras={
  161. "include_captions": ParamSpec(note="是否获取字幕,默认 True"),
  162. "download_video": ParamSpec(note="是否下载视频到本地,默认 False"),
  163. },
  164. )
  165. _YOUTUBE.search_impl = search
  166. _YOUTUBE.detail_impl = detail
  167. register_platform(_YOUTUBE)