| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420 |
- """
- 搜索工具模块
- 提供帖子搜索、帖子详情查看和建议词搜索功能,支持多个渠道平台。
- 主要功能:
- 1. search_posts - 帖子搜索(浏览模式:封面图+标题+内容截断)
- 2. select_post - 帖子详情(从搜索结果中选取单个帖子的完整内容)
- 3. get_search_suggestions - 获取平台的搜索补全建议词
- """
- import json
- from enum import Enum
- from typing import Any, Dict, List, Optional
- import httpx
- from agent.tools import tool, ToolResult
- from agent.tools.utils.image import build_image_grid, encode_base64, load_images
- # API 基础配置
- BASE_URL = "http://aigc-channel.aiddit.com/aigc/channel"
- DEFAULT_TIMEOUT = 60.0
- # 搜索结果缓存,以序号为 key
- _search_cache: Dict[int, Dict[str, Any]] = {}
- async def _build_collage(posts: List[Dict[str, Any]]) -> Optional[str]:
- """
- 将帖子封面图+序号+标题拼接成网格图,返回 base64 编码的 PNG。
- 复用 agent.tools.utils.image 中的共享拼图逻辑。
- """
- if not posts:
- return None
- # 收集有封面图的帖子
- urls: List[str] = []
- titles: List[str] = []
- for post in posts:
- imgs = post.get("images", [])
- cover_url = imgs[0] if imgs else None
- if cover_url:
- urls.append(cover_url)
- titles.append(post.get("title", "") or "")
- if not urls:
- return None
- # 并发加载图片
- loaded = await load_images(urls)
- # 过滤加载失败的(保持 url 和 title 对齐)
- valid_images = []
- valid_labels = []
- for (_, img), title in zip(loaded, titles):
- if img is not None:
- valid_images.append(img)
- valid_labels.append(title)
- if not valid_images:
- return None
- grid = build_image_grid(images=valid_images, labels=valid_labels)
- b64, _ = encode_base64(grid, format="PNG")
- return b64
- class PostSearchChannel(str, Enum):
- """
- 帖子搜索支持的渠道类型
- """
- XHS = "xhs" # 小红书
- GZH = "gzh" # 公众号
- SPH = "sph" # 视频号
- GITHUB = "github" # GitHub
- TOUTIAO = "toutiao" # 头条
- DOUYIN = "douyin" # 抖音
- BILI = "bili" # B站
- ZHIHU = "zhihu" # 知乎
- WEIBO = "weibo" # 微博
- class SuggestSearchChannel(str, Enum):
- """
- 建议词搜索支持的渠道类型
- """
- XHS = "xhs" # 小红书
- WX = "wx" # 微信
- GITHUB = "github" # GitHub
- TOUTIAO = "toutiao" # 头条
- DOUYIN = "douyin" # 抖音
- BILI = "bili" # B站
- ZHIHU = "zhihu" # 知乎
- @tool(
- display={
- "zh": {
- "name": "帖子搜索",
- "params": {
- "keyword": "搜索关键词",
- "channel": "搜索渠道(xhs=小红书, gzh=公众号, sph=视频号, github, toutiao=头条, douyin=抖音, bili=B站, zhihu=知乎, weibo=微博)",
- "cursor": "分页游标",
- "max_count": "返回条数",
- "content_type": "内容类型-视频/图文",
- "sort_type": "排序方式(xhs专用)",
- "publish_time": "发布时间筛选(xhs专用)",
- "filter_note_range": "笔记时长筛选(xhs专用)"
- }
- },
- "en": {
- "name": "Search Posts",
- "params": {
- "keyword": "Search keyword",
- "channel": "Search channel (xhs=XiaoHongShu, gzh=WeChat Official Account, sph=WeChat Channels, github, toutiao, douyin, bili, zhihu, weibo)",
- "cursor": "Pagination cursor",
- "max_count": "Max results",
- "content_type": "content type-视频/图文",
- "sort_type": "Sort type (xhs only)",
- "publish_time": "Publish time filter (xhs only)",
- "filter_note_range": "Note duration filter (xhs only)"
- }
- }
- }
- )
- async def search_posts(
- keyword: str,
- channel: str = "xhs",
- cursor: str = "",
- max_count: int = 20,
- content_type: str = "",
- sort_type: str = "综合排序",
- publish_time: str = "不限",
- filter_note_range: str = "不限"
- ) -> ToolResult:
- """
- 帖子搜索(浏览模式)
- 根据关键词在指定渠道平台搜索帖子,返回封面图+标题+内容摘要,用于快速浏览。
- 如需查看某个帖子的完整内容,请使用 select_post 工具。
- Args:
- keyword: 搜索关键词
- channel: 搜索渠道,支持的渠道有:
- - xhs: 小红书
- - gzh: 公众号
- - sph: 视频号
- - github: GitHub
- - toutiao: 头条
- - douyin: 抖音
- - bili: B站
- - zhihu: 知乎
- - weibo: 微博
- cursor: 分页游标,首次请求为空字符串,后续使用上次返回的 cursor
- max_count: 返回的最大条数,默认为 20
- content_type: 内容类型筛选,默认不限;
- xhs 可选值:'不限' | '图文' | '视频' | '文章';
- 其他渠道可选值:'视频' | '图文'
- sort_type: 排序方式(仅 xhs 有效),可选值:'综合排序' | '最新发布' | '最多点赞',默认'综合排序'
- publish_time: 发布时间筛选(仅 xhs 有效),可选值:'不限' | '近30天' | '近7天' | '近1天',默认'不限'
- filter_note_range: 笔记时长筛选,视频内容有效(仅 xhs 有效),可选值:'不限' | '1分钟以内' | '1-5分钟' | '5分钟以上',默认'不限'
- Returns:
- ToolResult 包含搜索结果摘要列表(封面图+标题+内容截断),
- 可通过 channel_content_id 调用 select_post 查看完整内容。
- """
- global _search_cache
- try:
- channel_value = channel.value if isinstance(channel, PostSearchChannel) else channel
- url = f"{BASE_URL}/data"
- if channel_value == "xhs":
- payload = {
- "type": channel_value,
- "keyword": keyword,
- "cursor": cursor,
- "content_type": content_type if content_type else "不限",
- "sort_type": sort_type,
- "publish_time": publish_time,
- "filter_note_range": filter_note_range,
- }
- else:
- payload = {
- "type": channel_value,
- "keyword": keyword,
- "cursor": cursor if cursor else "0",
- "max_count": max_count,
- "content_type": content_type,
- }
- async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
- response = await client.post(
- url,
- json=payload,
- headers={"Content-Type": "application/json"},
- )
- response.raise_for_status()
- data = response.json()
- posts = data.get("data", [])
- # 缓存完整结果(以序号为 key)
- _search_cache.clear()
- for idx, post in enumerate(posts):
- _search_cache[idx + 1] = post
- # 构建摘要列表(带序号)
- summary_list = []
- for idx, post in enumerate(posts):
- body = post.get("body_text", "") or ""
- title = post.get("title") or body[:20] or ""
- summary_list.append({
- "index": idx + 1,
- "channel_content_id": post.get("channel_content_id"),
- "title": title,
- "body_text": body[:100] + ("..." if len(body) > 100 else ""),
- "like_count": post.get("like_count"),
- "collect_count": post.get("collect_count"),
- "comment_count": post.get("comment_count"),
- "channel": post.get("channel"),
- "link": post.get("link"),
- "content_type": post.get("content_type"),
- "publish_timestamp": post.get("publish_timestamp"),
- })
- # 拼接封面图网格
- images = []
- try:
- collage_b64 = await _build_collage(posts)
- if collage_b64:
- images.append({
- "type": "base64",
- "media_type": "image/png",
- "data": collage_b64
- })
- except Exception as collage_error:
- # 图片拼接失败不影响主流程,记录错误但继续返回结果
- import logging
- logging.warning(f"Failed to build collage for {channel_value}: {collage_error}")
- output_data = {
- "code": data.get("code"),
- "message": data.get("message"),
- "data": summary_list
- }
- return ToolResult(
- title=f"搜索结果: {keyword} ({channel_value})",
- output=json.dumps(output_data, ensure_ascii=False, indent=2),
- long_term_memory=f"Searched '{keyword}' on {channel_value}, found {len(posts)} posts. Use select_post(index) to view full details of a specific post.",
- images=images
- )
- except httpx.HTTPStatusError as e:
- return ToolResult(
- title="搜索失败",
- output="",
- error=f"HTTP error {e.response.status_code}: {e.response.text}"
- )
- except Exception as e:
- return ToolResult(
- title="搜索失败",
- output="",
- error=str(e)
- )
- @tool(
- display={
- "zh": {
- "name": "帖子详情",
- "params": {
- "index": "帖子序号"
- }
- },
- "en": {
- "name": "Select Post",
- "params": {
- "index": "Post index"
- }
- }
- }
- )
- async def select_post(
- index: int,
- ) -> ToolResult:
- """
- 查看帖子详情
- 从最近一次 search_posts 的搜索结果中,根据序号选取指定帖子并返回完整内容(全部正文、全部图片、视频等)。
- 需要先调用 search_posts 进行搜索。
- Args:
- index: 帖子序号,来自 search_posts 返回结果中的 index 字段(从 1 开始)
- Returns:
- ToolResult 包含该帖子的完整信息和所有图片。
- """
- post = _search_cache.get(index)
- if not post:
- return ToolResult(
- title="未找到帖子",
- output="",
- error=f"未找到序号 {index} 的帖子,请先调用 search_posts 搜索。"
- )
- # 返回所有图片
- images = []
- for img_url in post.get("images", []):
- if img_url:
- images.append({
- "type": "url",
- "url": img_url
- })
- return ToolResult(
- title=f"帖子详情 #{index}: {post.get('title', '')}",
- output=json.dumps(post, ensure_ascii=False, indent=2),
- long_term_memory=f"Viewed post detail #{index}: {post.get('title', '')}",
- images=images
- )
- @tool(
- display={
- "zh": {
- "name": "获取搜索关键词补全建议",
- "params": {
- "keyword": "搜索关键词",
- "channel": "搜索渠道"
- }
- },
- "en": {
- "name": "Get Search Suggestions",
- "params": {
- "keyword": "Search keyword",
- "channel": "Search channel"
- }
- }
- }
- )
- async def get_search_suggestions(
- keyword: str,
- channel: str = "xhs",
- ) -> ToolResult:
- """
- 获取搜索关键词补全建议
- 根据关键词在指定渠道平台获取搜索建议词。
- Args:
- keyword: 搜索关键词
- channel: 搜索渠道,支持的渠道有:
- - xhs: 小红书
- - wx: 微信
- - github: GitHub
- - toutiao: 头条
- - douyin: 抖音
- - bili: B站
- - zhihu: 知乎
- Returns:
- ToolResult 包含建议词数据:
- {
- "code": 0, # 状态码,0 表示成功
- "message": "success", # 状态消息
- "data": [ # 建议词数据
- {
- "type": "xhs", # 渠道类型
- "list": [ # 建议词列表
- {
- "name": "彩虹染发" # 建议词
- }
- ]
- }
- ]
- }
- """
- try:
- # 处理 channel 参数,支持枚举和字符串
- channel_value = channel.value if isinstance(channel, SuggestSearchChannel) else channel
- url = f"{BASE_URL}/suggest"
- payload = {
- "type": channel_value,
- "keyword": keyword,
- }
- async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
- response = await client.post(
- url,
- json=payload,
- headers={"Content-Type": "application/json"},
- )
- response.raise_for_status()
- data = response.json()
- # 计算建议词数量
- suggestion_count = 0
- for item in data.get("data", []):
- suggestion_count += len(item.get("list", []))
- return ToolResult(
- title=f"建议词: {keyword} ({channel_value})",
- output=json.dumps(data, ensure_ascii=False, indent=2),
- long_term_memory=f"Got {suggestion_count} suggestions for '{keyword}' on {channel_value}"
- )
- except httpx.HTTPStatusError as e:
- return ToolResult(
- title="获取建议词失败",
- output="",
- error=f"HTTP error {e.response.status_code}: {e.response.text}"
- )
- except Exception as e:
- return ToolResult(
- title="获取建议词失败",
- output="",
- error=str(e)
- )
|