|
|
@@ -0,0 +1,280 @@
|
|
|
"""
Douyin keyword search tool (example).

Runs Douyin keyword searches through the TikHub HTTP API.
"""
|
|
|
+import asyncio
|
|
|
+import logging
|
|
|
+import time
|
|
|
+from typing import Optional
|
|
|
+
|
|
|
+import requests
|
|
|
+
|
|
|
+from agent.tools import tool, ToolResult
|
|
|
+
|
|
|
+logger = logging.getLogger(__name__)
|
|
|
+
|
|
|
+
|
|
|
+# 解析工具:从 business_data 的单条记录中安全提取 aweme_info
|
|
|
+def _get_aweme_info(item: object) -> dict:
|
|
|
+ if not isinstance(item, dict):
|
|
|
+ return {}
|
|
|
+ data = item.get("data")
|
|
|
+ if not isinstance(data, dict):
|
|
|
+ return {}
|
|
|
+ aweme_info = data.get("aweme_info")
|
|
|
+ return aweme_info if isinstance(aweme_info, dict) else {}
|
|
|
+
|
|
|
+
|
|
|
# Base API configuration.
# Endpoint that douyin_search POSTs its search payload to.
DOUYIN_SEARCH_API = "https://api.tikhub.io/api/v1/douyin/search/fetch_video_search_v2"
# Request timeout in seconds used when the caller does not pass ``timeout``.
DEFAULT_TIMEOUT = 60.0
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
# Name of the environment variable that, when set, supplies the API token.
_TIKHUB_TOKEN_ENV_VAR = "TIKHUB_API_TOKEN"

# SECURITY NOTE(review): this credential was hard-coded in the original source.
# It is kept only as a backward-compatible fallback; rotate it and remove this
# constant once every deployment sets the environment variable above.
_EMBEDDED_API_TOKEN = "hb8FH+kMgkuLlk7ORbWzzknwPRSSerhe3i7c4n+BW9m8mW6fI1CgVQi9CQ=="


def _safe_int(value: object) -> int:
    """Coerce an API counter to ``int``; missing or malformed values become 0."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def _dict_or_empty(value: object) -> dict:
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _summarize_aweme(item: object) -> dict:
    """Flatten one ``business_data`` record into the structured result shape."""
    aweme_info = _get_aweme_info(item)
    author = _dict_or_empty(aweme_info.get("author"))
    stats = _dict_or_empty(aweme_info.get("statistics"))
    title = aweme_info.get("desc") or aweme_info.get("item_title") or "无标题"
    return {
        "aweme_id": aweme_info.get("aweme_id"),
        # str() guards the slice against a non-string description.
        "desc": str(title)[:100],
        "author": {
            "nickname": author.get("nickname", "未知作者"),
            "sec_uid": author.get("sec_uid", ""),
        },
        "statistics": {
            "digg_count": _safe_int(stats.get("digg_count", 0)),
            "comment_count": _safe_int(stats.get("comment_count", 0)),
            "share_count": _safe_int(stats.get("share_count", 0)),
        },
    }


@tool(description="通过关键词搜索抖音视频内容兜底接口")
async def douyin_search(
    keyword: str,
    content_type: str = "0",
    sort_type: str = "0",
    publish_time: str = "0",
    cursor: int = 0,
    filter_duration: str = "0",
    search_id: str = "",
    backtrace: str = "",
    timeout: Optional[float] = None,
) -> ToolResult:
    """
    Search Douyin videos by keyword.

    POSTs the search parameters to ``DOUYIN_SEARCH_API`` and turns the JSON
    response into a text summary plus structured metadata. The blocking HTTP
    call runs in the event loop's default executor so the loop is not stalled
    while waiting for the response.

    The bearer token is read from the ``TIKHUB_API_TOKEN`` environment
    variable when set; otherwise the token embedded in this module is used.

    Args:
        keyword: Search keyword, e.g. "机器人".
        content_type: Content type filter.
            0: any
            1: video
            2: image
            3: article
        sort_type: Sort order.
            0: comprehensive
            1: most liked
            2: newest
        publish_time: Publish time filter.
            0: any
            1: last day
            7: last week
            180: last half year
        cursor: Paging cursor (0 for the first request; afterwards use the
            cursor from the previous response).
        filter_duration: Video duration filter.
            0: any
            0-1: under 1 minute
            1-5: 1-5 minutes
            5-10000: over 5 minutes
        search_id: Search ID for paging, taken from the previous response.
        backtrace: Paging backtrace marker, taken from the previous response.
        timeout: Request timeout in seconds; defaults to ``DEFAULT_TIMEOUT``.

    Returns:
        ToolResult: On success it contains:
            - output: text summary of the search results
            - metadata.request_params: the parameters sent to the API
            - metadata.next_page: has_more / cursor / search_id / backtrace
              values for fetching the next page
            - metadata.search_results: structured result list, each with
                - aweme_id: video ID
                - desc: video description (at most 100 characters)
                - author: nickname and sec_uid (untruncated)
                - statistics: digg_count, comment_count, share_count as ints
            - metadata.raw_data: the raw JSON returned by the API
        On failure (HTTP error, timeout, network error, or any unexpected
        exception) ``output`` is empty and ``error`` describes the problem;
        no exception is raised to the caller.

    Note:
        - Prefer metadata.search_results over parsing the output text.
        - Do not truncate author.sec_uid when passing it on.
        - metadata.next_page.cursor can be passed as ``cursor`` next time.
    """
    import os  # local import: only needed for the token lookup below

    # Monotonic clock so the measured duration is immune to wall-clock jumps.
    start_time = time.monotonic()
    # Resolved before the try so every except handler can safely reference it.
    request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
    api_token = os.environ.get(_TIKHUB_TOKEN_ENV_VAR) or _EMBEDDED_API_TOKEN

    payload = {
        "keyword": keyword,
        "cursor": cursor,
        "sort_type": sort_type,
        "publish_time": publish_time,
        "filter_duration": filter_duration,
        "content_type": content_type,
        "search_id": search_id,
        "backtrace": backtrace,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_token}",
    }

    try:
        # requests is synchronous; run it in a worker thread so this coroutine
        # does not block the event loop for up to ``request_timeout`` seconds.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.post(
                DOUYIN_SEARCH_API,
                json=payload,
                headers=headers,
                timeout=request_timeout,
            ),
        )
        response.raise_for_status()
        data = response.json()

        # Every level of the response is type-checked because the upstream
        # schema is not under our control.
        data_block = _dict_or_empty(data.get("data") if isinstance(data, dict) else None)
        items = data_block.get("business_data")
        if not isinstance(items, list):
            items = []
        business_config = _dict_or_empty(data_block.get("business_config"))
        has_more = business_config.get("has_more") == 1
        next_page = _dict_or_empty(business_config.get("next_page"))
        cursor_value = next_page.get("cursor", "")
        search_id_value = next_page.get("search_id", "")
        backtrace_value = business_config.get("backtrace", "")

        # Flatten each record once and reuse it for both text and metadata.
        search_results = [_summarize_aweme(item) for item in items]

        # Build the text summary.
        summary_lines = [f"搜索关键词「{keyword}」"]
        summary_lines.append(
            f"找到 {len(items)} 条结果"
            + (f",还有更多(cursor={cursor_value},search_id={search_id_value},backtrace={backtrace_value})" if has_more else "")
        )
        summary_lines.append("")

        for i, entry in enumerate(search_results, 1):
            aweme_id = entry["aweme_id"] if entry["aweme_id"] is not None else "unknown"
            stats = entry["statistics"]
            summary_lines.append(f"{i}. {entry['desc'][:50]}")
            summary_lines.append(f" ID: {aweme_id}")
            summary_lines.append(f" 链接: https://www.douyin.com/video/{aweme_id}")
            summary_lines.append(f" 作者: {entry['author']['nickname']}")
            summary_lines.append(f" sec_uid: {entry['author']['sec_uid']}")
            summary_lines.append(
                f" 数据: 点赞 {stats['digg_count']:,} | 评论 {stats['comment_count']:,} | 分享 {stats['share_count']:,}"
            )
            summary_lines.append("")

        duration_ms = int((time.monotonic() - start_time) * 1000)
        logger.info(
            "douyin_search completed",
            extra={
                "keyword": keyword,
                "results_count": len(items),
                "has_more": has_more,
                "cursor": cursor_value,
                "duration_ms": duration_ms,
            },
        )

        return ToolResult(
            title=f"抖音搜索: {keyword}",
            output="\n".join(summary_lines),
            long_term_memory=f"Searched Douyin for '{keyword}', found {len(items)} results",
            metadata={
                "request_params": dict(payload),
                "next_page": {
                    "has_more": has_more,
                    "cursor": cursor_value,
                    "search_id": search_id_value,
                    "backtrace": backtrace_value,
                },
                "raw_data": data,
                # Structured results for the agent to reference directly.
                "search_results": search_results,
            },
        )
    except requests.exceptions.HTTPError as e:
        # raise_for_status always attaches a response, but guard against an
        # HTTPError raised without one so the handler itself cannot crash.
        has_response = e.response is not None
        status_code = e.response.status_code if has_response else None
        body_text = e.response.text if has_response else str(e)
        logger.error(
            "douyin_search HTTP error",
            extra={
                "keyword": keyword,
                "status_code": status_code,
                "error": str(e),
            },
        )
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"HTTP {status_code}: {body_text}",
        )
    except requests.exceptions.Timeout:
        logger.error("douyin_search timeout", extra={"keyword": keyword, "timeout": request_timeout})
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"请求超时({request_timeout}秒)",
        )
    except requests.exceptions.RequestException as e:
        logger.error("douyin_search network error", extra={"keyword": keyword, "error": str(e)})
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"网络错误: {str(e)}",
        )
    except Exception as e:
        # Tool boundary: report any unexpected failure instead of raising.
        logger.error("douyin_search unexpected error", extra={"keyword": keyword, "error": str(e)}, exc_info=True)
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"未知错误: {str(e)}",
        )
|
|
|
+
|
|
|
+
|
|
|
async def main():
    """Run one sample search with a fixed keyword and print its text summary."""
    search_result = await douyin_search(keyword="养老政策")
    print(search_result.output)


if __name__ == "__main__":
    asyncio.run(main())
|