| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280 |
- """
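- Douyin keyword search tool (fallback).
- Searches Douyin videos by keyword by POSTing to the HTTP endpoint configured in DOUYIN_SEARCH_API.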
- """
- import asyncio
- import logging
- import time
- from typing import Optional
- import requests
- from agent.tools import tool, ToolResult
- logger = logging.getLogger(__name__)
- # 解析工具:从 business_data 的单条记录中安全提取 aweme_info
- def _get_aweme_info(item: object) -> dict:
- if not isinstance(item, dict):
- return {}
- data = item.get("data")
- if not isinstance(data, dict):
- return {}
- aweme_info = data.get("aweme_info")
- return aweme_info if isinstance(aweme_info, dict) else {}
# Base API configuration.
# Endpoint that the keyword video search request is POSTed to.
DOUYIN_SEARCH_API = "https://api.tikhub.io/api/v1/douyin/search/fetch_video_search_v2"
# Request timeout in seconds, used when the caller does not pass ``timeout``.
DEFAULT_TIMEOUT = 60.0
def _safe_count(value) -> int:
    """Coerce an engagement counter to ``int``; unusable values become 0."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def _build_search_result(item: object) -> dict:
    """Convert one ``business_data`` record into a structured result entry."""
    aweme_info = _get_aweme_info(item)

    author = aweme_info.get("author")
    if not isinstance(author, dict):
        author = {}

    stats = aweme_info.get("statistics")
    if not isinstance(stats, dict):
        stats = {}

    title = aweme_info.get("desc") or aweme_info.get("item_title") or "无标题"

    return {
        "aweme_id": aweme_info.get("aweme_id"),
        # str() keeps the slice safe if the API ever sends a non-string title.
        "desc": str(title)[:100],
        "author": {
            "nickname": author.get("nickname", "未知作者"),
            # sec_uid is passed through in full; callers must not truncate it.
            "sec_uid": author.get("sec_uid", ""),
        },
        "statistics": {
            # Counters are coerced so a null/str value cannot break the
            # ``{value:,}`` formatting of the text summary.
            "digg_count": _safe_count(stats.get("digg_count", 0)),
            "comment_count": _safe_count(stats.get("comment_count", 0)),
            "share_count": _safe_count(stats.get("share_count", 0)),
        },
    }


@tool(description="通过关键词搜索抖音视频内容兜底接口")
async def douyin_search_fallback(
    keyword: str,
    content_type: str = "0",
    sort_type: str = "0",
    publish_time: str = "0",
    cursor: int = 0,
    filter_duration: str = "0",
    search_id: str = "",
    backtrace: str = "",
    timeout: Optional[float] = None,
) -> ToolResult:
    """Search Douyin videos by keyword (fallback endpoint).

    POSTs the search parameters to ``DOUYIN_SEARCH_API`` and turns the
    response into a text summary plus structured metadata. The blocking HTTP
    call runs in the default executor so the event loop stays responsive.

    Args:
        keyword: Search keyword, e.g. "机器人".
        content_type: Content type filter.
            0: any
            1: video
            2: image
            3: article
        sort_type: Sort order.
            0: comprehensive
            1: most liked
            2: most recent
        publish_time: Publish time filter.
            0: any
            1: last day
            7: last week
            180: last half year
        cursor: Paging cursor (0 for the first request; afterwards use the
            cursor returned by the previous response).
        filter_duration: Video duration filter.
            0: any
            0-1: under 1 minute
            1-5: 1 to 5 minutes
            5-10000: over 5 minutes
        search_id: Search ID for paging, taken from the previous response.
        backtrace: Paging backtrace token, taken from the previous response.
        timeout: Request timeout in seconds; ``DEFAULT_TIMEOUT`` (60) when
            omitted.

    Returns:
        ToolResult. On success:
            - output: text summary of the results (descriptions cut to 50
              characters).
            - metadata.request_params: the parameters that were sent.
            - metadata.next_page: ``has_more``, ``cursor``, ``search_id`` and
              ``backtrace`` for the follow-up request.
            - metadata.raw_data: the decoded API response.
            - metadata.search_results: list of structured entries
                - aweme_id: video ID
                - desc: description (at most 100 characters)
                - author: ``nickname`` and ``sec_uid``
                - statistics: ``digg_count``, ``comment_count``,
                  ``share_count`` as ints
        On failure the result has an empty ``output`` and ``error`` set; HTTP,
        timeout, network and unexpected errors are all reported this way
        rather than raised.

    Note:
        - Prefer ``metadata.search_results`` over parsing ``output``.
        - ``author.sec_uid`` is long (about 80 characters); do not truncate it.
        - Pass the returned ``cursor`` as ``cursor`` on the next call to page.
    """
    start_time = time.time()
    # Resolved before the ``try`` so the timeout handler can always use it.
    request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
    request_params = {
        "keyword": keyword,
        "cursor": cursor,
        "sort_type": sort_type,
        "publish_time": publish_time,
        "filter_duration": filter_duration,
        "content_type": content_type,
        "search_id": search_id,
        "backtrace": backtrace,
    }

    try:
        # requests is synchronous; run it in the default executor so this
        # coroutine does not block the event loop while waiting on the network.
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(
            None,
            lambda: requests.post(
                DOUYIN_SEARCH_API,
                json=request_params,
                headers={
                    "Content-Type": "application/json",
                    # SECURITY(review): credential is hard-coded in source;
                    # it should be loaded from configuration / a secret store
                    # and this token rotated.
                    "Authorization": "Bearer hb8FH+kMgkuLlk7ORbWzzknwPRSSerhe3i7c4n+BW9m8mW6fI1CgVQi9CQ==",
                },
                timeout=request_timeout,
            ),
        )
        response.raise_for_status()
        data = response.json()

        # Walk the response defensively: every level falls back to an empty
        # container when the expected type is missing.
        data_block = data.get("data") if isinstance(data, dict) else None
        if not isinstance(data_block, dict):
            data_block = {}
        items = data_block.get("business_data")
        if not isinstance(items, list):
            items = []
        business_config = data_block.get("business_config")
        if not isinstance(business_config, dict):
            business_config = {}
        next_page = business_config.get("next_page")
        if not isinstance(next_page, dict):
            next_page = {}

        has_more = business_config.get("has_more") == 1
        cursor_value = next_page.get("cursor", "")
        search_id_value = next_page.get("search_id", "")
        # NOTE(review): backtrace is read from business_config while cursor
        # and search_id come from next_page — confirm against the API schema.
        backtrace_value = business_config.get("backtrace", "")

        # Parse every record once; both the text summary and the metadata
        # are derived from these entries.
        search_results = [_build_search_result(item) for item in items]

        found_line = f"找到 {len(items)} 条结果"
        if has_more:
            found_line += f",还有更多(cursor={cursor_value},search_id={search_id_value},backtrace={backtrace_value})"
        summary_lines = [f"搜索关键词「{keyword}」", found_line, ""]

        for i, result in enumerate(search_results, 1):
            aweme_id = result["aweme_id"] if result["aweme_id"] is not None else "unknown"
            stats = result["statistics"]
            summary_lines.extend(
                [
                    f"{i}. {result['desc'][:50]}",
                    f" ID: {aweme_id}",
                    f" 链接: https://www.douyin.com/video/{aweme_id}",
                    f" 作者: {result['author']['nickname']}",
                    f" sec_uid: {result['author']['sec_uid']}",
                    f" 数据: 点赞 {stats['digg_count']:,} | 评论 {stats['comment_count']:,} | 分享 {stats['share_count']:,}",
                    "",
                ]
            )

        duration_ms = int((time.time() - start_time) * 1000)
        logger.info(
            "douyin_search completed",
            extra={
                "keyword": keyword,
                "results_count": len(items),
                "has_more": has_more,
                "cursor": cursor_value,
                "duration_ms": duration_ms,
            },
        )

        return ToolResult(
            title=f"抖音搜索: {keyword}",
            output="\n".join(summary_lines),
            long_term_memory=f"Searched Douyin for '{keyword}', found {len(items)} results",
            metadata={
                "request_params": dict(request_params),
                "next_page": {
                    "has_more": has_more,
                    "cursor": cursor_value,
                    "search_id": search_id_value,
                    "backtrace": backtrace_value,
                },
                "raw_data": data,
                # Structured results for the agent to reference directly.
                "search_results": search_results,
            },
        )
    except requests.exceptions.HTTPError as e:
        # Compare with None explicitly: a Response carrying a 4xx/5xx status
        # evaluates as falsy, so a plain truthiness test would misfire.
        http_response = e.response
        status_code = http_response.status_code if http_response is not None else None
        body_text = http_response.text if http_response is not None else str(e)
        logger.error(
            "douyin_search HTTP error",
            extra={
                "keyword": keyword,
                "status_code": status_code,
                "error": str(e),
            },
        )
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"HTTP {status_code}: {body_text}",
        )
    except requests.exceptions.Timeout:
        logger.error("douyin_search timeout", extra={"keyword": keyword, "timeout": request_timeout})
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"请求超时({request_timeout}秒)",
        )
    except requests.exceptions.RequestException as e:
        logger.error("douyin_search network error", extra={"keyword": keyword, "error": str(e)})
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"网络错误: {str(e)}",
        )
    except Exception as e:
        # Top-level boundary for the tool: report the failure to the agent
        # instead of raising, but keep the traceback in the log.
        logger.error("douyin_search unexpected error", extra={"keyword": keyword, "error": str(e)}, exc_info=True)
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"未知错误: {str(e)}",
        )
async def main():
    """Run one sample search and print its text summary (manual smoke test)."""
    # Call the search tool defined in this module, ``douyin_search_fallback``.
    result = await douyin_search_fallback(
        keyword="养老政策",
    )
    print(result.output)


if __name__ == "__main__":
    asyncio.run(main())
|