zhangliang 1 deň pred
rodič
commit
6b29574b02

+ 280 - 0
examples/content_finder/tools/douyin_search_fallback.py

@@ -0,0 +1,280 @@
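"""
Douyin keyword search tool (fallback example).

Searches Douyin videos by keyword through the HTTP endpoint configured in
DOUYIN_SEARCH_API and exposes the result as an agent tool.
"""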
import asyncio
import logging
import os
import time
from typing import Optional

import requests

from agent.tools import tool, ToolResult
+
+logger = logging.getLogger(__name__)
+
+
+# 解析工具:从 business_data 的单条记录中安全提取 aweme_info
+def _get_aweme_info(item: object) -> dict:
+    if not isinstance(item, dict):
+        return {}
+    data = item.get("data")
+    if not isinstance(data, dict):
+        return {}
+    aweme_info = data.get("aweme_info")
+    return aweme_info if isinstance(aweme_info, dict) else {}
+
+
# Base API configuration.
# Endpoint that receives the keyword-search POST request.
DOUYIN_SEARCH_API: str = "https://api.tikhub.io/api/v1/douyin/search/fetch_video_search_v2"
# Request timeout in seconds, used when the caller does not pass one.
DEFAULT_TIMEOUT: float = 60.0
+
+
+
# Environment variable that supplies the bearer token for the search API.
_API_TOKEN_ENV_VAR = "TIKHUB_API_TOKEN"

# SECURITY(review): this credential used to be hard-coded in the request
# headers. It is kept only as a fallback so existing deployments keep working
# when the environment variable is not set; it should be rotated and removed.
_LEGACY_API_TOKEN = "hb8FH+kMgkuLlk7ORbWzzknwPRSSerhe3i7c4n+BW9m8mW6fI1CgVQi9CQ=="


def _as_dict(value: object) -> dict:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _as_count(value: object) -> int:
    """Coerce a counter from the API response to int; non-numeric values become 0."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return 0


def _parse_search_item(item: object) -> dict:
    """Flatten one ``business_data`` record into the structured result shape.

    ``desc`` is returned untruncated; callers slice it to the length they need.
    ``aweme_id`` is ``None`` when the record does not carry one.
    """
    aweme_info = _get_aweme_info(item)
    author = _as_dict(aweme_info.get("author"))
    stats = _as_dict(aweme_info.get("statistics"))
    desc = aweme_info.get("desc") or aweme_info.get("item_title") or "无标题"
    return {
        "aweme_id": aweme_info.get("aweme_id"),
        # Slicing below assumes text, so coerce unexpected payload types.
        "desc": desc if isinstance(desc, str) else str(desc),
        "author": {
            "nickname": author.get("nickname", "未知作者"),
            "sec_uid": author.get("sec_uid", ""),
        },
        "statistics": {
            "digg_count": _as_count(stats.get("digg_count", 0)),
            "comment_count": _as_count(stats.get("comment_count", 0)),
            "share_count": _as_count(stats.get("share_count", 0)),
        },
    }


def _post_search(payload: dict, timeout: float) -> object:
    """Send the search request synchronously and return the decoded JSON body.

    Raises the usual ``requests`` exceptions (HTTPError for non-2xx responses,
    Timeout, other RequestException subclasses).
    """
    token = os.environ.get(_API_TOKEN_ENV_VAR) or _LEGACY_API_TOKEN
    response = requests.post(
        DOUYIN_SEARCH_API,
        json=payload,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {token}",
        },
        timeout=timeout,
    )
    response.raise_for_status()
    return response.json()


@tool(description="通过关键词搜索抖音视频内容兜底接口")
async def douyin_search(
    keyword: str,
    content_type: str = "0",
    sort_type: str = "0",
    publish_time: str = "0",
    cursor: int = 0,
    filter_duration: str = "0",
    search_id: str = "",
    backtrace: str = "",
    timeout: Optional[float] = None,
) -> ToolResult:
    """
    Search Douyin videos by keyword.

    Posts the search parameters to DOUYIN_SEARCH_API and converts the response
    into a text summary plus structured metadata. The HTTP call runs in the
    default executor so the event loop is not blocked while waiting.

    Args:
        keyword: Search keyword, e.g. "机器人".
        content_type: Content type filter.
                    0: any
                    1: video
                    2: image
                    3: article
        sort_type: Sort order.
                   0: comprehensive
                   1: most liked
                   2: most recent
        publish_time: Publish time filter.
                    0: any
                    1: last day
                    7: last week
                    180: last half year
        cursor: Paging cursor (0 for the first request; afterwards the cursor
                returned by the previous response).
        filter_duration: Video duration filter.
                        0: any
                        0-1: under 1 minute
                        1-5: 1-5 minutes
                        5-10000: over 5 minutes
        search_id: Search ID for paging, taken from the previous response.
        backtrace: Paging backtrace marker, taken from the previous response.
        timeout: Request timeout in seconds; DEFAULT_TIMEOUT when None.

    Returns:
        ToolResult: on success it carries
            - output: text summary of the results (descriptions cut to 50 chars)
            - metadata.request_params: the parameters that were sent
            - metadata.next_page: has_more / cursor / search_id / backtrace
              for the follow-up request
            - metadata.search_results: structured result list
                - aweme_id: video ID (None when absent)
                - desc: video description (at most 100 characters)
                - author: nickname and sec_uid
                - statistics: digg_count, comment_count, share_count (ints)
            - metadata.raw_data: the decoded API response
        On failure ``output`` is empty and ``error`` describes the problem;
        no exception is propagated to the caller.

    Note:
        - Prefer metadata.search_results over parsing the output text.
        - Do not truncate author.sec_uid when passing it on.
        - The bearer token is read from the TIKHUB_API_TOKEN environment
          variable, falling back to the legacy embedded value.
    """
    start_time = time.time()
    # Resolved before the try block because the timeout handler reports it.
    request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
    request_params = {
        "keyword": keyword,
        "cursor": cursor,
        "sort_type": sort_type,
        "publish_time": publish_time,
        "filter_duration": filter_duration,
        "content_type": content_type,
        "search_id": search_id,
        "backtrace": backtrace,
    }

    try:
        # requests is blocking; run it in a worker thread to keep the loop free.
        loop = asyncio.get_running_loop()
        data = await loop.run_in_executor(
            None, _post_search, dict(request_params), request_timeout
        )

        data_block = _as_dict(_as_dict(data).get("data"))
        raw_items = data_block.get("business_data")
        items = raw_items if isinstance(raw_items, list) else []
        business_config = _as_dict(data_block.get("business_config"))
        has_more = business_config.get("has_more") == 1
        next_page = _as_dict(business_config.get("next_page"))
        cursor_value = next_page.get("cursor", "")
        search_id_value = next_page.get("search_id", "")
        backtrace_value = business_config.get("backtrace", "")

        # Parse every record once; both the summary and the metadata reuse it.
        parsed_items = [_parse_search_item(item) for item in items]

        # Build the text summary.
        summary_lines = [f"搜索关键词「{keyword}」"]
        summary_lines.append(
            f"找到 {len(items)} 条结果"
            + (f",还有更多(cursor={cursor_value},search_id={search_id_value},backtrace={backtrace_value})" if has_more else "")
        )
        summary_lines.append("")

        for i, parsed in enumerate(parsed_items, 1):
            aweme_id = parsed["aweme_id"] if parsed["aweme_id"] is not None else "unknown"
            stats = parsed["statistics"]
            summary_lines.append(f"{i}. {parsed['desc'][:50]}")
            summary_lines.append(f"   ID: {aweme_id}")
            summary_lines.append(f"   链接: https://www.douyin.com/video/{aweme_id}")
            summary_lines.append(f"   作者: {parsed['author']['nickname']}")
            summary_lines.append(f"   sec_uid: {parsed['author']['sec_uid']}")
            summary_lines.append(
                f"   数据: 点赞 {stats['digg_count']:,} | 评论 {stats['comment_count']:,} | 分享 {stats['share_count']:,}"
            )
            summary_lines.append("")

        duration_ms = int((time.time() - start_time) * 1000)
        logger.info(
            "douyin_search completed",
            extra={
                "keyword": keyword,
                "results_count": len(items),
                "has_more": has_more,
                "cursor": cursor_value,
                "duration_ms": duration_ms,
            },
        )

        return ToolResult(
            title=f"抖音搜索: {keyword}",
            output="\n".join(summary_lines),
            long_term_memory=f"Searched Douyin for '{keyword}', found {len(items)} results",
            metadata={
                "request_params": request_params,
                "next_page": {
                    "has_more": has_more,
                    "cursor": cursor_value,
                    "search_id": search_id_value,
                    "backtrace": backtrace_value,
                },
                "raw_data": data,
                # Structured results for the agent to reference directly.
                "search_results": [
                    {**parsed, "desc": parsed["desc"][:100]}
                    for parsed in parsed_items
                ],
            },
        )
    except requests.exceptions.HTTPError as e:
        # raise_for_status always attaches the response, but HTTPError allows None.
        status_code = e.response.status_code if e.response is not None else "unknown"
        body_text = e.response.text if e.response is not None else ""
        logger.error(
            "douyin_search HTTP error",
            extra={
                "keyword": keyword,
                "status_code": status_code,
                "error": str(e),
            },
        )
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"HTTP {status_code}: {body_text}",
        )
    except requests.exceptions.Timeout:
        logger.error("douyin_search timeout", extra={"keyword": keyword, "timeout": request_timeout})
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"请求超时({request_timeout}秒)",
        )
    except requests.exceptions.RequestException as e:
        logger.error("douyin_search network error", extra={"keyword": keyword, "error": str(e)})
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"网络错误: {str(e)}",
        )
    except Exception as e:
        # Tool boundary: report the failure to the agent instead of raising.
        logger.error("douyin_search unexpected error", extra={"keyword": keyword, "error": str(e)}, exc_info=True)
        return ToolResult(
            title="抖音搜索失败",
            output="",
            error=f"未知错误: {str(e)}",
        )
+
+
async def main():
    """Run one sample search and print its summary (manual smoke test)."""
    result = await douyin_search(
        keyword="养老政策",
    )
    # A failed search has an empty `output`; show the error text instead of
    # printing a blank line. getattr guards against result types without it.
    print(result.output or getattr(result, "error", None) or "")


if __name__ == "__main__":
    asyncio.run(main())