douyin_search_fallback.py
  1. """
  2. 抖音关键词搜索工具(备用)
  3. 调用内部爬虫服务进行抖音关键词搜索。
  4. """
  5. import asyncio
  6. import logging
  7. import time
  8. from typing import Optional
  9. import requests
  10. from agent.tools import tool, ToolResult
  11. logger = logging.getLogger(__name__)
  12. # 解析工具:从 business_data 的单条记录中安全提取 aweme_info
  13. def _get_aweme_info(item: object) -> dict:
  14. if not isinstance(item, dict):
  15. return {}
  16. data = item.get("data")
  17. if not isinstance(data, dict):
  18. return {}
  19. aweme_info = data.get("aweme_info")
  20. return aweme_info if isinstance(aweme_info, dict) else {}
  21. # API 基础配置
  22. DOUYIN_SEARCH_API = "https://api.tikhub.io/api/v1/douyin/search/fetch_video_search_v2"
  23. DEFAULT_TIMEOUT = 60.0
  24. @tool(description="通过关键词搜索抖音视频内容兜底接口")
  25. async def douyin_search_fallback(
  26. keyword: str,
  27. content_type: str = "0",
  28. sort_type: str = "0",
  29. publish_time: str = "0",
  30. cursor: int = 0,
  31. filter_duration: str = "0",
  32. search_id: str = "",
  33. backtrace: str = "",
  34. timeout: Optional[float] = None,
  35. ) -> ToolResult:
  36. """
  37. 抖音关键词搜索
  38. 通过关键词搜索抖音平台的视频内容,支持多种排序和筛选方式。
  39. Args:
  40. keyword: 搜索关键词,如 "机器人"
  41. cursor: 翻页游标(首次请求传 0,翻页时使用上次响应的 cursor)
  42. sort_type: 排序方式
  43. 0: 综合排序
  44. 1: 最多点赞
  45. 2: 最新发布
  46. publish_time: 发布时间筛选
  47. 0: 不限
  48. 1: 最近一天
  49. 7: 最近一周
  50. 180: 最近半年
  51. filter_duration: 视频时长筛选
  52. 0: 不限
  53. 0-1: 1 分钟以内
  54. 1-5: 1-5 分钟
  55. 5-10000: 5 分钟以上
  56. content_type: 内容类型筛选
  57. 0: 不限
  58. 1: 视频
  59. 2: 图片
  60. 3: 文章
  61. search_id: 搜索ID(分页时使用,从上一次响应获取)
  62. backtrace: 翻页回溯标识(分页时使用,从上一次响应获取)
  63. timeout: 超时时间(秒),默认 60
  64. Returns:
  65. ToolResult: 包含以下内容:
  66. - output: 文本格式的搜索结果摘要
  67. - metadata.search_results: 结构化的搜索结果列表
  68. - aweme_id: 视频ID
  69. - desc: 视频描述(最多100字符)
  70. - author: 作者信息
  71. - nickname: 作者昵称
  72. - sec_uid: 作者ID(完整,约80字符)
  73. - statistics: 统计数据
  74. - digg_count: 点赞数
  75. - comment_count: 评论数
  76. - share_count: 分享数
  77. - metadata.raw_data: 原始 API 返回数据
  78. Note:
  79. - 使用 cursor 参数可以获取下一页结果
  80. - 建议从 metadata.search_results 获取结构化数据,而非解析 output 文本
  81. - author.sec_uid 约 80 字符,使用时不要截断
  82. - 返回的 cursor 值可用于下一次搜索的 cursor 参数
  83. """
  84. start_time = time.time()
  85. try:
  86. payload = {
  87. "keyword": keyword,
  88. "cursor": cursor,
  89. "sort_type": sort_type,
  90. "publish_time": publish_time,
  91. "filter_duration": filter_duration,
  92. "content_type": content_type,
  93. "search_id": search_id,
  94. "backtrace": backtrace,
  95. }
  96. request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
  97. response = requests.post(
  98. DOUYIN_SEARCH_API,
  99. json=payload,
  100. headers={"Content-Type": "application/json",
  101. "Authorization": "Bearer hb8FH+kMgkuLlk7ORbWzzknwPRSSerhe3i7c4n+BW9m8mW6fI1CgVQi9CQ=="},
  102. timeout=request_timeout
  103. )
  104. response.raise_for_status()
  105. data = response.json()
  106. # 格式化输出摘要
  107. summary_lines = [f"搜索关键词「{keyword}」"]
  108. data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
  109. items = data_block.get("business_data", []) if isinstance(data_block.get("business_data"), list) else []
  110. business_config = data_block.get("business_config", {}) if isinstance(data_block.get("business_config"), dict) else {}
  111. has_more = business_config.get("has_more") == 1
  112. next_page = business_config.get("next_page", {}) if isinstance(business_config.get("next_page"), dict) else {}
  113. cursor_value = next_page.get("cursor", "")
  114. search_id_value = next_page.get("search_id", "")
  115. backtrace_value = business_config.get("backtrace", "")
  116. summary_lines.append(
  117. f"找到 {len(items)} 条结果"
  118. + (f",还有更多(cursor={cursor_value},search_id={search_id_value},backtrace={backtrace_value})" if has_more else "")
  119. )
  120. summary_lines.append("")
  121. for i, item in enumerate(items, 1):
  122. aweme_info = _get_aweme_info(item)
  123. aweme_id = aweme_info.get("aweme_id", "unknown")
  124. desc = (aweme_info.get("desc") or aweme_info.get("item_title") or "无标题")[:50]
  125. author = aweme_info.get("author") if isinstance(aweme_info.get("author"), dict) else {}
  126. author_name = author.get("nickname", "未知作者")
  127. author_id = author.get("sec_uid", "")
  128. stats = aweme_info.get("statistics") if isinstance(aweme_info.get("statistics"), dict) else {}
  129. digg_count = stats.get("digg_count", 0)
  130. comment_count = stats.get("comment_count", 0)
  131. share_count = stats.get("share_count", 0)
  132. summary_lines.append(f"{i}. {desc}")
  133. summary_lines.append(f" ID: {aweme_id}")
  134. summary_lines.append(f" 链接: https://www.douyin.com/video/{aweme_id}")
  135. summary_lines.append(f" 作者: {author_name}")
  136. summary_lines.append(f" sec_uid: {author_id}")
  137. summary_lines.append(f" 数据: 点赞 {digg_count:,} | 评论 {comment_count:,} | 分享 {share_count:,}")
  138. summary_lines.append("")
  139. duration_ms = int((time.time() - start_time) * 1000)
  140. logger.info(
  141. "douyin_search completed",
  142. extra={
  143. "keyword": keyword,
  144. "results_count": len(items),
  145. "has_more": has_more,
  146. "cursor": cursor_value,
  147. "duration_ms": duration_ms
  148. }
  149. )
  150. return ToolResult(
  151. title=f"抖音搜索: {keyword}",
  152. output="\n".join(summary_lines),
  153. long_term_memory=f"Searched Douyin for '{keyword}', found {len(items)} results",
  154. metadata={
  155. "request_params": {
  156. "keyword": keyword,
  157. "cursor": cursor,
  158. "sort_type": sort_type,
  159. "publish_time": publish_time,
  160. "filter_duration": filter_duration,
  161. "content_type": content_type,
  162. "search_id": search_id,
  163. "backtrace": backtrace,
  164. },
  165. "next_page": {
  166. "has_more": has_more,
  167. "cursor": cursor_value,
  168. "search_id": search_id_value,
  169. "backtrace": backtrace_value,
  170. },
  171. "raw_data": data,
  172. "search_results": [ # 结构化搜索结果,供 Agent 直接引用
  173. {
  174. "aweme_id": _get_aweme_info(item).get("aweme_id"),
  175. "desc": (
  176. _get_aweme_info(item).get("desc")
  177. or _get_aweme_info(item).get("item_title")
  178. or "无标题"
  179. )[:100],
  180. "author": {
  181. "nickname": (
  182. (_get_aweme_info(item).get("author") if isinstance(_get_aweme_info(item).get("author"), dict) else {})
  183. .get("nickname", "未知作者")
  184. ),
  185. "sec_uid": (
  186. (_get_aweme_info(item).get("author") if isinstance(_get_aweme_info(item).get("author"), dict) else {})
  187. .get("sec_uid", "")
  188. ),
  189. },
  190. "statistics": {
  191. "digg_count": (
  192. (_get_aweme_info(item).get("statistics") if isinstance(_get_aweme_info(item).get("statistics"), dict) else {})
  193. .get("digg_count", 0)
  194. ),
  195. "comment_count": (
  196. (_get_aweme_info(item).get("statistics") if isinstance(_get_aweme_info(item).get("statistics"), dict) else {})
  197. .get("comment_count", 0)
  198. ),
  199. "share_count": (
  200. (_get_aweme_info(item).get("statistics") if isinstance(_get_aweme_info(item).get("statistics"), dict) else {})
  201. .get("share_count", 0)
  202. ),
  203. }
  204. }
  205. for item in items
  206. ]
  207. }
  208. )
  209. except requests.exceptions.HTTPError as e:
  210. logger.error(
  211. "douyin_search HTTP error",
  212. extra={
  213. "keyword": keyword,
  214. "status_code": e.response.status_code,
  215. "error": str(e)
  216. }
  217. )
  218. return ToolResult(
  219. title="抖音搜索失败",
  220. output="",
  221. error=f"HTTP {e.response.status_code}: {e.response.text}"
  222. )
  223. except requests.exceptions.Timeout:
  224. logger.error("douyin_search timeout", extra={"keyword": keyword, "timeout": request_timeout})
  225. return ToolResult(
  226. title="抖音搜索失败",
  227. output="",
  228. error=f"请求超时({request_timeout}秒)"
  229. )
  230. except requests.exceptions.RequestException as e:
  231. logger.error("douyin_search network error", extra={"keyword": keyword, "error": str(e)})
  232. return ToolResult(
  233. title="抖音搜索失败",
  234. output="",
  235. error=f"网络错误: {str(e)}"
  236. )
  237. except Exception as e:
  238. logger.error("douyin_search unexpected error", extra={"keyword": keyword, "error": str(e)}, exc_info=True)
  239. return ToolResult(
  240. title="抖音搜索失败",
  241. output="",
  242. error=f"未知错误: {str(e)}"
  243. )
  244. async def main():
  245. result = await douyin_search(
  246. keyword="养老政策",
  247. )
  248. print(result.output)
  249. if __name__ == "__main__":
  250. asyncio.run(main())