douyin_search.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. """
  2. 抖音关键词搜索工具(示例)
  3. 调用内部爬虫服务进行抖音关键词搜索。
  4. """
  5. import asyncio
  6. import logging
  7. import time
  8. from typing import Optional
  9. import requests
  10. from agent.tools import tool, ToolResult
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# API base configuration
# Endpoint that douyin_search POSTs its keyword-search payload to.
DOUYIN_SEARCH_API = "http://crawapi.piaoquantv.com/crawler/dou_yin/keyword"
# Request timeout in seconds, used when the caller passes timeout=None.
DEFAULT_TIMEOUT = 60.0
  15. @tool(description="通过关键词搜索抖音视频内容")
  16. async def douyin_search(
  17. keyword: str,
  18. content_type: str = "视频",
  19. sort_type: str = "综合排序",
  20. publish_time: str = "不限",
  21. cursor: str = "0",
  22. account_id: str = "771431186",
  23. timeout: Optional[float] = None,
  24. ) -> ToolResult:
  25. """
  26. 抖音关键词搜索
  27. 通过关键词搜索抖音平台的视频内容,支持多种排序和筛选方式。
  28. Args:
  29. keyword: 搜索关键词
  30. content_type: 内容类型(可选:视频/图文, 默认 "视频")
  31. sort_type: 排序方式(可选:综合排序/最新发布/最多点赞,默认 "综合排序")
  32. publish_time: 发布时间范围(可选:不限/一天内/一周内/半年内,默认 "不限")
  33. cursor: 分页游标,用于获取下一页结果,默认 "0"
  34. account_id: 账号ID(可选)
  35. timeout: 超时时间(秒),默认 60
  36. Returns:
  37. ToolResult: 包含以下内容:
  38. - output: 文本格式的搜索结果摘要
  39. - metadata.search_results: 结构化的搜索结果列表
  40. - aweme_id: 视频ID
  41. - desc: 视频描述(最多100字符)
  42. - author: 作者信息
  43. - nickname: 作者昵称
  44. - sec_uid: 作者ID(完整,约80字符)
  45. - statistics: 统计数据
  46. - digg_count: 点赞数
  47. - comment_count: 评论数
  48. - share_count: 分享数
  49. - metadata.raw_data: 原始 API 返回数据
  50. Note:
  51. - 使用 cursor 参数可以获取下一页结果
  52. - 建议从 metadata.search_results 获取结构化数据,而非解析 output 文本
  53. - author.sec_uid 约 80 字符,使用时不要截断
  54. - 返回的 cursor 值可用于下一次搜索的 cursor 参数
  55. """
  56. start_time = time.time()
  57. try:
  58. payload = {
  59. "keyword": keyword,
  60. "content_type": content_type,
  61. "sort_type": sort_type,
  62. "publish_time": publish_time,
  63. "cursor": cursor,
  64. "account_id": account_id
  65. }
  66. request_timeout = timeout if timeout is not None else DEFAULT_TIMEOUT
  67. response = requests.post(
  68. DOUYIN_SEARCH_API,
  69. json=payload,
  70. headers={"Content-Type": "application/json"},
  71. timeout=request_timeout
  72. )
  73. response.raise_for_status()
  74. data = response.json()
  75. # 格式化输出摘要
  76. summary_lines = [f"搜索关键词「{keyword}」"]
  77. data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
  78. items = data_block.get("data", []) if isinstance(data_block.get("data"), list) else []
  79. has_more = data_block.get("has_more", False)
  80. cursor_value = data_block.get("next_cursor", "")
  81. summary_lines.append(f"找到 {len(items)} 条结果" + (f",还有更多(cursor={cursor_value})" if has_more else ""))
  82. summary_lines.append("")
  83. for i, item in enumerate(items, 1):
  84. aweme_id = item.get("aweme_id", "unknown")
  85. desc = (item.get("desc") or item.get("item_title") or "无标题")[:50]
  86. author = item.get("author", {})
  87. author_name = author.get("nickname", "未知作者")
  88. author_id = author.get("sec_uid", "")
  89. stats = item.get("statistics", {})
  90. digg_count = stats.get("digg_count", 0)
  91. comment_count = stats.get("comment_count", 0)
  92. share_count = stats.get("share_count", 0)
  93. summary_lines.append(f"{i}. {desc}")
  94. summary_lines.append(f" ID: {aweme_id}")
  95. summary_lines.append(f" 链接: https://www.douyin.com/video/{aweme_id}")
  96. summary_lines.append(f" 作者: {author_name}")
  97. summary_lines.append(f" sec_uid: {author_id}")
  98. summary_lines.append(f" 数据: 点赞 {digg_count:,} | 评论 {comment_count:,} | 分享 {share_count:,}")
  99. summary_lines.append("")
  100. duration_ms = int((time.time() - start_time) * 1000)
  101. logger.info(
  102. "douyin_search completed",
  103. extra={
  104. "keyword": keyword,
  105. "results_count": len(items),
  106. "has_more": has_more,
  107. "cursor": cursor_value,
  108. "duration_ms": duration_ms
  109. }
  110. )
  111. return ToolResult(
  112. title=f"抖音搜索: {keyword}",
  113. output="\n".join(summary_lines),
  114. long_term_memory=f"Searched Douyin for '{keyword}', found {len(items)} results",
  115. metadata={
  116. "raw_data": data,
  117. "search_results": [ # 结构化搜索结果,供 Agent 直接引用
  118. {
  119. "aweme_id": item.get("aweme_id"),
  120. "desc": (item.get("desc") or item.get("item_title") or "无标题")[:100],
  121. "author": {
  122. "nickname": item.get("author", {}).get("nickname", "未知作者"),
  123. "sec_uid": item.get("author", {}).get("sec_uid", ""),
  124. },
  125. "statistics": {
  126. "digg_count": item.get("statistics", {}).get("digg_count", 0),
  127. "comment_count": item.get("statistics", {}).get("comment_count", 0),
  128. "share_count": item.get("statistics", {}).get("share_count", 0),
  129. }
  130. }
  131. for item in items
  132. ]
  133. }
  134. )
  135. except requests.exceptions.HTTPError as e:
  136. logger.error(
  137. "douyin_search HTTP error",
  138. extra={
  139. "keyword": keyword,
  140. "status_code": e.response.status_code,
  141. "error": str(e)
  142. }
  143. )
  144. return ToolResult(
  145. title="抖音搜索失败",
  146. output="",
  147. error=f"HTTP {e.response.status_code}: {e.response.text}"
  148. )
  149. except requests.exceptions.Timeout:
  150. logger.error("douyin_search timeout", extra={"keyword": keyword, "timeout": request_timeout})
  151. return ToolResult(
  152. title="抖音搜索失败",
  153. output="",
  154. error=f"请求超时({request_timeout}秒)"
  155. )
  156. except requests.exceptions.RequestException as e:
  157. logger.error("douyin_search network error", extra={"keyword": keyword, "error": str(e)})
  158. return ToolResult(
  159. title="抖音搜索失败",
  160. output="",
  161. error=f"网络错误: {str(e)}"
  162. )
  163. except Exception as e:
  164. logger.error("douyin_search unexpected error", extra={"keyword": keyword, "error": str(e)}, exc_info=True)
  165. return ToolResult(
  166. title="抖音搜索失败",
  167. output="",
  168. error=f"未知错误: {str(e)}"
  169. )
  170. async def main():
  171. result = await douyin_search(
  172. keyword="养老政策",
  173. account_id="771431186"
  174. )
  175. print(result.output)
  176. if __name__ == "__main__":
  177. asyncio.run(main())