douyin_search.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. #!/usr/bin/env python3
  2. """
  3. 抖音内容搜索工具
  4. 根据关键词搜索抖音内容
  5. """
  6. import requests
  7. import json
  8. import os
  9. import argparse
  10. from datetime import datetime
  11. from typing import Dict, Any
  12. class DouyinSearch:
  13. """抖音搜索API封装类"""
  14. BASE_URL = "http://47.84.182.56:8001"
  15. TOOL_NAME = "douyin_search_by_keyword"
  16. PLATFORM = "douyin"
  17. def __init__(self, results_dir: str = None):
  18. """
  19. 初始化API客户端
  20. Args:
  21. results_dir: 结果输出目录,默认为项目根目录下的 data/search 文件夹
  22. """
  23. self.api_url = f"{self.BASE_URL}/tools/call/{self.TOOL_NAME}"
  24. # 设置结果输出目录
  25. if results_dir:
  26. self.results_base_dir = results_dir
  27. else:
  28. # 默认使用项目根目录的 data/search 文件夹
  29. script_dir = os.path.dirname(os.path.abspath(__file__))
  30. project_root = os.path.dirname(os.path.dirname(script_dir))
  31. self.results_base_dir = os.path.join(project_root, "data", "search")
  32. def search(self, keyword: str, timeout: int = 30) -> Dict[str, Any]:
  33. """
  34. 搜索抖音内容
  35. Args:
  36. keyword: 搜索关键词
  37. timeout: 请求超时时间(秒),默认30秒
  38. Returns:
  39. API响应的JSON数据
  40. Raises:
  41. requests.exceptions.RequestException: 请求失败时抛出异常
  42. """
  43. payload = {
  44. "keyword": keyword
  45. }
  46. try:
  47. response = requests.post(
  48. self.api_url,
  49. json=payload,
  50. timeout=timeout,
  51. headers={"Content-Type": "application/json"}
  52. )
  53. response.raise_for_status()
  54. return response.json()
  55. except requests.exceptions.RequestException as e:
  56. print(f"请求失败: {e}")
  57. raise
  58. def save_result(self, keyword: str, result: Dict[str, Any]) -> str:
  59. """
  60. 保存结果到文件
  61. 目录结构: results/douyin_search/关键词/时间戳.json
  62. Args:
  63. keyword: 搜索关键词
  64. result: API返回的结果
  65. Returns:
  66. 保存的文件路径
  67. """
  68. # 创建目录结构: results/douyin_search/关键词/
  69. result_dir = os.path.join(self.results_base_dir, "douyin_search", keyword)
  70. os.makedirs(result_dir, exist_ok=True)
  71. # 文件名使用时间戳
  72. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  73. filename = f"{timestamp}.json"
  74. filepath = os.path.join(result_dir, filename)
  75. # 保存结果
  76. with open(filepath, 'w', encoding='utf-8') as f:
  77. json.dump(result, f, ensure_ascii=False, indent=2)
  78. return filepath
  79. def main():
  80. """示例使用"""
  81. # 解析命令行参数
  82. parser = argparse.ArgumentParser(description='抖音内容搜索工具')
  83. parser.add_argument(
  84. '--results-dir',
  85. type=str,
  86. default='data/search',
  87. help='结果输出目录 (默认: data/search)'
  88. )
  89. parser.add_argument(
  90. '--keyword',
  91. type=str,
  92. required=True,
  93. help='搜索关键词 (必填)'
  94. )
  95. args = parser.parse_args()
  96. # 创建API客户端实例
  97. client = DouyinSearch(results_dir=args.results_dir)
  98. # 执行搜索并保存
  99. try:
  100. result = client.search(args.keyword)
  101. filepath = client.save_result(args.keyword, result)
  102. print(f"Output: {filepath}")
  103. except Exception as e:
  104. print(f"Error: {e}", file=__import__('sys').stderr)
  105. if __name__ == "__main__":
  106. main()