custom_search.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. #!/usr/bin/env python3
  2. """
  3. 通用搜索工具
  4. 支持Google、Baidu、Bing等搜索引擎
  5. """
  6. import requests
  7. import json
  8. import os
  9. import argparse
  10. from datetime import datetime
  11. from typing import Dict, Any
  12. import sys
  13. from pathlib import Path
  14. # 添加项目根目录到路径并导入配置
  15. sys.path.insert(0, str(Path(__file__).parent.parent.parent))
  16. from lib.config import get_data_dir
  17. class CustomSearch:
  18. """通用搜索API封装类"""
  19. BASE_URL = "http://47.84.182.56:8001"
  20. TOOL_NAME = "custom_search"
  21. def __init__(self, results_dir: str = None):
  22. """
  23. 初始化API客户端
  24. Args:
  25. results_dir: 结果输出目录,默认为项目根目录下的 data/search 文件夹
  26. """
  27. self.api_url = f"{self.BASE_URL}/tools/call/{self.TOOL_NAME}"
  28. # 设置结果输出目录
  29. if results_dir:
  30. self.results_base_dir = results_dir
  31. else:
  32. # 默认从配置读取
  33. self.results_base_dir = get_data_dir("search")
  34. def search(self, keyword: str, platform: str = "google", timeout: int = 30) -> Dict[str, Any]:
  35. """
  36. 执行搜索
  37. Args:
  38. keyword: 搜索关键词
  39. platform: 搜索平台,可选值:google, baidu, bing,默认为google
  40. timeout: 请求超时时间(秒),默认30秒
  41. Returns:
  42. API响应的JSON数据
  43. Raises:
  44. requests.exceptions.RequestException: 请求失败时抛出异常
  45. """
  46. payload = {
  47. "keyword": keyword,
  48. "platform": platform
  49. }
  50. try:
  51. response = requests.post(
  52. self.api_url,
  53. json=payload,
  54. timeout=timeout,
  55. headers={"Content-Type": "application/json"}
  56. )
  57. response.raise_for_status()
  58. return response.json()
  59. except requests.exceptions.RequestException as e:
  60. print(f"请求失败: {e}")
  61. raise
  62. def save_result(self, keyword: str, platform: str, result: Dict[str, Any]) -> str:
  63. """
  64. 保存结果到文件
  65. 目录结构: results/custom_search/平台/关键词/时间戳.json
  66. Args:
  67. keyword: 搜索关键词
  68. platform: 搜索平台
  69. result: API返回的结果
  70. Returns:
  71. 保存的文件路径
  72. """
  73. # 创建目录结构: results/custom_search/平台/关键词/
  74. result_dir = os.path.join(self.results_base_dir, "custom_search", platform, keyword)
  75. os.makedirs(result_dir, exist_ok=True)
  76. # 文件名使用时间戳
  77. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  78. filename = f"{timestamp}.json"
  79. filepath = os.path.join(result_dir, filename)
  80. # 保存结果
  81. with open(filepath, 'w', encoding='utf-8') as f:
  82. json.dump(result, f, ensure_ascii=False, indent=2)
  83. return filepath
  84. def main():
  85. """示例使用"""
  86. # 解析命令行参数
  87. parser = argparse.ArgumentParser(description='通用搜索工具')
  88. parser.add_argument(
  89. '--results-dir',
  90. type=str,
  91. default=None,
  92. help='结果输出目录 (默认: 从配置读取)'
  93. )
  94. parser.add_argument(
  95. '--keyword',
  96. type=str,
  97. required=True,
  98. help='搜索关键词 (必填)'
  99. )
  100. parser.add_argument(
  101. '--platform',
  102. type=str,
  103. default='google',
  104. choices=['google', 'baidu', 'bing'],
  105. help='搜索平台 (默认: google)'
  106. )
  107. args = parser.parse_args()
  108. # 创建API客户端实例
  109. client = CustomSearch(results_dir=args.results_dir)
  110. # 执行搜索并保存
  111. try:
  112. result = client.search(args.keyword, args.platform)
  113. filepath = client.save_result(args.keyword, args.platform, result)
  114. print(f"Output: {filepath}")
  115. except Exception as e:
  116. print(f"Error: {e}", file=__import__('sys').stderr)
  117. if __name__ == "__main__":
  118. main()