|
|
@@ -0,0 +1,186 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+"""
|
|
|
+小红书笔记搜索工具
|
|
|
+根据关键词搜索小红书笔记,支持多种筛选条件
|
|
|
+"""
|
|
|
+
|
|
|
import argparse
import json
import os
import sys
from datetime import datetime
from typing import Any, Dict, Optional

import requests
|
|
|
+
|
|
|
+
|
|
|
class XiaohongshuSearch:
    """Client wrapper around the Xiaohongshu note-search HTTP tool.

    Posts keyword searches to the remote tool endpoint and can persist each
    raw JSON response to disk, grouped into one directory per keyword.
    """

    # NOTE(review): plain-HTTP endpoint addressed by a hard-coded IP, so
    # requests and responses are sent unencrypted -- confirm this is intended.
    BASE_URL = "http://47.84.182.56:8001"
    TOOL_NAME = "xhs_note_search"
    PLATFORM = "xiaohongshu"

    def __init__(self, results_dir: Optional[str] = None):
        """Initialise the API client.

        Args:
            results_dir: Base directory for saved results. When omitted (or
                empty), defaults to ``data/search`` located two directory
                levels above the directory containing this script.
        """
        self.api_url = f"{self.BASE_URL}/tools/call/{self.TOOL_NAME}"

        if results_dir:
            self.results_base_dir = results_dir
        else:
            # Fall back to <two levels above this script's dir>/data/search.
            script_dir = os.path.dirname(os.path.abspath(__file__))
            project_root = os.path.dirname(os.path.dirname(script_dir))
            self.results_base_dir = os.path.join(project_root, "data", "search")

    def search(
        self,
        keyword: str,
        content_type: str = "不限",
        sort_type: str = "综合",
        publish_time: str = "不限",
        cursor: str = "",
        timeout: int = 30
    ) -> Dict[str, Any]:
        """Search Xiaohongshu notes by keyword.

        Args:
            keyword: Search keyword.
            content_type: Content filter; one of '不限', '视频', '图文'.
            sort_type: Sort order; one of '综合', '最新', '最多点赞', '最多评论'.
            publish_time: Publish-time filter; one of '不限', '一天内',
                '一周内', '半年内'.
            cursor: Pagination cursor. Empty for the first page; the cursor
                for the next page is taken from the previous response.
            timeout: Request timeout in seconds.

        Returns:
            The decoded JSON body of the API response.

        Raises:
            requests.exceptions.RequestException: If the request fails or the
                server answers with an HTTP error status. The error is
                printed before being re-raised.
        """
        payload = {
            "keyword": keyword,
            "content_type": content_type,
            "sort_type": sort_type,
            "publish_time": publish_time,
            "cursor": cursor,
        }

        try:
            response = requests.post(
                self.api_url,
                json=payload,
                timeout=timeout,
                headers={"Content-Type": "application/json"},
            )
            # Turn 4xx/5xx answers into exceptions instead of parsing them.
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"请求失败: {e}")
            raise

    @staticmethod
    def _safe_dir_name(keyword: str) -> str:
        """Map *keyword* to a single path component that stays in its parent.

        Path separators (and NUL, which the OS rejects) are replaced by
        ``_`` so the keyword can never create nested directories. Names that
        are empty or consist only of dots/spaces (``"."``, ``".."``) would
        resolve to the base directory or its parent, so they become ``_``.
        Keywords without such characters are returned unchanged.
        """
        cleaned = "".join("_" if ch in "/\\\0" else ch for ch in keyword)
        if not cleaned.strip(". "):
            return "_"
        return cleaned

    def save_result(self, keyword: str, result: Dict[str, Any], page: int = 1) -> str:
        """Write one API response to a JSON file and return its path.

        Layout:
        ``<results_base_dir>/xiaohongshu_search/<keyword>/<timestamp>_page<page>.json``
        where ``<keyword>`` is the keyword reduced to a safe directory name
        (see ``_safe_dir_name``) and ``<timestamp>`` is the local time
        formatted as ``YYYYmmdd_HHMMSS``.

        Args:
            keyword: Search keyword; selects the per-keyword directory.
            result: API response to serialise.
            page: Page number, used only in the file name.

        Returns:
            Path of the file that was written.
        """
        # The keyword is user input: sanitise it so it cannot escape the
        # results directory (e.g. "../../x") or spawn nested folders ("a/b").
        result_dir = os.path.join(
            self.results_base_dir,
            "xiaohongshu_search",
            self._safe_dir_name(keyword),
        )
        os.makedirs(result_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{timestamp}_page{page}.json"
        filepath = os.path.join(result_dir, filename)

        # ensure_ascii=False keeps Chinese text readable in the saved file.
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=2)

        return filepath
|
|
|
+
|
|
|
+
|
|
|
def main():
    """Command-line entry point: run one search and save the response.

    Parses the command-line options, performs a single search request and
    writes the response to disk. On success the saved file path is printed
    to stdout as ``Output: <path>``; on failure the error is printed to
    stderr.

    Returns:
        Process exit status: 0 on success, 1 if the search or the save
        failed.
    """
    parser = argparse.ArgumentParser(description='小红书笔记搜索工具')
    parser.add_argument(
        '--results-dir',
        type=str,
        default='data/search',
        help='结果输出目录 (默认: data/search)'
    )
    parser.add_argument(
        '--keyword',
        type=str,
        required=True,
        help='搜索关键词 (必填)'
    )
    parser.add_argument(
        '--content-type',
        type=str,
        default='不限',
        choices=['不限', '视频', '图文'],
        help='内容类型 (默认: 不限)'
    )
    parser.add_argument(
        '--sort-type',
        type=str,
        default='综合',
        choices=['综合', '最新', '最多点赞', '最多评论'],
        help='排序方式 (默认: 综合)'
    )
    parser.add_argument(
        '--publish-time',
        type=str,
        default='不限',
        choices=['不限', '一天内', '一周内', '半年内'],
        help='发布时间筛选 (默认: 不限)'
    )
    parser.add_argument(
        '--cursor',
        type=str,
        default='',
        help='翻页游标 (默认为空,即第一页)'
    )
    parser.add_argument(
        '--page',
        type=int,
        default=1,
        help='页码标识,用于保存文件名 (默认: 1)'
    )
    args = parser.parse_args()

    client = XiaohongshuSearch(results_dir=args.results_dir)

    try:
        result = client.search(
            args.keyword,
            args.content_type,
            args.sort_type,
            args.publish_time,
            args.cursor
        )
        filepath = client.save_result(args.keyword, result, args.page)
        print(f"Output: {filepath}")
    except Exception as e:
        # Top-level boundary: report the problem and signal failure through
        # the exit status so shell callers can detect it.
        print(f"Error: {e}", file=sys.stderr)
        return 1
    return 0


# Run the CLI only when executed as a script; propagate main()'s status
# to the shell as the process exit code.
if __name__ == "__main__":
    sys.exit(main())
|