|
|
2 тижнів тому | |
|---|---|---|
| .. | ||
| README.md | 2 тижнів тому | |
| __init__.py | 2 тижнів тому | |
| xiaohongshu_detail.py | 2 тижнів тому | |
from script.detail import get_xiaohongshu_detail
# 获取笔记详情
detail = get_xiaohongshu_detail("68d62e4500000000130085fc")
print(f"标题: {detail['title']}")
print(f"正文: {detail['body_text']}")
print(f"视频: {detail['video']}")
print(f"类型: {detail['content_type']}")
print(f"发布时间: {detail['publish_time']}")
# 获取详情
python script/detail/xiaohongshu_detail.py --note-id "68d62e4500000000130085fc"
# 强制刷新
python script/detail/xiaohongshu_detail.py --note-id "68d62e4500000000130085fc" --force
detail = get_xiaohongshu_detail(
note_id: str, # 必填:笔记ID
force=False # 可选:强制刷新(忽略缓存)
)
{
"channel_content_id": "68d62e4500000000130085fc",
"link": "https://www.xiaohongshu.com/explore/68d62e4500000000130085fc",
"comment_count": null,
"images": [
"http://res.cybertogether.net/crawler/image/bf6a0e92ed7252ae8414121edf26f2d3.jpeg"
],
"like_count": 14,
"body_text": "时隔两个月,终于有时间把穿越极圈的航拍视频剪辑出来了...",
"title": "穿越北极圈,终生难忘",
"collect_count": 6,
"channel_account_id": "664954500000000007006ac0",
"channel_account_name": "Colin SW",
"content_type": "video", # 根据 video 字段自动判断
"video": "http://sns-video-hw.xhscdn.com/stream/1/110/258/...",
"publish_timestamp": 1758877418000,
"publish_time": "2025-09-26 17:03:38"
}
| 字段 | 类型 | 说明 |
|---|---|---|
| channel_content_id | string/null | 笔记ID |
| link | string/null | 笔记链接 |
| title | string/null | 标题 |
| body_text | string/null | 完整正文内容 |
| channel_account_name | string/null | 作者名称 |
| channel_account_id | string/null | 作者ID |
| like_count | number/null | 点赞数 |
| comment_count | number/null | 评论数 |
| collect_count | number/null | 收藏数 |
| images | array | 图片URL列表(已去重) |
| video | string/null | 视频链接 |
| content_type | string | 内容类型("video" 或 "normal") |
| publish_timestamp | number/null | 发布时间戳(毫秒) |
| publish_time | string/null | 发布时间(格式:YYYY-MM-DD HH:MM:SS) |
注意:
- null 表示数据缺失(API 未返回该字段或返回为空),而非空字符串或 0
- content_type 自动判断:有 video 字段时为 "video",否则为 "normal"

from script.detail import get_xiaohongshu_detail
# 获取笔记详情
detail = get_xiaohongshu_detail("68d62e4500000000130085fc")
print(f"标题: {detail['title']}")
print(f"正文: {detail['body_text']}")
print(f"点赞: {detail['like_count']}")
# 忽略缓存,重新请求 API
detail = get_xiaohongshu_detail("68d62e4500000000130085fc", force=True)
detail = get_xiaohongshu_detail("68d62e4500000000130085fc")
if detail['content_type'] == 'video':
    print(f"视频链接: {detail['video']}")
else:
    print(f"图文笔记,图片数量: {len(detail['images'])}")
from script.search import search_xiaohongshu
from script.detail import get_xiaohongshu_detail
# 1. 搜索笔记
search_result = search_xiaohongshu("产品测试", publish_time="一周内")
# 2. 获取前 5 条的完整详情
for note in search_result['notes'][:5]:
    note_id = note['channel_content_id']
    # 获取详情
    detail = get_xiaohongshu_detail(note_id)
    print(f"\n标题: {detail['title']}")
    print(f"摘要: {note['desc'][:50]}...") # 搜索结果的摘要
    print(f"完整正文: {detail['body_text'][:100]}...") # 详情的完整正文
    print(f"点赞: {detail['like_count']}")
    print(f"类型: {detail['content_type']}")
note_ids = [
"68d62e4500000000130085fc",
"68b69ea9000000001c035a4d",
"6808c0e8000000001c00a771"
]
for note_id in note_ids:
    try:
        detail = get_xiaohongshu_detail(note_id)
        print(f"✓ {detail['title']}")
    except Exception as e:
        print(f"✗ {note_id}: {e}")
python script/detail/xiaohongshu_detail.py --note-id "68d62e4500000000130085fc"
python script/detail/xiaohongshu_detail.py --note-id "68d62e4500000000130085fc" --force
python script/detail/xiaohongshu_detail.py --note-id "68d62e4500000000130085fc" --no-cache
| 参数 | 默认值 | 说明 |
|---|---|---|
| --note-id | 必填 | 笔记ID |
| --force | False | 强制刷新(忽略缓存) |
| --no-cache | False | 禁用缓存功能 |
| --results-dir | data/detail | 输出目录 |
| --timeout | 30 | 超时时间(秒) |
| --max-retries | 5 | 最大重试次数 |
| --retry-delay | 2 | 重试延迟(秒) |
相同的笔记 ID 会自动使用缓存:
# 第一次:请求 API
detail1 = get_xiaohongshu_detail("68d62e4500000000130085fc")
# 第二次:使用缓存(瞬间返回)
detail2 = get_xiaohongshu_detail("68d62e4500000000130085fc")
# 强制刷新
detail3 = get_xiaohongshu_detail("68d62e4500000000130085fc", force=True)
指数退避策略:2秒 → 4秒 → 8秒 → 16秒 → 32秒
详情结果自动保存到 data/detail/xiaohongshu_detail/
目录结构:
data/detail/xiaohongshu_detail/
└── {note_id}/
    ├── raw/                  # 原始数据(完整 API 响应)
    │   └── {timestamp}.json
    └── clean/                # 清洗数据(扁平化结构)
        └── {timestamp}.json
文件名示例:
20251113_144230.json

注意: 只有新请求的数据才会保存,使用缓存时不会重复保存文件。
图片 URL 会自动按顺序去重:
# 原始数据可能有重复
# images: ["url1", "url1", "url2"]
# 返回的数据已去重
# images: ["url1", "url2"]
根据 video 字段自动判断内容类型:
# 有视频
detail['video'] = "http://..."
detail['content_type'] = "video"
# 无视频
detail['video'] = None  # JSON 输出中为 null
detail['content_type'] = "normal"
自动将时间戳转换为可读格式:
detail['publish_timestamp'] = 1758877418000 # 毫秒时间戳
detail['publish_time'] = "2025-09-26 17:03:38" # 格式化时间
{
"channel_content_id": "68d62e4500000000130085fc",
"link": "https://www.xiaohongshu.com/explore/68d62e4500000000130085fc",
"comment_count": null,
"images": [
"http://res.cybertogether.net/crawler/image/bf6a0e92ed7252ae8414121edf26f2d3.jpeg"
],
"like_count": 14,
"body_text": "完整正文内容...",
"title": "穿越北极圈,终生难忘",
"collect_count": 6,
"channel_account_id": "664954500000000007006ac0",
"channel_account_name": "Colin SW",
"content_type": "video",
"video": "http://sns-video-hw.xhscdn.com/...",
"publish_timestamp": 1758877418000,
"publish_time": "2025-09-26 17:03:38"
}
完整的 API 响应,包含所有元数据和嵌套结构:
{
"note_id": "68d62e4500000000130085fc",
"timestamp": "20251113_144230",
"api_response": {
"success": true,
"result": [...],
"tool_name": "get_xhs_detail_by_note_id",
"call_type": "api"
}
}
A:
- data/detail/xiaohongshu_detail/{note_id}/ 目录
- force=True 参数强制刷新

A: 看控制台输出:
- ✓ 使用缓存数据: ...
- 正在获取笔记详情: ... (尝试 1/5)

A:
A: API 返回的数据中,某些字段可能不存在或为 null,我们保持原样返回,不会强制转换为 0。
A:
- 有 video 字段(非 null)时为 "video"
- data/detail

只有当 API 返回 success: true 时才视为成功并保存缓存,否则会继续重试。
详情模块通常与搜索模块配合使用:
from script.search import search_xiaohongshu
from script.detail import get_xiaohongshu_detail
# 1. 搜索笔记(获取摘要)
search_result = search_xiaohongshu("产品测试")
# 2. 对感兴趣的笔记获取详情(获取完整正文和视频)
for note in search_result['notes'][:5]:
    note_id = note['channel_content_id']
    detail = get_xiaohongshu_detail(note_id)
    # 搜索结果的摘要
    print(f"摘要: {note['desc']}")
    # 详情的完整正文
    print(f"完整正文: {detail['body_text']}")
    # 视频(如果有)
    if detail['video']:
        print(f"视频: {detail['video']}")
关键区别:
- 搜索模块:返回摘要(desc),不返回 body_text 和 video
- 详情模块:返回完整正文(body_text)和视频链接(video)