123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- from __future__ import annotations
- import json
- import base64
- import hashlib
- import requests
- import urllib.parse
- from datetime import datetime
- from tenacity import retry
- from uuid import uuid4
- from fake_useragent import FakeUserAgent
- from applications import log
- from applications.utils import proxy, request_retry
# Shared keyword arguments for the ``@retry`` decorators applied to the
# request functions in this module, produced by the project's
# ``request_retry`` helper (delay values are presumably seconds -- confirm
# against ``applications.utils.request_retry``).
retry_desc = request_retry(
    retry_times=3,
    min_retry_delay=2,
    max_retry_delay=30,
)
@retry(**retry_desc)
def haokan_search_videos(search_key: str) -> dict | None:
    """
    Search Haokan (haokan.baidu.com) for videos matching a keyword.

    Requests page 1 (10 results) of the PC video-search endpoint through the
    project proxy and returns the decoded JSON body.

    :param search_key: raw (not URL-encoded) search keyword
    :return: decoded JSON response, or None when the request fails or the
        body is not valid JSON (the failure is logged, not raised)
    """
    # NOTE(review): every request/parse failure is caught and logged below,
    # so no exception propagates to ``@retry``. Unless ``retry_desc`` is
    # configured to retry on a ``None`` result, the decorator never fires --
    # confirm the intended behavior.
    page_num, page_size, version = 1, 10, 1
    timestamp_ms = int(datetime.now().timestamp() * 1000)
    query_string = urllib.parse.quote(search_key)

    # The signature is the MD5 of the values joined by "_"; they mirror the
    # pn / query / rn / timestamp / version URL parameters (presumably in
    # that order), so they are defined once and reused for both.
    sign_source = f"{page_num}_{query_string}_{page_size}_{timestamp_ms}_{version}"
    sign = hashlib.md5(sign_source.encode()).hexdigest()

    # The URL is assembled by hand (rather than via ``params=``) so the query
    # is sent with exactly the same percent-encoding that was signed.
    url = (
        "https://haokan.baidu.com/haokan/ui-search/pc/search/video"
        f"?pn={page_num}&rn={page_size}&type=video&query={query_string}"
        f"&sign={sign}&version={version}&timestamp={timestamp_ms}"
    )

    # A fresh random BAIDUID cookie value is generated for every call.
    baidu_id = base64.b64encode(str(uuid4()).encode()).decode()
    headers = {
        "Accept": "*/*",
        "Accept-Language": "zh",
        "Connection": "keep-alive",
        "Referer": f"https://haokan.baidu.com/web/search/page?query={query_string}",
        "User-Agent": FakeUserAgent().chrome,
        "Cookie": f"BAIDUID={baidu_id}",
    }

    try:
        response = requests.get(url, headers=headers, proxies=proxy(), timeout=120)
        response.raise_for_status()
        return response.json()
    except json.JSONDecodeError as e:
        # Must come before the RequestException handler: in recent versions
        # of requests the error raised by ``response.json()`` subclasses both
        # types, and would otherwise be reported as a request failure.
        log(
            task="haokan_crawler_videos",
            function="haokan_search_videos",
            message=f"响应解析失败: {e}",
            data={"search_key": search_key},
        )
    except requests.exceptions.RequestException as e:
        log(
            task="haokan_crawler_videos",
            function="haokan_search_videos",
            message=f"API请求失败: {e}",
            data={"search_key": search_key},
        )
    return None
@retry(**retry_desc)
def haokan_fetch_video_detail(video_id: str) -> dict | None:
    """
    Fetch the JSON detail payload of a single Haokan video.

    Issues ``GET https://haokan.baidu.com/v?vid=<video_id>&_format=json``
    through the project proxy and returns the decoded JSON body.

    :param video_id: Haokan video id, sent as the ``vid`` query parameter
    :return: decoded JSON response, or None when the request fails or the
        body is not valid JSON (the failure is logged, not raised)
    """
    # NOTE(review): every request/parse failure is caught and logged below,
    # so no exception propagates to ``@retry``. Unless ``retry_desc`` is
    # configured to retry on a ``None`` result, the decorator never fires --
    # confirm the intended behavior.
    url = "https://haokan.baidu.com/v"
    params = {
        "vid": video_id,
        "_format": "json",
    }

    # A fresh random BIDUPSID cookie value is generated for every call.
    psid = base64.b64encode(str(uuid4()).encode()).decode()
    headers = {
        "Accept": "*/*",
        "cookie": f"BIDUPSID={psid}",
        "Accept-Language": "en,zh;q=0.9,zh-CN;q=0.8",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded",
        "Referer": "https://haokan.baidu.com",
        "User-Agent": FakeUserAgent().chrome,
    }

    try:
        response = requests.get(
            url, headers=headers, proxies=proxy(), params=params, timeout=120
        )
        response.raise_for_status()
        return response.json()
    except json.JSONDecodeError as e:
        # Must come before the RequestException handler: in recent versions
        # of requests the error raised by ``response.json()`` subclasses both
        # types, and would otherwise be reported as a request failure.
        log(
            task="haokan_crawler_videos",
            function="haokan_fetch_video_detail",
            message=f"响应解析失败: {e}",
            data={"video_id": video_id},
        )
    except requests.exceptions.RequestException as e:
        log(
            task="haokan_crawler_videos",
            function="haokan_fetch_video_detail",
            message=f"API请求失败: {e}",
            data={"video_id": video_id},
        )
    return None
|