from __future__ import annotations import json import base64 import hashlib import requests import urllib.parse from datetime import datetime from tenacity import retry from uuid import uuid4 from fake_useragent import FakeUserAgent from applications import log from applications.utils import proxy, request_retry retry_desc = request_retry(retry_times=3, min_retry_delay=2, max_retry_delay=30) @retry(**retry_desc) def haokan_search_videos(search_key: str) -> dict | None: """ get haokan search videos :param search_key: search key :return: haokan search videos """ timestamp_with_ms = datetime.now().timestamp() timestamp_ms = int(timestamp_with_ms * 1000) query_string = urllib.parse.quote(search_key) strings = "{}_{}_{}_{}_{}".format(1, query_string, 10, timestamp_ms, 1) sign = hashlib.md5(strings.encode()).hexdigest() url = f"https://haokan.baidu.com/haokan/ui-search/pc/search/video?pn=1&rn=10&type=video&query={query_string}&sign={sign}&version=1×tamp={timestamp_ms}" base_64_string = base64.b64encode(str(uuid4()).encode()).decode() headers = { "Accept": "*/*", "Accept-Language": "zh", "Connection": "keep-alive", "Referer": "https://haokan.baidu.com/web/search/page?query={}".format( query_string ), "User-Agent": FakeUserAgent().chrome, "Cookie": "BAIDUID={}".format(base_64_string), } try: response = requests.get(url, headers=headers, proxies=proxy(), timeout=120) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: log( task="haokan_crawler_videos", function="haokan_search_videos", message=f"API请求失败: {e}", data={"search_key": search_key}, ) except json.JSONDecodeError as e: log( task="haokan_crawler_videos", function="haokan_search_videos", message=f"响应解析失败: {e}", data={"search_key": search_key}, ) return None @retry(**retry_desc) def haokan_fetch_video_detail(video_id: str) -> dict | None: """ get haokan video detail :param video_id: video id :return: haokan video detail """ url = "https://haokan.baidu.com/v" params = { 'vid': video_id, '_format': 'json' } base_64_string = base64.b64encode(str(uuid4()).encode()).decode() headers = { 'Accept': '*/*', 'cookie': "BIDUPSID={}".format(base_64_string), 'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8', 'Cache-Control': 'no-cache', 'Connection': 'keep-alive', 'Content-Type': 'application/x-www-form-urlencoded', 'Referer': 'https://haokan.baidu.com', 'User-Agent': FakeUserAgent().chrome, } try: response = requests.get(url, headers=headers, proxies=proxy(), params=params, timeout=120) response.raise_for_status() return response.json() except requests.exceptions.RequestException as e: log( task="haokan_crawler_videos", function="haokan_get_detail", message=f"API请求失败: {e}", data={"video_id": video_id}, ) except json.JSONDecodeError as e: log( task="haokan_crawler_videos", function="haokan_get_detail", message=f"响应解析失败: {e}", data={"video_id": video_id}, ) return None