"""
@author: luojunhui
"""
import base64
import uuid

import requests
from fake_useragent import FakeUserAgent

from applications.exception import SpiderError
from applications import Functions

functions = Functions()

# Default number of seconds to wait for the remote server before giving up.
# Without a timeout, `requests` waits indefinitely on a stalled connection.
_DEFAULT_TIMEOUT = 30


def baidu_account_video_crawler(account_id, cursor=None, timeout=_DEFAULT_TIMEOUT):
    """
    Fetch one page of an account's video list from haokan.baidu.com.

    :param account_id: Baidu account id; sent as ``app_id`` and embedded in
        the ``Referer`` header.
    :param cursor: pagination cursor sent as ``ctime``. Defaults to None,
        which means crawling starts from the newest videos.
    :param timeout: seconds to wait for the HTTP request (passed straight to
        ``requests``). Defaults to 30.
    :return: the ``data`` field of the JSON response.
    :raises SpiderError: if the request fails, the response is not valid JSON,
        an expected key is missing, or the API reports an ``errmsg`` other
        than the success marker.
    """
    # A random 32-character upper-case hex string used as a throwaway BAIDUID.
    cookie_str = uuid.uuid4().hex.upper()
    url = "https://haokan.baidu.com/web/author/listall?"
    params = {
        'app_id': account_id,
        'ctime': cursor,
        'rn': 10,
        'searchAfter': '',
        '_api': 1
    }
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh,zh-CN;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://haokan.baidu.com/author/{}'.format(account_id),
        'User-Agent': FakeUserAgent().chrome,
        'x-requested-with': 'xmlhttprequest',
        'Cookie': 'BAIDUID={}:FG=1; BAIDUID_BFESS={}:FG=1'.format(cookie_str, cookie_str)
    }
    try:
        response = requests.get(
            url,
            headers=headers,
            params=params,
            proxies=functions.proxy(),
            timeout=timeout,
        )
        response_json = response.json()
        if response_json['errmsg'] != '成功':
            raise SpiderError(
                platform="baidu",
                spider="account_video_crawler",
                error=response_json['errmsg'],
                url=url
            )
        return response_json['data']
    except SpiderError:
        # Already carries the API's own error message; re-raise untouched so
        # the generic handler below does not wrap it a second time.
        raise
    except Exception as e:
        raise SpiderError(
            platform="baidu",
            spider="account_video_crawler",
            error=str(e),
            url=url
        ) from e


def baidu_single_video_crawler(video_id, timeout=_DEFAULT_TIMEOUT):
    """
    Fetch the metadata of a single video from haokan.baidu.com.

    :param video_id: video id; sent as the ``vid`` query parameter.
    :param timeout: seconds to wait for the HTTP request (passed straight to
        ``requests``). Defaults to 30.
    :return: the ``data.apiData.curVideoMeta`` field of the JSON response.
    :raises SpiderError: if the request fails, the response is not valid JSON,
        or the expected keys are missing from the response.
    """
    url = "https://haokan.baidu.com/v"
    params = {
        'vid': video_id,
        '_format': 'json'
    }
    # Base64 of a random UUID string, used as a throwaway BIDUPSID cookie value.
    base_64_string = base64.b64encode(str(uuid.uuid4()).encode()).decode()
    headers = {
        'Accept': '*/*',
        'cookie': "BIDUPSID={}".format(base_64_string),
        'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://haokan.baidu.com',
        'User-Agent': FakeUserAgent().chrome,
    }
    try:
        response = requests.get(
            url,
            headers=headers,
            params=params,
            proxies=functions.proxy(),
            timeout=timeout,
        )
        response_json = response.json()
        return response_json['data']['apiData']['curVideoMeta']
    except Exception as e:
        raise SpiderError(
            platform="baidu",
            spider="single_video_crawler",
            error=str(e),
            url=url
        ) from e