123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596 |
- """
- @author: luojunhui
- """
- import base64
- import uuid
- import requests
- from fake_useragent import FakeUserAgent
- from applications.exception import SpiderError
- from applications import Functions
# Shared helper instance; the crawlers below call functions.proxy() to obtain
# the value passed as the `proxies` argument of each HTTP request.
functions = Functions()
def baidu_account_video_crawler(account_id, cursor=None, *, timeout=30):
    """
    Fetch one page of an account's video list from the Haokan (Baidu) web API.

    :param account_id: Baidu account id; sent as ``app_id`` and embedded in
        the Referer header.
    :param cursor: pagination cursor, sent as ``ctime``. Defaults to None,
        which means crawling starts from the newest videos.
    :param timeout: seconds to wait for the server before giving up, so a
        stalled proxy or connection cannot block the caller indefinitely.
    :return: the ``data`` field of the JSON response.
    :raises SpiderError: if the request fails, the response cannot be parsed
        as the expected JSON, or the API reports an ``errmsg`` other than
        '成功'.
    """
    # Random 32-character upper-case hex string used as a throwaway BAIDUID.
    cookie_str = uuid.uuid4().hex.upper()
    url = "https://haokan.baidu.com/web/author/listall?"
    params = {
        'app_id': account_id,
        'ctime': cursor,
        'rn': 10,
        'searchAfter': '',
        '_api': 1,
    }
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh,zh-CN;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://haokan.baidu.com/author/{}'.format(account_id),
        'User-Agent': FakeUserAgent().chrome,
        'x-requested-with': 'xmlhttprequest',
        'Cookie': 'BAIDUID={}:FG=1; BAIDUID_BFESS={}:FG=1'.format(cookie_str, cookie_str),
    }
    try:
        response = requests.request(
            "GET",
            url,
            headers=headers,
            params=params,
            proxies=functions.proxy(),
            timeout=timeout,
        )
        response_json = response.json()
        if response_json['errmsg'] == '成功':
            return response_json['data']
        api_error = response_json['errmsg']
    except Exception as e:
        # Transport, JSON-decoding and missing-key failures all surface as a
        # single SpiderError; the original exception is kept as __cause__.
        raise SpiderError(
            platform="baidu",
            spider="account_video_crawler",
            error=str(e),
            url=url
        ) from e

    # Raised outside the try block so the API-level error is reported once,
    # with the server's own message, instead of being caught and re-wrapped.
    raise SpiderError(
        platform="baidu",
        spider="account_video_crawler",
        error=api_error,
        url=url
    )
def baidu_single_video_crawler(video_id, *, timeout=30):
    """
    Fetch the metadata of a single Haokan (Baidu) video.

    :param video_id: video id; sent as the ``vid`` query parameter.
    :param timeout: seconds to wait for the server before giving up, so a
        stalled proxy or connection cannot block the caller indefinitely.
    :return: the ``data.apiData.curVideoMeta`` object of the JSON response.
    :raises SpiderError: if the request fails, the response is not valid
        JSON, or the expected keys are missing from it.
    """
    url = "https://haokan.baidu.com/v"
    params = {
        'vid': video_id,
        '_format': 'json',
    }
    # Base64 of a random UUID string, used as a throwaway BIDUPSID cookie.
    base_64_string = base64.b64encode(str(uuid.uuid4()).encode()).decode()
    headers = {
        'Accept': '*/*',
        'cookie': "BIDUPSID={}".format(base_64_string),
        'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://haokan.baidu.com',
        'User-Agent': FakeUserAgent().chrome,
    }
    try:
        response = requests.request(
            "GET",
            url,
            headers=headers,
            params=params,
            proxies=functions.proxy(),
            timeout=timeout,
        )
        response_json = response.json()
        return response_json['data']['apiData']['curVideoMeta']
    except Exception as e:
        # Transport, JSON-decoding and missing-key failures all surface as a
        # single SpiderError; the original exception is kept as __cause__.
        raise SpiderError(
            platform="baidu",
            spider="single_video_crawler",
            error=str(e),
            url=url
        ) from e
|