123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123 |
- """
- @author: luojunhui
- """
- import base64
- import uuid
- import requests
- from fake_useragent import FakeUserAgent
- from applications.exception import SpiderError
- from applications import Functions
- functions = Functions()
def baidu_account_video_crawler(account_id, cursor=None, timeout=30):
    """
    Fetch one page of an account's video list from haokan.baidu.com.

    :param account_id: Baidu account id; sent as the ``app_id`` query
        parameter and embedded in the ``Referer`` header.
    :param cursor: pagination cursor, sent as the ``ctime`` query parameter.
        Defaults to None, which means crawling starts from the newest videos.
    :param timeout: timeout in seconds handed to ``requests`` so a stalled
        proxy or server cannot block the crawler forever.
    :return: the ``data`` object of the JSON response.
    :raises SpiderError: when the request fails, the body is not the expected
        JSON, or the API answers with an ``errmsg`` other than '成功'.

    Shape of a successful response (abridged)::

        {
            "errno": 0,
            "errmsg": "成功",
            "data": {
                "response_count": 10,
                "has_more": 1,
                "ctime": timestamp_ms plus one integer,
                "results": [
                    {
                        "tplName": "video",
                        "type": "video",
                        "content": {
                            "vid": "6472901034127874496",
                            "publish_time": "...",
                            "title": "...",
                            "cover_src": "https://...",
                            "cover_src_pc": "https://...",
                            "thumbnails": "https://...",
                            "duration": "03:15",
                            "poster": "https://...",
                            "playcnt": "1054",
                            "playcntText": "..."
                        }
                    },
                    ...
                ]
            }
        }
    """
    # A fresh random 32-char upper-case hex id is used as the BAIDUID cookie
    # value on every call.
    cookie_str = uuid.uuid4().hex.upper()
    url = "https://haokan.baidu.com/web/author/listall?"
    params = {
        'app_id': account_id,
        'ctime': cursor,
        'rn': 10,
        'searchAfter': '',
        '_api': 1
    }
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'zh,zh-CN;q=0.9',
        'Connection': 'keep-alive',
        'Referer': 'https://haokan.baidu.com/author/{}'.format(account_id),
        'User-Agent': FakeUserAgent().chrome,
        'x-requested-with': 'xmlhttprequest',
        'Cookie': 'BAIDUID={}:FG=1; BAIDUID_BFESS={}:FG=1'.format(cookie_str, cookie_str)
    }
    try:
        response = requests.request(
            "GET",
            url,
            headers=headers,
            params=params,
            proxies=functions.proxy(),
            timeout=timeout,
        )
        response_json = response.json()
        if response_json['errmsg'] == '成功':
            return response_json['data']
        api_error = response_json['errmsg']
    except Exception as e:
        # Transport, JSON-decoding and missing-key failures all surface as a
        # single SpiderError; the original exception is kept as the cause.
        raise SpiderError(
            platform="baidu",
            spider="account_video_crawler",
            error=str(e),
            url=url
        ) from e

    # Raised outside the try block so the API-level error is reported once,
    # with the server's own message, instead of being caught and re-wrapped.
    raise SpiderError(
        platform="baidu",
        spider="account_video_crawler",
        error=api_error,
        url=url
    )
def baidu_single_video_crawler(video_id, timeout=30):
    """
    Fetch the metadata of a single video from haokan.baidu.com.

    :param video_id: video id, sent as the ``vid`` query parameter.
    :param timeout: timeout in seconds handed to ``requests`` so a stalled
        proxy or server cannot block the crawler forever.
    :return: the ``data.apiData.curVideoMeta`` object of the JSON response.
    :raises SpiderError: when the request fails, the body is not valid JSON,
        or the expected keys are missing from the response.
    """
    url = "https://haokan.baidu.com/v"
    params = {
        'vid': video_id,
        '_format': 'json'
    }
    # The BIDUPSID cookie is a base64-encoded random UUID, regenerated on
    # every call.
    base_64_string = base64.b64encode(str(uuid.uuid4()).encode()).decode()
    headers = {
        'Accept': '*/*',
        'cookie': "BIDUPSID={}".format(base_64_string),
        'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Referer': 'https://haokan.baidu.com',
        'User-Agent': FakeUserAgent().chrome,
    }
    try:
        response = requests.request(
            "GET",
            url,
            headers=headers,
            params=params,
            proxies=functions.proxy(),
            timeout=timeout,
        )
        response_json = response.json()
        return response_json['data']['apiData']['curVideoMeta']
    except Exception as e:
        # Transport, JSON-decoding and missing-key failures all surface as a
        # single SpiderError; the original exception is kept as the cause.
        raise SpiderError(
            platform="baidu",
            spider="single_video_crawler",
            error=str(e),
            url=url
        ) from e
|