|
@@ -33,6 +33,21 @@ headers = {
|
|
|
}
|
|
|
|
|
|
|
|
|
def format_nums(data):
    """Convert a human-readable count such as ``'1.2万'`` or ``'3.4K'`` to an int.

    The first number found in ``str(data)`` is used. If it is immediately
    followed (optionally after whitespace) by one of the recognised unit
    suffixes, the number is scaled by that unit; otherwise it is returned
    as-is. Fractions that remain after scaling are truncated.

    Thousands separators (``,``) are removed before parsing, so ``'1,234'``
    yields ``1234`` rather than ``1``.

    :param data: value to parse; anything accepted by ``str()``.
    :return: the parsed count as an ``int``, or ``0`` when ``data`` contains
        no number at all.
    """
    # Unit suffix -> multiplier. 'm'/'M' are treated as "million" and
    # 'w'/'W' as the pinyin abbreviation of 万 (ten thousand).
    multipliers = {
        '亿': 100000000, '百万': 1000000, '万': 10000, '千': 1000,
        'k': 1000, 'K': 1000, 'w': 10000, 'W': 10000,
        'm': 1000000, 'M': 1000000,
    }
    text = str(data).replace(',', '')
    # In the alternation '百万' must precede '万' so the longer unit wins.
    # The unit has to follow the number directly, so letters in trailing
    # words (e.g. the 'w' in "views") are never mistaken for a unit.
    match = re.search(r'(\d+(?:\.\d*)?|\.\d+)\s*(亿|百万|万|千|[kKwWmM])?', text)
    if match is None:
        return 0
    number, unit = match.groups()
    whole, _, fraction = number.partition('.')
    scale = multipliers[unit] if unit else 1
    # Pure integer arithmetic: avoids binary floating-point error such as
    # int(1.005 * 1000) == 1004 while still truncating like int() would.
    return int(whole + fraction) * scale // 10 ** len(fraction)
|
|
|
+
|
|
|
+
|
|
|
class Follow:
|
|
|
# 翻页参数
|
|
|
continuation = ''
|
|
@@ -114,331 +129,33 @@ class Follow:
|
|
|
'out_create_time': 站外用户创建时间}
|
|
|
"""
|
|
|
try:
|
|
|
- url = "https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8&prettyPrint=false"
|
|
|
- payload = json.dumps({
|
|
|
- "context": {
|
|
|
- "client": {
|
|
|
- "hl": "zh-CN",
|
|
|
- "gl": "US",
|
|
|
- "remoteHost": "38.93.247.21",
|
|
|
- "deviceMake": "Apple",
|
|
|
- "deviceModel": "",
|
|
|
- "visitorData": "CgtraDZfVnB4NXdIWSjL1IKfBg%3D%3D",
|
|
|
- "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36,gzip(gfe)",
|
|
|
- "clientName": "WEB",
|
|
|
- "clientVersion": "2.20230201.01.00",
|
|
|
- "osName": "Macintosh",
|
|
|
- "osVersion": "10_15_7",
|
|
|
- "originalUrl": f"https://www.youtube.com/{out_user_id}/about",
|
|
|
- "screenPixelDensity": 1,
|
|
|
- "platform": "DESKTOP",
|
|
|
- "clientFormFactor": "UNKNOWN_FORM_FACTOR",
|
|
|
- "configInfo": {
|
|
|
- "appInstallData": "CMvUgp8GEKLsrgUQzN-uBRC41K4FENfkrgUQsvWuBRDkoP4SELiLrgUQo_muBRDn964FENnprgUQlPiuBRC2nP4SEPuj_hIQ4tSuBRCJ6K4FEILdrgUQh92uBRD-7q4FEMz1rgUQ76P-EhDJya4FEJan_hIQkfj8Eg%3D%3D"
|
|
|
- },
|
|
|
- "screenDensityFloat": 1,
|
|
|
- "timeZone": "Asia/Shanghai",
|
|
|
- "browserName": "Chrome",
|
|
|
- "browserVersion": "109.0.0.0",
|
|
|
- "acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
|
|
|
- "deviceExperimentId": "ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EMvUgp8GGOmU7Z4G",
|
|
|
- "screenWidthPoints": 805,
|
|
|
- "screenHeightPoints": 969,
|
|
|
- "utcOffsetMinutes": 480,
|
|
|
- "userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
|
|
|
- "memoryTotalKbytes": "8000000",
|
|
|
- "mainAppWebInfo": {
|
|
|
- "graftUrl": f"/{out_user_id}/about",
|
|
|
- "pwaInstallabilityStatus": "PWA_INSTALLABILITY_STATUS_CAN_BE_INSTALLED",
|
|
|
- "webDisplayMode": "WEB_DISPLAY_MODE_FULLSCREEN",
|
|
|
- "isWebNativeShareAvailable": True
|
|
|
- }
|
|
|
- },
|
|
|
- "user": {
|
|
|
- "lockedSafetyMode": False
|
|
|
- },
|
|
|
- "request": {
|
|
|
- "useSsl": True,
|
|
|
- "internalExperimentFlags": [],
|
|
|
- "consistencyTokenJars": []
|
|
|
- },
|
|
|
- "clickTracking": {
|
|
|
- "clickTrackingParams": "CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak="
|
|
|
- },
|
|
|
- "adSignalsInfo": {
|
|
|
- "params": [
|
|
|
- {
|
|
|
- "key": "dt",
|
|
|
- "value": "1675668045032"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "flash",
|
|
|
- "value": "0"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "frm",
|
|
|
- "value": "0"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "u_tz",
|
|
|
- "value": "480"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "u_his",
|
|
|
- "value": "1"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "u_h",
|
|
|
- "value": "1080"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "u_w",
|
|
|
- "value": "1920"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "u_ah",
|
|
|
- "value": "1080"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "u_aw",
|
|
|
- "value": "1920"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "u_cd",
|
|
|
- "value": "24"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "bc",
|
|
|
- "value": "31"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "bih",
|
|
|
- "value": "969"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "biw",
|
|
|
- "value": "805"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "brdim",
|
|
|
- "value": "-269,-1080,-269,-1080,1920,-1080,1920,1080,805,969"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "vis",
|
|
|
- "value": "1"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "wgl",
|
|
|
- "value": "true"
|
|
|
- },
|
|
|
- {
|
|
|
- "key": "ca_type",
|
|
|
- "value": "image"
|
|
|
- }
|
|
|
- ],
|
|
|
- "bid": "ANyPxKqvCBKtjNeHQ6uTC7sKj2ZwIvEkk3oRlmdU7H_soRJWLc4IQCkqMVP68RR-Xae0h3nMdOKYOtVh_Yb2OYr4znd60I5j7A"
|
|
|
- }
|
|
|
- },
|
|
|
- # "browseId": browse_id,
|
|
|
- "params": "EgVhYm91dPIGBAoCEgA%3D"
|
|
|
- })
|
|
|
- headers = {
|
|
|
- 'authority': 'www.youtube.com',
|
|
|
- 'accept': '*/*',
|
|
|
- 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
|
|
|
- 'cache-control': 'no-cache',
|
|
|
- 'content-type': 'application/json',
|
|
|
- 'cookie': 'VISITOR_INFO1_LIVE=kh6_Vpx5wHY; YSC=UupqFrWvAR0; DEVICE_INFO=ChxOekU1TlRReU5qWTBOVFExTVRRNU5qRTBOdz09EOmU7Z4GGOmU7Z4G; GPS=1; PREF=tz=Asia.Shanghai; ST-h076le=itct=CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak%3D&csn=MC45NDM2MjgyNzM1ODE5NDAz&endpoint=%7B%22clickTrackingParams%22%3A%22CBMQ8JMBGAoiEwjY34r0rYD9AhURSEwIHfHZAak%3D%22%2C%22commandMetadata%22%3A%7B%22webCommandMetadata%22%3A%7B%22url%22%3A%22%2F%40weitravel%2Fabout%22%2C%22webPageType%22%3A%22WEB_PAGE_TYPE_CHANNEL%22%2C%22rootVe%22%3A3611%2C%22apiUrl%22%3A%22%2Fyoutubei%2Fv1%2Fbrowse%22%7D%7D%2C%22browseEndpoint%22%3A%7B%22browseId%22%3A%22UC08jgxf119fzynp2uHCvZIg%22%2C%22params%22%3A%22EgVhYm91dPIGBAoCEgA%253D%22%2C%22canonicalBaseUrl%22%3A%22%2F%40weitravel%22%7D%7D',
|
|
|
- 'origin': 'https://www.youtube.com',
|
|
|
- 'pragma': 'no-cache',
|
|
|
- 'referer': f'https://www.youtube.com/{out_user_id}/videos',
|
|
|
- 'sec-ch-ua': '"Not_A Brand";v="99", "Chromium";v="109", "Google Chrome";v="109.0.5414.87"',
|
|
|
- 'sec-ch-ua-arch': '"arm"',
|
|
|
- 'sec-ch-ua-bitness': '"64"',
|
|
|
- 'sec-ch-ua-full-version': '"109.0.1518.52"',
|
|
|
- 'sec-ch-ua-full-version-list': '"Not_A Brand";v="99.0.0.0", "Microsoft Edge";v="109.0.1518.52", "Chromium";v="109.0.5414.87"',
|
|
|
- 'sec-ch-ua-mobile': '?0',
|
|
|
- 'sec-ch-ua-model': '',
|
|
|
- 'sec-ch-ua-platform': '"macOS"',
|
|
|
- 'sec-ch-ua-platform-version': '"12.4.0"',
|
|
|
- 'sec-ch-ua-wow64': '?0',
|
|
|
- 'sec-fetch-dest': 'empty',
|
|
|
- 'sec-fetch-mode': 'same-origin',
|
|
|
- 'sec-fetch-site': 'same-origin',
|
|
|
- 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
|
|
- 'x-goog-visitor-id': 'CgtraDZfVnB4NXdIWSjL1IKfBg%3D%3D',
|
|
|
- 'x-youtube-bootstrap-logged-in': 'false',
|
|
|
- 'x-youtube-client-name': '1',
|
|
|
- 'x-youtube-client-version': '2.20230201.01.00'
|
|
|
+ url = f'https://www.youtube.com/{out_user_id}/about'
|
|
|
+ res = requests.get(url=url, headers=headers)
|
|
|
+ info = re.findall(r'var ytInitialData = (.*?);</script>', res.text, re.S)[0]
|
|
|
+ data = json.loads(info)
|
|
|
+ header = data['header']['c4TabbedHeaderRenderer']
|
|
|
+ tabs = data['contents']['twoColumnBrowseResultsRenderer']['tabs']
|
|
|
+ subsimpleText = header['subscriberCountText']['simpleText'].replace('位订阅者', '')
|
|
|
+ for tab in tabs:
|
|
|
+ if 'tabRenderer' not in tab or 'content' not in tab['tabRenderer']:
|
|
|
+ continue
|
|
|
+ viewCountText = \
|
|
|
+ tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
|
|
|
+ 'contents'][0]['channelAboutFullMetadataRenderer']['viewCountText']['simpleText']
|
|
|
+ out_create_time = \
|
|
|
+ tab['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer'][
|
|
|
+ 'contents'][0]['channelAboutFullMetadataRenderer']['joinedDateText']['runs'][1]['text']
|
|
|
+ break
|
|
|
+ out_user_dict = {
|
|
|
+ 'out_user_name': header['title'],
|
|
|
+ 'out_avatar_url': header['avatar']['thumbnails'][-1]['url'],
|
|
|
+ 'out_fans': format_nums(subsimpleText),
|
|
|
+ 'out_play_cnt': int(
|
|
|
+ viewCountText.replace('收看次數:', '').replace('次', '').replace(',', '')) if viewCountText else 0,
|
|
|
+ 'out_create_time': out_create_time.replace('年', '-').replace('月', '-').replace('日', ''),
|
|
|
}
|
|
|
- response = requests.post(url=url, headers=headers, data=payload)
|
|
|
- if response.status_code != 200:
|
|
|
- Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.text}\n')
|
|
|
- elif 'contents' not in response.text or 'header' not in response.text:
|
|
|
- Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.text}\n')
|
|
|
- elif 'c4TabbedHeaderRenderer' not in response.json()['header']:
|
|
|
- Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.json()["header"]}\n')
|
|
|
- elif 'twoColumnBrowseResultsRenderer' not in response.json()['contents']:
|
|
|
- Common.logger(log_type, crawler).warning(f'get_out_user_info:{response.json()}\n')
|
|
|
- elif 'tabs' not in response.json()['contents']['twoColumnBrowseResultsRenderer']:
|
|
|
- Common.logger(log_type, crawler).warning(
|
|
|
- f"get_out_user_info:{response.json()['contents']['twoColumnBrowseResultsRenderer']}\n")
|
|
|
- else:
|
|
|
- header = response.json()['header']['c4TabbedHeaderRenderer']
|
|
|
- tabs = response.json()['contents']['twoColumnBrowseResultsRenderer']['tabs']
|
|
|
- for i in range(len(tabs)):
|
|
|
- if 'tabRenderer' not in tabs[i]:
|
|
|
- title = ''
|
|
|
- elif 'title' not in tabs[i]['tabRenderer']:
|
|
|
- title = ''
|
|
|
- else:
|
|
|
- title = tabs[i]['tabRenderer']['title']
|
|
|
-
|
|
|
- if title == '简介':
|
|
|
- if 'tabRenderer' not in tabs[i]:
|
|
|
- Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]}\n")
|
|
|
- elif 'content' not in tabs[i]['tabRenderer']:
|
|
|
- Common.logger(log_type, crawler).warning(f"get_out_user_info:{tabs[i]['tabRenderer']}\n")
|
|
|
- elif 'sectionListRenderer' not in tabs[i]['tabRenderer']['content']:
|
|
|
- Common.logger(log_type, crawler).warning(
|
|
|
- f"get_out_user_info:{tabs[i]['tabRenderer']['content']}\n")
|
|
|
- elif 'contents' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']:
|
|
|
- Common.logger(log_type, crawler).warning(
|
|
|
- f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']}\n")
|
|
|
- elif len(tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents']) == 0:
|
|
|
- Common.logger(log_type, crawler).warning(
|
|
|
- f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']}\n")
|
|
|
- elif 'itemSectionRenderer' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]:
|
|
|
- Common.logger(log_type, crawler).warning(
|
|
|
- f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]}\n")
|
|
|
- elif 'contents' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']:
|
|
|
- Common.logger(log_type, crawler).warning(
|
|
|
- f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']}\n")
|
|
|
- elif len(tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents']) == 0:
|
|
|
- Common.logger(log_type, crawler).warning(
|
|
|
- f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']}\n")
|
|
|
- elif 'channelAboutFullMetadataRenderer' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]:
|
|
|
- Common.logger(log_type, crawler).warning(
|
|
|
- f"get_out_user_info:{tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]}\n")
|
|
|
- else:
|
|
|
- # 站外用户昵称
|
|
|
- if 'title' not in header and 'title' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']:
|
|
|
- out_user_name = ''
|
|
|
- elif 'title' in header:
|
|
|
- out_user_name = header['title']
|
|
|
- elif 'simpleText' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'title']:
|
|
|
- out_user_name = ''
|
|
|
- else:
|
|
|
- out_user_name = tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']['title'][
|
|
|
- 'simpleText']
|
|
|
-
|
|
|
- # 站外用户头像
|
|
|
- if 'avatar' not in header and 'avatar' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']:
|
|
|
- out_avatar_url = ''
|
|
|
- elif 'thumbnails' not in header['avatar'] and 'thumbnails' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'avatar']:
|
|
|
- out_avatar_url = ''
|
|
|
- elif len(header['avatar']['thumbnails']) == 0 and len(
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'avatar']['thumbnails']) == 0:
|
|
|
- out_avatar_url = ''
|
|
|
- elif 'url' not in header['avatar']['thumbnails'][-1] and 'url' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'avatar']['thumbnails'][-1]:
|
|
|
- out_avatar_url = ''
|
|
|
- elif 'url' in header['avatar']['thumbnails'][-1]:
|
|
|
- out_avatar_url = header['avatar']['thumbnails'][-1]['url']
|
|
|
- else:
|
|
|
- out_avatar_url = \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'avatar'][
|
|
|
- 'thumbnails'][-1]['url']
|
|
|
-
|
|
|
- # 站外用户粉丝
|
|
|
- if 'subscriberCountText' not in header:
|
|
|
- out_fans = 0
|
|
|
- elif 'accessibility' not in header['subscriberCountText']:
|
|
|
- out_fans = 0
|
|
|
- elif 'accessibilityData' not in header['subscriberCountText']['accessibility']:
|
|
|
- out_fans = 0
|
|
|
- elif 'label' not in header['subscriberCountText']['accessibility']['accessibilityData']:
|
|
|
- out_fans = 0
|
|
|
- else:
|
|
|
- out_fans = header['subscriberCountText']['accessibility']['accessibilityData']['label']
|
|
|
- if '万' in out_fans:
|
|
|
- out_fans = int(float(out_fans.split('万')[0]) * 10000)
|
|
|
- elif "位" in out_fans:
|
|
|
- out_fans = int(out_fans.split('位')[0].replace(",", ""))
|
|
|
- else:
|
|
|
- pass
|
|
|
-
|
|
|
- # 站外用户总播放量
|
|
|
- if 'viewCountText' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']:
|
|
|
- out_play_cnt = 0
|
|
|
- elif 'simpleText' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'viewCountText']:
|
|
|
- out_play_cnt = 0
|
|
|
- else:
|
|
|
- out_play_cnt = int(
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'viewCountText']['simpleText'].split('次')[0].replace(',', ''))
|
|
|
-
|
|
|
- # 站外用户注册时间
|
|
|
- if 'joinedDateText' not in \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer']:
|
|
|
- out_create_time = ''
|
|
|
- elif 'runs' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'joinedDateText']:
|
|
|
- out_create_time = ''
|
|
|
- elif len(tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'joinedDateText']['runs']) == 0:
|
|
|
- out_create_time = ''
|
|
|
- elif 'text' not in tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'joinedDateText']['runs'][0]:
|
|
|
- out_create_time = ''
|
|
|
- else:
|
|
|
- out_create_time = \
|
|
|
- tabs[i]['tabRenderer']['content']['sectionListRenderer']['contents'][0][
|
|
|
- 'itemSectionRenderer']['contents'][0]['channelAboutFullMetadataRenderer'][
|
|
|
- 'joinedDateText']['runs'][0]['text'].replace('年', '-').replace('月',
|
|
|
- '-').replace(
|
|
|
- '日', '')
|
|
|
- out_user_dict = {
|
|
|
- 'out_user_name': out_user_name,
|
|
|
- 'out_avatar_url': out_avatar_url,
|
|
|
- 'out_fans': out_fans,
|
|
|
- 'out_play_cnt': out_play_cnt,
|
|
|
- 'out_create_time': out_create_time,
|
|
|
- }
|
|
|
- # print(out_user_dict)
|
|
|
- return out_user_dict
|
|
|
+ # print(out_user_dict)
|
|
|
+ return out_user_dict
|
|
|
except Exception as e:
|
|
|
Common.logger(log_type, crawler).error(f'get_out_user_info异常:{e}\n')
|
|
|
|
|
@@ -857,9 +574,9 @@ class Follow:
|
|
|
video_id = data["richItemRenderer"]["content"]['videoRenderer']['videoId']
|
|
|
video_dict = cls.get_video_info(log_type, crawler, out_uid, video_id, machine)
|
|
|
# video_dict = cls.parse_video(video_dict, log_type, crawler, out_uid, video_id, machine)
|
|
|
- # 发布时间<=30天
|
|
|
+ # 发布时间<=7天
|
|
|
publish_time = int(time.mktime(time.strptime(video_dict['publish_time'], "%Y-%m-%d")))
|
|
|
- if int(time.time()) - publish_time <= 3600 * 24 * 30:
|
|
|
+ if int(time.time()) - publish_time <= 3600 * 24 * 7:
|
|
|
cls.download_publish(log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint,
|
|
|
machine)
|
|
|
else:
|
|
@@ -881,9 +598,9 @@ class Follow:
|
|
|
if 'richItemRenderer' in data:
|
|
|
video_id = data["richItemRenderer"]["content"]['videoRenderer']['videoId']
|
|
|
video_dict = cls.get_video_info(log_type, crawler, out_uid, video_id, machine)
|
|
|
- # 发布时间<=30天
|
|
|
+ # 发布时间<=7天
|
|
|
publish_time = int(time.mktime(time.strptime(video_dict['publish_time'], "%Y-%m-%d")))
|
|
|
- if int(time.time()) - publish_time <= 3600 * 24 * 30:
|
|
|
+ if int(time.time()) - publish_time <= 3600 * 24 * 7:
|
|
|
cls.download_publish(log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint,
|
|
|
machine)
|
|
|
else:
|