|
@@ -21,6 +21,13 @@ proxies = {"http": None, "https": None}
|
|
|
|
|
|
class XiaoniangaoHourScheduling:
|
|
|
platform = "小年糕"
|
|
|
+ words = "abcdefghijklmnopqrstuvwxyz0123456789"
|
|
|
+ uid = f"""{"".join(random.sample(words, 8))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 4))}-{"".join(random.sample(words, 12))}"""
|
|
|
+ token = "".join(random.sample(words, 32))
|
|
|
+ uid_token_dict = {
|
|
|
+ "uid": uid,
|
|
|
+ "token": token
|
|
|
+ }
|
|
|
|
|
|
# 生成 uid、token
|
|
|
@classmethod
|
|
@@ -45,10 +52,10 @@ class XiaoniangaoHourScheduling:
|
|
|
:param rule_dict: 规则信息,字典格式
|
|
|
:return: 满足规则,返回 True;反之,返回 False
|
|
|
"""
|
|
|
- rule_playCnt_min = rule_dict.get('playCnt', {}).get('min', 0)
|
|
|
- rule_playCnt_max = rule_dict.get('playCnt', {}).get('max', 100000000)
|
|
|
- if rule_playCnt_max == 0:
|
|
|
- rule_playCnt_max = 100000000
|
|
|
+ rule_play_cnt_min = rule_dict.get('play_cnt', {}).get('min', 0)
|
|
|
+ rule_play_cnt_max = rule_dict.get('play_cnt', {}).get('max', 100000000)
|
|
|
+ if rule_play_cnt_max == 0:
|
|
|
+ rule_play_cnt_max = 100000000
|
|
|
|
|
|
rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
|
|
|
rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
|
|
@@ -59,67 +66,75 @@ class XiaoniangaoHourScheduling:
|
|
|
# rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
|
|
|
# if rule_period_max == 0:
|
|
|
# rule_period_max = 100000000
|
|
|
- #
|
|
|
- # rule_fans_min = rule_dict.get('fans', {}).get('min', 0)
|
|
|
- # rule_fans_max = rule_dict.get('fans', {}).get('max', 100000000)
|
|
|
- # if rule_fans_max == 0:
|
|
|
- # rule_fans_max = 100000000
|
|
|
- #
|
|
|
- # rule_videos_min = rule_dict.get('videos', {}).get('min', 0)
|
|
|
- # rule_videos_max = rule_dict.get('videos', {}).get('max', 100000000)
|
|
|
- # if rule_videos_max == 0:
|
|
|
- # rule_videos_max = 100000000
|
|
|
-
|
|
|
- rule_like_min = rule_dict.get('like', {}).get('min', 0)
|
|
|
- rule_like_max = rule_dict.get('like', {}).get('max', 100000000)
|
|
|
- if rule_like_max == 0:
|
|
|
- rule_like_max = 100000000
|
|
|
-
|
|
|
- rule_videoWidth_min = rule_dict.get('videoWidth', {}).get('min', 0)
|
|
|
- rule_videoWidth_max = rule_dict.get('videoWidth', {}).get('max', 100000000)
|
|
|
- if rule_videoWidth_max == 0:
|
|
|
- rule_videoWidth_max = 100000000
|
|
|
-
|
|
|
- rule_videoHeight_min = rule_dict.get('videoHeight', {}).get('min', 0)
|
|
|
- rule_videoHeight_max = rule_dict.get('videoHeight', {}).get('max', 100000000)
|
|
|
- if rule_videoHeight_max == 0:
|
|
|
- rule_videoHeight_max = 100000000
|
|
|
-
|
|
|
- rule_shareCnt_min = rule_dict.get('shareCnt', {}).get('min', 0)
|
|
|
- rule_shareCnt_max = rule_dict.get('shareCnt', {}).get('max', 100000000)
|
|
|
- if rule_shareCnt_max == 0:
|
|
|
- rule_shareCnt_max = 100000000
|
|
|
-
|
|
|
- rule_commentCnt_min = rule_dict.get('commentCnt', {}).get('min', 0)
|
|
|
- rule_commentCnt_max = rule_dict.get('commentCnt', {}).get('max', 100000000)
|
|
|
- if rule_commentCnt_max == 0:
|
|
|
- rule_commentCnt_max = 100000000
|
|
|
+
|
|
|
+ rule_fans_cnt_min = rule_dict.get('fans_cnt', {}).get('min', 0)
|
|
|
+ rule_fans_cnt_max = rule_dict.get('fans_cnt', {}).get('max', 100000000)
|
|
|
+ if rule_fans_cnt_max == 0:
|
|
|
+ rule_fans_cnt_max = 100000000
|
|
|
+
|
|
|
+ rule_videos_cnt_min = rule_dict.get('videos_cnt', {}).get('min', 0)
|
|
|
+ rule_videos_cnt_max = rule_dict.get('videos_cnt', {}).get('max', 100000000)
|
|
|
+ if rule_videos_cnt_max == 0:
|
|
|
+ rule_videos_cnt_max = 100000000
|
|
|
+
|
|
|
+ rule_like_cnt_min = rule_dict.get('like_cnt', {}).get('min', 0)
|
|
|
+ rule_like_cnt_max = rule_dict.get('like_cnt', {}).get('max', 100000000)
|
|
|
+ if rule_like_cnt_max == 0:
|
|
|
+ rule_like_cnt_max = 100000000
|
|
|
+
|
|
|
+ rule_width_min = rule_dict.get('width', {}).get('min', 0)
|
|
|
+ rule_width_max = rule_dict.get('width', {}).get('max', 100000000)
|
|
|
+ if rule_width_max == 0:
|
|
|
+ rule_width_max = 100000000
|
|
|
+
|
|
|
+ rule_height_min = rule_dict.get('height', {}).get('min', 0)
|
|
|
+ rule_height_max = rule_dict.get('height', {}).get('max', 100000000)
|
|
|
+ if rule_height_max == 0:
|
|
|
+ rule_height_max = 100000000
|
|
|
+
|
|
|
+ rule_share_cnt_min = rule_dict.get('share_cnt', {}).get('min', 0)
|
|
|
+ rule_share_cnt_max = rule_dict.get('share_cnt', {}).get('max', 100000000)
|
|
|
+ if rule_share_cnt_max == 0:
|
|
|
+ rule_share_cnt_max = 100000000
|
|
|
+
|
|
|
+ rule_comment_cnt_min = rule_dict.get('comment_cnt', {}).get('min', 0)
|
|
|
+ rule_comment_cnt_max = rule_dict.get('comment_cnt', {}).get('max', 100000000)
|
|
|
+ if rule_comment_cnt_max == 0:
|
|
|
+ rule_comment_cnt_max = 100000000
|
|
|
+
|
|
|
+ rule_publish_time_min = rule_dict.get('publish_time', {}).get('min', 0)
|
|
|
+ rule_publish_time_max = rule_dict.get('publish_time', {}).get('max', 100000000)
|
|
|
+ if rule_publish_time_max == 0:
|
|
|
+ rule_publish_time_max = 4102415999000 # 2099-12-31 23:59:59
|
|
|
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f'rule_playCnt_max:{int(rule_playCnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_playCnt_min:{int(rule_playCnt_min)}')
|
|
|
+ f'rule_play_cnt_max:{int(rule_play_cnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_play_cnt_min:{int(rule_play_cnt_min)}')
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f'rule_like_max:{int(rule_like_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_min:{int(rule_like_min)}')
|
|
|
+ f'rule_like_cnt_max:{int(rule_like_cnt_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_cnt_min:{int(rule_like_cnt_min)}')
|
|
|
+ Common.logger(log_type, crawler).info(
|
|
|
+ f'rule_comment_cnt_max:{int(rule_comment_cnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_comment_cnt_min:{int(rule_comment_cnt_min)}')
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f'rule_commentCnt_max:{int(rule_commentCnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_commentCnt_min:{int(rule_commentCnt_min)}')
|
|
|
+ f'rule_share_cnt_max:{int(rule_share_cnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_share_cnt_min:{int(rule_share_cnt_min)}')
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f'rule_shareCnt_max:{int(rule_shareCnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_shareCnt_min:{int(rule_shareCnt_min)}')
|
|
|
+ f'rule_width_max:{int(rule_width_max)} >= video_width:{int(video_dict["video_width"])} >= rule_width_min:{int(rule_width_min)}')
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f'rule_videoWidth_max:{int(rule_videoWidth_max)} >= video_width:{int(video_dict["video_width"])} >= rule_videoWidth_min:{int(rule_videoWidth_min)}')
|
|
|
+ f'rule_height_max:{int(rule_height_max)} >= video_height:{int(video_dict["video_height"])} >= rule_height_min:{int(rule_height_min)}')
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f'rule_videoHeight_max:{int(rule_videoHeight_max)} >= video_height:{int(video_dict["video_height"])} >= rule_videoHeight_min:{int(rule_videoHeight_min)}')
|
|
|
+ f'rule_publish_time_max:{int(rule_publish_time_max)} >= publish_time_stamp:{int(video_dict["publish_time_stamp"])} >= rule_publish_time_min:{int(rule_publish_time_min)}')
|
|
|
|
|
|
if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
|
|
|
- and int(rule_playCnt_max) >= int(video_dict['play_cnt']) >= int(rule_playCnt_min) \
|
|
|
+ and int(rule_play_cnt_max) >= int(video_dict['play_cnt']) >= int(rule_play_cnt_min) \
|
|
|
and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
|
|
|
- and int(rule_like_max) >= int(video_dict['like_cnt']) >= int(rule_like_min) \
|
|
|
- and int(rule_commentCnt_max) >= int(video_dict['comment_cnt']) >= int(rule_commentCnt_min) \
|
|
|
- and int(rule_shareCnt_max) >= int(video_dict['share_cnt']) >= int(rule_shareCnt_min) \
|
|
|
- and int(rule_videoWidth_max) >= int(video_dict['video_width']) >= int(rule_videoWidth_min) \
|
|
|
- and int(rule_videoHeight_max) >= int(video_dict['video_height']) >= int(rule_videoHeight_min):
|
|
|
+ and int(rule_like_cnt_max) >= int(video_dict['like_cnt']) >= int(rule_like_cnt_min) \
|
|
|
+ and int(rule_comment_cnt_max) >= int(video_dict['comment_cnt']) >= int(rule_comment_cnt_min) \
|
|
|
+ and int(rule_share_cnt_max) >= int(video_dict['share_cnt']) >= int(rule_share_cnt_min) \
|
|
|
+ and int(rule_width_max) >= int(video_dict['video_width']) >= int(rule_width_min) \
|
|
|
+ and int(rule_height_max) >= int(video_dict['video_height']) >= int(rule_height_min) \
|
|
|
+ and int(rule_publish_time_max) >= int(video_dict['publish_time_stamp']) >= int(rule_publish_time_min):
|
|
|
return True
|
|
|
else:
|
|
|
return False
|
|
@@ -139,7 +154,7 @@ class XiaoniangaoHourScheduling:
|
|
|
# 获取列表
|
|
|
@classmethod
|
|
|
def get_videoList(cls, log_type, crawler, rule_dict, env):
|
|
|
- uid_token_dict = cls.get_uid_token()
|
|
|
+ uid_token_dict = cls.uid_token_dict
|
|
|
url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
|
|
|
headers = {
|
|
|
"x-b3-traceid": '1c403a4aa72e3c',
|
|
@@ -334,7 +349,7 @@ class XiaoniangaoHourScheduling:
|
|
|
|
|
|
@classmethod
|
|
|
def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
|
|
|
- uid_token_dict = cls.get_uid_token()
|
|
|
+ uid_token_dict = cls.uid_token_dict
|
|
|
url = "https://kapi.xiaoniangao.cn/profile/get_profile_by_id"
|
|
|
headers = {
|
|
|
"x-b3-traceid": '1c403a4aa72e3c',
|
|
@@ -609,9 +624,9 @@ class XiaoniangaoHourScheduling:
|
|
|
if cls.repeat_video(log_type, crawler, video_info_dict["video_id"], env) != 0:
|
|
|
Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
# Play count >= 30000: download directly
|
|
|
- elif int(video_info_dict["play_cnt"]) >= 50000:
|
|
|
+ elif int(video_info_dict["play_cnt"]) >= 30000:
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f"播放量:{video_info_dict['play_cnt']} >= 50000,满足下载规则,开始下载视频")
|
|
|
+ f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
|
|
|
cls.download(log_type=log_type,
|
|
|
crawler=crawler,
|
|
|
video_info_dict=video_info_dict,
|
|
@@ -621,10 +636,10 @@ class XiaoniangaoHourScheduling:
|
|
|
env=env)
|
|
|
|
|
|
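# Rising-chart rule: download when any single time slot (10:00 / 15:00 / 20:00) rose by >= 3000, or when any two of those slots each rose by >= 1000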
|
|
|
- elif int(update_video_info['ten_play_cnt']) >= 5000 or int(
|
|
|
- update_video_info['fifteen_play_cnt']) >= 5000 or int(update_video_info['twenty_play_cnt']) >= 5000:
|
|
|
+ elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
|
|
|
+ update_video_info['fifteen_play_cnt']) >= 3000 or int(update_video_info['twenty_play_cnt']) >= 3000:
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 5000")
|
|
|
+ f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
|
|
|
Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
|
|
|
cls.download(log_type=log_type,
|
|
|
crawler=crawler,
|
|
@@ -634,9 +649,9 @@ class XiaoniangaoHourScheduling:
|
|
|
oss_endpoint=oss_endpoint,
|
|
|
env=env)
|
|
|
|
|
|
- elif int(update_video_info['ten_play_cnt']) >= 2000 and int(update_video_info['fifteen_play_cnt']) >= 2000:
|
|
|
+ elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 2000")
|
|
|
+ f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
|
|
|
Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
|
|
|
cls.download(log_type=log_type,
|
|
|
crawler=crawler,
|
|
@@ -646,9 +661,9 @@ class XiaoniangaoHourScheduling:
|
|
|
oss_endpoint=oss_endpoint,
|
|
|
env=env)
|
|
|
|
|
|
- elif int(update_video_info['fifteen_play_cnt']) >= 2000 and int(update_video_info['twenty_play_cnt']) >= 2000:
|
|
|
+ elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 2000")
|
|
|
+ f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
|
|
|
Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
|
|
|
cls.download(log_type=log_type,
|
|
|
crawler=crawler,
|
|
@@ -658,9 +673,9 @@ class XiaoniangaoHourScheduling:
|
|
|
oss_endpoint=oss_endpoint,
|
|
|
env=env)
|
|
|
|
|
|
- elif int(update_video_info['ten_play_cnt']) >= 2000 and int(update_video_info['twenty_play_cnt']) >= 2000:
|
|
|
+ elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
|
|
|
Common.logger(log_type, crawler).info(
|
|
|
- f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 2000")
|
|
|
+ f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
|
|
|
Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
|
|
|
cls.download(log_type=log_type,
|
|
|
crawler=crawler,
|