|
@@ -11,6 +11,7 @@ import time
|
|
|
from hashlib import md5
|
|
|
import requests
|
|
|
import urllib3
|
|
|
+from common.mq import MQ
|
|
|
sys.path.append(os.getcwd())
|
|
|
from common.common import Common
|
|
|
from common.feishu import Feishu
|
|
@@ -364,6 +365,21 @@ class XiaoniangaoHourScheduling:
|
|
|
Common.logger(log_type, crawler).error(f'更新{update_video_info["video_title"]}时异常:{e}\n')
|
|
|
Common.logging(log_type, crawler, env, f'更新{update_video_info["video_title"]}时异常:{e}\n')
|
|
|
|
|
|
+ @classmethod
|
|
|
+ def send_to_mq(cls, log_type, crawler, video_info_dict, rule_dict, env):  # Adds the fields below to video_info_dict IN PLACE (the caller's dict is modified), then sends that dict to the MQ topic "topic_crawler_etl_" + env. No explicit return value.
|
|
|
+ video_info_dict["out_user_id"] = video_info_dict["profile_id"]  # copy of the existing "profile_id" value; keys are indexed directly, so a missing source key raises KeyError if this is a plain dict
|
|
|
+ video_info_dict["platform"] = crawler  # "platform" is set to the crawler argument
|
|
|
+ video_info_dict["strategy"] = log_type  # "strategy" is set to the log_type argument
|
|
|
+ video_info_dict["out_video_id"] = video_info_dict["video_id"]  # copy of the existing "video_id" value
|
|
|
+ video_info_dict["width"] = video_info_dict["video_width"]  # copy of the existing "video_width" value
|
|
|
+ video_info_dict["height"] = video_info_dict["video_height"]  # copy of the existing "video_height" value
|
|
|
+ video_info_dict["crawler_rule"] = json.dumps(rule_dict)  # rule_dict is stored as a JSON string, not as a nested object
|
|
|
+ video_info_dict["user_id"] = video_info_dict["uid"]  # copy of the existing "uid" value
|
|
|
+ video_info_dict["publish_time"] = video_info_dict["publish_time_str"]  # copy of the existing "publish_time_str" value
|
|
|
+
|
|
|
+ mq = MQ(topic_name="topic_crawler_etl_" + env)  # a new MQ object is constructed on every call; the topic name is suffixed with env
|
|
|
+ mq.send_msg(video_info_dict)  # sends the whole dict: the original keys plus the fields added above
|
|
|
+
|
|
|
@classmethod
|
|
|
def download(cls, log_type, crawler, video_info_dict, rule_dict, our_uid, env):
|
|
|
# 下载视频
|
|
@@ -491,12 +507,13 @@ class XiaoniangaoHourScheduling:
|
|
|
elif int(video_info_dict["play_cnt"]) >= 30000:
|
|
|
Common.logger(log_type, crawler).info(f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
|
|
|
Common.logging(log_type, crawler, env, f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
|
|
|
- cls.download(log_type=log_type,
|
|
|
- crawler=crawler,
|
|
|
- video_info_dict=video_info_dict,
|
|
|
- rule_dict=rule_dict,
|
|
|
- our_uid=our_uid,
|
|
|
- env=env)
|
|
|
+ # cls.download(log_type=log_type,
|
|
|
+ # crawler=crawler,
|
|
|
+ # video_info_dict=video_info_dict,
|
|
|
+ # rule_dict=rule_dict,
|
|
|
+ # our_uid=our_uid,
|
|
|
+ # env=env)
|
|
|
+ cls.send_to_mq(log_type=log_type, crawler=crawler, video_info_dict=video_info_dict, rule_dict=rule_dict, env=env)
|
|
|
|
|
|
# 上升榜判断逻辑,任意时间段上升量>=5000,连续两个时间段上升量>=2000
|
|
|
elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
|
|
@@ -505,24 +522,26 @@ class XiaoniangaoHourScheduling:
|
|
|
Common.logging(log_type, crawler, env, f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
|
|
|
Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
|
|
|
Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
|
|
|
- cls.download(log_type=log_type,
|
|
|
- crawler=crawler,
|
|
|
- video_info_dict=video_info_dict,
|
|
|
- rule_dict=rule_dict,
|
|
|
- our_uid=our_uid,
|
|
|
- env=env)
|
|
|
+ # cls.download(log_type=log_type,
|
|
|
+ # crawler=crawler,
|
|
|
+ # video_info_dict=video_info_dict,
|
|
|
+ # rule_dict=rule_dict,
|
|
|
+ # our_uid=our_uid,
|
|
|
+ # env=env)
|
|
|
+ cls.send_to_mq(log_type=log_type, crawler=crawler, video_info_dict=video_info_dict, rule_dict=rule_dict, env=env)
|
|
|
|
|
|
elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
|
|
|
Common.logger(log_type, crawler).info(f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
|
|
|
Common.logging(log_type, crawler, env, f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
|
|
|
Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
|
|
|
Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
|
|
|
- cls.download(log_type=log_type,
|
|
|
- crawler=crawler,
|
|
|
- video_info_dict=video_info_dict,
|
|
|
- rule_dict=rule_dict,
|
|
|
- our_uid=our_uid,
|
|
|
- env=env)
|
|
|
+ # cls.download(log_type=log_type,
|
|
|
+ # crawler=crawler,
|
|
|
+ # video_info_dict=video_info_dict,
|
|
|
+ # rule_dict=rule_dict,
|
|
|
+ # our_uid=our_uid,
|
|
|
+ # env=env)
|
|
|
+ cls.send_to_mq(log_type=log_type, crawler=crawler, video_info_dict=video_info_dict, rule_dict=rule_dict, env=env)
|
|
|
|
|
|
elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
|
|
|
Common.logger(log_type, crawler).info(
|
|
@@ -530,12 +549,13 @@ class XiaoniangaoHourScheduling:
|
|
|
Common.logging(log_type, crawler, env, f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
|
|
|
Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
|
|
|
Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
|
|
|
- cls.download(log_type=log_type,
|
|
|
- crawler=crawler,
|
|
|
- video_info_dict=video_info_dict,
|
|
|
- rule_dict=rule_dict,
|
|
|
- our_uid=our_uid,
|
|
|
- env=env)
|
|
|
+ # cls.download(log_type=log_type,
|
|
|
+ # crawler=crawler,
|
|
|
+ # video_info_dict=video_info_dict,
|
|
|
+ # rule_dict=rule_dict,
|
|
|
+ # our_uid=our_uid,
|
|
|
+ # env=env)
|
|
|
+ cls.send_to_mq(log_type=log_type, crawler=crawler, video_info_dict=video_info_dict, rule_dict=rule_dict, env=env)
|
|
|
|
|
|
elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
|
|
|
Common.logger(log_type, crawler).info(
|
|
@@ -543,12 +563,13 @@ class XiaoniangaoHourScheduling:
|
|
|
Common.logging(log_type, crawler, env, f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
|
|
|
Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
|
|
|
Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
|
|
|
- cls.download(log_type=log_type,
|
|
|
- crawler=crawler,
|
|
|
- video_info_dict=video_info_dict,
|
|
|
- rule_dict=rule_dict,
|
|
|
- our_uid=our_uid,
|
|
|
- env=env)
|
|
|
+ # cls.download(log_type=log_type,
|
|
|
+ # crawler=crawler,
|
|
|
+ # video_info_dict=video_info_dict,
|
|
|
+ # rule_dict=rule_dict,
|
|
|
+ # our_uid=our_uid,
|
|
|
+ # env=env)
|
|
|
+ cls.send_to_mq(log_type=log_type, crawler=crawler, video_info_dict=video_info_dict, rule_dict=rule_dict, env=env)
|
|
|
|
|
|
else:
|
|
|
Common.logger(log_type, crawler).info("上升量不满足下载规则")
|