|
@@ -230,6 +230,40 @@ def download_rule(log_type, crawler, video_dict, rule_dict):
|
|
return True
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
def download_rule_v2(log_type, crawler, video_dict, rule_dict):
    """
    Check whether a video satisfies the basic download rules.

    ``video_dict`` is normalised in place before the check:

    * a truthy ``publish_time_stamp`` is multiplied by 1000 and stored as
      ``publish_time`` (presumably seconds -> milliseconds; confirm with
      the callers);
    * when ``publish_time`` is truthy and ``period`` is absent (or equal
      to the ``"noperiod"`` sentinel), ``period`` is set to the whole
      number of days elapsed between ``publish_time`` and now.

    Every key of ``video_dict`` that has a truthy entry in ``rule_dict``
    is then range-checked; each check result is logged through
    ``Common.logger(log_type, crawler)``.

    :param log_type: log type, forwarded to ``Common.logger``
    :param crawler: crawler name, forwarded to ``Common.logger``
    :param video_dict: video information (dict); mutated in place
    :param rule_dict: rule information (dict) mapping a ``video_dict``
        key to a dict with ``"min"`` and ``"max"`` entries
    :return: True when every applicable rule is satisfied, False as soon
        as one rule fails
    """
    # A rule whose "max" is 0 (or negative) means "no upper bound".
    unbounded_max = 999999999999999

    # Normalise video_dict: publish_time_stamp -> publish_time
    if video_dict.get("publish_time_stamp"):
        video_dict["publish_time"] = video_dict["publish_time_stamp"] * 1000

    # Normalise video_dict: derive period (in days) when it was not supplied
    if video_dict.get("publish_time") and video_dict.get("period", "noperiod") == "noperiod":
        now_ms = int(time.time() * 1000)
        video_dict["period"] = int((now_ms - video_dict["publish_time"]) / (3600 * 24 * 1000))

    for key in video_dict:
        rule = rule_dict.get(key)
        if not rule:
            # No rule configured for this field: nothing to check.
            continue

        max_value = int(rule["max"]) if int(rule["max"]) > 0 else unbounded_max

        if key == "period":
            # The age check only has an upper bound; its lower bound is
            # fixed at 0 and the rule's "min" is intentionally not read.
            lower_bound = 0
            lower_shown = 0
        else:
            lower_bound = int(rule["min"])
            # Log the rule's "min" exactly as configured.
            lower_shown = rule["min"]

        # Chained comparison on the integer value itself (not on the
        # boolean result of a comparison).
        flag = lower_bound <= int(video_dict[key]) <= max_value
        Common.logger(log_type, crawler).info(
            '{}: {} <= {} <= {}, {}'.format(key, lower_shown, video_dict[key], max_value, flag)
        )
        if not flag:
            return False

    return True
|
|
|
|
+
|
|
|
|
+
|
|
def get_word_score(log_type, crawler, score_sheet, word):
|
|
def get_word_score(log_type, crawler, score_sheet, word):
|
|
while True:
|
|
while True:
|
|
score_sheet = Feishu.get_values_batch(log_type, crawler, score_sheet)
|
|
score_sheet = Feishu.get_values_batch(log_type, crawler, score_sheet)
|