|  | @@ -4,7 +4,6 @@
 | 
	
		
			
				|  |  |  import os, sys
 | 
	
		
			
				|  |  |  import time
 | 
	
		
			
				|  |  |  import random
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  sys.path.append(os.getcwd())
 | 
	
		
			
				|  |  |  from common.common import Common
 | 
	
		
			
				|  |  |  from common.scheduling_db import MysqlHelper
 | 
	
	
		
			
				|  | @@ -12,27 +11,6 @@ from common.scheduling_db import MysqlHelper
 | 
	
		
			
				|  |  |  # from scheduling_db import MysqlHelper
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -# 过滤词库
 | 
	
		
			
				|  |  | -def filter_word(log_type, crawler, source, env):
 | 
	
		
			
				|  |  | -    """
 | 
	
		
			
				|  |  | -    过滤词库
 | 
	
		
			
				|  |  | -    :param log_type: 日志
 | 
	
		
			
				|  |  | -    :param crawler: 哪款爬虫,如:xiaoniangao
 | 
	
		
			
				|  |  | -    :param source: 哪款爬虫,如:小年糕
 | 
	
		
			
				|  |  | -    :param env: 环境
 | 
	
		
			
				|  |  | -    :return: word_list
 | 
	
		
			
				|  |  | -    """
 | 
	
		
			
				|  |  | -    select_sql = f""" select * from crawler_filter_word where source="{source}" """
 | 
	
		
			
				|  |  | -    words = MysqlHelper.get_values(log_type, crawler, select_sql, env, action='')
 | 
	
		
			
				|  |  | -    word_list = []
 | 
	
		
			
				|  |  | -    if len(words) == 0:
 | 
	
		
			
				|  |  | -        return word_list
 | 
	
		
			
				|  |  | -    for word in words:
 | 
	
		
			
				|  |  | -        word_list.append(word['filter_word'])
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    return word_list
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  def get_user_from_mysql(log_type, crawler, source, env, action=''):
 | 
	
		
			
				|  |  |      sql = f"select * from crawler_user_v3 where source='{source}' and mode='{log_type}'"
 | 
	
		
			
				|  |  |      results = MysqlHelper.get_values(log_type, crawler, sql, env, action=action)
 | 
	
	
		
			
				|  | @@ -109,89 +87,38 @@ def download_rule(log_type, crawler, video_dict, rule_dict):
 | 
	
		
			
				|  |  |      :param rule_dict: 规则信息,字典格式
 | 
	
		
			
				|  |  |      :return: 满足规则,返回 True;反之,返回 False
 | 
	
		
			
				|  |  |      """
 | 
	
		
			
				|  |  | -    rule_playCnt_min = rule_dict.get('playCnt', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    rule_playCnt_max = rule_dict.get('playCnt', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    if rule_playCnt_max == 0:
 | 
	
		
			
				|  |  | -        rule_playCnt_max = 100000000
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    rule_duration_min = rule_dict.get('duration', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    rule_duration_max = rule_dict.get('duration', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    if rule_duration_max == 0:
 | 
	
		
			
				|  |  | -        rule_duration_max = 100000000
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    rule_period_min = rule_dict.get('period', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    # rule_period_max = rule_dict.get('period', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    # if rule_period_max == 0:
 | 
	
		
			
				|  |  | -    #     rule_period_max = 100000000
 | 
	
		
			
				|  |  | -    #
 | 
	
		
			
				|  |  | -    # rule_fans_min = rule_dict.get('fans', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    # rule_fans_max = rule_dict.get('fans', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    # if rule_fans_max == 0:
 | 
	
		
			
				|  |  | -    #     rule_fans_max = 100000000
 | 
	
		
			
				|  |  | -    #
 | 
	
		
			
				|  |  | -    # rule_videos_min = rule_dict.get('videos', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    # rule_videos_max = rule_dict.get('videos', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    # if rule_videos_max == 0:
 | 
	
		
			
				|  |  | -    #     rule_videos_max = 100000000
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    rule_like_min = rule_dict.get('like', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    rule_like_max = rule_dict.get('like', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    if rule_like_max == 0:
 | 
	
		
			
				|  |  | -        rule_like_max = 100000000
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    rule_videoWidth_min = rule_dict.get('videoWidth', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    rule_videoWidth_max = rule_dict.get('videoWidth', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    if rule_videoWidth_max == 0:
 | 
	
		
			
				|  |  | -        rule_videoWidth_max = 100000000
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    rule_videoHeight_min = rule_dict.get('videoHeight', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    rule_videoHeight_max = rule_dict.get('videoHeight', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    if rule_videoHeight_max == 0:
 | 
	
		
			
				|  |  | -        rule_videoHeight_max = 100000000
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    rule_shareCnt_min = rule_dict.get('shareCnt', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    rule_shareCnt_max = rule_dict.get('shareCnt', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    if rule_shareCnt_max == 0:
 | 
	
		
			
				|  |  | -        rule_shareCnt_max = 100000000
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    rule_commentCnt_min = rule_dict.get('commentCnt', {}).get('min', 0)
 | 
	
		
			
				|  |  | -    rule_commentCnt_max = rule_dict.get('commentCnt', {}).get('max', 100000000)
 | 
	
		
			
				|  |  | -    if rule_commentCnt_max == 0:
 | 
	
		
			
				|  |  | -        rule_commentCnt_max = 100000000
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    Common.logger(log_type, crawler).info(
 | 
	
		
			
				|  |  | -        f'rule_duration_max:{rule_duration_max} >= duration:{int(float(video_dict["duration"]))} >= rule_duration_min:{int(rule_duration_min)}')
 | 
	
		
			
				|  |  | -    Common.logger(log_type, crawler).info(
 | 
	
		
			
				|  |  | -        f'rule_playCnt_max:{int(rule_playCnt_max)} >= play_cnt:{int(video_dict["play_cnt"])} >= rule_playCnt_min:{int(rule_playCnt_min)}')
 | 
	
		
			
				|  |  | -    Common.logger(log_type, crawler).info(
 | 
	
		
			
				|  |  | -        f'now:{int(time.time())} - publish_time_stamp:{int(video_dict["publish_time_stamp"])} <= {3600 * 24 * int(rule_period_min)}')
 | 
	
		
			
				|  |  | -    Common.logger(log_type, crawler).info(
 | 
	
		
			
				|  |  | -        f'rule_like_max:{int(rule_like_max)} >= like_cnt:{int(video_dict["like_cnt"])} >= rule_like_min:{int(rule_like_min)}')
 | 
	
		
			
				|  |  | -    Common.logger(log_type, crawler).info(
 | 
	
		
			
				|  |  | -        f'rule_commentCnt_max:{int(rule_commentCnt_max)} >= comment_cnt:{int(video_dict["comment_cnt"])} >= rule_commentCnt_min:{int(rule_commentCnt_min)}')
 | 
	
		
			
				|  |  | -    Common.logger(log_type, crawler).info(
 | 
	
		
			
				|  |  | -        f'rule_shareCnt_max:{int(rule_shareCnt_max)} >= share_cnt:{int(video_dict["share_cnt"])} >= rule_shareCnt_min:{int(rule_shareCnt_min)}')
 | 
	
		
			
				|  |  | -    Common.logger(log_type, crawler).info(
 | 
	
		
			
				|  |  | -        f'rule_videoWidth_max:{int(rule_videoWidth_max)} >= video_width:{int(video_dict["video_width"])} >= rule_videoWidth_min:{int(rule_videoWidth_min)}')
 | 
	
		
			
				|  |  | -    Common.logger(log_type, crawler).info(
 | 
	
		
			
				|  |  | -        f'rule_videoHeight_max:{int(rule_videoHeight_max)} >= video_height:{int(video_dict["video_height"])} >= rule_videoHeight_min:{int(rule_videoHeight_min)}')
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -    if int(rule_duration_max) >= int(float(video_dict["duration"])) >= int(rule_duration_min) \
 | 
	
		
			
				|  |  | -            and int(rule_playCnt_max) >= int(video_dict['play_cnt']) >= int(rule_playCnt_min) \
 | 
	
		
			
				|  |  | -            and int(time.time()) - int(video_dict["publish_time_stamp"]) <= 3600 * 24 * int(rule_period_min) \
 | 
	
		
			
				|  |  | -            and int(rule_like_max) >= int(video_dict['like_cnt']) >= int(rule_like_min) \
 | 
	
		
			
				|  |  | -            and int(rule_commentCnt_max) >= int(video_dict['comment_cnt']) >= int(rule_commentCnt_min) \
 | 
	
		
			
				|  |  | -            and int(rule_shareCnt_max) >= int(video_dict['share_cnt']) >= int(rule_shareCnt_min) \
 | 
	
		
			
				|  |  | -            and int(rule_videoWidth_max) >= int(video_dict['video_width']) >= int(rule_videoWidth_min) \
 | 
	
		
			
				|  |  | -            and int(rule_videoHeight_max) >= int(video_dict['video_height']) >= int(rule_videoHeight_min):
 | 
	
		
			
				|  |  | -        return True
 | 
	
		
			
				|  |  | -    else:
 | 
	
		
			
				|  |  | -        return False
 | 
	
		
			
				|  |  | +    # 格式化 video_dict:publish_time_stamp
 | 
	
		
			
				|  |  | +    if "publish_time_stamp" in video_dict.keys():
 | 
	
		
			
				|  |  | +        video_dict["publish_time"] = video_dict["publish_time_stamp"] * 1000
 | 
	
		
			
				|  |  | +    # 格式化 video_dict:period
 | 
	
		
			
				|  |  | +    if "period" not in video_dict.keys() and "publish_time" in video_dict.keys():
 | 
	
		
			
				|  |  | +        video_dict["period"] = int((int(time.time() * 1000) - video_dict["publish_time"]) / (3600 * 24 * 1000))
 | 
	
		
			
				|  |  | +    # 格式化 rule_dict 最大值取值为 0 的问题
 | 
	
		
			
				|  |  | +    for rule_value in rule_dict.values():
 | 
	
		
			
				|  |  | +        if rule_value["max"] == 0:
 | 
	
		
			
				|  |  | +            rule_value["max"] = 999999999999999
 | 
	
		
			
				|  |  | +    # 格式化 rule_dict 有的 key,video_dict 中没有的问题
 | 
	
		
			
				|  |  | +    for rule_key in rule_dict.keys():
 | 
	
		
			
				|  |  | +        if rule_key not in video_dict.keys():
 | 
	
		
			
				|  |  | +            video_dict[rule_key] = int(rule_dict[rule_key]["max"] / 2)
 | 
	
		
			
				|  |  | +    # 比较结果,输出:True / False
 | 
	
		
			
				|  |  | +    for video_key, video_value in video_dict.items():
 | 
	
		
			
				|  |  | +        for rule_key, rule_value in rule_dict.items():
 | 
	
		
			
				|  |  | +            if video_key == rule_key:
 | 
	
		
			
				|  |  | +                result = rule_value["min"] <= video_value <= rule_value["max"]
 | 
	
		
			
				|  |  | +                # print(f'{video_key}: {rule_value["min"]} <= {video_value} <= {rule_value["max"]},{result}')
 | 
	
		
			
				|  |  | +                Common.logger(log_type, crawler).info(f'{video_key}: {rule_value["min"]} <= {video_value} <= {rule_value["max"]},{result}')
 | 
	
		
			
				|  |  | +                if result is False:
 | 
	
		
			
				|  |  | +                    return False
 | 
	
		
			
				|  |  | +                else:
 | 
	
		
			
				|  |  | +                    continue
 | 
	
		
			
				|  |  | +    return True
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  if __name__ == "__main__":
 | 
	
		
			
				|  |  |      # print(filter_word('public', 'xiaoniangao', '小年糕', 'prod'))
 | 
	
		
			
				|  |  | -    print(get_config_from_mysql('hour', 'xiaoniangao', 'prod', 'emoji'))
 | 
	
		
			
				|  |  | +    # print(get_config_from_mysql('test', 'gongzhonghao', 'prod', 'filter'))
 | 
	
		
			
				|  |  | +    # print(filter_word('test', 'gongzhonghao', '公众号', 'prod'))
 | 
	
		
			
				|  |  |      # task_str = "[('task_id','11')," \
 | 
	
		
			
				|  |  |      #            "('task_name','小年糕小时榜')," \
 | 
	
		
			
				|  |  |      #            "('source','xiaoniangao')," \
 |