|  | @@ -7,6 +7,7 @@
 | 
	
		
			
				|  |  |  import multiprocessing
 | 
	
		
			
				|  |  |  import os
 | 
	
		
			
				|  |  |  import sys
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  |  import traceback
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  import gevent
 | 
	
	
		
			
				|  | @@ -802,90 +803,93 @@ def dup_to_redis_with_timecheck(h_video_ids, now_date, now_h, rule_key, h_rule_k
 | 
	
		
			
				|  |  |      """将地域分组小时级数据与其他召回视频池去重,存入对应的redis"""
 | 
	
		
			
				|  |  |      # 获取并判断其他数据表更新状态
 | 
	
		
			
				|  |  |      redis_helper = RedisHelper()
 | 
	
		
			
				|  |  | -    rule_24h_status = redis_helper.get_data_from_redis(key_name=config_.RULE_24H_DATA_STATUS)
 | 
	
		
			
				|  |  | -    region_24h_status = redis_helper.get_data_from_redis(key_name=config_.REGION_24H_DATA_STATUS)
 | 
	
		
			
				|  |  | -    rule_h_status = redis_helper.get_data_from_redis(key_name=config_.RULE_H_DATA_STATUS)
 | 
	
		
			
				|  |  | -    if rule_24h_status == '1' and region_24h_status == '1' and rule_h_status == '1':
 | 
	
		
			
				|  |  | -        log_.info("dup data start ....")
 | 
	
		
			
				|  |  | -        # ##### 去重更新不区分地域小时级列表,并另存为redis中
 | 
	
		
			
				|  |  | -        if h_rule_key is not None:
 | 
	
		
			
				|  |  | -            h_key_name = \
 | 
	
		
			
				|  |  | -                f"{config_.RECALL_KEY_NAME_PREFIX_BY_H_H}{data_key}:{h_rule_key}:" \
 | 
	
		
			
				|  |  | +    while True:
 | 
	
		
			
				|  |  | +        rule_24h_status = redis_helper.get_data_from_redis(key_name=config_.RULE_24H_DATA_STATUS)
 | 
	
		
			
				|  |  | +        region_24h_status = redis_helper.get_data_from_redis(key_name=config_.REGION_24H_DATA_STATUS)
 | 
	
		
			
				|  |  | +        rule_h_status = redis_helper.get_data_from_redis(key_name=config_.RULE_H_DATA_STATUS)
 | 
	
		
			
				|  |  | +        if rule_24h_status == '1' and region_24h_status == '1' and rule_h_status == '1':
 | 
	
		
			
				|  |  | +            log_.info("dup data start ....")
 | 
	
		
			
				|  |  | +            # ##### 去重更新不区分地域小时级列表,并另存为redis中
 | 
	
		
			
				|  |  | +            if h_rule_key is not None:
 | 
	
		
			
				|  |  | +                h_key_name = \
 | 
	
		
			
				|  |  | +                    f"{config_.RECALL_KEY_NAME_PREFIX_BY_H_H}{data_key}:{h_rule_key}:" \
 | 
	
		
			
				|  |  | +                    f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                h_dup_key_name = \
 | 
	
		
			
				|  |  | +                    f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | +                    f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_key_name,
 | 
	
		
			
				|  |  | +                                       dup_key_name=h_dup_key_name, region=region, political_filter=political_filter,
 | 
	
		
			
				|  |  | +                                       shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +            # ##### 去重更新地域分组小时级24h列表,并另存为redis中
 | 
	
		
			
				|  |  | +            region_24h_key_name = \
 | 
	
		
			
				|  |  | +                f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{data_key}:{region_24h_rule_key}:" \
 | 
	
		
			
				|  |  |                  f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -            h_dup_key_name = \
 | 
	
		
			
				|  |  | -                f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | +            region_24h_dup_key_name = \
 | 
	
		
			
				|  |  | +                f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  |                  f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -            h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_key_name,
 | 
	
		
			
				|  |  | -                                   dup_key_name=h_dup_key_name, region=region, political_filter=political_filter,
 | 
	
		
			
				|  |  | +            h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=region_24h_key_name,
 | 
	
		
			
				|  |  | +                                   dup_key_name=region_24h_dup_key_name, region=region, political_filter=political_filter,
 | 
	
		
			
				|  |  |                                     shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        # ##### 去重更新地域分组小时级24h列表,并另存为redis中
 | 
	
		
			
				|  |  | -        region_24h_key_name = \
 | 
	
		
			
				|  |  | -            f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{data_key}:{region_24h_rule_key}:" \
 | 
	
		
			
				|  |  | -            f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -        region_24h_dup_key_name = \
 | 
	
		
			
				|  |  | -            f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | -            f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -        h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=region_24h_key_name,
 | 
	
		
			
				|  |  | -                               dup_key_name=region_24h_dup_key_name, region=region, political_filter=political_filter,
 | 
	
		
			
				|  |  | -                               shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        if rule_rank_h_flag == '48h':
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -            # ##### 去重小程序相对48h更新结果,并另存为redis中
 | 
	
		
			
				|  |  | -            h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H}{data_key}:{by_48h_rule_key}:" \
 | 
	
		
			
				|  |  | -                             f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -            h_48h_dup_key_name = \
 | 
	
		
			
				|  |  | -                f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | -                f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -            h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_48h_key_name,
 | 
	
		
			
				|  |  | -                                   dup_key_name=h_48h_dup_key_name, region=region, political_filter=political_filter,
 | 
	
		
			
				|  |  | -                                   shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  | +            if rule_rank_h_flag == '48h':
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -            # ##### 去重小程序相对48h 筛选后剩余数据 更新结果,并另存为redis中
 | 
	
		
			
				|  |  | -            if by_48h_rule_key == 'rule1':
 | 
	
		
			
				|  |  | -                other_h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H_OTHER}{data_key}:" \
 | 
	
		
			
				|  |  | -                                       f"{by_48h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -                other_h_48h_dup_key_name = \
 | 
	
		
			
				|  |  | -                    f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | +                # ##### 去重小程序相对48h更新结果,并另存为redis中
 | 
	
		
			
				|  |  | +                h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H}{data_key}:{by_48h_rule_key}:" \
 | 
	
		
			
				|  |  | +                                 f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                h_48h_dup_key_name = \
 | 
	
		
			
				|  |  | +                    f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  |                      f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -                h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_48h_key_name,
 | 
	
		
			
				|  |  | -                                       dup_key_name=other_h_48h_dup_key_name, region=region,
 | 
	
		
			
				|  |  | -                                       political_filter=political_filter, shield_config=shield_config,
 | 
	
		
			
				|  |  | -                                       dup_remove=dup_remove)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | +                h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_48h_key_name,
 | 
	
		
			
				|  |  | +                                       dup_key_name=h_48h_dup_key_name, region=region, political_filter=political_filter,
 | 
	
		
			
				|  |  | +                                       shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                # ##### 去重小程序相对48h 筛选后剩余数据 更新结果,并另存为redis中
 | 
	
		
			
				|  |  | +                if by_48h_rule_key == 'rule1':
 | 
	
		
			
				|  |  | +                    other_h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H_OTHER}{data_key}:" \
 | 
	
		
			
				|  |  | +                                           f"{by_48h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                    other_h_48h_dup_key_name = \
 | 
	
		
			
				|  |  | +                        f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | +                        f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                    h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_48h_key_name,
 | 
	
		
			
				|  |  | +                                           dup_key_name=other_h_48h_dup_key_name, region=region,
 | 
	
		
			
				|  |  | +                                           political_filter=political_filter, shield_config=shield_config,
 | 
	
		
			
				|  |  | +                                           dup_remove=dup_remove)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +            else:
 | 
	
		
			
				|  |  | +                # ##### 去重小程序相对24h更新结果,并另存为redis中
 | 
	
		
			
				|  |  | +                h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{by_24h_rule_key}:" \
 | 
	
		
			
				|  |  | +                                 f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                h_24h_dup_key_name = \
 | 
	
		
			
				|  |  | +                    f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | +                    f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_24h_key_name,
 | 
	
		
			
				|  |  | +                                       dup_key_name=h_24h_dup_key_name, region=region, political_filter=political_filter,
 | 
	
		
			
				|  |  | +                                       shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +                # ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
 | 
	
		
			
				|  |  | +                # if by_24h_rule_key in ['rule3', 'rule4']:
 | 
	
		
			
				|  |  | +                other_h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:" \
 | 
	
		
			
				|  |  | +                                       f"{by_24h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                other_h_24h_dup_key_name = \
 | 
	
		
			
				|  |  | +                    f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | +                    f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | +                h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_24h_key_name,
 | 
	
		
			
				|  |  | +                                       dup_key_name=other_h_24h_dup_key_name, region=region,
 | 
	
		
			
				|  |  | +                                       political_filter=political_filter,
 | 
	
		
			
				|  |  | +                                       shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  | +            break
 | 
	
		
			
				|  |  |          else:
 | 
	
		
			
				|  |  | -            # ##### 去重小程序相对24h更新结果,并另存为redis中
 | 
	
		
			
				|  |  | -            h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{by_24h_rule_key}:" \
 | 
	
		
			
				|  |  | -                             f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -            h_24h_dup_key_name = \
 | 
	
		
			
				|  |  | -                f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | -                f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -            h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_24h_key_name,
 | 
	
		
			
				|  |  | -                                   dup_key_name=h_24h_dup_key_name, region=region, political_filter=political_filter,
 | 
	
		
			
				|  |  | -                                   shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -            # ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
 | 
	
		
			
				|  |  | -            # if by_24h_rule_key in ['rule3', 'rule4']:
 | 
	
		
			
				|  |  | -            other_h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:" \
 | 
	
		
			
				|  |  | -                                   f"{by_24h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -            other_h_24h_dup_key_name = \
 | 
	
		
			
				|  |  | -                f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
 | 
	
		
			
				|  |  | -                f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
 | 
	
		
			
				|  |  | -            h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_24h_key_name,
 | 
	
		
			
				|  |  | -                                   dup_key_name=other_h_24h_dup_key_name, region=region,
 | 
	
		
			
				|  |  | -                                   political_filter=political_filter,
 | 
	
		
			
				|  |  | -                                   shield_config=shield_config, dup_remove=dup_remove)
 | 
	
		
			
				|  |  | -    else:
 | 
	
		
			
				|  |  | -        # 数据没准备好,1分钟后重新检查
 | 
	
		
			
				|  |  | -        log_.info("dup data wait ....")
 | 
	
		
			
				|  |  | -        Timer(
 | 
	
		
			
				|  |  | -            60,
 | 
	
		
			
				|  |  | -            dup_to_redis_with_timecheck,
 | 
	
		
			
				|  |  | -            args=[h_video_ids, now_date, now_h, rule_key, h_rule_key, region_24h_rule_key,
 | 
	
		
			
				|  |  | -                  by_24h_rule_key, by_48h_rule_key, region, data_key, rule_rank_h_flag,
 | 
	
		
			
				|  |  | -                  political_filter, shield_config, dup_remove]
 | 
	
		
			
				|  |  | -        ).start()
 | 
	
		
			
				|  |  | +            # 数据没准备好,1分钟后重新检查
 | 
	
		
			
				|  |  | +            log_.info("dup data wait ....")
 | 
	
		
			
				|  |  | +            time.sleep(60)
 | 
	
		
			
				|  |  | +            # Timer(
 | 
	
		
			
				|  |  | +            #     60,
 | 
	
		
			
				|  |  | +            #     dup_to_redis_with_timecheck,
 | 
	
		
			
				|  |  | +            #     args=[h_video_ids, now_date, now_h, rule_key, h_rule_key, region_24h_rule_key,
 | 
	
		
			
				|  |  | +            #           by_24h_rule_key, by_48h_rule_key, region, data_key, rule_rank_h_flag,
 | 
	
		
			
				|  |  | +            #           political_filter, shield_config, dup_remove]
 | 
	
		
			
				|  |  | +            # ).start()
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 |