|
@@ -7,6 +7,7 @@
|
|
|
import multiprocessing
|
|
|
import os
|
|
|
import sys
|
|
|
+import time
|
|
|
import traceback
|
|
|
|
|
|
import gevent
|
|
@@ -802,90 +803,93 @@ def dup_to_redis_with_timecheck(h_video_ids, now_date, now_h, rule_key, h_rule_k
|
|
|
"""将地域分组小时级数据与其他召回视频池去重,存入对应的redis"""
|
|
|
# 获取并判断其他数据表更新状态
|
|
|
redis_helper = RedisHelper()
|
|
|
- rule_24h_status = redis_helper.get_data_from_redis(key_name=config_.RULE_24H_DATA_STATUS)
|
|
|
- region_24h_status = redis_helper.get_data_from_redis(key_name=config_.REGION_24H_DATA_STATUS)
|
|
|
- rule_h_status = redis_helper.get_data_from_redis(key_name=config_.RULE_H_DATA_STATUS)
|
|
|
- if rule_24h_status == '1' and region_24h_status == '1' and rule_h_status == '1':
|
|
|
- log_.info("dup data start ....")
|
|
|
- # ##### 去重更新不区分地域小时级列表,并另存为redis中
|
|
|
- if h_rule_key is not None:
|
|
|
- h_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_BY_H_H}{data_key}:{h_rule_key}:" \
|
|
|
+ while True:
|
|
|
+ rule_24h_status = redis_helper.get_data_from_redis(key_name=config_.RULE_24H_DATA_STATUS)
|
|
|
+ region_24h_status = redis_helper.get_data_from_redis(key_name=config_.REGION_24H_DATA_STATUS)
|
|
|
+ rule_h_status = redis_helper.get_data_from_redis(key_name=config_.RULE_H_DATA_STATUS)
|
|
|
+ if rule_24h_status == '1' and region_24h_status == '1' and rule_h_status == '1':
|
|
|
+ log_.info("dup data start ....")
|
|
|
+ # ##### 去重更新不区分地域小时级列表,并另存为redis中
|
|
|
+ if h_rule_key is not None:
|
|
|
+ h_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_BY_H_H}{data_key}:{h_rule_key}:" \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ h_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_key_name,
|
|
|
+ dup_key_name=h_dup_key_name, region=region, political_filter=political_filter,
|
|
|
+ shield_config=shield_config, dup_remove=dup_remove)
|
|
|
+
|
|
|
+ # ##### 去重更新地域分组小时级24h列表,并另存为redis中
|
|
|
+ region_24h_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{data_key}:{region_24h_rule_key}:" \
|
|
|
f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
+ region_24h_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_key_name,
|
|
|
- dup_key_name=h_dup_key_name, region=region, political_filter=political_filter,
|
|
|
+ h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=region_24h_key_name,
|
|
|
+ dup_key_name=region_24h_dup_key_name, region=region, political_filter=political_filter,
|
|
|
shield_config=shield_config, dup_remove=dup_remove)
|
|
|
|
|
|
- # ##### 去重更新地域分组小时级24h列表,并另存为redis中
|
|
|
- region_24h_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_24H}{region}:{data_key}:{region_24h_rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- region_24h_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=region_24h_key_name,
|
|
|
- dup_key_name=region_24h_dup_key_name, region=region, political_filter=political_filter,
|
|
|
- shield_config=shield_config, dup_remove=dup_remove)
|
|
|
-
|
|
|
- if rule_rank_h_flag == '48h':
|
|
|
-
|
|
|
- # ##### 去重小程序相对48h更新结果,并另存为redis中
|
|
|
- h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H}{data_key}:{by_48h_rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_48h_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_48h_key_name,
|
|
|
- dup_key_name=h_48h_dup_key_name, region=region, political_filter=political_filter,
|
|
|
- shield_config=shield_config, dup_remove=dup_remove)
|
|
|
+ if rule_rank_h_flag == '48h':
|
|
|
|
|
|
- # ##### 去重小程序相对48h 筛选后剩余数据 更新结果,并另存为redis中
|
|
|
- if by_48h_rule_key == 'rule1':
|
|
|
- other_h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H_OTHER}{data_key}:" \
|
|
|
- f"{by_48h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- other_h_48h_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
+ # ##### 去重小程序相对48h更新结果,并另存为redis中
|
|
|
+ h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H}{data_key}:{by_48h_rule_key}:" \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ h_48h_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_48h_key_name,
|
|
|
- dup_key_name=other_h_48h_dup_key_name, region=region,
|
|
|
- political_filter=political_filter, shield_config=shield_config,
|
|
|
- dup_remove=dup_remove)
|
|
|
-
|
|
|
+ h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_48h_key_name,
|
|
|
+ dup_key_name=h_48h_dup_key_name, region=region, political_filter=political_filter,
|
|
|
+ shield_config=shield_config, dup_remove=dup_remove)
|
|
|
+
|
|
|
+ # ##### 去重小程序相对48h 筛选后剩余数据 更新结果,并另存为redis中
|
|
|
+ if by_48h_rule_key == 'rule1':
|
|
|
+ other_h_48h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_48H_OTHER}{data_key}:" \
|
|
|
+ f"{by_48h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ other_h_48h_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_48H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_48h_key_name,
|
|
|
+ dup_key_name=other_h_48h_dup_key_name, region=region,
|
|
|
+ political_filter=political_filter, shield_config=shield_config,
|
|
|
+ dup_remove=dup_remove)
|
|
|
+
|
|
|
+ else:
|
|
|
+ # ##### 去重小程序相对24h更新结果,并另存为redis中
|
|
|
+ h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{by_24h_rule_key}:" \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ h_24h_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_24h_key_name,
|
|
|
+ dup_key_name=h_24h_dup_key_name, region=region, political_filter=political_filter,
|
|
|
+ shield_config=shield_config, dup_remove=dup_remove)
|
|
|
+
|
|
|
+ # ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
|
|
|
+ # if by_24h_rule_key in ['rule3', 'rule4']:
|
|
|
+ other_h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:" \
|
|
|
+ f"{by_24h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ other_h_24h_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_24h_key_name,
|
|
|
+ dup_key_name=other_h_24h_dup_key_name, region=region,
|
|
|
+ political_filter=political_filter,
|
|
|
+ shield_config=shield_config, dup_remove=dup_remove)
|
|
|
+ break
|
|
|
else:
|
|
|
- # ##### 去重小程序相对24h更新结果,并另存为redis中
|
|
|
- h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{data_key}:{by_24h_rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_24h_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=h_24h_key_name,
|
|
|
- dup_key_name=h_24h_dup_key_name, region=region, political_filter=political_filter,
|
|
|
- shield_config=shield_config, dup_remove=dup_remove)
|
|
|
-
|
|
|
- # ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
|
|
|
- # if by_24h_rule_key in ['rule3', 'rule4']:
|
|
|
- other_h_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{data_key}:" \
|
|
|
- f"{by_24h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- other_h_24h_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=other_h_24h_key_name,
|
|
|
- dup_key_name=other_h_24h_dup_key_name, region=region,
|
|
|
- political_filter=political_filter,
|
|
|
- shield_config=shield_config, dup_remove=dup_remove)
|
|
|
- else:
|
|
|
- # 数据没准备好,1分钟后重新检查
|
|
|
- log_.info("dup data wait ....")
|
|
|
- Timer(
|
|
|
- 60,
|
|
|
- dup_to_redis_with_timecheck,
|
|
|
- args=[h_video_ids, now_date, now_h, rule_key, h_rule_key, region_24h_rule_key,
|
|
|
- by_24h_rule_key, by_48h_rule_key, region, data_key, rule_rank_h_flag,
|
|
|
- political_filter, shield_config, dup_remove]
|
|
|
- ).start()
|
|
|
+ # 数据没准备好,1分钟后重新检查
|
|
|
+ log_.info("dup data wait ....")
|
|
|
+ time.sleep(60)
|
|
|
+ # Timer(
|
|
|
+ # 60,
|
|
|
+ # dup_to_redis_with_timecheck,
|
|
|
+ # args=[h_video_ids, now_date, now_h, rule_key, h_rule_key, region_24h_rule_key,
|
|
|
+ # by_24h_rule_key, by_48h_rule_key, region, data_key, rule_rank_h_flag,
|
|
|
+ # political_filter, shield_config, dup_remove]
|
|
|
+ # ).start()
|
|
|
|
|
|
|
|
|
|