|
@@ -277,167 +277,12 @@ def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region_24h_rule_key, by
|
|
|
dup_key_name=other_h_24h_dup_key_name, region=region)
|
|
|
|
|
|
# ##### 去重小程序模型更新结果,并另存为redis中
|
|
|
- model_key_name = get_rov_redis_key(now_date=now_date)
|
|
|
- model_data_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=model_key_name,
|
|
|
- dup_key_name=model_data_dup_key_name, region=region)
|
|
|
-
|
|
|
- """
|
|
|
- redis_helper = RedisHelper()
|
|
|
- # # ##### 去重更新地域分组天级列表,并另存为redis中
|
|
|
- # region_day_key_name = \
|
|
|
- # f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_DAY}{region}.rule1." \
|
|
|
- # f"{datetime.datetime.strftime(now_date, '%Y%m%d')}"
|
|
|
- # if redis_helper.key_exists(key_name=region_day_key_name):
|
|
|
- # region_day_data = redis_helper.get_data_zset_with_index(
|
|
|
- # key_name=region_day_key_name, start=0, end=-1, with_scores=True)
|
|
|
- # log_.info(f'region day data count = {len(region_day_data)}')
|
|
|
- # region_day_dup = {}
|
|
|
- # for video_id, score in region_day_data:
|
|
|
- # if int(video_id) not in h_video_ids:
|
|
|
- # region_day_dup[int(video_id)] = score
|
|
|
- # h_video_ids.append(int(video_id))
|
|
|
- # log_.info(f"region day data dup count = {len(region_day_dup)}")
|
|
|
- # region_day_dup_key_name = \
|
|
|
- # f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_DAY_H}{region}.{rule_key}." \
|
|
|
- # f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
- # if len(region_day_dup) > 0:
|
|
|
- # redis_helper.add_data_with_zset(key_name=region_day_dup_key_name, data=region_day_dup, expire_time=23 * 3600)
|
|
|
-
|
|
|
-
|
|
|
- if redis_helper.key_exists(key_name=region_24h_key_name):
|
|
|
- region_24h_data = redis_helper.get_all_data_from_zset(key_name=region_24h_key_name, with_scores=True)
|
|
|
- # log_.info(f'region 24h data count = {len(region_24h_data)}')
|
|
|
-
|
|
|
- # 屏蔽视频过滤
|
|
|
- region_24h_video_ids = [int(video_id) for video_id, _ in region_24h_data]
|
|
|
- shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
|
|
|
- if shield_key_name_list is not None:
|
|
|
- region_24h_video_ids = filter_shield_video(video_ids=region_24h_video_ids, shield_key_name_list=shield_key_name_list)
|
|
|
- # log_.info(f"shield filtered_videos count = {len(region_24h_video_ids)}")
|
|
|
-
|
|
|
- region_24h_dup = {}
|
|
|
- for video_id, score in region_24h_data:
|
|
|
- if int(video_id) not in h_video_ids and int(video_id) in region_24h_video_ids:
|
|
|
- region_24h_dup[int(video_id)] = score
|
|
|
- h_video_ids.append(int(video_id))
|
|
|
- # log_.info(f"region 24h data dup count = {len(region_24h_dup)}")
|
|
|
- region_24h_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- if len(region_24h_dup) > 0:
|
|
|
- redis_helper.add_data_with_zset(key_name=region_24h_dup_key_name, data=region_24h_dup, expire_time=23 * 3600)
|
|
|
- # 限流视频score调整
|
|
|
- update_limit_video_score(initial_videos=region_24h_dup, key_name=region_24h_dup_key_name)
|
|
|
- # 清空线上过滤应用列表
|
|
|
- # redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER_24H}{app_type}.{data_key}.{region}.{rule_key}")
|
|
|
-
|
|
|
- # ##### 去重小程序天级更新结果,并另存为redis中
|
|
|
- # day_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_DAY}rule2.{datetime.datetime.strftime(now_date, '%Y%m%d')}"
|
|
|
- # if redis_helper.key_exists(key_name=day_key_name):
|
|
|
- # day_data = redis_helper.get_data_zset_with_index(
|
|
|
- # key_name=day_key_name, start=0, end=-1, with_scores=True)
|
|
|
- # log_.info(f'day data count = {len(day_data)}')
|
|
|
- # day_dup = {}
|
|
|
- # for video_id, score in day_data:
|
|
|
- # if int(video_id) not in h_video_ids:
|
|
|
- # day_dup[int(video_id)] = score
|
|
|
- # h_video_ids.append(int(video_id))
|
|
|
- # log_.info(f"day data dup count = {len(day_dup)}")
|
|
|
- # day_dup_key_name = \
|
|
|
- # f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_DAY_H}{region}.{rule_key}." \
|
|
|
- # f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
- # if len(day_dup) > 0:
|
|
|
- # redis_helper.add_data_with_zset(key_name=day_dup_key_name, data=day_dup, expire_time=23 * 3600)
|
|
|
-
|
|
|
-
|
|
|
- # ##### 去重小程序相对24h更新结果,并另存为redis中
|
|
|
- day_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}{app_type}:{data_key}:{by_24h_rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- if redis_helper.key_exists(key_name=day_key_name):
|
|
|
- day_data = redis_helper.get_all_data_from_zset(key_name=day_key_name, with_scores=True)
|
|
|
- # log_.info(f'24h data count = {len(day_data)}')
|
|
|
-
|
|
|
- # 屏蔽视频过滤
|
|
|
- day_video_ids = [int(video_id) for video_id, _ in day_data]
|
|
|
- shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
|
|
|
- if shield_key_name_list is not None:
|
|
|
- day_video_ids = filter_shield_video(video_ids=day_video_ids, shield_key_name_list=shield_key_name_list)
|
|
|
- # log_.info(f"shield filtered_videos count = {len(day_video_ids)}")
|
|
|
-
|
|
|
- day_dup = {}
|
|
|
- for video_id, score in day_data:
|
|
|
- if int(video_id) not in h_video_ids and int(video_id) in day_video_ids:
|
|
|
- day_dup[int(video_id)] = score
|
|
|
- h_video_ids.append(int(video_id))
|
|
|
- # log_.info(f"24h data dup count = {len(day_dup)}")
|
|
|
- day_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- if len(day_dup) > 0:
|
|
|
- redis_helper.add_data_with_zset(key_name=day_dup_key_name, data=day_dup, expire_time=23 * 3600)
|
|
|
- # 限流视频score调整
|
|
|
- update_limit_video_score(initial_videos=day_dup, key_name=day_dup_key_name)
|
|
|
- # 清空线上过滤应用列表
|
|
|
- # redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER_24H}{region}.{app_type}.{data_key}.{rule_key}")
|
|
|
-
|
|
|
- # ##### 去重小程序相对24h 筛选后剩余数据 更新结果,并另存为redis中
|
|
|
- if by_24h_rule_key == 'rule3':
|
|
|
- other_h_24h_recall_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H_OTHER}{app_type}:{data_key}:" \
|
|
|
- f"{by_24h_rule_key}:{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- if redis_helper.key_exists(key_name=other_h_24h_recall_key_name):
|
|
|
- other_24h_data = redis_helper.get_all_data_from_zset(key_name=other_h_24h_recall_key_name, with_scores=True)
|
|
|
- log_.info(f'24h other data count = {len(other_24h_data)}')
|
|
|
-
|
|
|
- # 屏蔽视频过滤
|
|
|
- other_24h_video_ids = [int(video_id) for video_id, _ in other_24h_data]
|
|
|
- shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
|
|
|
- if shield_key_name_list is not None:
|
|
|
- other_24h_video_ids = filter_shield_video(video_ids=other_24h_video_ids, shield_key_name_list=shield_key_name_list)
|
|
|
- log_.info(f"shield filtered_videos count = {len(other_24h_video_ids)}")
|
|
|
-
|
|
|
- other_24h_dup = {}
|
|
|
- for video_id, score in other_24h_data:
|
|
|
- if int(video_id) not in h_video_ids and int(video_id) in other_24h_video_ids:
|
|
|
- other_24h_dup[int(video_id)] = score
|
|
|
- h_video_ids.append(int(video_id))
|
|
|
- log_.info(f"other 24h data dup count = {len(other_24h_dup)}")
|
|
|
- other_24h_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP3_REGION_24H_H}{region}:{app_type}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- if len(other_24h_dup) > 0:
|
|
|
- redis_helper.add_data_with_zset(key_name=other_24h_dup_key_name, data=other_24h_dup, expire_time=23 * 3600)
|
|
|
- # 限流视频score调整
|
|
|
- update_limit_video_score(initial_videos=other_24h_dup, key_name=other_24h_dup_key_name)
|
|
|
-
|
|
|
- # ##### 去重小程序模型更新结果,并另存为redis中
|
|
|
- model_key_name = get_rov_redis_key(now_date=now_date)
|
|
|
- model_data = redis_helper.get_all_data_from_zset(key_name=model_key_name, with_scores=True)
|
|
|
- # log_.info(f'model data count = {len(model_data)}')
|
|
|
-
|
|
|
- # 屏蔽视频过滤
|
|
|
- model_video_ids = [int(video_id) for video_id, _ in model_data]
|
|
|
- shield_key_name_list = config_.SHIELD_CONFIG.get(region, None)
|
|
|
- if shield_key_name_list is not None:
|
|
|
- model_video_ids = filter_shield_video(video_ids=model_video_ids, shield_key_name_list=shield_key_name_list)
|
|
|
- # log_.info(f"shield filtered_videos count = {len(model_video_ids)}")
|
|
|
-
|
|
|
- model_data_dup = {}
|
|
|
- for video_id, score in model_data:
|
|
|
- if int(video_id) not in h_video_ids and int(video_id) in model_video_ids:
|
|
|
- model_data_dup[int(video_id)] = score
|
|
|
- h_video_ids.append(int(video_id))
|
|
|
- # log_.info(f"model data dup count = {len(model_data_dup)}")
|
|
|
- model_data_dup_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}:{app_type}:{data_key}:{rule_key}:" \
|
|
|
- f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
- if len(model_data_dup) > 0:
|
|
|
- redis_helper.add_data_with_zset(key_name=model_data_dup_key_name, data=model_data_dup, expire_time=23 * 3600)
|
|
|
- # 限流视频score调整
|
|
|
- update_limit_video_score(initial_videos=model_data_dup, key_name=model_data_dup_key_name)
|
|
|
- """
|
|
|
+ # model_key_name = get_rov_redis_key(now_date=now_date)
|
|
|
+ # model_data_dup_key_name = \
|
|
|
+ # f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}:{data_key}:{rule_key}:" \
|
|
|
+ # f"{datetime.datetime.strftime(now_date, '%Y%m%d')}:{now_h}"
|
|
|
+ # h_video_ids = dup_data(h_video_ids=h_video_ids, initial_key_name=model_key_name,
|
|
|
+ # dup_key_name=model_data_dup_key_name, region=region)
|
|
|
|
|
|
|
|
|
def merge_df(df_left, df_right):
|