|
@@ -155,7 +155,7 @@ def video_rank(df, now_date, now_h, rule_key, param):
|
|
|
filtered_videos = filter_video_status(h_recall_videos)
|
|
|
log_.info('filtered_videos count = {}'.format(len(filtered_videos)))
|
|
|
# 写入对应的redis
|
|
|
- h_video_ids =[]
|
|
|
+ h_video_ids = []
|
|
|
h_recall_result = {}
|
|
|
for video_id in filtered_videos:
|
|
|
score = h_recall_df[h_recall_df['videoid'] == video_id]['score']
|
|
@@ -168,33 +168,55 @@ def video_rank(df, now_date, now_h, rule_key, param):
|
|
|
# 清空线上过滤应用列表
|
|
|
redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER}{rule_key}")
|
|
|
|
|
|
+ dup_to_redis(h_video_ids, now_date, now_h, rule_key)
|
|
|
+
|
|
|
# 去重更新rov模型结果,并另存为redis中
|
|
|
- initial_data_dup = {}
|
|
|
- for video_id, score in initial_data:
|
|
|
+ # initial_data_dup = {}
|
|
|
+ # for video_id, score in initial_data:
|
|
|
+ # if int(video_id) not in h_video_ids:
|
|
|
+ # initial_data_dup[int(video_id)] = score
|
|
|
+ # log_.info(f"initial data dup count = {len(initial_data_dup)}")
|
|
|
+ # initial_key_name = \
|
|
|
+ # f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H}{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
+ # if len(initial_data_dup) > 0:
|
|
|
+ # redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=23 * 3600)
|
|
|
+
|
|
|
+
|
|
|
def _collect_unseen(zset_data, seen_ids, h_video_ids):
    """Return ``{video_id: score}`` for entries whose id has not been seen yet.

    :param zset_data: iterable of ``(video_id, score)`` pairs
    :param seen_ids: set of int video ids already taken; updated in place
    :param h_video_ids: the caller's id list; every newly accepted id is
        appended so the list keeps mirroring ``seen_ids``
    :return: dict mapping int video id -> score for the newly accepted entries
    """
    unseen = {}
    for video_id, score in zset_data:
        vid = int(video_id)
        if vid in seen_ids:
            continue
        seen_ids.add(vid)
        h_video_ids.append(vid)
        unseen[vid] = score
    return unseen


def dup_to_redis(h_video_ids, now_date, now_h, rule_key):
    """De-duplicate the other recall pools against the hourly videos and store them in redis.

    Two pools are processed in order, each one excluding every id accepted
    before it:

    1. the 24h rule data (read only if its key exists), written under the
       ``RECALL_KEY_NAME_PREFIX_DUP_24H_H`` prefix;
    2. the model (rov) data, written under the ``RECALL_KEY_NAME_PREFIX_DUP_H``
       prefix.

    :param h_video_ids: list of video ids already recalled by the hourly rule.
        NOTE: ids accepted from the two pools are appended to this list
        (the caller's list is mutated), as in the previous implementation.
    :param now_date: date used to build the redis key names (formatted ``%Y%m%d``)
    :param now_h: hour used as the last component of the redis key names
    :param rule_key: rule identifier embedded in the de-duplicated key names
    :return: None
    """
    redis_helper = RedisHelper()
    date_str = datetime.datetime.strftime(now_date, '%Y%m%d')
    # Set copy gives O(1) membership tests instead of scanning the list per item.
    seen_ids = set(h_video_ids)

    # ##### De-duplicate the 24h rule data and save the result under its own redis key
    rule_24h_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_24H}rule1.{date_str}.{now_h}"
    if redis_helper.key_exists(key_name=rule_24h_key_name):
        rule_24h_data = redis_helper.get_data_zset_with_index(
            key_name=rule_24h_key_name, start=0, end=-1, with_scores=True)
        log_.info(f'rule_24h data count = {len(rule_24h_data)}')
        rule_24h_dup = _collect_unseen(rule_24h_data, seen_ids, h_video_ids)
        log_.info(f"rule_24h data dup count = {len(rule_24h_dup)}")
        rule_24h_dup_key_name = \
            f"{config_.RECALL_KEY_NAME_PREFIX_DUP_24H_H}{rule_key}.{date_str}.{now_h}"
        # Skip the write entirely when nothing survived de-duplication.
        if rule_24h_dup:
            redis_helper.add_data_with_zset(
                key_name=rule_24h_dup_key_name, data=rule_24h_dup, expire_time=23 * 3600)

    # ##### De-duplicate the model data and save the result under its own redis key
    model_key_name = get_rov_redis_key(now_date=now_date)
    model_data = redis_helper.get_data_zset_with_index(
        key_name=model_key_name, start=0, end=-1, with_scores=True)
    log_.info(f'model data count = {len(model_data)}')
    model_data_dup = _collect_unseen(model_data, seen_ids, h_video_ids)
    log_.info(f"model data dup count = {len(model_data_dup)}")
    model_data_dup_key_name = \
        f"{config_.RECALL_KEY_NAME_PREFIX_DUP_H}{rule_key}.{date_str}.{now_h}"
    if model_data_dup:
        redis_helper.add_data_with_zset(
            key_name=model_data_dup_key_name, data=model_data_dup, expire_time=23 * 3600)
|
|
|
|
|
|
|
|
|
def rank_by_h(now_date, now_h, rule_params):
|
|
@@ -233,20 +255,23 @@ def h_rank_bottom(now_date, now_h, rule_key):
|
|
|
else:
|
|
|
redis_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
|
|
|
redis_h = now_h - 1
|
|
|
- key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_H, config_.RECALL_KEY_NAME_PREFIX_DUP_H]
|
|
|
+ key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_BY_H]
|
|
|
for key_prefix in key_prefix_list:
|
|
|
key_name = f"{key_prefix}{rule_key}.{redis_dt}.{redis_h}"
|
|
|
initial_data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1, with_scores=True)
|
|
|
final_data = dict()
|
|
|
+ h_video_ids = []
|
|
|
for video_id, score in initial_data:
|
|
|
final_data[video_id] = score
|
|
|
+ h_video_ids.append(int(video_id))
|
|
|
# 存入对应的redis
|
|
|
final_key_name = \
|
|
|
f"{key_prefix}{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
if len(final_data) > 0:
|
|
|
redis_helper.add_data_with_zset(key_name=final_key_name, data=final_data, expire_time=23 * 3600)
|
|
|
- # 清空线上过滤应用列表
|
|
|
- redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER}{rule_key}")
|
|
|
+ # 清空线上过滤应用列表
|
|
|
+ redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER}{rule_key}")
|
|
|
+ dup_to_redis(h_video_ids, now_date, now_h, rule_key)
|
|
|
|
|
|
|
|
|
def h_timer_check():
|