|
@@ -160,11 +160,11 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
|
|
|
:param region: 所属地域
|
|
|
:return:
|
|
|
"""
|
|
|
- # 获取rov模型结果
|
|
|
redis_helper = RedisHelper()
|
|
|
- key_name = get_rov_redis_key(now_date=now_date)
|
|
|
- initial_data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1, with_scores=True)
|
|
|
- log_.info(f'initial data count = {len(initial_data)}')
|
|
|
+ # # 获取rov模型结果
|
|
|
+ # key_name = get_rov_redis_key(now_date=now_date)
|
|
|
+ # initial_data = redis_helper.get_data_zset_with_index(key_name=key_name, start=0, end=-1, with_scores=True)
|
|
|
+ # log_.info(f'initial data count = {len(initial_data)}')
|
|
|
|
|
|
# 获取符合进入召回源条件的视频,进入条件:小时级回流>=20 && score>=0.005
|
|
|
return_count = param.get('return_count', 1)
|
|
@@ -192,15 +192,76 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
|
|
|
redis_helper.del_keys(key_name=f"{config_.REGION_H_VIDEO_FILER}{region}.{rule_key}")
|
|
|
|
|
|
# 去重更新rov模型结果,并另存为redis中
|
|
|
- initial_data_dup = {}
|
|
|
- for video_id, score in initial_data:
|
|
|
+ # initial_data_dup = {}
|
|
|
+ # for video_id, score in initial_data:
|
|
|
+ # if int(video_id) not in h_video_ids:
|
|
|
+ # initial_data_dup[int(video_id)] = score
|
|
|
+ # log_.info(f"initial data dup count = {len(initial_data_dup)}")
|
|
|
+ # initial_key_name = \
|
|
|
+ # f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
+ # if len(initial_data_dup) > 0:
|
|
|
+ # redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=23 * 3600)
|
|
|
+
|
|
|
+ # 与其他召回视频池去重,存入对应的redis
|
|
|
+ dup_to_redis(h_video_ids=h_video_ids, now_date=now_date, now_h=now_h, rule_key=rule_key, region=region)
|
|
|
+
|
|
|
+
|
|
|
+def dup_to_redis(h_video_ids, now_date, now_h, rule_key, region):
|
|
|
+ """将地域分组小时级数据与其他召回视频池去重,存入对应的redis"""
|
|
|
+ redis_helper = RedisHelper()
|
|
|
+ # ##### 去重更新地域分组天级列表,并另存为redis中
|
|
|
+ region_day_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_REGION_BY_DAY}{region}.rule1." \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}"
|
|
|
+ if redis_helper.key_exists(key_name=region_day_key_name):
|
|
|
+ region_day_data = redis_helper.get_data_zset_with_index(
|
|
|
+ key_name=region_day_key_name, start=0, end=-1, with_scores=True)
|
|
|
+ log_.info(f'region day data count = {len(region_day_data)}')
|
|
|
+ region_day_dup = {}
|
|
|
+ for video_id, score in region_day_data:
|
|
|
+ if int(video_id) not in h_video_ids:
|
|
|
+ region_day_dup[int(video_id)] = score
|
|
|
+ h_video_ids.append(int(video_id))
|
|
|
+ log_.info(f"region day data dup count = {len(region_day_dup)}")
|
|
|
+ region_day_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_DAY_H}{region}.{rule_key}." \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
+ if len(region_day_dup) > 0:
|
|
|
+ redis_helper.add_data_with_zset(key_name=region_day_dup_key_name, data=region_day_dup, expire_time=23 * 3600)
|
|
|
+
|
|
|
+ # ##### 去重小程序天级更新结果,并另存为redis中
|
|
|
+ day_key_name = f"{config_.RECALL_KEY_NAME_PREFIX_BY_DAY}rule2.{datetime.datetime.strftime(now_date, '%Y%m%d')}"
|
|
|
+ if redis_helper.key_exists(key_name=day_key_name):
|
|
|
+ day_data = redis_helper.get_data_zset_with_index(
|
|
|
+ key_name=day_key_name, start=0, end=-1, with_scores=True)
|
|
|
+ log_.info(f'day data count = {len(day_data)}')
|
|
|
+ day_dup = {}
|
|
|
+ for video_id, score in day_data:
|
|
|
+ if int(video_id) not in h_video_ids:
|
|
|
+ day_dup[int(video_id)] = score
|
|
|
+ h_video_ids.append(int(video_id))
|
|
|
+ log_.info(f"day data dup count = {len(day_dup)}")
|
|
|
+ day_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_DAY_H}{region}.{rule_key}." \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
+ if len(day_dup) > 0:
|
|
|
+ redis_helper.add_data_with_zset(key_name=day_dup_key_name, data=day_dup, expire_time=23 * 3600)
|
|
|
+
|
|
|
+ # ##### 去重小程序模型更新结果,并另存为redis中
|
|
|
+ model_key_name = get_rov_redis_key(now_date=now_date)
|
|
|
+ model_data = redis_helper.get_data_zset_with_index(key_name=model_key_name, start=0, end=-1, with_scores=True)
|
|
|
+ log_.info(f'model data count = {len(model_data)}')
|
|
|
+ model_data_dup = {}
|
|
|
+ for video_id, score in model_data:
|
|
|
if int(video_id) not in h_video_ids:
|
|
|
- initial_data_dup[int(video_id)] = score
|
|
|
- log_.info(f"initial data dup count = {len(initial_data_dup)}")
|
|
|
- initial_key_name = \
|
|
|
- f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}.{rule_key}.{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
- if len(initial_data_dup) > 0:
|
|
|
- redis_helper.add_data_with_zset(key_name=initial_key_name, data=initial_data_dup, expire_time=23 * 3600)
|
|
|
+ model_data_dup[int(video_id)] = score
|
|
|
+ h_video_ids.append(int(video_id))
|
|
|
+ log_.info(f"model data dup count = {len(model_data_dup)}")
|
|
|
+ model_data_dup_key_name = \
|
|
|
+ f"{config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H}{region}.{rule_key}." \
|
|
|
+ f"{datetime.datetime.strftime(now_date, '%Y%m%d')}.{now_h}"
|
|
|
+ if len(model_data_dup) > 0:
|
|
|
+ redis_helper.add_data_with_zset(key_name=model_data_dup_key_name, data=model_data_dup, expire_time=23 * 3600)
|
|
|
|
|
|
|
|
|
def rank_by_h(project, table, now_date, now_h, rule_params, region_code_list):
|
|
@@ -219,14 +280,14 @@ def rank_by_h(project, table, now_date, now_h, rule_params, region_code_list):
|
|
|
score_df = cal_score(df=region_df)
|
|
|
video_rank(df=score_df, now_date=now_date, now_h=now_h, rule_key=key, param=value, region=region)
|
|
|
# to-csv
|
|
|
- score_filename = f"score_{region}_{key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.csv"
|
|
|
- score_df.to_csv(f'./data/{score_filename}')
|
|
|
+ # score_filename = f"score_{region}_{key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.csv"
|
|
|
+ # score_df.to_csv(f'./data/{score_filename}')
|
|
|
# to-logs
|
|
|
- log_.info({"date": datetime.datetime.strftime(now_date, '%Y%m%d%H'),
|
|
|
- "region_code": region,
|
|
|
- "redis_key_prefix": config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
|
|
|
- "rule_key": key,
|
|
|
- "score_df": score_df[['videoid', 'score']]})
|
|
|
+ # log_.info({"date": datetime.datetime.strftime(now_date, '%Y%m%d%H'),
|
|
|
+ # "region_code": region,
|
|
|
+ # "redis_key_prefix": config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
|
|
|
+ # "rule_key": key,
|
|
|
+ # "score_df": score_df[['videoid', 'score']]})
|
|
|
|
|
|
|
|
|
def h_rank_bottom(now_date, now_h, rule_key, region_code_list):
|
|
@@ -241,7 +302,12 @@ def h_rank_bottom(now_date, now_h, rule_key, region_code_list):
|
|
|
redis_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
|
|
|
redis_h = now_h - 1
|
|
|
|
|
|
- key_prefix_list = [config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H, config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H]
|
|
|
+ key_prefix_list = [
|
|
|
+ config_.RECALL_KEY_NAME_PREFIX_REGION_BY_H,
|
|
|
+ config_.RECALL_KEY_NAME_PREFIX_DUP1_REGION_DAY_H,
|
|
|
+ config_.RECALL_KEY_NAME_PREFIX_DUP2_REGION_DAY_H,
|
|
|
+ config_.RECALL_KEY_NAME_PREFIX_DUP_REGION_H
|
|
|
+ ]
|
|
|
# fea_df = get_feature_data(project=project, table=table, now_date=now_date - datetime.timedelta(hours=1))
|
|
|
# region_list = list(set(fea_df[''].to_list()))
|
|
|
for region in region_code_list:
|