|
@@ -84,7 +84,13 @@ def cal_score1(df):
|
|
|
|
|
|
|
|
|
|
def cal_score2(df):
    """Compute the "score2" ranking score for each row and sort best-first.

    Formula (taken from the original inline comment and the code below):

        score = share次数 / (view人数 + 1000)
                + 0.01 * 回流人数 / (share次数 + 100)

    The input must provide the columns 'share次数', 'view人数' and '回流人数'
    (by their names: share count, viewer count, returning-user count).

    :param df: pandas DataFrame with the three columns listed above.
    :return: a new DataFrame with missing values replaced by 0, the extra
        columns 'share_rate', 'back_rate' and 'score', and rows ordered by
        'score' in descending order.
    """
    # Missing values are treated as 0. fillna() returns a new DataFrame, so
    # the columns added below do not appear on the caller's frame.
    df = df.fillna(0)
    # NOTE(review): the additive constants 1000 and 100 presumably damp the
    # ratios for rows with very little traffic -- confirm with the owner.
    df['share_rate'] = df['share次数'] / (df['view人数'] + 1000)
    df['back_rate'] = df['回流人数'] / (df['share次数'] + 100)
    # The return-rate term is weighted at 1% of the share-rate term.
    df['score'] = df['share_rate'] + 0.01 * df['back_rate']
    df = df.sort_values(by=['score'], ascending=False)
    return df
|
|
|
|
|
|
|
|
|
|
def video_rank_day(df, now_date, rule_key, param):
|
|
def video_rank_day(df, now_date, rule_key, param):
|
|
@@ -104,25 +110,28 @@ def video_rank_day(df, now_date, rule_key, param):
|
|
|
|
|
|
# 获取符合进入召回源条件的视频
|
|
# 获取符合进入召回源条件的视频
|
|
return_count = param.get('return_count')
|
|
return_count = param.get('return_count')
|
|
- h_recall_df = df[df['回流人数'] > return_count]
|
|
|
|
- h_recall_videos = h_recall_df['videoid'].to_list()
|
|
|
|
- log_.info(f'h_recall videos count = {len(h_recall_videos)}')
|
|
|
|
|
|
+ if return_count:
|
|
|
|
+ day_recall_df = df[df['回流人数'] > return_count]
|
|
|
|
+ else:
|
|
|
|
+ day_recall_df = df
|
|
|
|
+ day_recall_videos = day_recall_df['videoid'].to_list()
|
|
|
|
+ log_.info(f'day_recall videos count = {len(day_recall_videos)}')
|
|
# 写入对应的redis
|
|
# 写入对应的redis
|
|
- h_video_ids =[]
|
|
|
|
- h_recall_result = {}
|
|
|
|
- for video_id in h_recall_videos:
|
|
|
|
- score = h_recall_df[h_recall_df['videoid'] == video_id]['score']
|
|
|
|
- h_recall_result[int(video_id)] = float(score)
|
|
|
|
- h_video_ids.append(int(video_id))
|
|
|
|
- h_recall_key_name = \
|
|
|
|
|
|
+ day_video_ids =[]
|
|
|
|
+ day_recall_result = {}
|
|
|
|
+ for video_id in day_recall_videos:
|
|
|
|
+ score = day_recall_df[day_recall_df['videoid'] == video_id]['score']
|
|
|
|
+ day_recall_result[int(video_id)] = float(score)
|
|
|
|
+ day_video_ids.append(int(video_id))
|
|
|
|
+ day_recall_key_name = \
|
|
f"{config_.RECALL_KEY_NAME_PREFIX_BY_DAY}{rule_key}.{datetime.strftime(now_date, '%Y%m%d')}"
|
|
f"{config_.RECALL_KEY_NAME_PREFIX_BY_DAY}{rule_key}.{datetime.strftime(now_date, '%Y%m%d')}"
|
|
- if len(h_recall_result) > 0:
|
|
|
|
- redis_helper.add_data_with_zset(key_name=h_recall_key_name, data=h_recall_result, expire_time=7 * 24 * 3600)
|
|
|
|
|
|
+ if len(day_recall_result) > 0:
|
|
|
|
+ redis_helper.add_data_with_zset(key_name=day_recall_key_name, data=day_recall_result, expire_time=7 * 24 * 3600)
|
|
|
|
|
|
# 去重更新rov模型结果,并另存为redis中
|
|
# 去重更新rov模型结果,并另存为redis中
|
|
initial_data_dup = {}
|
|
initial_data_dup = {}
|
|
for video_id, score in initial_data:
|
|
for video_id, score in initial_data:
|
|
- if int(video_id) not in h_video_ids:
|
|
|
|
|
|
+ if int(video_id) not in day_video_ids:
|
|
initial_data_dup[int(video_id)] = score
|
|
initial_data_dup[int(video_id)] = score
|
|
log_.info(f"initial data dup count = {len(initial_data_dup)}")
|
|
log_.info(f"initial data dup count = {len(initial_data_dup)}")
|
|
|
|
|
|
@@ -177,10 +186,8 @@ def day_timer_check():
|
|
project = config_.PROJECT_DAY
|
|
project = config_.PROJECT_DAY
|
|
table = config_.TABLE_DAY
|
|
table = config_.TABLE_DAY
|
|
rule_params = config_.RULE_PARAMS_DAY
|
|
rule_params = config_.RULE_PARAMS_DAY
|
|
- # return_count_list = [20, 10]
|
|
|
|
now_date = datetime.today()
|
|
now_date = datetime.today()
|
|
log_.info(f"now_date: {datetime.strftime(now_date, '%Y%m%d')}")
|
|
log_.info(f"now_date: {datetime.strftime(now_date, '%Y%m%d')}")
|
|
- now_h = datetime.now().hour
|
|
|
|
now_min = datetime.now().minute
|
|
now_min = datetime.now().minute
|
|
# 查看当前天级更新的数据是否已准备好
|
|
# 查看当前天级更新的数据是否已准备好
|
|
h_data_count = day_data_check(project=project, table=table, now_date=now_date)
|
|
h_data_count = day_data_check(project=project, table=table, now_date=now_date)
|