|
@@ -178,7 +178,7 @@ def video_rank_h(df, now_date, now_h, rule_key, param, data_key):
|
|
|
# 清空线上过滤应用列表
|
|
|
# redis_helper.del_keys(key_name=f"{config_.H_VIDEO_FILER_24H}{app_type}.{data_key}.{rule_key}")
|
|
|
|
|
|
- if rule_key == 'rule3':
|
|
|
+ if rule_key in ['rule3', 'rule4']:
|
|
|
# 去重筛选结果,保留剩余数据并写入Redis
|
|
|
all_videos = df['videoid'].to_list()
|
|
|
log_.info(f'h_by24h_recall all videos count = {len(all_videos)}')
|
|
@@ -232,6 +232,21 @@ def merge_df(df_left, df_right):
|
|
|
return df_merged[feature_list]
|
|
|
|
|
|
|
|
|
def merge_df_with_score(df_left, df_right):
    """
    Outer-merge two frames on ``videoid`` and sum their per-video metrics.

    The summed metrics are the return count ('回流人数'), the platform
    return count ('platform_return') and 'score'. A video that appears in
    only one of the two frames keeps that side's values (the missing side
    counts as 0).

    :param df_left: DataFrame containing at least the columns
        'videoid', '回流人数', 'platform_return' and 'score'.
    :param df_right: DataFrame with the same required columns.
    :return: new DataFrame with exactly the columns
        ['videoid', '回流人数', 'platform_return', 'score'], one row per
        merged key; the inputs are not modified.
    :raises KeyError: if either input lacks one of the required columns.
    """
    feature_list = ['videoid', '回流人数', 'platform_return', 'score']
    # Project both sides to the needed columns before merging: any other
    # columns would only be suffixed, zero-filled and then discarded, and
    # this guarantees every metric overlaps and so receives the _x/_y suffix.
    df_merged = pd.merge(
        df_left[feature_list],
        df_right[feature_list],
        on=['videoid'],
        how='outer',
        suffixes=('_x', '_y'),
    )
    # Rows present on one side only carry NaN for the other side's metrics;
    # treat those as 0 so the sums below stay numeric.
    df_merged = df_merged.fillna(0)
    for feature in feature_list[1:]:
        df_merged[feature] = df_merged[f'{feature}_x'] + df_merged[f'{feature}_y']
    return df_merged[feature_list]
|
|
|
+
|
|
|
+
|
|
|
def rank_by_h(now_date, now_h, rule_params, project, table):
|
|
|
# 获取特征数据
|
|
|
feature_df = get_feature_data(now_date=now_date, now_h=now_h, project=project, table=table)
|
|
@@ -239,6 +254,7 @@ def rank_by_h(now_date, now_h, rule_params, project, table):
|
|
|
# rank
|
|
|
data_params_item = rule_params.get('data_params')
|
|
|
rule_params_item = rule_params.get('rule_params')
|
|
|
+ """
|
|
|
for param in rule_params.get('params_list'):
|
|
|
data_key = param.get('data')
|
|
|
data_param = data_params_item.get(data_key)
|
|
@@ -257,31 +273,46 @@ def rank_by_h(now_date, now_h, rule_params, project, table):
|
|
|
score_df = cal_score1(df=df_merged)
|
|
|
video_rank_h(df=score_df, now_date=now_date, now_h=now_h,
|
|
|
rule_key=rule_key, param=rule_param, data_key=data_key)
|
|
|
-
|
|
|
"""
|
|
|
- for app_type, params in rule_params.items():
|
|
|
- log_.info(f"app_type = {app_type}")
|
|
|
- data_params_item = params.get('data_params')
|
|
|
- rule_params_item = params.get('rule_params')
|
|
|
- for param in params.get('params_list'):
|
|
|
- data_key = param.get('data')
|
|
|
- data_param = data_params_item.get(data_key)
|
|
|
- log_.info(f"data_key = {data_key}, data_param = {data_param}")
|
|
|
- df_list = [feature_df[feature_df['apptype'] == apptype] for apptype in data_param]
|
|
|
- df_merged = reduce(merge_df, df_list)
|
|
|
|
|
|
- rule_key = param.get('rule')
|
|
|
- rule_param = rule_params_item.get(rule_key)
|
|
|
- log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
|
|
|
- # 计算score
|
|
|
- cal_score_func = rule_param.get('cal_score_func', 1)
|
|
|
+ for param in rule_params.get('params_list'):
|
|
|
+ score_df_list = []
|
|
|
+ data_key = param.get('data')
|
|
|
+ data_param = data_params_item.get(data_key)
|
|
|
+ log_.info(f"data_key = {data_key}, data_param = {data_param}")
|
|
|
+ rule_key = param.get('rule')
|
|
|
+ rule_param = rule_params_item.get(rule_key)
|
|
|
+ log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
|
|
|
+ cal_score_func = rule_param.get('cal_score_func', 1)
|
|
|
+ merge_func = rule_param.get('merge_func', 1)
|
|
|
+
|
|
|
+ if merge_func == 2:
|
|
|
+ for apptype, weight in data_param.items():
|
|
|
+ df = feature_df[feature_df['apptype'] == apptype]
|
|
|
+ # 计算score
|
|
|
+ if cal_score_func == 2:
|
|
|
+ score_df = cal_score2(df=df, param=rule_param)
|
|
|
+ else:
|
|
|
+ score_df = cal_score1(df=df)
|
|
|
+ score_df['score'] = score_df['score'] * weight
|
|
|
+ score_df_list.append(score_df)
|
|
|
+ # 分数合并
|
|
|
+ df_merged = reduce(merge_df_with_score, score_df_list)
|
|
|
+ # 更新平台回流比
|
|
|
+ df_merged['platform_return_rate'] = df_merged['platform_return'] / df_merged['回流人数']
|
|
|
+ video_rank_h(df=df_merged, now_date=now_date, now_h=now_h,
|
|
|
+ rule_key=rule_key, param=rule_param, data_key=data_key)
|
|
|
+
|
|
|
+ else:
|
|
|
+ df_list = [feature_df[feature_df['apptype'] == apptype] for apptype, _ in data_param.items()]
|
|
|
+ df_merged = reduce(merge_df, df_list)
|
|
|
if cal_score_func == 2:
|
|
|
score_df = cal_score2(df=df_merged, param=rule_param)
|
|
|
else:
|
|
|
score_df = cal_score1(df=df_merged)
|
|
|
- video_rank_h(df=score_df, now_date=now_date, now_h=now_h, rule_key=rule_key, param=rule_param,
|
|
|
- app_type=app_type, data_key=data_key)
|
|
|
- """
|
|
|
+ video_rank_h(df=score_df, now_date=now_date, now_h=now_h,
|
|
|
+ rule_key=rule_key, param=rule_param, data_key=data_key)
|
|
|
+
|
|
|
# # to-csv
|
|
|
# score_filename = f"score_by24h_{key}_{datetime.strftime(now_date, '%Y%m%d%H')}.csv"
|
|
|
# score_df.to_csv(f'./data/{score_filename}')
|