|
@@ -27,6 +27,7 @@ features = [
|
|
'lastonehour_share_total_final', # 过去1小时分享次数
|
|
'lastonehour_share_total_final', # 过去1小时分享次数
|
|
'lastonehour_show', # 过去1小时video_show人数
|
|
'lastonehour_show', # 过去1小时video_show人数
|
|
'lastonehour_show_total_final', # 过去1小时video_show次数
|
|
'lastonehour_show_total_final', # 过去1小时video_show次数
|
|
|
|
+ 'platform_return',
|
|
]
|
|
]
|
|
|
|
|
|
|
|
|
|
@@ -104,6 +105,7 @@ def cal_score(df, param):
|
|
df['ctr'] = df['lastonehour_play'] / (df['lastonehour_view'] + 1000)
|
|
df['ctr'] = df['lastonehour_play'] / (df['lastonehour_view'] + 1000)
|
|
df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
|
|
df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
|
|
df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
|
|
df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
|
|
|
|
+ df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
return df
|
|
return df
|
|
|
|
|
|
@@ -112,6 +114,7 @@ def cal_score2(df):
|
|
# score2计算公式: score = lastonehour_return/(lastonehour_view+1000)
|
|
# score2计算公式: score = lastonehour_return/(lastonehour_view+1000)
|
|
df = df.fillna(0)
|
|
df = df.fillna(0)
|
|
df['score'] = df['lastonehour_return'] / (df['lastonehour_view'] + 1000)
|
|
df['score'] = df['lastonehour_return'] / (df['lastonehour_view'] + 1000)
|
|
|
|
+ df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
return df
|
|
return df
|
|
|
|
|
|
@@ -124,6 +127,7 @@ def cal_score3(df):
|
|
df['share_rate'] = df['lastonehour_share_total_final'] / (df['lastonehour_view'] + 1000)
|
|
df['share_rate'] = df['lastonehour_share_total_final'] / (df['lastonehour_view'] + 1000)
|
|
df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share_total_final'] + 1)
|
|
df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share_total_final'] + 1)
|
|
df['score'] = df['share_rate'] + 0.03 * df['back_rate']
|
|
df['score'] = df['share_rate'] + 0.03 * df['back_rate']
|
|
|
|
+ df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
return df
|
|
return df
|
|
|
|
|
|
@@ -147,7 +151,9 @@ def video_rank(df, now_date, now_h, rule_key, param):
|
|
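# Select videos eligible for the recall source. Entry conditions: hourly return >= return_count (documented as 20) && score >= score_rule (documented as 0.005) && platform_return_rate >= param['platform_return_rate'] (defaults to 0 when absent)
|
|
# Select videos eligible for the recall source. Entry conditions: hourly return >= return_count (documented as 20) && score >= score_rule (documented as 0.005) && platform_return_rate >= param['platform_return_rate'] (defaults to 0 when absent)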
|
|
return_count = param.get('return_count')
|
|
return_count = param.get('return_count')
|
|
score_value = param.get('score_rule')
|
|
score_value = param.get('score_rule')
|
|
- h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)]
|
|
|
|
|
|
+ platform_return_rate = param.get('platform_return_rate', 0)
|
|
|
|
+ h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)
|
|
|
|
+ & (df['platform_return_rate'] >= platform_return_rate)]
|
|
h_recall_df['videoid'] = h_recall_df['videoid'].astype(int)
|
|
h_recall_df['videoid'] = h_recall_df['videoid'].astype(int)
|
|
h_recall_videos = h_recall_df['videoid'].to_list()
|
|
h_recall_videos = h_recall_df['videoid'].to_list()
|
|
log_.info(f'h_recall videos count = {len(h_recall_videos)}')
|
|
log_.info(f'h_recall videos count = {len(h_recall_videos)}')
|