|
@@ -442,6 +442,49 @@ def cal_score_with_back_rate_exponential_weighting2(df, param):
|
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
|
return df
|
|
|
|
|
|
def cal_score_with_back_rate_by_rank_weighting(df, param):
    """
    Compute a rank-weighted score for each row and sort by it, descending.

    add by sunmingze 20231123

    Derived columns (all added to the returned frame):
        share_rate = lastonehour_share / (lastonehour_play + 1000)
        back_rate  = lastonehour_return / (lastonehour_share + 10)
        log_back   = ln(lastonehour_return + 1)
        ctr        = lastonehour_play / (<exposure column> + 1000)
        K2         = ctr capped at 0.6
        platform_return_rate = platform_return / lastonehour_return

    Each of share_rate, back_rate, K2 and log_back is converted to a dense
    descending rank (largest value -> rank 1), and the score is:

        score = 1 / (rank_by_sharerate + 10)
              + 5 / (rank_by_backrate + 10)
              + 5 / (rank_by_logback + 10)
              + 1 / (rank_by_K2 + 10)

    :param df: feature data; must contain lastonehour_share, lastonehour_play,
        lastonehour_return, platform_return and the exposure column selected
        by ``param['view_type']``.
    :param param: rule parameters. ``view_type`` selects the ctr denominator:
        'video-show' -> lastonehour_show,
        'video-show-region' -> lastonehour_show_region,
        anything else (or missing) -> lastonehour_preview.
    :return: a new DataFrame with the derived columns, sorted by ``score``
        in descending order.
    """
    # Missing features are treated as zero counts.
    df = df.fillna(0)

    # The additive constants (1000 / 10) smooth the rates for low-traffic rows.
    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)

    # Pick the exposure column used as the ctr denominator.
    view_type = param.get('view_type', None)
    if view_type == 'video-show':
        exposure_col = 'lastonehour_show'
    elif view_type == 'video-show-region':
        exposure_col = 'lastonehour_show_region'
    else:
        exposure_col = 'lastonehour_preview'
    df['ctr'] = df['lastonehour_play'] / (df[exposure_col] + 1000)
    # Cap ctr at 0.6 so that outliers do not dominate the K2 ranking.
    df['K2'] = df['ctr'].clip(upper=0.6)

    # NOTE: the denominator is not smoothed, so rows with
    # lastonehour_return == 0 yield inf/NaN here (pandas does not raise).
    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']

    # Dense descending ranks of share rate, back rate, K2 and log(return).
    df['rank_by_sharerate'] = df['share_rate'].rank(ascending=False, method='dense')
    # Fixed: the column is named 'back_rate' (the original read 'backrate'
    # and raised KeyError).
    df['rank_by_backrate'] = df['back_rate'].rank(ascending=False, method='dense')
    df['rank_by_K2'] = df['K2'].rank(ascending=False, method='dense')
    df['rank_by_logback'] = df['log_back'].rank(ascending=False, method='dense')

    # Additive score over the ranks. Fixed: the weight-5 term uses the
    # back-rate rank (the original used the share-rate rank twice).
    df['score'] = (
        1 / (df['rank_by_sharerate'] + 10)
        + 5 / (df['rank_by_backrate'] + 10)
        + 5 / (df['rank_by_logback'] + 10)
        + 1 / (df['rank_by_K2'] + 10)
    )

    df = df.sort_values(by=['score'], ascending=False)
    return df
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
|
|
|
def cal_score(df, param):
|
|
|
if param.get('return_data', None) == 'share_region_return':
|
|
@@ -464,6 +507,8 @@ def cal_score(df, param):
|
|
|
df = cal_score_with_back_rate_exponential_weighting1(df=df, param=param)
|
|
|
elif param.get('score_func', None) == 'back_rate_exponential_weighting2':
|
|
|
df = cal_score_with_back_rate_exponential_weighting2(df=df, param=param)
|
|
|
+ elif param.get('score_func', None) == 'back_rate_rank_weighting':
|
|
|
+ df = cal_score_with_back_rate_by_rank_weighting(df=df, param=param)
|
|
|
else:
|
|
|
df = cal_score_initial(df=df, param=param)
|
|
|
return df
|