|
@@ -145,34 +145,30 @@ def get_feature_data(project, table, now_date):
|
|
|
return feature_df
|
|
|
|
|
|
|
|
|
-def cal_score_initial_20240223(df, param):
|
|
|
def cal_score_initial_20240322(df, param):
    """
    Compute the ranking score for every row and return the rows sorted by it.

    Missing values are replaced with 0 first. The following columns are then
    derived from the hourly counters:

    * ``log_back``   = ln(lastonehour_allreturn + 1)
    * ``share_rate`` = (lastonehour_share + 1) / (lastonehour_play + 1000)
    * ``back_rate``  = (lastonehour_return + 1) / (lastonehour_share + 10)
    * ``ctr``        = (lastonehour_play + 1) / (lastonehour_show_region + 1000)
    * ``K2``         = ``ctr`` capped at 0.6
    * ``platform_return_rate`` = platform_return / lastonehour_return
    * ``score1``     = share_rate * back_rate * log_back * K2

    The final ``score`` is ``score1``, optionally blended with one other
    signal depending on ``param``:

    * ``click_score_rate`` present: ``(1 - r) * score1 + r * K2``
    * otherwise ``back_score_rate`` present: ``(1 - r) * score1 + r * back_rate``
    * neither present: ``score1`` unchanged

    ``click_score_rate`` takes precedence when both keys are supplied.

    :param df: feature data; must contain the columns ``lastonehour_allreturn``,
        ``lastonehour_share``, ``lastonehour_play``, ``lastonehour_return``,
        ``lastonehour_show_region`` and ``platform_return``
    :param param: rule parameters, read with ``.get``; the optional keys used
        here are ``click_score_rate`` and ``back_score_rate``
    :return: a new DataFrame (the input is not modified) with the derived
        columns added, sorted by ``score`` in descending order
    """
    log_.info("进入了cal_score_initial_20240322")
    df = df.fillna(0)

    # The additive constants smooth the ratios so that rows with very little
    # traffic do not produce extreme values or divide by zero.
    df['log_back'] = (df['lastonehour_allreturn'] + 1).apply(math.log)
    df['share_rate'] = (df['lastonehour_share'] + 1) / (df['lastonehour_play'] + 1000)
    df['back_rate'] = (df['lastonehour_return'] + 1) / (df['lastonehour_share'] + 10)
    df['ctr'] = (df['lastonehour_play'] + 1) / (df['lastonehour_show_region'] + 1000)

    # Cap the click-through factor at 0.6.
    df['K2'] = df['ctr'].clip(upper=0.6)

    # NOTE(review): this ratio is not smoothed. After fillna(0) a zero
    # denominator gives inf (or NaN for 0/0) under pandas float division.
    # Left as-is because code outside this function compares against this
    # column — confirm before changing.
    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']

    df['score1'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']

    click_score_rate = param.get('click_score_rate')
    # Fix: this used to read 'click_score_rate' a second time, which made the
    # back-rate blend below unreachable.
    back_score_rate = param.get('back_score_rate')

    if click_score_rate is not None:
        df['score'] = (1 - click_score_rate) * df['score1'] + click_score_rate * df['K2']
    elif back_score_rate is not None:
        df['score'] = (1 - back_score_rate) * df['score1'] + back_score_rate * df['back_rate']
    else:
        df['score'] = df['score1']

    df = df.sort_values(by=['score'], ascending=False)
    return df
|
|
|
|
|
@@ -557,8 +553,8 @@ def cal_score(df, param, now_h):
|
|
|
df = cal_score_with_back_rate_exponential_weighting2(df=df, param=param)
|
|
|
elif param.get('score_func', None) == 'back_rate_rank_weighting':
|
|
|
df = cal_score_with_back_rate_by_rank_weighting(df=df, param=param)
|
|
|
- elif param.get('score_func', None) == '20240223':
|
|
|
- df = cal_score_initial_20240223(df=df, param=param)
|
|
|
+ elif param.get('score_func', None) == '20240322':
|
|
|
+ df = cal_score_initial_20240322(df=df, param=param)
|
|
|
else:
|
|
|
df = cal_score_initial(df=df, param=param, now_h=now_h)
|
|
|
return df
|
|
@@ -650,8 +646,6 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
|
|
|
return_count = param.get('return_count', 1)
|
|
|
score_value = param.get('score_rule', 0)
|
|
|
platform_return_rate = param.get('platform_return_rate', 0)
|
|
|
- # h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)
|
|
|
- # & (df['platform_return_rate'] >= platform_return_rate)]
|
|
|
# zhangbo
|
|
|
if now_h in [1, 2, 3, 4]:
|
|
|
h_recall_df = df[
|
|
@@ -664,22 +658,12 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
|
|
|
(df['score'] >= score_value) &
|
|
|
(df['platform_return_rate'] >= platform_return_rate)
|
|
|
]
|
|
|
- if "lastonehour_allreturn" in param.keys():
|
|
|
- log_.info("采用 lastonehour_allreturn 过滤")
|
|
|
+ if "20240322" in param.keys():
|
|
|
+ log_.info("采用20240322新加的过滤")
|
|
|
h_recall_df = df[
|
|
|
- (df['lastonehour_allreturn'] > 0)
|
|
|
- ]
|
|
|
-
|
|
|
- # try:
|
|
|
- # if "return_countv2" in param.keys() and "platform_return_ratev2" in param.keys():
|
|
|
- # return_countv2 = param["return_countv2"]
|
|
|
- # platform_return_ratev2 = param["platform_return_ratev2"]
|
|
|
- # h_recall_df = h_recall_df[
|
|
|
- # df['platform_return_rate'] >= platform_return_ratev2 |
|
|
|
- # (df['platform_return_rate'] < platform_return_ratev2 & df['lastonehour_return'] > return_countv2)
|
|
|
- # ]
|
|
|
- # except Exception as e:
|
|
|
- # log_.error("return_countv2 is wrong with{}".format(e))
|
|
|
+ (df['lastonehour_return'] > 0) |
|
|
|
+ (df['lastonehour_allreturn'] > 1)
|
|
|
+ ]
|
|
|
|
|
|
# videoid重复时,保留分值高
|
|
|
h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
|