|
@@ -23,7 +23,8 @@ region_code = config_.REGION_CODE
|
|
RULE_PARAMS = {
|
|
RULE_PARAMS = {
|
|
'rule_params': {
|
|
'rule_params': {
|
|
'rule66': {
|
|
'rule66': {
|
|
- 'view_type': 'video-show-region', 'platform_return_rate': 0.001,
|
|
+ 'view_type': 'video-show-region',
|
|
|
|
+ 'score_func': '20240223',
|
|
'region_24h_rule_key': 'rule66', '24h_rule_key': 'rule66'
|
|
'region_24h_rule_key': 'rule66', '24h_rule_key': 'rule66'
|
|
},
|
|
},
|
|
'rule67': {
|
|
'rule67': {
|
|
@@ -40,7 +41,7 @@ RULE_PARAMS = {
|
|
'data_params': config_.DATA_PARAMS,
|
|
'data_params': config_.DATA_PARAMS,
|
|
'params_list': [
|
|
'params_list': [
|
|
|
|
|
|
-
|
|
+ {'data': 'data66', 'rule': 'rule66'},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -76,6 +77,9 @@ features = [
|
|
'lastthreehour_return_now_new',
|
|
'lastthreehour_return_now_new',
|
|
'lastthreehour_return_new',
|
|
'lastthreehour_return_new',
|
|
'platform_return_new',
|
|
'platform_return_new',
|
|
|
|
+
|
|
|
|
+ 'lastonehour_allreturn',
|
|
|
|
+ 'lastonehour_allsharecnt'
|
|
]
|
|
]
|
|
|
|
|
|
|
|
|
|
@@ -144,7 +148,8 @@ def get_day_30day_videos(now_date, data_key, rule_key):
|
|
def get_feature_data(project, table, now_date):
|
|
def get_feature_data(project, table, now_date):
|
|
"""获取特征数据"""
|
|
"""获取特征数据"""
|
|
dt = datetime.datetime.strftime(now_date, '%Y%m%d%H')
|
|
dt = datetime.datetime.strftime(now_date, '%Y%m%d%H')
|
|
-
|
|
+
|
|
|
|
+ dt = '2024022319all1last0'
|
|
records = get_data_from_odps(date=dt, project=project, table=table)
|
|
records = get_data_from_odps(date=dt, project=project, table=table)
|
|
feature_data = []
|
|
feature_data = []
|
|
for record in records:
|
|
for record in records:
|
|
@@ -156,6 +161,36 @@ def get_feature_data(project, table, now_date):
|
|
return feature_df
|
|
return feature_df
|
|
|
|
|
|
|
|
|
|
|
|
def cal_score_initial_20240223(df, param):
    """
    Compute the '20240223' variant of the ranking score.

    Missing values are replaced with 0, then these columns are added:
    share_rate, back_rate, back_rate_new, back_rate_all, log_back,
    log_back_all, ctr, K2, platform_return_rate and score, where

        score = share_rate
                * (back_rate_new + 0.01 * back_rate_all)
                * (log_back + 0.01 * log_back_all)
                * K2

    :param df: feature data; must contain the lastonehour_* columns and the
        platform_return column read below
    :param param: rule parameters; 'view_type' selects which exposure column
        is the ctr denominator ('video-show' -> lastonehour_show,
        'video-show-region' -> lastonehour_show_region,
        anything else -> lastonehour_preview)
    :return: a new DataFrame sorted by score in descending order; the
        DataFrame passed in is left untouched because fillna returns a copy
    """
    df = df.fillna(0)

    # The additive constants in the denominators keep them away from zero
    # and damp the rates for rows with very little traffic.
    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
    df['back_rate_new'] = (df['lastonehour_return'] + 1) / (df['lastonehour_share'] + 10)
    df['back_rate_all'] = df['lastonehour_allreturn'] / (df['lastonehour_allsharecnt'] + 10)

    # +1 before the log so that a count of zero maps to 0.
    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
    df['log_back_all'] = (df['lastonehour_allreturn'] + 1).apply(math.log)

    # Only the exposure column differs between the view types.
    view_type = param.get('view_type', None)
    if view_type == 'video-show':
        exposure_col = 'lastonehour_show'
    elif view_type == 'video-show-region':
        exposure_col = 'lastonehour_show_region'
    else:
        exposure_col = 'lastonehour_preview'
    df['ctr'] = df['lastonehour_play'] / (df[exposure_col] + 1000)

    # K2 is ctr capped at 0.6.
    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)

    # Guard the zero denominator: a plain division would put NaN (0/0) or
    # inf (x/0) back into the frame right after fillna(0) removed them.
    # Rows without any return in the last hour get a rate of 0 instead.
    hourly_return = df['lastonehour_return']
    df['platform_return_rate'] = (
        df['platform_return'] / hourly_return.where(hourly_return != 0)
    ).fillna(0)

    df['score'] = (
        df['share_rate']
        * (df['back_rate_new'] + 0.01 * df['back_rate_all'])
        * (df['log_back'] + 0.01 * df['log_back_all'])
        * df['K2']
    )
    df = df.sort_values(by=['score'], ascending=False)
    return df
|
|
|
|
+
|
|
def cal_score_initial(df, param):
|
|
def cal_score_initial(df, param):
|
|
"""
|
|
"""
|
|
计算score
|
|
计算score
|
|
@@ -527,6 +562,8 @@ def cal_score(df, param):
|
|
df = cal_score_with_back_rate_exponential_weighting2(df=df, param=param)
|
|
df = cal_score_with_back_rate_exponential_weighting2(df=df, param=param)
|
|
elif param.get('score_func', None) == 'back_rate_rank_weighting':
|
|
elif param.get('score_func', None) == 'back_rate_rank_weighting':
|
|
df = cal_score_with_back_rate_by_rank_weighting(df=df, param=param)
|
|
df = cal_score_with_back_rate_by_rank_weighting(df=df, param=param)
|
|
|
|
+ elif param.get('score_func', None) == '20240223':
|
|
|
|
+ df = cal_score_initial_20240223(df=df, param=param)
|
|
else:
|
|
else:
|
|
df = cal_score_initial(df=df, param=param)
|
|
df = cal_score_initial(df=df, param=param)
|
|
return df
|
|
return df
|
|
@@ -618,8 +655,26 @@ def video_rank(df, now_date, now_h, rule_key, param, region, data_key, rule_rank
|
|
return_count = param.get('return_count', 1)
|
|
return_count = param.get('return_count', 1)
|
|
score_value = param.get('score_rule', 0)
|
|
score_value = param.get('score_rule', 0)
|
|
platform_return_rate = param.get('platform_return_rate', 0)
|
|
platform_return_rate = param.get('platform_return_rate', 0)
|
|
- h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)
|
|
+
|
|
- & (df['platform_return_rate'] >= platform_return_rate)]
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ h_recall_df = df[
|
|
|
|
+ (df['lastonehour_allreturn'] > 0)
|
|
|
|
+ ]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
|
|
|
|
|
|
|
|
h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
|
|
h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
|