|
@@ -15,6 +15,7 @@ project = 'loghubods'
|
|
|
table = 'video_each_hour_update'
|
|
|
features = [
|
|
|
'videoid',
|
|
|
+ 'lastonehour_preview',
|
|
|
'lastonehour_view',
|
|
|
'lastonehour_play',
|
|
|
'lastonehour_share',
|
|
@@ -71,10 +72,11 @@ def get_feature_data(now_date):
|
|
|
return feature_df
|
|
|
|
|
|
|
|
|
-def cal_score(df):
|
|
|
+def cal_score(df, param):
|
|
|
"""
|
|
|
计算score
|
|
|
:param df: 特征数据
|
|
|
+ :param param: 规则参数
|
|
|
:return:
|
|
|
"""
|
|
|
|
|
@@ -87,7 +89,10 @@ def cal_score(df):
|
|
|
df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
|
|
|
df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
|
|
|
df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
|
|
|
- df['ctr'] = df['lastonehour_play'] / (df['lastonehour_view'] + 1000)
|
|
|
+ if param.get('view_type', None) == 'pre-view':
|
|
|
+ df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
|
|
|
+ else:
|
|
|
+ df['ctr'] = df['lastonehour_play'] / (df['lastonehour_view'] + 1000)
|
|
|
df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
|
|
|
df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
|
|
|
df = df.sort_values(by=['score'], ascending=False)
|
|
@@ -162,15 +167,15 @@ def video_rank(df, now_date, now_h, rule_key, param):
|
|
|
def rank_by_h(now_date, now_h, rule_params):
|
|
|
|
|
|
feature_df = get_feature_data(now_date=now_date)
|
|
|
-
|
|
|
- score_df = cal_score(df=feature_df)
|
|
|
|
|
|
for key, value in rule_params.items():
|
|
|
log_.info(f"rule = {key}, param = {value}")
|
|
|
+
|
|
|
+ score_df = cal_score(df=feature_df, param=value)
|
|
|
video_rank(df=score_df, now_date=now_date, now_h=now_h, rule_key=key, param=value)
|
|
|
-
|
|
|
- score_filename = f"score_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.csv"
|
|
|
- score_df.to_csv(f'./data/{score_filename}')
|
|
|
+
|
|
|
+ score_filename = f"score_{key}_{datetime.datetime.strftime(now_date, '%Y%m%d%H')}.csv"
|
|
|
+ score_df.to_csv(f'./data/{score_filename}')
|
|
|
|
|
|
|
|
|
def h_rank_bottom(now_date, now_h, rule_key):
|
|
@@ -201,10 +206,7 @@ def h_rank_bottom(now_date, now_h, rule_key):
|
|
|
|
|
|
|
|
|
def h_timer_check():
|
|
|
- rule_params = {
|
|
|
- 'rule1': {'return_count': 20, 'score_rule': 0.005},
|
|
|
- 'rule2': {'return_count': 20, 'score_rule': 0.001}
|
|
|
- }
|
|
|
+ rule_params = config_.RULE_PARAMS
|
|
|
|
|
|
now_date = datetime.datetime.today()
|
|
|
log_.info(f"now_date: {datetime.datetime.strftime(now_date, '%Y%m%d%H')}")
|