浏览代码

add platform rule

liqian 2 年之前
父节点
当前提交
c666ef6437
共有 3 个文件被更改,包括 22 次插入和 2 次删除
  1. 2 2
      config.py
  2. 10 0
      region_rule_rank_h_by24h.py
  3. 10 0
      rule_rank_day.py

+ 2 - 2
config.py

@@ -102,7 +102,7 @@ class BaseConfig(object):
     # 小程序天级规则参数
     RULE_PARAMS_DAY = {
         # 'rule1': {'return_count': 200},
-        'rule2': {'cal_score_func': 2, 'return_count': 100},
+        'rule2': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001},
     }
 
     # 小时级更新过去24h数据
@@ -139,7 +139,7 @@ class BaseConfig(object):
 
     # 地域分组小时级更新24h规则参数
     RULE_PARAMS_REGION_24H = {
-        'rule1': {'view_type': 'pre-view', 'return_count': 21, 'score_rule': 0},
+        'rule1': {'view_type': 'pre-view', 'return_count': 21, 'score_rule': 0, 'platform_return_rate': 0.001},
     }
 
     # 老视频更新使用数据

+ 10 - 0
region_rule_rank_h_by24h.py

@@ -66,6 +66,13 @@ features = [
     'lastday_view_total',  # 昨日曝光次数
     'lastday_play_total',  # 昨日播放次数
     'lastday_share_total',  # 昨日分享次数
+    'platform_return',
+    'platform_preview',
+    'platform_preview_total',
+    'platform_show',
+    'platform_show_total',
+    'platform_view',
+    'platform_view_total',
 ]
 
 
@@ -137,6 +144,7 @@ def cal_score(df):
     df['ctr'] = df['lastday_play'] / (df['lastday_preview'] + 1000)
     df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
     df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
+    df['platform_return_rate'] = df['platform_return'] / df['回流人数']
     df = df.sort_values(by=['score'], ascending=False)
     return df
 
@@ -157,6 +165,8 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
     return_count = param.get('return_count', 1)
     score_value = param.get('score_rule', 0)
     h_recall_df = df[(df['lastday_return'] >= return_count) & (df['score'] >= score_value)]
+    platform_return_rate = param.get('platform_return_rate', 0)
+    h_recall_df = h_recall_df[h_recall_df['platform_return_rate'] > platform_return_rate]
     # videoid重复时,保留分值高
     h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
     h_recall_df = h_recall_df.drop_duplicates(subset=['videoid'], keep='first')

+ 10 - 0
rule_rank_day.py

@@ -21,6 +21,13 @@ features = [
     'view次数',  # 过去1天曝光次数
     'play次数',  # 过去1天播放次数
     'share次数',  # 过去1天分享次数
+    'platform_return',
+    'platform_preview',
+    'platform_preview_total',
+    'platform_show',
+    'platform_show_total',
+    'platform_view',
+    'platform_view_total',
 ]
 
 
@@ -89,6 +96,7 @@ def cal_score2(df):
     df['share_rate'] = df['share次数'] / (df['view人数'] + 1000)
     df['back_rate'] = df['回流人数'] / (df['share次数'] + 100)
     df['score'] = df['share_rate'] + 0.01 * df['back_rate']
+    df['platform_return_rate'] = df['platform_return'] / df['回流人数']
     df = df.sort_values(by=['score'], ascending=False)
     return df
 
@@ -114,6 +122,8 @@ def video_rank_day(df, now_date, rule_key, param):
         day_recall_df = df[df['回流人数'] > return_count]
     else:
         day_recall_df = df
+    platform_return_rate = param.get('platform_return_rate', 0)
+    day_recall_df = day_recall_df[day_recall_df['platform_return_rate'] > platform_return_rate]
 
     # videoid重复时,保留分值高
     day_recall_df = day_recall_df.sort_values(by=['score'], ascending=False)