liqian il y a 2 ans
Parent
commit
8d8c757b54
5 fichiers modifiés avec 19 ajouts et 9 suppressions
  1. 4 4
      config.py
  2. 5 1
      region_rule_rank_h.py
  3. 2 2
      region_rule_rank_h_by24h.py
  4. 1 1
      rule_rank_day.py
  5. 7 1
      rule_rank_h.py

+ 4 - 4
config.py

@@ -72,12 +72,12 @@ class BaseConfig(object):
 
     # 小程序小时级规则参数
     RULE_PARAMS = {
-        'rule1': {'return_count': 20, 'score_rule': 0.005},
+        'rule1': {'return_count': 20, 'score_rule': 0.005, 'platform_return_rate': 0.001},
         # 'rule2': {'return_count': 20, 'score_rule': 0.001},  # 下线
-        'rule3': {'view_type': 'pre-view', 'return_count': 20, 'score_rule': 0.005},
+        'rule3': {'view_type': 'pre-view', 'return_count': 20, 'score_rule': 0.005, 'platform_return_rate': 0.001},
         # 'rule4': {'cal_score_func': 2, 'return_count': 20, 'score_rule': 0},  # 下线
         # 'rule5': {'cal_score_func': 3, 'return_count': 20, 'score_rule': 0},  # 下线
-        'rule6': {'view_type': 'video-show', 'return_count': 20, 'score_rule': 0.005},
+        'rule6': {'view_type': 'video-show', 'return_count': 20, 'score_rule': 0.005, 'platform_return_rate': 0.001},
     }
 
     # app_type: [18, 19]预测表名
@@ -121,7 +121,7 @@ class BaseConfig(object):
     # 地域分组小时级规则参数
     RULE_PARAMS_REGION = {
         # 'rule1': {'view_type': 'pre-view', 'return_count': 20, 'score_rule': 0.005},
-        'rule1': {'view_type': 'pre-view'},
+        'rule1': {'view_type': 'pre-view', 'platform_return_rate': 0.001},
     }
 
     # 地域分组天级规则更新使用数据

+ 5 - 1
region_rule_rank_h.py

@@ -66,6 +66,7 @@ features = [
     'lastonehour_view_total',  # 过去1小时曝光次数
     'lastonehour_play_total',  # 过去1小时播放次数
     'lastonehour_share_total',  # 过去1小时分享次数
+    'platform_return',
 ]
 
 
@@ -145,6 +146,7 @@ def cal_score(df):
     df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
     df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
     df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
     df = df.sort_values(by=['score'], ascending=False)
     return df
 
@@ -169,7 +171,9 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
     # 获取符合进入召回源条件的视频,进入条件:小时级回流>=20 && score>=0.005
     return_count = param.get('return_count', 1)
     score_value = param.get('score_rule', 0)
-    h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)]
+    platform_return_rate = param.get('platform_return_rate', 0)
+    h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)
+                     & (df['platform_return_rate'] >= platform_return_rate)]
     # videoid重复时,保留分值高
     h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
     h_recall_df = h_recall_df.drop_duplicates(subset=['videoid'], keep='first')

+ 2 - 2
region_rule_rank_h_by24h.py

@@ -164,9 +164,9 @@ def video_rank(df, now_date, now_h, rule_key, param, region):
     # 获取符合进入召回源条件的视频
     return_count = param.get('return_count', 1)
     score_value = param.get('score_rule', 0)
-    h_recall_df = df[(df['lastday_return'] >= return_count) & (df['score'] >= score_value)]
     platform_return_rate = param.get('platform_return_rate', 0)
-    h_recall_df = h_recall_df[h_recall_df['platform_return_rate'] > platform_return_rate]
+    h_recall_df = df[(df['lastday_return'] >= return_count) & (df['score'] >= score_value)
+                     & (df['platform_return_rate'] >= platform_return_rate)]
     # videoid重复时,保留分值高
     h_recall_df = h_recall_df.sort_values(by=['score'], ascending=False)
     h_recall_df = h_recall_df.drop_duplicates(subset=['videoid'], keep='first')

+ 1 - 1
rule_rank_day.py

@@ -123,7 +123,7 @@ def video_rank_day(df, now_date, rule_key, param):
     else:
         day_recall_df = df
     platform_return_rate = param.get('platform_return_rate', 0)
-    day_recall_df = day_recall_df[day_recall_df['platform_return_rate'] > platform_return_rate]
+    day_recall_df = day_recall_df[day_recall_df['platform_return_rate'] >= platform_return_rate]
 
     # videoid重复时,保留分值高
     day_recall_df = day_recall_df.sort_values(by=['score'], ascending=False)

+ 7 - 1
rule_rank_h.py

@@ -27,6 +27,7 @@ features = [
     'lastonehour_share_total_final',  # 过去1小时分享次数
     'lastonehour_show',  # 过去1小时video_show人数
     'lastonehour_show_total_final',  # 过去1小时video_show次数
+    'platform_return',
 ]
 
 
@@ -104,6 +105,7 @@ def cal_score(df, param):
         df['ctr'] = df['lastonehour_play'] / (df['lastonehour_view'] + 1000)
     df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
     df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
     df = df.sort_values(by=['score'], ascending=False)
     return df
 
@@ -112,6 +114,7 @@ def cal_score2(df):
     # score2计算公式: score = lastonehour_return/(lastonehour_view+1000)
     df = df.fillna(0)
     df['score'] = df['lastonehour_return'] / (df['lastonehour_view'] + 1000)
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
     df = df.sort_values(by=['score'], ascending=False)
     return df
 
@@ -124,6 +127,7 @@ def cal_score3(df):
     df['share_rate'] = df['lastonehour_share_total_final'] / (df['lastonehour_view'] + 1000)
     df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share_total_final'] + 1)
     df['score'] = df['share_rate'] + 0.03 * df['back_rate']
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
     df = df.sort_values(by=['score'], ascending=False)
     return df
 
@@ -147,7 +151,9 @@ def video_rank(df, now_date, now_h, rule_key, param):
     # 获取符合进入召回源条件的视频,进入条件:小时级回流>=20 && score>=0.005
     return_count = param.get('return_count')
     score_value = param.get('score_rule')
-    h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)]
+    platform_return_rate = param.get('platform_return_rate', 0)
+    h_recall_df = df[(df['lastonehour_return'] >= return_count) & (df['score'] >= score_value)
+                     & (df['platform_return_rate'] >= platform_return_rate)]
     h_recall_df['videoid'] = h_recall_df['videoid'].astype(int)
     h_recall_videos = h_recall_df['videoid'].to_list()
     log_.info(f'h_recall videos count = {len(h_recall_videos)}')