Browse Source

merge abtest-136-137-2022090811

liqian 2 years ago
parent
commit
d15846cd8d
4 changed files with 87 additions and 14 deletions
  1. 26 3
      config.py
  2. 12 1
      region_rule_rank_h.py
  3. 12 1
      region_rule_rank_h_by24h.py
  4. 37 9
      rule_rank_h_by_24h.py

+ 26 - 3
config.py

@@ -186,12 +186,16 @@ class BaseConfig(object):
     # 小时级更新过去24h数据规则参数
     RULE_PARAMS_24H_APP_TYPE = {
         'rule_params': {
-            # 'rule2': {'cal_score_func': 2, 'return_count': 40, 'platform_return_rate': 0.001,
-            #           'view_type': 'preview'},
             'rule3': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001,
                       'view_type': 'preview'},
             'rule4': {'cal_score_func': 2, 'return_count': 100, 'platform_return_rate': 0.001,
                       'view_type': 'preview', 'merge_func': 2},
+            # 无回流人群
+            'rule5': {'return_count': 100, 'platform_return_rate': 0.001,
+                      'view_type': 'preview', 'click_score_rate': 0.7},
+            # 有回流人群
+            'rule6': {'return_count': 100, 'platform_return_rate': 0.001,
+                      'view_type': 'preview', 'back_score_rate': 0.7},
         },
         'data_params': DATA_PARAMS,
         'params_list': [
@@ -201,6 +205,9 @@ class BaseConfig(object):
             {'data': 'data3', 'rule': 'rule4'},
             {'data': 'data4', 'rule': 'rule4'},
             {'data': 'data6', 'rule': 'rule4'},
+            {'data': 'data1', 'rule': 'rule5'},
+            {'data': 'data1', 'rule': 'rule6'},
+
         ]
     }
 
@@ -217,6 +224,12 @@ class BaseConfig(object):
                       'platform_return_rate': 0.001},
             'rule4': {'view_type': 'video-show', 'return_count': 21, 'score_rule': 0,
                       'platform_return_rate': 0.001, 'merge_func': 2},
+            # 无回流人群
+            'rule6': {'view_type': 'video-show', 'return_count': 21, 'score_rule': 0,
+                      'platform_return_rate': 0.001, 'click_score_rate': 0.7},
+            # 有回流人群
+            'rule7': {'view_type': 'video-show', 'return_count': 21, 'score_rule': 0,
+                      'platform_return_rate': 0.001, 'back_score_rate': 0.7},
         },
         'data_params': DATA_PARAMS,
         'params_list': [
@@ -226,6 +239,8 @@ class BaseConfig(object):
             {'data': 'data3', 'rule': 'rule4'},
             {'data': 'data4', 'rule': 'rule4'},
             {'data': 'data6', 'rule': 'rule4'},
+            {'data': 'data1', 'rule': 'rule6'},
+            {'data': 'data1', 'rule': 'rule7'},
         ]
     }
 
@@ -246,7 +261,13 @@ class BaseConfig(object):
             'rule7': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
                       'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2},
             'rule9': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
-                      'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', '30day_rule_key': 'rule1'}
+                      'region_24h_rule_key': 'rule2', '24h_rule_key': 'rule3', '30day_rule_key': 'rule1'},
+            # 无回流人群
+            'rule10': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule6', '24h_rule_key': 'rule5', 'click_score_rate': 0.7},
+            # 有回流人群
+            'rule11': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
+                       'region_24h_rule_key': 'rule7', '24h_rule_key': 'rule6', 'back_score_rate': 0.7},
         },
         'data_params': DATA_PARAMS,
         'params_list': [
@@ -257,6 +278,8 @@ class BaseConfig(object):
             {'data': 'data4', 'rule': 'rule7'},
             {'data': 'data6', 'rule': 'rule7'},
             {'data': 'data1', 'rule': 'rule9'},
+            {'data': 'data1', 'rule': 'rule10'},
+            {'data': 'data1', 'rule': 'rule11'},
         ],
     }
 

+ 12 - 1
region_rule_rank_h.py

@@ -147,8 +147,19 @@ def cal_score(df, param):
     else:
         df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
     df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
-    df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
     df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
+
+    df['score1'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
+
+    click_score_rate = param.get('click_score_rate', None)
+    back_score_rate = param.get('back_score_rate', None)
+    if click_score_rate is not None:
+        df['score'] = (1 - click_score_rate) * df['score1'] + click_score_rate * df['K2']
+    elif back_score_rate is not None:
+        df['score'] = (1 - back_score_rate) * df['score1'] + back_score_rate * df['back_rate']
+    else:
+        df['score'] = df['score1']
+
     df = df.sort_values(by=['score'], ascending=False)
     return df
 

+ 12 - 1
region_rule_rank_h_by24h.py

@@ -123,8 +123,19 @@ def cal_score(df, param):
     else:
         df['ctr'] = df['lastday_play'] / (df['lastday_preview'] + 1000)
     df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
-    df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
     df['platform_return_rate'] = df['platform_return'] / df['lastday_return']
+
+    df['score1'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
+
+    click_score_rate = param.get('click_score_rate', None)
+    back_score_rate = param.get('back_score_rate', None)
+    if click_score_rate is not None:
+        df['score'] = (1 - click_score_rate) * df['score1'] + click_score_rate * df['K2']
+    elif back_score_rate is not None:
+        df['score'] = (1 - back_score_rate) * df['score1'] + back_score_rate * df['back_rate']
+    else:
+        df['score'] = df['score1']
+
     df = df.sort_values(by=['score'], ascending=False)
     return df
 

+ 37 - 9
rule_rank_h_by_24h.py

@@ -125,6 +125,40 @@ def cal_score2(df, param):
     return df
 
 
+def cal_score(df, param):
+    # score计算公式: score1 = share次数/(view+1000)+0.01*return/(share次数+100)
+    # ctr = lastonehour_play/(lastonehour_preview+1000), 对ctr限最大值:K2 = 0.6 if ctr > 0.6 else ctr
+    # score = 0.3 * score1 + 0.7 * K2
+    df = df.fillna(0)
+    if param.get('view_type', None) == 'video-show':
+        df['share_rate'] = df['share次数'] / (df['platform_show'] + 1000)
+        df['ctr'] = df['play人数'] / (df['platform_show'] + 1000)
+    elif param.get('view_type', None) == 'preview':
+        df['share_rate'] = df['share次数'] / (df['preview人数'] + 1000)
+        df['ctr'] = df['play人数'] / (df['preview人数'] + 1000)
+    else:
+        df['share_rate'] = df['share次数'] / (df['platform_show'] + 1000)
+        df['ctr'] = df['play人数'] / (df['platform_show'] + 1000)
+
+    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
+    df['back_rate'] = df['回流人数'] / (df['share次数'] + 100)
+    df['platform_return_rate'] = df['platform_return'] / df['回流人数']
+
+    df['score1'] = df['share_rate'] + 0.01 * df['back_rate']
+
+    click_score_rate = param.get('click_score_rate', None)
+    back_score_rate = param.get('back_score_rate', None)
+    if click_score_rate is not None:
+        df['score'] = (1 - click_score_rate) * df['score1'] + click_score_rate * df['K2']
+    elif back_score_rate is not None:
+        df['score'] = (1 - back_score_rate) * df['score1'] + back_score_rate * df['back_rate']
+    else:
+        df['score'] = df['score1']
+
+    df = df.sort_values(by=['score'], ascending=False)
+    return df
+
+
 def video_rank_h(df, now_date, now_h, rule_key, param, data_key):
     """
     获取符合进入召回源条件的视频,与每日更新的rov模型结果视频列表进行合并
@@ -283,17 +317,14 @@ def rank_by_h(now_date, now_h, rule_params, project, table):
         rule_key = param.get('rule')
         rule_param = rule_params_item.get(rule_key)
         log_.info(f"rule_key = {rule_key}, rule_param = {rule_param}")
-        cal_score_func = rule_param.get('cal_score_func', 1)
+        # cal_score_func = rule_param.get('cal_score_func', 1)
         merge_func = rule_param.get('merge_func', 1)
 
         if merge_func == 2:
             for apptype, weight in data_param.items():
                 df = feature_df[feature_df['apptype'] == apptype]
                 # 计算score
-                if cal_score_func == 2:
-                    score_df = cal_score2(df=df, param=rule_param)
-                else:
-                    score_df = cal_score1(df=df)
+                score_df = cal_score(df=df, param=rule_param)
                 score_df['score'] = score_df['score'] * weight
                 score_df_list.append(score_df)
             # 分数合并
@@ -306,10 +337,7 @@ def rank_by_h(now_date, now_h, rule_params, project, table):
         else:
             df_list = [feature_df[feature_df['apptype'] == apptype] for apptype, _ in data_param.items()]
             df_merged = reduce(merge_df, df_list)
-            if cal_score_func == 2:
-                score_df = cal_score2(df=df_merged, param=rule_param)
-            else:
-                score_df = cal_score1(df=df_merged)
+            score_df = cal_score(df=df_merged, param=rule_param)
             video_rank_h(df=score_df, now_date=now_date, now_h=now_h,
                          rule_key=rule_key, param=rule_param, data_key=data_key)