Ver código fonte

add abtest: 500, 501

liqian 1 ano atrás
pai
commit
fb9ced743d
2 arquivos alterados com 59 adições e 9 exclusões
  1. 15 0
      config.py
  2. 44 9
      region_rule_rank_h.py

+ 15 - 0
config.py

@@ -417,6 +417,19 @@ class BaseConfig(object):
             'rule24': {'view_type': 'video-show-region', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2',
                        '24h_rule_key': 'rule3', 'score_func': 'multiply_return_retention',
                        'return_data': 'share_region_return'},
+            # score = sharerate*backrate*log(return+1)*CTR,
+            # sharerate=(lastonehour_share+1)/(lastonehour_play+1000)
+            # backrate=(lastonehour_return+1)/(lastonehour_share+10)
+            # CTR=(lastonehour_play+1)/(lastonehour_view+100), ctr不进行校正
+            'rule25': {'view_type': 'video-view', 'platform_return_rate': 0.001, 'ctr_check': False,
+                       'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2,
+                       'score_func': 'back_view0'},
+            # score = back_play_rate*log(return+1)*CTR,
+            # back_play_rate=(lastonehour_return+1)/(lastonehour_play+1000)
+            # CTR=(lastonehour_play+1)/(lastonehour_view+100), ctr不进行校正
+            'rule26': {'view_type': 'video-view', 'platform_return_rate': 0.001, 'ctr_check': False,
+                       'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2,
+                       'score_func': 'back_view1'},
 
         },
         'data_params': DATA_PARAMS,
@@ -453,6 +466,8 @@ class BaseConfig(object):
             {'data': 'data1', 'rule': 'rule22'},  # 463 vlog 分值计算公式 增加h-2分享当前小时回流/h-2分享、h-3分享当前小时回流/h-3分享 特征
             # {'data': 'data1', 'rule': 'rule23'},  # 465 vlog 回流数据使用 分享限制地域,回流不限制地域 统计数据
             # {'data': 'data1', 'rule': 'rule24'},  # 466 vlog 分值计算公式 增加[h-3,h-2]之间的回流留存特征 + 回流数据使用 分享限制地域,回流不限制地域 统计数据
+            {'data': 'data10', 'rule': 'rule25'},  # 500
+            {'data': 'data10', 'rule': 'rule26'},  # 501
         ],
         'params_list_new': [
             # {'data': 'data10', 'rule': 'rule19'},  # 316 票圈视频 + 召回在线去重

+ 44 - 9
region_rule_rank_h.py

@@ -33,15 +33,15 @@ features = [
     'apptype',
     'code',
     'videoid',
-    'lastonehour_preview',  # 过去1小时预曝光人数
-    'lastonehour_view',  # 过去1小时曝光人数
-    'lastonehour_play',  # 过去1小时播放人数
-    'lastonehour_share',  # 过去1小时分享人数
-    'lastonehour_return',  # 过去1小时分享,过去1小时回流人数
-    'lastonehour_preview_total',  # 过去1小时预曝光次数
-    'lastonehour_view_total',  # 过去1小时曝光次数
-    'lastonehour_play_total',  # 过去1小时播放次数
-    'lastonehour_share_total',  # 过去1小时分享次数
+    'lastonehour_preview',  # 过去1小时预曝光人数 - 区分地域
+    'lastonehour_view',  # 过去1小时曝光人数 - 区分地域
+    'lastonehour_play',  # 过去1小时播放人数 - 区分地域
+    'lastonehour_share',  # 过去1小时分享人数 - 区分地域
+    'lastonehour_return',  # 过去1小时分享,过去1小时回流人数 - 区分地域
+    'lastonehour_preview_total',  # 过去1小时预曝光次数 - 区分地域
+    'lastonehour_view_total',  # 过去1小时曝光次数 - 区分地域
+    'lastonehour_play_total',  # 过去1小时播放次数 - 区分地域
+    'lastonehour_share_total',  # 过去1小时分享次数 - 区分地域
     'platform_return',
     'lastonehour_show',  # 不区分地域
     'lastonehour_show_region',  # 地域分组
@@ -162,6 +162,7 @@ def cal_score_initial(df, param):
     else:
         df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
     df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)
+
     df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']
 
     df['score1'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['K2']
@@ -348,6 +349,36 @@ def cal_score_multiply_return_retention_with_new_return(df, param):
     return df
 
 
+def cal_score_with_back_view0(df, param):
+    # score = share_rate * back_rate * log(lastonehour_return + 1) * ctr; `param` is not read in this body.
+    # share_rate = (lastonehour_share + 1) / (lastonehour_play + 1000)
+    # back_rate = (lastonehour_return + 1) / (lastonehour_share + 10)
+    # ctr = (lastonehour_play + 1) / (lastonehour_view + 100); ctr is used uncorrected (no cap is applied here)
+    df = df.fillna(0)  # missing values are replaced with 0 before any ratio is computed
+    df['share_rate'] = (df['lastonehour_share'] + 1) / (df['lastonehour_play'] + 1000)
+    df['back_rate'] = (df['lastonehour_return'] + 1) / (df['lastonehour_share'] + 10)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)  # natural log, applied element-wise
+    df['ctr'] = (df['lastonehour_play'] + 1) / (df['lastonehour_view'] + 100)
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']  # not used in score; NOTE(review): unsmoothed, presumably inf/NaN where lastonehour_return is 0 - confirm downstream handling
+    df['score'] = df['share_rate'] * df['back_rate'] * df['log_back'] * df['ctr']
+    df = df.sort_values(by=['score'], ascending=False)  # highest score first
+    return df
+
+
+def cal_score_with_back_view1(df, param):
+    # score = back_play_rate * log(lastonehour_return + 1) * ctr; `param` is not read in this body.
+    # back_play_rate = (lastonehour_return + 1) / (lastonehour_play + 1000)
+    # ctr = (lastonehour_play + 1) / (lastonehour_view + 100); ctr is used uncorrected (no cap is applied here)
+    df = df.fillna(0)  # missing values are replaced with 0 before any ratio is computed
+    df['back_play_rate'] = (df['lastonehour_return'] + 1) / (df['lastonehour_play'] + 1000)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)  # natural log, applied element-wise
+    df['ctr'] = (df['lastonehour_play'] + 1) / (df['lastonehour_view'] + 100)
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']  # not used in score; NOTE(review): unsmoothed, presumably inf/NaN where lastonehour_return is 0 - confirm downstream handling
+    df['score'] = df['back_play_rate'] * df['log_back'] * df['ctr']
+    df = df.sort_values(by=['score'], ascending=False)  # highest score first
+    return df
+
+
 def cal_score(df, param):
     if param.get('return_data', None) == 'share_region_return':
         if param.get('score_func', None) == 'multiply_return_retention':
@@ -361,6 +392,10 @@ def cal_score(df, param):
             df = cal_score_multiply_return_retention(df=df, param=param)
         elif param.get('score_func', None) == 'update_backrate':
             df = cal_score_update_backrate(df=df, param=param)
+        elif param.get('score_func', None) == 'back_view0':
+            df = cal_score_with_back_view0(df=df, param=param)
+        elif param.get('score_func', None) == 'back_view1':
+            df = cal_score_with_back_view1(df=df, param=param)
         else:
             df = cal_score_initial(df=df, param=param)
     return df