Prechádzať zdrojové kódy

add abtest 461,462,463

liqian 1 rok pred
rodič
commit
630c644887
2 zmenil súbory, kde vykonal 118 pridanie a 8 odobranie
  1. 4 1
      config.py
  2. 114 7
      region_rule_rank_h.py

+ 4 - 1
config.py

@@ -398,7 +398,7 @@ class BaseConfig(object):
             'rule19': {'view_type': 'video-show-region', 'platform_return_rate': 0.001,
                        'region_24h_rule_key': 'rule4', '24h_rule_key': 'rule4', 'merge_func': 2, 'dup_remove': False},
             # 分值计算公式 增加h-2分享当前小时回流数据、h-3分享当前小时回流数据特征
-            # score = k2 * sharerate * (backrate * LOG(lastonehour_return+1) + backrate_2 * LOG(lasttwohour_return+1) + backrate_3 * LOG(lastthreehour_return+1))
+            # score = k2 * sharerate * (backrate * LOG(lastonehour_return+1) + backrate_2 * LOG(lasttwohour_return_now+1) + backrate_3 * LOG(lastthreehour_return_now+1))
             'rule20': {'view_type': 'video-show-region', 'platform_return_rate': 0.001, 'region_24h_rule_key': 'rule2',
                        '24h_rule_key': 'rule3', 'score_func': 'add_backrate*log(return+1)'},
             # 分值计算公式 增加[h-3,h-2]之间的回流留存特征
@@ -440,6 +440,9 @@ class BaseConfig(object):
             # {'data': 'data1', 'rule': 'rule17'},  # 215 vlog
             # {'data': 'data1', 'rule': 'rule18'},  # 224 vlog
             {'data': 'videos5', 'rule': 'rule7-1'},  # 428 [内容精选]
+            {'data': 'data1', 'rule': 'rule20'},  # 461 vlog 分值计算公式 增加h-2分享当前小时回流数据、h-3分享当前小时回流数据特征
+            {'data': 'data1', 'rule': 'rule21'},  # 462 vlog 分值计算公式 增加[h-3,h-2]之间的回流留存特征
+            {'data': 'data1', 'rule': 'rule22'},  # 463 vlog 分值计算公式 增加h-2分享当前小时回流/h-2分享、h-3分享当前小时回流/h-3分享 特征
         ],
         'params_list_new': [
             {'data': 'data10', 'rule': 'rule19'},  # 316 票圈视频 + 召回在线去重

+ 114 - 7
region_rule_rank_h.py

@@ -46,11 +46,11 @@ features = [
     'lastonehour_show',  # 不区分地域
     'lastonehour_show_region',  # 地域分组
     'lasttwohour_share',  # h-2小时分享人数
-    'lasttwohour_return',  # h-2分享,过去1小时回流人数
-    '上二小时分享上二小时当小时回流',  # h-2分享,h-2回流人数
+    'lasttwohour_return_now',  # h-2分享,过去1小时回流人数
+    'lasttwohour_return',  # h-2分享,h-2回流人数
     'lastthreehour_share',  # h-3小时分享人数
-    'lastthreehour_return',  # h-3分享,过去1小时回流人数
-    '上三小时分享上三小时当小时回流',  # h-3分享,h-3回流人数
+    'lastthreehour_return_now',  # h-3分享,过去1小时回流人数
+    'lastthreehour_return',  # h-3分享,h-3回流人数
 ]
 
 
@@ -172,13 +172,120 @@ def cal_score_initial(df, param):
     return df
 
 
+def cal_score_add_return(df, param):
+    # Score that adds h-2 / h-3 share return terms to the last-hour term: sharerate*(backrate*logback + backrate2*logback_now2 + backrate3*logback_now3)*ctr
+    # sharerate = lastonehour_share/(lastonehour_play+1000)
+    # backrate = lastonehour_return/(lastonehour_share+10)
+    # backrate2 = lasttwohour_return_now/(lasttwohour_share+10)
+    # backrate3 = lastthreehour_return_now/(lastthreehour_share+10)
+    # ctr = lastonehour_play/(shows+1000), where shows is picked by param['view_type'] (lastonehour_preview by default); ctr is capped: K2 = 0.6 if ctr > 0.6 else ctr
+    # score = k2 * sharerate * (backrate * LOG(lastonehour_return+1) + backrate_2 * LOG(lasttwohour_return_now+1) + backrate_3 * LOG(lastthreehour_return_now+1))
+
+    df = df.fillna(0)  # missing feature values are treated as 0
+    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
+    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
+    df['back_rate2'] = df['lasttwohour_return_now'] / (df['lasttwohour_share'] + 10)
+    df['log_back2'] = (df['lasttwohour_return_now'] + 1).apply(math.log)
+    df['back_rate3'] = df['lastthreehour_return_now'] / (df['lastthreehour_share'] + 10)
+    df['log_back3'] = (df['lastthreehour_return_now'] + 1).apply(math.log)
+
+    if param.get('view_type', None) == 'video-show':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show'] + 1000)
+    elif param.get('view_type', None) == 'video-show-region':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show_region'] + 1000)
+    else:
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
+    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)  # cap ctr at 0.6
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']  # NOTE(review): denominator has no smoothing and can be 0 (e.g. after fillna(0)) -> inf/NaN; confirm consumers handle that
+
+    df['score'] = df['K2'] * df['share_rate'] * (
+            df['back_rate'] * df['log_back'] +
+            df['back_rate2'] * df['log_back2'] +
+            df['back_rate3'] * df['log_back3']
+    )
+
+    df = df.sort_values(by=['score'], ascending=False)  # highest score first
+    return df
+
+
+def cal_score_multiply_return_retention(df, param):
+    # Score formula: k2 * sharerate * backrate * LOG(lastonehour_return+1) * return retention of the previous two hours
+    # sharerate = lastonehour_share/(lastonehour_play+1000)
+    # backrate = lastonehour_return/(lastonehour_share+10)
+    # ctr = lastonehour_play/(shows+1000), where shows is picked by param['view_type'] (lastonehour_preview by default); ctr is capped: K2 = 0.6 if ctr > 0.6 else ctr
+    # Return retention of the previous two hours: return_retention_initial = (lasttwohour_return_now + lastthreehour_return_now)/(lasttwohour_return + lastthreehour_return + 1)
+    # return_retention = 0.5 if return_retention_initial == 0 else return_retention_initial
+    # score = k2 * sharerate * backrate * LOG(lastonehour_return+1) * return_retention
+
+    df = df.fillna(0)  # missing feature values are treated as 0
+    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
+    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
+
+    if param.get('view_type', None) == 'video-show':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show'] + 1000)
+    elif param.get('view_type', None) == 'video-show-region':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show_region'] + 1000)
+    else:
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
+    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)  # cap ctr at 0.6
+
+    df['return_retention_initial'] = (df['lasttwohour_return_now'] + df['lastthreehour_return_now']) / \
+                                     (df['lasttwohour_return'] + df['lastthreehour_return'] + 1)
+    df['return_retention'] = df['return_retention_initial'].apply(lambda x: 0.5 if x == 0 else x)  # zero ratio falls back to 0.5, presumably so it does not zero the whole product
+
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']  # NOTE(review): denominator has no smoothing and can be 0 (e.g. after fillna(0)) -> inf/NaN; confirm consumers handle that
+
+    df['score'] = df['K2'] * df['share_rate'] * df['back_rate'] * df['log_back'] * df['return_retention']
+
+    df = df.sort_values(by=['score'], ascending=False)  # highest score first
+    return df
+
+
+def cal_score_update_backrate(df, param):
+    # Score formula: k2 * sharerate * (backrate + backrate * backrate_2 * backrate_3) * LOG(lastonehour_return+1)
+    # sharerate = lastonehour_share/(lastonehour_play+1000)
+    # backrate = lastonehour_return/(lastonehour_share+10)
+    # backrate2 = lasttwohour_return_now/(lasttwohour_share+10)
+    # backrate3 = lastthreehour_return_now/(lastthreehour_share+10)
+    # ctr = lastonehour_play/(shows+1000), where shows is picked by param['view_type'] (lastonehour_preview by default); ctr is capped: K2 = 0.6 if ctr > 0.6 else ctr
+    # backrate1_3_initial = backrate * backrate_2 * backrate_3
+    # backrate1_3 = 0.02 if backrate1_3_initial == 0 else backrate1_3_initial
+    # score = k2 * sharerate * (backrate + backrate1_3) * LOG(lastonehour_return+1)
+
+    df = df.fillna(0)  # missing feature values are treated as 0
+    df['share_rate'] = df['lastonehour_share'] / (df['lastonehour_play'] + 1000)
+    df['back_rate'] = df['lastonehour_return'] / (df['lastonehour_share'] + 10)
+    df['back_rate2'] = df['lasttwohour_return_now'] / (df['lasttwohour_share'] + 10)
+    df['back_rate3'] = df['lastthreehour_return_now'] / (df['lastthreehour_share'] + 10)
+    df['log_back'] = (df['lastonehour_return'] + 1).apply(math.log)
+
+    if param.get('view_type', None) == 'video-show':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show'] + 1000)
+    elif param.get('view_type', None) == 'video-show-region':
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_show_region'] + 1000)
+    else:
+        df['ctr'] = df['lastonehour_play'] / (df['lastonehour_preview'] + 1000)
+    df['K2'] = df['ctr'].apply(lambda x: 0.6 if x > 0.6 else x)  # cap ctr at 0.6
+
+    df['backrate1_3_initial'] = df['back_rate'] * df['back_rate2'] * df['back_rate3']
+    df['backrate1_3'] = df['backrate1_3_initial'].apply(lambda x: 0.02 if x == 0 else x)  # a zero product (any of the three rates is 0) falls back to 0.02
+    df['platform_return_rate'] = df['platform_return'] / df['lastonehour_return']  # NOTE(review): denominator has no smoothing and can be 0 (e.g. after fillna(0)) -> inf/NaN; confirm consumers handle that
+
+    df['score'] = df['K2'] * df['share_rate'] * (df['back_rate'] + df['backrate1_3']) * df['log_back']
+
+    df = df.sort_values(by=['score'], ascending=False)  # highest score first
+    return df
+
+
 def cal_score(df, param):
     if param.get('score_func', None) == 'add_backrate*log(return+1)':
-        pass
+        df = cal_score_add_return(df=df, param=param)  # adds h-2 / h-3 back_rate * log(return_now + 1) terms
     elif param.get('score_func', None) == 'multiply_return_retention':
-        pass
+        df = cal_score_multiply_return_retention(df=df, param=param)  # multiplies the base score by the return-retention ratio
     elif param.get('score_func', None) == 'update_backrate':
-        pass
+        df = cal_score_update_backrate(df=df, param=param)  # adds back_rate * back_rate2 * back_rate3 to back_rate
     else:
         df = cal_score_initial(df=df, param=param)
     return df