Jelajahi Sumber

add ad abtest

liqian 2 tahun lalu
induk
melakukan
0445f6515b
5 mengubah file dengan 126 tambahan dan 71 penghapusan
  1. 35 21
      ad_user_video_predict.py
  2. 17 7
      ad_users_data_update.py
  3. 17 7
      ad_video_data_update.py
  4. 57 35
      config.py
  5. 0 1
      user_group_update.py

+ 35 - 21
ad_user_video_predict.py

@@ -61,20 +61,21 @@ def predict_video_share_rate(dt, app_type):
     return video_df
 
 
-def predict_ad_group_video():
-    now_date = datetime.datetime.today()
-    dt = datetime.datetime.strftime(now_date, '%Y%m%d')
-    log_.info(f"dt = {dt}")
+def predict_ad_group_video(dt, config_key, config_param):
+    log_.info(f"config_key = {config_key} update start ...")
     # 获取用户组预测值
-    group_key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{dt}"
+    user_data_key = config_param['data'].get('user')
+    group_key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{user_data_key}:{dt}"
     group_data = redis_helper.get_all_data_from_zset(key_name=group_key_name, with_scores=True)
     if group_data is None:
         log_.info(f"group data is None!")
     group_df = pd.DataFrame(data=group_data, columns=['group', 'group_ad_share_rate'])
     group_df = group_df[group_df['group'] != 'mean_group']
     log_.info(f"group_df count = {len(group_df)}")
+
     # 获取视频预测值
-    video_key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{dt}"
+    video_data_key = config_param['data'].get('video')
+    video_key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{video_data_key}:{dt}"
     video_data = redis_helper.get_all_data_from_zset(key_name=video_key_name, with_scores=True)
     if video_data is None:
         log_.info(f"video data is None!")
@@ -89,22 +90,35 @@ def predict_ad_group_video():
         all_group_data.extend(predict_df[item['group']].tolist())
 
     # 计算对应的阈值
-    for app_type in config_.AD_APP_TYPE_LIST:
-        ad_threshold_mapping = config_.AD_THRESHOLD_MAPPING.get(app_type)
-        threshold_data = {}
-        for _, item in group_df.iterrows():
-            # 获取分组对应的均值作为阈值
-            threshold_data[item['group']] = predict_df[item['group']].mean() * ad_threshold_mapping['group']
-        threshold_data['mean_group'] = np.mean(all_group_data) * ad_threshold_mapping['mean_group']
-        log_.info(f"app_type = {app_type}, threshold_data = {threshold_data}")
-        # 将阈值写入redis
-        for key, val in threshold_data.items():
-            key_name = f"{config_.KEY_NAME_PREFIX_AD_THRESHOLD}{app_type}:{key}"
-            redis_helper.set_data_to_redis(key_name=key_name, value=val, expire_time=2 * 24 * 3600)
-
-    predict_df.to_csv('./data/ad_user_video_predict.csv')
+    ad_threshold_mapping = config_param.get('threshold')
+    threshold_data = {}
+    for _, item in group_df.iterrows():
+        # 获取分组对应的均值作为阈值
+        threshold_data[item['group']] = predict_df[item['group']].mean() * ad_threshold_mapping['group']
+    threshold_data['mean_group'] = np.mean(all_group_data) * ad_threshold_mapping['mean_group']
+    log_.info(f"config_key = {config_key}, threshold_data = {threshold_data}")
+
+    # 将阈值写入redis
+    abtest_config_list = config_key.split('-')
+    abtest_id, abtest_config_tag = abtest_config_list[0], abtest_config_list[1]
+    for key, val in threshold_data.items():
+        key_name = f"{config_.KEY_NAME_PREFIX_AD_THRESHOLD}{abtest_id}:{abtest_config_tag}:{key}"
+        redis_helper.set_data_to_redis(key_name=key_name, value=val, expire_time=2 * 24 * 3600)
+
+    predict_df.to_csv(f'./data/ad_user_video_predict_{config_key}.csv')
+    log_.info(f"config_key = {config_key} update end!")
+
+
+def predict():
+    """Run predict_ad_group_video once for every entry in config_.AD_ABTEST_CONFIG.
+
+    The current date, formatted as YYYYMMDD, is computed once and passed as
+    `dt` to every call together with that entry's key and parameters.
+    """
+    now_date = datetime.datetime.today()
+    dt = datetime.datetime.strftime(now_date, '%Y%m%d')
+    log_.info(f"dt = {dt}")
+    # Each item is (abtest config key, config params) taken from the config mapping
+    params = config_.AD_ABTEST_CONFIG
+    for config_key, config_param in params.items():
+        predict_ad_group_video(dt=dt, config_key=config_key, config_param=config_param)
 
 
 if __name__ == '__main__':
-    predict_ad_group_video()
+    # predict_ad_group_video()
+    predict()
 

+ 17 - 7
ad_users_data_update.py

@@ -16,12 +16,12 @@ features = [
 ]
 
 
-def predict_user_group_share_rate(project, table, dt, app_type):
+def predict_user_group_share_rate(user_group_initial_df, dt, data_key, data_param):
     """预估用户组对应的有广告时分享率"""
-    # 获取用户组特征
-    user_group_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    # 获取对应的用户组特征
+    user_group_df = user_group_initial_df.copy()
     user_group_df['apptype'] = user_group_df['apptype'].astype(int)
-    user_group_df = user_group_df[user_group_df['apptype'] == app_type]
+    user_group_df = user_group_df[user_group_df['apptype'] == data_param]
     user_group_df['sharerate_all'].fillna(0, inplace=True)
     user_group_df['sharerate_ad'].fillna(0, inplace=True)
     user_group_df['sharerate_all'] = user_group_df['sharerate_all'].astype(float)
@@ -34,7 +34,7 @@ def predict_user_group_share_rate(project, table, dt, app_type):
         user_group_df['sharerate_ad'] * float(ad_all_group_share_rate) / user_group_df['sharerate_all']
     user_group_df['group_ad_share_rate'].fillna(0, inplace=True)
     # 结果写入redis
-    key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{dt}"
+    key_name = f"{config_.KEY_NAME_PREFIX_AD_GROUP}{data_key}:{dt}"
     redis_data = {}
     for index, item in user_group_df.iterrows():
         redis_data[item['group']] = item['group_ad_share_rate']
@@ -46,9 +46,19 @@ def predict_user_group_share_rate(project, table, dt, app_type):
     return user_group_df
 
 
+def update_users_data(project, table, dt, update_params):
+    """Estimate the with-ad share rate of user groups for every configured data key.
+
+    The feature data is fetched once via get_feature_data and the same frame is
+    handed to predict_user_group_share_rate for each (data_key, data_param)
+    pair in `update_params`.
+    """
+    # Fetch the user-group features
+    user_group_initial_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    for data_key, data_param in update_params.items():
+        log_.info(f"data_key = {data_key} update start...")
+        predict_user_group_share_rate(user_group_initial_df=user_group_initial_df, dt=dt, data_key=data_key, data_param=data_param)
+        log_.info(f"data_key = {data_key} update end!")
+
+
 def timer_check():
     try:
-        app_type = config_.APP_TYPE['VLOG']
+        update_params = config_.AD_USER_DATA_PARAMS
         project = config_.ad_model_data['users_share_rate'].get('project')
         table = config_.ad_model_data['users_share_rate'].get('table')
         now_date = datetime.datetime.today()
@@ -60,7 +70,7 @@ def timer_check():
         if data_count > 0:
             log_.info(f"ad user group data count = {data_count}")
             # 数据准备好,进行更新
-            predict_user_group_share_rate(project=project, table=table, dt=dt, app_type=app_type)
+            update_users_data(project=project, table=table, dt=dt, update_params=update_params)
             log_.info(f"ad user group data update end!")
         elif now_min > 45:
             log_.info('ad user group data is None!')

+ 17 - 7
ad_video_data_update.py

@@ -16,12 +16,12 @@ features = [
 ]
 
 
-def predict_video_share_rate(project, table, dt, app_type):
+def predict_video_share_rate(video_initial_df, dt, data_key, data_param):
     """预估视频有广告时分享率"""
-    # 获取视频特征
-    video_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    # 获取对应的视频特征
+    video_df = video_initial_df.copy()
     video_df['apptype'] = video_df['apptype'].astype(int)
-    video_df = video_df[video_df['apptype'] == app_type]
+    video_df = video_df[video_df['apptype'] == int(data_param)]
     video_df['sharerate_all'].fillna(0, inplace=True)
     video_df['sharerate_ad'].fillna(0, inplace=True)
     video_df['sharerate_all'] = video_df['sharerate_all'].astype(float)
@@ -35,7 +35,7 @@ def predict_video_share_rate(project, table, dt, app_type):
     video_df['video_ad_share_rate'].fillna(0, inplace=True)
     video_df = video_df[video_df['video_ad_share_rate'] != 0]
     # 结果写入redis
-    key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{dt}"
+    key_name = f"{config_.KEY_NAME_PREFIX_AD_VIDEO}{data_key}:{dt}"
     redis_data = {}
     for index, item in video_df.iterrows():
         redis_data[int(item['videoid'])] = item['video_ad_share_rate']
@@ -47,9 +47,19 @@ def predict_video_share_rate(project, table, dt, app_type):
     return video_df
 
 
+def update_videos_data(project, table, dt, update_params):
+    """Estimate the with-ad share rate of videos for every configured data key.
+
+    The feature data is fetched once via get_feature_data and the same frame is
+    handed to predict_video_share_rate for each (data_key, data_param) pair in
+    `update_params`.
+    """
+    # Fetch the video features
+    video_initial_df = get_feature_data(project=project, table=table, features=features, dt=dt)
+    for data_key, data_param in update_params.items():
+        log_.info(f"data_key = {data_key} update start...")
+        predict_video_share_rate(video_initial_df=video_initial_df, dt=dt, data_key=data_key, data_param=data_param)
+        log_.info(f"data_key = {data_key} update end!")
+
+
 def timer_check():
     try:
-        app_type = config_.APP_TYPE['VLOG']
+        update_params = config_.AD_VIDEO_DATA_PARAMS
         project = config_.ad_model_data['videos_share_rate'].get('project')
         table = config_.ad_model_data['videos_share_rate'].get('table')
         now_date = datetime.datetime.today()
@@ -61,7 +71,7 @@ def timer_check():
         if data_count > 0:
             log_.info(f"ad video data count = {data_count}")
             # 数据准备好,进行更新
-            predict_video_share_rate(project=project, table=table, dt=dt, app_type=app_type)
+            update_videos_data(project=project, table=table, dt=dt, update_params=update_params)
             log_.info(f"ad video data update end!")
         elif now_min > 45:
             log_.info('ad video data is None!')

+ 57 - 35
config.py

@@ -626,45 +626,67 @@ class BaseConfig(object):
         APP_TYPE['LAO_HAO_KAN_VIDEO'],  # 老好看视频
         APP_TYPE['ZUI_JING_QI'],  # 票圈最惊奇
     ]
-    # 广告模型阈值
-    AD_THRESHOLD_MAPPING = {
-        APP_TYPE['VLOG']: {
-            'group': 25 / 48,
-            'mean_group': 25 / 48,
-        },  # 票圈vlog
-        APP_TYPE['PIAO_QUAN_VIDEO_PLUS']: {
-            'group': 5 / 6,
-            'mean_group': 5 / 6,
-        },  # 票圈视频+
-        APP_TYPE['LOVE_LIVE']: {
-            'group': 25 / 48,
-            'mean_group': 25 / 48,
-        },  # 票圈视频
-        APP_TYPE['SHORT_VIDEO']: {
-            'group': 25 / 48,
-            'mean_group': 25 / 48,
-        },  # 票圈短视频
-        APP_TYPE['LONG_VIDEO']: {
-            'group': 25 / 48,
-            'mean_group': 25 / 48,
-        },  # 内容精选
-        APP_TYPE['LAO_HAO_KAN_VIDEO']: {
-            'group': 25 / 48,
-            'mean_group': 25 / 48,
-        },  # 老好看视频
-        APP_TYPE['ZUI_JING_QI']: {
-            'group': 25 / 48,
-            'mean_group': 25 / 48,
-        },  # 票圈最惊奇
-    }
-
-    # 用户组有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:users:group:predict:share:rate:{date}
+
+    # 广告模型视频数据
+    AD_VIDEO_DATA_PARAMS = {
+        'data1': APP_TYPE['VLOG'],  # vlog
+        'data2': APP_TYPE['LOVE_LIVE'],  # 票圈视频
+        'data3': APP_TYPE['LONG_VIDEO'],  # 内容精选
+        'data4': APP_TYPE['SHORT_VIDEO'],  # 票圈短视频
+        'data5': APP_TYPE['LAO_HAO_KAN_VIDEO'],  # 老好看视频
+        'data6': APP_TYPE['ZUI_JING_QI'],  # 票圈最惊奇
+    }
+    # 广告模型用户数据
+    AD_USER_DATA_PARAMS = {
+        'data1': APP_TYPE['VLOG'],  # vlog
+        'data2': APP_TYPE['LOVE_LIVE'],  # 票圈视频
+        'data3': APP_TYPE['LONG_VIDEO'],  # 内容精选
+        'data4': APP_TYPE['SHORT_VIDEO'],  # 票圈短视频
+        'data5': APP_TYPE['LAO_HAO_KAN_VIDEO'],  # 老好看视频
+        'data6': APP_TYPE['ZUI_JING_QI'],  # 票圈最惊奇
+    }
+    # 广告模型abtest配置
+    AD_ABTEST_CONFIG = {
+        # 票圈vlog
+        '173-a': {'data': {'video': 'data1', 'user': 'data1'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        # 票圈视频+
+        '190-a': {'data': {'video': 'data1', 'user': 'data1'},
+                  'threshold': {'group': 5 / 6, 'mean_group': 5 / 6}},
+        # 票圈视频
+        '194-a': {'data': {'video': 'data1', 'user': 'data1'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        '194-b': {'data': {'video': 'data2', 'user': 'data2'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        # 内容精选
+        '195-a': {'data': {'video': 'data1', 'user': 'data1'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        '195-b': {'data': {'video': 'data3', 'user': 'data3'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        # 票圈短视频
+        '196-a': {'data': {'video': 'data1', 'user': 'data1'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        '196-b': {'data': {'video': 'data4', 'user': 'data4'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        # 老好看视频
+        '197-a': {'data': {'video': 'data1', 'user': 'data1'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        '197-b': {'data': {'video': 'data5', 'user': 'data5'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        # 票圈最惊奇
+        '198-a': {'data': {'video': 'data1', 'user': 'data1'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+        '198-b': {'data': {'video': 'data6', 'user': 'data6'},
+                  'threshold': {'group': 25 / 48, 'mean_group': 25 / 48}},
+    }
+
+    # 用户组有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:users:group:predict:share:rate:{user_data_key}:{date}
     KEY_NAME_PREFIX_AD_GROUP = 'ad:users:group:predict:share:rate:'
-    # 视频有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:video:predict:share:rate:{date}
+    # 视频有广告时的分享率预测结果存放 redis key 前缀,完整格式:ad:video:predict:share:rate:{video_data_key}:{date}
     KEY_NAME_PREFIX_AD_VIDEO = 'ad:video:predict:share:rate:'
     # 用户分组结果存放 redis key 前缀,完整格式:mid:group:{mid}
     KEY_NAME_PREFIX_MID_GROUP = 'mid:group:'
-    # 广告推荐阈值结果存放 redis key 前缀,完整格式:ad:threshold:{appType}:{group}
+    # 广告推荐阈值结果存放 redis key 前缀,完整格式:ad:threshold:{abtestId}:{abtestConfigTag}:{group}
     KEY_NAME_PREFIX_AD_THRESHOLD = 'ad:threshold:'
 
 

+ 0 - 1
user_group_update.py

@@ -61,7 +61,6 @@ def update_user_group_to_redis(project, table, dt, app_type_list):
 
 def timer_check():
     try:
-        # app_type = config_.APP_TYPE['VLOG']
         app_type_list = config_.AD_APP_TYPE_LIST
         project = config_.ad_model_data['user_group'].get('project')
         table = config_.ad_model_data['user_group'].get('table')