liqian 1 year ago
parent
commit
a42bd26ecd
2 changed files with 81 additions and 42 deletions
  1. 64 39
      ad_xgboost_predict.py
  2. 17 3
      config.py

+ 64 - 39
ad_xgboost_predict.py

@@ -2,46 +2,71 @@ import pandas as pd
 import xgboost as xgb
 from xgboost.sklearn import XGBClassifier
 from utils import RedisHelper
+from config import set_config
 redis_helper = RedisHelper()
+config_, _ = set_config()
 
 
-# 1. 模型加载
-model = XGBClassifier()
-booster = xgb.Booster()
-booster.load_model('./data/ad_xgb.model')
-model._Booster = booster
-# 2. 预测:ad_status = 0, 不出广告
-df_0 = pd.read_csv('./data/predict_data/predict_data_0.csv')
-columns_0 = df_0.columns.values.tolist()
-columns_0.remove('videoid')
-y_pred_proba_0 = model.predict_proba(df_0[columns_0[2:]])
-df_0['y_0'] = [x[1] for x in y_pred_proba_0]
-pre_df_0 = df_0[['apptype', 'mid', 'videoid', 'y_0']].copy()
-
-# 3. 预测:ad_status = 1, 不出广告
-df_1 = pd.read_csv('./data/predict_data/predict_data_1.csv')
-columns_1 = df_1.columns.values.tolist()
-columns_1.remove('videoid')
-y_pred_proba_1 = model.predict_proba(df_1[columns_1[2:]])
-df_1['y_1'] = [x[1] for x in y_pred_proba_1]
-pre_df_1 = df_1[['apptype', 'mid', 'videoid', 'y_1']].copy()
-
-# 4. merge 结果
-res_df = pd.merge(pre_df_0, pre_df_1, how='left', on=['apptype', 'mid', 'videoid'])
-res_df['res_predict'] = res_df['y_0'] - res_df['y_1']
-print(res_df.head())
-
-# 5. to csv
-res_df.to_csv('./data/predict_data/predict_res.csv', index=False)
-print("to csv finished!")
-
-# 6. to redis
-for ind, row in res_df.iterrows():
-    app_type = row['apptype']
-    mid = row['mid']
-    video_id = row['videoid']
-    pre_res = row['res_predict']
-    key = f"ad:xgb:predict:{app_type}:{mid}:{video_id}"
-    redis_helper.set_data_to_redis(key_name=key, value=pre_res, expire_time=48*3600)
-print("to redis finished!")
+def predict(app_type):  # scores both predict CSVs, then writes per-row results and per-group thresholds via redis_helper
+    # 1. Load the model
+    model = XGBClassifier()
+    booster = xgb.Booster()
+    booster.load_model('./data/ad_xgb.model')
+    model._Booster = booster
 
+    # 2. Predict with ad_status = 0 (no ad shown)
+    df_0 = pd.read_csv('./data/predict_data/predict_data_0.csv')
+    columns_0 = df_0.columns.values.tolist()
+    columns_0.remove('videoid')
+    y_pred_proba_0 = model.predict_proba(df_0[columns_0[2:]])  # features: all columns except 'videoid' and the first two of the rest
+    df_0['y_0'] = [x[1] for x in y_pred_proba_0]  # keep the entry at index 1 of each predict_proba row
+    pre_df_0 = df_0[['apptype', 'mid', 'videoid', 'y_0']].copy()
+
+    # 3. Predict with ad_status = 1 (the original comment repeated "no ad shown" from step 2; presumably the ad-shown case - confirm)
+    df_1 = pd.read_csv('./data/predict_data/predict_data_1.csv')
+    columns_1 = df_1.columns.values.tolist()
+    columns_1.remove('videoid')
+    y_pred_proba_1 = model.predict_proba(df_1[columns_1[2:]])  # same feature selection as step 2
+    df_1['y_1'] = [x[1] for x in y_pred_proba_1]
+    pre_df_1 = df_1[['apptype', 'mid', 'videoid', 'y_1']].copy()
+
+    # 4. Merge the results
+    res_df = pd.merge(pre_df_0, pre_df_1, how='left', on=['apptype', 'mid', 'videoid'])  # left join: every pre_df_0 row is kept; y_1 is NaN where pre_df_1 has no match
+    res_df['res_predict'] = res_df['y_0'] - res_df['y_1']
+    print(res_df.head())
+
+    # 5. to csv
+    res_df.to_csv('./data/predict_data/predict_res.csv', index=False)
+    print("to csv finished!")
+
+    xgb_config = config_.AD_MODEL_ABTEST_CONFIG['xgb']
+    # 6. to redis
+    for ind, row in res_df.iterrows():
+        app_type = row['apptype']  # NOTE(review): rebinds the app_type parameter
+        mid = row['mid']
+        video_id = row['videoid']
+        pre_res = row['res_predict']
+        key = f"{xgb_config['predict_key_prefix']}{app_type}:{mid}:{video_id}"
+        redis_helper.set_data_to_redis(key_name=key, value=pre_res, expire_time=48*3600)
+    print("to redis finished!")
+
+    # 7. Compute thresholds
+    # Look up the experiment id for the app type
+    abtest_id_mapping = xgb_config['abtest_id_mapping']
+    abtest_id = abtest_id_mapping[app_type]  # NOTE(review): app_type is whatever step 6 last assigned, not necessarily the argument
+    # Fetch the threshold parameter record
+    threshold_record = redis_helper.get_data_from_redis(key_name=xgb_config['threshold_record'])
+    threshold_record = eval(threshold_record)  # NOTE(review): eval() runs whatever text Redis returns; prefer ast.literal_eval if the record is a plain literal
+    record = threshold_record[abtest_id]
+    # Compute a threshold for each experiment group
+    predict_mean = res_df['res_predict'].mean()  # mean over every row of res_df
+    for ab_code, param in record.items():
+        threshold = predict_mean * param
+        # Write to redis
+        threshold_key = f"{xgb_config['threshold']}{abtest_id}{ab_code}"  # NOTE(review): abtest_id and ab_code are joined with no separator - confirm readers expect that
+        redis_helper.set_data_to_redis(key_name=threshold_key, value=threshold, expire_time=48 * 3600)
+    print("update threshold finished!")
+
+
+if __name__ == '__main__':  # only runs when the file is executed as a script
+    predict(config_.APP_TYPE['VLOG'])  # passes the VLOG app type taken from the loaded config

+ 17 - 3
config.py

@@ -1018,7 +1018,7 @@ class BaseConfig(object):
         ]
     }
 
-    # 广告模型abtest配置
+    # 广告模型abtest配置 - 公式
     AD_ABTEST_CONFIG = {
         # 票圈vlog
         '173-a': {'video': {'data': 'data1'},
@@ -1209,7 +1209,7 @@ class BaseConfig(object):
                   },  # else非关怀模式人群多出广告 + 所有广告类型数据
     }
 
-    # 广告模型阈值计算配置
+    # 广告模型阈值计算配置 - 公式
     AD_ABTEST_THRESHOLD_CONFIG = {
         # vlog
         '173': {
@@ -1330,7 +1330,7 @@ class BaseConfig(object):
         },
     }
 
-    # 广告模型自动调整阈值配置
+    # 广告模型自动调整阈值配置 - 公式
     AD_ABTEST_ABCODE_CONFIG = {
         # 票圈vlog
         APP_TYPE['VLOG']: {
@@ -1970,6 +1970,20 @@ class BaseConfig(object):
     # 广告推荐自动调整阈值参数记录存放 redis key,完整格式:ad:threshold:param:record
     KEY_NAME_PREFIX_AD_THRESHOLD_PARAM_RECORD = 'ad:threshold:param:record'
 
+    # 广告模型abtest配置 - 模型
+    AD_MODEL_ABTEST_CONFIG = {
+        'xgb': {
+            # 预测结果存放 redis key 前缀,完整格式:ad:xgb:predict:{app_type}:{mid}:{video_id}
+            'predict_key_prefix': 'ad:xgb:predict:',
+            # 阈值计算记录存放 redis key
+            'threshold_record': 'ad:xgb:threshold:record',
+            # Redis key prefix for threshold results; full key as built in predict(): ad:xgb:threshold:{abtestId}{abtestGroup} (no separator between the two ids)
+            'threshold': 'ad:xgb:threshold:',
+            # 实验ID列表
+            'abtest_id_mapping': {APP_TYPE['VLOG']: '173'}
+        }
+    }
+
 
 class DevelopmentConfig(BaseConfig):
     """开发环境配置"""