|
@@ -2,46 +2,71 @@ import pandas as pd
|
|
|
import xgboost as xgb
|
|
|
from xgboost.sklearn import XGBClassifier
|
|
|
from utils import RedisHelper
|
|
|
+from config import set_config
|
|
|
# Module-wide Redis client, shared by every read and write in this file.
redis_helper = RedisHelper()

# set_config() returns a pair; only its first element is used in this module.
config_, _ = set_config()
|
|
|
|
|
|
|
|
|
-# 1. Load the model
|
|
|
-model = XGBClassifier()
|
|
|
-booster = xgb.Booster()
|
|
|
-booster.load_model('./data/ad_xgb.model')
|
|
|
-model._Booster = booster
|
|
|
-# 2. Predict: ad_status = 0, no ad shown
|
|
|
-df_0 = pd.read_csv('./data/predict_data/predict_data_0.csv')
|
|
|
-columns_0 = df_0.columns.values.tolist()
|
|
|
-columns_0.remove('videoid')
|
|
|
-y_pred_proba_0 = model.predict_proba(df_0[columns_0[2:]])
|
|
|
-df_0['y_0'] = [x[1] for x in y_pred_proba_0]
|
|
|
-pre_df_0 = df_0[['apptype', 'mid', 'videoid', 'y_0']].copy()
|
|
|
-
|
|
|
-# 3. Predict: ad_status = 1, no ad shown (NOTE(review): same wording as step 2; presumably this is the "ad shown" case - confirm)
|
|
|
-df_1 = pd.read_csv('./data/predict_data/predict_data_1.csv')
|
|
|
-columns_1 = df_1.columns.values.tolist()
|
|
|
-columns_1.remove('videoid')
|
|
|
-y_pred_proba_1 = model.predict_proba(df_1[columns_1[2:]])
|
|
|
-df_1['y_1'] = [x[1] for x in y_pred_proba_1]
|
|
|
-pre_df_1 = df_1[['apptype', 'mid', 'videoid', 'y_1']].copy()
|
|
|
-
|
|
|
-# 4. Merge the results
|
|
|
-res_df = pd.merge(pre_df_0, pre_df_1, how='left', on=['apptype', 'mid', 'videoid'])
|
|
|
-res_df['res_predict'] = res_df['y_0'] - res_df['y_1']
|
|
|
-print(res_df.head())
|
|
|
-
|
|
|
-# 5. Write the results to CSV
|
|
|
-res_df.to_csv('./data/predict_data/predict_res.csv', index=False)
|
|
|
-print("to csv finished!")
|
|
|
-
|
|
|
-# 6. Write the results to Redis
|
|
|
-for ind, row in res_df.iterrows():
|
|
|
- app_type = row['apptype']
|
|
|
- mid = row['mid']
|
|
|
- video_id = row['videoid']
|
|
|
- pre_res = row['res_predict']
|
|
|
- key = f"ad:xgb:predict:{app_type}:{mid}:{video_id}"
|
|
|
- redis_helper.set_data_to_redis(key_name=key, value=pre_res, expire_time=48*3600)
|
|
|
-print("to redis finished!")
|
|
|
def _score_predict_file(model, csv_path, score_col):
    """Load one prediction CSV, score it, and return the key columns plus the score.

    Args:
        model: Classifier exposing ``predict_proba``.
        csv_path: Path of the CSV file to score.
        score_col: Name of the column that receives the second-class probability.

    Returns:
        A new DataFrame with columns ``apptype``, ``mid``, ``videoid`` and ``score_col``.
    """
    df = pd.read_csv(csv_path)
    columns = df.columns.values.tolist()
    columns.remove('videoid')
    # Features are all columns except 'videoid' and the first two of the rest
    # (presumably 'apptype' and 'mid' - confirm against the CSV layout).
    proba = model.predict_proba(df[columns[2:]])
    # Keep only the probability of the second class (index 1) for each row.
    df[score_col] = [x[1] for x in proba]
    return df[['apptype', 'mid', 'videoid', score_col]].copy()


def predict(app_type):
    """Run the ad XGBoost prediction, publish per-row scores and update thresholds.

    Steps: load the model from './data/ad_xgb.model', score the two prediction
    CSV files, merge them, store ``y_0 - y_1`` as ``res_predict``, write the
    merged table to CSV, write every row's ``res_predict`` to Redis, then
    compute one threshold per experiment group as
    ``mean(res_predict) * param`` and write those to Redis as well.

    Args:
        app_type: Key used to look up the experiment id in
            ``xgb_config['abtest_id_mapping']``.

    Returns:
        None. Results are written to disk and to Redis.
    """
    expire_seconds = 48 * 3600

    # 1. Load the model
    model = XGBClassifier()
    booster = xgb.Booster()
    booster.load_model('./data/ad_xgb.model')
    model._Booster = booster

    # 2. Predict: ad_status = 0, no ad shown
    pre_df_0 = _score_predict_file(model, './data/predict_data/predict_data_0.csv', 'y_0')

    # 3. Predict: ad_status = 1
    # NOTE(review): the original comment repeated "no ad shown" from step 2;
    # presumably this is the "ad shown" case - confirm.
    pre_df_1 = _score_predict_file(model, './data/predict_data/predict_data_1.csv', 'y_1')

    # 4. Merge the results
    # Left join: rows missing from the ad_status = 1 file get NaN for y_1 and
    # therefore NaN for res_predict.
    res_df = pd.merge(pre_df_0, pre_df_1, how='left', on=['apptype', 'mid', 'videoid'])
    res_df['res_predict'] = res_df['y_0'] - res_df['y_1']
    print(res_df.head())

    # 5. Write the results to CSV
    res_df.to_csv('./data/predict_data/predict_res.csv', index=False)
    print("to csv finished!")

    xgb_config = config_.AD_MODEL_ABTEST_CONFIG['xgb']

    # 6. Write the results to Redis
    # The row values are read straight from `row` so that the `app_type`
    # parameter is NOT rebound here; it is needed unchanged in step 7.
    key_prefix = xgb_config['predict_key_prefix']
    for _, row in res_df.iterrows():
        key = f"{key_prefix}{row['apptype']}:{row['mid']}:{row['videoid']}"
        redis_helper.set_data_to_redis(key_name=key, value=row['res_predict'], expire_time=expire_seconds)
    print("to redis finished!")

    # 7. Compute the thresholds
    # Look up the experiment id for the requested app type
    abtest_id_mapping = xgb_config['abtest_id_mapping']
    abtest_id = abtest_id_mapping[app_type]
    # Fetch the stored threshold parameters
    threshold_record = redis_helper.get_data_from_redis(key_name=xgb_config['threshold_record'])
    # SECURITY NOTE(review): eval() executes whatever string is stored under
    # this Redis key. If the value is a plain literal, ast.literal_eval would
    # be a safer parser - confirm the stored format before switching.
    threshold_record = eval(threshold_record)
    record = threshold_record[abtest_id]
    # Compute one threshold per experiment group
    predict_mean = res_df['res_predict'].mean()
    for ab_code, param in record.items():
        threshold = predict_mean * param
        # Write to Redis
        threshold_key = f"{xgb_config['threshold']}{abtest_id}{ab_code}"
        redis_helper.set_data_to_redis(key_name=threshold_key, value=threshold, expire_time=expire_seconds)
    print("update threshold finished!")
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Script entry point: run the pipeline for the VLOG app type from config.
    predict(config_.APP_TYPE['VLOG'])
|