import time

import pandas as pd
import xgboost as xgb
from xgboost.sklearn import XGBClassifier

from utils import RedisHelper
from config import set_config

redis_helper = RedisHelper()
config_, _ = set_config()

# Columns that identify one prediction row; also used as the merge key.
_ID_COLUMNS = ['apptype', 'mid', 'videoid']


def _load_model():
    """Return an XGBClassifier backed by the booster stored in ./data/ad_xgb.model."""
    model = XGBClassifier()
    booster = xgb.Booster()
    booster.load_model('./data/ad_xgb.model')
    # The sklearn wrapper is not fitted here; the trained booster is attached
    # directly so that predict_proba can be used.
    model._Booster = booster
    return model


def _score_file(model, csv_path, score_column):
    """Score one prediction CSV and return its id columns plus the score.

    Args:
        model: classifier exposing ``predict_proba``.
        csv_path: path of the CSV file to score.
        score_column: name of the column that receives the probability at
            index 1 of each ``predict_proba`` row.

    Returns:
        A new DataFrame with the columns 'apptype', 'mid', 'videoid' and
        ``score_column``.
    """
    df = pd.read_csv(csv_path)
    columns = df.columns.values.tolist()
    columns.remove('videoid')
    # After dropping 'videoid', the first two remaining columns are skipped
    # and everything else is fed to the model as features.
    # NOTE(review): presumably those two are 'apptype' and 'mid' -- confirm
    # against the CSV layout.
    proba = model.predict_proba(df[columns[2:]])
    df[score_column] = [row[1] for row in proba]
    return df[_ID_COLUMNS + [score_column]].copy()


def predict(app_type):
    """Score both ad scenarios, dump the score differences and refresh thresholds.

    Steps:
        1. Load the XGBoost model.
        2. Score predict_data_0.csv (ad_status = 0, no ad) into ``y_0``.
        3. Score predict_data_1.csv (ad_status = 1) into ``y_1``.
        4. Left-merge both results and compute ``res_predict = y_0 - y_1``.
        5. Write one ``key,res_predict`` line per row to predict_res.txt.
        6. For every experiment group recorded in redis for this app type,
           store ``mean(res_predict) * param`` as its threshold (48h expiry).

    Args:
        app_type: key into ``xgb_config['abtest_id_mapping']`` selecting the
            AB-test id whose thresholds are updated.
    """
    # 1. Load the model.
    model = _load_model()

    # 2. Predict with ad_status = 0 (no ad shown).
    pre_df_0 = _score_file(model, './data/predict_data/predict_data_0.csv', 'y_0')

    # 3. Predict with ad_status = 1.
    # NOTE(review): the original comment also said "no ad" here, which looks
    # like a copy-paste slip; presumably this is the "ad shown" case.
    pre_df_1 = _score_file(model, './data/predict_data/predict_data_1.csv', 'y_1')

    # 4. Merge the two results; rows missing from the ad_status = 1 file keep
    #    NaN in y_1 (and therefore in res_predict) because of the left join.
    res_df = pd.merge(pre_df_0, pre_df_1, how='left', on=_ID_COLUMNS)
    res_df['res_predict'] = res_df['y_0'] - res_df['y_1']
    print(res_df.head())

    xgb_config = config_.AD_MODEL_ABTEST_CONFIG['xgb']
    key_prefix = xgb_config['predict_key_prefix']

    # 5./6. Persist the per-row results. The per-key redis write is currently
    # disabled; results only go to the text file:
    # redis_helper.set_data_to_redis(key_name=key, value=pre_res, expire_time=48*3600)
    # The context manager guarantees the file is flushed and closed before the
    # thresholds are updated, even if a row fails to format.
    with open('./data/predict_data/predict_res.txt', "w") as f:
        # iterrows is kept on purpose so values are formatted exactly as before.
        # The row's app type is deliberately NOT bound to ``app_type``: doing so
        # used to overwrite the function argument used in step 7.
        for _, row in res_df.iterrows():
            key = f"{key_prefix}{row['apptype']}:{row['mid']}:{row['videoid']}"
            f.write(f"{key},{row['res_predict']}\n")
    print("to redis finished!")

    # 7. Compute thresholds.
    # Look up the experiment id for the requested app type.
    abtest_id = xgb_config['abtest_id_mapping'][app_type]

    # Fetch the recorded threshold parameters.
    raw_record = redis_helper.get_data_from_redis(key_name=xgb_config['threshold_record'])
    # NOTE(review): eval executes whatever is stored under this redis key.
    # If the value is a plain literal, ast.literal_eval would be the safer
    # choice -- confirm the stored format before switching.
    threshold_record = eval(raw_record)
    record = threshold_record[abtest_id]

    # Compute and store one threshold per experiment group.
    predict_mean = res_df['res_predict'].mean()
    for ab_code, param in record.items():
        threshold = predict_mean * param
        threshold_key = f"{xgb_config['threshold']}{abtest_id}:{ab_code}"
        redis_helper.set_data_to_redis(key_name=threshold_key, value=threshold, expire_time=48 * 3600)
    print("update threshold finished!")


if __name__ == '__main__':
    st_time = time.time()
    predict(config_.APP_TYPE['VLOG'])
    print(f"{time.time() - st_time}s")