# ad_xgboost_predict.py
import pandas as pd
import xgboost as xgb
from xgboost.sklearn import XGBClassifier

from config import set_config
from utils import RedisHelper
  6. redis_helper = RedisHelper()
  7. config_, _ = set_config()
  8. def predict(app_type):
  9. # 1. 模型加载
  10. model = XGBClassifier()
  11. booster = xgb.Booster()
  12. booster.load_model('./data/ad_xgb.model')
  13. model._Booster = booster
  14. # 2. 预测:ad_status = 0, 不出广告
  15. df_0 = pd.read_csv('./data/predict_data/predict_data_0.csv')
  16. columns_0 = df_0.columns.values.tolist()
  17. columns_0.remove('videoid')
  18. y_pred_proba_0 = model.predict_proba(df_0[columns_0[2:]])
  19. df_0['y_0'] = [x[1] for x in y_pred_proba_0]
  20. pre_df_0 = df_0[['apptype', 'mid', 'videoid', 'y_0']].copy()
  21. # 3. 预测:ad_status = 1, 不出广告
  22. df_1 = pd.read_csv('./data/predict_data/predict_data_1.csv')
  23. columns_1 = df_1.columns.values.tolist()
  24. columns_1.remove('videoid')
  25. y_pred_proba_1 = model.predict_proba(df_1[columns_1[2:]])
  26. df_1['y_1'] = [x[1] for x in y_pred_proba_1]
  27. pre_df_1 = df_1[['apptype', 'mid', 'videoid', 'y_1']].copy()
  28. # 4. merge 结果
  29. res_df = pd.merge(pre_df_0, pre_df_1, how='left', on=['apptype', 'mid', 'videoid'])
  30. res_df['res_predict'] = res_df['y_0'] - res_df['y_1']
  31. print(res_df.head())
  32. # 5. to csv
  33. res_df.to_csv('./data/predict_data/predict_res.csv', index=False)
  34. print("to csv finished!")
  35. xgb_config = config_.AD_MODEL_ABTEST_CONFIG['xgb']
  36. # 6. to redis
  37. for ind, row in res_df.iterrows():
  38. app_type = row['apptype']
  39. mid = row['mid']
  40. video_id = row['videoid']
  41. pre_res = row['res_predict']
  42. key = f"{xgb_config['predict_key_prefix']}{app_type}:{mid}:{video_id}"
  43. redis_helper.set_data_to_redis(key_name=key, value=pre_res, expire_time=48*3600)
  44. print("to redis finished!")
  45. # 7. 计算阈值
  46. # 获取对应实验id
  47. abtest_id_mapping = xgb_config['abtest_id_mapping']
  48. abtest_id = abtest_id_mapping[app_type]
  49. # 获取阈值参数记录
  50. threshold_record = redis_helper.get_data_from_redis(key_name=xgb_config['threshold_record'])
  51. threshold_record = eval(threshold_record)
  52. record = threshold_record[abtest_id]
  53. # 分实验组进行阈值计算
  54. predict_mean = res_df['res_predict'].mean()
  55. for ab_code, param in record.items():
  56. threshold = predict_mean * param
  57. # 写入redis
  58. threshold_key = f"{xgb_config['threshold']}{abtest_id}{ab_code}"
  59. redis_helper.set_data_to_redis(key_name=threshold_key, value=threshold, expire_time=48 * 3600)
  60. print("update threshold finished!")
  61. if __name__ == '__main__':
  62. predict(config_.APP_TYPE['VLOG'])