# ad_xgboost_predict.py (1.5 KB)
  1. import pandas as pd
  2. import xgboost as xgb
  3. from xgboost.sklearn import XGBClassifier
  4. from utils import RedisHelper
  5. redis_helper = RedisHelper()
  6. # 1. 模型加载
  7. model = XGBClassifier()
  8. booster = xgb.Booster()
  9. booster.load_model('./data/ad_xgb.model')
  10. model._Booster = booster
  11. # 2. 预测:ad_status = 0, 不出广告
  12. df_0 = pd.read_csv('./data/predict_data/predict_data_0.csv')
  13. columns_0 = df_0.columns.values.tolist()
  14. columns_0.remove('videoid')
  15. y_pred_proba_0 = model.predict_proba(df_0[columns_0[2:]])
  16. df_0['y_0'] = [x[1] for x in y_pred_proba_0]
  17. pre_df_0 = df_0[['apptype', 'mid', 'videoid', 'y_0']].copy()
  18. # 3. 预测:ad_status = 1, 不出广告
  19. df_1 = pd.read_csv('./data/predict_data/predict_data_1.csv')
  20. columns_1 = df_1.columns.values.tolist()
  21. columns_1.remove('videoid')
  22. y_pred_proba_1 = model.predict_proba(df_1[columns_1[2:]])
  23. df_1['y_1'] = [x[1] for x in y_pred_proba_1]
  24. pre_df_1 = df_1[['apptype', 'mid', 'videoid', 'y_1']].copy()
  25. # 4. merge 结果
  26. res_df = pd.merge(pre_df_0, pre_df_1, how='left', on=['apptype', 'mid', 'videoid'])
  27. res_df['res_predict'] = res_df['y_0'] - res_df['y_1']
  28. print(res_df.head())
  29. # 5. to csv
  30. res_df.to_csv('./data/predict_data/predict_res.csv', index=False)
  31. # 6. to redis
  32. for ind, row in res_df.iterrows():
  33. app_type = row['apptype']
  34. mid = row['mid']
  35. video_id = row['videoid']
  36. pre_res = row['res_predict']
  37. key = f"ad:xgb:predict:{app_type}:{mid}:{video_id}"
  38. redis_helper.set_data_to_redis(key_name=key, value=pre_res, expire_time=48*3600)