"""Score with, and compare feature importances of, exported XGBoost ad models.

The module exposes the ordered ``features`` list the models are fed with,
a single-sample scoring helper, and a small script entry point that dumps
weight / cover / gain importances of several model snapshots to CSV files.
"""
import glob
import os.path

import numpy as np
import pandas as pd
import xgboost as xgb

from model.XGBModel import XGBModel


def _build_features():
    """Return the ordered list of model input feature names.

    The order is significant: ``load_model_and_score`` lays values out in
    exactly this order before handing them to the booster. The list has
    351 entries.
    """
    metrics = (
        "ctr", "ctcvr", "cvr", "conver", "ecpm", "click",
        "conver*log(view)", "conver*ctcvr",
    )
    hourly_to_weekly = ("3h", "6h", "12h", "1d", "3d", "7d")
    weekly_windows = ("7d", "14d")

    names = ["cpa"]

    # b2/b3/b4/b5/b8 groups: six windows x eight metrics each.
    for prefix in ("b2", "b3", "b4", "b5", "b8"):
        names += [
            f"{prefix}_{window}_{metric}"
            for window in hourly_to_weekly
            for metric in metrics
        ]

    # b6/b7 groups only exist for the 7d and 14d windows.
    for prefix in ("b6", "b7"):
        names += [
            f"{prefix}_{window}_{metric}"
            for window in weekly_windows
            for metric in metrics
        ]

    names += [
        "viewAll", "clickAll", "converAll", "incomeAll",
        "ctr_all", "ctcvr_all", "cvr_all", "ecpm_all",
        "timediff_view", "timediff_click", "timediff_conver",
        "actionstatic_view", "actionstatic_click", "actionstatic_conver",
        "actionstatic_income", "actionstatic_ctr", "actionstatic_ctcvr",
        "actionstatic_cvr",
    ]

    for prefix in ("e1", "e2"):
        names += [
            f"{prefix}_tags_{window}_{stat}"
            for window in ("3d", "7d", "14d")
            for stat in ("matchnum", "maxscore", "avgscore")
        ]

    names += [
        f"d1_feature_{window}_{metric}"
        for window in hourly_to_weekly
        for metric in ("ctr", "ctcvr", "cvr", "conver", "ecpm")
    ]

    names += [
        f"vid_rank_{metric}_{window}"
        for metric in ("ctr", "ctcvr", "ecpm")
        for window in ("1d", "3d", "7d", "14d")
    ]
    return names


features = _build_features()


def load_model_and_score(model_path, feature_map):
    """Load the booster under ``model_path`` and score one sample.

    Args:
        model_path: Directory containing ``data/XGBoostClassificationModel``.
        feature_map: Mapping from feature name to a value accepted by
            ``float()``. Names absent from the mapping default to ``0.0``.

    Returns:
        The first element of the booster's prediction for the sample, as a
        Python ``float`` (``output_margin=False``).
    """
    model = xgb.Booster()
    model.load_model(f"{model_path}/data/XGBoostClassificationModel")
    # NOTE(review): kept from the original; confirm the Booster actually
    # honours a "missing" parameter -- the DMatrix below already sets it.
    model.set_param({"missing": 0.0})

    values = np.array(
        [float(feature_map.get(feature, 0.0)) for feature in features],
        dtype=np.float32,
    )
    # A single row; 0.0 is declared as the missing-value marker.
    dm = xgb.DMatrix(values.reshape(1, -1), missing=0.0)
    return float(model.predict(dm, output_margin=False)[0])


def _multi_importance_flat_map(importance_map: dict):
    """Pivot ``{label: {feature: score}}`` into one row per feature.

    Each row is ``{"feature": name, label_1: score, ...}``; a label is left
    out of a row when that label's mapping has no entry for the feature.

    Rows are emitted in first-seen feature order (walking the labels in
    mapping order), so the output is reproducible between runs instead of
    depending on set iteration order.
    """
    # dict.fromkeys de-duplicates while keeping insertion order.
    ordered_features = dict.fromkeys(
        feature
        for scores in importance_map.values()
        for feature in scores
    )

    rows = []
    for feature in ordered_features:
        row = {"feature": feature}
        for label, scores in importance_map.items():
            if feature in scores:
                row[label] = scores[feature]
        rows.append(row)
    return rows


def _main(
    model_path="/Users/zhao/Desktop/tzld/XGB/35_ad_model",
    output_dir="/Users/zhao/Desktop",
    model_tag="model_xgb_351_1000_v2",
):
    """Write weight/cover/gain importance tables for matching models.

    Args:
        model_path: Directory whose direct children are scanned for models.
        output_dir: Directory receiving ``weight.csv``, ``cover.csv`` and
            ``gain.csv``.
        model_tag: Substring a child path must contain to be loaded.
    """
    models = {
        path: XGBModel(
            model_file=f"{path}/data/XGBoostClassificationModel",
            features=features,
        )
        for path in glob.glob(f"{model_path}/*")
        if model_tag in path
    }

    importances = {"weight": {}, "cover": {}, "gain": {}}
    for path, model in models.items():
        # The last nine characters of the directory name label the column
        # (presumably a date suffix -- confirm against the directory layout).
        dt = os.path.basename(path)[-9:]
        importances["weight"][dt] = model.feature_weight_importance()
        importances["cover"][dt] = model.feature_cover_importance()
        importances["gain"][dt] = model.feature_gain_importance()

    for kind, by_dt in importances.items():
        # Sort labels so the CSV columns appear in ascending label order.
        rows = _multi_importance_flat_map(dict(sorted(by_dt.items())))
        pd.DataFrame(rows).to_csv(
            os.path.join(output_dir, f"{kind}.csv"), index=False
        )


if __name__ == '__main__':
    _main()