123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421 |
- import glob
- import os.path
- from datetime import timedelta
- import numpy as np
- import pandas as pd
- import xgboost as xgb
- from model.XGBModel import XGBModel
def _build_feature_names():
    """Assemble the ordered list of model input feature names.

    The names follow a regular ``<group>_<window>_<stat>`` pattern, so they
    are generated rather than spelled out. The resulting order is significant:
    ``load_model_and_score`` builds its input row by iterating this list, so
    position in the list is position in the feature vector.
    """
    # Stats shared by the d1_feature_* block and (as a prefix) by the b* blocks.
    rate_stats = ("ctr", "ctcvr", "cvr", "conver", "ecpm")
    # b* groups carry three extra stats after the five shared ones.
    behavior_stats = rate_stats + ("click", "conver*log(view)", "conver*ctcvr")
    short_windows = ("3h", "6h", "12h", "1d", "3d", "7d")

    names = ["cpa"]

    # b2/b3/b4/b5/b8: six windows x eight stats each.
    names += [
        f"{group}_{window}_{stat}"
        for group in ("b2", "b3", "b4", "b5", "b8")
        for window in short_windows
        for stat in behavior_stats
    ]

    # b6/b7: only the 7d and 14d windows, same eight stats.
    names += [
        f"{group}_{window}_{stat}"
        for group in ("b6", "b7")
        for window in ("7d", "14d")
        for stat in behavior_stats
    ]

    # Irregularly named aggregate features; kept as an explicit list.
    names += [
        "viewAll",
        "clickAll",
        "converAll",
        "incomeAll",
        "ctr_all",
        "ctcvr_all",
        "cvr_all",
        "ecpm_all",
        "timediff_view",
        "timediff_click",
        "timediff_conver",
        "actionstatic_view",
        "actionstatic_click",
        "actionstatic_conver",
        "actionstatic_income",
        "actionstatic_ctr",
        "actionstatic_ctcvr",
        "actionstatic_cvr",
    ]

    # e1/e2 tag-match features: three windows x three score stats.
    names += [
        f"{group}_tags_{window}_{stat}"
        for group in ("e1", "e2")
        for window in ("3d", "7d", "14d")
        for stat in ("matchnum", "maxscore", "avgscore")
    ]

    # d1_feature: six windows x the five shared stats.
    names += [
        f"d1_feature_{window}_{stat}"
        for window in short_windows
        for stat in rate_stats
    ]

    # vid_rank: note the stat comes before the window in these names.
    names += [
        f"vid_rank_{stat}_{window}"
        for stat in ("ctr", "ctcvr", "ecpm")
        for window in ("1d", "3d", "7d", "14d")
    ]

    return names


# Ordered feature names (351 entries) consumed by the scoring and
# importance-export code in this module.
features = _build_feature_names()
def load_model_and_score(model_path, feature_map):
    """Score a single sample with the XGBoost model stored under *model_path*.

    Args:
        model_path: Directory containing ``data/XGBoostClassificationModel``.
        feature_map: Mapping (anything with ``.get``) from feature name to a
            value accepted by ``float()``. Names absent from the mapping are
            filled with ``0.0``.

    Returns:
        The first (and only) prediction for the one-row input, as a Python
        float. ``output_margin=False`` requests the transformed prediction
        rather than the raw margin.
    """
    booster = xgb.Booster()
    booster.load_model(f"{model_path}/data/XGBoostClassificationModel")
    booster.set_param({"missing": 0.0})

    # One row, columns in the order of the module-level ``features`` list.
    row = np.array(
        [float(feature_map.get(name, 0.0)) for name in features],
        dtype=np.float32,
    ).reshape(1, -1)

    # missing=0.0: zeros (including the defaults filled in above) are
    # handed to XGBoost as missing values.
    matrix = xgb.DMatrix(row, missing=0.0)
    predictions = booster.predict(matrix, output_margin=False)
    return float(predictions[0])
def _multi_importance_flat_map(importance_map: dict) -> list:
    """Pivot per-label importance dicts into one row per feature.

    Args:
        importance_map: Mapping of a column label to an inner
            ``{feature_name: importance}`` dict.

    Returns:
        A list of dicts, one per distinct feature name found in any inner
        dict. Each row stores the name under ``"feature"`` plus one entry per
        label whose inner dict contains that feature; labels lacking the
        feature are simply omitted from the row. Rows appear in the order the
        feature names are first encountered while walking the labels (and
        each inner dict) in iteration order, so the result is reproducible
        for a given input. An empty ``importance_map`` yields ``[]``.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order. Collecting
    # the names in a set instead would yield string keys in a hash-dependent
    # order that changes between interpreter runs, reshuffling the output.
    ordered_features = dict.fromkeys(
        feature
        for scores in importance_map.values()
        for feature in scores
    )

    rows = []
    for feature in ordered_features:
        row = {"feature": feature}
        for label, scores in importance_map.items():
            if feature in scores:
                row[label] = scores[feature]
        rows.append(row)
    return rows
def _main():
    """Export weight/cover/gain feature importances for the matching models.

    Scans the hard-coded model directory for entries whose path contains
    ``model_xgb_351_1000_v2``, loads each one through ``XGBModel``, collects
    its three importance results, and writes one CSV per importance type to
    the Desktop. Columns are keyed by the last nine characters of each model
    directory name (presumably a date suffix -- confirm against the actual
    directory naming).
    """
    model_path = "/Users/zhao/Desktop/tzld/ad/model"

    # Phase 1: load every matching model before computing anything.
    loaded_models = {
        path: XGBModel(model_file=f"{path}/data/XGBoostClassificationModel", features=features)
        for path in glob.glob(f"{model_path}/*")
        if "model_xgb_351_1000_v2" in path
    }

    # Phase 2: gather the three importance results per model, keyed by tag.
    weight_by_tag = {}
    cover_by_tag = {}
    gain_by_tag = {}
    for path, model in loaded_models.items():
        tag = os.path.basename(path)[-9:]
        weight_by_tag[tag] = model.feature_weight_importance()
        cover_by_tag[tag] = model.feature_cover_importance()
        gain_by_tag[tag] = model.feature_gain_importance()

    # Phase 3: pivot to one row per feature, with tags sorted ascending.
    weight_rows = _multi_importance_flat_map(dict(sorted(weight_by_tag.items())))
    cover_rows = _multi_importance_flat_map(dict(sorted(cover_by_tag.items())))
    gain_rows = _multi_importance_flat_map(dict(sorted(gain_by_tag.items())))

    # Phase 4: write the CSVs only after all three tables were built.
    outputs = (
        (weight_rows, "/Users/zhao/Desktop/weight.csv"),
        (cover_rows, "/Users/zhao/Desktop/cover.csv"),
        (gain_rows, "/Users/zhao/Desktop/gain.csv"),
    )
    for rows, csv_path in outputs:
        pd.DataFrame(rows).to_csv(csv_path, index=False)
# Run the importance export only when executed as a script, not on import.
if __name__ == "__main__":
    _main()
|