# Train an XGBoost binary classifier on the CSV snapshot named after today's
# date, save the fitted model, and print accuracy and ROC AUC on a hold-out
# split.
import datetime

import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from xgboost.sklearn import XGBClassifier

# The input file name embeds the current local date as YYYYMMDD.
now_date = datetime.datetime.today()
dt = now_date.strftime('%Y%m%d')

# 1. Load the data.
data = pd.read_csv(f'./data/train_test_data/train_test_{dt}.csv')
print(data.shape)

# 2. Split into features (x) and target (y): the last column is used as the
#    label, every other column as a feature.
data_columns = data.columns.values.tolist()
x = data[data_columns[:-1]]
y = data[data_columns[-1]]
print(f"x_shape: {x.shape}, y_shape: {y.shape}")

# 3. Train/test split: 30% held out, fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)
print(f"x_train_shape: {x_train.shape}")
print(f"x_test_shape: {x_test.shape}")

# 4. Train the model.
xgb_model = XGBClassifier(
    objective='binary:logistic',
    learning_rate=0.3,
    max_depth=10,
    eval_metric='auc',
)
xgb_model.fit(x_train, y_train)

# 5. Save the model.
# NOTE(review): the path/extension is kept as-is because other code may load
# it from here — confirm before renaming to .json/.ubj.
xgb_model.save_model('./data/ad_xgb.model')

# 6. Predict on the test set: hard class labels for accuracy, and
#    positive-class probabilities for the ranking metric.
y_test_pre = xgb_model.predict(x_test)
y_test_proba = xgb_model.predict_proba(x_test)[:, 1]

# 7. Evaluate the model.
test_accuracy = metrics.accuracy_score(y_test, y_test_pre)
print("Test Accuracy: %.2f%%" % (test_accuracy * 100.0))

# ROC AUC must be computed from continuous scores; feeding it thresholded
# 0/1 labels only measures a single operating point and misstates the AUC.
test_auc = metrics.roc_auc_score(y_test, y_test_proba)
print("auc: %.2f%%" % (test_auc * 100.0))