"""Train, save and evaluate an XGBoost binary classifier on today's dataset.

The script reads ``./data/train_test_data/train_test_<YYYYMMDD>.csv`` (the
stamp is today's local date), uses the last CSV column as the label and all
other columns as features, fits an ``XGBClassifier`` on a 70/30 train/test
split, saves the fitted model to ``./data/ad_xgb.model`` and prints accuracy
and ROC AUC measured on the held-out split.
"""
import datetime

import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split
from xgboost.sklearn import XGBClassifier

# Date stamp (local time, YYYYMMDD) that selects today's input file.
now_date = datetime.datetime.today()
dt = now_date.strftime('%Y%m%d')

# 1. Load the data.
data = pd.read_csv(f'./data/train_test_data/train_test_{dt}.csv')
print(data.shape)

# 2. Split into features (x) and label (y); the label is the last column.
data_columns = data.columns.values.tolist()
x = data[data_columns[:-1]]
y = data[data_columns[-1]]
print(f"x_shape: {x.shape}, y_shape: {y.shape}")

# 3. Hold out 30% of the rows for testing; the fixed seed keeps the split
#    reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=1
)
print(f"x_train_shape: {x_train.shape}")
print(f"x_test_shape: {x_test.shape}")

# 4. Train the model.
xgb_model = XGBClassifier(
    objective='binary:logistic',
    learning_rate=0.3,
    max_depth=10,
    eval_metric='auc',
)
xgb_model.fit(x_train, y_train)

# 5. Persist the fitted model.
xgb_model.save_model('./data/ad_xgb.model')

# 6. Predict on the test split: hard labels for accuracy, positive-class
#    probabilities for ROC AUC.
y_test_pre = xgb_model.predict(x_test)
y_test_proba = xgb_model.predict_proba(x_test)[:, 1]

# 7. Evaluate the model.
test_accuracy = metrics.accuracy_score(y_test, y_test_pre)
print("Test Accuracy: %.2f%%" % (test_accuracy * 100.0))
# ROC AUC measures how well the scores rank positives above negatives, so it
# must be fed continuous scores. Feeding it thresholded 0/1 predictions leaves
# a single operating point and the value degenerates to balanced accuracy.
test_auc = metrics.roc_auc_score(y_test, y_test_proba)
print("auc: %.2f%%" % (test_auc * 100.0))