|
@@ -8,22 +8,31 @@ from sklearn import metrics
|
|
|
now_date = datetime.datetime.today()
|
|
|
dt = datetime.datetime.strftime(now_date, '%Y%m%d')
|
|
|
# 1. Load the data
|
|
|
-data = pd.read_csv(f'./data/train_test_data/train_test_{dt}.csv')
|
|
|
-print(data.shape)
|
|
|
+# data = pd.read_csv(f'./data/train_test_data/train_test_{dt}.csv')
|
|
|
+# print(data.shape)
|
|
|
+train_data = pd.read_csv(f'./data/train_test_data/train_{dt}.csv')
|
|
|
+print(train_data.shape)
|
|
|
+test_data = pd.read_csv(f'./data/train_test_data/test_{dt}.csv')
|
|
|
+print(test_data.shape)
|
|
|
# 2. Split into x (all columns but the last) and y (the last column)
|
|
|
-data_columns = data.columns.values.tolist()
|
|
|
-x = data[data_columns[:-1]]
|
|
|
-y = data[data_columns[-1]]
|
|
|
-print(f"x_shape: {x.shape}, y_shape: {y.shape}")
|
|
|
+# data_columns = data.columns.values.tolist()
|
|
|
+# x = data[data_columns[:-1]]
|
|
|
+# y = data[data_columns[-1]]
|
|
|
+# print(f"x_shape: {x.shape}, y_shape: {y.shape}")
|
|
|
+data_columns = train_data.columns.values.tolist()
|
|
|
+x_train = train_data[data_columns[:-1]]
|
|
|
+y_train = train_data[data_columns[-1]]
|
|
|
+x_test = test_data[data_columns[:-1]]
|
|
|
+y_test = test_data[data_columns[-1]]
|
|
|
# 3. Train/test split
|
|
|
-x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)
|
|
|
+# x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)
|
|
|
print(f"x_train_shape: {x_train.shape}")
|
|
|
print(f"x_test_shape: {x_test.shape}")
|
|
|
# 4. Train the model
|
|
|
xgb_model = XGBClassifier(
|
|
|
objective='binary:logistic',
|
|
|
learning_rate=0.3,
|
|
|
- max_depth=10,
|
|
|
+ max_depth=5,
|
|
|
eval_metric=['error', 'logloss', 'auc']
|
|
|
)
|
|
|
xgb_model.fit(x_train, y_train, eval_set=[(x_train, y_train), (x_test, y_test)])
|