|
@@ -56,6 +56,8 @@ with open("whole_data/x_data_3day_up_level.json") as f1:
|
|
|
# Coerce each column named in float_cols to a numeric dtype in X_test;
# errors='coerce' turns values that cannot be parsed into NaN instead of raising.
for key in float_cols:
|
|
|
X_test[key] = pd.to_numeric(X_test[key], errors='coerce')
|
|
|
|
|
|
# Progress message: the X data was read successfully.
+print("读取X数据成功!")
|
|
|
+
|
|
|
|
|
|
with open("whole_data/y_data_3day_up_level.json") as f2:
|
|
|
y_list = json.loads(f2.read())
|
|
@@ -66,10 +68,14 @@ with open("whole_data/y_data_3day_up_level.json") as f2:
|
|
|
# Split the labels at index_t: entries before index_t become the training
# labels, entries from index_t onward become the test labels.
y_train = np.array(y_list[:index_t])
|
|
|
y_test = np.array(y_list[index_t:])
|
|
|
|
|
|
# Progress message: the Y data was read successfully.
+print("读取Y数据成功!")
|
|
|
+
|
|
|
+
|
|
|
# Create the LightGBM datasets.
|
|
|
# Training set; the listed columns are declared as categorical features.
# NOTE(review): presumably these columns already hold integer codes or a
# pandas category dtype -- confirm against the upstream preprocessing.
train_data = lgb.Dataset(X_train, label=y_train, categorical_feature=['uid', 'type', 'channel', 'mode', 'out_user_id'])
|
|
|
# Test set, built with the training Dataset passed as its reference.
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
|
|
|
|
|
|
# Progress message: the datasets were created successfully.
+print("数据集创建成功")
|
|
|
# Set the model parameters
|
|
|
params = {
|
|
|
'objective': 'binary', # 指定二分类任务
|
|
@@ -83,8 +89,9 @@ params = {
|
|
|
|
|
|
# Train the model
|
|
|
# Number of boosting rounds passed to lgb.train.
num_round = 100
|
|
|
# Progress message: training is starting.
+print("开始训练......")
|
|
|
# The test Dataset is also supplied as the validation set during training.
bst = lgb.train(params, train_data, num_round, valid_sets=[test_data])
|
|
|
-
|
|
|
# Progress message: training finished, prediction is starting.
+print("训练完成! , 开始预测......")
|
|
|
# Predict
|
|
|
# NOTE(review): bst.best_iteration is only meaningful if early stopping is
# enabled (e.g. through params, which are not fully visible here) -- confirm.
y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)
|
|
|
# Convert to binary output
|