@@ -0,0 +1,39 @@
+import numpy as np
+import lightgbm as lgb
+from sklearn.model_selection import train_test_split
+from sklearn.datasets import make_classification
+from sklearn.metrics import accuracy_score
+
+# Generate a synthetic binary classification dataset
+X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)
+
+# Split the data into training and test sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Create LightGBM Dataset objects; the test set references the training set so both share the same feature binning
+train_data = lgb.Dataset(X_train, label=y_train)
+test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
+
+# Set the model parameters
+params = {
+    'objective': 'binary',          # binary classification task
+    'metric': 'binary_logloss',     # evaluation metric: binary log loss
+    'num_leaves': 31,               # maximum number of leaves per tree
+    'learning_rate': 0.05,          # learning rate
+    'bagging_fraction': 0.9,        # fraction of samples used to build each tree
+    'feature_fraction': 0.8,        # fraction of features considered for each tree
+    'bagging_freq': 5,              # perform bagging every 5 iterations
+}
+
+# Train the model with early stopping on the validation set (passed via a callback, as required by LightGBM >= 4.0)
+num_round = 100
+bst = lgb.train(params, train_data, num_boost_round=num_round, valid_sets=[test_data], callbacks=[lgb.early_stopping(stopping_rounds=10)])
+
+# Predict probabilities on the test set using the best iteration found by early stopping
+y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)
+# Convert predicted probabilities to binary class labels using a 0.5 threshold
+y_pred_binary = np.where(y_pred > 0.5, 1, 0)
+
+# Evaluate the model
+accuracy = accuracy_score(y_test, y_pred_binary)
+print(f'Accuracy: {accuracy}')
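
For reference, the same configuration can also be expressed through LightGBM's scikit-learn wrapper. Below is a minimal sketch, assuming the X_train/X_test/y_train/y_test split defined above and LightGBM >= 4.0; subsample, subsample_freq, and colsample_bytree are the wrapper's aliases for bagging_fraction, bagging_freq, and feature_fraction.

```python
import lightgbm as lgb
from sklearn.metrics import accuracy_score

# Sketch: equivalent setup via the scikit-learn-style wrapper,
# reusing the X_train/X_test/y_train/y_test arrays from the script above.
clf = lgb.LGBMClassifier(
    objective='binary',
    num_leaves=31,
    learning_rate=0.05,
    n_estimators=100,
    subsample=0.9,          # alias of bagging_fraction
    subsample_freq=5,       # alias of bagging_freq
    colsample_bytree=0.8,   # alias of feature_fraction
)
clf.fit(
    X_train, y_train,
    eval_set=[(X_test, y_test)],
    eval_metric='binary_logloss',
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)
print('Accuracy:', accuracy_score(y_test, clf.predict(X_test)))
```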