main.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. import os
  2. import sys
  3. import json
  4. sys.path.append(os.getcwd())
  5. import numpy as np
  6. import lightgbm as lgb
  7. from sklearn.model_selection import train_test_split
  8. from sklearn.datasets import make_classification
  9. from sklearn.metrics import accuracy_score
  10. with open("whole_data/x_data.json") as f1:
  11. x_list = json.loads(f1.read())
  12. X_train = np.array(x_list[:10000])
  13. X_test = np.array(x_list[10000:])
  14. with open("whole_data/y_data.json") as f2:
  15. y_list = json.loads(f2.read())
  16. y_train = np.array(y_list[:10000])
  17. y_test = np.array(y_list[10000:])
  18. # 创建LightGBM数据集
  19. train_data = lgb.Dataset(X_train, label=y_train)
  20. test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
  21. # 设置模型的参数
  22. params = {
  23. 'objective': 'binary', # 指定二分类任务
  24. 'metric': 'binary_logloss', # 评估指标为二分类的log损失
  25. 'num_leaves': 31, # 叶子节点数
  26. 'learning_rate': 0.05, # 学习率
  27. 'bagging_fraction': 0.9, # 建树的样本采样比例
  28. 'feature_fraction': 0.8, # 建树的特征选择比例
  29. 'bagging_freq': 5, # k 意味着每 k 次迭代执行bagging
  30. }
  31. # 训练模型
  32. num_round = 100
  33. bst = lgb.train(params, train_data, num_round, valid_sets=[test_data])
  34. # 预测
  35. y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)
  36. # 转换为二进制输出
  37. y_pred_binary = np.where(y_pred > 0.5, 1, 0)
  38. # 评估模型
  39. accuracy = accuracy_score(y_test, y_pred_binary)
  40. print(f'Accuracy: {accuracy}')