@@ -72,41 +72,39 @@ class LightGBM(object):
             features[key] = self.label_encoder.fit_transform(features[key])
         return features, labels

-    def bays(self):
-        # Create the LightGBM dataset; do not specify categorical_feature here, since we are using a toy dataset
+    def objective(self, trial):
         x, y = self.read_data()
-        train_size = int(len(x) * 0.9)
+        train_size = int(len(x) * self.split_c)
         X_train, X_test = x[:train_size], x[train_size:]
         Y_train, Y_test = y[:train_size], y[train_size:]
-        train_data = lgb.Dataset(X_train, label=Y_train)
-        def lgbm_eval(num_leaves, learning_rate, feature_fraction, bagging_fraction, bagging_freq, min_child_samples):
-            params = {
-                'objective': 'binary',
-                'metric': 'auc',
-                'verbose': -1,
-                'num_leaves': int(num_leaves),
-                'learning_rate': learning_rate,
-                'feature_fraction': feature_fraction,
-                'bagging_fraction': bagging_fraction,
-                'bagging_freq': int(bagging_freq),
-                'min_child_samples': int(min_child_samples),
-            }
-            cv_result = lgb.cv(params, train_data, nfold=5, seed=42, stratified=True, metrics=['auc'])
-            return max(cv_result['auc-mean'])
-
-        param_bounds = {
-            'num_leaves': (20, 40),
-            'learning_rate': (1e-4, 1e-2),
-            'feature_fraction': (0.5, 0.8),
-            'bagging_fraction': (0.5, 0.8),
-            'bagging_freq': (1, 10),
-            'min_child_samples': (20, 100),
+
+        dtrain = lgb.Dataset(X_train, label=Y_train)
+        dvalid = lgb.Dataset(X_test, label=Y_test, reference=dtrain)
+
+        param = {
+            'objective': 'binary',  # change to match the task, e.g. 'regression' or 'multiclass'
+            'metric': 'binary_logloss',  # change to match the task, e.g. 'l2' or 'multi_logloss'
+            'verbosity': -1,
+            'boosting_type': 'gbdt',
+            'num_leaves': trial.suggest_int('num_leaves', 20, 40),
+            'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
+            'feature_fraction': trial.suggest_float('feature_fraction', 0.6, 0.9),
+            'bagging_fraction': trial.suggest_float('bagging_fraction', 0.6, 0.9),
+            'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
+            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
         }

-        optimizer = BayesianOptimization(f=lgbm_eval, pbounds=param_bounds, random_state=42)
-        optimizer.maximize(init_points=5, n_iter=25)
+        # LightGBM >= 4.0 removed early_stopping_rounds/verbose_eval from
+        # lgb.train; the equivalent callbacks exist since LightGBM 3.3
+        gbm = lgb.train(param, dtrain, valid_sets=[dvalid],
+                        callbacks=[lgb.early_stopping(100), lgb.log_evaluation(0)])
+        preds = gbm.predict(X_test)
+        pred_labels = np.rint(preds)
+        accuracy = accuracy_score(Y_test, pred_labels)
+        return accuracy  # or another metric to optimize

-        print("Best Parameters:", optimizer.max['params'])
+    def tune(self, n_trials=100):
+        study = optuna.create_study(direction='maximize')
+        study.optimize(self.objective, n_trials=n_trials)
+        print('Number of finished trials:', len(study.trials))
+        print('Best trial:', study.best_trial.params)

     def train_model(self):
         """
@@ -212,7 +210,7 @@ if __name__ == "__main__":
     # L.evaluate_model()
     # L.feature_importance()
     L = LightGBM("train", "whole")
-    L.bays()
+    L.tune()
     # study = optuna.create_study(direction='maximize')
     # study.optimize(L.bays_params, n_trials=100)
     # print('Number of finished trials:', len(study.trials))
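
After this change, tune() only prints the best parameters; nothing in the diff refits a model with them. A minimal follow-up sketch, assuming the fixed params hard-coded in objective() and access to the study object (the fit_best_model name and standalone layout are illustrative, not part of the original code):

import lightgbm as lgb

def fit_best_model(study, X_train, Y_train):
    # Fixed params mirror the ones hard-coded in objective(); the tuned
    # values from the best trial are merged on top of them.
    params = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'boosting_type': 'gbdt',
    }
    params.update(study.best_trial.params)  # tuned values win on key collisions
    dtrain = lgb.Dataset(X_train, label=Y_train)
    return lgb.train(params, dtrain)

For this to work, tune() would need to return the study object instead of only printing from it.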