浏览代码

贝叶斯调参优化

罗俊辉 1 年之前
父节点
当前提交
46e9f96612
共有 1 个文件被更改,包括 39 次插入、2 次删除
  1. 39 2
      main.py

+ 39 - 2
main.py

@@ -1,6 +1,7 @@
 import os
 import sys
 import json
+import optuna
 
 from sklearn.linear_model import LogisticRegression
 
@@ -57,6 +58,38 @@ class LightGBM(object):
         self.yc = 0.8
         self.model = "lightgbm_tag_train_02.bin"
 
+    def bays_params(self, trial):
+        """
+        Bayesian parameters for
+        :return: best parameters
+        """
+        # 定义搜索空间
+        param = {
+            'objective': 'binary',
+            'metric': 'binary_logloss',
+            'verbosity': -1,
+            'boosting_type': 'gbdt',
+            'num_leaves': trial.suggest_int('num_leaves', 20, 40),
+            'learning_rate': trial.suggest_loguniform('learning_rate', 1e-8, 1.0),
+            'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
+            'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
+            'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
+            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
+        }
+        X_train, X_test = self.generate_x_data()
+        Y_train, Y_test = self.generate_y_data()
+        train_data = lgb.Dataset(
+            X_train,
+            label=Y_train,
+            categorical_feature=["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
+        )
+        test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
+        gbm = lgb.train(param, train_data, num_boost_round=100, valid_sets=[test_data], early_stopping_rounds=10, verbose_eval=False)
+        preds = gbm.predict(X_test)
+        pred_labels = np.rint(preds)
+        accuracy = accuracy_score(Y_test, pred_labels)
+        return accuracy
+
     def generate_x_data(self):
         """
         Generate data for feature engineering
@@ -181,6 +214,10 @@ class LightGBM(object):
 
 if __name__ == "__main__":
     L = LightGBM()
+    study = optuna.create_study(direction='maximize')
+    study.optimize(L.bays_params, n_trials=100)
+    print('Number of finished trials:', len(study.trials))
+    print('Best trial:', study.best_trial.params)
     # L.train_model()
-    L.evaluate_model()
-    L.feature_importance()
+    # L.evaluate_model()
+    # L.feature_importance()