|
@@ -1,6 +1,7 @@
|
|
|
import os
|
|
|
import sys
|
|
|
import json
|
|
|
+import optuna
|
|
|
|
|
|
from sklearn.linear_model import LogisticRegression
|
|
|
|
|
@@ -57,6 +58,38 @@ class LightGBM(object):
|
|
|
self.yc = 0.8
|
|
|
self.model = "lightgbm_tag_train_02.bin"
|
|
|
|
|
|
def bays_params(self, trial):
    """
    Optuna objective: train one LightGBM model with sampled hyper-parameters.

    Samples a parameter set from ``trial``, trains a binary classifier on the
    training split with early stopping against the test split, and scores the
    rounded test predictions.

    :param trial: Optuna trial used to sample the hyper-parameters
    :return: accuracy of the trained model on the test split for this trial
        (not the best parameters -- those are read from the study afterwards)
    """
    # Define the search space. The trial parameter names are kept unchanged so
    # that the keys of ``study.best_trial.params`` stay the same.
    # ``suggest_float`` replaces the deprecated ``suggest_uniform`` /
    # ``suggest_loguniform`` helpers.
    param = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 20, 40),
        # NOTE(review): the lower bound of 1e-8 combined with only 100
        # boosting rounds looks very small -- confirm the intended range.
        'learning_rate': trial.suggest_float('learning_rate', 1e-8, 1.0, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
    }

    # Both generators are unpacked as a (train, test) pair.
    X_train, X_test = self.generate_x_data()
    Y_train, Y_test = self.generate_y_data()

    train_data = lgb.Dataset(
        X_train,
        label=Y_train,
        categorical_feature=["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
    )
    # ``reference`` ties the validation set to the training Dataset.
    test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)

    # Early stopping is passed as a callback: the ``early_stopping_rounds`` and
    # ``verbose_eval`` keyword arguments of ``lgb.train`` were removed in
    # LightGBM 4.0, while the callback form works on older releases as well.
    gbm = lgb.train(
        param,
        train_data,
        num_boost_round=100,
        valid_sets=[test_data],
        callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False)],
    )

    # Round the predicted scores to the nearest integer to obtain class labels.
    preds = gbm.predict(X_test)
    pred_labels = np.rint(preds)
    accuracy = accuracy_score(Y_test, pred_labels)
    return accuracy
|
|
|
+
|
|
|
def generate_x_data(self):
|
|
|
"""
|
|
|
Generate data for feature engineering
|
|
@@ -181,6 +214,10 @@ class LightGBM(object):
|
|
|
|
|
|
if __name__ == "__main__":
    # Hyper-parameter search entry point: run an Optuna study that maximizes
    # the accuracy returned by LightGBM.bays_params, then report the outcome.
    tuner = LightGBM()

    study = optuna.create_study(direction='maximize')
    study.optimize(tuner.bays_params, n_trials=100)

    finished_trials = len(study.trials)
    best_params = study.best_trial.params
    print('Number of finished trials:', finished_trials)
    print('Best trial:', best_params)

    # The regular pipeline steps are disabled while tuning:
    # tuner.train_model()
    # tuner.evaluate_model()
    # tuner.feature_importance()
|