|
@@ -26,7 +26,6 @@ class LightGBM(object):
|
|
|
self.label_encoder = LabelEncoder()
|
|
|
self.my_c = [
|
|
|
"channel",
|
|
|
- "fans",
|
|
|
"view_count_user_30days",
|
|
|
"share_count_user_30days",
|
|
|
"return_count_user_30days",
|
|
@@ -41,9 +40,8 @@ class LightGBM(object):
|
|
|
"tag2",
|
|
|
"tag3"
|
|
|
]
|
|
|
- self.str_columns = ["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"]
|
|
|
+ self.str_columns = ["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"]
|
|
|
self.float_columns = [
|
|
|
- "fans",
|
|
|
"view_count_user_30days",
|
|
|
"share_count_user_30days",
|
|
|
"return_count_user_30days",
|
|
@@ -52,11 +50,10 @@ class LightGBM(object):
|
|
|
"out_play_cnt",
|
|
|
"out_like_cnt",
|
|
|
"out_share_cnt",
|
|
|
- "out_collection_cnt",
|
|
|
]
|
|
|
self.split_c = 0.999
|
|
|
self.yc = 0.8
|
|
|
- self.model = "lightgbm_0326.bin"
|
|
|
+ self.model = "lightgbm_0326_spider.bin"
|
|
|
self.flag = flag
|
|
|
self.dt = dt
|
|
|
|
|
@@ -84,7 +81,7 @@ class LightGBM(object):
|
|
|
train_data = lgb.Dataset(
|
|
|
X_train,
|
|
|
label=Y_train,
|
|
|
- categorical_feature=["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
|
|
|
+ categorical_feature=["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
|
|
|
)
|
|
|
test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
|
|
|
gbm = lgb.train(param, train_data, num_boost_round=100, valid_sets=[test_data])
|
|
@@ -98,7 +95,7 @@ class LightGBM(object):
|
|
|
Generate data for feature engineering
|
|
|
:return:
|
|
|
"""
|
|
|
- with open("data/produce_data/x_data_total_return_{}_{}.json".format(self.flag, self.dt)) as f1:
|
|
|
+ with open("data/produce_data/x_data_total_return_{}_{}_spider.json".format(self.flag, self.dt)) as f1:
|
|
|
x_list = json.loads(f1.read())
|
|
|
index_t = int(len(x_list) * self.split_c)
|
|
|
X_train = pd.DataFrame(x_list[:index_t], columns=self.my_c)
|
|
@@ -118,7 +115,7 @@ class LightGBM(object):
|
|
|
Generate data for label
|
|
|
:return:
|
|
|
"""
|
|
|
- with open("data/produce_data/y_data_total_return_{}_{}.json".format(self.flag, self.dt)) as f2:
|
|
|
+ with open("data/produce_data/y_data_total_return_{}_{}_spider.json".format(self.flag, self.dt)) as f2:
|
|
|
y_list = json.loads(f2.read())
|
|
|
index_t = int(len(y_list) * self.split_c)
|
|
|
temp = sorted(y_list)
|
|
@@ -219,21 +216,22 @@ class LightGBM(object):
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- i = int(input("输入 1 训练, 输入 2 预测:\n"))
|
|
|
- if i == 1:
|
|
|
- f = "train"
|
|
|
- dt = "whole"
|
|
|
- L = LightGBM(flag=f, dt=dt)
|
|
|
- L.train_model()
|
|
|
- elif i == 2:
|
|
|
- f = "predict"
|
|
|
- dt = int(input("输入日期, 16-21:\n"))
|
|
|
- L = LightGBM(flag=f, dt=dt)
|
|
|
- L.evaluate_model()
|
|
|
- # study = optuna.create_study(direction='maximize')
|
|
|
- # study.optimize(L.bays_params, n_trials=100)
|
|
|
- # print('Number of finished trials:', len(study.trials))
|
|
|
- # print('Best trial:', study.best_trial.params)
|
|
|
+ # i = int(input("输入 1 训练, 输入 2 预测:\n"))
|
|
|
+ # if i == 1:
|
|
|
+ # f = "train"
|
|
|
+ # dt = "whole"
|
|
|
+ # L = LightGBM(flag=f, dt=dt)
|
|
|
+ # L.train_model()
|
|
|
+ # elif i == 2:
|
|
|
+ # f = "predict"
|
|
|
+ # dt = int(input("输入日期, 16-21:\n"))
|
|
|
+ # L = LightGBM(flag=f, dt=dt)
|
|
|
+ # L.evaluate_model()
|
|
|
+ L = LightGBM("train", "whole")
|
|
|
+ study = optuna.create_study(direction='maximize')
|
|
|
+ study.optimize(L.bays_params, n_trials=100)
|
|
|
+ print('Number of finished trials:', len(study.trials))
|
|
|
+ print('Best trial:', study.best_trial.params)
|
|
|
# L.train_model()
|
|
|
# L.evaluate_model()
|
|
|
# L.feature_importance()
|