|
@@ -36,8 +36,11 @@ class LightGBM(object):
|
|
|
"out_like_cnt",
|
|
|
"out_share_cnt",
|
|
|
"out_collection_cnt",
|
|
|
+ "tag1",
|
|
|
+ "tag2",
|
|
|
+ "tag3"
|
|
|
]
|
|
|
- self.str_columns = ["uid", "type", "channel", "mode", "out_user_id"]
|
|
|
+ self.str_columns = ["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"]
|
|
|
self.float_columns = [
|
|
|
"fans",
|
|
|
"view_count_user_30days",
|
|
@@ -50,16 +53,16 @@ class LightGBM(object):
|
|
|
"out_share_cnt",
|
|
|
"out_collection_cnt",
|
|
|
]
|
|
|
- self.split_c = 0.95
|
|
|
+ self.split_c = 0.98
|
|
|
self.yc = 0.8
|
|
|
- self.model = "lightgbm_train.bin"
|
|
|
+ self.model = "lightgbm_tag_train.bin"
|
|
|
|
|
|
def generate_x_data(self):
|
|
|
"""
|
|
|
Generate data for feature engineering
|
|
|
:return:
|
|
|
"""
|
|
|
- with open("whole_data/x_data_total_return.json") as f1:
|
|
|
+ with open("produce_data/x_data_total_return_train.json") as f1:
|
|
|
x_list = json.loads(f1.read())
|
|
|
index_t = int(len(x_list) * self.split_c)
|
|
|
X_train = pd.DataFrame(x_list[:index_t], columns=self.my_c)
|
|
@@ -79,7 +82,7 @@ class LightGBM(object):
|
|
|
Generate data for label
|
|
|
:return:
|
|
|
"""
|
|
|
- with open("whole_data/y_data_total_return.json") as f2:
|
|
|
+ with open("produce_data/y_data_total_return_train.json") as f2:
|
|
|
y_list = json.loads(f2.read())
|
|
|
index_t = int(len(y_list) * self.split_c)
|
|
|
temp = sorted(y_list)
|
|
@@ -100,7 +103,7 @@ class LightGBM(object):
|
|
|
train_data = lgb.Dataset(
|
|
|
X_train,
|
|
|
label=Y_train,
|
|
|
- categorical_feature=["uid", "type", "channel", "mode", "out_user_id"],
|
|
|
+ categorical_feature=["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
|
|
|
)
|
|
|
test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
|
|
|
params = {
|
|
@@ -164,5 +167,5 @@ class LightGBM(object):
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
L = LightGBM()
|
|
|
- # L.train_model()
|
|
|
- L.evaluate_model()
|
|
|
+ L.train_model()
|
|
|
+ # L.evaluate_model()
|