罗俊辉 1 год назад
Родитель
Commit
c25f52b00b
1 изменённый файл: 11 добавлений и 8 удалений
  1. 11 8
      main.py

+ 11 - 8
main.py

@@ -36,8 +36,11 @@ class LightGBM(object):
             "out_like_cnt",
             "out_share_cnt",
             "out_collection_cnt",
+            "tag1",
+            "tag2",
+            "tag3"
         ]
-        self.str_columns = ["uid", "type", "channel", "mode", "out_user_id"]
+        self.str_columns = ["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"]
         self.float_columns = [
             "fans",
             "view_count_user_30days",
@@ -50,16 +53,16 @@ class LightGBM(object):
             "out_share_cnt",
             "out_collection_cnt",
         ]
-        self.split_c = 0.95
+        self.split_c = 0.98
         self.yc = 0.8
-        self.model = "lightgbm_train.bin"
+        self.model = "lightgbm_tag_train.bin"
 
     def generate_x_data(self):
         """
         Generate data for feature engineering
         :return:
         """
-        with open("whole_data/x_data_total_return.json") as f1:
+        with open("produce_data/x_data_total_return_train.json") as f1:
             x_list = json.loads(f1.read())
         index_t = int(len(x_list) * self.split_c)
         X_train = pd.DataFrame(x_list[:index_t], columns=self.my_c)
@@ -79,7 +82,7 @@ class LightGBM(object):
         Generate data for label
         :return:
         """
-        with open("whole_data/y_data_total_return.json") as f2:
+        with open("produce_data/y_data_total_return_train.json") as f2:
             y_list = json.loads(f2.read())
         index_t = int(len(y_list) * self.split_c)
         temp = sorted(y_list)
@@ -100,7 +103,7 @@ class LightGBM(object):
         train_data = lgb.Dataset(
             X_train,
             label=Y_train,
-            categorical_feature=["uid", "type", "channel", "mode", "out_user_id"],
+            categorical_feature=["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
         )
         test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
         params = {
@@ -164,5 +167,5 @@ class LightGBM(object):
 
 if __name__ == "__main__":
     L = LightGBM()
-    # L.train_model()
-    L.evaluate_model()
+    L.train_model()
+    # L.evaluate_model()