Browse Source

仅通过标题tag 分析全部数据

罗俊辉 1 year ago
parent
commit
c2ea97f9fe
1 changed file with 3 additions and 3 deletions
  1. 3 3
      main.py

+ 3 - 3
main.py

@@ -31,7 +31,7 @@ class LightGBM(object):
             "tag4"
         ]
         self.str_columns = ["tag1", "tag2", "tag3", "tag4"]
-        self.split_c = 0.7
+        self.split_c = 0.75
         self.yc = 0.8
         self.model = "models/lightgbm_0408_all_tags.bin"
         self.flag = flag
@@ -44,7 +44,7 @@ class LightGBM(object):
         """
         df = pd.read_json(path)
         df = df.dropna(subset=['label'])  # 把 label 为空的删掉
-        df = df.dropna(subset=['tag1', 'tag2', 'tag3', 'tag4'])  # 把 tag 为空的数据也删掉
+        df = df.dropna(subset=['tag1', 'tag2', 'tag3', 'tag4'], how="all")  # 把 tag 为空的数据也删掉
         labels = df['label']
         features = df.drop('label', axis=1)
         for key in self.str_columns:
@@ -108,7 +108,7 @@ class LightGBM(object):
         train_data = lgb.Dataset(
             X_train,
             label=Y_train,
-            categorical_feature=["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
+            categorical_feature=["tag1", "tag2", "tag3", "tag4"],
         )
         test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
         params = {