|
@@ -31,7 +31,7 @@ class LightGBM(object):
|
|
|
"tag4"
|
|
|
]
|
|
|
self.str_columns = ["tag1", "tag2", "tag3", "tag4"]
|
|
|
- self.split_c = 0.7
|
|
|
+ self.split_c = 0.75
|
|
|
self.yc = 0.8
|
|
|
self.model = "models/lightgbm_0408_all_tags.bin"
|
|
|
self.flag = flag
|
|
@@ -44,7 +44,7 @@ class LightGBM(object):
|
|
|
"""
|
|
|
df = pd.read_json(path)
|
|
|
df = df.dropna(subset=['label']) # 把 label 为空的删掉
|
|
|
- df = df.dropna(subset=['tag1', 'tag2', 'tag3', 'tag4']) # 把 tag 为空的数据也删掉
|
|
|
+ df = df.dropna(subset=['tag1', 'tag2', 'tag3', 'tag4'], how="all") # drop a row only when all four tag columns are empty (rows with some tags missing are kept)
|
|
|
labels = df['label']
|
|
|
features = df.drop('label', axis=1)
|
|
|
for key in self.str_columns:
|
|
@@ -108,7 +108,7 @@ class LightGBM(object):
|
|
|
train_data = lgb.Dataset(
|
|
|
X_train,
|
|
|
label=Y_train,
|
|
|
- categorical_feature=["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
|
|
|
+ categorical_feature=["tag1", "tag2", "tag3", "tag4"],
|
|
|
)
|
|
|
test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
|
|
|
params = {
|