|
@@ -45,12 +45,12 @@ float_cols = [
|
|
|
]
|
|
|
with open("whole_data/x_data.json") as f1:
    # Load every feature row, then split by position: the first 15000 rows
    # are used for training and the remainder for evaluation.
    x_list = json.load(f1)
    X_train = pd.DataFrame(x_list[:15000], columns=my_c)
    X_test = pd.DataFrame(x_list[15000:], columns=my_c)
    for key in str_cols:
        # Learn the string -> integer codes from the training rows only and
        # reuse exactly that mapping for the test rows. Re-fitting the
        # encoder on the test frame would hand the same string a different
        # code in each split, so the model would see unrelated categories.
        train_values = X_train[key]
        train_codes = label_encoder.fit_transform(train_values)
        code_of = dict(zip(train_values, train_codes))
        X_train[key] = train_codes
        # Values that never occurred in training have no code; mark them -1
        # (LightGBM treats negative categorical values as missing).
        X_test[key] = X_test[key].map(code_of).fillna(-1).astype(int)
    for key in float_cols:
        # Non-numeric entries become NaN instead of raising.
        X_train[key] = pd.to_numeric(X_train[key], errors='coerce')
|
|
|
for key in float_cols:
|
|
@@ -59,8 +59,9 @@ with open("whole_data/x_data.json") as f1:
|
|
|
|
|
|
with open("whole_data/y_data.json") as f2:
    y_list = json.load(f2)
    # Turn the raw targets into binary labels: values <= 25 map to 0,
    # everything else maps to 1.
    y__list = [int(not (raw <= 25)) for raw in y_list]
    # Positional split: the first 15000 labels train, the rest evaluate.
    y_train, y_test = (
        np.array(part) for part in (y__list[:15000], y__list[15000:])
    )
|
|
|
|
|
|
# Create the LightGBM training dataset; the listed columns are passed to
# LightGBM as categorical features.
train_data = lgb.Dataset(
    X_train,
    label=y_train,
    categorical_feature=['uid', 'type', 'channel', 'mode'],
)
|