Explorar o código

更新初始化代码

罗俊辉 hai 1 ano
pai
achega
776e05d7f4
Modificouse 1 ficheiro con 8 adicións e 8 borrados
  1. 8 8
      main.py

+ 8 - 8
main.py

@@ -42,7 +42,7 @@ float_cols = [
         "out_share_cnt",
         "out_collection_cnt"
     ]
-with open("whole_data/x_data.json") as f1:
+with open("whole_data/x_data_3day_up_level.json") as f1:
     x_list = json.loads(f1.read())
     index_t = int(len(x_list) * 0.7)
     X_train = pd.DataFrame(x_list[:index_t], columns=my_c)
@@ -57,14 +57,14 @@ with open("whole_data/x_data.json") as f1:
         X_test[key] = pd.to_numeric(X_test[key], errors='coerce')
 
 
-with open("whole_data/y_data.json") as f2:
+with open("whole_data/y_data_3day_up_level.json") as f2:
     y_list = json.loads(f2.read())
     index_t = int(len(y_list) * 0.7)
-    temp = sorted(y_list)
-    yuzhi = temp[int(len(temp) * 0.8)-1]
-    y__list = [0 if i <= yuzhi else 1 for i in y_list]
-    y_train = np.array(y__list[:index_t])
-    y_test = np.array(y__list[index_t:])
+    # temp = sorted(y_list)
+    # yuzhi = temp[int(len(temp) * 0.8)-1]
+    # y__list = [0 if i <= yuzhi else 1 for i in y_list]
+    y_train = np.array(y_list[:index_t])
+    y_test = np.array(y_list[index_t:])
 
 # 创建LightGBM数据集
 train_data = lgb.Dataset(X_train, label=y_train, categorical_feature=['uid', 'type', 'channel', 'mode', 'out_user_id'])
@@ -88,7 +88,7 @@ bst = lgb.train(params, train_data, num_round, valid_sets=[test_data])
 # 预测
 y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)
 # 转换为二进制输出
-y_pred_binary = np.where(y_pred > 0.5, 1, 0)
+y_pred_binary = np.where(y_pred > 0.7, 1, 0)
 
 # 评估模型
 accuracy = accuracy_score(y_test, y_pred_binary)