罗俊辉 1 rok temu
rodzic
commit
9018e06e8b
1 zmienionych plików z 27 dodań i 4 usunięć
  1. 27 4
      main.py

+ 27 - 4
main.py

@@ -3,15 +3,38 @@ import sys
 import json
 sys.path.append(os.getcwd())
 import numpy as np
+import pandas as pd
 import lightgbm as lgb
 from sklearn.model_selection import train_test_split
 from sklearn.datasets import make_classification
 from sklearn.metrics import accuracy_score
-
+my_c = [  # column names applied to each row loaded from whole_data/x_data.json
+        "uid",
+        "type",
+        "channel",
+        "fans",
+        "view_count_user_30days",
+        "share_count_user_30days",
+        "return_count_user_30days",
+        "rov_user",
+        "str_user",
+        "out_user_id",
+        "mode",
+        "out_play_cnt",
+        "out_like_cnt",
+        "out_share_cnt",
+        "out_collection_cnt"
+    ]
 with open("whole_data/x_data.json") as f1:
     x_list = json.loads(f1.read())
-    X_train = np.array(x_list[:10000], dtype=object)
-    X_test = np.array(x_list[10000:], dtype=object)
+    X_train = pd.DataFrame(x_list[:10000], columns=my_c)
+    X_train['uid'] = X_train['uid'].astype(str)
+    X_train['type'] = X_train['type'].astype(str)
+    X_train['channel'] = X_train['channel'].astype(str)
+    X_test = pd.DataFrame(x_list[10000:], columns=my_c)
+    X_test['uid'] = X_test['uid'].astype(str)
+    X_test['type'] = X_test['type'].astype(str)
+    X_test['channel'] = X_test['channel'].astype(str)
 
 with open("whole_data/y_data.json") as f2:
     y_list = json.loads(f2.read())
@@ -19,7 +42,7 @@ with open("whole_data/y_data.json") as f2:
     y_test = np.array(y_list[10000:])
 
 # 创建LightGBM数据集
-train_data = lgb.Dataset(X_train, label=y_train)
+train_data = lgb.Dataset(X_train, label=y_train, categorical_features=['uid', 'type', 'channel'])
 test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
 
 # Set the model parameters