罗俊辉 преди 1 година
родител
ревизия
d6c7882896
променен е 1 файл, в който са добавени 27 реда и са изтрити 8 реда
  1. 27 8
      main.py

+ 27 - 8
main.py

@@ -7,7 +7,10 @@ import pandas as pd
 import lightgbm as lgb
 from sklearn.model_selection import train_test_split
 from sklearn.datasets import make_classification
+from sklearn.preprocessing import LabelEncoder
 from sklearn.metrics import accuracy_score
+
+label_encoder = LabelEncoder()
 my_c = [
         "uid",
         "type",
@@ -25,18 +28,34 @@ my_c = [
         "out_share_cnt",
         "out_collection_cnt"
     ]
+
+str_cols = ["uid", "type", "channel", "mode"]
+float_cols = [
+        "fans",
+        "view_count_user_30days",
+        "share_count_user_30days",
+        "return_count_user_30days",
+        "rov_user",
+        "str_user",
+        "out_user_id",
+        "out_play_cnt",
+        "out_like_cnt",
+        "out_share_cnt",
+        "out_collection_cnt"
+    ]
 with open("whole_data/x_data.json") as f1:
     x_list = json.loads(f1.read())
     X_train = pd.DataFrame(x_list[:10000], columns=my_c)
-    X_train['uid'] = X_train['uid'].astype(str)
-    X_train['type'] = X_train['type'].astype(str)
-    X_train['channel'] = X_train['channel'].astype(str)
-    X_train['mode'] = X_train['mode'].astype(str)
+    for key in str_cols:
+        X_train[key] = label_encoder.fit_transform(X_train[key])
+    for key in float_cols:
+        X_train[key] = pd.to_numeric(X_train[key], errors='coerce')
     X_test = pd.DataFrame(x_list[10000:], columns=my_c)
-    X_test['uid'] = X_test['uid'].astype(str)
-    X_test['type'] = X_test['type'].astype(str)
-    X_test['channel'] = X_test['channel'].astype(str)
-    X_test['mode'] = X_test['mode'].astype(str)
+    for key in str_cols:
+        X_test[key] = label_encoder.fit_transform(X_test[key])
+    for key in float_cols:
+        X_test[key] = pd.to_numeric(X_test[key], errors='coerce')
+
 
 with open("whole_data/y_data.json") as f2:
     y_list = json.loads(f2.read())