
Handle the user model

罗俊辉, 1 year ago
commit 5177814c7c
2 changed files with 33 additions and 32 deletions
  1. main_userupload.py (+25, -25)
  2. result_analysis.py (+8, -7)

+ 25 - 25
main_userupload.py

@@ -135,14 +135,14 @@ class LightGBM(object):
         )
         test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
         params = {
-            'num_leaves': 25,
-            'learning_rate': 0.00435469653451866,
-            'feature_fraction': 0.8659696885542688,
-            'bagging_fraction': 0.4671847911224712,
+            'num_leaves': 29,
+            'learning_rate': 0.0005153812869522004,
+            'feature_fraction': 0.7460901121756344,
+            'bagging_fraction': 0.5744390458938479,
             'bagging_freq': 1,
-            # 'min_child_samples': 65,
-            "num_threads": 16,  # 线程数量
-        }
+            "num_threads": 16,
+            }
+
         # train the model
         num_round = 100
         print("开始训练......")
@@ -164,7 +164,7 @@ class LightGBM(object):
         with open("data/produce_data/y_data_total_return_predict_{}_user.json".format(self.dt)) as f2:
             Y_test = json.loads(f2.read())
 
-        Y_test = [0 if i <= 19 else 1 for i in Y_test]
+        Y_test = [0 if i <= 31 else 1 for i in Y_test]
         X_test = pd.DataFrame(x_list, columns=self.my_c)
         for key in self.str_columns:
             X_test[key] = self.label_encoder.fit_transform(X_test[key])
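The only change in this hunk raises the positive-label cutoff for Y_test from 19 to 31 total returns. A minimal illustration of the binarization under the new cutoff; the sample return counts are made up, only the threshold comes from the diff:

# Hypothetical raw return counts mapped to labels with the new cutoff of 31.
raw_returns = [5, 19, 31, 32, 100]
labels = [0 if i <= 31 else 1 for i in raw_returns]
print(labels)  # [0, 0, 0, 1, 1]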
@@ -209,20 +209,20 @@ class LightGBM(object):
 
 
 if __name__ == "__main__":
-    # i = int(input("输入 1 训练, 输入 2 预测:\n"))
-    # if i == 1:
-    #     f = "train"
-    #     dt = "whole"
-    #     L = LightGBM(flag=f, dt=dt)
-    #     L.train_model()
-    # elif i == 2:
-    #     f = "predict"
-    #     dt = int(input("Enter a date, 16-21:\n"))
-    #     L = LightGBM(flag=f, dt=dt)
-    #     # L.evaluate_model()
-    #     L.feature_importance()
-    L = LightGBM("train", "whole")
-    study = optuna.create_study(direction='maximize')
-    study.optimize(L.bays_params, n_trials=100)
-    print('Number of finished trials:', len(study.trials))
-    print('Best trial:', study.best_trial.params)
+    i = int(input("输入 1 训练, 输入 2 预测:\n"))
+    if i == 1:
+        f = "train"
+        dt = "whole"
+        L = LightGBM(flag=f, dt=dt)
+        L.train_model()
+    elif i == 2:
+        f = "predict"
+        dt = int(input("Enter a date, 16-21:\n"))
+        L = LightGBM(flag=f, dt=dt)
+        # L.evaluate_model()
+        L.feature_importance()
+    # L = LightGBM("train", "whole")
+    # study = optuna.create_study(direction='maximize')
+    # study.optimize(L.bays_params, n_trials=100)
+    # print('Number of finished trials:', len(study.trials))
+    # print('Best trial:', study.best_trial.params)
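The block commented out here tuned the model with Optuna via L.bays_params, which is not part of this diff. For reference only, a self-contained sketch of what such an objective commonly looks like; the synthetic data, search ranges, binary objective, and AUC metric are assumptions, not code from this repository:

import lightgbm as lgb
import numpy as np
import optuna
from sklearn.metrics import roc_auc_score

# Hypothetical stand-ins for the train/test splits the class builds from its JSON files.
rng = np.random.default_rng(0)
X_train, X_test = rng.normal(size=(800, 10)), rng.normal(size=(200, 10))
Y_train = (rng.random(800) > 0.7).astype(int)
Y_test = (rng.random(200) > 0.7).astype(int)


def objective(trial):
    # Sample the same hyperparameters that this commit now hard-codes above.
    params = {
        "objective": "binary",
        "verbosity": -1,
        "num_leaves": trial.suggest_int("num_leaves", 16, 64),
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": 1,
        "num_threads": 16,
    }
    train_data = lgb.Dataset(X_train, label=Y_train)
    test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
    bst = lgb.train(params, train_data, num_boost_round=100, valid_sets=[test_data])
    return roc_auc_score(Y_test, bst.predict(X_test))  # metric the study maximizes


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)  # the commented-out call used n_trials=100
print("Number of finished trials:", len(study.trials))
print("Best trial:", study.best_trial.params)

Hard-coding the best trial's parameters, as this commit does, avoids re-running the search on every training run.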

+ 8 - 7
result_analysis.py

@@ -19,19 +19,20 @@ def analysis(data):
         if int(true_tag) == int(pred_tag) == 1:
             accuracy_count += 1
         total_count += 1
-    print("预测为 1 的数量", pred_count)
-    print("实际为 1 的数量", true_count)
-    print("预测为 1,实际也为 1 的数量", accuracy_count)
-    print("total_video", total_count)
-    print("准确率", accuracy_count / pred_count)
-    print("召回率", accuracy_count / true_count)
+    # print("预测为 1 的数量", pred_count)
+    # print("实际为 1 的数量", true_count)
+    # print("预测为 1,实际也为 1 的数量", accuracy_count)
+    # print("total_video", total_count)
+    # print("准确率", accuracy_count / pred_count)
+    # print("召回率", accuracy_count / true_count)
+    print(total_count, pred_count, accuracy_count, true_count, accuracy_count / true_count, accuracy_count / pred_count)
     print("\n")
 
 
 if __name__ == '__main__':
     d = [16, 17, 18, 19, 20, 21]
     for item in d:
-        path = "summary_tag_03{}_spider.txt".format(item)
+        path = "summary_tag_03{}_user.txt".format(item)
         with open(path, encoding="utf-8") as f2:
             data2 = f2.readlines()
         analysis(data2)
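With the labelled prints commented out, the consolidated line prints, in order: total_count, pred_count, accuracy_count, true_count, recall (accuracy_count / true_count), and precision (accuracy_count / pred_count). As a sanity check only, a small sketch that reproduces the same two ratios with scikit-learn; the sample tags are invented and scikit-learn is not a dependency of this script:

from sklearn.metrics import precision_score, recall_score

# Hypothetical 0/1 tag pairs in the same form that analysis() parses from each line.
y_true = [1, 0, 1, 1, 0, 0]
y_pred = [1, 0, 0, 1, 1, 0]
print("precision", precision_score(y_true, y_pred))  # accuracy_count / pred_count = 2/3
print("recall", recall_score(y_true, y_pred))        # accuracy_count / true_count = 2/3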