
Handle the user model

罗俊辉, 1 year ago
commit 5177814c7c
2 changed files with 33 additions and 32 deletions
  1. main_userupload.py (+25, -25)
  2. result_analysis.py (+8, -7)

+ 25 - 25
main_userupload.py

@@ -135,14 +135,14 @@ class LightGBM(object):
         )
         test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
         params = {
-            'num_leaves': 25,
-            'learning_rate': 0.00435469653451866,
-            'feature_fraction': 0.8659696885542688,
-            'bagging_fraction': 0.4671847911224712,
+            'num_leaves': 29,
+            'learning_rate': 0.0005153812869522004,
+            'feature_fraction': 0.7460901121756344,
+            'bagging_fraction': 0.5744390458938479,
             'bagging_freq': 1,
-            # 'min_child_samples': 65,
-            "num_threads": 16,  # 线程数量
-        }
+            "num_threads": 16,
+            }
+
         # train the model
         num_round = 100
         print("开始训练......")
@@ -164,7 +164,7 @@ class LightGBM(object):
         with open("data/produce_data/y_data_total_return_predict_{}_user.json".format(self.dt)) as f2:
             Y_test = json.loads(f2.read())
 
-        Y_test = [0 if i <= 19 else 1 for i in Y_test]
+        Y_test = [0 if i <= 31 else 1 for i in Y_test]
         X_test = pd.DataFrame(x_list, columns=self.my_c)
         for key in self.str_columns:
             X_test[key] = self.label_encoder.fit_transform(X_test[key])
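The only change in this hunk raises the positive-label cutoff for Y_test from 19 to 31 total returns. A minimal illustration of the binarization under the new cutoff; the sample return counts are made up, only the threshold comes from the diff:

# Hypothetical raw return counts mapped to labels with the new cutoff of 31.
raw_returns = [5, 19, 31, 32, 100]
labels = [0 if i <= 31 else 1 for i in raw_returns]
print(labels)  # [0, 0, 0, 1, 1]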
@@ -209,20 +209,20 @@ class LightGBM(object):
 
 
 if __name__ == "__main__":
-    # i = int(input("输入 1 训练, 输入 2 预测:\n"))
-    # if i == 1:
-    #     f = "train"
-    #     dt = "whole"
-    #     L = LightGBM(flag=f, dt=dt)
-    #     L.train_model()
-    # elif i == 2:
-    #     f = "predict"
-    #     dt = int(input("Enter a date, 16-21:\n"))
-    #     L = LightGBM(flag=f, dt=dt)
-    #     # L.evaluate_model()
-    #     L.feature_importance()
-    L = LightGBM("train", "whole")
-    study = optuna.create_study(direction='maximize')
-    study.optimize(L.bays_params, n_trials=100)
-    print('Number of finished trials:', len(study.trials))
-    print('Best trial:', study.best_trial.params)
+    i = int(input("输入 1 训练, 输入 2 预测:\n"))
+    if i == 1:
+        f = "train"
+        dt = "whole"
+        L = LightGBM(flag=f, dt=dt)
+        L.train_model()
+    elif i == 2:
+        f = "predict"
+        dt = int(input("Enter a date, 16-21:\n"))
+        L = LightGBM(flag=f, dt=dt)
+        # L.evaluate_model()
+        L.feature_importance()
+    # L = LightGBM("train", "whole")
+    # study = optuna.create_study(direction='maximize')
+    # study.optimize(L.bays_params, n_trials=100)
+    # print('Number of finished trials:', len(study.trials))
+    # print('Best trial:', study.best_trial.params)
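The block commented out here tuned the model with Optuna via L.bays_params, which is not part of this diff. For reference only, a self-contained sketch of what such an objective commonly looks like; the synthetic data, search ranges, binary objective, and AUC metric are assumptions, not code from this repository:

import lightgbm as lgb
import numpy as np
import optuna
from sklearn.metrics import roc_auc_score

# Hypothetical stand-ins for the train/test splits the class builds from its JSON files.
rng = np.random.default_rng(0)
X_train, X_test = rng.normal(size=(800, 10)), rng.normal(size=(200, 10))
Y_train = (rng.random(800) > 0.7).astype(int)
Y_test = (rng.random(200) > 0.7).astype(int)


def objective(trial):
    # Sample the same hyperparameters that this commit now hard-codes above.
    params = {
        "objective": "binary",
        "verbosity": -1,
        "num_leaves": trial.suggest_int("num_leaves", 16, 64),
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": 1,
        "num_threads": 16,
    }
    train_data = lgb.Dataset(X_train, label=Y_train)
    test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
    bst = lgb.train(params, train_data, num_boost_round=100, valid_sets=[test_data])
    return roc_auc_score(Y_test, bst.predict(X_test))  # metric the study maximizes


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=5)  # the commented-out call used n_trials=100
print("Number of finished trials:", len(study.trials))
print("Best trial:", study.best_trial.params)

Hard-coding the best trial's parameters, as this commit does, avoids re-running the search on every training run.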

+ 8 - 7
result_analysis.py

@@ -19,19 +19,20 @@ def analysis(data):
         if int(true_tag) == int(pred_tag) == 1:
             accuracy_count += 1
         total_count += 1
-    print("预测为 1 的数量", pred_count)
-    print("实际为 1 的数量", true_count)
-    print("预测为 1,实际也为 1 的数量", accuracy_count)
-    print("total_video", total_count)
-    print("准确率", accuracy_count / pred_count)
-    print("召回率", accuracy_count / true_count)
+    # print("预测为 1 的数量", pred_count)
+    # print("实际为 1 的数量", true_count)
+    # print("预测为 1,实际也为 1 的数量", accuracy_count)
+    # print("total_video", total_count)
+    # print("准确率", accuracy_count / pred_count)
+    # print("召回率", accuracy_count / true_count)
+    print(total_count, pred_count, accuracy_count, true_count, accuracy_count / true_count, accuracy_count / pred_count)
     print("\n")
 
 
 if __name__ == '__main__':
     d = [16, 17, 18, 19, 20, 21]
     for item in d:
-        path = "summary_tag_03{}_spider.txt".format(item)
+        path = "summary_tag_03{}_user.txt".format(item)
         with open(path, encoding="utf-8") as f2:
             data2 = f2.readlines()
         analysis(data2)
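With the labelled prints commented out, the consolidated line prints, in order: total_count, pred_count, accuracy_count, true_count, recall (accuracy_count / true_count), and precision (accuracy_count / pred_count). As a sanity check only, a small sketch that reproduces the same two ratios with scikit-learn; the sample tags are invented and scikit-learn is not a dependency of this script:

from sklearn.metrics import precision_score, recall_score

# Hypothetical 0/1 tag pairs in the same form that analysis() parses from each line.
y_true = [1, 0, 1, 1, 0, 0]
y_pred = [1, 0, 0, 1, 1, 0]
print("precision", precision_score(y_true, y_pred))  # accuracy_count / pred_count = 2/3
print("recall", recall_score(y_true, y_pred))        # accuracy_count / true_count = 2/3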