浏览代码

处理爬虫模型

罗俊辉 1 年之前
父节点
当前提交
2704722eb9
共有 1 个文件被更改,包括 23 次插入和 29 次删除
  1. 23 29
      main_spider.py

+ 23 - 29
main_spider.py

@@ -136,19 +136,16 @@ class LightGBM(object):
         train_data = lgb.Dataset(
             X_train,
             label=Y_train,
-            categorical_feature=["uid", "type", "channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
+            categorical_feature=["channel", "mode", "out_user_id", "tag1", "tag2", "tag3"],
         )
         test_data = lgb.Dataset(X_test, label=Y_test, reference=train_data)
         params = {
-            "objective": "binary",  # 指定二分类任务
-            "metric": "binary_logloss",  # 评估指标为二分类的log损失
-            "num_leaves": 36,  # 叶子节点数
-            "learning_rate":  0.08479152931388902,  # 学习率
-            "bagging_fraction": 0.6588121592044218,  # 建树的样本采样比例
-            "feature_fraction": 0.4572757903437793,  # 建树的特征选择比例
-            "bagging_freq": 2,  # k 意味着每 k 次迭代执行bagging
-            "num_threads": 16,  # 线程数量
-            "mini_child_samples": 71
+            'num_leaves': 25,
+            'learning_rate': 0.00435469653451866,
+            'feature_fraction': 0.8659696885542688,
+            'bagging_fraction': 0.4671847911224712,
+            'bagging_freq': 1,
+            'min_child_samples': 65
         }
         # 训练模型
         num_round = 100
@@ -216,22 +213,19 @@ class LightGBM(object):
 
 
 if __name__ == "__main__":
-    # i = int(input("输入 1 训练, 输入 2 预测:\n"))
-    # if i == 1:
-    #     f = "train"
-    #     dt = "whole"
-    #     L = LightGBM(flag=f, dt=dt)
-    #     L.train_model()
-    # elif i == 2:
-    #     f = "predict"
-    #     dt = int(input("输入日期, 16-21:\n"))
-    #     L = LightGBM(flag=f, dt=dt)
-    #     L.evaluate_model()
-    L = LightGBM("train", "whole")
-    study = optuna.create_study(direction='maximize')
-    study.optimize(L.bays_params, n_trials=100)
-    print('Number of finished trials:', len(study.trials))
-    print('Best trial:', study.best_trial.params)
-    # L.train_model()
-    # L.evaluate_model()
-    # L.feature_importance()
+    i = int(input("输入 1 训练, 输入 2 预测:\n"))
+    if i == 1:
+        f = "train"
+        dt = "whole"
+        L = LightGBM(flag=f, dt=dt)
+        L.train_model()
+    elif i == 2:
+        f = "predict"
+        dt = int(input("输入日期, 16-21:\n"))
+        L = LightGBM(flag=f, dt=dt)
+        L.evaluate_model()
+    # L = LightGBM("train", "whole")
+    # study = optuna.create_study(direction='maximize')
+    # study.optimize(L.bays_params, n_trials=100)
+    # print('Number of finished trials:', len(study.trials))
+    # print('Best trial:', study.best_trial.params)