1 年之前 · 1beb98a29f
--- a/main_spider.py
+++ b/main_spider.py
@@ -64,11 +64,11 @@ class LightGBM(object):
 
				         df = pd.read_json(path)
			
 
				         df = df.dropna(subset=['label'])  # 把 label 为空的删掉
			
 
				         labels = df['label']
			
 
				-        if not yc:
			
 
				-            temp = sorted(labels)
			
 
				-            yc = temp[int(len(temp) * 0.7)]
			
 
				-        print("阈值", yc)
			
 
				-        labels = [0 if i < yc else 1 for i in labels]
			
 
				+        # if not yc:
			
 
				+        #     temp = sorted(labels)
			
 
				+        #     yc = temp[int(len(temp) * 0.7)]
			
 
				+        # print("阈值", yc)
			
 
				+        # labels = [0 if i < yc else 1 for i in labels]
			
 
				         features = df.drop(['video_id', 'label', 'video_title'], axis=1)
			
 
				         for key in self.float_columns:
			
 
				             features[key] = pd.to_numeric(features[key], errors="coerce")
			
@@ -78,11 +78,14 @@ class LightGBM(object):
 
				         return features, labels, df
			
 
				 
			
 
				     def best_params(self):
			
 
				-        path = "data/train_data/spider_data_240401.json"
			
 
				+        """
			
 
				+        Find the best LightGBM hyperparameters via randomized search.
			
 
				+        """
			
 
				+        path = "data/train_data/spider_train_20240408.json"
			
 
				         X, y, ori_df = self.read_data(path)
			
 
				         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
			
 
				 
			
 
				-        lgbm = lgb.LGBMClassifier(objective='binary')
			
 
				+        lgb_ = lgb.LGBMClassifier(objective='binary')
			
 
				 
			
 
				         # 设置搜索的参数范围
			
 
				         param_dist = {
			
@@ -96,7 +99,7 @@ class LightGBM(object):
 
				 
			
 
				         # 定义 RandomizedSearchCV
			
 
				         rsearch = RandomizedSearchCV(
			
 
				-            estimator=lgbm,
			
 
				+            estimator=lgb_,
			
 
				             param_distributions=param_dist,
			
 
				             n_iter=100,
			
 
				             cv=3,
			
@@ -122,7 +125,7 @@ class LightGBM(object):
 
				         Load dataset
			
 
				         :return:
			
 
				         """
			
 
				-        path = "data/train_data/spider_train_20240402.json"
			
 
				+        path = "data/train_data/spider_train_20240408.json"
			
 
				         x, y, ori_df = self.read_data(path)
			
 
				         train_size = int(len(x) * self.split_c)
			
 
				         X_train, X_test = x[:train_size], x[train_size:]