|
@@ -0,0 +1,143 @@
|
|
|
+import pandas as pd
|
|
|
+import lightgbm as lgb
|
|
|
+from sklearn.model_selection import train_test_split
|
|
|
+from sklearn.model_selection import GridSearchCV
|
|
|
+from sklearn.metrics import mean_absolute_percentage_error, r2_score
|
|
|
+from rov_train import process_data
|
|
|
+from config import set_config
|
|
|
+
|
|
|
+config_, env = set_config()
|
|
|
+
|
|
|
+train_filename = config_.TRAIN_DATA_FILENAME
|
|
|
+X, Y, videos, fea = process_data(filename=train_filename)
|
|
|
+x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.33)
|
|
|
+
|
|
|
+# params = {
|
|
|
+# 'boosting_type': 'gbdt',
|
|
|
+# 'objective': 'regression',
|
|
|
+# 'metric': 'mape',
|
|
|
+# 'nthread': 4,
|
|
|
+# 'learning_rate': 0.1,
|
|
|
+# 'num_leaves': 30,
|
|
|
+# 'max_depth': 5,
|
|
|
+# 'subsample': 0.8,
|
|
|
+# 'colsample_bytree': 0.8,
|
|
|
+# }
|
|
|
+# data_train = lgb.Dataset(x_train, y_train)
|
|
|
+# cv_results = lgb.cv(
|
|
|
+# params,
|
|
|
+# data_train,
|
|
|
+# num_boost_round=1000,
|
|
|
+# nfold=5,
|
|
|
+# stratified=False,
|
|
|
+# shuffle=True,
|
|
|
+# metrics='mape',
|
|
|
+# early_stopping_rounds=50,
|
|
|
+# seed=0
|
|
|
+# )
|
|
|
+# print('cv_results: ', cv_results)
|
|
|
+# print('best n_estimators: ', len(cv_results['mape-mean']))
|
|
|
+# print('best cv score: ', pd.Series(cv_results['mape-mean']).min())
|
|
|
+
|
|
|
+# best n_estimators: 1000
|
|
|
+# best cv score: 0.1578205729958749
|
|
|
+
|
|
|
+
|
|
|
+# params_test1 = {'max_depth': range(3, 8, 1), 'num_leaves': range(5, 100, 10)}
|
|
|
+# gsearch1 = GridSearchCV(
|
|
|
+# estimator=lgb.LGBMRegressor(boosting_type='gbdt', objective='regression', metrics='mape', learning_rate=0.1,
|
|
|
+# n_estimators=1000, max_depth=6, bagging_fraction=0.8, feature_fraction=0.8),
|
|
|
+# param_grid=params_test1, scoring='r2', cv=5, n_jobs=-1)
|
|
|
+# gsearch1.fit(x_train, y_train)
|
|
|
+# print(gsearch1.cv_results_, gsearch1.best_params_, gsearch1.best_score_)
|
|
|
+
|
|
|
+# {'max_depth': 3, 'num_leaves': 5} 0.9263654009553877
|
|
|
+
|
|
|
+
|
|
|
+# params_test2 = {'max_bin': range(5, 256, 10), 'min_data_in_leaf': range(1, 102, 10)}
|
|
|
+#
|
|
|
+# gsearch2 = GridSearchCV(
|
|
|
+# estimator=lgb.LGBMRegressor(boosting_type='gbdt', objective='regression', metrics='mape', learning_rate=0.1,
|
|
|
+# n_estimators=1000, max_depth=3, num_leaves=5, bagging_fraction=0.8,
|
|
|
+# feature_fraction=0.8),
|
|
|
+# param_grid=params_test2, scoring='r2', cv=5, n_jobs=-1)
|
|
|
+# gsearch2.fit(x_train, y_train)
|
|
|
+# print(gsearch2.cv_results_, gsearch2.best_params_, gsearch2.best_score_)
|
|
|
+
|
|
|
+# {'max_bin': 125, 'min_data_in_leaf': 21} 0.7918455469932647
|
|
|
+
|
|
|
+
|
|
|
+# params_test3 = {'feature_fraction': [0.6, 0.7, 0.8, 0.9, 1.0],
|
|
|
+# 'bagging_fraction': [0.6, 0.7, 0.8, 0.9, 1.0],
|
|
|
+# 'bagging_freq': range(0, 81, 10)
|
|
|
+# }
|
|
|
+#
|
|
|
+# gsearch3 = GridSearchCV(
|
|
|
+# estimator=lgb.LGBMRegressor(boosting_type='gbdt', objective='regression', metrics='mape', learning_rate=0.1,
|
|
|
+# n_estimators=1000, max_depth=3, num_leaves=5, max_bin=125, min_data_in_leaf=21),
|
|
|
+# param_grid=params_test3, scoring='r2', cv=5, n_jobs=-1)
|
|
|
+# gsearch3.fit(x_train, y_train)
|
|
|
+# print(gsearch3.cv_results_, gsearch3.best_params_, gsearch3.best_score_)
|
|
|
+
|
|
|
+# {'bagging_fraction': 0.6, 'bagging_freq': 0, 'feature_fraction': 0.9} 0.9003939890191465
|
|
|
+
|
|
|
+
|
|
|
+# params_test4 = {'lambda_l1': [1e-5, 1e-3, 1e-1, 0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
|
|
|
+# 'lambda_l2': [1e-5, 1e-3, 1e-1, 0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0]
|
|
|
+# }
|
|
|
+#
|
|
|
+# gsearch4 = GridSearchCV(
|
|
|
+# estimator=lgb.LGBMRegressor(boosting_type='gbdt', objective='regression', metrics='mape', learning_rate=0.1,
|
|
|
+# n_estimators=1000, max_depth=3, num_leaves=5, max_bin=125, min_data_in_leaf=21,
|
|
|
+# bagging_fraction=0.6, bagging_freq=0, feature_fraction=0.9),
|
|
|
+# param_grid=params_test4, scoring='r2', cv=5, n_jobs=-1)
|
|
|
+# gsearch4.fit(x_train, y_train)
|
|
|
+# print(gsearch4.cv_results_, gsearch4.best_params_, gsearch4.best_score_)
|
|
|
+
|
|
|
+# {'lambda_l1': 0.7, 'lambda_l2': 0.9} 0.7538021411131314
|
|
|
+
|
|
|
+
|
|
|
+# params_test5 = {'min_split_gain': [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}
|
|
|
+# gsearch5 = GridSearchCV(
|
|
|
+# estimator=lgb.LGBMRegressor(boosting_type='gbdt', objective='regression', metrics='mape', learning_rate=0.1,
|
|
|
+# n_estimators=1000, max_depth=3, num_leaves=5, max_bin=125, min_data_in_leaf=21,
|
|
|
+# bagging_fraction=0.6, bagging_freq=0, feature_fraction=0.9,
|
|
|
+# lambda_l1=0.7, lambda_l2=0.9),
|
|
|
+# param_grid=params_test5, scoring='r2', cv=5, n_jobs=-1)
|
|
|
+# gsearch5.fit(x_train, y_train)
|
|
|
+# print(gsearch5.cv_results_, gsearch5.best_params_, gsearch5.best_score_)
|
|
|
+#
|
|
|
+# {'min_split_gain': 0.0} 0.8903744931054016
|
|
|
+
|
|
|
+
|
|
|
+# model = lgb.LGBMRegressor(boosting_type='gbdt', objective='regression', metrics='mape', learning_rate=0.01,
|
|
|
+# n_estimators=7000, max_depth=3, num_leaves=5, max_bin=125, min_data_in_leaf=21,
|
|
|
+# bagging_fraction=0.6, bagging_freq=0, feature_fraction=0.9,
|
|
|
+# lambda_l1=0.7, lambda_l2=0.9, min_split_gain=0)
|
|
|
+# model.fit(x_train, y_train)
|
|
|
+# y_pre = model.predict(x_test)
|
|
|
+# print('mape: ', mean_absolute_percentage_error(y_test, y_pre))
|
|
|
+# print('r2: ', r2_score(y_test, y_pre))
|
|
|
+
|
|
|
+# mape: 0.7407816559793686
|
|
|
+# r2: 0.9069208480190998
|
|
|
+
|
|
|
+
|
|
|
+# model = lgb.LGBMRegressor()
|
|
|
+# model.fit(x_train, y_train)
|
|
|
+# y_pre = model.predict(x_test)
|
|
|
+# print('mape: ', mean_absolute_percentage_error(y_test, y_pre))
|
|
|
+# print('r2: ', r2_score(y_test, y_pre))
|
|
|
+
|
|
|
+# mape: 0.211698833035155
|
|
|
+# r2: 0.893979193478978
|
|
|
+
|
|
|
+
|
|
|
# Final model: hyperparameters chosen from the grid-search experiments
# above (note: with bagging_freq=0 LightGBM disables bagging, so
# bagging_fraction has no effect — this matches the gsearch3 optimum).
best_params = dict(
    boosting_type='gbdt',
    objective='regression',
    metrics='mape',          # LightGBM alias for 'metric'
    learning_rate=0.08,
    n_estimators=3000,
    max_depth=3,
    num_leaves=5,
    max_bin=125,
    min_data_in_leaf=21,
    bagging_fraction=0.6,
    bagging_freq=0,
    feature_fraction=0.9,
    lambda_l1=0.7,
    lambda_l2=0.9,
    min_split_gain=0,
)
model = lgb.LGBMRegressor(**best_params)
model.fit(x_train, y_train)

# Evaluate on the hold-out split.
y_pre = model.predict(x_test)
print('mape: ', mean_absolute_percentage_error(y_test, y_pre))
print('r2: ', r2_score(y_test, y_pre))
|