罗俊辉 1 سال پیش
والد
کامیت
26fbc5343b
2 فایلهای تغییر یافته به همراه 19 افزوده شده و 12 حذف شده
  1. 5 3
      main.py
  2. 14 9
      process_data.py

+ 5 - 3
main.py

@@ -19,7 +19,7 @@ class LightGBM(object):
     LightGBM model for classification
     """
 
-    def __init__(self):
+    def __init__(self, flag, dt):
         self.label_encoder = LabelEncoder()
         self.my_c = [
             "uid",
@@ -56,7 +56,9 @@ class LightGBM(object):
         ]
         self.split_c = 0.999
         self.yc = 0.8
-        self.model = "lightgbm_tag_train_04.bin"
+        self.model = "lightgbm_0326.bin"
+        self.flag = flag
+        self.dt = dt
 
     def bays_params(self, trial):
         """
@@ -96,7 +98,7 @@ class LightGBM(object):
         Generate data for feature engineering
         :return:
         """
-        with open("produce_data/x_data_total_return_train.json") as f1:
+        with open("data/produce_data/x_data_total_return_{}.json".format(self.flag,)) as f1:
             x_list = json.loads(f1.read())
         index_t = int(len(x_list) * self.split_c)
         X_train = pd.DataFrame(x_list[:index_t], columns=self.my_c)

+ 14 - 9
process_data.py

@@ -81,17 +81,17 @@ class DataProcessor(object):
             print(video_id, "\t", e)
             return []
 
-    def producer(self):
+    def producer(self, dt):
         """
         生成数据
         :return:none
         """
         if self.flag == "train":
-            x_path = "data/hour_train.json"
-            y_path = "data/daily-label-20240101-20240320.json"
+            x_path = "data/train_data/train_2024010100_2024031523.json"
+            y_path = "data/train_data/daily-label-20240101-20240325.json"
         elif self.flag == "predict":
-            x_path = "prid_data/train_0319.json"
-            y_path = "data/daily-label-20240315-20240321.json"
+            x_path = "data/pred_data/pred_202403{}00_202403{}23.json".format(dt, dt)
+            y_path = "data/train_data/daily-label-20240101-20240325.json"
         else:
             return
         with open(x_path) as f:
@@ -106,13 +106,18 @@ class DataProcessor(object):
                 our_label, features = self.generate_train_label(video_obj, y_data, c)
                 x_list.append(features)
                 y_list.append(our_label)
-            with open("produce_data/x_data_{}_{}.json".format(c, self.flag), "w") as f1:
+            with open("data/produce_data/x_data_{}_{}_{}.json".format(c, self.flag, dt), "w") as f1:
                 f1.write(json.dumps(x_list, ensure_ascii=False))
 
-            with open("produce_data/y_data_{}_{}.json".format(c, self.flag), "w") as f2:
+            with open("data/produce_data/y_data_{}_{}_{}.json".format(c, self.flag, dt), "w") as f2:
                 f2.write(json.dumps(y_list, ensure_ascii=False))
 
 
 if __name__ == "__main__":
-    D = DataProcessor(flag="predict")
-    D.producer()
+    flag = str(input("please input method train or predict"))
+    D = DataProcessor(flag=flag)
+    if flag == "predict":
+        for d in range(16, 22):
+            D.producer(d)
+    else:
+        D.producer(dt="whole")