罗俊辉 před 1 rokem
rodič
revize
388d4ea26a
Změněn 1 soubor: 2 přidání a 2 odebrání
  1. 2 2
      data_process/process_data_for_lightgbm.py

+ 2 - 2
data_process/process_data_for_lightgbm.py

@@ -53,11 +53,11 @@ class SpiderProcess(object):
         three_date_before = dt_time + datetime.timedelta(days=4)
         temp_time = three_date_before.strftime("%Y%m%d")
         if flag == "train":
-            select_sql = f"""SELECT video_id, video_title, label, channel, out_user_id, spider_mode, out_play_cnt, out_like_cnt, out_share_cnt FROM lightgbm_data WHERE type = 'spider' and daily_dt_str <= '{temp_time}' order by daily_dt_str;"""
+            select_sql = f"""SELECT video_id, video_title, rov_label, channel, out_user_id, spider_mode, out_play_cnt, out_like_cnt, out_share_cnt FROM lightgbm_data WHERE type = 'spider' and daily_dt_str <= '{temp_time}' and rov_label > 0;"""
             des_path = "/root/luojunhui/alg/data/train_data/spider_train_{}.json".format(
                 datetime.datetime.today().strftime("%Y%m%d"))
         elif flag == "predict":
-            select_sql = f"""SELECT video_id, video_title, label, channel, out_user_id, spider_mode, out_play_cnt, out_like_cnt, out_share_cnt FROM lightgbm_data WHERE type = 'spider' and daily_dt_str = '{temp_time}';"""
+            select_sql = f"""SELECT video_id, video_title, rov_label, channel, out_user_id, spider_mode, out_play_cnt, out_like_cnt, out_share_cnt FROM lightgbm_data WHERE type = 'spider' and daily_dt_str = '{temp_time}' and rov_label > 0;"""
             des_path = "/root/luojunhui/alg/data/predict_data/predict_{}.json".format(dt_time.strftime("%Y%m%d"))
         else:
             return