Browse Source

generate label for mysql

罗俊辉 1 year ago
parent
commit
c49d398c50
1 changed file with 3 additions and 3 deletions
  1. 3 3
      process_data.py

+ 3 - 3
process_data.py

@@ -129,7 +129,7 @@ class SpiderProcess(object):
         把 spider_duration 存储到数据库中
         :return:
         """
-        select_sql = "SELECT video_id, video_title, channel, out_user_id, spider_mode, out_play_cnt, out_like_cnt, out_share_cnt, label FROM lightgbm_data WHERE type = 'spider';"
+        select_sql = "SELECT video_id, video_title, label, channel, out_user_id, spider_mode, out_play_cnt, out_like_cnt, out_share_cnt FROM lightgbm_data WHERE type = 'spider';"
         data_list = self.client_spider.select(select_sql)
         df = []
         for line in tqdm(data_list):
@@ -155,8 +155,8 @@ class SpiderProcess(object):
                 df.append(temp[2:])
             except:
                 continue
-        df = pd.DataFrame(df, columns=['title', 'channel', 'out_user_id', 'mode', 'out_play_cnt', 'out_like_cnt',
-                                       'out_share_cnt', 'lop', 'duration', 'tag1', 'tag2', 'tag3'])
+        df = pd.DataFrame(df, columns=['label', 'channel', 'out_user_id', 'mode', 'out_play_cnt', 'out_like_cnt',
+                                       'out_share_cnt', 'label', 'lop', 'duration', 'tag1', 'tag2', 'tag3'])
         df.to_json("data/train_data/spider_data_{}.json".format(datetime.datetime.today().strftime("y%m%d")), orient='records')