|
@@ -129,7 +129,7 @@ class SpiderProcess(object):
|
|
|
把 spider_duration 存储到数据库中
|
|
|
:return:
|
|
|
"""
|
|
|
- select_sql = "SELECT video_id, video_title, channel, out_user_id, spider_mode, out_play_cnt, out_like_cnt, out_share_cnt, label FROM lightgbm_data WHERE type = 'spider';"
|
|
|
+ select_sql = "SELECT video_id, video_title, label, channel, out_user_id, spider_mode, out_play_cnt, out_like_cnt, out_share_cnt FROM lightgbm_data WHERE type = 'spider';"
|
|
|
data_list = self.client_spider.select(select_sql)
|
|
|
df = []
|
|
|
for line in tqdm(data_list):
|
|
@@ -155,8 +155,8 @@ class SpiderProcess(object):
|
|
|
df.append(temp[2:])
|
|
|
except:
|
|
|
continue
|
|
|
- df = pd.DataFrame(df, columns=['title', 'channel', 'out_user_id', 'mode', 'out_play_cnt', 'out_like_cnt',
|
|
|
- 'out_share_cnt', 'lop', 'duration', 'tag1', 'tag2', 'tag3'])
|
|
|
+ df = pd.DataFrame(df, columns=['label', 'channel', 'out_user_id', 'mode', 'out_play_cnt', 'out_like_cnt',
|
|
|
+ 'out_share_cnt', 'label', 'lop', 'duration', 'tag1', 'tag2', 'tag3'])
|
|
|
df.to_json("data/train_data/spider_data_{}.json".format(datetime.datetime.today().strftime("y%m%d")), orient='records')
|
|
|
|
|
|
|