|
@@ -242,16 +242,16 @@ class UserProcess(object):
|
|
|
three_date_before = dt_time + datetime.timedelta(days=4)
|
|
|
temp_time = three_date_before.strftime("%Y%m%d")
|
|
|
if flag == "train":
|
|
|
- sql = "select title, label, uid, channel, user_fans, user_view_30, user_share_30, user_return_30, user_rov, user_str, user_return_videos_30, user_return_videos_3, user_return_3, user_view_3, user_share_3, address from lighgbm_data where type = 'userupload' and daily_dt_str >= '20240305';"
|
|
|
+ sql = "select title, label, uid, channel, user_fans, user_view_30, user_share_30, user_return_30, user_rov, user_str, user_return_videos_30, user_return_videos_3, user_return_3, user_view_3, user_share_3, address from lightgbm_data where type = 'userupload' and daily_dt_str >= '20240305';"
|
|
|
des_path = "data/train_data/spider_train_{}".format(datetime.datetime.today().strftime("%Y%m%d"))
|
|
|
elif flag == "predict":
|
|
|
- sql = f"""select title, label, uid, channel, user_fans, user_view_30, user_share_30, user_return_30, user_rov, user_str, user_return_videos_30, user_return_videos_3, user_return_3, user_view_3, user_share_3, address from lighgbm_data where type = 'userupload' and daily_dt_str = '{temp_time}';"""
|
|
|
+ sql = f"""select title, label, uid, channel, user_fans, user_view_30, user_share_30, user_return_30, user_rov, user_str, user_return_videos_30, user_return_videos_3, user_return_3, user_view_3, user_share_3, address from lightgbm_data where type = 'userupload' and daily_dt_str = '{temp_time}';"""
|
|
|
des_path = "data/predict_data/predict_{}.json".format(dt_time.strftime("%Y%m%d"))
|
|
|
else:
|
|
|
return
|
|
|
dt_list = self.client_spider.select(sql)
|
|
|
df = []
|
|
|
- for line in dt_list:
|
|
|
+ for line in tqdm(dt_list):
|
|
|
title = line[0]
|
|
|
temp = line
|
|
|
title_tags = list(jieba.analyse.textrank(title, topK=3))
|