Bladeren bron

generate label for mysql

罗俊辉 1 jaar geleden
bovenliggende
commit
0e97eed0b7
1 gewijzigd bestand met 3 toevoegingen en 3 verwijderingen
  1. 3 3
      process_data.py

+ 3 - 3
process_data.py

@@ -242,16 +242,16 @@ class UserProcess(object):
         three_date_before = dt_time + datetime.timedelta(days=4)
         temp_time = three_date_before.strftime("%Y%m%d")
         if flag == "train":
-            sql = "select title, label, uid, channel, user_fans, user_view_30, user_share_30, user_return_30, user_rov, user_str, user_return_videos_30, user_return_videos_3, user_return_3, user_view_3, user_share_3, address from lighgbm_data where type = 'userupload' and daily_dt_str >= '20240305';"
+            sql = "select title, label, uid, channel, user_fans, user_view_30, user_share_30, user_return_30, user_rov, user_str, user_return_videos_30, user_return_videos_3, user_return_3, user_view_3, user_share_3, address from lightgbm_data where type = 'userupload' and daily_dt_str >= '20240305';"
             des_path = "data/train_data/spider_train_{}".format(datetime.datetime.today().strftime("%Y%m%d"))
         elif flag == "predict":
-            sql = f"""select title, label, uid, channel, user_fans, user_view_30, user_share_30, user_return_30, user_rov, user_str, user_return_videos_30, user_return_videos_3, user_return_3, user_view_3, user_share_3, address from lighgbm_data where type = 'userupload' and daily_dt_str = '{temp_time}';"""
+            sql = f"""select title, label, uid, channel, user_fans, user_view_30, user_share_30, user_return_30, user_rov, user_str, user_return_videos_30, user_return_videos_3, user_return_3, user_view_3, user_share_3, address from lightgbm_data where type = 'userupload' and daily_dt_str = '{temp_time}';"""
             des_path = "data/predict_data/predict_{}.json".format(dt_time.strftime("%Y%m%d"))
         else:
             return
         dt_list = self.client_spider.select(sql)
         df = []
-        for line in dt_list:
+        for line in tqdm(dt_list):
             title = line[0]
             temp = line
             title_tags = list(jieba.analyse.textrank(title, topK=3))