Browse Source

generate label for mysql

罗俊辉 1 year ago
parent
commit
fd703669a5
1 changed file with 3 additions and 3 deletions
  1. 3 3
      process_data.py

+ 3 - 3
process_data.py

@@ -152,7 +152,7 @@ class DataProcessor(object):
             label_info = self.label_data
             if not label_info:
                 print(label_info)
-            print(len(label_info))
+            # print(len(label_info))
             title = read_title(client=self.client, video_id=video_id)
             label, dt_daily = generate_label(video_id, hour_dt, label_info)
             insert_sql = f"""UPDATE lightgbm_data 
@@ -161,10 +161,10 @@ class DataProcessor(object):
             ;"""
             self.client_spider.update(insert_sql)
 
-        select_sql = "SELECT video_id, hour_dt_str FROM lightgbm_data where label = 0 and hour_dt_str < '20240327';"
+        select_sql = "SELECT video_id, hour_dt_str FROM lightgbm_data where label is NULL and hour_dt_str < '20240327';"
         init_data_tuple = self.client_spider.select(select_sql)
         init_list = list(init_data_tuple)
-        for item in init_list:
+        for item in tqdm(init_list):
             # print(item)
             process_info(item)
         # with ThreadPoolExecutor(max_workers=10) as Pool: