|
@@ -11,7 +11,6 @@ import time
|
|
|
|
|
|
from tqdm import tqdm
|
|
|
import jieba.analyse
|
|
|
-from concurrent.futures.thread import ThreadPoolExecutor
|
|
|
|
|
|
sys.path.append(os.getcwd())
|
|
|
|
|
@@ -141,7 +140,7 @@ class DataProcessor(object):
|
|
|
label_obj = label_info.get(label_dt, {}).get(video_id)
|
|
|
if label_obj:
|
|
|
label = int(label_obj["total_return"]) if label_obj["total_return"] else 0
|
|
|
- print(label)
|
|
|
+ # print(label)
|
|
|
else:
|
|
|
label = 0
|
|
|
return label, label_dt
|
|
@@ -161,11 +160,10 @@ class DataProcessor(object):
|
|
|
# print(insert_sql)
|
|
|
self.client_spider.update(insert_sql)
|
|
|
|
|
|
- select_sql = "SELECT video_id, hour_dt_str FROM lightgbm_data where label = 0 and hour_dt_str < '20240327';"
|
|
|
+ select_sql = "SELECT video_id, hour_dt_str FROM lightgbm_data where label is NULL and hour_dt_str < '20240327';"
|
|
|
init_data_tuple = self.client_spider.select(select_sql)
|
|
|
init_list = list(init_data_tuple)
|
|
|
for item in tqdm(init_list):
|
|
|
- # print(item)
|
|
|
process_info(item)
|
|
|
# time.sleep(0.5)
|
|
|
# with ThreadPoolExecutor(max_workers=8) as Pool:
|