|
@@ -158,18 +158,18 @@ class DataProcessor(object):
|
|
|
label, dt_daily = generate_label(str(video_id), hour_dt, label_info)
|
|
|
|
|
|
insert_sql = f"""UPDATE lightgbm_data set video_title = '{title}', label = '{label}', daily_dt_str = '{dt_daily}' where video_id = '{video_id}';"""
|
|
|
- print(insert_sql)
|
|
|
+ # print(insert_sql)
|
|
|
self.client_spider.update(insert_sql)
|
|
|
|
|
|
select_sql = "SELECT video_id, hour_dt_str FROM lightgbm_data where label = 0 and hour_dt_str < '20240327';"
|
|
|
init_data_tuple = self.client_spider.select(select_sql)
|
|
|
init_list = list(init_data_tuple)
|
|
|
- # for item in tqdm(init_list[):
|
|
|
- # # print(item)
|
|
|
- # process_info(item)
|
|
|
+ for item in tqdm(init_list):
|
|
|
+ # print(item)
|
|
|
+ process_info(item)
|
|
|
# time.sleep(0.5)
|
|
|
- with ThreadPoolExecutor(max_workers=8) as Pool:
|
|
|
- Pool.map(process_info, init_list)
|
|
|
+ # with ThreadPoolExecutor(max_workers=8) as Pool:
|
|
|
+ # Pool.map(process_info, init_list)
|
|
|
|
|
|
|
|
|
class SpiderProcess(object):
|