|
@@ -152,7 +152,7 @@ class DataProcessor(object):
|
|
|
:param item_:
|
|
|
"""
|
|
|
video_id, hour_dt = item_
|
|
|
- print(type(video_id))
|
|
|
+ # print(type(video_id))
|
|
|
label_info = self.label_data
|
|
|
title = read_title(client=self.client, video_id=video_id)
|
|
|
label, dt_daily = generate_label(str(video_id), hour_dt, label_info)
|
|
@@ -164,12 +164,12 @@ class DataProcessor(object):
|
|
|
select_sql = "SELECT video_id, hour_dt_str FROM lightgbm_data where label = 0 and hour_dt_str < '20240327';"
|
|
|
init_data_tuple = self.client_spider.select(select_sql)
|
|
|
init_list = list(init_data_tuple)
|
|
|
- for item in tqdm(init_list[:10]):
|
|
|
- # print(item)
|
|
|
- process_info(item)
|
|
|
- time.sleep(0.5)
|
|
|
- # with ThreadPoolExecutor(max_workers=10) as Pool:
|
|
|
- # Pool.map(process_info, init_list)
|
|
|
+ # for item in tqdm(init_list[):
|
|
|
+ # # print(item)
|
|
|
+ # process_info(item)
|
|
|
+ # time.sleep(0.5)
|
|
|
+ with ThreadPoolExecutor(max_workers=8) as Pool:
|
|
|
+ Pool.map(process_info, init_list)
|
|
|
|
|
|
|
|
|
class SpiderProcess(object):
|