|  | @@ -1,12 +1,21 @@
 | 
	
		
			
				|  |  | -from common import Material
 | 
	
		
			
				|  |  | +import os
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  from extract_data.douyin.douyin_author import douyinAuthor
 | 
	
		
			
				|  |  |  from extract_data.kuaishou.kuaishou_author import kuaishouAuthor
 | 
	
		
			
				|  |  |  from extract_data.zhannei.zhannei_author import ZhanNeiAuthor
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  import schedule
 | 
	
		
			
				|  |  | -import time
 | 
	
		
			
				|  |  |  import concurrent.futures
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  | +import threading
 | 
	
		
			
				|  |  | +from common import Material
 | 
	
		
			
				|  |  | +# 控制读写速度的参数
 | 
	
		
			
				|  |  | +MAX_BPS = 120 * 1024 * 1024  # 120MB/s
 | 
	
		
			
				|  |  | +MAX_WORKERS = os.cpu_count() * 2  # 线程池最大工作线程数量
 | 
	
		
			
				|  |  | +READ_WRITE_CHUNK_SIZE = 1024 * 1024  # 每次读写的块大小 (1MB)
 | 
	
		
			
				|  |  | +SLEEP_INTERVAL = READ_WRITE_CHUNK_SIZE / MAX_BPS  # 控制每次读写的延迟时间
 | 
	
		
			
				|  |  | +# 全局锁,用于同步读写操作
 | 
	
		
			
				|  |  | +lock = threading.Lock()
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def gs_start(platform, user_data):
 | 
	
		
			
				|  |  |      print(f"执行{platform}数据抓取{user_data}")
 | 
	
	
		
			
				|  | @@ -21,13 +30,25 @@ def gs_start(platform, user_data):
 | 
	
		
			
				|  |  |  def gs_task(platform):
 | 
	
		
			
				|  |  |      data = Material.get_all_gs_user(platform)
 | 
	
		
			
				|  |  |      valid_data = [user_data for user_data in data if user_data['sheet'] is not None]
 | 
	
		
			
				|  |  | -    with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
	
		
			
				|  |  | -        futures = {executor.submit(gs_start, platform, user_data): user_data for user_data in valid_data}
 | 
	
		
			
				|  |  | +    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
 | 
	
		
			
				|  |  | +        futures = {executor.submit(gs_operation, platform, user_data): user_data for user_data in valid_data}
 | 
	
		
			
				|  |  |          for future in concurrent.futures.as_completed(futures):
 | 
	
		
			
				|  |  |              result = future.result()
 | 
	
		
			
				|  |  |              print("处理结果:", result)
 | 
	
		
			
				|  |  |      print(f"{platform.capitalize()}数据抓取定时任务执行完成.")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def gs_operation(platform, data):
 | 
	
		
			
				|  |  | +    with lock:
 | 
	
		
			
				|  |  | +        start_time = time.time()
 | 
	
		
			
				|  |  | +        time.sleep(SLEEP_INTERVAL)
 | 
	
		
			
				|  |  | +        end_time = time.time()
 | 
	
		
			
				|  |  | +        elapsed_time = end_time - start_time
 | 
	
		
			
				|  |  | +        if elapsed_time < SLEEP_INTERVAL:
 | 
	
		
			
				|  |  | +            time.sleep(SLEEP_INTERVAL - elapsed_time)
 | 
	
		
			
				|  |  | +        gs_start(platform, data)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  def cg_start(platform, user_data):
 | 
	
		
			
				|  |  |      print(f"执行{platform}数据抓取{user_data}")
 | 
	
		
			
				|  |  |      if platform == "douyin":
 | 
	
	
		
			
				|  | @@ -37,13 +58,23 @@ def cg_start(platform, user_data):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def cg_task(platform):
 | 
	
		
			
				|  |  |      data = Material.get_all_user(platform)
 | 
	
		
			
				|  |  | -    with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
	
		
			
				|  |  | -        futures = {executor.submit(cg_start, platform, user_data): user_data for user_data in data}
 | 
	
		
			
				|  |  | +    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
 | 
	
		
			
				|  |  | +        futures = {executor.submit(cg_operation, platform, user_data): user_data for user_data in data}
 | 
	
		
			
				|  |  |          for future in concurrent.futures.as_completed(futures):
 | 
	
		
			
				|  |  |              result = future.result()
 | 
	
		
			
				|  |  |              print("处理结果:", result)
 | 
	
		
			
				|  |  |      print(f"{platform.capitalize()}数据抓取定时任务执行完成.")
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +def cg_operation(platform, data):
 | 
	
		
			
				|  |  | +    with lock:
 | 
	
		
			
				|  |  | +        start_time = time.time()
 | 
	
		
			
				|  |  | +        time.sleep(SLEEP_INTERVAL)
 | 
	
		
			
				|  |  | +        end_time = time.time()
 | 
	
		
			
				|  |  | +        elapsed_time = end_time - start_time
 | 
	
		
			
				|  |  | +        if elapsed_time < SLEEP_INTERVAL:
 | 
	
		
			
				|  |  | +            time.sleep(SLEEP_INTERVAL - elapsed_time)
 | 
	
		
			
				|  |  | +        cg_start(platform, data)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  schedule.every().day.at("19:20").do(gs_task, "kuaishou")
 |