|  | @@ -6,7 +6,16 @@ from extract_data.zhannei.zhannei_author import ZhanNeiAuthor
 | 
	
		
			
				|  |  |  import schedule
 | 
	
		
			
				|  |  |  import time
 | 
	
		
			
				|  |  |  import concurrent.futures
 | 
	
		
			
				|  |  | +import threading
 | 
	
		
			
				|  |  | +import os
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +# Parameters that control the read/write speed
 | 
	
		
			
				|  |  | +MAX_BPS = 120 * 1024 * 1024  # 120MB/s
 | 
	
		
			
				|  |  | +MAX_WORKERS = os.cpu_count() * 2  # 线程池最大工作线程数量
 | 
	
		
			
				|  |  | +READ_WRITE_CHUNK_SIZE = 1024 * 1024  # 每次读写的块大小 (1MB)
 | 
	
		
			
				|  |  | +SLEEP_INTERVAL = READ_WRITE_CHUNK_SIZE / MAX_BPS  # 控制每次读写的延迟时间
 | 
	
		
			
				|  |  | +# Global lock used to synchronize read/write operations
 | 
	
		
			
				|  |  | +lock = threading.Lock()
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def douyin_start(user_data):
 | 
	
		
			
				|  |  |      print(f"执行抖音数据抓取{user_data}")
 | 
	
	
		
			
				|  | @@ -29,7 +38,14 @@ def zhannei_task():
 | 
	
		
			
				|  |  |      data = Material.get_all_gs_user("zhannei")
 | 
	
		
			
				|  |  |      # 创建一个线程池
 | 
	
		
			
				|  |  |      valid_data = [user_data for user_data in data if user_data['sheet'] is None]
 | 
	
		
			
				|  |  | -    with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
	
		
			
				|  |  | +    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
 | 
	
		
			
				|  |  | +        with lock:
 | 
	
		
			
				|  |  | +            start_time = time.time()
 | 
	
		
			
				|  |  | +            time.sleep(SLEEP_INTERVAL)
 | 
	
		
			
				|  |  | +            end_time = time.time()
 | 
	
		
			
				|  |  | +            elapsed_time = end_time - start_time
 | 
	
		
			
				|  |  | +            if elapsed_time < SLEEP_INTERVAL:
 | 
	
		
			
				|  |  | +                time.sleep(SLEEP_INTERVAL - elapsed_time)
 | 
	
		
			
				|  |  |          futures = [executor.submit(zhannei_start, user_data) for user_data in valid_data]
 | 
	
		
			
				|  |  |          # 等待所有任务执行完成
 | 
	
		
			
				|  |  |          for future in concurrent.futures.as_completed(futures):
 | 
	
	
		
			
				|  | @@ -44,7 +60,14 @@ def douyin_task():
 | 
	
		
			
				|  |  |      data = Material.get_all_gs_user("douyin")
 | 
	
		
			
				|  |  |      # 创建一个线程池
 | 
	
		
			
				|  |  |      valid_data = [user_data for user_data in data if user_data['sheet'] is not None]
 | 
	
		
			
				|  |  | -    with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
	
		
			
				|  |  | +    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
 | 
	
		
			
				|  |  | +        with lock:
 | 
	
		
			
				|  |  | +            start_time = time.time()
 | 
	
		
			
				|  |  | +            time.sleep(SLEEP_INTERVAL)
 | 
	
		
			
				|  |  | +            end_time = time.time()
 | 
	
		
			
				|  |  | +            elapsed_time = end_time - start_time
 | 
	
		
			
				|  |  | +            if elapsed_time < SLEEP_INTERVAL:
 | 
	
		
			
				|  |  | +                time.sleep(SLEEP_INTERVAL - elapsed_time)
 | 
	
		
			
				|  |  |          futures = {executor.submit(douyin_start, user_data): user_data for user_data in valid_data}
 | 
	
		
			
				|  |  |          # 等待所有任务执行完成
 | 
	
		
			
				|  |  |          for future in concurrent.futures.as_completed(futures):
 | 
	
	
		
			
				|  | @@ -58,7 +81,14 @@ def kuanshou_task():
 | 
	
		
			
				|  |  |      data = Material.get_all_gs_user("kuaishou")
 | 
	
		
			
				|  |  |      # 创建一个线程池
 | 
	
		
			
				|  |  |      valid_data = [user_data for user_data in data if user_data['sheet'] is not None]
 | 
	
		
			
				|  |  | -    with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
	
		
			
				|  |  | +    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
 | 
	
		
			
				|  |  | +        with lock:
 | 
	
		
			
				|  |  | +            start_time = time.time()
 | 
	
		
			
				|  |  | +            time.sleep(SLEEP_INTERVAL)
 | 
	
		
			
				|  |  | +            end_time = time.time()
 | 
	
		
			
				|  |  | +            elapsed_time = end_time - start_time
 | 
	
		
			
				|  |  | +            if elapsed_time < SLEEP_INTERVAL:
 | 
	
		
			
				|  |  | +                time.sleep(SLEEP_INTERVAL - elapsed_time)
 | 
	
		
			
				|  |  |          futures = {executor.submit(kuaishou_start, user_data): user_data for user_data in valid_data}
 | 
	
		
			
				|  |  |          # 等待所有任务执行完成
 | 
	
		
			
				|  |  |          for future in concurrent.futures.as_completed(futures):
 |