data_assign_main.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. # -*- coding: utf-8 -*-
  2. from common import Material
  3. from extract_data.douyin.douyin_author import douyinAuthor
  4. from extract_data.kuaishou.kuaishou_author import kuaishouAuthor
  5. from extract_data.zhannei.zhannei_author import ZhanNeiAuthor
  6. import schedule
  7. import time
  8. import concurrent.futures
  9. import threading
  10. import os
  11. # 控制读写速度的参数
  12. MAX_BPS = 10 * 1024 * 1024 # 120MB/s
  13. MAX_WORKERS = os.cpu_count() * 2 # 线程池最大工作线程数量
  14. READ_WRITE_CHUNK_SIZE = 1024 * 1024 # 每次读写的块大小 (1MB)
  15. SLEEP_INTERVAL = READ_WRITE_CHUNK_SIZE / MAX_BPS # 控制每次读写的延迟时间
  16. # 全局锁,用于同步读写操作
  17. lock = threading.Lock()
  18. def douyin_start(user_data):
  19. print(f"执行抖音数据抓取{user_data}")
  20. douyinAuthor.get_videoList(user_data)
  21. def kuaishou_start(user_data):
  22. print(f"执行快手数据抓取{user_data}")
  23. kuaishouAuthor.get_kuaishou_videoList(user_data)
  24. def zhannei_start(user_data):
  25. print(f"执行站内数据抓取{user_data}")
  26. ZhanNeiAuthor.get_zhannei_videoList(user_data)
  27. # data = Material.get_all_gs_user("douyin")
  28. # douyin_start(data[0])
  29. # 定义定时任务
  30. def zhannei_task():
  31. data = Material.get_all_gs_user("zhannei")
  32. # 创建一个线程池
  33. valid_data = [user_data for user_data in data if user_data['sheet'] is None]
  34. with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
  35. with lock:
  36. start_time = time.time()
  37. time.sleep(SLEEP_INTERVAL)
  38. end_time = time.time()
  39. elapsed_time = end_time - start_time
  40. if elapsed_time < SLEEP_INTERVAL:
  41. time.sleep(SLEEP_INTERVAL - elapsed_time)
  42. futures = [executor.submit(zhannei_start, user_data) for user_data in valid_data]
  43. # 等待所有任务执行完成
  44. for future in concurrent.futures.as_completed(futures):
  45. # 获取每个任务的执行结果
  46. result = future.result()
  47. print("处理结果:", result)
  48. print("站内数据抓取定时任务执行完成")
  49. # 定义定时任务
  50. def douyin_task():
  51. data = Material.get_all_gs_user("douyin")
  52. # 创建一个线程池
  53. valid_data = [user_data for user_data in data if user_data['sheet'] is not None]
  54. with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
  55. with lock:
  56. start_time = time.time()
  57. time.sleep(SLEEP_INTERVAL)
  58. end_time = time.time()
  59. elapsed_time = end_time - start_time
  60. if elapsed_time < SLEEP_INTERVAL:
  61. time.sleep(SLEEP_INTERVAL - elapsed_time)
  62. futures = {executor.submit(douyin_start, user_data): user_data for user_data in valid_data}
  63. # 等待所有任务执行完成
  64. for future in concurrent.futures.as_completed(futures):
  65. # 获取每个任务的执行结果
  66. result = future.result()
  67. print("处理结果:", result)
  68. print("抖音数据抓取定时任务执行完成")
  69. # 定义定时任务
  70. def kuanshou_task():
  71. data = Material.get_all_gs_user("kuaishou")
  72. # 创建一个线程池
  73. valid_data = [user_data for user_data in data if user_data['sheet'] is not None]
  74. with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
  75. with lock:
  76. start_time = time.time()
  77. time.sleep(SLEEP_INTERVAL)
  78. end_time = time.time()
  79. elapsed_time = end_time - start_time
  80. if elapsed_time < SLEEP_INTERVAL:
  81. time.sleep(SLEEP_INTERVAL - elapsed_time)
  82. futures = {executor.submit(kuaishou_start, user_data): user_data for user_data in valid_data}
  83. # 等待所有任务执行完成
  84. for future in concurrent.futures.as_completed(futures):
  85. # 获取每个任务的执行结果
  86. result = future.result()
  87. print("处理结果:", result)
  88. print("快手数据抓取定时任务执行完成.")
  89. schedule.every().day.at("19:20").do(kuanshou_task)
  90. schedule.every().day.at("19:30").do(douyin_task)
  91. schedule.every().day.at("18:00").do(zhannei_task)
  92. kuanshou_task()
  93. douyin_task()
  94. zhannei_task()
  95. # 持续运行,直到手动终止
  96. while True:
  97. schedule.run_pending()
  98. time.sleep(1)