# scheduler_main.py - crawler scheduler main program
  2. import asyncio
  3. import json
  4. import time
  5. import traceback
  6. import sys
  7. import os
  8. from crawler_controller import CrawlerController
  9. from application.common.log import Local
  10. from application.common import AliyunLogger
  11. from crawler_worker.universal_crawler import AsyncCrawler
  12. async def main():
  13. """主函数"""
  14. # 设置日志
  15. logger = AliyunLogger(platform="system", mode="manager")
  16. try:
  17. # 从环境变量获取配置
  18. config_topic = os.getenv("CONFIG_TOPIC", "crawler_config")
  19. config_group = os.getenv("CONFIG_GROUP", "crawler_config_group")
  20. # 创建爬虫控制器
  21. controller = AsyncCrawler(
  22. platform: str,
  23. mode: str,
  24. )
  25. # 启动控制器
  26. await controller.run()
  27. # 保持主线程运行
  28. while True:
  29. await asyncio.sleep(60)
  30. except Exception as e:
  31. tb = traceback.format_exc()
  32. message = f"主程序发生错误: {e}\n{tb}"
  33. logger.logging(code="1006", message=message)
  34. sys.exit(1)
  35. if __name__ == "__main__":
  36. # 运行主事件循环
  37. asyncio.run(main())