spider_registry.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. # spider_registry.py完整注释版
  2. """爬虫注册表模块:维护topic到爬虫类的映射关系"""
  3. from core.utils.log.logger_manager import LoggerManager
  4. from spiders.base_spider import BaseSpider
  5. from spiders.benshanzhufu_recommend import BenshanzhufuRecommend
  6. logger = LoggerManager.get_logger()
  7. aliyun_log = LoggerManager.get_aliyun_logger()
  8. # Spider class map: topic name -> spider class.
  9. # Format: each key is an MQ topic name; each value is a spider class derived from BaseSpider.
  10. SPIDER_CLASS_MAP = {
  11. "bszf_recommend_prod": BenshanzhufuRecommend,
  12. # Add a mapping here when introducing a new spider.
  13. }
  def get_spider_class(topic: str):
      """Return the spider class registered for an MQ topic.

      Args:
          topic: Topic name of the MQ message; must match a key in
              SPIDER_CLASS_MAP.

      Returns:
          The registered spider class, a subclass of BaseSpider.

      Raises:
          ValueError: If ``topic`` has no entry in SPIDER_CLASS_MAP.
          TypeError: If the value registered for ``topic`` is not a class
              derived from BaseSpider.
      """
      # Test membership explicitly instead of relying on the truthiness of the
      # looked-up value: a topic registered with a falsy value (e.g. None) is a
      # bad registration (TypeError below), not an unknown topic.
      if topic not in SPIDER_CLASS_MAP:
          available = ", ".join(map(str, SPIDER_CLASS_MAP))
          logger.error(f"未注册的topic: {topic},可用topic: {available}")
          raise ValueError(f"未知topic: {topic}")

      spider_class = SPIDER_CLASS_MAP[topic]

      # isinstance(..., type) guards issubclass(), which itself raises a bare
      # TypeError when handed a non-class object.
      is_spider = isinstance(spider_class, type) and issubclass(spider_class, BaseSpider)
      if not is_spider:
          logger.error(f"非法爬虫类: {spider_class},必须继承自BaseSpider")
          raise TypeError(f"Invalid spider class: {spider_class}")

      return spider_class
  def list_registered_topics():
      """Return a new list holding every topic name currently in SPIDER_CLASS_MAP."""
      # Unpacking a dict iterates its keys in insertion order.
      return [*SPIDER_CLASS_MAP]