# spider_registry.py - fully annotated version
"""Spider registry module: maintains the mapping from topics to spider classes."""
  3. from core.utils.log.logger_manager import LoggerManager
  4. from spiders.base_spider import BaseSpider
  5. from spiders.benshanzhufu_recommend import BenshanzhufuRecommend
  6. from spiders.yuannifuqimanman_recommend import YuannifuqimanmanRecommend
# Module-level loggers obtained from the project's LoggerManager.
logger = LoggerManager.get_logger()
# Second logger from get_aliyun_logger(); not referenced in the visible code.
aliyun_log = LoggerManager.get_aliyun_logger()

# Spider class map: topic name -> spider class.
# Format: each key is an MQ topic name, each value is a spider class that
# inherits from BaseSpider.
SPIDER_CLASS_MAP = {
    "bszf_recommend_prod": BenshanzhufuRecommend,
    "ynfqmm_recommend_prod": YuannifuqimanmanRecommend,
    # Add the mapping here when introducing a new spider.
}
  16. def get_spider_class(topic: str):
  17. """
  18. 根据MQ主题获取对应的爬虫类
  19. Args:
  20. topic: MQ消息的主题名称,需与SPIDER_CLASS_MAP中的键一致
  21. Returns:
  22. 对应的爬虫类,继承自BaseSpider
  23. Raises:
  24. ValueError: 当topic未注册时抛出
  25. TypeError: 当注册的类不是BaseSpider子类时抛出
  26. """
  27. spider_class = SPIDER_CLASS_MAP.get(topic)
  28. if not spider_class:
  29. available = ', '.join(SPIDER_CLASS_MAP.keys())
  30. logger.error(f"未注册的topic: {topic},可用topic: {available}")
  31. raise ValueError(f"未知topic: {topic}")
  32. if not isinstance(spider_class, type) or not issubclass(spider_class, BaseSpider):
  33. logger.error(f"非法爬虫类: {spider_class},必须继承自BaseSpider")
  34. raise TypeError(f"Invalid spider class: {spider_class}")
  35. return spider_class
  36. def list_registered_topics():
  37. """获取所有已注册的topic列表"""
  38. return list(SPIDER_CLASS_MAP.keys())