# spider_registry.py -- fully annotated version
"""Spider registry module: maintains the mapping from MQ topic to spider class."""
from core.utils.log.logger_manager import LoggerManager
from spiders.basespider import BaseSpider
from spiders.benshanzhufu_recommend import BenshanzhufuRecommend
from spiders.xiaoniangao_author import XiaoniangaoAuthor
from spiders.yuannifuqimanman_recommend import YuannifuqimanmanRecommend
  8. logger = LoggerManager.get_logger()
  9. aliyun_log = LoggerManager.get_aliyun_logger()
  10. # 爬虫类映射表:topic名称 -> 爬虫类
  11. # 格式说明:键为MQ主题名称,值为继承自BaseSpider的爬虫类
  12. SPIDER_CLASS_MAP = {
  13. "bszf_recommend_prod": BenshanzhufuRecommend,
  14. "ynfqmm_recommend_prod": YuannifuqimanmanRecommend,
  15. "xng_author_prod": XiaoniangaoAuthor,
  16. # 新增爬虫时在此添加映射
  17. }
  18. def get_spider_class(topic: str):
  19. """
  20. 根据MQ主题获取对应的爬虫类
  21. Args:
  22. topic: MQ消息的主题名称,需与SPIDER_CLASS_MAP中的键一致
  23. Returns:
  24. 对应的爬虫类,继承自BaseSpider
  25. Raises:
  26. ValueError: 当topic未注册时抛出
  27. TypeError: 当注册的类不是BaseSpider子类时抛出
  28. """
  29. spider_class = SPIDER_CLASS_MAP.get(topic)
  30. if not spider_class:
  31. available = ', '.join(SPIDER_CLASS_MAP.keys())
  32. logger.error(f"未注册的topic: {topic},可用topic: {available}")
  33. raise ValueError(f"未知topic: {topic}")
  34. if not isinstance(spider_class, type) or not issubclass(spider_class, BaseSpider):
  35. logger.error(f"非法爬虫类: {spider_class},必须继承自BaseSpider")
  36. raise TypeError(f"Invalid spider class: {spider_class}")
  37. return spider_class
  38. def list_registered_topics():
  39. """获取所有已注册的topic列表"""
  40. return list(SPIDER_CLASS_MAP.keys())