1234567891011121314151617181920212223242526272829303132333435363738394041424344454647 |
# spider_registry.py (fully annotated version)
"""Spider registry module: maintains the mapping from topic to spider class."""
- from spiders.benshanzhufu_recommend import BenshanzhufuRecommend
- from spiders.base_spider import BaseSpider
- from core.utils.log.logger_manager import LoggerManager
# Module-level loggers obtained from LoggerManager. The second one is
# presumably the Aliyun log sink (inferred from the factory name; confirm).
logger = LoggerManager.get_logger()
aliyun_log = LoggerManager.get_aliyun_logger()

# Spider class registry: topic name -> spider class.
# Each key is an MQ topic name; each value is a spider class derived from
# BaseSpider.
SPIDER_CLASS_MAP = {
    "bszf_recommend_prod": BenshanzhufuRecommend,
    # Add a mapping here when introducing a new spider.
}
def get_spider_class(topic: str):
    """
    Look up the spider class registered for an MQ topic.

    Args:
        topic: Topic name of the MQ message; must match a key of
            SPIDER_CLASS_MAP exactly.

    Returns:
        The class registered for ``topic``; it is always a subclass of
        BaseSpider.

    Raises:
        ValueError: If ``topic`` is not a key of SPIDER_CLASS_MAP.
        TypeError: If the value registered for ``topic`` is not a class
            derived from BaseSpider (this includes falsy placeholders such
            as ``None``).
    """
    # Test key membership rather than the truthiness of the looked-up value,
    # so a topic registered with a falsy value is reported as an invalid
    # registration (TypeError) instead of as an unknown topic (ValueError).
    if topic not in SPIDER_CLASS_MAP:
        available = ', '.join(SPIDER_CLASS_MAP)
        logger.error(f"未注册的topic: {topic},可用topic: {available}")
        raise ValueError(f"未知topic: {topic}")

    spider_class = SPIDER_CLASS_MAP[topic]

    # The isinstance(..., type) check must come first: issubclass() itself
    # raises TypeError when its first argument is not a class.
    if not (isinstance(spider_class, type) and issubclass(spider_class, BaseSpider)):
        logger.error(f"非法爬虫类: {spider_class},必须继承自BaseSpider")
        raise TypeError(f"Invalid spider class: {spider_class}")

    return spider_class
def list_registered_topics():
    """Return a new list holding every topic name registered in SPIDER_CLASS_MAP."""
    # Iterating a dict yields its keys, so list() over the mapping copies the
    # key set without touching the registry itself.
    topics = list(SPIDER_CLASS_MAP)
    return topics
|