# spider_registry.py (fully annotated version)
"""Spider registry module: maintains the mapping from topic to spider class."""
from typing import Dict, List, Type

from spiders.benshanzhufu_recommend import BenshanzhufuRecommend
from spiders.base_spider import BaseSpider
from core.utils.log.logger_manager import LoggerManager

logger = LoggerManager.get_logger()
# Not used in this module's own functions; kept as a module-level name
# because other modules may import it from here.
aliyun_log = LoggerManager.get_aliyun_logger()

# Spider class map: topic name -> spider class.
# Each key is an MQ topic name; each value is a class derived from BaseSpider.
SPIDER_CLASS_MAP: Dict[str, Type[BaseSpider]] = {
    "bszf_recommend_prod": BenshanzhufuRecommend,
    # Add a mapping here when introducing a new spider.
}


def get_spider_class(topic: str) -> Type[BaseSpider]:
    """
    Return the spider class registered for the given MQ topic.

    Args:
        topic: Topic name of the MQ message; must match a key in
            SPIDER_CLASS_MAP.

    Returns:
        The registered spider class, a subclass of BaseSpider.

    Raises:
        ValueError: If the topic is not registered.
        TypeError: If the registered value is not a BaseSpider subclass.
    """
    # Test membership explicitly instead of relying on truthiness, so that a
    # registered-but-invalid value (e.g. None) is reported as an invalid
    # registration (TypeError) rather than as an unknown topic (ValueError).
    if topic not in SPIDER_CLASS_MAP:
        available = ', '.join(SPIDER_CLASS_MAP)
        logger.error(f"未注册的topic: {topic},可用topic: {available}")
        raise ValueError(f"未知topic: {topic}")

    spider_class = SPIDER_CLASS_MAP[topic]

    # isinstance(..., type) must come first: issubclass() itself raises
    # TypeError when its first argument is not a class.
    if not isinstance(spider_class, type) or not issubclass(spider_class, BaseSpider):
        logger.error(f"非法爬虫类: {spider_class},必须继承自BaseSpider")
        raise TypeError(f"Invalid spider class: {spider_class}")

    return spider_class


def list_registered_topics() -> List[str]:
    """Return a list of all registered topic names."""
    return list(SPIDER_CLASS_MAP)