# spiders/advanced_factory.py
from typing import Dict, List, Type, Optional

from config.spider_config import SpiderConfig
from core.di.container import container
from spiders.basespider import BaseSpider
from spiders.spider_registry import SPIDER_CLASS_MAP, get_spider_class


class AdvancedSpiderFactory:
    """Advanced spider factory with automatic config mapping and dependency injection."""

    @classmethod
    def create_spider(cls, spider_key: str, rule_dict: Dict, user_list: List,
                      env: str = "prod", **kwargs) -> BaseSpider:
        """Create a spider instance for the given platform key."""
        # 1. Fetch the platform configuration
        config = SpiderConfig.get_platform_config(spider_key)
        # 2. Resolve the spider class
        spider_class = cls._resolve_spider_class(spider_key, config)
        # 3. Prepare the injected dependency services
        dependencies = cls._prepare_dependencies(config, **kwargs)
        # 4. Instantiate the spider
        return spider_class(
            rule_dict=rule_dict,
            user_list=user_list,
            env=env,
            **dependencies
        )

    @classmethod
    def create_spider_by_topic(cls, topic: str, task_id: str, env: str = "prod") -> Optional[BaseSpider]:
        """Create a spider from an MQ topic and task ID (full workflow)."""
        # 1. Map the topic to a spider key
        spider_key = cls._map_topic_to_spider_key(topic)
        if not spider_key:
            return None
        # 2. Load the task configuration from the database
        rule_dict, user_list = cls._get_task_config(task_id)
        if not rule_dict:
            return None
        # 3. Create the spider instance
        return cls.create_spider(spider_key, rule_dict, user_list, env)

    @classmethod
    def _resolve_spider_class(cls, spider_key: str, config) -> Type[BaseSpider]:
        """Resolve the spider class (supports auto-discovery)."""
        # Prefer an explicit entry in the registry
        if spider_key in SPIDER_CLASS_MAP:
            return SPIDER_CLASS_MAP[spider_key]
        # Otherwise infer the class from the naming convention
        if spider_key.endswith('recommend'):
            from spiders.recommendspider import RecommendSpider
            return RecommendSpider
        elif spider_key.endswith('author'):
            from spiders.authorspider import AuthorSpider
            return AuthorSpider
        # Fall back to the base class
        return BaseSpider

    @classmethod
    def _prepare_dependencies(cls, config, **kwargs):
        """Prepare the injected dependency services."""
        dependencies = {}
        # Database service
        if 'db_service' not in kwargs:
            dependencies['db_service'] = container.db_service(
                platform=config.platform,
                mode=config.mode
            )
        # MQ producer
        if 'mq_producer' not in kwargs:
            dependencies['mq_producer'] = container.mq_producer()
        return {**dependencies, **kwargs}

    @classmethod
    def _map_topic_to_spider_key(cls, topic: str) -> Optional[str]:
        """Map an MQ topic to a spider configuration key."""
        mapping = {
            "bszf_recommend_prod": "benshanzhufurecommend",
            "ynfqmm_recommend_prod": "yuannifuqimanmanrecommend",
            "xng_author_prod": "xiaoniangaoauthor"
        }
        return mapping.get(topic)

    @classmethod
    def _get_task_config(cls, task_id: str):
        """Load the task configuration from the database."""
        # TODO: implement the database query here.
        # Returns (rule_dict, user_list).
        return {}, []


# Global factory instance
spider_factory = AdvancedSpiderFactory()
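
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). The topic,
# task_id, spider_key, rule_dict and user_list values below are hypothetical;
# real values come from the MQ consumer and the task database, and running
# this requires the project's SpiderConfig and DI container to be configured.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Full workflow: map an MQ topic to a spider key, load the task config,
    # then build the spider with its injected dependencies. With the stub
    # _get_task_config above (which returns ({}, [])), this path yields None
    # until the database lookup is implemented.
    spider = spider_factory.create_spider_by_topic(
        topic="bszf_recommend_prod",
        task_id="demo-task-001",  # hypothetical task ID
        env="dev",
    )
    print("By topic:", type(spider).__name__ if spider else None)

    # Direct creation with an explicit rule_dict/user_list, bypassing the
    # database lookup entirely.
    spider = spider_factory.create_spider(
        spider_key="xiaoniangaoauthor",
        rule_dict={"period": {"min": 0, "max": 7}},  # hypothetical rule
        user_list=[{"uid": 123}],                    # hypothetical user
        env="dev",
    )
    print("Direct:", type(spider).__name__)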