# spiders/recommendspider.py
from typing import List, Dict, Optional
from spiders.basespider import BaseSpider
from core.utils.extractors import safe_extract
  5. class RecommendSpider(BaseSpider):
  6. """推荐模式爬虫 - 重新封装版本"""
  7. def __init__(self, rule_dict: Dict, user_list: List, env: str = "prod",
  8. request_client=None, db_service=None, mq_producer=None):
  9. super().__init__(rule_dict, user_list, env, request_client, db_service, mq_producer)
  10. self.last_response = None
  11. async def execute(self):
  12. """执行核心逻辑 - 使用 make_request 方法"""
  13. if not await self.is_video_count_sufficient():
  14. self.logger.info("视频数量已达到上限,跳过执行")
  15. return
  16. iteration = 0
  17. while iteration < self.loop_times and await self.is_video_count_sufficient():
  18. self.logger.info(f"执行第 {iteration + 1} 轮")
  19. # 准备请求体
  20. request_body = self.request_preparer.prepare(
  21. self.request_body_template,
  22. self.last_response or {}
  23. )
  24. # 发送请求 - 使用 make_request 方法
  25. response = await self.make_request(request_body)
  26. if not response:
  27. self.logger.info("未获取到响应数据")
  28. iteration += 1
  29. await self.wait_between_iterations()
  30. continue
  31. self.last_response = response
  32. # 提取数据
  33. data_list = safe_extract(response, self.data_path)
  34. if not data_list:
  35. self.logger.info("未获取到数据")
  36. iteration += 1
  37. await self.wait_between_iterations()
  38. continue
  39. # 处理数据
  40. await self.process_data(data_list)
  41. iteration += 1
  42. await self.wait_between_iterations()