from typing import Dict, List, Optional

import aiohttp

from core.utils.extractors import safe_extract
from spiders.basespider import BaseSpider


class RecommendSpider(BaseSpider):
    """Recommend-mode spider: crawls the recommendation API page by page."""

    async def core_loop(self):
        """Core loop: request the recommendation API in successive rounds."""
        timeout = aiohttp.ClientTimeout(total=self.timeout)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            for loop_index in range(self.loop_times):
                # Check the daily ingestion quota before requesting more data.
                self.logger.info(f"Checking today's ingested video count for {self.platform}")
                if not await self.is_video_count_sufficient():
                    return

                # Fetch the recommendation list.
                self.logger.info(f"Fetching recommendation list data for {self.platform}")
                raw_data = await self.crawl_data(session)
                if not raw_data:
                    self.logger.info("Video list is empty, starting the next request")
                    await self.wait()
                    continue

                # Process the fetched items.
                await self.process_data(raw_data)

                # Wait before the next round.
                await self.wait()

    async def crawl_data(self, session) -> Optional[List[Dict]]:
        """Request the recommendation API (recommend-mode variant)."""
        # Build the request body from the template, filling in fields
        # (e.g. pagination cursors) from the previous response.
        request_body = self.request_preparer.prepare(
            self.request_body_template, self.last_response_data
        )
        response = await self.request_client.request(
            session=session,
            method=self.method,
            url=self.url,
            headers=self.headers,
            json=request_body,
        )
        self.last_response_data = response

        # Parse the recommendation list from the response.
        if not response:
            self.logger.warning("API response is empty")
            return None

        data_list = safe_extract(response, self.data_path)
        if not data_list:
            self.logger.info(f"API returned an empty video list: {response}")
            self.aliyun_log.logging(
                code="9021",
                message="API returned an empty video list",
                data=response,
            )
            return None

        return data_list