|
@@ -14,6 +14,7 @@ from core.video_processor import VideoProcessor
|
|
|
from services.async_mysql_service import AsyncMysqlService
|
|
|
from services.async_mq_producer import AsyncMQProducer
|
|
|
from core.base.async_request_client import AsyncRequestClient
|
|
|
+from services.async_redis_service import AsyncRedisService
|
|
|
|
|
|
|
|
|
class BaseSpider(ABC):
|
|
@@ -22,7 +23,8 @@ class BaseSpider(ABC):
|
|
|
def __init__(self, rule_dict: Dict, user_list: List, env: str = "prod",
|
|
|
request_client: AsyncRequestClient = None,
|
|
|
db_service: AsyncMysqlService = None,
|
|
|
- mq_producer: AsyncMQProducer = None):
|
|
|
+ mq_producer: AsyncMQProducer = None,
|
|
|
+ redis_service: AsyncRedisService = None):
|
|
|
# 基础属性
|
|
|
self.rule_dict = rule_dict
|
|
|
self.user_list = user_list
|
|
@@ -32,6 +34,7 @@ class BaseSpider(ABC):
|
|
|
self.request_client = request_client
|
|
|
self.db_service = db_service
|
|
|
self.mq_producer = mq_producer
|
|
|
+ self.redis_service = redis_service
|
|
|
|
|
|
# 通过类名获取配置
|
|
|
class_name = self.__class__.__name__.lower()
|
|
@@ -96,6 +99,9 @@ class BaseSpider(ABC):
|
|
|
platform=self.config.platform,
|
|
|
mode=self.config.mode
|
|
|
)
|
|
|
+ if not self.redis_service:
|
|
|
+ # RedisManager.init(redis_url=settings.redis_url)
|
|
|
+ self.redis_service = AsyncRedisService()
|
|
|
|
|
|
def _setup_from_config(self):
|
|
|
"""从配置中设置属性"""
|
|
@@ -137,6 +143,19 @@ class BaseSpider(ABC):
|
|
|
|
|
|
async def process_data(self, data: List[Dict]):
|
|
|
"""处理数据"""
|
|
|
+ # 处理data为None的情况
|
|
|
+ if data is None:
|
|
|
+ data_length = 0
|
|
|
+ else:
|
|
|
+ data_length = len(data)
|
|
|
+
|
|
|
+ self.aliyun_log.logging(
|
|
|
+ code="1001",
|
|
|
+ message=f"获取到的列表长度:{data_length}",
|
|
|
+ data=data_length,
|
|
|
+ )
|
|
|
+ if not data:
|
|
|
+ return 0
|
|
|
success_count = 0
|
|
|
for item in data:
|
|
|
self.aliyun_log.logging(
|
|
@@ -153,7 +172,7 @@ class BaseSpider(ABC):
|
|
|
except Exception as e:
|
|
|
self.logger.error(f"处理单条数据失败: {e}")
|
|
|
self.stats['fail'] += 1
|
|
|
- self.logger.info(f"批次处理完成: 成功 {success_count}/{len(data)}")
|
|
|
+ self.logger.info(f"批次处理完成: 成功 {success_count}/{data_length}")
|
|
|
return success_count
|
|
|
|
|
|
|
|
@@ -211,8 +230,8 @@ class BaseSpider(ABC):
|
|
|
return True
|
|
|
|
|
|
current_count = await self.db_service.get_today_videos()
|
|
|
+ self.logger.info(f"已抓取数量: {current_count}/{max_count}")
|
|
|
if current_count >= max_count:
|
|
|
- self.logger.info(f"视频数量达到当日最大值: {current_count}/{max_count}")
|
|
|
self.aliyun_log.logging(
|
|
|
code="1011",
|
|
|
message="视频数量达到最大值",
|