|
@@ -12,22 +12,21 @@ from spiders.spider_registry import get_spider_class
|
|
logger = LoggerManager.get_logger()
|
|
logger = LoggerManager.get_logger()
|
|
aliyun_logger = LoggerManager.get_aliyun_logger()
|
|
aliyun_logger = LoggerManager.get_aliyun_logger()
|
|
|
|
|
|
-async def async_handle_topic(topic: str,stop_event: asyncio.Event):
|
|
|
|
|
|
+
|
|
|
|
+async def async_handle_topic(topic: str, stop_event: asyncio.Event):
|
|
"""
|
|
"""
|
|
单个 topic 的消费逻辑,运行在协程中:
|
|
单个 topic 的消费逻辑,运行在协程中:
|
|
- - 从 MQ 中消费消息;
|
|
|
|
|
|
+ - 从 MQ 中消费消息(单条处理,处理完再拉取下一条);
|
|
- 根据消息内容执行对应爬虫;
|
|
- 根据消息内容执行对应爬虫;
|
|
- 使用异步数据库服务查询配置;
|
|
- 使用异步数据库服务查询配置;
|
|
- 记录日志、确认消息。
|
|
- 记录日志、确认消息。
|
|
"""
|
|
"""
|
|
-
|
|
|
|
-
|
|
|
|
- # 每个 topic 创建独立的 consumer 实例
|
|
|
|
|
|
+ # 每个 topic 创建独立的 consumer 实例(使用优化后的 AsyncRocketMQConsumer)
|
|
from services.async_mq_consumer import AsyncRocketMQConsumer
|
|
from services.async_mq_consumer import AsyncRocketMQConsumer
|
|
-
|
|
|
|
consumer = AsyncRocketMQConsumer(topic_name=topic, group_id=topic)
|
|
consumer = AsyncRocketMQConsumer(topic_name=topic, group_id=topic)
|
|
|
|
|
|
async def handle_single_message(message):
|
|
async def handle_single_message(message):
|
|
|
|
+ """处理单条消息的业务逻辑(不含拉取和循环)"""
|
|
trace_id = generate_trace_id()
|
|
trace_id = generate_trace_id()
|
|
try:
|
|
try:
|
|
payload = json.loads(message.message_body)
|
|
payload = json.loads(message.message_body)
|
|
@@ -41,19 +40,22 @@ async def async_handle_topic(topic: str,stop_event: asyncio.Event):
|
|
trace_id=trace_id,
|
|
trace_id=trace_id,
|
|
account=topic
|
|
account=topic
|
|
)
|
|
)
|
|
|
|
+
|
|
|
|
+ # 从数据库查询配置
|
|
async with AsyncMysqlService() as mysql:
|
|
async with AsyncMysqlService() as mysql:
|
|
user_list = await mysql.get_user_list(task_id)
|
|
user_list = await mysql.get_user_list(task_id)
|
|
rule_dict = await mysql.get_rule_dict(task_id)
|
|
rule_dict = await mysql.get_rule_dict(task_id)
|
|
|
|
|
|
|
|
+ # 执行爬虫任务
|
|
CrawlerClass = get_spider_class(topic)
|
|
CrawlerClass = get_spider_class(topic)
|
|
crawler = CrawlerClass(
|
|
crawler = CrawlerClass(
|
|
rule_dict=rule_dict,
|
|
rule_dict=rule_dict,
|
|
user_list=user_list,
|
|
user_list=user_list,
|
|
trace_id=trace_id
|
|
trace_id=trace_id
|
|
)
|
|
)
|
|
- await crawler.run()
|
|
|
|
|
|
+ await crawler.run() # 爬虫成功执行后再确认消息
|
|
|
|
|
|
- # ack 由 run 成功后执行
|
|
|
|
|
|
+ # 确认消息(单条消息处理成功后才 Ack)
|
|
await consumer.ack_message(message.receipt_handle)
|
|
await consumer.ack_message(message.receipt_handle)
|
|
|
|
|
|
logger.info(f"{trace_id} - 任务 {task_id} 执行成功并已 Ack")
|
|
logger.info(f"{trace_id} - 任务 {task_id} 执行成功并已 Ack")
|
|
@@ -61,19 +63,15 @@ async def async_handle_topic(topic: str,stop_event: asyncio.Event):
|
|
code="1010",
|
|
code="1010",
|
|
message="任务执行成功",
|
|
message="任务执行成功",
|
|
trace_id=trace_id,
|
|
trace_id=trace_id,
|
|
- data={
|
|
|
|
- "task_id": task_id,
|
|
|
|
- "topic": topic
|
|
|
|
- },
|
|
|
|
|
|
+ data={"task_id": task_id, "topic": topic},
|
|
account=topic
|
|
account=topic
|
|
-
|
|
|
|
)
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
- logger.error(f"{trace_id} - 任务处理失败: {e} /n {traceback.format_exc()}")
|
|
|
|
|
|
+ logger.error(f"{trace_id} - 任务处理失败: {e} \n {traceback.format_exc()}")
|
|
aliyun_logger.logging(
|
|
aliyun_logger.logging(
|
|
code="9001",
|
|
code="9001",
|
|
- message=f"处理消息失败: {str(e)} /n {traceback.format_exc()}",
|
|
|
|
|
|
+ message=f"处理消息失败: {str(e)} \n {traceback.format_exc()}",
|
|
trace_id=trace_id,
|
|
trace_id=trace_id,
|
|
data={
|
|
data={
|
|
"error_type": type(e).__name__,
|
|
"error_type": type(e).__name__,
|
|
@@ -82,22 +80,36 @@ async def async_handle_topic(topic: str,stop_event: asyncio.Event):
|
|
},
|
|
},
|
|
account=topic
|
|
account=topic
|
|
)
|
|
)
|
|
- # 自动重启消费循环
|
|
|
|
- while not stop_event.is_set():
|
|
|
|
|
|
+ # 处理失败不 Ack,消息会被 MQ 重新投递(依赖 MQ 的重试机制)
|
|
|
|
+
|
|
|
|
+ # 独立的消费循环:拉取消息并调用处理函数
|
|
|
|
+ async def consume_loop():
|
|
|
|
+ logger.info(f"[{topic}] 启动消费循环,开始拉取消息...")
|
|
|
|
+ while not stop_event.is_set(): # 监听停止信号,支持优雅退出
|
|
try:
|
|
try:
|
|
- await consumer.run_forever(handle_single_message)
|
|
|
|
|
|
+ # 拉取单条消息(依赖优化后的 receive_message,无消息时返回 None 不报错)
|
|
|
|
+ message = await consumer.receive_message()
|
|
|
|
+ if message:
|
|
|
|
+ # 有消息则处理,处理完成后再进入下一次循环
|
|
|
|
+ await handle_single_message(message)
|
|
|
|
+ else:
|
|
|
|
+ # 无消息时短暂休眠,避免频繁空轮询
|
|
|
|
+ await asyncio.sleep(1)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
|
+ # 非消息处理的异常(如 MQ 连接失败),记录并重试
|
|
|
|
+ logger.error(f"[{topic}] 消费循环异常: {e}", exc_info=True)
|
|
aliyun_logger.logging(
|
|
aliyun_logger.logging(
|
|
code="9002",
|
|
code="9002",
|
|
- message=f"{topic} 消费循环异常即将重启: {str(e)}",
|
|
|
|
- data={
|
|
|
|
- "error_type": type(e).__name__,
|
|
|
|
- "stack_trace": traceback.format_exc(),
|
|
|
|
- },
|
|
|
|
|
|
+ message=f"{topic} 消费循环异常,即将重试: {str(e)}",
|
|
|
|
+ data={"error_type": type(e).__name__, "stack_trace": traceback.format_exc()},
|
|
account=topic
|
|
account=topic
|
|
)
|
|
)
|
|
- logger.warning(f"[{topic}] 消费循环异常: {e},5秒后重启")
|
|
|
|
- await asyncio.sleep(5)
|
|
|
|
|
|
+ await asyncio.sleep(5) # 异常后延迟重试,减轻服务压力
|
|
|
|
+
|
|
|
|
+ logger.info(f"[{topic}] 消费循环已停止(收到退出信号)")
|
|
|
|
+
|
|
|
|
+ # 启动消费循环(这是消费逻辑的入口)
|
|
|
|
+ await consume_loop()
|
|
|
|
|
|
|
|
|
|
async def run_all_topics(topics: List[str]):
|
|
async def run_all_topics(topics: List[str]):
|
|
@@ -105,6 +117,7 @@ async def run_all_topics(topics: List[str]):
|
|
loop = asyncio.get_running_loop()
|
|
loop = asyncio.get_running_loop()
|
|
|
|
|
|
def shutdown():
|
|
def shutdown():
|
|
|
|
+ """处理停止信号(如 Ctrl+C),触发优雅退出"""
|
|
logger.warning("[系统] 收到停止信号,准备优雅退出...")
|
|
logger.warning("[系统] 收到停止信号,准备优雅退出...")
|
|
aliyun_logger.logging(
|
|
aliyun_logger.logging(
|
|
code="1600",
|
|
code="1600",
|
|
@@ -112,33 +125,31 @@ async def run_all_topics(topics: List[str]):
|
|
)
|
|
)
|
|
stop_event.set()
|
|
stop_event.set()
|
|
|
|
|
|
|
|
+ # 注册信号处理(支持 Ctrl+C 和 kill 命令)
|
|
for sig in [signal.SIGINT, signal.SIGTERM]:
|
|
for sig in [signal.SIGINT, signal.SIGTERM]:
|
|
loop.add_signal_handler(sig, shutdown)
|
|
loop.add_signal_handler(sig, shutdown)
|
|
|
|
|
|
|
|
+ # 为每个 topic 创建独立协程任务
|
|
tasks = [asyncio.create_task(async_handle_topic(topic, stop_event)) for topic in topics]
|
|
tasks = [asyncio.create_task(async_handle_topic(topic, stop_event)) for topic in topics]
|
|
|
|
|
|
- await stop_event.wait() # 等待停止信号
|
|
|
|
|
|
+ await stop_event.wait() # 等待退出信号
|
|
|
|
|
|
- logger.warning(f"[系统] 正在取消所有消费任务...{tasks}")
|
|
|
|
- aliyun_logger.logging(
|
|
|
|
- code="1601",
|
|
|
|
- message="[系统] 收到停止信号,准备优雅退出...",
|
|
|
|
- data=f"任务列表{tasks}"
|
|
|
|
- )
|
|
|
|
|
|
+ # 取消所有任务并等待结束
|
|
|
|
+ logger.warning(f"[系统] 正在取消所有消费任务...")
|
|
for task in tasks:
|
|
for task in tasks:
|
|
task.cancel()
|
|
task.cancel()
|
|
|
|
|
|
|
|
+ # 收集任务结果,忽略取消异常
|
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
-
|
|
|
|
for idx, result in enumerate(results):
|
|
for idx, result in enumerate(results):
|
|
- if isinstance(result, Exception):
|
|
|
|
|
|
+ if isinstance(result, Exception) and not isinstance(result, asyncio.CancelledError):
|
|
logger.error(f"[系统] 任务 {topics[idx]} 异常退出: {result}")
|
|
logger.error(f"[系统] 任务 {topics[idx]} 异常退出: {result}")
|
|
|
|
|
|
logger.warning(f"[系统] 所有任务已退出,进程已关闭...")
|
|
logger.warning(f"[系统] 所有任务已退出,进程已关闭...")
|
|
aliyun_logger.logging(
|
|
aliyun_logger.logging(
|
|
code="1602",
|
|
code="1602",
|
|
message="[系统] 所有任务已退出,进程已关闭...",
|
|
message="[系统] 所有任务已退出,进程已关闭...",
|
|
- data=f"任务列表{tasks}"
|
|
|
|
|
|
+ data={"task_count": len(tasks)}
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|