@@ -6,22 +6,35 @@ from mq_http_sdk.mq_exception import MQExceptionBase
 
 sys.path.append(os.getcwd())
 
-from application.common import MysqlHelper, AliyunLogger, get_consumer, ack_message
+from application.common import AliyunLogger, get_consumer, ack_message
 from application.config import TopicGroup
 
 
 async def run(task_id, mode, platform):
     """
     Take the given arguments and run the spider code accordingly.
+    :param task_id: task id
+    :param mode: task type
+    :param platform: which platform to crawl
     :return: None
     """
-    # Create and wait for a subprocess
+    # Create an Aliyun log object
+    logger = AliyunLogger(platform=platform, mode=mode)
+    logger.logging(
+        code=1003,
+        message="{}: starting a crawl round".format(platform)
+    )
+    # Create a subprocess
     await asyncio.create_subprocess_shell(
         "python3 scheduler/run_spider_online.py --task_id {} --mode {} --platform {}".format(task_id, mode, platform)
     )
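
Note that `await asyncio.create_subprocess_shell(...)` only waits for the subprocess to be created, not for it to finish, so the spider keeps running after run() returns. A minimal sketch of how completion could also be awaited and logged, reusing the logger above; code=9999 is a hypothetical placeholder, not an error code defined by this project:

    proc = await asyncio.create_subprocess_shell(
        "python3 scheduler/run_spider_online.py --task_id {} --mode {} --platform {}".format(task_id, mode, platform)
    )
    # Block until the spider process exits, then inspect its status
    returncode = await proc.wait()
    if returncode != 0:
        # code=9999 is a hypothetical failure code for illustration only
        logger.logging(code=9999, message="{}: spider exited with code {}".format(platform, returncode))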
 
 
 async def consume_single_message(spider):
+    """
+    Consume a single message; if consumption succeeds, start a new spider coroutine.
+    :param spider: spider config (dict with topic and group)
+    """
     topic = spider['topic']
     group = spider['group']
     consumer = get_consumer(topic, group)
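
The consume/ack body of consume_single_message is elided between these hunks (as is the project's own ack_message helper). For orientation, a minimal poll-and-ack sketch against the Aliyun MQ HTTP SDK that this file imports; the batch size, wait time, and message schema are assumptions, not this project's actual logic:

    import json

    try:
        # Long-poll the topic: at most 1 message, waiting up to 3 seconds
        recv_msgs = consumer.consume_message(1, 3)
        for msg in recv_msgs:
            # Ack first so the broker does not redeliver the message
            consumer.ack_message([msg.receipt_handle])
            # Hypothetical schema: task parameters carried in the message body
            task = json.loads(msg.message_body)
            await run(task["task_id"], task["mode"], task["platform"])
    except MQExceptionBase as err:
        # "MessageNotExist" just means the long poll timed out with no messages
        if err.type != "MessageNotExist":
            raise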
@@ -54,6 +67,9 @@ async def consume_single_message(spider):
 
 
 async def main():
+    """
+    Main function: build the spider list and consume in a loop.
+    """
     spider_list = TopicGroup().produce()
     while spider_list:
         async_tasks = []
@@ -61,6 +77,7 @@ async def main():
             task = asyncio.create_task(consume_single_message(spider))
             async_tasks.append(task)
         await asyncio.gather(*async_tasks)
+        await asyncio.sleep(60)  # poll MQ once per minute
 
 
 if __name__ == '__main__':
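
The diff ends at the entry guard, so the actual invocation is not shown; the conventional way to start this coroutine, assuming no extra setup, would be:

    if __name__ == '__main__':
        asyncio.run(main())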