@@ -71,18 +71,22 @@ def main(log_type, crawler, topic_name, group_id, env):
# ack_mq_message
ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)

- # Handle the crawler business logic
+ # Parse task_dict
task_dict = task_fun_mq(msg.message_body)['task_dict']
Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")

+ # Parse rule_dict
rule_dict = task_fun_mq(msg.message_body)['rule_dict']
Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}\n")
Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}\n")

+ # Parse user_list
task_id = task_dict['id']
select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
+
+ # Compute the number of crawler scripts to launch (crawler_num)
user_num = len(user_list)
chunk_size = 100  # number of users handled by each process
crawler_num = int(user_num // chunk_size)  # round down (floor division)
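
For context on the new crawler_num calculation: user_num // chunk_size rounds down, so a remainder of fewer than 100 users is not counted in crawler_num, and how (or whether) that remainder is handled is not visible in this hunk. Below is a minimal standalone sketch of the chunking arithmetic, assuming user_list is meant to be split into at most chunk_size users per process; split_users is a hypothetical helper for illustration only, not part of the patch.

# Sketch only: illustrates the chunking arithmetic behind crawler_num.
# split_users is a hypothetical helper and is NOT part of this patch.
from typing import Any, Dict, List


def split_users(user_list: List[Dict[str, Any]], chunk_size: int = 100) -> List[List[Dict[str, Any]]]:
    user_num = len(user_list)
    crawler_num = user_num // chunk_size  # same floor division as the patch
    if user_num % chunk_size:
        crawler_num += 1  # one extra chunk covers the remainder (< chunk_size users)
    # Slicing past the end of a list is safe in Python, so the last chunk may be short.
    return [user_list[i * chunk_size:(i + 1) * chunk_size] for i in range(crawler_num)]


# Example: 250 users -> floor division gives 2, the remainder check bumps it to 3
# chunks of sizes 100, 100 and 50.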