# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/3/2
"""Prototype scheduler: read crawler tasks from MySQL and print the selected ones."""
import os
import sys
import time

# Presumably lets the `common` package resolve when the script is launched
# from the project root; must run before the project imports below.
sys.path.append(os.getcwd())
from common.common import Common
from common.db import MysqlHelper


class Scheduling:
    """Read rows from the ``crawler_task_1`` table and report the selected tasks."""

    # Task list (not referenced by any method in this file yet).
    task_list = []

    # Keys for the task dicts, in the positional order the rows are read
    # (index 0 -> "task_id", ..., index 18 -> "update_time").
    # NOTE(review): this order is assumed to match the column order returned
    # by `select *` on crawler_task_1 -- confirm against the table schema.
    _TASK_COLUMNS = (
        "task_id",
        "task_name",
        "source",
        "next_time",
        "interval_piaoquan",
        "spider_rule",
        "task_type",
        "spider_link",
        "spider_name",
        "min_publish_time",
        "min_publish_day",
        "media_id",
        "applets_status",
        "app_status",
        "user_tag",
        "user_content_tag",
        "machine",
        "insert_time",
        "update_time",
    )

    # Read the task table
    @classmethod
    def get_task(cls, log_type, crawler, env, machine):
        """Return the task rows whose ``next_time`` passes the time filter.

        All four arguments are forwarded unchanged to
        ``MysqlHelper.get_values``.

        :param log_type: forwarded to the DB helper
        :param crawler: forwarded to the DB helper
        :param env: forwarded to the DB helper
        :param machine: forwarded to the DB helper
        :return: list of dicts keyed by ``_TASK_COLUMNS``, one per selected row
        :raises IndexError: if a row has fewer than 19 fields
        """
        get_sql = """ select * from crawler_task_1 """
        all_task_list = MysqlHelper.get_values(
            log_type=log_type,
            crawler=crawler,
            sql=get_sql,
            env=env,
            machine=machine,
        )

        # One timestamp for the whole pass so every row is compared against
        # the same instant.
        now = int(time.time())

        pre_task_list = []
        for task in all_task_list:
            # Index-based lookup keeps the IndexError on short rows.
            # The row's "machine" field stays inside the dict and no longer
            # overwrites the `machine` argument.
            task_dict = {
                column: task[position]
                for position, column in enumerate(cls._TASK_COLUMNS)
            }
            # NOTE(review): this keeps tasks whose next_time is now or in the
            # future. If the intent is "tasks that are due", the comparison
            # should probably be `next_time <= now` -- confirm before changing.
            if task_dict["next_time"] >= now:
                pre_task_list.append(task_dict)
        return pre_task_list

    # Assemble tasks
    @classmethod
    def update_task(cls, log_type, crawler, env, machine):
        """Fetch the selected tasks and print their key fields.

        Logs an info message through ``Common.logger`` when nothing was
        selected. Arguments are forwarded unchanged to :meth:`get_task`.
        """
        pre_task_list = cls.get_task(
            log_type=log_type, crawler=crawler, env=env, machine=machine
        )
        if not pre_task_list:
            Common.logger(log_type, crawler).info("暂无新任务\n")
            return

        for task in pre_task_list:
            task_id = task["task_id"]
            task_name = task["task_name"]
            next_time = task["next_time"]
            interval_piaoquan = task["interval_piaoquan"]
            spider_rule = task["spider_rule"]

            print(f"task_id:{task_id}")
            print(f"task_name:{task_name}")
            print(f"next_time:{next_time}")
            print(f"interval_piaoquan:{interval_piaoquan}")
            print(f"spider_rule:{spider_rule}\n")

    # Resource allocation
    @classmethod
    def resource_allocation(cls, log_type, crawler, env, machine):
        """Placeholder: not implemented yet."""
        pass

    # Write to the task queue
    @classmethod
    def write_to_queue(cls):
        """Placeholder: not implemented yet."""
        pass


if __name__ == "__main__":
    # To inspect the raw selection instead of the formatted printout:
    # task_list = Scheduling.get_task("Scheduling", "scheduling", "dev", "local")
    # print(task_list)
    Scheduling.update_task("Scheduling", "scheduling", "dev", "local")