123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103 |
- # -*- coding: utf-8 -*-
- # @Author: wangkun
- # @Time: 2023/3/2
- import os
- import sys
- import time
- sys.path.append(os.getcwd())
- from common.common import Common
- from common.db import MysqlHelper
class Scheduling:
    """Crawler task scheduler.

    Reads rows from the ``crawler_task_1`` MySQL table, filters them by
    ``next_time``, and (for now) prints the selected tasks.  Resource
    allocation and queue writing are placeholders.
    """

    # Shared task queue — currently an unused placeholder.
    task_list = []

    # Column order of rows returned by ``select * from crawler_task_1``.
    # Keeping the names in one tuple replaces 19 hand-numbered positional
    # unpacks and removes the risk of an off-by-one column index.
    _TASK_FIELDS = (
        "task_id", "task_name", "source", "next_time", "interval_piaoquan",
        "spider_rule", "task_type", "spider_link", "spider_name",
        "min_publish_time", "min_publish_day", "media_id", "applets_status",
        "app_status", "user_tag", "user_content_tag", "machine",
        "insert_time", "update_time",
    )

    # Read the task table
    @classmethod
    def get_task(cls, log_type, crawler, env, machine):
        """Fetch all rows from the task table and return the filtered subset.

        :param log_type: logger channel name, forwarded to MysqlHelper
        :param crawler: crawler name, forwarded to MysqlHelper
        :param env: environment tag (e.g. "dev"), forwarded to MysqlHelper
        :param machine: machine tag (e.g. "local"), forwarded to MysqlHelper
        :return: list of task dicts keyed by ``_TASK_FIELDS``
        """
        get_sql = """ select * from crawler_task_1 """
        all_task_list = MysqlHelper.get_values(log_type=log_type, crawler=crawler, sql=get_sql, env=env, machine=machine)
        pre_task_list = []
        for task in all_task_list:
            # Map the positional row onto named fields in one step.
            task_dict = dict(zip(cls._TASK_FIELDS, task))
            # NOTE(review): this keeps tasks whose next_time is in the
            # FUTURE (>= now).  A scheduler usually wants tasks that are
            # already due (<= now) — confirm the intended comparison.
            if task_dict["next_time"] >= int(time.time()):
                pre_task_list.append(task_dict)
        return pre_task_list

    # Assemble tasks
    @classmethod
    def update_task(cls, log_type, crawler, env, machine):
        """Fetch pending tasks and print their key fields.

        Logs a "no new tasks" message when the filtered list is empty.
        Parameters are forwarded unchanged to :meth:`get_task`.
        """
        pre_task_list = cls.get_task(log_type=log_type, crawler=crawler, env=env, machine=machine)
        if not pre_task_list:
            Common.logger(log_type, crawler).info("暂无新任务\n")
            return
        for task in pre_task_list:
            print(f"task_id:{task['task_id']}")
            print(f"task_name:{task['task_name']}")
            print(f"next_time:{task['next_time']}")
            print(f"interval_piaoquan:{task['interval_piaoquan']}")
            print(f"spider_rule:{task['spider_rule']}\n")

    # Resource allocation
    @classmethod
    def resource_allocation(cls, log_type, crawler, env, machine):
        """Placeholder: assign tasks to crawl resources (not implemented)."""
        pass

    # Write to the task queue
    @classmethod
    def write_to_queue(cls):
        """Placeholder: push assembled tasks onto a queue (not implemented)."""
        pass
if __name__ == "__main__":
    # Manual smoke-run against the dev environment on the local machine.
    # To inspect the raw task list instead:
    #   print(Scheduling.get_task("Scheduling", "scheduling", "dev", "local"))
    Scheduling.update_task("Scheduling", "scheduling", "dev", "local")
|