import argparse
import json
import os
import sys

sys.path.append(os.getcwd())

from application.common.mysql import MysqlHelper
from application.config import spider_map


class OnlineManager(object):
    """Fetch a crawler task's rule and user list from MySQL and launch the
    spider registered for the given platform/mode.

    Always runs against the production environment ("prod").
    """

    def __init__(self, task_id, mode, platform):
        """
        :param task_id: primary key of the task row in crawler_task_v3
        :param mode: crawl mode, used to pick the spider class and DB config
        :param platform: platform name, used to pick the spider class and DB config
        """
        self.env = "prod"  # hard-coded: this manager only targets production
        self.task_id = task_id
        self.mode = mode
        self.platform = platform
        self.MySQL = MysqlHelper(mode=self.mode, platform=self.platform, env=self.env)

    def get_task_rule(self):
        """Return the task's crawl rules as a single flat dict.

        The `rule` column stores a JSON list of single-key dicts, e.g.
        ``[{"a": 1}, {"b": 2}]``; they are merged into ``{"a": 1, "b": 2}``.
        Returns an empty dict when the task does not exist.
        """
        rule_dict = {}
        # int() coercion prevents SQL injection: task_id originates from the CLI.
        task_rule_sql = (
            f"SELECT rule FROM crawler_task_v3 WHERE id = {int(self.task_id)};"
        )
        data = self.MySQL.select(task_rule_sql)
        if data:
            rule_list = json.loads(data[0][0])
            for item in rule_list:
                for key in item:
                    rule_dict[key] = item[key]
        return rule_dict

    def get_task_user_list(self):
        """Return the task's users as ``[{"uid": ..., "link": ...}, ...]``.

        Returns an empty list when the task has no users.
        """
        # int() coercion prevents SQL injection: task_id originates from the CLI.
        task_user_list_sql = (
            f"SELECT uid, link from crawler_user_v3 where task_id = {int(self.task_id)};"
        )
        uid_list = self.MySQL.select(task_user_list_sql)
        return [{"uid": uid, "link": link} for uid, link in uid_list] if uid_list else []

    def start_crawl(self):
        """Instantiate and run the spider, but only when the task has both a
        non-empty rule dict and at least one user."""
        rule_dict = self.get_task_rule()
        user_list = self.get_task_user_list()
        if rule_dict and user_list:
            # spider_map is a nested mapping: platform -> mode -> spider class
            spider_class = spider_map[self.platform][self.mode]
            main_process = spider_class(
                platform=self.platform,
                mode=self.mode,
                rule_dict=rule_dict,
                user_list=user_list,
                env=self.env,
            )
            main_process.run()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()  # CLI argument parser
    # required=True / type=int fail fast with a clear usage error instead of
    # passing None (or a non-numeric string) into the SQL layer.
    parser.add_argument("--task_id", required=True, type=int)
    parser.add_argument("--mode", required=True)
    parser.add_argument("--platform", required=True)
    args = parser.parse_args()

    M = OnlineManager(task_id=args.task_id, mode=args.mode, platform=args.platform)
    M.start_crawl()