""" @author: luojunhui """ import datetime from tqdm import tqdm from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, ODPSApi from config import poolTagMap from stratrgy import ArticlePoolStrategy class SendToMultiLevels(object): """ 冷启分配任务 """ AidApi = AIDTApi() DeMysql = DeNetMysql() PqMysql = PQMySQL() Fun = Functions() OA = ODPSApi() @classmethod def getYesterdayData(cls): """ 获取前一天数据表现 :return: """ odps_sql = "select * from loghubods.changwen_article_datastat where dt = '20240724';" result = cls.OA.select(sql=odps_sql) response_list = [ { "article_id": record["article_id"], "increase_read_count": record["increase_read_count"], "read_count": record["read_count"], "increase_income": record["increase_income"], "income": record["income"], "increase_share_count": record["increase_share_count"], "share_count": record["share_count"], "update_timestamp": record["update_timestamp"] } for record in result if record['increase_read_count'] >= 1000 ] return response_list @classmethod def splitToDifferentPools(cls, yesterday_data): """ 分类至Pools :return: """ pool_level_1 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if i['increase_read_count'] >= 9000] pool_level_2 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if 3500 <= i['increase_read_count'] < 9000] pool_level_3 = [cls.Fun.matchLinkById(i['article_id']) for i in tqdm(yesterday_data) if 1000 <= i['increase_read_count'] < 3500] L = { "Level1": pool_level_1, "Level2": pool_level_2, "Level3": pool_level_3 } return L @classmethod def sendToEachCrawlerPlan(cls, key, url_list): """ :param key: :param url_list: :return: """ # daily自动创建新抓取计划 # cls.AidApi.updateArticleIntoCrawlerPlan( # plan_id=None, # plan_name="{}--{}".format(datetime.datetime.today().__str__().split(" ")[0], key), # plan_tag=poolTagMap[key], # url_list=url_list # ) @classmethod def sendToDifferentPools(cls, pool_info): """ 获取文章url :return: """ for key in pool_info: cls.sendToEachCrawlerPlan(key, pool_info[key]) @classmethod def deal(cls): """ Dealing function :return: """ yesterday_data = cls.getYesterdayData() level_url_list_map = cls.splitToDifferentPools(yesterday_data) cls.sendToDifferentPools(pool_info=level_url_list_map) STML = SendToMultiLevels() yesterday_data = STML.getYesterdayData() S = ArticlePoolStrategy() detail_list = S.getData(article_list=yesterday_data) S.splitByStrategy(detail_list=detail_list)