""" @author: luojunhui """ from tqdm import tqdm from applications import AIDTApi, DeNetMysql, PQMySQL class ColdStartPool(object): """ 冷启动任务 """ AidApi = AIDTApi() DeMysql = DeNetMysql() PqMysql = PQMySQL() @classmethod def getPlanAllArticles(cls, plan_id): """ 获取一个计划的所有内容 :param plan_id: :return: """ page = 1 response = cls.AidApi.getPlanArticleList(plan_id=plan_id, page_index=page).get("data", {}) data_list = response['data'] all_articles_count = response['totalCount'] while len(data_list) + 50 * (page - 1) < all_articles_count: page += 1 response_next_page = cls.AidApi.getPlanArticleList(plan_id=plan_id, page_index=page).get("data", {}) data_list += response_next_page['data'] return data_list @classmethod def updateToPool(cls, plan_id): """ 获取计划内容并且写入冷启池 :param plan_id: :return: """ each_plan_articles = cls.getPlanAllArticles(plan_id) for article in tqdm(each_plan_articles): try: cls.updateEachArticle(article) except Exception as e: print(e) # with ThreadPoolExecutor(max_workers=10) as Pool: # Pool.map(cls.updateEachArticle, each_plan_articles) @classmethod def updateEachArticle(cls, article_obj): """ update each article to db :param article_obj: :return: """ sql = f""" INSERT INTO cold_start_article_pool (content_id, content_link, title, cover, view_count, like_count, looking_count, publish_time_stamp, plan_id, category, content_channel_id, status) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s); """ params = ( article_obj.get("id"), article_obj.get("contentLink"), article_obj.get("title"), article_obj.get("coverImageUrl"), article_obj.get("viewCount"), article_obj.get("likeCount"), article_obj.get("lookingCount"), article_obj.get("publishTimestamp"), article_obj.get("sourceCrawlerPlans")[0].get("id"), article_obj.get("sourceCrawlerPlans")[0].get("name").split("-")[1], article_obj.get("channelContentId"), 1 ) cls.PqMysql.update(sql=sql, params=params) @classmethod def deal(cls): """ 获取非空抓取计划id :return: """ plan_id_list = cls.DeMysql.getUnEmptyPlan() for plan_id in tqdm(plan_id_list): cls.updateToPool(plan_id) if __name__ == '__main__': CP = ColdStartPool() CP.deal()