123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293 |
- """
- @author: luojunhui
- """
- from tqdm import tqdm
- from applications import AIDTApi, DeNetMysql, PQMySQL
class ColdStartPool:
    """
    Cold-start task: copy the articles of crawl plans into the
    ``cold_start_article_pool`` table.

    Every method is a classmethod; they share the three client objects
    below, which are created once when the class body is executed.
    """

    # Client whose getPlanArticleList() returns one page of a plan's articles.
    AidApi = AIDTApi()
    # Client whose getUnEmptyPlan() returns the ids of the non-empty plans.
    DeMysql = DeNetMysql()
    # Client whose update() executes the INSERT into the cold-start pool.
    PqMysql = PQMySQL()

    @classmethod
    def getPlanAllArticles(cls, plan_id):
        """
        Fetch all articles of one plan, following pagination.

        :param plan_id: id of the plan whose articles are requested
        :return: list with the article objects of every page fetched
        :raises KeyError: if a response payload lacks ``data`` or ``totalCount``
        """
        page = 1
        response = cls.AidApi.getPlanArticleList(plan_id=plan_id, page_index=page).get("data", {})
        data_list = response['data']
        all_articles_count = response['totalCount']
        # data_list already holds every page fetched so far, so its length
        # alone is compared with the reported total. Adding a per-page offset
        # on top of it would count earlier pages twice and stop too early.
        while len(data_list) < all_articles_count:
            page += 1
            response_next_page = cls.AidApi.getPlanArticleList(plan_id=plan_id, page_index=page).get("data", {})
            page_items = response_next_page['data']
            if not page_items:
                # An empty page means nothing more can be fetched; stop here
                # so a total that is never reached cannot loop forever.
                break
            data_list += page_items
        return data_list

    @classmethod
    def updateToPool(cls, plan_id):
        """
        Fetch a plan's articles and write each one into the cold-start pool.

        :param plan_id: id of the plan to import
        :return: None
        """
        each_plan_articles = cls.getPlanAllArticles(plan_id)
        for article in tqdm(each_plan_articles):
            try:
                cls.updateEachArticle(article)
            except Exception as e:
                # Best effort: a failure on one article is reported and the
                # remaining articles of the plan are still processed.
                print(e)
        # NOTE: the articles are written sequentially; a ThreadPoolExecutor
        # based variant existed here only as commented-out code.

    @classmethod
    def updateEachArticle(cls, article_obj):
        """
        Insert one article into ``cold_start_article_pool``.

        :param article_obj: article dict as returned by getPlanAllArticles
        :return: None
        :raises TypeError: if ``sourceCrawlerPlans`` is missing
        :raises IndexError: if ``sourceCrawlerPlans`` is empty or the plan
            name contains no ``-`` separator
        """
        # Plain string: the values are bound through the %s placeholders,
        # so no f-string interpolation is needed.
        sql = """
            INSERT INTO cold_start_article_pool
            (content_id, content_link, title, cover, view_count, like_count, looking_count, publish_time_stamp, plan_id, category, content_channel_id, status)
            VALUES
            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        """
        # Only the first source crawler plan is used, for both the plan id
        # and the category.
        source_plan = article_obj.get("sourceCrawlerPlans")[0]
        params = (
            article_obj.get("id"),
            article_obj.get("contentLink"),
            article_obj.get("title"),
            article_obj.get("coverImageUrl"),
            article_obj.get("viewCount"),
            article_obj.get("likeCount"),
            article_obj.get("lookingCount"),
            article_obj.get("publishTimestamp"),
            source_plan.get("id"),
            # Category is the second "-"-separated segment of the plan name.
            source_plan.get("name").split("-")[1],
            article_obj.get("channelContentId"),
            # status is always written as 1; its meaning is not visible here.
            1,
        )
        cls.PqMysql.update(sql=sql, params=params)

    @classmethod
    def deal(cls):
        """
        Import the articles of every non-empty crawl plan into the pool.

        :return: None
        """
        plan_id_list = cls.DeMysql.getUnEmptyPlan()
        for plan_id in tqdm(plan_id_list):
            cls.updateToPool(plan_id)
if __name__ == '__main__':
    # Script entry point. deal() is a classmethod, so it is invoked on the
    # class itself rather than through a throwaway instance.
    ColdStartPool.deal()
|