12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- """
- @author: luojunhui
- """
- import json
- from tqdm import tqdm
- from applications import AIDTApi, DeNetMysql, PQMySQL
class ColdStartPool(object):
    """
    Cold-start task: copy the articles of crawler plans into the
    ``cold_start_article_pool`` table.

    Every method is a classmethod; they all share the class-level clients
    declared below.
    """

    # Client whose getPlanArticleList() pages through a plan's articles.
    AidApi = AIDTApi()
    # Client whose getUnEmptyPlan() yields the plan ids to process.
    DeMysql = DeNetMysql()
    # Client whose update() executes the INSERT into the pool table.
    PqMysql = PQMySQL()

    @classmethod
    def _fetchPlanPage(cls, plan_id, page_index):
        """
        Fetch one page of a plan's article list.

        :param plan_id: id of the plan to query.
        :param page_index: 1-based page number, as used by getPlanAllArticles.
        :return: the ``data`` payload of the response, or an empty dict when
            the response has no (or an empty) ``data`` entry.
        """
        response = cls.AidApi.getPlanArticleList(plan_id=plan_id, page_index=page_index)
        return response.get("data") or {}

    @classmethod
    def getPlanAllArticles(cls, plan_id):
        """
        Get all articles of one plan by walking the paged article-list API.

        :param plan_id: id of the plan to query.
        :return: list of the articles of every fetched page, in page order;
            an empty list when the first response carries no article payload.
        """
        page = 1
        payload = cls._fetchPlanPage(plan_id, page)
        # Copy the first page so extending the result never mutates the
        # response object handed back by the API client.
        data_list = list(payload.get("data") or [])
        all_articles_count = payload.get("totalCount") or 0

        # data_list accumulates every page fetched so far, so its length alone
        # is the progress counter. Adding a per-page offset on top of it would
        # count earlier pages twice and stop before the last pages are read.
        while len(data_list) < all_articles_count:
            page += 1
            page_articles = cls._fetchPlanPage(plan_id, page).get("data") or []
            if not page_articles:
                # The reported total is larger than what the API delivers;
                # stop instead of requesting empty pages forever.
                break
            data_list.extend(page_articles)
        return data_list

    @classmethod
    def updateToPool(cls, plan_id):
        """
        Fetch the articles of a plan and write each of them to the cold-start pool.

        Writing is best-effort: an article that fails to be written is
        reported on stdout and skipped, and the remaining articles are still
        processed.

        :param plan_id: id of the plan whose articles are written.
        :return: None
        """
        each_plan_articles = cls.getPlanAllArticles(plan_id)
        for article in tqdm(each_plan_articles):
            try:
                cls.updateEachArticle(article)
            except Exception as e:
                # Deliberate best-effort boundary: one bad article must not
                # abort the rest of the plan.
                print(e)
        # NOTE: articles are written sequentially; a ThreadPoolExecutor-based
        # variant (max_workers=10, mapping updateEachArticle) was sketched
        # here previously but never enabled.

    @classmethod
    def updateEachArticle(cls, article_obj):
        """
        Insert one article into ``cold_start_article_pool``.

        The plan id and the category are both taken from the first entry of
        the article's ``sourceCrawlerPlans``; the category is the second
        ``-``-separated segment of that plan's ``name``. ``status`` is written
        as the literal 1 (its meaning is not visible in this file).

        :param article_obj: dict describing the article; read keys are ``id``,
            ``contentLink``, ``title``, ``coverImageUrl``, ``viewCount``,
            ``likeCount``, ``lookingCount``, ``publishTimestamp``,
            ``sourceCrawlerPlans`` and ``channelContentId``.
        :return: None
        :raises TypeError, IndexError, AttributeError: when
            ``sourceCrawlerPlans`` is missing or empty, or the plan name is
            missing or has no ``-`` separator.
        """
        sql = """
            INSERT INTO cold_start_article_pool
            (content_id, content_link, title, cover, view_count, like_count, looking_count, publish_time_stamp, plan_id, category, content_channel_id, status)
            VALUES
            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        """
        # Look the source plan up once; both plan_id and category derive from it.
        source_plan = article_obj.get("sourceCrawlerPlans")[0]
        category = source_plan.get("name").split("-")[1]
        params = (
            article_obj.get("id"),
            article_obj.get("contentLink"),
            article_obj.get("title"),
            article_obj.get("coverImageUrl"),
            article_obj.get("viewCount"),
            article_obj.get("likeCount"),
            article_obj.get("lookingCount"),
            article_obj.get("publishTimestamp"),
            source_plan.get("id"),
            category,
            article_obj.get("channelContentId"),
            1,
        )
        cls.PqMysql.update(sql=sql, params=params)

    @classmethod
    def deal(cls):
        """
        Process every plan returned by ``DeMysql.getUnEmptyPlan()`` (the ids
        of non-empty crawler plans) by writing its articles to the pool.

        :return: None
        """
        plan_id_list = cls.DeMysql.getUnEmptyPlan()
        for plan_id in tqdm(plan_id_list):
            cls.updateToPool(plan_id)
# Module-level run: build the task object and immediately process every plan
# via ColdStartPool.deal(). This executes whenever the module is loaded.
CST = ColdStartPool()
CST.deal()
|