123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
- """
- @author: luojunhui
- """
- import datetime
- from tqdm import tqdm
- from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, longArticlesMySQL
- from config import poolTagMap
- from stratrgy import ArticlePoolStrategy
class SendToMultiLevels(object):
    """
    Traffic-pool promotion task.

    Pipeline (see ``deal``): read article read counts from the database,
    let ``ArticlePoolStrategy`` split them into pools, then push the URLs of
    every pool into a crawler plan through ``AIDTApi``.

    Every method is a classmethod; the clients below are created once, when
    the class body is executed, and shared through the class.
    """

    AidApi = AIDTApi()
    DeMysql = DeNetMysql()
    PqMysql = PQMySQL()
    Fun = Functions()
    # ``getYesterdayData`` queries through ``cls.Ad``. The attribute used to
    # be left unassigned (a dangling ``# Ad =``), so the task failed with
    # AttributeError before doing any work.
    # NOTE(review): longArticlesMySQL is the only imported client that was
    # not bound to the class, so it is assumed to be the intended one --
    # confirm it exposes ``select(sql=...)`` and serves the changwen_* tables.
    Ad = longArticlesMySQL()

    @classmethod
    def getYesterdayData(cls):
        """
        Fetch read counts for articles published since a fixed cutoff.

        Selects ``article_id`` and ``read_count`` from
        ``changwen_article_datastat`` for articles whose
        ``publish_timestamp`` is >= 1722268800000 and whose ``read_count``
        is greater than 100.

        NOTE(review): despite the method name, the cutoff is a hard-coded
        timestamp rather than "yesterday" -- confirm this is intended.

        :return: list of ``{"id": ..., "read_count": ...}`` dicts, one per row
        """
        # Plain string on purpose: the query has no interpolated values.
        sql = """
            select article_id, read_count from changwen_article_datastat
            where article_id in (
                select id from changwen_article
                where publish_timestamp >= 1722268800000
            ) and read_count > 100;
        """
        rows = cls.Ad.select(sql=sql)
        return [
            {
                "id": row[0],
                "read_count": row[1],
            }
            for row in rows
        ]

    @classmethod
    def splitToDifferentPools(cls, yesterday_data):
        """
        Split the fetched articles into pools.

        :param yesterday_data: article list as returned by ``getYesterdayData``
        :return: whatever ``ArticlePoolStrategy.splitByStrategy`` returns;
            ``sendToDifferentPools`` consumes it as a mapping of
            pool key -> list of article records
        """
        strategy = ArticlePoolStrategy()
        detail_list = strategy.getData(article_list=yesterday_data)
        return strategy.splitByStrategy(detail_list=detail_list)

    @classmethod
    def sendToEachCrawlerPlan(cls, key, result_list):
        """
        Push the URLs of one pool into a crawler plan.

        :param key: pool key; must be present in ``poolTagMap`` (KeyError otherwise)
        :param result_list: article records, each carrying a ``'url'`` entry
        :return: None
        """
        # Local calendar date as YYYY-MM-DD, used to label the daily plan.
        today = datetime.date.today().isoformat()
        # plan_id=None: per the original note, a new crawler plan is created
        # automatically on each daily run instead of updating an existing one.
        cls.AidApi.updateArticleIntoCrawlerPlan(
            plan_id=None,
            plan_name="流量池晋级--{}--{}".format(today, key),
            plan_tag=poolTagMap[key],
            url_list=[item['url'] for item in result_list],
        )

    @classmethod
    def sendToDifferentPools(cls, pool_info):
        """
        Send every pool in ``pool_info`` to its own crawler plan.

        :param pool_info: mapping of pool key -> list of article records
        :return: None
        """
        for key, result_list in pool_info.items():
            cls.sendToEachCrawlerPlan(key, result_list)

    @classmethod
    def deal(cls):
        """
        Run the whole task: fetch, split into pools, send to crawler plans.

        :return: None
        """
        yesterday_data = cls.getYesterdayData()
        level_url_list_map = cls.splitToDifferentPools(yesterday_data)
        cls.sendToDifferentPools(pool_info=level_url_list_map)
if __name__ == '__main__':
    # Script entry point: build the task object and run the full
    # fetch -> split -> send pipeline once.
    SendToMultiLevels().deal()
|