123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- """
- @author: luojunhui
- """
- import datetime
- from tqdm import tqdm
- from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, ODPSApi
- from config import poolTagMap
- from stratrgy import ArticlePoolStrategy
class SendToMultiLevels(object):
    """
    Cold-start distribution task.

    Reads per-article statistics from ODPS, buckets the articles into three
    levels by their ``increase_read_count`` and hands every bucket to the
    crawler-plan step.  All methods are classmethods that share the client
    objects declared below.
    """

    # Client / helper objects, created once when the class body is executed.
    AidApi = AIDTApi()
    DeMysql = DeNetMysql()
    PqMysql = PQMySQL()
    Fun = Functions()
    OA = ODPSApi()

    # (pool name, inclusive lower bound of increase_read_count), ordered from
    # the highest bound to the lowest so the first match is the right pool.
    _POOL_THRESHOLDS = (
        ("Level1", 9000),
        ("Level2", 3500),
        ("Level3", 1000),
    )

    # Columns copied from every ODPS record into the returned dicts.
    _RECORD_FIELDS = (
        "article_id",
        "increase_read_count",
        "read_count",
        "increase_income",
        "income",
        "increase_share_count",
        "share_count",
        "update_timestamp",
    )

    # Format of the ``dt`` partition value used by the statistics table.
    _PARTITION_FORMAT = "%Y%m%d"

    @classmethod
    def getYesterdayData(cls, dt=None):
        """
        Fetch the previous day's article performance data.

        :param dt: optional partition date as a ``YYYYMMDD`` string; when
            omitted, yesterday (local time) is used.
        :return: list of dicts (keys listed in ``_RECORD_FIELDS``) for the
            records whose ``increase_read_count`` reaches the lowest pool
            threshold.
        :raises ValueError: if ``dt`` is not a valid ``YYYYMMDD`` date.
        """
        if dt is None:
            day = datetime.date.today() - datetime.timedelta(days=1)
        else:
            # Parsing validates the caller-supplied value; re-formatting it
            # below guarantees that only a normalised date string is ever
            # interpolated into the SQL text.
            day = datetime.datetime.strptime(dt, cls._PARTITION_FORMAT)
        partition = day.strftime(cls._PARTITION_FORMAT)

        odps_sql = (
            "select * from loghubods.changwen_article_datastat "
            "where dt = '{}';".format(partition)
        )
        result = cls.OA.select(sql=odps_sql)

        # Anything below the lowest pool bound can never be placed in a pool.
        min_reads = cls._POOL_THRESHOLDS[-1][1]
        return [
            {field: record[field] for field in cls._RECORD_FIELDS}
            for record in result
            if record["increase_read_count"] >= min_reads
        ]

    @classmethod
    def _levelForReadCount(cls, read_count):
        """Return the pool name for ``read_count``, or None if below all bounds."""
        for name, minimum in cls._POOL_THRESHOLDS:
            if read_count >= minimum:
                return name
        return None

    @classmethod
    def splitToDifferentPools(cls, yesterday_data):
        """
        Classify articles into pools by ``increase_read_count``.

        :param yesterday_data: iterable of dicts carrying at least
            ``article_id`` and ``increase_read_count``.
        :return: dict with the keys ``Level1`` / ``Level2`` / ``Level3``; each
            value is the list of ``Functions.matchLinkById`` results
            (presumably article links - confirm against ``Functions``) for
            the articles in that pool, in input order.
        """
        pools = {name: [] for name, _ in cls._POOL_THRESHOLDS}
        # Single pass: every article belongs to at most one pool.
        for article in tqdm(yesterday_data):
            level = cls._levelForReadCount(article["increase_read_count"])
            if level is not None:
                pools[level].append(cls.Fun.matchLinkById(article["article_id"]))
        return pools

    @classmethod
    def sendToEachCrawlerPlan(cls, key, url_list):
        """
        Send one pool's links to its crawler plan.

        NOTE: the actual API call is currently commented out, so this method
        does nothing and returns None.

        :param key: pool name (``Level1`` / ``Level2`` / ``Level3``).
        :param url_list: links belonging to that pool.
        :return: None
        """
        # Automatically create a new crawler plan every day (disabled):
        # cls.AidApi.updateArticleIntoCrawlerPlan(
        #     plan_id=None,
        #     plan_name="{}--{}".format(datetime.datetime.today().__str__().split(" ")[0], key),
        #     plan_tag=poolTagMap[key],
        #     url_list=url_list
        # )

    @classmethod
    def sendToDifferentPools(cls, pool_info):
        """
        Dispatch every pool to ``sendToEachCrawlerPlan``.

        :param pool_info: mapping of pool name to its list of links, as
            returned by ``splitToDifferentPools``.
        :return: None
        """
        for key, url_list in pool_info.items():
            cls.sendToEachCrawlerPlan(key, url_list)

    @classmethod
    def deal(cls):
        """
        Run the whole task: fetch, classify, dispatch.

        :return: None
        """
        yesterday_data = cls.getYesterdayData()
        level_url_list_map = cls.splitToDifferentPools(yesterday_data)
        cls.sendToDifferentPools(pool_info=level_url_list_map)
# Module-level driver: these statements run as soon as the module is executed
# or imported.

# Step 1: pull the article statistics through SendToMultiLevels.
STML = SendToMultiLevels()
yesterday_data = STML.getYesterdayData()

# Step 2: feed those statistics to the article-pool strategy, which first
# builds its detail list and then splits it.
S = ArticlePoolStrategy()
detail_list = S.getData(article_list=yesterday_data)
S.splitByStrategy(detail_list=detail_list)
|