|  | @@ -5,7 +5,7 @@ import datetime
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  from tqdm import tqdm
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, ODPSApi
 | 
	
		
			
				|  |  | +from applications import AIDTApi, DeNetMysql, PQMySQL, Functions, AdMySQL
 | 
	
		
			
				|  |  |  from config import poolTagMap
 | 
	
		
			
				|  |  |  from stratrgy import ArticlePoolStrategy
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -18,7 +18,7 @@ class SendToMultiLevels(object):
 | 
	
		
			
				|  |  |      DeMysql = DeNetMysql()
 | 
	
		
			
				|  |  |      PqMysql = PQMySQL()
 | 
	
		
			
				|  |  |      Fun = Functions()
 | 
	
		
			
				|  |  | -    OA = ODPSApi()
 | 
	
		
			
				|  |  | +    Ad = AdMySQL()
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      @classmethod
 | 
	
		
			
				|  |  |      def getYesterdayData(cls):
 | 
	
	
		
			
				|  | @@ -26,19 +26,19 @@ class SendToMultiLevels(object):
 | 
	
		
			
				|  |  |          获取前一天数据表现
 | 
	
		
			
				|  |  |          :return:
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  | -        odps_sql = "select * from loghubods.changwen_article_datastat where dt = '20240729';"
 | 
	
		
			
				|  |  | -        result = cls.OA.select(sql=odps_sql)
 | 
	
		
			
				|  |  | +        sql = f"""
 | 
	
		
			
				|  |  | +        select article_id, read_count from changwen_article_datastat
 | 
	
		
			
				|  |  | +        where article_id in (
 | 
	
		
			
				|  |  | +            select id from changwen_article
 | 
	
		
			
				|  |  | +            where publish_timestamp >= 1721664000000
 | 
	
		
			
				|  |  | +        ) and read_count > 100;
 | 
	
		
			
				|  |  | +        """
 | 
	
		
			
				|  |  | +        result = cls.Ad.select(sql=sql)
 | 
	
		
			
				|  |  |          response_list = [
 | 
	
		
			
				|  |  |              {
 | 
	
		
			
				|  |  | -                "article_id": record["article_id"],
 | 
	
		
			
				|  |  | -                "increase_read_count": record["increase_read_count"],
 | 
	
		
			
				|  |  | -                "read_count": record["read_count"],
 | 
	
		
			
				|  |  | -                "increase_income": record["increase_income"],
 | 
	
		
			
				|  |  | -                "income": record["income"],
 | 
	
		
			
				|  |  | -                "increase_share_count": record["increase_share_count"],
 | 
	
		
			
				|  |  | -                "share_count": record["share_count"],
 | 
	
		
			
				|  |  | -                "update_timestamp": record["update_timestamp"]
 | 
	
		
			
				|  |  | -            } for record in result if record['increase_read_count'] >= 1000
 | 
	
		
			
				|  |  | +                "id": line[0],
 | 
	
		
			
				|  |  | +                "read_count": line[1]
 | 
	
		
			
				|  |  | +            } for line in result
 | 
	
		
			
				|  |  |          ]
 | 
	
		
			
				|  |  |          return response_list
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -54,23 +54,25 @@ class SendToMultiLevels(object):
 | 
	
		
			
				|  |  |          return result
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      @classmethod
 | 
	
		
			
				|  |  | -    def sendToEachCrawlerPlan(cls, key, url_list):
 | 
	
		
			
				|  |  | +    def sendToEachCrawlerPlan(cls, key, result_list):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +        :param result_list:
 | 
	
		
			
				|  |  |          :param key:
 | 
	
		
			
				|  |  | -        :param url_list:
 | 
	
		
			
				|  |  |          :return:
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  | -        print(key)
 | 
	
		
			
				|  |  | -        print(len(url_list))
 | 
	
		
			
				|  |  | -        print(url_list)
 | 
	
		
			
				|  |  | +        # print(key)
 | 
	
		
			
				|  |  | +        # print(len(result_list))
 | 
	
		
			
				|  |  | +        # for index, i in enumerate(result_list):
 | 
	
		
			
				|  |  | +        #     print(index, "\t",  i['level_rate'], "\t", i['title'], "\t", i['avg_read'], "\t", i['article_read'], "\t", i['key'])
 | 
	
		
			
				|  |  | +        # print(url_list)
 | 
	
		
			
				|  |  |          # daily自动创建新抓取计划
 | 
	
		
			
				|  |  | -        # cls.AidApi.updateArticleIntoCrawlerPlan(
 | 
	
		
			
				|  |  | -        #     plan_id=None,
 | 
	
		
			
				|  |  | -        #     plan_name="{}--{}".format(datetime.datetime.today().__str__().split(" ")[0], key),
 | 
	
		
			
				|  |  | -        #     plan_tag=poolTagMap[key],
 | 
	
		
			
				|  |  | -        #     url_list=url_list
 | 
	
		
			
				|  |  | -        # )
 | 
	
		
			
				|  |  | +        cls.AidApi.updateArticleIntoCrawlerPlan(
 | 
	
		
			
				|  |  | +            plan_id=None,
 | 
	
		
			
				|  |  | +            plan_name="流量池晋级--{}--{}".format(datetime.datetime.today().__str__().split(" ")[0], key),
 | 
	
		
			
				|  |  | +            plan_tag=poolTagMap[key],
 | 
	
		
			
				|  |  | +            url_list=[i['url'] for i in result_list]
 | 
	
		
			
				|  |  | +        )
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      @classmethod
 | 
	
		
			
				|  |  |      def sendToDifferentPools(cls, pool_info):
 | 
	
	
		
			
				|  | @@ -89,9 +91,14 @@ class SendToMultiLevels(object):
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          yesterday_data = cls.getYesterdayData()
 | 
	
		
			
				|  |  |          level_url_list_map = cls.splitToDifferentPools(yesterday_data)
 | 
	
		
			
				|  |  | +        # for line in level_url_list_map:
 | 
	
		
			
				|  |  | +        #     print(line)
 | 
	
		
			
				|  |  |          cls.sendToDifferentPools(pool_info=level_url_list_map)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  if __name__ == '__main__':
 | 
	
		
			
				|  |  | -    ST = SendToMultiLevels()
 | 
	
		
			
				|  |  | -    ST.deal()
 | 
	
		
			
				|  |  | +    S = SendToMultiLevels()
 | 
	
		
			
				|  |  | +    S.deal()
 | 
	
		
			
				|  |  | +    # yesterday_data = S.getYesterdayData()
 | 
	
		
			
				|  |  | +    # for line in tqdm(yesterday_data):
 | 
	
		
			
				|  |  | +    #     print(line)
 |