|  | @@ -1,37 +1,94 @@
 | 
											
												
													
														|  | -import json
 |  | 
 | 
											
												
													
														|  | 
 |  | +"""
 | 
											
												
													
														|  | 
 |  | +@author luojunhui
 | 
											
												
													
														|  | 
 |  | +@description Update Daily
 | 
											
												
													
														|  | 
 |  | +"""
 | 
											
												
													
														|  | 
 |  | +import time
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -from concurrent.futures.thread import ThreadPoolExecutor
 |  | 
 | 
											
												
													
														|  |  from tqdm import tqdm
 |  |  from tqdm import tqdm
 | 
											
												
													
														|  |  from datetime import datetime, timedelta
 |  |  from datetime import datetime, timedelta
 | 
											
												
													
														|  | 
 |  | +import schedule
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -from applications import AdMySQL, PQMySQL, WeixinSpider
 |  | 
 | 
											
												
													
														|  | 
 |  | +from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  class DailyDataManager(object):
 |  |  class DailyDataManager(object):
 | 
											
												
													
														|  |      """
 |  |      """
 | 
											
												
													
														|  |      daily 数据每日更新
 |  |      daily 数据每日更新
 | 
											
												
													
														|  |      """
 |  |      """
 | 
											
												
													
														|  | -    ad_mysql = AdMySQL()
 |  | 
 | 
											
												
													
														|  | -    pq_mysql = PQMySQL()
 |  | 
 | 
											
												
													
														|  | -    wx_spider = WeixinSpider()
 |  | 
 | 
											
												
													
														|  | 
 |  | +    laMysql = longArticlesMySQL()
 | 
											
												
													
														|  | 
 |  | +    pqMysql = PQMySQL()
 | 
											
												
													
														|  | 
 |  | +    wxSpider = WeixinSpider()
 | 
											
												
													
														|  | 
 |  | +    functions = Functions()
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |      @classmethod
 |  |      @classmethod
 | 
											
												
													
														|  |      def getPublishedArticles(cls):
 |  |      def getPublishedArticles(cls):
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  | -        获取已经发布的文章的信息
 |  | 
 | 
											
												
													
														|  | 
 |  | +        获取已经发布的文章的信息, createTime 选择为前一天的 0 点并且转化为时间戳
 | 
											
												
													
														|  |          :return:
 |  |          :return:
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  | 
 |  | +        today = datetime.today()
 | 
											
												
													
														|  | 
 |  | +        # 获取昨天的日期
 | 
											
												
													
														|  | 
 |  | +        yesterday = today - timedelta(days=1)
 | 
											
												
													
														|  | 
 |  | +        yesterday_midnight = datetime(year=yesterday.year, month=yesterday.month, day=yesterday.day)
 | 
											
												
													
														|  | 
 |  | +        yesterday_timestamp = yesterday_midnight.timestamp()
 | 
											
												
													
														|  |          sql2 = f"""
 |  |          sql2 = f"""
 | 
											
												
													
														|  | -        select ContentUrl, wx_sn, createTime 
 |  | 
 | 
											
												
													
														|  | -        from official_articles_v2 
 |  | 
 | 
											
												
													
														|  | -        where createTime >= 1724774400
 |  | 
 | 
											
												
													
														|  | 
 |  | +        select ContentUrl, wx_sn, createTime
 | 
											
												
													
														|  | 
 |  | +        from official_articles_v2
 | 
											
												
													
														|  | 
 |  | +        where createTime >= {yesterday_timestamp}
 | 
											
												
													
														|  |          and accountName in (
 |  |          and accountName in (
 | 
											
												
													
														|  |                          select distinct account_name from account_avg_info_v2
 |  |                          select distinct account_name from account_avg_info_v2
 | 
											
												
													
														|  |                          );
 |  |                          );
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  | -        result_list = cls.pq_mysql.select(sql2)
 |  | 
 | 
											
												
													
														|  | 
 |  | +        result_list = cls.pqMysql.select(sql2)
 | 
											
												
													
														|  |          return result_list
 |  |          return result_list
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | 
 |  | +    @classmethod
 | 
											
												
													
														|  | 
 |  | +    def updateInfo(cls, line):
 | 
											
												
													
														|  | 
 |  | +        """
 | 
											
												
													
														|  | 
 |  | +        update info into mysql
 | 
											
												
													
														|  | 
 |  | +        :return:
 | 
											
												
													
														|  | 
 |  | +        """
 | 
											
												
													
														|  | 
 |  | +        try:
 | 
											
												
													
														|  | 
 |  | +            wx_sn, mini_info, create_time = cls.getRootSourceIds(line)
 | 
											
												
													
														|  | 
 |  | +            dt_object = datetime.fromtimestamp(create_time)
 | 
											
												
													
														|  | 
 |  | +            publish_dt = dt_object.strftime('%Y-%m-%d')
 | 
											
												
													
														|  | 
 |  | +            one_day = timedelta(days=1)
 | 
											
												
													
														|  | 
 |  | +            two_day = timedelta(days=2)
 | 
											
												
													
														|  | 
 |  | +            next_day = dt_object + one_day
 | 
											
												
													
														|  | 
 |  | +            next_next_day = dt_object + two_day
 | 
											
												
													
														|  | 
 |  | +            recall_dt_list = [dt_object, next_day, next_next_day]
 | 
											
												
													
														|  | 
 |  | +            recall_dt_str_list = [i.strftime('%Y-%m-%d') for i in recall_dt_list]
 | 
											
												
													
														|  | 
 |  | +            for dt_str in recall_dt_str_list:
 | 
											
												
													
														|  | 
 |  | +                for index, item in enumerate(mini_info, 1):
 | 
											
												
													
														|  | 
 |  | +                    image_url = item['image_url']
 | 
											
												
													
														|  | 
 |  | +                    nick_name = item['nike_name']
 | 
											
												
													
														|  | 
 |  | +                    root_source_id = item['path'].split("rootSourceId%3D")[-1]
 | 
											
												
													
														|  | 
 |  | +                    video_id = item['path'].split("videos%3Fid%3D")[1].split("%26su%3D")[0]
 | 
											
												
													
														|  | 
 |  | +                    kimi_title = item['title']
 | 
											
												
													
														|  | 
 |  | +                    insert_sql = f"""
 | 
											
												
													
														|  | 
 |  | +                            INSERT INTO long_articles_detail_info
 | 
											
												
													
														|  | 
 |  | +                            (wx_sn, mini_title, mini_name, cover_url, video_index, root_source_id, video_id, publish_dt, recall_dt)
 | 
											
												
													
														|  | 
 |  | +                            values
 | 
											
												
													
														|  | 
 |  | +                            (%s, %s, %s, %s, %s, %s, %s, %s, %s);
 | 
											
												
													
														|  | 
 |  | +                        """
 | 
											
												
													
														|  | 
 |  | +                    cls.pqMysql.update(
 | 
											
												
													
														|  | 
 |  | +                        sql=insert_sql,
 | 
											
												
													
														|  | 
 |  | +                        params=(
 | 
											
												
													
														|  | 
 |  | +                            wx_sn,
 | 
											
												
													
														|  | 
 |  | +                            kimi_title,
 | 
											
												
													
														|  | 
 |  | +                            nick_name,
 | 
											
												
													
														|  | 
 |  | +                            image_url,
 | 
											
												
													
														|  | 
 |  | +                            index,
 | 
											
												
													
														|  | 
 |  | +                            root_source_id,
 | 
											
												
													
														|  | 
 |  | +                            video_id,
 | 
											
												
													
														|  | 
 |  | +                            publish_dt,
 | 
											
												
													
														|  | 
 |  | +                            dt_str
 | 
											
												
													
														|  | 
 |  | +                        )
 | 
											
												
													
														|  | 
 |  | +                    )
 | 
											
												
													
														|  | 
 |  | +        except Exception as e:
 | 
											
												
													
														|  | 
 |  | +            print(e)
 | 
											
												
													
														|  | 
 |  | +            pass
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  |      @classmethod
 |  |      @classmethod
 | 
											
												
													
														|  |      def getRootSourceIds(cls, data_info):
 |  |      def getRootSourceIds(cls, data_info):
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
										
											
												
													
														|  | @@ -39,9 +96,7 @@ class DailyDataManager(object):
 | 
											
												
													
														|  |          :return:
 |  |          :return:
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  |          url = data_info[0]
 |  |          url = data_info[0]
 | 
											
												
													
														|  | -        article_detail = cls.wx_spider.get_article_text(url)
 |  | 
 | 
											
												
													
														|  | -        print(url)
 |  | 
 | 
											
												
													
														|  | -        print(article_detail)
 |  | 
 | 
											
												
													
														|  | 
 |  | +        article_detail = cls.wxSpider.get_article_text(url)
 | 
											
												
													
														|  |          mini_info = article_detail['data']['data']['mini_program']
 |  |          mini_info = article_detail['data']['data']['mini_program']
 | 
											
												
													
														|  |          return data_info[1].decode(), mini_info, data_info[2]
 |  |          return data_info[1].decode(), mini_info, data_info[2]
 | 
											
												
													
														|  |  
 |  |  
 | 
											
										
											
												
													
														|  | @@ -49,7 +104,7 @@ class DailyDataManager(object):
 | 
											
												
													
														|  |      def getMinigramInfo(cls, rootSourceId):
 |  |      def getMinigramInfo(cls, rootSourceId):
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -        :param rootIdTuple:
 |  | 
 | 
											
												
													
														|  | 
 |  | +        :param rootSourceId:
 | 
											
												
													
														|  |          :return:
 |  |          :return:
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  |          sql = f"""
 |  |          sql = f"""
 | 
											
										
											
												
													
														|  | @@ -57,7 +112,7 @@ class DailyDataManager(object):
 | 
											
												
													
														|  |          from changwen_data_base_v2
 |  |          from changwen_data_base_v2
 | 
											
												
													
														|  |          where rootsourceid = '{rootSourceId}';
 |  |          where rootsourceid = '{rootSourceId}';
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  | -        result_list = cls.ad_mysql.select(sql)
 |  | 
 | 
											
												
													
														|  | 
 |  | +        result_list = cls.laMysql.select(sql)
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |          def summarize(values):
 |  |          def summarize(values):
 | 
											
												
													
														|  |              """
 |  |              """
 | 
											
										
											
												
													
														|  | @@ -117,32 +172,21 @@ class DailyDataManager(object):
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |          return summarize(result_list)
 |  |          return summarize(result_list)
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -    @classmethod
 |  | 
 | 
											
												
													
														|  | -    def getArticleInfo(cls, trace_id):
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        通过 trace_id来获取文章信息
 |  | 
 | 
											
												
													
														|  | -        :param trace_id:
 |  | 
 | 
											
												
													
														|  | -        :return:
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        sql = f"""
 |  | 
 | 
											
												
													
														|  | -        SELECT account_name, article_title
 |  | 
 | 
											
												
													
														|  | -        FROM long_articles_video
 |  | 
 | 
											
												
													
														|  | -        WHERE trace_id = '{trace_id}';
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        info = cls.pq_mysql.select(sql)
 |  | 
 | 
											
												
													
														|  | -        return info[0]
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  |      @classmethod
 |  |      @classmethod
 | 
											
												
													
														|  |      def updateDetail(cls):
 |  |      def updateDetail(cls):
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  |          :return:
 |  |          :return:
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  | 
 |  | +        today = datetime.today()
 | 
											
												
													
														|  | 
 |  | +        # Cutoff date: three days back (the name "yesterday" is a leftover; the look-back window is 3 days)
 | 
											
												
													
														|  | 
 |  | +        yesterday = today - timedelta(days=3)
 | 
											
												
													
														|  | 
 |  | +        yesterday_str = yesterday.__str__().split(" ")[0]
 | 
											
												
													
														|  |          sql = f"""
 |  |          sql = f"""
 | 
											
												
													
														|  |          select distinct root_source_id
 |  |          select distinct root_source_id
 | 
											
												
													
														|  | -        from long_articles_detail_info;
 |  | 
 | 
											
												
													
														|  | 
 |  | +        from long_articles_detail_info
 | 
											
												
													
														|  | 
 |  | +        where publish_dt >= '{yesterday_str}';
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  | -        source_id_list = cls.pq_mysql.select(sql)
 |  | 
 | 
											
												
													
														|  | 
 |  | +        source_id_list = cls.pqMysql.select(sql)
 | 
											
												
													
														|  |          for item in tqdm(source_id_list):
 |  |          for item in tqdm(source_id_list):
 | 
											
												
													
														|  |              s_id = item[0]
 |  |              s_id = item[0]
 | 
											
												
													
														|  |              try:
 |  |              try:
 | 
											
										
											
												
													
														|  | @@ -153,14 +197,13 @@ class DailyDataManager(object):
 | 
											
												
													
														|  |                      fission_0 = result[key][1]
 |  |                      fission_0 = result[key][1]
 | 
											
												
													
														|  |                      fission_1 = result[key][2]
 |  |                      fission_1 = result[key][2]
 | 
											
												
													
														|  |                      fission_2 = result[key][3]
 |  |                      fission_2 = result[key][3]
 | 
											
												
													
														|  | -                    print(key, first_level, fission_0, fission_1, fission_2)
 |  | 
 | 
											
												
													
														|  |                      update_sql = f"""
 |  |                      update_sql = f"""
 | 
											
												
													
														|  |                      UPDATE long_articles_detail_info
 |  |                      UPDATE long_articles_detail_info
 | 
											
												
													
														|  |                      set first_level = %s, fission_0 = %s, fission_1 = %s, fission_2 = %s
 |  |                      set first_level = %s, fission_0 = %s, fission_1 = %s, fission_2 = %s
 | 
											
												
													
														|  |                      where root_source_id = %s and recall_dt = %s;
 |  |                      where root_source_id = %s and recall_dt = %s;
 | 
											
												
													
														|  |                      """
 |  |                      """
 | 
											
												
													
														|  |                      try:
 |  |                      try:
 | 
											
												
													
														|  | -                        cls.pq_mysql.update(
 |  | 
 | 
											
												
													
														|  | 
 |  | +                        cls.pqMysql.update(
 | 
											
												
													
														|  |                              sql=update_sql,
 |  |                              sql=update_sql,
 | 
											
												
													
														|  |                              params=(
 |  |                              params=(
 | 
											
												
													
														|  |                                  first_level, fission_0, fission_1, fission_2, s_id, recall_dt
 |  |                                  first_level, fission_0, fission_1, fission_2, s_id, recall_dt
 | 
											
										
											
												
													
														|  | @@ -172,8 +215,34 @@ class DailyDataManager(object):
 | 
											
												
													
														|  |                  print(e)
 |  |                  print(e)
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | 
 |  | +def updateArticlesJob():
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    更新文章数据
 | 
											
												
													
														|  | 
 |  | +    :return:
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    DDM = DailyDataManager()
 | 
											
												
													
														|  | 
 |  | +    article_list = DDM.getPublishedArticles()
 | 
											
												
													
														|  | 
 |  | +    for article in tqdm(article_list):
 | 
											
												
													
														|  | 
 |  | +        DDM.updateInfo(article)
 | 
											
												
													
														|  | 
 |  | +    print("文章更新完成---{}".format(datetime.today().__str__()))
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +def updateMinigramInfoJob():
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    更新前三天小程序数据
 | 
											
												
													
														|  | 
 |  | +    :return:
 | 
											
												
													
														|  | 
 |  | +    """
 | 
											
												
													
														|  | 
 |  | +    DDM = DailyDataManager()
 | 
											
												
													
														|  | 
 |  | +    DDM.updateDetail()
 | 
											
												
													
														|  | 
 |  | +    print("小程序更新完成---{}".format(datetime.today().__str__()))
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  |  if __name__ == '__main__':
 |  |  if __name__ == '__main__':
 | 
											
												
													
														|  | -    DM = DailyDataManager()
 |  | 
 | 
											
												
													
														|  | -    # result = DM.getMinigramInfo("longArticles_d409f27d9d64501d6811b47a3779d2d7")
 |  | 
 | 
											
												
													
														|  | -    # print(result)
 |  | 
 | 
											
												
													
														|  | -    # DM.updateDetail()
 |  | 
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    schedule.every().day.at("01:00").do(Functions().job_with_thread, updateArticlesJob)
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    schedule.every().day.at("04:30").do(Functions().job_with_thread, updateMinigramInfoJob)
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    while True:
 | 
											
												
													
														|  | 
 |  | +        schedule.run_pending()
 | 
											
												
													
														|  | 
 |  | +        time.sleep(1)
 |