Jelajahi Sumber

Update updateMinigramInfoDaily: support manual reload

StrayWarrior 6 bulan lalu
induk
melakukan
60b4c99e20
1 mengubah file dengan 53 tambahan dan 31 penghapusan
  1. 53 31
      updateMinigramInfoDaily.py

+ 53 - 31
updateMinigramInfoDaily.py

@@ -3,13 +3,19 @@
 @description Update Minigram Info Daily
 """
 import time
+import sys
 
 from tqdm import tqdm
 from datetime import datetime, timedelta
 import schedule
+from argparse import ArgumentParser
 
 from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions, log, bot
 
+def get_yesterday():
+    yesterday = datetime.today() - timedelta(1)
+    return yesterday
+
 
 class DailyDataManager(object):
     """
@@ -21,20 +27,18 @@ class DailyDataManager(object):
     functions = Functions()
 
     @classmethod
-    def getPublishedArticles(cls):
+    def getPublishedArticles(cls, biz_date):
         """
         获取已经发布的文章的信息, createTime 选择为前一天的 0 点并且转化为时间戳
         :return:
         """
-        today = datetime.today()
-        # 获取昨天的日期
-        yesterday = today - timedelta(days=1)
-        yesterday_midnight = datetime(year=yesterday.year, month=yesterday.month, day=yesterday.day)
-        yesterday_timestamp = yesterday_midnight.timestamp()
+        biz_date_midnight = datetime(year=biz_date.year, month=biz_date.month, day=biz_date.day)
+        biz_date_ts = biz_date_midnight.timestamp()
+        biz_date_end_ts = biz_date_ts + 24 * 60 * 60 - 1
         sql2 = f"""
         select ContentUrl, wx_sn, createTime
         from official_articles_v2
-        where createTime >= {yesterday_timestamp};
+        where createTime between {biz_date_ts} and {biz_date_end_ts};
 --         and accountName in (
 --                         select distinct account_name from account_avg_info_v2
 --                         );
@@ -146,7 +150,7 @@ class DailyDataManager(object):
         :return:
         """
         sql = f"""
-        select type, machinecode, create_time, first_level_dt 
+        select type, machinecode, create_time, first_level_dt
         from changwen_data_base_v2
         where rootsourceid = '{rootSourceId}';
         """
@@ -227,18 +231,18 @@ class DailyDataManager(object):
             return None
 
     @classmethod
-    def updateDetail(cls):
+    def updateDetail(cls, biz_date):
         """
         :return:
         """
         today = datetime.today()
         # 获取三天前的日期
-        yesterday = today - timedelta(days=3)
-        yesterday_str = yesterday.__str__().split(" ")[0]
+        stats_date = biz_date - timedelta(days=3)
+        stats_date_str = stats_date.__str__().split(" ")[0]
         sql = f"""
             select distinct root_source_id
             from long_articles_detail_info
-            where publish_dt >= '{yesterday_str}';
+            where publish_dt >= '{stats_date_str}';
         """
         source_id_list = cls.pqMysql.select(sql)
         log(
@@ -285,55 +289,73 @@ class DailyDataManager(object):
                 )
 
 
-def updateArticlesJob():
+def updateArticlesJob(biz_date=None):
     """
     更新文章数据
     :return:
     """
+    if not biz_date:
+        biz_date = get_yesterday()
     DDM = DailyDataManager()
-    article_list = DDM.getPublishedArticles()
+    article_list = DDM.getPublishedArticles(biz_date)
     for article in tqdm(article_list):
         DDM.updateInfo(article)
     log(
         task="updateMinigramInfoDaily",
         function="updateArticlesJob",
-        message="文章更新完成---{}".format(datetime.today().__str__())
+        message="文章更新完成---{}".format(biz_date.__str__())
     )
 
 
-def updateMinigramInfoJob():
+def updateMinigramInfoJob(biz_date=None):
     """
     更新前三天小程序数据
     :return:
     """
+    if not biz_date:
+        biz_date = get_yesterday()
     DDM = DailyDataManager()
     try:
-        DDM.updateDetail()
+        DDM.updateDetail(biz_date)
         log(
             task="updateMinigramInfoDaily",
             function="updateArticlesJob",
-            message="小程序更新完成---{}".format(datetime.today().__str__())
+            message="小程序更新完成---{}".format(biz_date.__str__())
         )
     except Exception as e:
         log(
             task="updateMinigramInfoDaily",
             function="updateArticlesJob",
             status="fail",
-            message="小程序更新失败---{}, 报错信息是: {}".format(datetime.today().__str__(), e)
+            message="小程序更新失败---{}, 报错信息是: {}".format(biz_date.__str__(), e)
         )
 
+def main():
+    parser = ArgumentParser()
+    parser.add_argument("--run-date",
+                        help="Run only once for date in format of %Y%m%d. \
+                        If no specified, run as daily jobs.")
+    args = parser.parse_args()
 
-if __name__ == '__main__':
-    # updateMinigramInfoJob()
-    schedule.every().day.at("01:30").do(Functions().job_with_thread, updateArticlesJob)
+    if args.run_date:
+        biz_date = datetime.strptime(args.run_date, "%Y%m%d")
+        print("Run in manual mode. Date: {}".format(args.run_date))
+        updateArticlesJob(biz_date)
+        updateMinigramInfoJob(biz_date)
+        return
+    else:
+        print("Run in daily mode.")
+        schedule.every().day.at("01:30").do(Functions().job_with_thread, updateArticlesJob)
+        schedule.every().day.at("03:30").do(Functions().job_with_thread, updateMinigramInfoJob)
 
-    schedule.every().day.at("03:30").do(Functions().job_with_thread, updateMinigramInfoJob)
+        while True:
+            schedule.run_pending()
+            time.sleep(1)
+            # log(
+            #     task="updateMinigramInfoDaily",
+            #     function="main",
+            #     message="更新文章小程序信息任务正常执行"
+            # )
 
-    while True:
-        schedule.run_pending()
-        time.sleep(1)
-        # log(
-        #     task="updateMinigramInfoDaily",
-        #     function="main",
-        #     message="更新文章小程序信息任务正常执行"
-        # )
+if __name__ == '__main__':
+    main()