luojunhui 4 miesięcy temu
rodzic
commit
5a68d8d00d

+ 2 - 6
coldStartTasks/crawler/weixin_account_crawler.py

@@ -6,7 +6,6 @@ import traceback
 from typing import List, Set, Dict, Tuple
 
 from tqdm import tqdm
-from datetime import datetime
 from pymysql.cursors import DictCursor
 
 from applications import WeixinSpider, longArticlesMySQL, log, bot, aiditApi
@@ -67,7 +66,7 @@ class WeixinAccountCrawler(object):
         """
         :return:
         """
-        publish_timestamp_threshold = int(datetime.strptime(run_date, "%Y-%m-%d").timestamp()) - const.STAT_PERIOD
+        publish_timestamp_threshold = int(run_date.timestamp()) - const.STAT_PERIOD
         sql = f"""
             SELECT distinct title
             FROM datastat_sort_strategy
@@ -188,14 +187,11 @@ class WeixinAccountCrawler(object):
                     }
                 )
 
-    def run(self, run_date=None) -> None:
+    def run(self, run_date) -> None:
         """
         入口函数
         :return:
         """
-        if not run_date:
-            run_date = time.strftime("%Y-%m-%d", time.localtime())
-
         # get seed titles
         title_list = self.get_seed_titles(run_date)
         # get inner accounts set

+ 15 - 1
run_video_account_crawler.py

@@ -4,6 +4,9 @@
 """
 import traceback
 
+from datetime import datetime
+from argparse import ArgumentParser
+
 from applications import bot
 from coldStartTasks.crawler import WeixinAccountCrawler, WeixinVideoCrawler
 
@@ -16,9 +19,20 @@ def main():
     主函数
     :return:
     """
+    parser = ArgumentParser()
+    parser.add_argument("--run-date",
+                        help="Run only once for date in format of %Y%m%d. \
+                            If no specified, run as daily jobs.")
+    args = parser.parse_args()
+
+    if args.run_date:
+        run_date = datetime.strptime(args.run_date, "%Y-%m-%d")
+        print("Run in manual mode. Date: {}".format(args.run_date))
+    else:
+        run_date = datetime.today()
     # 先执行账号抓取
     try:
-        account_crawler.run()
+        account_crawler.run(run_date)
     except Exception as e:
         error_msg = traceback.format_exc()
         bot(