浏览代码

视频抓取v1提交

luojunhui 7 月之前
父节点
当前提交
7064e40549
共有 2 个文件被更改,包括 6 次插入和 3 次删除
  1. 3 0
      applications/const.py
  2. 3 3
      coldStartTasks/crawler/weixin_video_crawler.py

+ 3 - 0
applications/const.py

@@ -101,4 +101,7 @@ class WeixinVideoCrawlerConst:
     # 获取种子标题的统计周期
     STAT_PERIOD = 7 * 24 * 60 * 60
 
+    # 接口请求成功code
+    REQUEST_SUCCESS = 0
+
 

+ 3 - 3
coldStartTasks/crawler/weixin_video_crawler.py

@@ -76,7 +76,7 @@ class WeixinVideoCrawler(object):
             latest_crawler_timestamp = const.DEFAULT_TIMESTAMP
         # 调用爬虫接口
         response = spider.update_msg_list(gh_id, index=cursor)
-        if response['code'] == 0:
+        if response['code'] == const.REQUEST_SUCCESS:
             # 一般返回最近10天的msg_list
             msg_list = response.get('data', {}).get("data", [])
             if msg_list:
@@ -181,11 +181,11 @@ class WeixinVideoCrawler(object):
         抓取任务
         :return:
         """
-        account_list = self.get_crawler_accounts()
+        account_list = self.get_crawler_accounts()[2:3]
         for account_obj in tqdm(account_list, desc="crawler_video_for_each_account"):
             self.crawler_article_video_list(account_obj)
             self.update_account_latest_crawler_timestamp(gh_id=account_obj["gh_id"])
-            time.sleep(1)
+            time.sleep(const.SLEEP_SECONDS)
 
     def mention(self, start_timestamp):
         """