瀏覽代碼

上线账号抓取v2

luojunhui 6 月之前
父節點
當前提交
bb036de401
共有 1 個文件被更改，包括 3 次插入和 2 次刪除
  1. 3 2
      coldStartTasks/crawler/weixin_account_crawler.py

+ 3 - 2
coldStartTasks/crawler/weixin_account_crawler.py

@@ -213,6 +213,9 @@ class WeixinAccountCrawler(object):
         for crawler_article_obj in tqdm(crawler_article_list, desc="crawler article list"):
         for crawler_article_obj in tqdm(crawler_article_list, desc="crawler article list"):
             try:
             try:
                 article_id = crawler_article_obj['id']
                 article_id = crawler_article_obj['id']
+                # 记录处理过的id
+                article_id_list.append(int(article_id))
+
                 article_url = crawler_article_obj['article_url']
                 article_url = crawler_article_obj['article_url']
                 # 判断文章是否原创
                 # 判断文章是否原创
                 if self.is_original(article_url):
                 if self.is_original(article_url):
@@ -231,8 +234,6 @@ class WeixinAccountCrawler(object):
                 else:
                 else:
                     continue
                     continue
 
 
-                # 记录处理过的id
-                article_id_list.append(int(article_id))
             except Exception as e:
             except Exception as e:
                 print(e)
                 print(e)
                 print(traceback.format_exc())
                 print(traceback.format_exc())