Browse Source

上线账号抓取v2

luojunhui 6 months ago
parent
commit
bb036de401
1 changed file with 3 additions and 2 deletions
  1. +3 -2
      coldStartTasks/crawler/weixin_account_crawler.py

+ 3 - 2
coldStartTasks/crawler/weixin_account_crawler.py

@@ -213,6 +213,9 @@ class WeixinAccountCrawler(object):
         for crawler_article_obj in tqdm(crawler_article_list, desc="crawler article list"):
             try:
                 article_id = crawler_article_obj['id']
+                # 记录处理过的id
+                article_id_list.append(int(article_id))
+
                 article_url = crawler_article_obj['article_url']
                 # 判断文章是否原创
                 if self.is_original(article_url):
@@ -231,8 +234,6 @@ class WeixinAccountCrawler(object):
                 else:
                     continue
 
-                # 记录处理过的id
-                article_id_list.append(int(article_id))
             except Exception as e:
                 print(e)
                 print(traceback.format_exc())