Ver Fonte

新增抓公众号粉丝

luojunhui há 2 meses atrás
pai
commit
4f253151c5
1 ficheiro alterado com 12 adições e 4 exclusões
  1. 12 4
      applications/tasks/crawler_tasks/crawler_gzh_fans.py

+ 12 - 4
applications/tasks/crawler_tasks/crawler_gzh_fans.py

@@ -64,10 +64,17 @@ class CrawlerGzhFansBase(CrawlerGzhFansConst):
             query=query, params=(self.INVALID_STATUS, gh_id)
         )
 
+    # 修改抓取账号状态
+    async def update_account_crawl_history_status(self, gh_id, status):
+        query = """
+            UPDATE gzh_account_info SET crawl_history_status = %s WHERE gh_id = %s;
+        """
+        return await self.pool.async_save(query=query, params=(status, gh_id))
+
     # 获取账号列表
     async def get_account_list_from_database(self):
         query = """
-            SELECT gh_id, account_name, app_id, app_secret, cursor_openid, cursor_timestamp 
+            SELECT gh_id, account_name, app_id, app_secret, cursor_openid, cursor_timestamp, crawl_history_status
             FROM gzh_account_info WHERE status = %s and gh_id != 'gh_77f36c109fb1'; 
         """
         return await self.pool.async_fetch(query=query, params=(self.AVAILABLE_STATUS,))
@@ -232,7 +239,7 @@ class CrawlerGzhFans(CrawlerGzhFansBase):
                         env="cookie_monitor_bot",
                         mention=False,
                     )
-                    await self.set_cookie_token_as_invalid(account_info["gh_id"])
+                    await self.update_account_crawl_history_status(account_info["gh_id"], self.INVALID_STATUS)
 
                 next_cursor_id = user_list[-1].get("user_openid")
                 next_cursor_timestamp = user_list[-1].get("user_create_time")
@@ -307,9 +314,10 @@ class CrawlerGzhFans(CrawlerGzhFansBase):
         account_list = await self.get_account_list_from_database()
 
         match task_name:
-            case "get_fans":
+            case "get_history_fans":
+                crawl_history_accounts = [i for i in account_list if i['crawl_history_status'] == self.AVAILABLE_STATUS]
                 return await run_tasks_with_asyncio_task_group(
-                    task_list=account_list,
+                    task_list=crawl_history_accounts,
                     handler=self.crawl_history_fans_for_each_account,
                     max_concurrency=self.MAX_CONCURRENCY,
                     fail_fast=False,