|
@@ -1,4 +1,5 @@
|
|
|
import asyncio
|
|
import asyncio
|
|
|
|
|
+import json
|
|
|
|
|
|
|
|
from applications.crawler.wechat import (
|
|
from applications.crawler.wechat import (
|
|
|
get_gzh_fans,
|
|
get_gzh_fans,
|
|
@@ -198,13 +199,81 @@ class CrawlerGzhFansBase(CrawlerGzhFansConst):
|
|
|
query=query, params=(access_token, self.AVAILABLE_STATUS, gh_id)
|
|
query=query, params=(access_token, self.AVAILABLE_STATUS, gh_id)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
+ async def get_max_cursor_id(self, gh_id):
|
|
|
|
|
+ query = """
|
|
|
|
|
+ SELECT user_openid, user_create_time
|
|
|
|
|
+ FROM gzh_fans_info WHERE gh_id = %s
|
|
|
|
|
+ ORDER BY user_create_time DESC LIMIT 1;
|
|
|
|
|
+ """
|
|
|
|
|
+ return await self.pool.async_fetch(query=query, params=(gh_id,))
|
|
|
|
|
+
|
|
|
|
|
|
|
|
class CrawlerGzhFans(CrawlerGzhFansBase):
|
|
class CrawlerGzhFans(CrawlerGzhFansBase):
|
|
|
def __init__(self, pool, log_client):
|
|
def __init__(self, pool, log_client):
|
|
|
super().__init__(pool, log_client)
|
|
super().__init__(pool, log_client)
|
|
|
|
|
|
|
|
|
|
+ # 抓取账号新增粉丝
|
|
|
|
|
+ async def crawl_new_fans_for_each_account(self, account_info: dict):
|
|
|
|
|
+ cookie_obj = await self.get_cookie_token_from_database(account_info["gh_id"])
|
|
|
|
|
+ if not cookie_obj:
|
|
|
|
|
+ return
|
|
|
|
|
+
|
|
|
|
|
+ cursor_openid = ""
|
|
|
|
|
+ cursor_timestamp = ""
|
|
|
|
|
+
|
|
|
|
|
+ newest_fans = await self.get_max_cursor_id(account_info["gh_id"])
|
|
|
|
|
+ newest_timestamp = newest_fans[0]["user_create_time"]
|
|
|
|
|
+
|
|
|
|
|
+ while True:
|
|
|
|
|
+ response = await get_gzh_fans(
|
|
|
|
|
+ token=cookie_obj[0]["token"],
|
|
|
|
|
+ cookie=cookie_obj[0]["cookie"],
|
|
|
|
|
+ cursor_id=cursor_openid,
|
|
|
|
|
+ cursor_timestamp=cursor_timestamp,
|
|
|
|
|
+ )
|
|
|
|
|
+ print(json.dumps(response, ensure_ascii=False, indent=4))
|
|
|
|
|
+ base_resp = response.get("base_resp", {})
|
|
|
|
|
+ code = base_resp.get("ret")
|
|
|
|
|
+ error_msg = base_resp.get("err_msg")
|
|
|
|
|
+
|
|
|
|
|
+ match code:
|
|
|
|
|
+ case 0:
|
|
|
|
|
+ user_list = response.get("user_list", {}).get("user_info_list")
|
|
|
|
|
+ next_cursor_id = user_list[-1].get("user_openid")
|
|
|
|
|
+ next_cursor_timestamp = user_list[-1].get("user_create_time")
|
|
|
|
|
+ await self.insert_gzh_fans_batch(account_info, user_list)
|
|
|
|
|
+ if next_cursor_timestamp <= newest_timestamp:
|
|
|
|
|
+ print("新粉丝更新完成")
|
|
|
|
|
+ break
|
|
|
|
|
+
|
|
|
|
|
+ else:
|
|
|
|
|
+ cursor_openid = next_cursor_id
|
|
|
|
|
+ cursor_timestamp = next_cursor_timestamp
|
|
|
|
|
+
|
|
|
|
|
+ case "00040":
|
|
|
|
|
+ print(f"token 非法: {error_msg}")
|
|
|
|
|
+ await self.set_cookie_token_as_invalid(account_info["gh_id"])
|
|
|
|
|
+ await feishu_robot.bot(
|
|
|
|
|
+ title=f"{account_info['account_name']}的 token && cookie 失效,请及时更新",
|
|
|
|
|
+ detail=account_info,
|
|
|
|
|
+ env="cookie_monitor",
|
|
|
|
|
+ mention=False,
|
|
|
|
|
+ )
|
|
|
|
|
+ break
|
|
|
|
|
+
|
|
|
|
|
+ case _:
|
|
|
|
|
+ print("token 异常, 请及时刷新")
|
|
|
|
|
+ await self.set_cookie_token_as_invalid(account_info["gh_id"])
|
|
|
|
|
+ await feishu_robot.bot(
|
|
|
|
|
+ title=f"{account_info['account_name']}的 token && cookie 失效,请及时更新",
|
|
|
|
|
+ detail=account_info,
|
|
|
|
|
+ env="cookie_monitor",
|
|
|
|
|
+ mention=False,
|
|
|
|
|
+ )
|
|
|
|
|
+ break
|
|
|
|
|
+
|
|
|
# 抓取单个账号存量的粉丝
|
|
# 抓取单个账号存量的粉丝
|
|
|
- async def crawl_history_fans_for_each_account(self, account_info):
|
|
|
|
|
|
|
+ async def crawl_history_fans_for_each_account(self, account_info: dict):
|
|
|
cookie_obj = await self.get_cookie_token_from_database(account_info["gh_id"])
|
|
cookie_obj = await self.get_cookie_token_from_database(account_info["gh_id"])
|
|
|
if not cookie_obj:
|
|
if not cookie_obj:
|
|
|
return
|
|
return
|
|
@@ -340,9 +409,12 @@ class CrawlerGzhFans(CrawlerGzhFansBase):
|
|
|
|
|
|
|
|
case "get_new_fans":
|
|
case "get_new_fans":
|
|
|
for account in account_list:
|
|
for account in account_list:
|
|
|
- print(account)
|
|
|
|
|
|
|
+ if account["gh_id"] != 'gh_e7704c34d264':
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ await self.crawl_new_fans_for_each_account(account)
|
|
|
# return await run_tasks_with_asyncio_task_group()
|
|
# return await run_tasks_with_asyncio_task_group()
|
|
|
- return 0
|
|
|
|
|
|
|
+ return {}
|
|
|
|
|
|
|
|
case _:
|
|
case _:
|
|
|
return {"err_msg": "invalid task_name"}
|
|
return {"err_msg": "invalid task_name"}
|