|
@@ -3,7 +3,7 @@ import traceback
|
|
|
|
|
|
|
|
import numpy as np
|
|
import numpy as np
|
|
|
from collections import defaultdict
|
|
from collections import defaultdict
|
|
|
-from typing import Dict, List
|
|
|
|
|
|
|
+from typing import Dict, List, Set
|
|
|
from pandas import DataFrame
|
|
from pandas import DataFrame
|
|
|
from scipy import stats
|
|
from scipy import stats
|
|
|
from tqdm.asyncio import tqdm
|
|
from tqdm.asyncio import tqdm
|
|
@@ -43,23 +43,10 @@ class AccountPositionInfoConst:
|
|
|
# 状态
|
|
# 状态
|
|
|
USING_STATUS = 1
|
|
USING_STATUS = 1
|
|
|
NOT_USING_STATUS = 0
|
|
NOT_USING_STATUS = 0
|
|
|
|
|
+ PUBLISH_SUCCESS_STATUS = 2
|
|
|
|
|
|
|
|
- # 服务号
|
|
|
|
|
- GROUP_ACCOUNT_SET = {
|
|
|
|
|
- "gh_9cf3b7ff486b",
|
|
|
|
|
- "gh_ecb21c0453af",
|
|
|
|
|
- "gh_45beb952dc74",
|
|
|
|
|
- # "gh_84e744b16b3a",
|
|
|
|
|
- "gh_b3ffc1ca3a04",
|
|
|
|
|
- "gh_b8baac4296cb",
|
|
|
|
|
- "gh_efaf7da157f5",
|
|
|
|
|
- # "gh_5855bed97938",
|
|
|
|
|
- "gh_b32125c73861",
|
|
|
|
|
- "gh_761976bb98a6",
|
|
|
|
|
- "gh_5e543853d8f0",
|
|
|
|
|
- "gh_05a0318105be"
|
|
|
|
|
- # "gh_61a72b720de3",
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ # 不再使用的服务号
|
|
|
|
|
+ NOT_USED_SERVER_ACCOUNT = {"gh_84e744b16b3a", "gh_5855bed97938", "gh_61a72b720de3"}
|
|
|
|
|
|
|
|
# 违禁账号
|
|
# 违禁账号
|
|
|
FORBIDDEN_GH_IDS = {
|
|
FORBIDDEN_GH_IDS = {
|
|
@@ -138,6 +125,21 @@ class AccountPositionReadRateAvg(AccountPositionInfoConst):
|
|
|
account_list = await self.pool.async_fetch(query, db_name="aigc")
|
|
account_list = await self.pool.async_fetch(query, db_name="aigc")
|
|
|
return [i for i in account_list if "自动回复" not in str(i["account_remark"])]
|
|
return [i for i in account_list if "自动回复" not in str(i["account_remark"])]
|
|
|
|
|
|
|
|
|
|
+ # 获取服务号分组发文信息
|
|
|
|
|
+ async def get_server_group_publish_accounts(self) -> Set[str]:
|
|
|
|
|
+ query = """
|
|
|
|
|
+ select gzh_id from article_gzh_developer;
|
|
|
|
|
+ """
|
|
|
|
|
+ fetch_response = await self.pool.async_fetch(
|
|
|
|
|
+ query=query, db_name="piaoquan_crawler"
|
|
|
|
|
+ )
|
|
|
|
|
+ gh_id_list = [
|
|
|
|
|
+ i["gzh_id"]
|
|
|
|
|
+ for i in fetch_response
|
|
|
|
|
+ if i["gzh_id"] not in self.NOT_USED_SERVER_ACCOUNT
|
|
|
|
|
+ ]
|
|
|
|
|
+ return set(gh_id_list)
|
|
|
|
|
+
|
|
|
# 获取统计周期内,每个账号的粉丝量
|
|
# 获取统计周期内,每个账号的粉丝量
|
|
|
async def get_fans_for_each_date(self, start_date: str):
|
|
async def get_fans_for_each_date(self, start_date: str):
|
|
|
# 获取订阅号粉丝量
|
|
# 获取订阅号粉丝量
|
|
@@ -153,16 +155,19 @@ class AccountPositionReadRateAvg(AccountPositionInfoConst):
|
|
|
"""
|
|
"""
|
|
|
task1 = self.pool.async_fetch(query=query, db_name="aigc", params=(start_date,))
|
|
task1 = self.pool.async_fetch(query=query, db_name="aigc", params=(start_date,))
|
|
|
|
|
|
|
|
- if self.GROUP_ACCOUNT_SET:
|
|
|
|
|
- gh_ids = tuple(self.GROUP_ACCOUNT_SET)
|
|
|
|
|
- placeholders = ",".join(["%s"] * len(gh_ids))
|
|
|
|
|
|
|
+ group_account_set = await self.get_server_group_publish_accounts()
|
|
|
|
|
+ if group_account_set:
|
|
|
query_group = f"""
|
|
query_group = f"""
|
|
|
- SELECT gh_id, publish_date AS dt, CAST(SUM(sent_count) / 8 AS SIGNED) AS fans
|
|
|
|
|
- FROM long_articles_group_send_result
|
|
|
|
|
- WHERE publish_date >= %s AND gh_id IN ({placeholders})
|
|
|
|
|
- GROUP BY publish_date, gh_id;
|
|
|
|
|
|
|
+ select publish_date as dt, gh_id, account_name, CAST(SUM(total_sent_fans) AS SIGNED) AS fans
|
|
|
|
|
+ from (
|
|
|
|
|
+ select publish_date, account_name, gh_id, push_id, avg(sent_count) as 'total_sent_fans'
|
|
|
|
|
+ from long_articles_group_send_result
|
|
|
|
|
+ where publish_date >= %s and status = %s
|
|
|
|
|
+ group by publish_date, account_name, push_id
|
|
|
|
|
+ ) as lagsr
|
|
|
|
|
+ group by lagsr.publish_date, gh_id;
|
|
|
"""
|
|
"""
|
|
|
- params_group = (start_date, *gh_ids)
|
|
|
|
|
|
|
+ params_group = (start_date, self.PUBLISH_SUCCESS_STATUS)
|
|
|
task2 = self.pool.async_fetch(query=query_group, params=params_group)
|
|
task2 = self.pool.async_fetch(query=query_group, params=params_group)
|
|
|
else:
|
|
else:
|
|
|
# 没有 group 账号,返回空列表
|
|
# 没有 group 账号,返回空列表
|
|
@@ -320,7 +325,6 @@ class AccountPositionReadRateAvg(AccountPositionInfoConst):
|
|
|
start_dt = self.generate_stat_duration(end_date)
|
|
start_dt = self.generate_stat_duration(end_date)
|
|
|
|
|
|
|
|
fans_mapper = await self.get_fans_for_each_date(start_date=start_dt)
|
|
fans_mapper = await self.get_fans_for_each_date(start_date=start_dt)
|
|
|
-
|
|
|
|
|
accounts = await self.get_publishing_accounts()
|
|
accounts = await self.get_publishing_accounts()
|
|
|
for account in tqdm(accounts, desc="计算单个账号阅读率均值"):
|
|
for account in tqdm(accounts, desc="计算单个账号阅读率均值"):
|
|
|
if account["gh_id"] in self.FORBIDDEN_GH_IDS:
|
|
if account["gh_id"] in self.FORBIDDEN_GH_IDS:
|