ソースを参照

Merge branch 'feature/luojunhui/fwh_fans_improve' of Server/LongArticleTaskServer into master

luojunhui 2 ヶ月 前
コミット
58e02565c9
1 ファイル変更30 行追加26 行削除
  1. 30 26
      applications/tasks/analysis_task/account_position_info.py

+ 30 - 26
applications/tasks/analysis_task/account_position_info.py

@@ -3,7 +3,7 @@ import traceback
 
 import numpy as np
 from collections import defaultdict
-from typing import Dict, List
+from typing import Dict, List, Set
 from pandas import DataFrame
 from scipy import stats
 from tqdm.asyncio import tqdm
@@ -43,23 +43,10 @@ class AccountPositionInfoConst:
     # 状态
     USING_STATUS = 1
     NOT_USING_STATUS = 0
+    PUBLISH_SUCCESS_STATUS = 2
 
-    # 服务号
-    GROUP_ACCOUNT_SET = {
-        "gh_9cf3b7ff486b",
-        "gh_ecb21c0453af",
-        "gh_45beb952dc74",
-        # "gh_84e744b16b3a",
-        "gh_b3ffc1ca3a04",
-        "gh_b8baac4296cb",
-        "gh_efaf7da157f5",
-        # "gh_5855bed97938",
-        "gh_b32125c73861",
-        "gh_761976bb98a6",
-        "gh_5e543853d8f0",
-        "gh_05a0318105be"
-        # "gh_61a72b720de3",
-    }
+    # 不再使用的服务号
+    NOT_USED_SERVER_ACCOUNT = {"gh_84e744b16b3a", "gh_5855bed97938", "gh_61a72b720de3"}
 
     # 违禁账号
     FORBIDDEN_GH_IDS = {
@@ -138,6 +125,21 @@ class AccountPositionReadRateAvg(AccountPositionInfoConst):
         account_list = await self.pool.async_fetch(query, db_name="aigc")
         return [i for i in account_list if "自动回复" not in str(i["account_remark"])]
 
+    # 获取服务号分组发文信息
+    async def get_server_group_publish_accounts(self) -> Set[str]:
+        query = """
+            select gzh_id from article_gzh_developer;
+        """
+        fetch_response = await self.pool.async_fetch(
+            query=query, db_name="piaoquan_crawler"
+        )
+        gh_id_list = [
+            i["gzh_id"]
+            for i in fetch_response
+            if i["gzh_id"] not in self.NOT_USED_SERVER_ACCOUNT
+        ]
+        return set(gh_id_list)
+
     # 获取统计周期内,每个账号的粉丝量
     async def get_fans_for_each_date(self, start_date: str):
         # 获取订阅号粉丝量
@@ -153,16 +155,19 @@ class AccountPositionReadRateAvg(AccountPositionInfoConst):
         """
         task1 = self.pool.async_fetch(query=query, db_name="aigc", params=(start_date,))
 
-        if self.GROUP_ACCOUNT_SET:
-            gh_ids = tuple(self.GROUP_ACCOUNT_SET)
-            placeholders = ",".join(["%s"] * len(gh_ids))
+        group_account_set = await self.get_server_group_publish_accounts()
+        if group_account_set:
             query_group = f"""
-                    SELECT gh_id, publish_date AS dt, CAST(SUM(sent_count) / 8 AS SIGNED) AS fans
-                    FROM long_articles_group_send_result
-                    WHERE publish_date >= %s AND gh_id IN ({placeholders})
-                    GROUP BY publish_date, gh_id;
+                select publish_date as dt, gh_id, account_name, CAST(SUM(total_sent_fans) AS SIGNED) AS fans
+                from (
+                        select publish_date, account_name, gh_id, push_id, avg(sent_count) as 'total_sent_fans'
+                        from long_articles_group_send_result
+                        where publish_date >= %s  and status = %s
+                        group by publish_date, account_name, push_id
+                    ) as lagsr
+                group by lagsr.publish_date, gh_id;
             """
-            params_group = (start_date, *gh_ids)
+            params_group = (start_date, self.PUBLISH_SUCCESS_STATUS)
             task2 = self.pool.async_fetch(query=query_group, params=params_group)
         else:
             # 没有 group 账号,返回空列表
@@ -320,7 +325,6 @@ class AccountPositionReadRateAvg(AccountPositionInfoConst):
         start_dt = self.generate_stat_duration(end_date)
 
         fans_mapper = await self.get_fans_for_each_date(start_date=start_dt)
-
         accounts = await self.get_publishing_accounts()
         for account in tqdm(accounts, desc="计算单个账号阅读率均值"):
             if account["gh_id"] in self.FORBIDDEN_GH_IDS: