Browse Source

计算阅读均值优化

luojunhui 1 month ago
parent
commit
b9e72d47ef
2 changed files with 21 additions and 8 deletions
  1. +7 -1
      applications/const/__init__.py
  2. +14 -7
      cal_account_read_rate_avg_daily.py

+ 7 - 1
applications/const/__init__.py

@@ -88,7 +88,7 @@ class updatePublishedMsgTaskConst:
     SUBSCRIBE_FAIL_RATE_THRESHOLD = 0.3
 
 
-class updateAccountReadRateTaskConst:
+class UpdateAccountReadRateTaskConst:
     """
     更新账号阅读率常量配置
     """
@@ -104,6 +104,12 @@ class updateAccountReadRateTaskConst:
     # 文章位置
     ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8]
 
+    # 默认粉丝
+    DEFAULT_FANS = 0
+
+    # 最低粉丝量
+    MIN_FANS = 1000
+
 
 class UpdateAccountReadAvgTaskConst:
     """

+ 14 - 7
cal_account_read_rate_avg_daily.py

@@ -9,16 +9,16 @@ from argparse import ArgumentParser
 from datetime import datetime
 from pymysql.cursors import DictCursor
 
-from applications import bot, Functions
+from applications import bot, Functions, log
 from applications import create_feishu_columns_sheet
 from applications.db import DatabaseConnector
-from applications.const import updateAccountReadRateTaskConst
+from applications.const import UpdateAccountReadRateTaskConst
 from applications.utils import fetch_publishing_account_list
 from applications.utils import fetch_account_fans
 from config import apolloConfig, long_articles_config, piaoquan_crawler_config, denet_config
 
 
-const = updateAccountReadRateTaskConst()
+const = UpdateAccountReadRateTaskConst()
 config = apolloConfig()
 unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
@@ -70,13 +70,19 @@ def cal_account_read_rate(article_list, fans_dict) -> DataFrame:
     for line in article_list:
         gh_id = line['ghId']
         dt = functions.timestamp_to_str(timestamp=line['publish_timestamp'], string_format='%Y-%m-%d')
-        fans = fans_dict.get(gh_id, {}).get(dt, 0)
+        fans = fans_dict.get(gh_id, {}).get(dt, const.DEFAULT_FANS)
         if not fans:
-            fans = int(unauthorized_account.get(gh_id, 0))
+            fans = int(unauthorized_account.get(gh_id, const.DEFAULT_FANS))
         if not fans:
-            fans = int(backup_account_fans.get(gh_id, 0))
+            fans = int(backup_account_fans.get(gh_id, const.DEFAULT_FANS))
+            log(
+                task='cal_read_rate_avg_task',
+                function='cal_account_read_rate',
+                message='未获取到粉丝,使用备份粉丝表',
+                data=line
+            )
         line['fans'] = fans
-        if fans > 1000:
+        if fans > const.MIN_FANS:
             line['readRate'] = line['show_view_count'] / fans if fans else 0
             response.append(line)
     return DataFrame(response, columns=['ghId', 'accountName', 'ItemIndex', 'show_view_count', 'publish_timestamp', 'readRate'])
@@ -265,6 +271,7 @@ def update_single_day(dt, account_list, article_df, lam):
             mention=False
         )
 
+
 def main() -> None:
     """
     main function