Przeglądaj źródła

计算阅读均值优化

luojunhui 1 miesiąc temu
rodzic
commit
a4cb67591c

+ 14 - 1
applications/const/__init__.py

@@ -105,7 +105,7 @@ class updateAccountReadRateTaskConst:
     ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8]
 
 
-class updateAccountReadAvgTaskConst:
+class UpdateAccountReadAvgTaskConst:
     """
     更新账号阅读均值常量配置
     """
@@ -124,6 +124,19 @@ class updateAccountReadAvgTaskConst:
     ARTICLES_DAILY = 1
     TOULIU = 2
 
+    # 默认粉丝
+    DEFAULT_FANS = 0
+
+    # index list
+    ARTICLE_INDEX_LIST = [1, 2, 3, 4, 5, 6, 7, 8]
+
+    # 默认点赞
+    DEFAULT_LIKE = 0
+
+    # 状态
+    USING_STATUS = 1
+    NOT_USING_STATUS = 0
+
 
 class WeixinVideoCrawlerConst:
     """

+ 5 - 5
cal_account_read_rate_avg_daily.py

@@ -23,7 +23,7 @@ config = apolloConfig()
 unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
 functions = Functions()
-read_rate_table = "long_articles_read_rate_dev"
+read_rate_table = "long_articles_read_rate"
 
 
 def filter_outlier_data(group, key='show_view_count'):
@@ -187,7 +187,7 @@ def update_single_day(dt, account_list, article_df, lam):
             if articles_count:
                 processed_account_set.add(account['gh_id'])
                 # check read rate in position 1 and 2
-                if index in {1, 2}:
+                if index in [1, 2]:
                     error_obj = check_each_position(
                         db_client=lam,
                         gh_id=account['gh_id'],
@@ -225,6 +225,7 @@ def update_single_day(dt, account_list, article_df, lam):
                 except Exception as e:
                     print(e)
                     insert_error_list.append(str(e))
+
     # bot sql error
     if insert_error_list:
         bot(
@@ -245,7 +246,7 @@ def update_single_day(dt, account_list, article_df, lam):
                                         display_name="相对变化率")
         ]
         bot(
-            title="更新阅读率均值,头次出现异常值通知",
+            title="阅读率均值表异常信息, 总共处理{}个账号".format(len(processed_account_set)),
             detail={
                 "columns": columns,
                 "rows": error_list
@@ -257,14 +258,13 @@ def update_single_day(dt, account_list, article_df, lam):
     # if no error, send success info
     if not error_list and not insert_error_list:
         bot(
-            title="阅读率均值表更新成功",
+            title="阅读率均值表更新成功, 总共处理{}个账号".format(len(processed_account_set)),
             detail={
                 "日期": dt
             },
             mention=False
         )
 
-
 def main() -> None:
     """
     main function

+ 15 - 14
updateAccountV3.py

@@ -9,7 +9,7 @@ from datetime import datetime, timedelta
 from argparse import ArgumentParser
 from pymysql.cursors import DictCursor
 
-from applications.const import updateAccountReadAvgTaskConst
+from applications.const import UpdateAccountReadAvgTaskConst
 from applications.db import DatabaseConnector
 from applications.utils import fetch_account_fans
 from applications.utils import fetch_publishing_account_list
@@ -19,7 +19,7 @@ from config import long_articles_config, denet_config, piaoquan_crawler_config
 read_rate_table = "long_articles_read_rate"
 read_avg_table = "account_avg_info_v3"
 config = apolloConfig()
-const = updateAccountReadAvgTaskConst()
+const = UpdateAccountReadAvgTaskConst()
 unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 touliu_accounts = set(json.loads(config.getConfigValue("touliu_gh_id_list")))
 backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
@@ -65,7 +65,7 @@ class UpdateAccountInfoVersion3(object):
         do it
         """
         # get fans dict from aigc
-        fans_dict = fetch_account_fans(self.piaoquan_crawler_db_client, dt)
+        fans_dict = fetch_account_fans(self.denet_db_client, dt)
 
         # get publishing account list from aigc
         account_list = fetch_publishing_account_list(self.denet_db_client)
@@ -76,25 +76,26 @@ class UpdateAccountInfoVersion3(object):
         for account in tqdm(account_list, desc=dt):
             gh_id = account["gh_id"]
             business_type = const.TOULIU if gh_id in touliu_accounts else const.ARTICLES_DAILY
-            fans = fans_dict.get(gh_id, {}).get(dt, 0)
+            fans = fans_dict.get(gh_id, {}).get(dt, const.DEFAULT_FANS)
 
             # use unauthorized account's fans if not found in aigc
             if not fans:
-                fans = int(unauthorized_account.get(gh_id, 0))
+                fans = int(unauthorized_account.get(gh_id, const.DEFAULT_FANS))
 
             # use backup account's fans if not found in aigc
             if not fans:
-                fans = int(backup_account_fans.get(gh_id, 0))
+                fans = int(backup_account_fans.get(gh_id, const.DEFAULT_FANS))
 
             if fans:
-                for index in range(1, 9):
+                for index in const.ARTICLE_INDEX_LIST:
                     gh_id_position = "{}_{}".format(gh_id, index)
                     if read_rate_avg_dict.get(gh_id_position):
                         # fetch read rate avg
                         read_rate_avg = read_rate_avg_dict[gh_id_position]
                         # cal read avg
                         read_avg = fans * read_rate_avg
-                        print(read_rate_avg, read_avg)
+
+                        # insert into database
                         insert_sql = f"""
                             insert into {read_avg_table}
                             (gh_id, position, update_time, account_name, fans, read_avg, like_avg, status, account_type, account_mode, account_source, account_status, business_type, read_rate_avg)
@@ -111,8 +112,8 @@ class UpdateAccountInfoVersion3(object):
                                     account['account_name'],
                                     fans,
                                     read_avg,
-                                    0,
-                                    1,
+                                    const.DEFAULT_LIKE,
+                                    const.USING_STATUS,
                                     account['account_type'],
                                     account['mode_type'],
                                     account['account_source'],
@@ -151,7 +152,7 @@ class UpdateAccountInfoVersion3(object):
                         self.piaoquan_crawler_db_client.save(
                             query=update_status_sql,
                             params=(
-                                0, dt, account['gh_id'], index
+                                const.NOT_USING_STATUS, dt, account['gh_id'], index
                             )
                         )
 
@@ -166,15 +167,15 @@ def main():
                         help="Run only once for date in format of %Y-%m-%d. \
                                 If no specified, run as daily jobs.")
     args = parser.parse_args()
-    Up = UpdateAccountInfoVersion3()
+    update_account_read_avg_task = UpdateAccountInfoVersion3()
     if args.run_date:
-        Up.do_task_list(dt=args.run_date)
+        update_account_read_avg_task.do_task_list(dt=args.run_date)
     else:
         dt_object = datetime.fromtimestamp(int(time.time()))
         one_day = timedelta(days=1)
         yesterday = dt_object - one_day
         yesterday_str = yesterday.strftime('%Y-%m-%d')
-        Up.do_task_list(dt=yesterday_str)
+        update_account_read_avg_task.do_task_list(dt=yesterday_str)
 
 
 if __name__ == '__main__':