luojunhui 1 месяц назад
Родитель
Commit
97cb92aed2
1 изменённый файл: 50 добавлений и 43 удаления
  1. 50 43
      cal_account_read_rate_avg_daily.py

+ 50 - 43
cal_account_read_rate_avg_daily.py

@@ -21,7 +21,7 @@ from config import apolloConfig, long_articles_config, piaoquan_crawler_config,
 const = updateAccountReadRateTaskConst()
 config = apolloConfig()
 unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
-backup_account_detail = json.loads(config.getConfigValue("backup_account_detail"))
+backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
 functions = Functions()
 read_rate_table = "long_articles_read_rate_dev"
 
@@ -74,7 +74,7 @@ def cal_account_read_rate(article_list, fans_dict) -> DataFrame:
         if not fans:
             fans = int(unauthorized_account.get(gh_id, 0))
         if not fans:
-            fans = int(backup_account_detail.get(gh_id, 0))
+            fans = int(backup_account_fans.get(gh_id, 0))
         line['fans'] = fans
         if fans > 1000:
             line['readRate'] = line['show_view_count'] / fans if fans else 0
@@ -127,7 +127,7 @@ def check_each_position(db_client, gh_id, index, dt, avg_rate) -> dict:
         WHERE gh_id = '{gh_id}' and position = {index} and dt_version < {dt}
         ORDER BY dt_version DESC limit 1;
     """
-    result = db_client.select(select_sql)
+    result = db_client.fetch(select_sql)
     if result:
         account_name = result[0][0]
         previous_read_rate_avg = result[0][1]
@@ -169,7 +169,9 @@ def update_single_day(dt, account_list, article_df, lam):
         string_format='%Y-%m-%d'
     )
 
-    process_account_cnt = 0
+    # processed_account_set
+    processed_account_set = set()
+
     for account in tqdm(account_list, desc=dt):
         for index in const.ARTICLE_INDEX_LIST:
             read_rate_detail = cal_avg_account_read_rate(
@@ -183,7 +185,8 @@ def update_single_day(dt, account_list, article_df, lam):
             min_publish_time = read_rate_detail['min_publish_time']
             articles_count = read_rate_detail['records']
             if articles_count:
-                process_account_cnt += 1
+                processed_account_set.add(account['gh_id'])
+                # check read rate in position 1 and 2
                 if index in {1, 2}:
                     error_obj = check_each_position(
                         db_client=lam,
@@ -194,6 +197,7 @@ def update_single_day(dt, account_list, article_df, lam):
                     )
                     if error_obj:
                         error_list.append(error_obj)
+                # insert into database
                 try:
                     if not read_rate_avg:
                         continue
@@ -203,8 +207,8 @@ def update_single_day(dt, account_list, article_df, lam):
                         values
                         (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
                     """
-                    lam.update(
-                        sql=insert_sql,
+                    lam.save(
+                        query=insert_sql,
                         params=(
                             account['account_name'],
                             account['gh_id'],
@@ -219,43 +223,46 @@ def update_single_day(dt, account_list, article_df, lam):
                         )
                     )
                 except Exception as e:
+                    print(e)
                     insert_error_list.append(str(e))
-
-    print(process_account_cnt)
-    # if insert_error_list:
-    #     bot(
-    #         title="更新阅读率均值,存在sql 插入失败",
-    #         detail=insert_error_list
-    #     )
-    #
-    # if error_list:
-    #     columns = [
-    #         create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="account_name", display_name="账号名称"),
-    #         create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="position", display_name="文章位置"),
-    #         create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_yesterday",
-    #                                     display_name="昨日阅读率均值"),
-    #         create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_the_day_before_yesterday",
-    #                                     display_name="前天阅读率均值"),
-    #         create_feishu_columns_sheet(sheet_type="options", sheet_name="relative_change_rate",
-    #                                     display_name="相对变化率")
-    #     ]
-    #     bot(
-    #         title="更新阅读率均值,头次出现异常值通知",
-    #         detail={
-    #             "columns": columns,
-    #             "rows": error_list
-    #         },
-    #         table=True,
-    #         mention=False
-    #     )
-    #
-    # if not error_list and not insert_error_list:
-    #     bot(
-    #         title="阅读率均值表,更新成功",
-    #         detail={
-    #             "日期": dt
-    #         }
-    #     )
+    # bot sql error
+    if insert_error_list:
+        bot(
+            title="更新阅读率均值,存在sql 插入失败",
+            detail=insert_error_list
+        )
+
+    # bot outliers
+    if error_list:
+        columns = [
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="account_name", display_name="账号名称"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="position", display_name="文章位置"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_yesterday",
+                                        display_name="昨日阅读率均值"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_the_day_before_yesterday",
+                                        display_name="前天阅读率均值"),
+            create_feishu_columns_sheet(sheet_type="options", sheet_name="relative_change_rate",
+                                        display_name="相对变化率")
+        ]
+        bot(
+            title="更新阅读率均值,头次出现异常值通知",
+            detail={
+                "columns": columns,
+                "rows": error_list
+            },
+            table=True,
+            mention=False
+        )
+
+    # if no error, send success info
+    if not error_list and not insert_error_list:
+        bot(
+            title="阅读率均值表,更新成功",
+            detail={
+                "日期": dt
+            },
+            mention=False
+        )
 
 
 def main() -> None: