Przeglądaj źródła

兼容 aigc 系统出现账号名称为空的情况

luojunhui 1 tydzień temu
rodzic
commit
cce3bd2c7c
2 zmienionych plików z 36 dodań i 12 usunięć
  1. 5 1
      applications/aiditApi.py
  2. 31 11
      cal_account_read_rate_avg_daily.py

+ 5 - 1
applications/aiditApi.py

@@ -7,6 +7,10 @@ import json
 
 from applications.decoratorApi import retryOnTimeout
 from applications.denetMysql import DeNetMysql
+from config import apolloConfig
+
+config = apolloConfig()
+backup_gzh_account_name = json.loads(config.getConfigValue("backup_gzh_account_name"))
 
 HEADERS = {
     'Accept': 'application/json',
@@ -116,7 +120,7 @@ def get_publish_account_from_aigc():
     info_tuple = db.select(sql)
     info_list = [
         {
-            "name": line[0],
+            "name": line[0] if line[0] else backup_gzh_account_name.get(line[0], line[1]),
             "ghId": line[1],
             "follower_count": line[2],
             "account_init_timestamp": int(line[3]),

+ 31 - 11
cal_account_read_rate_avg_daily.py

@@ -22,6 +22,7 @@ const = UpdateAccountReadRateTaskConst()
 config = apolloConfig()
 unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
+backup_gzh_account_name = json.loads(config.getConfigValue("backup_gzh_account_name"))
 functions = Functions()
 read_rate_table = "long_articles_read_rate"
 
@@ -67,7 +68,7 @@ def cal_account_read_rate(article_list, fans_dict) -> DataFrame:
     :return:
     """
     response = []
-    for line in article_list:
+    for line in tqdm(article_list):
         gh_id = line['ghId']
         dt = functions.timestamp_to_str(timestamp=line['publish_timestamp'], string_format='%Y-%m-%d')
         fans = fans_dict.get(gh_id, {}).get(dt, const.DEFAULT_FANS)
@@ -85,7 +86,7 @@ def cal_account_read_rate(article_list, fans_dict) -> DataFrame:
         if fans > const.MIN_FANS:
             line['readRate'] = line['show_view_count'] / fans if fans else 0
             response.append(line)
-    return DataFrame(response, columns=['ghId', 'accountName', 'ItemIndex', 'show_view_count', 'publish_timestamp', 'readRate'])
+    return DataFrame(response, columns=['ghId', 'accountName', 'ItemIndex', 'show_view_count', 'publish_timestamp', 'fans', 'readRate'])
 
 
 def cal_avg_account_read_rate(df, gh_id, index, dt) -> dict:
@@ -115,12 +116,13 @@ def cal_avg_account_read_rate(df, gh_id, index, dt) -> dict:
     }
 
 
-def check_each_position(db_client, gh_id, index, dt, avg_rate) -> dict:
+def check_each_position(db_client, gh_id, account_name, index, dt, avg_rate) -> dict:
     """
     检验某个具体账号的具体文章的阅读率均值和前段日子的比较
     :param avg_rate: 当天计算出的阅读率均值
     :param db_client: 数据库连接
     :param gh_id: 账号 id
+    :param account_name: 账号名称
     :param index: 账号  index
     :param dt:
     :return:
@@ -128,15 +130,14 @@ def check_each_position(db_client, gh_id, index, dt, avg_rate) -> dict:
 
     dt = int(dt.replace("-", ""))
     select_sql = f"""
-        SELECT account_name, read_rate_avg
+        SELECT read_rate_avg
         FROM {read_rate_table}
         WHERE gh_id = '{gh_id}' and position = {index} and dt_version < {dt}
         ORDER BY dt_version DESC limit 1;
     """
-    result = db_client.fetch(select_sql)
+    result = db_client.fetch(select_sql, cursor_type=DictCursor)
     if result:
-        account_name = result[0][0]
-        previous_read_rate_avg = result[0][1]
+        previous_read_rate_avg = result[0]['read_rate_avg']
         relative_value = (avg_rate - previous_read_rate_avg) / previous_read_rate_avg
         if -const.RELATIVE_VALUE_THRESHOLD <= relative_value <= const.RELATIVE_VALUE_THRESHOLD:
             return {}
@@ -154,6 +155,8 @@ def check_each_position(db_client, gh_id, index, dt, avg_rate) -> dict:
                 ]
             }
             return response
+    else:
+        return {}
 
 
 def update_single_day(dt, account_list, article_df, lam):
@@ -177,12 +180,18 @@ def update_single_day(dt, account_list, article_df, lam):
 
     # processed_account_set
     processed_account_set = set()
+    without_name_account_set = set()
 
     for account in tqdm(account_list, desc=dt):
+        account_name = account['account_name']
+        gh_id = account['gh_id']
+        if not account_name:
+            account_name = backup_gzh_account_name.get(gh_id, "")
+
         for index in const.ARTICLE_INDEX_LIST:
             read_rate_detail = cal_avg_account_read_rate(
                 df=article_df,
-                gh_id=account['gh_id'],
+                gh_id=gh_id,
                 index=index,
                 dt=dt
             )
@@ -192,11 +201,14 @@ def update_single_day(dt, account_list, article_df, lam):
             articles_count = read_rate_detail['records']
             if articles_count:
                 processed_account_set.add(account['gh_id'])
+                if not account_name:
+                    without_name_account_set.add(gh_id)
                 # check read rate in position 1 and 2
                 if index in [1, 2]:
                     error_obj = check_each_position(
                         db_client=lam,
-                        gh_id=account['gh_id'],
+                        gh_id=gh_id,
+                        account_name=account_name,
                         index=index,
                         dt=dt,
                         avg_rate=read_rate_avg
@@ -216,8 +228,8 @@ def update_single_day(dt, account_list, article_df, lam):
                     lam.save(
                         query=insert_sql,
                         params=(
-                            account['account_name'],
-                            account['gh_id'],
+                            account_name,
+                            gh_id,
                             index,
                             read_rate_avg,
                             "从 {} 开始往前计算 31  天".format(dt),
@@ -232,6 +244,14 @@ def update_single_day(dt, account_list, article_df, lam):
                     print(e)
                     insert_error_list.append(str(e))
 
+    # bot no name account
+    if without_name_account_set:
+        bot(
+            title="更新阅读率均值,存在无名称账号",
+            detail=list(without_name_account_set),
+            mention=False
+        )
+
     # bot sql error
     if insert_error_list:
         bot(