|
@@ -21,7 +21,7 @@ from config import apolloConfig, long_articles_config, piaoquan_crawler_config,
|
|
|
const = updateAccountReadRateTaskConst()
|
|
|
config = apolloConfig()
|
|
|
unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
|
|
|
-backup_account_detail = json.loads(config.getConfigValue("backup_account_detail"))
|
|
|
+backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
|
|
|
functions = Functions()
|
|
|
read_rate_table = "long_articles_read_rate_dev"
|
|
|
|
|
@@ -74,7 +74,7 @@ def cal_account_read_rate(article_list, fans_dict) -> DataFrame:
|
|
|
if not fans:
|
|
|
fans = int(unauthorized_account.get(gh_id, 0))
|
|
|
if not fans:
|
|
|
- fans = int(backup_account_detail.get(gh_id, 0))
|
|
|
+ fans = int(backup_account_fans.get(gh_id, 0))
|
|
|
line['fans'] = fans
|
|
|
if fans > 1000:
|
|
|
line['readRate'] = line['show_view_count'] / fans if fans else 0
|
|
@@ -127,7 +127,7 @@ def check_each_position(db_client, gh_id, index, dt, avg_rate) -> dict:
|
|
|
WHERE gh_id = '{gh_id}' and position = {index} and dt_version < {dt}
|
|
|
ORDER BY dt_version DESC limit 1;
|
|
|
"""
|
|
|
- result = db_client.select(select_sql)
|
|
|
+ result = db_client.fetch(select_sql)
|
|
|
if result:
|
|
|
account_name = result[0][0]
|
|
|
previous_read_rate_avg = result[0][1]
|
|
@@ -169,7 +169,9 @@ def update_single_day(dt, account_list, article_df, lam):
|
|
|
string_format='%Y-%m-%d'
|
|
|
)
|
|
|
|
|
|
- process_account_cnt = 0
|
|
|
+ # processed_account_set
|
|
|
+ processed_account_set = set()
|
|
|
+
|
|
|
for account in tqdm(account_list, desc=dt):
|
|
|
for index in const.ARTICLE_INDEX_LIST:
|
|
|
read_rate_detail = cal_avg_account_read_rate(
|
|
@@ -183,7 +185,8 @@ def update_single_day(dt, account_list, article_df, lam):
|
|
|
min_publish_time = read_rate_detail['min_publish_time']
|
|
|
articles_count = read_rate_detail['records']
|
|
|
if articles_count:
|
|
|
- process_account_cnt += 1
|
|
|
+ processed_account_set.add(account['gh_id'])
|
|
|
+ # check read rate in position 1 and 2
|
|
|
if index in {1, 2}:
|
|
|
error_obj = check_each_position(
|
|
|
db_client=lam,
|
|
@@ -194,6 +197,7 @@ def update_single_day(dt, account_list, article_df, lam):
|
|
|
)
|
|
|
if error_obj:
|
|
|
error_list.append(error_obj)
|
|
|
+ # insert into database
|
|
|
try:
|
|
|
if not read_rate_avg:
|
|
|
continue
|
|
@@ -203,8 +207,8 @@ def update_single_day(dt, account_list, article_df, lam):
|
|
|
values
|
|
|
(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
|
- lam.update(
|
|
|
- sql=insert_sql,
|
|
|
+ lam.save(
|
|
|
+ query=insert_sql,
|
|
|
params=(
|
|
|
account['account_name'],
|
|
|
account['gh_id'],
|
|
@@ -219,43 +223,46 @@ def update_single_day(dt, account_list, article_df, lam):
|
|
|
)
|
|
|
)
|
|
|
except Exception as e:
|
|
|
+ print(e)
|
|
|
insert_error_list.append(str(e))
|
|
|
-
|
|
|
- print(process_account_cnt)
|
|
|
- # if insert_error_list:
|
|
|
- # bot(
|
|
|
- # title="更新阅读率均值,存在sql 插入失败",
|
|
|
- # detail=insert_error_list
|
|
|
- # )
|
|
|
- #
|
|
|
- # if error_list:
|
|
|
- # columns = [
|
|
|
- # create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="account_name", display_name="账号名称"),
|
|
|
- # create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="position", display_name="文章位置"),
|
|
|
- # create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_yesterday",
|
|
|
- # display_name="昨日阅读率均值"),
|
|
|
- # create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_the_day_before_yesterday",
|
|
|
- # display_name="前天阅读率均值"),
|
|
|
- # create_feishu_columns_sheet(sheet_type="options", sheet_name="relative_change_rate",
|
|
|
- # display_name="相对变化率")
|
|
|
- # ]
|
|
|
- # bot(
|
|
|
- # title="更新阅读率均值,头次出现异常值通知",
|
|
|
- # detail={
|
|
|
- # "columns": columns,
|
|
|
- # "rows": error_list
|
|
|
- # },
|
|
|
- # table=True,
|
|
|
- # mention=False
|
|
|
- # )
|
|
|
- #
|
|
|
- # if not error_list and not insert_error_list:
|
|
|
- # bot(
|
|
|
- # title="阅读率均值表,更新成功",
|
|
|
- # detail={
|
|
|
- # "日期": dt
|
|
|
- # }
|
|
|
- # )
|
|
|
+ # bot sql error
|
|
|
+ if insert_error_list:
|
|
|
+ bot(
|
|
|
+ title="更新阅读率均值,存在sql 插入失败",
|
|
|
+ detail=insert_error_list
|
|
|
+ )
|
|
|
+
|
|
|
+ # bot outliers
|
|
|
+ if error_list:
|
|
|
+ columns = [
|
|
|
+ create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="account_name", display_name="账号名称"),
|
|
|
+ create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="position", display_name="文章位置"),
|
|
|
+ create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_yesterday",
|
|
|
+ display_name="昨日阅读率均值"),
|
|
|
+ create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_the_day_before_yesterday",
|
|
|
+ display_name="前天阅读率均值"),
|
|
|
+ create_feishu_columns_sheet(sheet_type="options", sheet_name="relative_change_rate",
|
|
|
+ display_name="相对变化率")
|
|
|
+ ]
|
|
|
+ bot(
|
|
|
+ title="更新阅读率均值,头次出现异常值通知",
|
|
|
+ detail={
|
|
|
+ "columns": columns,
|
|
|
+ "rows": error_list
|
|
|
+ },
|
|
|
+ table=True,
|
|
|
+ mention=False
|
|
|
+ )
|
|
|
+
|
|
|
+ # if no error, send success info
|
|
|
+ if not error_list and not insert_error_list:
|
|
|
+ bot(
|
|
|
+ title="阅读率均值表,更新成功",
|
|
|
+ detail={
|
|
|
+ "日期": dt
|
|
|
+ },
|
|
|
+ mention=False
|
|
|
+ )
|
|
|
|
|
|
|
|
|
def main() -> None:
|