|
|
@@ -51,17 +51,37 @@ def get_account_articles_detail(db_client, gh_id_tuple, min_publish_timestamp) -
|
|
|
"""
|
|
|
sql = f"""
|
|
|
SELECT
|
|
|
- ghId, accountName, ItemIndex, show_view_count, publish_timestamp
|
|
|
+ ghId, accountName, ItemIndex,
|
|
|
+ avg(show_view_count),
|
|
|
+ FROM_UNIXTIME(publish_timestamp, '%Y-%m-%d') AS pub_dt,
|
|
|
+ publish_timestamp
|
|
|
FROM
|
|
|
official_articles_v2
|
|
|
WHERE
|
|
|
- ghId IN {gh_id_tuple} and Type = '{const.BULK_PUBLISH_TYPE}' and publish_timestamp >= {min_publish_timestamp};
|
|
|
+ ghId IN {gh_id_tuple} and Type = '{const.BULK_PUBLISH_TYPE}' and publish_timestamp >= {min_publish_timestamp}
|
|
|
+ GROUP BY ghId, accountName, ItemIndex, pub_dt;
|
|
|
"""
|
|
|
response_list = db_client.fetch(query=sql, cursor_type=DictCursor)
|
|
|
return response_list
|
|
|
|
|
|
|
|
|
-def cal_account_read_rate(article_list, fans_dict) -> DataFrame:
|
|
|
def get_fans_from_group_send_accounts(db_client, gh_id, dt) -> int:
    """
    Return the fan count of a group-send account for a given publish date.

    The value is derived from ``long_articles_group_send_result`` as
    ``SUM(sent_count) / 8`` cast to a signed integer, restricted to rows
    matching ``publish_date = dt`` and ``gh_id = gh_id``.

    :param db_client: database client exposing ``fetch(query, cursor_type, params)``
    :param gh_id: account id, bound to the ``gh_id`` column
    :param dt: publish date, bound to the ``publish_date`` column
    :return: the computed fan count, or 0 when no value is available
    """
    # NOTE(review): the divisor 8 presumably is the number of articles per
    # group send -- confirm against the producer of this table.
    query = """
        SELECT CAST(SUM(sent_count) / 8 as signed) as fans
        FROM long_articles_group_send_result
        WHERE publish_date = %s AND gh_id = %s;
    """
    response = db_client.fetch(query=query, cursor_type=DictCursor, params=(dt, gh_id))
    if not response:
        return 0

    # An aggregate without GROUP BY yields one row even when nothing matches,
    # and SUM() is NULL in that case; coerce it to 0 so the return type is
    # always an int rather than None.
    return response[0]['fans'] or 0
|
|
|
+
|
|
|
+
|
|
|
+def cal_account_read_rate(db_client, article_list, fans_dict) -> DataFrame:
|
|
|
"""
|
|
|
计算账号位置的阅读率
|
|
|
:return:
|
|
|
@@ -71,8 +91,12 @@ def cal_account_read_rate(article_list, fans_dict) -> DataFrame:
|
|
|
gh_id = line['ghId']
|
|
|
dt = functions.timestamp_to_str(timestamp=line['publish_timestamp'], string_format='%Y-%m-%d')
|
|
|
fans = fans_dict.get(gh_id, {}).get(dt, const.DEFAULT_FANS)
|
|
|
+ if gh_id in const.GROUP_ACCOUNT_SET:
|
|
|
+ fans = get_fans_from_group_send_accounts(db_client, gh_id, dt)
|
|
|
+
|
|
|
if not fans:
|
|
|
fans = int(unauthorized_account.get(gh_id, const.DEFAULT_FANS))
|
|
|
+
|
|
|
if not fans:
|
|
|
fans = int(backup_account_fans.get(gh_id, const.DEFAULT_FANS))
|
|
|
log(
|
|
|
@@ -313,7 +337,7 @@ def main() -> None:
|
|
|
article_list = get_account_articles_detail(db_client=piaoquan_crawler_db_client, gh_id_tuple=gh_id_tuple, min_publish_timestamp=min_time)
|
|
|
|
|
|
# cal account read rate and make a dataframe
|
|
|
- read_rate_dataframe = cal_account_read_rate(article_list, fans_dict)
|
|
|
+ read_rate_dataframe = cal_account_read_rate(db_client=long_articles_db_client, article_list=article_list, fans_dict=fans_dict)
|
|
|
|
|
|
# update each day's data
|
|
|
update_single_day(dt, account_list, read_rate_dataframe, long_articles_db_client)
|