|
@@ -64,7 +64,7 @@ def str_to_timestamp(date_string) -> int:
|
|
|
return int(timestamp)
|
|
|
|
|
|
|
|
|
-def get_account_fans_by_dt(db_client) -> dict:
|
|
|
+def get_account_fans_by_dt(db_client) -> tuple[dict, dict]:
|
|
|
"""
|
|
|
获取每个账号发粉丝,通过日期来区分
|
|
|
:return:
|
|
@@ -83,16 +83,21 @@ def get_account_fans_by_dt(db_client) -> dict:
|
|
|
ORDER BY t1.date_str;
|
|
|
"""
|
|
|
result = db_client.select(sql)
|
|
|
- D = {}
|
|
|
+ # Fan counts keyed by gh_id, broken down by date
|
|
|
+ gh_id_fans_dt_dict = {}
|
|
|
+ # Fan counts keyed by gh_id, not broken down by date
|
|
|
+ gh_id_fans_dict = {}
|
|
|
for line in result:
|
|
|
dt = line[0]
|
|
|
fans = line[1]
|
|
|
gh_id = line[2]
|
|
|
- if D.get(gh_id):
|
|
|
- D[gh_id][dt] = fans
|
|
|
+ if gh_id_fans_dt_dict.get(gh_id):
|
|
|
+ gh_id_fans_dt_dict[gh_id][dt] = fans
|
|
|
else:
|
|
|
- D[gh_id] = {dt: fans}
|
|
|
- return D
|
|
|
+ gh_id_fans_dt_dict[gh_id] = {dt: fans}
|
|
|
+ if fans:
|
|
|
+ gh_id_fans_dict[gh_id] = fans
|
|
|
+ return gh_id_fans_dt_dict, gh_id_fans_dict
|
|
|
|
|
|
|
|
|
def get_publishing_accounts(db_client) -> list[dict]:
|
|
@@ -167,7 +172,7 @@ def cal_account_read_rate(gh_id_tuple) -> DataFrame:
|
|
|
pq_db = PQMySQL()
|
|
|
de_db = DeNetMysql()
|
|
|
response = []
|
|
|
- fans_dict_each_day = get_account_fans_by_dt(db_client=de_db)
|
|
|
+ fans_dict_each_day, fans_dict = get_account_fans_by_dt(db_client=de_db)
|
|
|
account_article_detail = get_account_articles_detail(
|
|
|
db_client=pq_db,
|
|
|
gh_id_tuple=gh_id_tuple
|
|
@@ -176,6 +181,8 @@ def cal_account_read_rate(gh_id_tuple) -> DataFrame:
|
|
|
gh_id = line['ghId']
|
|
|
dt = timestamp_to_str(line['updateTime'])
|
|
|
fans = fans_dict_each_day.get(gh_id, {}).get(dt, 0)
|
|
|
+ if fans == 0:
|
|
|
+ fans = fans_dict.get(gh_id, 0)
|
|
|
line['fans'] = fans
|
|
|
if fans:
|
|
|
line['readRate'] = line['show_view_count'] / fans if fans else 0
|
|
@@ -198,9 +205,8 @@ def cal_avg_account_read_rate(df, gh_id, index, dt) -> tuple:
|
|
|
& (df["updateTime"] <= max_time)
|
|
|
& (df['ItemIndex'] == index)
|
|
|
]
|
|
|
- # print("位置", index)
|
|
|
finalDF = filter_outlier_data(filterDataFrame)
|
|
|
- # finalDF = finalDF.sort_values(by=['updateTime'], ascending=False)
|
|
|
+ finalDF = finalDF.sort_values(by=['updateTime'], ascending=False)
|
|
|
# if index == 1:
|
|
|
# for i in finalDF.values.tolist():
|
|
|
# print(datetime.fromtimestamp(i[2]).strftime('%Y-%m-%d'), i)
|