|
@@ -4,6 +4,8 @@
|
|
|
import json
|
|
|
import time
|
|
|
|
|
|
+import numpy as np
|
|
|
+from scipy import stats
|
|
|
from tqdm import tqdm
|
|
|
from datetime import datetime, timedelta
|
|
|
from argparse import ArgumentParser
|
|
@@ -60,6 +62,28 @@ class UpdateAccountInfoVersion3(object):
|
|
|
account_read_rate_dict[key] = item['read_rate_avg']
|
|
|
return account_read_rate_dict
|
|
|
|
|
|
def cal_read_avg_ci(self, gh_id, position):
    """
    Compute the upper confidence bound of the stored read average.

    Fetches the most recent ``const.STAT_PERIOD`` ``read_avg`` values saved
    in ``read_avg_table`` for the given account and position, and returns
    ``mean + t * std / sqrt(n)``, where ``std`` is the sample standard
    deviation (``ddof=1``) and ``t`` is the Student-t quantile
    ``const.DEFAULT_UPPER_QUANTILE`` with ``n - 1`` degrees of freedom.

    :param gh_id: account id used to filter the history rows
    :param position: article position used to filter the history rows
    :return: the upper bound as a plain ``float``; ``None`` when fewer than
        two non-NULL samples exist or the bound is not a finite number
        (the statistic is undefined there and would otherwise be NaN)
    """
    fetch_query = f"""
        select read_avg
        from {read_avg_table}
        where gh_id = %s and position = %s
        order by update_time desc limit {const.STAT_PERIOD};
    """
    fetch_response_list = self.piaoquan_crawler_db_client.fetch(
        query=fetch_query, params=(gh_id, position), cursor_type=DictCursor
    )

    # Coerce to a float array up front: drops NULL rows and normalises
    # whatever numeric type the driver hands back (int, float, Decimal).
    # `or ()` tolerates a client that returns None for "no rows".
    samples = np.asarray(
        [
            row["read_avg"]
            for row in (fetch_response_list or ())
            if row["read_avg"] is not None
        ],
        dtype=float,
    )

    sample_count = samples.size
    if sample_count < 2:
        # A sample std (ddof=1) and a t quantile with df = n - 1 both need
        # n >= 2; below that NumPy/SciPy return NaN, which must not reach
        # the insert/update statements built by the caller.
        return None

    mean = samples.mean()
    standard_error = samples.std(ddof=1) / np.sqrt(sample_count)
    t_quantile = stats.t.ppf(const.DEFAULT_UPPER_QUANTILE, df=sample_count - 1)
    upper_bound = mean + t_quantile * standard_error

    # Guard against inf/NaN (e.g. a quantile of 1.0) and hand back a plain
    # Python float instead of a NumPy scalar for the database parameters.
    if not np.isfinite(upper_bound):
        return None
    return float(upper_bound)
|
|
|
+
|
|
|
def do_task_list(self, dt):
|
|
|
"""
|
|
|
do it
|
|
@@ -95,12 +119,16 @@ class UpdateAccountInfoVersion3(object):
|
|
|
# cal read avg
|
|
|
read_avg = fans * read_rate_avg
|
|
|
|
|
|
+ # cal read avg ci upper
|
|
|
+ read_avg_ci_upper = self.cal_read_avg_ci(gh_id, index)
|
|
|
+
|
|
|
# insert into database
|
|
|
insert_sql = f"""
|
|
|
insert into {read_avg_table}
|
|
|
- (gh_id, position, update_time, account_name, fans, read_avg, like_avg, status, account_type, account_mode, account_source, account_status, business_type, read_rate_avg)
|
|
|
+ (gh_id, position, update_time, account_name, fans, read_avg, like_avg, status, account_type,
|
|
|
+ account_mode, account_source, account_status, business_type, read_rate_avg, read_avg_ci_upper)
|
|
|
values
|
|
|
- (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
+ (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
|
|
|
"""
|
|
|
try:
|
|
|
self.piaoquan_crawler_db_client.save(
|
|
@@ -119,13 +147,14 @@ class UpdateAccountInfoVersion3(object):
|
|
|
account['account_source'],
|
|
|
account['status'],
|
|
|
business_type,
|
|
|
- read_rate_avg
|
|
|
+ read_rate_avg,
|
|
|
+ read_avg_ci_upper
|
|
|
)
|
|
|
)
|
|
|
except Exception as e:
|
|
|
update_sql = f"""
|
|
|
update {read_avg_table}
|
|
|
- set fans = %s, read_avg = %s, read_rate_avg = %s
|
|
|
+ set fans = %s, read_avg = %s, read_rate_avg = %s, read_avg_ci_upper = %s
|
|
|
where gh_id = %s and position = %s and update_time = %s
|
|
|
"""
|
|
|
try:
|
|
@@ -135,6 +164,7 @@ class UpdateAccountInfoVersion3(object):
|
|
|
fans,
|
|
|
read_avg,
|
|
|
read_rate_avg,
|
|
|
+ read_avg_ci_upper,
|
|
|
account['gh_id'],
|
|
|
index,
|
|
|
dt
|