|
@@ -1,15 +1,27 @@
|
|
|
"""
|
|
|
@author: luojunhui
|
|
|
+CREATE TABLE `account_avg_info_v2` (
|
|
|
+ `gh_id` varchar(32) NOT NULL COMMENT 'ghid',
|
|
|
+ `position` int(11) NOT NULL COMMENT '位置',
|
|
|
+ `account_name` varchar(255) DEFAULT NULL COMMENT '账号名称',
|
|
|
+ `fans` int(10) DEFAULT NULL COMMENT '粉丝量',
|
|
|
+ `read_avg` double(8,2) DEFAULT NULL COMMENT '阅读均值',
|
|
|
+ `like_avg` double(8,2) DEFAULT NULL COMMENT '点赞均值',
|
|
|
+ `update_time` datetime DEFAULT NULL COMMENT '更新时间 dt',
|
|
|
+ `status` int(1) DEFAULT NULL COMMENT ' 状态',
|
|
|
+ PRIMARY KEY (`gh_id`,`position`)
|
|
|
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 AVG_ROW_LENGTH=202 ROW_FORMAT=DYNAMIC
|
|
|
"""
|
|
|
|
|
|
import json
|
|
|
-import threading
|
|
|
+import time
|
|
|
+import schedule
|
|
|
from datetime import datetime
|
|
|
|
|
|
from pandas import DataFrame
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
-from applications import PQMySQL
|
|
|
+from applications import PQMySQL, DeNetMysql, Functions
|
|
|
|
|
|
|
|
|
def filter_outlier_data(group, key='show_view_count'):
|
|
@@ -29,16 +41,46 @@ def filter_outlier_data(group, key='show_view_count'):
|
|
|
return filtered_group
|
|
|
|
|
|
|
|
|
-class UpdateMsgDaily(object):
|
|
|
+class UpdateAvgDaily(object):
|
|
|
"""
|
|
|
日常更新文章
|
|
|
"""
|
|
|
|
|
|
- db_client = PQMySQL()
|
|
|
- with open("config/accountInfoV2.json", encoding="utf-8") as f:
|
|
|
- account_list = json.loads(f.read())
|
|
|
- # subscription_account = [i for i in account_list if i['type'] == '订阅号']
|
|
|
- # server_account = [i for i in account_list if i['type'] == '服务号']
|
|
|
+ pqClient = PQMySQL()
|
|
|
+ deNetClient = DeNetMysql()
|
|
|
+
|
|
|
@classmethod
def getAccountList(cls):
    """
    Fetch the accounts whose averages are to be refreshed.

    A single query is sent through ``cls.deNetClient``. It unions the
    ``publish_account`` rows joined to
    ``wx_statistics_group_source_account`` with the ``publish_account``
    rows whose name appears in a fixed list.

    :return: list of dicts with the keys ``accountName``, ``ghId`` and
        ``fans``, one dict per row returned by the query
    """
    query = """
        SELECT t1.`name`,t1.gh_id, t1.follower_count
        FROM `publish_account` t1
        JOIN wx_statistics_group_source_account t2
        ON t1.id = t2.account_id
        UNION
        SELECT t1.`name`, t1.gh_id, t1.follower_count
        FROM `publish_account` t1
        where t1.`name` in (
        '晚年家人',
        '历史长河流淌',
        '史趣探秘',
        '暖心一隅',
        '小阳看天下',
        '小惠爱厨房');
        """
    rows = cls.deNetClient.select(query)
    # Columns arrive in SELECT order: name, gh_id, follower_count.
    return [
        {
            "accountName": row[0],
            "ghId": row[1],
            "fans": row[2]
        }
        for row in rows
    ]
|
|
|
|
|
|
@classmethod
|
|
|
def getAccountIdDict(cls):
|
|
@@ -52,6 +94,33 @@ class UpdateMsgDaily(object):
|
|
|
gh_id_dict[gh_id] = line
|
|
|
return gh_id_dict
|
|
|
|
|
|
@classmethod
def insertIntoMysql(cls, data):
    """
    Write one averaged record into ``account_avg_info_v2``.

    The DDL in the module docstring keys the table on
    ``(gh_id, position)``, and this job is scheduled to run every day.
    A plain INSERT would therefore fail with a duplicate-key error once
    a row for that account/position exists, so the statement is an
    upsert: an existing row is overwritten with the fresh values and
    re-activated (``status = 1``).

    :param data: dict providing ``gh_id``, ``position``,
        ``account_name``, ``fans``, ``avg_read``, ``avg_like`` and
        ``update_time``
    :return: None
    :raises KeyError: if ``data`` lacks one of the keys listed above
    """
    sql = """
        INSERT INTO account_avg_info_v2
        (gh_id, position, account_name, fans, read_avg, like_avg, update_time, status)
        values
        (%s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
            account_name = VALUES(account_name),
            fans = VALUES(fans),
            read_avg = VALUES(read_avg),
            like_avg = VALUES(like_avg),
            update_time = VALUES(update_time),
            status = VALUES(status);
        """
    cls.pqClient.update(
        sql=sql,
        params=(
            data['gh_id'],
            data['position'],
            data['account_name'],
            data['fans'],
            data['avg_read'],
            data['avg_like'],
            data['update_time'],
            1  # status: freshly written rows are the active ones
        )
    )
|
|
|
+
|
|
|
@classmethod
def getAllAvgRead(cls):
    """
    Recompute the read/like averages for every account and position.

    For each account returned by ``getAccountList`` and each position
    1-8, the averages come from ``getArticleByFilter`` over the trailing
    31 days. Each result is stored through ``insertIntoMysql`` stamped
    with today's date and also collected into
    ``new_account_avg_v3.json``. Finally, rows whose ``update_time`` is
    not today are flagged ``status = 0``.

    A failure for one account/position is reported and skipped so the
    remaining combinations are still processed.

    :return: None
    """
    import math  # local import keeps this method self-contained

    def _nan_to_zero(value):
        # An average over no articles is NaN; store 0 instead.
        # Non-numeric values (e.g. None) are passed through unchanged.
        try:
            return 0 if math.isnan(value) else value
        except TypeError:
            return value

    # Fix the time window once so every account is measured over the
    # same interval, however long the run takes.
    max_time = int(time.time())
    min_time = max_time - 31 * 24 * 3600
    dt_str = datetime.today().strftime("%Y-%m-%d")

    records = []
    for item in tqdm(cls.getAccountList()):
        for index in range(1, 9):
            try:
                account_name = item['accountName']
                avg_read, avg_like = cls.getArticleByFilter(
                    account_name=account_name,
                    index=index,
                    min_time=min_time,
                    max_time=max_time
                )
                obj = {
                    "account_name": account_name,
                    "gh_id": item['ghId'],
                    "fans": item.get('fans', 0),
                    "position": index,
                    "avg_read": _nan_to_zero(avg_read),
                    "avg_like": _nan_to_zero(avg_like),
                    "update_time": dt_str
                }
                cls.insertIntoMysql(obj)
                records.append(obj)
            except Exception as e:
                # Best effort: report which combination failed, move on.
                print(
                    f"update avg failed, ghId={item.get('ghId')}, "
                    f"position={index}: {e}"
                )

    with open("new_account_avg_v3.json", "w", encoding="utf-8") as f:
        f.write(json.dumps(records, ensure_ascii=False, indent=4))

    # Only retire old rows when this run actually wrote fresh ones;
    # otherwise a fully failed run would deactivate the whole table.
    if records:
        update_sql = """
            UPDATE account_avg_info_v2
            SET status = %s
            where update_time != %s;
            """
        cls.pqClient.update(sql=update_sql, params=(0, dt_str))
|
|
|
+
|
|
|
@classmethod
|
|
|
def getEachAvgRead(cls, account_name, index):
|
|
|
"""
|
|
@@ -103,7 +184,7 @@ class UpdateMsgDaily(object):
|
|
|
SELECT {", ".join(keys)}
|
|
|
FROM official_articles_v2
|
|
|
WHERE accountName = '{account_name}' and ItemIndex = {index};"""
|
|
|
- result = cls.db_client.select(sql=sql)
|
|
|
+ result = cls.pqClient.select(sql=sql)
|
|
|
return DataFrame(result, columns=keys)
|
|
|
|
|
|
@classmethod
|
|
@@ -141,16 +222,17 @@ class UpdateMsgDaily(object):
|
|
|
return finalDF['show_view_count'].mean(), finalDF['show_like_count'].mean()
|
|
|
|
|
|
|
|
|
-def job_with_thread(job_func):
|
|
|
def updateAvgJob():
    """
    Scheduler entry point: build an ``UpdateAvgDaily`` and run its
    ``getAllAvgRead`` once.

    :return: None
    """
    UpdateAvgDaily().getAllAvgRead()
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Register the daily run at 22:30; the job is handed to
    # Functions().job_with_thread together with the callable to run.
    threaded_runner = Functions().job_with_thread
    schedule.every().day.at("22:30").do(threaded_runner, updateAvgJob)

    # Poll the scheduler once per second, forever.
    while True:
        schedule.run_pending()
        time.sleep(1)
|