Browse Source

更新账号均值代码上线

罗俊辉 9 tháng trước
mục cha
commit
0033ac9f85
1 tập tin đã thay đổi với 118 bổ sung và 36 xóa
  1. 118 36
      updateAccountAvgDaily.py

+ 118 - 36
updateAccountAvgDaily.py

@@ -1,15 +1,27 @@
 """
 @author: luojunhui
+CREATE TABLE `account_avg_info_v2` (
+  `gh_id` varchar(32) NOT NULL COMMENT 'ghid',
+  `position` int(11) NOT NULL COMMENT '位置',
+  `account_name` varchar(255) DEFAULT NULL COMMENT '账号名称',
+  `fans` int(10) DEFAULT NULL COMMENT '粉丝量',
+  `read_avg` double(8,2) DEFAULT NULL COMMENT '阅读均值',
+  `like_avg` double(8,2) DEFAULT NULL COMMENT '点赞均值',
+  `update_time` datetime DEFAULT NULL COMMENT '更新时间 dt',
+  `status` int(1) DEFAULT NULL COMMENT ' 状态',
+  PRIMARY KEY (`gh_id`,`position`)
+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 AVG_ROW_LENGTH=202 ROW_FORMAT=DYNAMIC
 """
 
 import json
-import threading
+import time
+import schedule
 from datetime import datetime
 
 from pandas import DataFrame
 from tqdm import tqdm
 
-from applications import PQMySQL
+from applications import PQMySQL, DeNetMysql, Functions
 
 
 def filter_outlier_data(group, key='show_view_count'):
@@ -29,16 +41,46 @@ def filter_outlier_data(group, key='show_view_count'):
     return filtered_group
 
 
-class UpdateMsgDaily(object):
+class UpdateAvgDaily(object):
     """
     日常更新文章
     """
 
-    db_client = PQMySQL()
-    with open("config/accountInfoV2.json", encoding="utf-8") as f:
-        account_list = json.loads(f.read())
-    # subscription_account = [i for i in account_list if i['type'] == '订阅号']
-    # server_account = [i for i in account_list if i['type'] == '服务号']
+    pqClient = PQMySQL()
+    deNetClient = DeNetMysql()
+
+    @classmethod
+    def getAccountList(cls):
+        """
+        Fetch the accounts whose averages are to be computed.
+
+        Runs a UNION query through ``cls.deNetClient``: rows of
+        ``publish_account`` joined to ``wx_statistics_group_source_account``,
+        plus rows of ``publish_account`` matching a fixed list of names.
+
+        :return: list of dicts with keys ``accountName``, ``ghId`` and
+            ``fans``, one per row returned by the query
+        """
+        sql = f"""
+        SELECT t1.`name`,t1.gh_id, t1.follower_count
+        FROM `publish_account` t1
+        JOIN wx_statistics_group_source_account t2
+        ON t1.id = t2.account_id
+        UNION
+        SELECT t1.`name`, t1.gh_id, t1.follower_count
+        FROM `publish_account` t1
+        where t1.`name` in (
+                            '晚年家人',
+                            '历史长河流淌',
+                            '史趣探秘',
+                            '暖心一隅',
+                            '小阳看天下',
+                            '小惠爱厨房');
+        """
+        rows = cls.deNetClient.select(sql)
+        # Rows are read positionally, in SELECT order:
+        # (name, gh_id, follower_count).
+        return [
+            {"accountName": row[0], "ghId": row[1], "fans": row[2]}
+            for row in rows
+        ]
 
     @classmethod
     def getAccountIdDict(cls):
@@ -52,6 +94,33 @@ class UpdateMsgDaily(object):
             gh_id_dict[gh_id] = line
         return gh_id_dict
 
+    @classmethod
+    def insertIntoMysql(cls, data):
+        """
+        Insert one row of averages into ``account_avg_info_v2`` through
+        ``cls.pqClient``.
+
+        NOTE(review): the CREATE TABLE statement in the module docstring
+        declares PRIMARY KEY (gh_id, position). If that is the live schema,
+        a plain INSERT for the same account/position on a later run would
+        hit a duplicate key - confirm against the real table.
+
+        :param data: dict providing ``gh_id``, ``position``,
+            ``account_name``, ``fans``, ``avg_read``, ``avg_like`` and
+            ``update_time``
+        :return: None
+        """
+        sql = f"""
+        INSERT INTO account_avg_info_v2
+        (gh_id, position, account_name, fans, read_avg, like_avg, update_time, status)
+        values 
+        (%s, %s, %s, %s, %s, %s, %s, %s);
+        """
+        # Values follow the column order of the INSERT; the final column
+        # (status) is always written as 1.
+        row = (
+            data['gh_id'],
+            data['position'],
+            data['account_name'],
+            data['fans'],
+            data['avg_read'],
+            data['avg_like'],
+            data['update_time'],
+            1,
+        )
+        cls.pqClient.update(sql=sql, params=row)
+
     @classmethod
     def getAllAvgRead(cls):
         """
@@ -59,31 +128,43 @@ class UpdateMsgDaily(object):
         :return:
         """
         L = []
-        record_list = cls.account_list
+        record_list = cls.getAccountList()
+        dt_str = datetime.today().__str__().split(" ")[0]
         for item in tqdm(record_list):
             index_list = [i for i in range(1, 9)]
             for index in index_list:
-                account_name = item['name']
-                print(account_name, index)
-                avg_read, avg_like = cls.getArticleByFilter(
-                    account_name=account_name,
-                    index=index,
-                    min_time=1716480000,
-                    max_time=1721836800
-                )
-                obj = {
-                    "account_name": account_name,
-                    "gh_id": item['ghId'],
-                    "fans": item.get('follower_count', 0),
-                    "position": index,
-                    "avg_read": avg_read,
-                    "avg_like": avg_like
-                }
-                L.append(obj)
-
-        with open("new_account_avg_v2.json", "w", encoding="utf-8") as f:
+                try:
+                    account_name = item['accountName']
+                    avg_read, avg_like = cls.getArticleByFilter(
+                        account_name=account_name,
+                        index=index,
+                        min_time=int(time.time()) - 31 * 24 * 3600,
+                        max_time=int(time.time())
+                    )
+                    obj = {
+                        "account_name": account_name,
+                        "gh_id": item['ghId'],
+                        "fans": item.get('fans', 0),
+                        "position": index,
+                        "avg_read": avg_read if str(avg_read) != "nan" else 0,
+                        "avg_like": avg_like if str(avg_like) != "nan" else 0,
+                        "update_time": dt_str
+                    }
+                    cls.insertIntoMysql(obj)
+                    L.append(obj)
+                except Exception as e:
+                    print(e)
+
+        with open("new_account_avg_v3.json", "w", encoding="utf-8") as f:
             f.write(json.dumps(L, ensure_ascii=False, indent=4))
 
+        update_sql = f"""
+        UPDATE account_avg_info_v2
+        SET status = %s
+        where update_time != '{dt_str}';
+        """
+        cls.pqClient.update(sql=update_sql, params=0)
+
     @classmethod
     def getEachAvgRead(cls, account_name, index):
         """
@@ -103,7 +184,7 @@ class UpdateMsgDaily(object):
                     SELECT {", ".join(keys)}
                     FROM official_articles_v2
                     WHERE accountName = '{account_name}' and ItemIndex = {index};"""
-        result = cls.db_client.select(sql=sql)
+        result = cls.pqClient.select(sql=sql)
         return DataFrame(result, columns=keys)
 
     @classmethod
@@ -141,16 +222,17 @@ class UpdateMsgDaily(object):
         return finalDF['show_view_count'].mean(), finalDF['show_like_count'].mean()
 
 
-def job_with_thread(job_func):
+def updateAvgJob():
     """
-    每个任务放到单个线程中
-    :param job_func:
     :return:
     """
-    job_thread = threading.Thread(target=job_func)
-    job_thread.start()
+    S = UpdateAvgDaily()
+    S.getAllAvgRead()
 
 
 if __name__ == "__main__":
-    UMD = UpdateMsgDaily()
-    UMD.getAllAvgRead()
+    schedule.every().day.at("22:30").do(Functions().job_with_thread, updateAvgJob)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)