Optimize database

luojunhui committed 6 months ago
commit 537531ae59
1 file changed, 46 insertions(+), 30 deletions(-)

updateMinigramInfoDaily.py (+46 -30)

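The commit drops the class-level longArticlesMySQL / PQMySQL handles in favor of per-instance DatabaseConnector clients built from config objects. The connector itself is not part of this diff; judging only from the call sites below (connect(), fetch(query), save(query=..., params=...)), it presumably looks roughly like the following sketch, which is an assumption rather than the actual applications.db implementation:

import pymysql


class DatabaseConnector:
    """Sketch of the interface implied by the call sites; details assumed."""

    def __init__(self, config: dict):
        # config objects such as piaoquan_crawler_config are assumed to be
        # pymysql-style dicts (host, user, password, db, ...)
        self.config = config
        self.connection = None

    def connect(self) -> None:
        # open the MySQL connection from the injected config
        self.connection = pymysql.connect(**self.config)

    def fetch(self, query: str) -> tuple:
        # read path, replacing the old .select()
        with self.connection.cursor() as cursor:
            cursor.execute(query)
            return cursor.fetchall()

    def save(self, query: str, params: tuple) -> None:
        # write path, replacing the old .update(); commits per statement
        with self.connection.cursor() as cursor:
            cursor.execute(query, params)
        self.connection.commit()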
@@ -3,7 +3,6 @@
 @description Update Minigram Info Daily
 """
 import time
-import sys
 import traceback
 
 from tqdm import tqdm
@@ -11,7 +10,9 @@ from datetime import datetime, timedelta
 import schedule
 from argparse import ArgumentParser
 
-from applications import longArticlesMySQL, PQMySQL, WeixinSpider, Functions, log, bot
+from applications import WeixinSpider, Functions, log, bot
+from applications.db import DatabaseConnector
+from config import long_articles_config, piaoquan_crawler_config
 
 TASK_NAME = "updateMinigramInfoDaily"
 SPIDER_SUCCESS_STATUS = 0
@@ -30,13 +31,35 @@ class DailyDataManager(object):
     """
     Daily data update
     """
-    long_articles_db = longArticlesMySQL()
-    pq_db = PQMySQL()
-    wx_spider = WeixinSpider()
-    functions = Functions()
 
-    @classmethod
-    def get_published_articles(cls, biz_date):
+    def __init__(self):
+        self.piaoquan_crawler_db_client = None
+        self.long_articles_db_client = None
+        self.spider = WeixinSpider()
+
+    def init_database(self) -> None:
+        """
+        init database connector
+        :return:
+        """
+        # initialize the database connections
+        try:
+            self.piaoquan_crawler_db_client = DatabaseConnector(piaoquan_crawler_config)
+            self.piaoquan_crawler_db_client.connect()
+            self.long_articles_db_client = DatabaseConnector(long_articles_config)
+            self.long_articles_db_client.connect()
+        except Exception as e:
+            error_msg = traceback.format_exc()
+            bot(
+                title="Minigram fission info update task failed to connect to database",
+                detail={
+                    "error": e,
+                    "msg": error_msg
+                }
+            )
+            return
+
+    def get_published_articles(self, biz_date):
         """
         Fetch info of published articles; updateTime is set to 0:00 of the previous day and converted to a timestamp
         :return:
@@ -48,11 +71,8 @@ class DailyDataManager(object):
         select ContentUrl, wx_sn, publish_timestamp, accountName, title
         from official_articles_v2
         where publish_timestamp between {biz_date_ts} and {biz_date_end_ts};
---         and accountName in (
---                         select distinct account_name from account_avg_info_v2
---                         );
         """
-        result_list = cls.pq_db.select(sql2)
+        result_list = self.piaoquan_crawler_db_client.fetch(sql2)
         log(
             task=TASK_NAME,
             function="get_published_articles",
@@ -60,8 +80,7 @@ class DailyDataManager(object):
         )
         return result_list
 
-    @classmethod
-    def update_article_info(cls, line):
+    def update_article_info(self, line):
         """
         update info into mysql
         :return:
@@ -69,7 +88,7 @@ class DailyDataManager(object):
         url = line[0]
         update_time = line[2]
         wx_sn = line[1].decode()
-        article_detail = cls.get_root_source_ids(line)
+        article_detail = self.get_root_source_ids(line)
         if article_detail:
             response_code = article_detail['code']
             if response_code == SPIDER_SUCCESS_STATUS:
@@ -109,8 +128,8 @@ class DailyDataManager(object):
                                         values
                                         (%s, %s, %s, %s, %s, %s, %s, %s, %s);
                                     """
-                                cls.pq_db.update(
-                                    sql=insert_sql,
+                                self.piaoquan_crawler_db_client.save(
+                                    query=insert_sql,
                                     params=(
                                         wx_sn,
                                         kimi_title,
@@ -144,15 +163,14 @@ class DailyDataManager(object):
         else:
             return line
 
-    @classmethod
-    def get_root_source_ids(cls, data_info):
+    def get_root_source_ids(self, data_info):
         """
         Fetch data_info via the spider interface
         :return:
         """
         url = data_info[0]
         try:
-            article_detail = cls.wx_spider.get_article_text(url)
+            article_detail = self.spider.get_article_text(url)
             return article_detail
         except Exception as e:
             log(
@@ -166,8 +184,7 @@ class DailyDataManager(object):
             )
             return False
 
-    @classmethod
-    def get_minigram_info(cls, rootSourceId):
+    def get_minigram_info(self, rootSourceId):
         """
 
         :param rootSourceId:
@@ -178,7 +195,7 @@ class DailyDataManager(object):
         from changwen_data_base_v2
         where rootsourceid = '{rootSourceId}';
         """
-        result_list = cls.long_articles_db.select(sql)
+        result_list = self.long_articles_db_client.fetch(sql)
 
         def summarize(values):
             """
@@ -255,8 +272,7 @@ class DailyDataManager(object):
             )
             return None
 
-    @classmethod
-    def update_minigram_detail(cls, biz_date):
+    def update_minigram_detail(self, biz_date):
         """
         :return:
         """
@@ -269,7 +285,7 @@ class DailyDataManager(object):
             from long_articles_detail_info
             where publish_dt between '{datestr_begin}' and '{datestr_end}';
         """
-        source_id_list = cls.pq_db.select(sql)
+        source_id_list = self.piaoquan_crawler_db_client.fetch(sql)
         log(
             task=TASK_NAME,
             function="update_minigram_detail",
@@ -279,7 +295,7 @@ class DailyDataManager(object):
         for item in tqdm(source_id_list):
             s_id = item[0]
             try:
-                result = cls.get_minigram_info(s_id)
+                result = self.get_minigram_info(s_id)
                 for key in result:
                     recall_dt = key
                     first_level = result[key][0]
@@ -293,8 +309,8 @@ class DailyDataManager(object):
                         where root_source_id = %s and recall_dt = %s;
                     """
                     try:
-                        cls.pq_db.update(
-                            sql=update_sql,
+                        self.piaoquan_crawler_db_client.save(
+                            query=update_sql,
                             params=(
                                 first_level, fission_0, fission_1, fission_2, s_id, recall_dt
                             )
@@ -319,7 +335,7 @@ class DailyDataManager(object):
             bot(
                 title="{} fail because of lam db error".format(TASK_NAME),
                 detail={
-                    "fail_count":  fail_count
+                    "fail_count": fail_count
                 }
             )
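
Since the refactor removes the class attributes, callers now have to construct an instance and open connections explicitly before any query runs. The __main__ / ArgumentParser / schedule wiring is not shown in this commit, so the driver below is only a hypothetical usage sketch. Note also that init_database() returns after sending the bot alert instead of re-raising, so a failed connection leaves both clients as None and the first fetch() call would raise AttributeError:

from datetime import datetime, timedelta


def run_daily_update():
    # hypothetical driver; the real entry point sits outside this diff
    biz_date = datetime.today() - timedelta(days=1)  # assumed: process yesterday
    manager = DailyDataManager()
    manager.init_database()  # connects both clients, bot()-alerts on failure
    for line in manager.get_published_articles(biz_date):
        manager.update_article_info(line)
    manager.update_minigram_detail(biz_date)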