浏览代码

article cold start task bug fix

luojunhui 3 月之前
父节点
当前提交
9749fa9ea1
共有 1 个文件被更改，包括 50 次插入和 1 次删除
  1. 50 1
      tasks/data_tasks/account_position_read_avg_task.py

+ 50 - 1
tasks/data_tasks/account_position_read_avg_task.py

@@ -23,7 +23,7 @@ touliu_accounts = set(json.loads(config.getConfigValue("touliu_gh_id_list")))
 backup_account_fans = json.loads(config.getConfigValue("backup_account_fans"))
 
 
-class AccountPositionReadAvgTask(object):
+class AccountDataTask:
 
     def __init__(self):
         # init piaoquan crawler db client
@@ -38,6 +38,9 @@ class AccountPositionReadAvgTask(object):
         self.denet_db_client = DatabaseConnector(denet_config)
         self.denet_db_client.connect()
 
+
+class AccountPositionReadAvgTask(AccountDataTask):
+
     def fetch_read_rate_avg_for_each_account(self, dt):
         dt = int(dt.replace("-", ""))
         sql = f"""
@@ -198,3 +201,49 @@ class AccountPositionReadAvgTask(object):
             self.cal_read_avg_for_each_account(
                 account, fans_dict, read_rate_avg_dict, dt
             )
+
+
+class AccountOpenRateAvgTask(AccountDataTask):
+    """
+    Compute sum(first_level) / sum(view_count) per account over the 30 days up to a date and store it in account_avg_info_v3.
+    """
+    def insert_record_into_database(self, gh_id, date_str, account_name, open_rate_obj):  # store one account's rate
+        avg_open_rate = open_rate_obj["avg_open_rate"]  # presumably None when the query matched no rows; TODO confirm
+        insert_sql = f"""
+            insert ignore into account_avg_info_v3
+            (gh_id, position, update_time, account_name, open_rate_avg)
+            values (%s, %s, %s, %s, %s);
+        """
+        params_list = [  # one row per position in const.ARTICLE_INDEX_LIST, all carrying the same rate
+            (gh_id, position, date_str, account_name, avg_open_rate)
+            for position in const.ARTICLE_INDEX_LIST
+        ]
+        affected_rows = self.long_articles_db_client.save_many(
+            insert_sql,
+            params_list=params_list
+        )
+        print(affected_rows)  # NOTE(review): looks like leftover debug output; consider a logger
+
+    def get_account_open_rate(self, account_name, gh_id, date_str):  # query the rate, then insert it; returns None
+        date_str_ = date_str.replace("-", "")  # drop dashes so the value matches the '%Y%m%d' format in the query
+        fetch_query = f"""
+            select 
+                sum(view_count) as 'total_read', 
+                sum(first_level) as 'total_first_level',
+                sum(first_level) / sum(view_count) as 'avg_open_rate'
+            from datastat_sort_strategy
+            where gh_id = '{gh_id}' and date_str between date_sub(str_to_date('{date_str_}', '%Y%m%d'), interval 30 day)
+            and str_to_date('{date_str_}', '%Y%m%d');
+        """
+        res = self.long_articles_db_client.fetch(
+            query=fetch_query, cursor_type=DictCursor
+        )[0]  # only the first returned row is used
+        self.insert_record_into_database(gh_id=gh_id, date_str=date_str, open_rate_obj=res, account_name=account_name)
+
+    def deal(self, date_str):  # entry point: process every account from fetch_publishing_account_list for date_str
+        account_list = fetch_publishing_account_list(self.denet_db_client)
+        for account in tqdm(account_list):
+            gh_id = account["gh_id"]
+            account_name = account["account_name"]
+            self.get_account_open_rate(account_name, gh_id, date_str)
+