Browse Source

update_published_articles_read_detail
v2

luojunhui 5 tháng trước cách đây
mục cha
commit
8702c3c76f
1 tập tin đã thay đổi với 88 bổ sung và 57 xóa
  1. 88 57
      tasks/update_published_articles_read_detail.py

+ 88 - 57
tasks/update_published_articles_read_detail.py

@@ -26,6 +26,7 @@ ARTICLE_TABLE = "official_articles"
 const = updatePublishedMsgTaskConst()
 spider = WeixinSpider()
 functions = Functions()
+empty_dict = {}
 
 
 def generate_bot_columns():
@@ -92,6 +93,23 @@ class UpdatePublishedArticlesReadDetail(object):
         ]
         return account_list
 
+    def get_article_info_by_trace_id(self, trace_id: str) -> Dict:
+        """
+        Look up gh_id, account_name and article_title for the given trace_id.
+
+        Returns the first matching row, or a new empty dict when nothing matches.
+        """
+        # NOTE(review): trace_id is interpolated into the SQL text; bind it if fetch() accepts params.
+        select_sql = f"""
+            SELECT t1.gh_id, t1.account_name, t2.article_title
+            FROM long_articles_match_videos t1
+            JOIN long_articles_text t2
+            ON t1.content_id = t2.content_id
+            WHERE t1.trace_id = '{trace_id}';
+        """
+        article_info = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
+        # Return a fresh dict on a miss so callers can never mutate shared module-level state.
+        return article_info[0] if article_info else {}
     def init_database(self):
         """
         初始化数据库连接
@@ -294,15 +312,17 @@ class UpdatePublishedArticlesReadDetail(object):
         更新单个账号的文章
         """
         gh_id = account_info['ghId']
-        account_name = account_info['name']
         select_sql = f"""
-            SELECT published_url, publish_timestamp, root_source_id_list, create_timestamp
+            SELECT trace_id, wx_sn, published_url, publish_timestamp, root_source_id_list, create_timestamp
             FROM long_articles_published_trace_id
-            WHERE gh_id = '{gh_id}' AND publish_timestamp > UNIX_TIMESTAMP(DATE_SUB('{run_date}', INTERVAL 3 DAY));
+            WHERE gh_id = '{gh_id}' AND publish_timestamp > UNIX_TIMESTAMP(DATE_SUB('{run_date}', INTERVAL 3 DAY)) AND delete_status = 0;
         """
         result = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
         for article in result:
+            trace_id = article['trace_id']
+            wx_sn = article['wx_sn']
             published_url = article['published_url']
+            publish_timestamp = article['publish_timestamp']
             article_info = spider.get_article_text(content_link=published_url, is_cache=False, is_count=True)
             response_code = article_info['code']
             match response_code:
@@ -351,6 +371,7 @@ class UpdatePublishedArticlesReadDetail(object):
                         show_like_count=show_like_count,
                         wx_sn=wx_sn
                     )
+
                 case const.ARTICLE_DELETE_CODE:
                     log(
                         task="updatePublishedMsgDaily",
@@ -361,35 +382,46 @@ class UpdatePublishedArticlesReadDetail(object):
                             "publishedUrl": published_url
                         }
                     )
+
                 case const.ARTICLE_ILLEGAL_CODE:
-                    error_detail = article_info.get("msg")
-                    insert_sql = f"""
-                        INSERT IGNORE INTO illegal_articles 
-                        (gh_id, account_name, title, wx_sn, publish_date, illegal_reason)
-                        VALUES 
-                        (%s, %s, %s, %s, %s, %s);
-                    """
-                    affected_rows = self.long_articles_db_client.save(
-                        query=insert_sql,
-                        params=(gh_id, account_name, title, wx_sn, publish_date, error_detail),
-                    )
-                    if affected_rows:
-                        bot(
-                            title="文章违规告警",
-                            detail={
-                                "account_name": account_name,
-                                "gh_id": gh_id,
-                                "title": title,
-                                "wx_sn": wx_sn.decode("utf-8"),
-                                "publish_date": str(publish_date),
-                                "error_detail": error_detail,
-                            },
-                            mention=False
-                        )
-                        aiditApi.delete_articles(
-                            gh_id=gh_id,
-                            title=title
+                    article_detail = self.get_article_info_by_trace_id(trace_id)
+                    if article_detail:
+                        error_detail = article_info.get("msg")
+                        insert_sql = f"""
+                                INSERT IGNORE INTO illegal_articles 
+                                (gh_id, account_name, title, wx_sn, publish_date, illegal_reason)
+                                VALUES 
+                                (%s, %s, %s, %s, %s, %s);
+                            """
+
+                        affected_rows = self.long_articles_db_client.save(
+                            query=insert_sql,
+                            params=(
+                                article_info['gh_id'],
+                                article_info['account_name'],
+                                article_info['article_title'],
+                                wx_sn,
+                                functions.timestamp_to_str(publish_timestamp),
+                                error_detail
+                            )
                         )
+                        if affected_rows:
+                            bot(
+                                title="文章违规告警(new task)",
+                                detail={
+                                    "account_name": article_info['account_name'],
+                                    "gh_id": article_info['gh_id'],
+                                    "title": article_info['article_title'],
+                                    "wx_sn": wx_sn,
+                                    "publish_date": functions.timestamp_to_str(publish_timestamp),
+                                    "error_detail": error_detail,
+                                },
+                                mention=False
+                            )
+                            aiditApi.delete_articles(
+                                gh_id=article_info['gh_id'],
+                                title=article_info['article_title']
+                            )
 
     def get_account_info(self, gh_id: str) -> int:
         """
@@ -539,7 +571,7 @@ class UpdatePublishedArticlesReadDetail(object):
             try:
                 self.process_single_account(account_info=account, run_date=biz_date)
                 success_count += 1
-                time.sleep(1)
+                time.sleep(3)
             except Exception as e:
                 fail_count += 1
                 log(
@@ -566,14 +598,14 @@ class UpdatePublishedArticlesReadDetail(object):
                     "failRate": fail_count / (success_count + fail_count)
                 }
             )
-        # bot(
-        #     title="更新每日发布文章任务完成通知",
-        #     detail={
-        #         "msg": "订阅号更新完成",
-        #         "finish_time": datetime.today().__str__()
-        #     },
-        #     mention=False
-        # )
+        bot(
+            title="更新每日发布文章任务完成通知(new)",
+            detail={
+                "msg": "订阅号更新完成",
+                "finish_time": datetime.today().__str__()
+            },
+            mention=False
+        )
 
         # 服务号
         server_accounts = [i for i in account_list if i['account_type'] == const.SERVICE_TYPE]
@@ -583,14 +615,14 @@ class UpdatePublishedArticlesReadDetail(object):
                 time.sleep(1)
             except Exception as e:
                 print(e)
-        # bot(
-        #     title="更新每日发布文章任务完成通知",
-        #     detail={
-        #         "msg": "服务号更新完成",
-        #         "finish_time": datetime.today().__str__()
-        #     },
-        #     mention=False
-        # )
+        bot(
+            title="更新每日发布文章任务完成通知(new)",
+            detail={
+                "msg": "服务号更新完成",
+                "finish_time": datetime.today().__str__()
+            },
+            mention=False
+        )
 
     def check_job(self, biz_date: str = None):
         """
@@ -619,20 +651,19 @@ class UpdatePublishedArticlesReadDetail(object):
                 fail_list.append(sub_item)
         if fail_list:
             try:
-                print(fail_list)
-                # bot(
-                #     title="更新当天发布文章,存在未更新的账号",
-                #     detail={
-                #         "columns": generate_bot_columns(),
-                #         "rows": fail_list
-                #     },
-                #     table=True
-                # )
+                bot(
+                    title="更新当天发布文章,存在未更新的账号(new)",
+                    detail={
+                        "columns": generate_bot_columns(),
+                        "rows": fail_list
+                    },
+                    table=True
+                )
             except Exception as e:
                 print("Timeout Error: {}".format(e))
         else:
             bot(
-                title="更新当天发布文章,所有账号均更新成功",
+                title="更新当天发布文章,所有账号均更新成功(new)",
                 mention=False,
                 detail={
                     "msg": "校验任务完成",