| 
					
				 | 
			
			
				@@ -0,0 +1,742 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+@author: luojunhui 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+@desc: 更新文章的阅读详情 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import json 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import traceback 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import urllib.parse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from datetime import datetime 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from typing import Dict, List 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from pymysql.cursors import DictCursor 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from tqdm import tqdm 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications import aiditApi 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications import bot 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications import create_feishu_columns_sheet 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications import Functions 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications import log 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications import WeixinSpider 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications.const import updatePublishedMsgTaskConst 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from applications.db import DatabaseConnector 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from config import denet_config, long_articles_config, piaoquan_crawler_config 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
# Name of the table that the INSERT/UPDATE statements in this module target.
ARTICLE_TABLE = "official_articles"
# Module-level singletons created once at import time and shared by the code below.
const = updatePublishedMsgTaskConst()
spider = WeixinSpider()
functions = Functions()
# A single shared empty dict; it is one mutable object, so it must never be modified in place.
empty_dict = {}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def generate_bot_columns(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    生成列 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    :return: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    columns = [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="name", display_name="公众号名称"), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="ghId", display_name="ghId"), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        create_feishu_columns_sheet(sheet_type="number", sheet_name="follower_count", display_name="粉丝数"), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        create_feishu_columns_sheet(sheet_type="date", sheet_name="account_init_timestamp", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    display_name="账号接入系统时间"), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="using_status", display_name="利用状态") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return columns 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+class UpdatePublishedArticlesReadDetail(object): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    更新每日发布文章的阅读详情 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def __init__(self): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.aigc_db_client = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.piaoquan_crawler_db_client = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.long_articles_db_client = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def get_account_list(self) -> List[Dict]:
        """
        Return the publishing accounts from aigc, each tagged with a ``using_status`` flag.

        The base records come from ``aiditApi.get_publish_account_from_aigc()``. Earlier
        notes on this method list the fields name, ghId, follower_count,
        account_init_time, account_type (subscription or service account) and
        account_auth; the code below additionally reads ``account_id`` from every record.

        :return: list of dicts; ``using_status`` is 0 when the account's status is '实验', otherwise 1
        """

        def load_status_by_account_id() -> Dict:
            """
            Query the aigc database for the status attached to each account.

            :return: mapping of account_id to status
            """
            sql = """
                SELECT t1.account_id, t2.status
                FROM wx_statistics_group_source_account t1
                JOIN wx_statistics_group_source t2
                ON t1.group_source_name = t2.account_source_name;
            """
            status_by_id = {}
            # When an account_id appears more than once, the last row wins.
            for row in self.aigc_db_client.fetch(sql, cursor_type=DictCursor):
                status_by_id[row['account_id']] = row['status']
            return status_by_id

        raw_accounts = aiditApi.get_publish_account_from_aigc()
        status_by_account = load_status_by_account_id()
        tagged_accounts = []
        for account in raw_accounts:
            # Copy so the record returned by the API is left untouched.
            tagged = dict(account)
            tagged['using_status'] = 0 if status_by_account.get(account['account_id']) == '实验' else 1
            tagged_accounts.append(tagged)
        return tagged_accounts
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def get_article_info_by_trace_id(self, trace_id: str) -> Dict:
        """
        Look up article information by trace_id.

        Joins ``long_articles_match_videos`` with ``long_articles_text`` on content_id
        and selects gh_id, account_name and article_title.

        :param trace_id: trace id to match against ``long_articles_match_videos.trace_id``
        :return: the first matching row as a dict, or a new empty dict when nothing matches
        """
        # NOTE(review): trace_id is interpolated straight into the SQL text. If it can
        # ever come from an untrusted source, switch to a parameterized query.
        select_sql = f"""
            SELECT t1.gh_id, t1.account_name, t2.article_title
            FROM long_articles_match_videos t1
            JOIN long_articles_text t2
            ON t1.content_id = t2.content_id
            WHERE t1.trace_id = '{trace_id}';
        """
        article_info = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
        if article_info:
            return article_info[0]
        # Return a fresh dict rather than a shared module-level object, so a caller
        # that mutates the result cannot affect later lookups.
        return {}
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def init_database(self):
        """
        Create and open the three database connections used by this task.

        Connects, in order, the piaoquan crawler, aigc and long articles clients.
        If any step raises, a bot alert is sent and the method returns without
        raising; clients that were not reached stay at their previous value.

        :return: None
        """
        try:
            self.piaoquan_crawler_db_client = DatabaseConnector(piaoquan_crawler_config)
            self.piaoquan_crawler_db_client.connect()
            self.aigc_db_client = DatabaseConnector(denet_config)
            self.aigc_db_client.connect()
            self.long_articles_db_client = DatabaseConnector(long_articles_config)
            self.long_articles_db_client.connect()
        except Exception as e:
            error_msg = traceback.format_exc()
            bot(
                title="更新文章任务连接数据库失败",
                detail={
                    # Send the message text, not the exception object, so the
                    # payload contains only plain serializable values.
                    "error": str(e),
                    "msg": error_msg
                }
            )
            return
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def insert_each_msg(self, account_info: Dict, msg_list: List[Dict]) -> None:
        """
        Store every article contained in a list of messages.

        For each message, the shared fields are read from ``AppMsg.BaseInfo`` and each
        entry of ``AppMsg.DetailInfo`` is turned into a 26-value tuple that is handed
        to ``insert_each_article``.

        :param account_info: account record; ``ghId`` and ``name`` are always read, ``using_status`` is read once per article
        :param msg_list: messages, each expected to carry ``BaseInfo`` and ``AppMsg``
        :return: None
        """
        gh_id = account_info['ghId']
        account_name = account_info['name']
        for info in msg_list:
            # Top-level BaseInfo of the message; it is stored as JSON with each article.
            base_info = info.get("BaseInfo", {})
            app_msg = info.get("AppMsg", {})
            app_base_info = app_msg.get("BaseInfo", {})
            app_msg_id = app_base_info.get("AppMsgId", None)
            create_time = app_base_info.get("CreateTime", None)
            update_time = app_base_info.get("UpdateTime", None)
            msg_type = app_base_info.get("Type", None)
            for article in app_msg.get("DetailInfo", []):
                title = article.get("Title", None)
                digest = article.get("Digest", None)
                item_index = article.get("ItemIndex", None)
                content_url = article.get("ContentUrl", None)
                source_url = article.get("SourceUrl", None)
                cover_img_url = article.get("CoverImgUrl", None)
                cover_img_url_1_1 = article.get("CoverImgUrl_1_1", None)
                cover_img_url_235_1 = article.get("CoverImgUrl_235_1", None)
                item_show_type = article.get("ItemShowType", None)
                is_original = article.get("IsOriginal", None)
                show_desc = article.get("ShowDesc", None)
                show_stat = functions.show_desc_to_sta(show_desc)
                ori_content = article.get("ori_content", None)
                show_view_count = show_stat.get("show_view_count", 0)
                show_like_count = show_stat.get("show_like_count", 0)
                show_zs_count = show_stat.get("show_zs_count", 0)
                show_pay_count = show_stat.get("show_pay_count", 0)
                # A URL without an "&sn=" segment yields None, the same as a missing
                # URL, instead of raising IndexError and aborting the remaining articles.
                if content_url and "&sn=" in content_url:
                    wx_sn = content_url.split("&sn=")[1].split("&")[0]
                else:
                    wx_sn = None
                status = account_info['using_status']
                # The value order must match the column list in insert_each_article.
                info_tuple = (
                    gh_id,
                    account_name,
                    app_msg_id,
                    title,
                    msg_type,
                    create_time,
                    update_time,
                    digest,
                    item_index,
                    content_url,
                    source_url,
                    cover_img_url,
                    cover_img_url_1_1,
                    cover_img_url_235_1,
                    item_show_type,
                    is_original,
                    show_desc,
                    ori_content,
                    show_view_count,
                    show_like_count,
                    show_zs_count,
                    show_pay_count,
                    wx_sn,
                    json.dumps(base_info, ensure_ascii=False),
                    functions.str_to_md5(title),
                    status
                )
                self.insert_each_article(
                    info_tuple=info_tuple,
                    show_view_count=show_view_count,
                    show_like_count=show_like_count,
                    wx_sn=wx_sn
                )
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def insert_each_article(self, info_tuple, show_view_count, show_like_count, wx_sn):
        """
        Insert one article row, falling back to updating its counters.

        The INSERT is tried first. If it (or the success log) raises for any reason,
        the row matching ``wx_sn`` has its view and like counts updated instead. If
        that fallback also raises, a failure is logged and nothing is re-raised.

        :param info_tuple: 26 values in the order of the INSERT column list
        :param show_view_count: view count written by the fallback UPDATE
        :param show_like_count: like count written by the fallback UPDATE
        :param wx_sn: article sn used in the UPDATE's WHERE clause
        :return: None
        """
        try:
            # NOTE(review): the column is CoverImgUrl_255_1 while insert_each_msg reads
            # the key "CoverImgUrl_235_1" -- confirm against the table schema.
            insert_sql = f"""
                    INSERT INTO {ARTICLE_TABLE}
                    (ghId, accountName, appMsgId, title, Type, createTime, updateTime, Digest, ItemIndex, ContentUrl, SourceUrl, CoverImgUrl, CoverImgUrl_1_1, CoverImgUrl_255_1, ItemShowType, IsOriginal, ShowDesc, ori_content, show_view_count, show_like_count, show_zs_count, show_pay_count, wx_sn, baseInfo, title_md5, status)
                    values
                    (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
            """
            self.piaoquan_crawler_db_client.save(query=insert_sql, params=info_tuple)
            log(
                task="updatePublishedMsgDaily",
                function="insert_each_msg",
                message="插入文章数据成功",
                data={
                    "info": info_tuple
                }
            )
            return
        except Exception as insert_error:
            # Keep the reason the insert failed. It is presumably a duplicate row, but it
            # could be any error, and it was previously discarded without a trace.
            insert_failure = str(insert_error)

        try:
            update_sql = f"""
                    UPDATE {ARTICLE_TABLE}
                    SET show_view_count = %s, show_like_count=%s
                    WHERE wx_sn = %s;
                """
            self.piaoquan_crawler_db_client.save(query=update_sql,
                                                 params=(show_view_count, show_like_count, wx_sn))
            log(
                task="updatePublishedMsgDaily",
                function="insert_each_msg",
                message="更新文章数据成功",
                data={
                    "wxSn": wx_sn,
                    "likeCount": show_like_count,
                    "viewCount": show_view_count
                }

            )
        except Exception as e:
            log(
                task="updatePublishedMsgDaily",
                function="insert_each_msg",
                message="更新文章失败, 报错原因是: {}".format(e),
                status="fail",
                # Report the original insert error alongside the update error.
                data={
                    "wxSn": wx_sn,
                    "insertError": insert_failure
                }
            )
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def update_account_by_spider(self, account_info: Dict, cursor=None):
        """
        Fetch one page of an account's message list from the spider and store it.

        The page is stored only when the account resolved from the last message's
        first article URL matches the requested gh_id; on a mismatch nothing is
        written and nothing is logged.

        :param account_info: account record; ``ghId`` is read here, the rest is used by insert_each_msg
        :param cursor: forwarded to ``spider.update_msg_list`` as ``index``
        :return: None
        """
        gh_id = account_info['ghId']
        # Only the disabled pagination code below uses this value; the call is kept as is.
        latest_update_time = self.get_account_info(gh_id)
        response = spider.update_msg_list(ghId=gh_id, index=cursor)
        if not response:
            log(
                task="updatePublishedMsgDaily",
                function="update_account_by_spider",
                status="fail",
                message="账号更新请求爬虫接口失败",
                data=account_info
            )
            return

        payload = response.get("data", {})
        msg_list = payload.get("data", [])
        if not msg_list:
            log(
                task="updatePublishedMsgDaily",
                function="update_each_account",
                message="账号文章更新失败",
                status="fail",
                data=response
            )
            return

        newest_tail = msg_list[-1]
        tail_app_msg = newest_tail['AppMsg']
        last_time_stamp_in_this_msg = tail_app_msg['BaseInfo']['UpdateTime']
        last_url = tail_app_msg['DetailInfo'][0]['ContentUrl']
        # Check that the returned articles really belong to the requested account.
        account_lookup = spider.get_account_by_url(last_url)
        check_id = account_lookup['data'].get('data', {}).get('wx_gh')
        if check_id != gh_id:
            return

        self.insert_each_msg(
            account_info=account_info,
            msg_list=msg_list
        )
        # Pagination is currently disabled:
        # if last_time_stamp_in_this_msg > latest_update_time:
        #     next_cursor = response['data']['next_cursor']
        #     return self.update_account_by_spider(
        #         account_info=account_info,
        #         cursor=next_cursor
        #     )
        log(
            task="updatePublishedMsgDaily",
            function="update_each_account",
            message="账号文章更新成功",
            data=response
        )
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def update_account_by_aigc(self, account_info: Dict, run_date: str): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        更新单个账号的文章 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        gh_id = account_info['ghId'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        select_sql = f""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            SELECT trace_id, wx_sn, published_url, publish_timestamp, root_source_id_list, create_timestamp 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            FROM long_articles_published_trace_id 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            WHERE gh_id = '{gh_id}' AND publish_timestamp > UNIX_TIMESTAMP(DATE_SUB('{run_date}', INTERVAL 3 DAY)) AND delete_status = 0; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        result = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for article in result: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            trace_id = article['trace_id'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            wx_sn = article['wx_sn'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            published_url = article['published_url'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            publish_timestamp = article['publish_timestamp'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            article_info = spider.get_article_text(content_link=published_url, is_cache=False, is_count=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            response_code = article_info['code'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            match response_code: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                case const.ARTICLE_SUCCESS_CODE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    response_data = article_info['data']['data'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    title = response_data['title'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    article_url = response_data['content_link'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    show_view_count = response_data['view_count'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    show_like_count = response_data['like_count'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    show_zs_count = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    show_pay_count = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    wx_sn = article_url.split("&sn=")[1].split("&")[0] if article_url else None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    app_msg_id = article_url.split("&mid=")[1].split("&")[0] if article_url else None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    status = account_info['using_status'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    info_tuple = ( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        gh_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        account_info['name'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        app_msg_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        title, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        "9", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        article['create_timestamp'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        response_data['update_timestamp'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        response_data['item_index'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        response_data['content_link'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        response_data.get("is_original", None), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        show_view_count, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        show_like_count, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        show_zs_count, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        show_pay_count, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        wx_sn, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        functions.str_to_md5(title), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        status 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    self.insert_each_article( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        info_tuple=info_tuple, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        show_view_count=show_view_count, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        show_like_count=show_like_count, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        wx_sn=wx_sn 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                case const.ARTICLE_DELETE_CODE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    log( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        task="updatePublishedMsgDaily", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        function="update_account_by_aigc", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        message="文章被删除", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        data={ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            "ghId": gh_id, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            "publishedUrl": published_url 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                case const.ARTICLE_ILLEGAL_CODE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    article_detail = self.get_article_info_by_trace_id(trace_id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if article_detail: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        error_detail = article_info.get("msg") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        insert_sql = f""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                INSERT IGNORE INTO illegal_articles  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                (gh_id, account_name, title, wx_sn, publish_date, illegal_reason) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                VALUES  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                (%s, %s, %s, %s, %s, %s); 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        affected_rows = self.long_articles_db_client.save( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            query=insert_sql, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            params=( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                article_info['gh_id'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                article_info['account_name'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                article_info['article_title'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                wx_sn, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                functions.timestamp_to_str(publish_timestamp), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                error_detail 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        if affected_rows: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            bot( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                title="文章违规告警(new task)", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                detail={ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    "account_name": article_info['account_name'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    "gh_id": article_info['gh_id'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    "title": article_info['article_title'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    "wx_sn": wx_sn, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    "publish_date": functions.timestamp_to_str(publish_timestamp), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    "error_detail": error_detail, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                mention=False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            aiditApi.delete_articles( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                gh_id=article_info['gh_id'], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                title=article_info['article_title'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def get_account_info(self, gh_id: str) -> int:
        """
        Return the newest publish timestamp stored for an account.

        Runs ``SELECT MAX(publish_timestamp)`` on ``ARTICLE_TABLE`` for the given
        ``gh_id``. When no value is stored yet (new account), the result falls
        back to ``const.NEW_ACCOUNT_CRAWL_PERIOD`` seconds before the current time.

        :param gh_id: account identifier matched against the ``ghId`` column
        :return: latest stored ``publish_timestamp``, or the new-account fallback
        """
        # NOTE(review): gh_id is interpolated straight into the SQL text; switch to
        # bound parameters if the db client's fetch() supports them.
        sql = f"""
            SELECT MAX(publish_timestamp)
            FROM {ARTICLE_TABLE}
            WHERE ghId = '{gh_id}';
            """
        result = self.piaoquan_crawler_db_client.fetch(sql)
        # An aggregate without GROUP BY always yields one row; for an account with
        # no articles that row holds NULL, so the value itself must be checked,
        # not just whether a row came back.
        latest_publish_timestamp = result[0][0] if result else None
        if latest_publish_timestamp is not None:
            return latest_publish_timestamp
        # New account: start the crawl window NEW_ACCOUNT_CRAWL_PERIOD before now
        # (30 days according to the original comment).
        return int(time.time()) - const.NEW_ACCOUNT_CRAWL_PERIOD
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def check_single_account(self, account_item: Dict) -> bool:
        """
        Check whether an account's articles have been refreshed recently enough.

        The newest ``updateTime`` stored in ``ARTICLE_TABLE`` for the account is
        compared with a threshold: today's midnight for account types in
        ``const.SUBSCRIBE_TYPE_SET``, and seven days before today's midnight for
        every other type.

        :param account_item: account record; ``ghId`` and ``account_type`` are read
        :return: True when the newest update is later than the threshold, False
            otherwise or when the lookup/comparison raises
        """
        gh_id = account_item['ghId']
        account_type = account_item['account_type']
        # Round-trip through the date string to get midnight of the current day.
        midnight = datetime.strptime(datetime.today().strftime("%Y-%m-%d"), "%Y-%m-%d")
        today_timestamp = midnight.timestamp()
        sql = f"""
                SELECT max(updateTime)
                FROM {ARTICLE_TABLE}
                WHERE ghId = '{gh_id}';
                """
        try:
            rows = self.piaoquan_crawler_db_client.fetch(sql)
            latest_update_time = rows[0][0]
            # Pick the freshness threshold according to the account type.
            if account_type in const.SUBSCRIBE_TYPE_SET:
                threshold = int(today_timestamp)
            else:
                threshold = int(today_timestamp) - 7 * 24 * 3600
            return int(latest_update_time) > threshold
        except Exception as e:
            # Any failure (e.g. no stored update time) counts as "not updated".
            print(e)
            return False
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def process_single_account(self, account_info: Dict, run_date: str):
        """
        Update one account, choosing between the spider path and the AIGC path.

        Publish records of the account newer than ``run_date`` are read from
        ``long_articles_published_trace_id``. The AIGC path is used only when such
        records exist and none of them has ``push_type`` 3; in every other case the
        spider path is used.

        :param account_info: account record; ``ghId`` is read here
        :param run_date: date string passed to ``UNIX_TIMESTAMP`` in the query
        """
        unlimited_push_type = 3
        account_gh_id = account_info['ghId']
        # Find out whether the account had pushes since run_date and whether any
        # of them was an unlimited push.
        select_sql = f"""
            SELECT push_type
            FROM long_articles_published_trace_id
            WHERE gh_id = '{account_gh_id}' AND publish_timestamp > UNIX_TIMESTAMP('{run_date}');
        """
        push_records = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)

        # No publish record at all: fall back to the spider.
        if not push_records:
            self.update_account_by_spider(account_info=account_info)
            return

        push_types = [record['push_type'] for record in push_records]
        # At least one unlimited push: the spider path is required.
        if unlimited_push_type in push_types:
            self.update_account_by_spider(account_info=account_info)
            return

        print("By AIGC", account_info)
        self.update_account_by_aigc(account_info=account_info, run_date=run_date)
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def update_publish_timestamp(self, article_info: Dict): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        更新发布时间戳 && minigram 信息 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :param article_info: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        :return: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        url = article_info['ContentUrl'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        wx_sn = article_info['wx_sn'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            response = spider.get_article_text(url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            response_code = response['code'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if response_code == const.ARTICLE_DELETE_CODE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                publish_timestamp_s = const.DELETE_STATUS 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                root_source_id_list = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            elif response_code == const.ARTICLE_ILLEGAL_CODE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                publish_timestamp_s = const.ILLEGAL_STATUS 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                root_source_id_list = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            elif response_code == const.ARTICLE_SUCCESS_CODE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data = response['data']['data'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                publish_timestamp_ms = data['publish_timestamp'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                publish_timestamp_s = int(publish_timestamp_ms / 1000) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                mini_program = data.get('mini_program', []) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if mini_program: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    root_source_id_list = [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        urllib.parse.parse_qs( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            urllib.parse.unquote(i['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        )['rootSourceId'][0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        for i in mini_program 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    root_source_id_list = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                publish_timestamp_s = const.UNKNOWN_STATUS 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                root_source_id_list = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            publish_timestamp_s = const.REQUEST_FAIL_STATUS 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            root_source_id_list = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            error_msg = traceback.format_exc() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            print(e, error_msg) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        update_sql = f""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                UPDATE {ARTICLE_TABLE} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                SET publish_timestamp = %s, root_source_id_list = %s 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                WHERE wx_sn = %s; 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.piaoquan_crawler_db_client.save( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            query=update_sql, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            params=( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                publish_timestamp_s, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                json.dumps(root_source_id_list, ensure_ascii=False), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                wx_sn 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            )) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if publish_timestamp_s == const.REQUEST_FAIL_STATUS: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return article_info 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def update_job(self, biz_date: str = None):
        """
        Run the update task for subscription accounts, then for service accounts.

        Each account is handled by ``process_single_account``. For subscription
        accounts the success/failure counts are logged, and an alert is sent when
        the failure rate exceeds ``const.SUBSCRIBE_FAIL_RATE_THRESHOLD``. A
        completion notice is sent after each of the two groups.

        :param biz_date: business date as ``YYYY-MM-DD``; defaults to today
        """
        account_list = self.get_account_list()
        if not biz_date:
            biz_date = datetime.today().strftime('%Y-%m-%d')

        # Subscription accounts
        subscription_accounts = [i for i in account_list if i['account_type'] in const.SUBSCRIBE_TYPE_SET]
        success_count = 0
        fail_count = 0
        # NOTE(review): only the first 10 subscription accounts are processed; this
        # looks like a debugging limit -- confirm before relying on full coverage.
        for account in tqdm(subscription_accounts[:10]):
            try:
                self.process_single_account(account_info=account, run_date=biz_date)
                success_count += 1
                time.sleep(3)
            except Exception as e:
                fail_count += 1
                log(
                    task="updatePublishedMsgDaily",
                    function="update_job",
                    message="单个账号文章更新失败, 报错信息是: {}".format(e),
                    status="fail",
                )
        log(
            task="updatePublishedMsgDaily",
            function="update_job",
            message="订阅号更新完成",
            data={
                "success": success_count,
                "fail": fail_count
            }
        )
        # Compute the rate once; with no processed accounts the denominator is
        # zero, so treat that case as a 0 failure rate instead of raising.
        total_count = success_count + fail_count
        fail_rate = fail_count / total_count if total_count else 0.0
        if fail_rate > const.SUBSCRIBE_FAIL_RATE_THRESHOLD:
            bot(
                title="订阅号超过 {}% 的账号更新失败".format(int(const.SUBSCRIBE_FAIL_RATE_THRESHOLD * 100)),
                detail={
                    "success": success_count,
                    "fail": fail_count,
                    "failRate": fail_rate
                }
            )
        bot(
            title="更新每日发布文章任务完成通知(new)",
            detail={
                "msg": "订阅号更新完成",
                "finish_time": str(datetime.today())
            },
            mention=False
        )

        # Service accounts
        server_accounts = [i for i in account_list if i['account_type'] == const.SERVICE_TYPE]
        for account in tqdm(server_accounts):
            try:
                self.process_single_account(account_info=account, run_date=biz_date)
                time.sleep(1)
            except Exception as e:
                print(e)
        bot(
            title="更新每日发布文章任务完成通知(new)",
            detail={
                "msg": "服务号更新完成",
                "finish_time": str(datetime.today())
            },
            mention=False
        )
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def check_job(self, biz_date: str = None):
        """
        Verify that subscription accounts were updated and rework those that were not.

        First pass: every account failing ``check_single_account`` is processed
        again with ``process_single_account``. Second pass: accounts still failing
        the check are reported through ``bot`` as a table; if none fail, a success
        notice is sent instead.

        :param biz_date: business date as ``YYYY-MM-DD``; defaults to today
        """
        if not biz_date:
            biz_date = datetime.today().strftime('%Y-%m-%d')

        account_list = self.get_account_list()
        subscription_accounts = [i for i in account_list if i['account_type'] in const.SUBSCRIBE_TYPE_SET]
        # NOTE(review): only the first 10 subscription accounts are checked; this
        # looks like a debugging limit -- confirm before relying on full coverage.
        accounts_to_check = subscription_accounts[:10]
        fail_list = []

        # First pass: rework every account that has not been updated.
        for sub_item in tqdm(accounts_to_check):
            if self.check_single_account(sub_item):
                continue
            try:
                self.process_single_account(sub_item, biz_date)
            except Exception as e:
                # One failing rework must not abort the job: the remaining accounts
                # still need their rework and the second pass still has to report.
                log(
                    task="updatePublishedMsgDaily",
                    function="check_job",
                    message="单个账号文章更新失败, 报错信息是: {}".format(e),
                    status="fail",
                    data={"ghId": sub_item['ghId']}
                )

        # Second pass: collect the accounts that are still not updated.
        for sub_item in tqdm(accounts_to_check):
            if not self.check_single_account(sub_item):
                # Drop three fields that are not needed in the report.
                sub_item.pop('account_type', None)
                sub_item.pop('account_auth', None)
                sub_item.pop('account_id', None)
                fail_list.append(sub_item)

        if fail_list:
            try:
                bot(
                    title="更新当天发布文章,存在未更新的账号(new)",
                    detail={
                        "columns": generate_bot_columns(),
                        "rows": fail_list
                    },
                    table=True
                )
            except Exception as e:
                print("Timeout Error: {}".format(e))
        else:
            bot(
                title="更新当天发布文章,所有账号均更新成功(new)",
                mention=False,
                detail={
                    "msg": "校验任务完成",
                    "finish_time": str(datetime.today())
                }
            )
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
    def get_article_detail_job(self):
        """
        Fetch details for published articles that still lack a publish timestamp.

        Steps:
        1. Call ``update_publish_timestamp`` for every article whose
           ``publish_timestamp`` is ``const.DEFAULT_STATUS`` or
           ``const.REQUEST_FAIL_STATUS``.
        2. Re-select with the same filter and retry, collecting the articles whose
           request failed again.
        3. Copy the largest positive ``publish_timestamp`` of each
           (``ghId``, ``appMsgId``) group onto rows of that group whose value is <= 0.
        4. For rows still below 0, use ``updateTime`` as ``publish_timestamp``.
        5. Send an alert listing the articles that failed the retry.

        :return: None
        """
        select_sql = f"""
            SELECT ContentUrl, wx_sn
            FROM {ARTICLE_TABLE}
            WHERE publish_timestamp in {(const.DEFAULT_STATUS, const.REQUEST_FAIL_STATUS)};
        """
        article_list = self.piaoquan_crawler_db_client.fetch(select_sql, cursor_type=DictCursor)
        # "or []" keeps the loop safe if the fetch returns nothing iterable.
        for article in tqdm(article_list or []):
            try:
                self.update_publish_timestamp(article)
            except Exception as e:
                print(e)
                print(traceback.format_exc())

        # Second pass over the articles that are still unresolved.
        process_failed_articles = self.piaoquan_crawler_db_client.fetch(select_sql, cursor_type=DictCursor)
        fail_list = []
        if process_failed_articles:
            for article in tqdm(process_failed_articles):
                try:
                    res = self.update_publish_timestamp(article)
                    # update_publish_timestamp returns None on success; only real
                    # failures belong in the alert list.
                    if res is not None:
                        fail_list.append(res)
                except Exception as e:
                    print(e)
                    print(traceback.format_exc())

        # Fill publish_timestamp from sibling rows sharing the same ghId/appMsgId.
        update_sql = f"""
            UPDATE {ARTICLE_TABLE} oav
            JOIN (
                SELECT ghId, appMsgId, MAX(publish_timestamp) AS publish_timestamp
                FROM {ARTICLE_TABLE}
                WHERE publish_timestamp > %s
                GROUP BY ghId, appMsgId
                ) vv
                ON oav.appMsgId = vv.appMsgId and oav.ghId = vv.ghId
            SET oav.publish_timestamp = vv.publish_timestamp
            WHERE oav.publish_timestamp <= %s;
        """
        self.piaoquan_crawler_db_client.save(
            query=update_sql,
            params=(0, 0)
        )

        # If there is still no publish_timestamp, use updateTime instead.
        update_sql_2 = f"""
            UPDATE {ARTICLE_TABLE}
            SET publish_timestamp = updateTime
            WHERE publish_timestamp < %s;
        """
        self.piaoquan_crawler_db_client.save(
            query=update_sql_2,
            # Pass a tuple, matching the other save() calls in this file.
            params=(0,)
        )
        if fail_list:
            bot(
                title="更新文章任务,请求detail失败",
                detail=fail_list
            )
			 |