Pārlūkot izejas kodu

delete unused file

luojunhui 3 mēneši atpakaļ
vecāks
revīzija
d9aa9f2e78
1 mainītis faili ar 0 papildinājumiem un 742 dzēšanām
  1. 0 742
      tasks/update_published_articles_read_detail.py

+ 0 - 742
tasks/update_published_articles_read_detail.py

@@ -1,742 +0,0 @@
-"""
-@author: luojunhui
-@desc: 更新文章的阅读详情
-"""
-import json
-import time
-import traceback
-import urllib.parse
-from datetime import datetime
-from typing import Dict, List
-
-from pymysql.cursors import DictCursor
-from tqdm import tqdm
-
-from applications import aiditApi
-from applications import bot
-from applications import create_feishu_columns_sheet
-from applications import Functions
-from applications import log
-from applications import WeixinSpider
-from applications.const import updatePublishedMsgTaskConst
-from applications.db import DatabaseConnector
-from config import denet_config, long_articles_config, piaoquan_crawler_config
-
-ARTICLE_TABLE = "official_articles"
-const = updatePublishedMsgTaskConst()
-spider = WeixinSpider()
-functions = Functions()
-empty_dict = {}
-
-
-def generate_bot_columns():
-    """
-    生成列
-    :return:
-    """
-    columns = [
-        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="name", display_name="公众号名称"),
-        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="ghId", display_name="ghId"),
-        create_feishu_columns_sheet(sheet_type="number", sheet_name="follower_count", display_name="粉丝数"),
-        create_feishu_columns_sheet(sheet_type="date", sheet_name="account_init_timestamp",
-                                    display_name="账号接入系统时间"),
-        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="using_status", display_name="利用状态")
-    ]
-    return columns
-
-
-class UpdatePublishedArticlesReadDetail(object):
-    """
-    更新每日发布文章的阅读详情
-    """
-
-    def __init__(self):
-        self.aigc_db_client = None
-        self.piaoquan_crawler_db_client = None
-        self.long_articles_db_client = None
-
-    def get_account_list(self) -> List[Dict]:
-        """
-        从 aigc 数据库中获取目前处于发布状态的账号
-        :return:
-        "name": line[0],
-        "ghId": line[1],
-        "follower_count": line[2],
-        "account_init_time": int(line[3] / 1000),
-        "account_type": line[4], # 订阅号 or 服务号
-        "account_auth": line[5]
-        """
-
-        def get_account_status() -> Dict:
-            """
-            获取账号的实验状态
-            :return:
-            """
-            sql = f"""  
-                SELECT t1.account_id, t2.status
-                FROM wx_statistics_group_source_account t1
-                JOIN wx_statistics_group_source t2
-                ON t1.group_source_name = t2.account_source_name;
-            """
-            account_status_list = self.aigc_db_client.fetch(sql, cursor_type=DictCursor)
-            account_status = {account['account_id']: account['status'] for account in account_status_list}
-            return account_status
-
-        account_list_with_out_using_status = aiditApi.get_publish_account_from_aigc()
-        account_status_dict = get_account_status()
-        account_list = [
-            {
-                **item,
-                'using_status': 0 if account_status_dict.get(item['account_id']) == '实验' else 1
-            }
-            for item in account_list_with_out_using_status
-        ]
-        return account_list
-
-    def get_article_info_by_trace_id(self, trace_id: str) -> Dict:
-        """
-        通过trace_id来查询文章信息
-        """
-        select_sql = f"""
-            SELECT t1.gh_id, t1.account_name, t2.article_title
-            FROM long_articles_match_videos t1
-            JOIN long_articles_text t2
-            ON t1.content_id = t2.content_id
-            WHERE t1.trace_id = '{trace_id}';
-        """
-        article_info = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
-        if article_info:
-            return article_info[0]
-        else:
-            return empty_dict
-
-    def init_database(self):
-        """
-        初始化数据库连接
-        """
-        # 初始化数据库连接
-        try:
-            self.piaoquan_crawler_db_client = DatabaseConnector(piaoquan_crawler_config)
-            self.piaoquan_crawler_db_client.connect()
-            self.aigc_db_client = DatabaseConnector(denet_config)
-            self.aigc_db_client.connect()
-            self.long_articles_db_client = DatabaseConnector(long_articles_config)
-            self.long_articles_db_client.connect()
-        except Exception as e:
-            error_msg = traceback.format_exc()
-            bot(
-                title="更新文章任务连接数据库失败",
-                detail={
-                    "error": e,
-                    "msg": error_msg
-                }
-            )
-            return
-
-    def insert_each_msg(self, account_info: Dict, msg_list: List[Dict]) -> None:
-        """
-        把消息数据更新到数据库中
-        :param account_info:
-        :param msg_list:
-        :return:
-        """
-        gh_id = account_info['ghId']
-        account_name = account_info['name']
-        for info in msg_list:
-            baseInfo = info.get("BaseInfo", {})
-            appMsgId = info.get("AppMsg", {}).get("BaseInfo", {}).get("AppMsgId", None)
-            createTime = info.get("AppMsg", {}).get("BaseInfo", {}).get("CreateTime", None)
-            updateTime = info.get("AppMsg", {}).get("BaseInfo", {}).get("UpdateTime", None)
-            Type = info.get("AppMsg", {}).get("BaseInfo", {}).get("Type", None)
-            detail_article_list = info.get("AppMsg", {}).get("DetailInfo", [])
-            if detail_article_list:
-                for article in detail_article_list:
-                    title = article.get("Title", None)
-                    Digest = article.get("Digest", None)
-                    ItemIndex = article.get("ItemIndex", None)
-                    ContentUrl = article.get("ContentUrl", None)
-                    SourceUrl = article.get("SourceUrl", None)
-                    CoverImgUrl = article.get("CoverImgUrl", None)
-                    CoverImgUrl_1_1 = article.get("CoverImgUrl_1_1", None)
-                    CoverImgUrl_235_1 = article.get("CoverImgUrl_235_1", None)
-                    ItemShowType = article.get("ItemShowType", None)
-                    IsOriginal = article.get("IsOriginal", None)
-                    ShowDesc = article.get("ShowDesc", None)
-                    show_stat = functions.show_desc_to_sta(ShowDesc)
-                    ori_content = article.get("ori_content", None)
-                    show_view_count = show_stat.get("show_view_count", 0)
-                    show_like_count = show_stat.get("show_like_count", 0)
-                    show_zs_count = show_stat.get("show_zs_count", 0)
-                    show_pay_count = show_stat.get("show_pay_count", 0)
-                    wx_sn = ContentUrl.split("&sn=")[1].split("&")[0] if ContentUrl else None
-                    status = account_info['using_status']
-                    info_tuple = (
-                        gh_id,
-                        account_name,
-                        appMsgId,
-                        title,
-                        Type,
-                        createTime,
-                        updateTime,
-                        Digest,
-                        ItemIndex,
-                        ContentUrl,
-                        SourceUrl,
-                        CoverImgUrl,
-                        CoverImgUrl_1_1,
-                        CoverImgUrl_235_1,
-                        ItemShowType,
-                        IsOriginal,
-                        ShowDesc,
-                        ori_content,
-                        show_view_count,
-                        show_like_count,
-                        show_zs_count,
-                        show_pay_count,
-                        wx_sn,
-                        json.dumps(baseInfo, ensure_ascii=False),
-                        functions.str_to_md5(title),
-                        status
-                    )
-                    self.insert_each_article(
-                        info_tuple=info_tuple,
-                        show_view_count=show_view_count,
-                        show_like_count=show_like_count,
-                        wx_sn=wx_sn
-                    )
-
-    def insert_each_article(self, info_tuple, show_view_count, show_like_count, wx_sn):
-        """
-        插入每一篇文章
-        """
-        try:
-            insert_sql = f"""
-                    INSERT INTO {ARTICLE_TABLE}
-                    (ghId, accountName, appMsgId, title, Type, createTime, updateTime, Digest, ItemIndex, ContentUrl, SourceUrl, CoverImgUrl, CoverImgUrl_1_1, CoverImgUrl_255_1, ItemShowType, IsOriginal, ShowDesc, ori_content, show_view_count, show_like_count, show_zs_count, show_pay_count, wx_sn, baseInfo, title_md5, status)
-                    values
-                    (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
-            """
-            self.piaoquan_crawler_db_client.save(query=insert_sql, params=info_tuple)
-            log(
-                task="updatePublishedMsgDaily",
-                function="insert_each_msg",
-                message="插入文章数据成功",
-                data={
-                    "info": info_tuple
-                }
-            )
-        except Exception as e:
-            try:
-                update_sql = f"""
-                    UPDATE {ARTICLE_TABLE}
-                    SET show_view_count = %s, show_like_count=%s
-                    WHERE wx_sn = %s;
-                """
-                self.piaoquan_crawler_db_client.save(query=update_sql,
-                                                     params=(show_view_count, show_like_count, wx_sn))
-                log(
-                    task="updatePublishedMsgDaily",
-                    function="insert_each_msg",
-                    message="更新文章数据成功",
-                    data={
-                        "wxSn": wx_sn,
-                        "likeCount": show_like_count,
-                        "viewCount": show_view_count
-                    }
-
-                )
-            except Exception as e:
-                log(
-                    task="updatePublishedMsgDaily",
-                    function="insert_each_msg",
-                    message="更新文章失败, 报错原因是: {}".format(e),
-                    status="fail"
-                )
-
-    def update_account_by_spider(self, account_info: Dict, cursor=None):
-        """
-        更新每一个账号信息
-        :param account_info:
-        :param cursor:
-        :return: None
-        """
-        gh_id = account_info['ghId']
-        latest_update_time = self.get_account_info(gh_id)
-        response = spider.update_msg_list(ghId=gh_id, index=cursor)
-        if not response:
-            log(
-                task="updatePublishedMsgDaily",
-                function="update_account_by_spider",
-                status="fail",
-                message="账号更新请求爬虫接口失败",
-                data=account_info
-            )
-            return
-        msg_list = response.get("data", {}).get("data", [])
-        if msg_list:
-            # do
-            last_article_in_this_msg = msg_list[-1]
-            last_time_stamp_in_this_msg = last_article_in_this_msg['AppMsg']['BaseInfo']['UpdateTime']
-            last_url = last_article_in_this_msg['AppMsg']['DetailInfo'][0]['ContentUrl']
-            resdata = spider.get_account_by_url(last_url)
-            check_id = resdata['data'].get('data', {}).get('wx_gh')
-            if check_id == gh_id:
-                self.insert_each_msg(
-                    account_info=account_info,
-                    msg_list=msg_list
-                )
-                # if last_time_stamp_in_this_msg > latest_update_time:
-                #     next_cursor = response['data']['next_cursor']
-                #     return self.update_account_by_spider(
-                #         account_info=account_info,
-                #         cursor=next_cursor
-                #     )
-                log(
-                    task="updatePublishedMsgDaily",
-                    function="update_each_account",
-                    message="账号文章更新成功",
-                    data=response
-                )
-        else:
-            log(
-                task="updatePublishedMsgDaily",
-                function="update_each_account",
-                message="账号文章更新失败",
-                status="fail",
-                data=response
-            )
-            return
-
-    def update_account_by_aigc(self, account_info: Dict, run_date: str):
-        """
-        更新单个账号的文章
-        """
-        gh_id = account_info['ghId']
-        select_sql = f"""
-            SELECT trace_id, wx_sn, published_url, publish_timestamp, root_source_id_list, create_timestamp
-            FROM long_articles_published_trace_id
-            WHERE gh_id = '{gh_id}' AND publish_timestamp > UNIX_TIMESTAMP(DATE_SUB('{run_date}', INTERVAL 3 DAY)) AND delete_status = 0;
-        """
-        result = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
-        for article in result:
-            trace_id = article['trace_id']
-            wx_sn = article['wx_sn']
-            published_url = article['published_url']
-            publish_timestamp = article['publish_timestamp']
-            article_info = spider.get_article_text(content_link=published_url, is_cache=False, is_count=True)
-            response_code = article_info['code']
-            match response_code:
-                case const.ARTICLE_SUCCESS_CODE:
-                    response_data = article_info['data']['data']
-                    title = response_data['title']
-                    article_url = response_data['content_link']
-                    show_view_count = response_data['view_count']
-                    show_like_count = response_data['like_count']
-                    show_zs_count = 0
-                    show_pay_count = 0
-                    wx_sn = article_url.split("&sn=")[1].split("&")[0] if article_url else None
-                    app_msg_id = article_url.split("&mid=")[1].split("&")[0] if article_url else None
-                    status = account_info['using_status']
-                    info_tuple = (
-                        gh_id,
-                        account_info['name'],
-                        app_msg_id,
-                        title,
-                        "9",
-                        article['create_timestamp'],
-                        response_data['update_timestamp'],
-                        None,
-                        response_data['item_index'],
-                        response_data['content_link'],
-                        None,
-                        None,
-                        None,
-                        None,
-                        None,
-                        response_data.get("is_original", None),
-                        None,
-                        None,
-                        show_view_count,
-                        show_like_count,
-                        show_zs_count,
-                        show_pay_count,
-                        wx_sn,
-                        None,
-                        functions.str_to_md5(title),
-                        status
-                    )
-                    self.insert_each_article(
-                        info_tuple=info_tuple,
-                        show_view_count=show_view_count,
-                        show_like_count=show_like_count,
-                        wx_sn=wx_sn
-                    )
-
-                case const.ARTICLE_DELETE_CODE:
-                    log(
-                        task="updatePublishedMsgDaily",
-                        function="update_account_by_aigc",
-                        message="文章被删除",
-                        data={
-                            "ghId": gh_id,
-                            "publishedUrl": published_url
-                        }
-                    )
-
-                case const.ARTICLE_ILLEGAL_CODE:
-                    article_detail = self.get_article_info_by_trace_id(trace_id)
-                    if article_detail:
-                        error_detail = article_info.get("msg")
-                        insert_sql = f"""
-                                INSERT IGNORE INTO illegal_articles 
-                                (gh_id, account_name, title, wx_sn, publish_date, illegal_reason)
-                                VALUES 
-                                (%s, %s, %s, %s, %s, %s);
-                            """
-
-                        affected_rows = self.long_articles_db_client.save(
-                            query=insert_sql,
-                            params=(
-                                article_info['gh_id'],
-                                article_info['account_name'],
-                                article_info['article_title'],
-                                wx_sn,
-                                functions.timestamp_to_str(publish_timestamp),
-                                error_detail
-                            )
-                        )
-                        if affected_rows:
-                            bot(
-                                title="文章违规告警(new task)",
-                                detail={
-                                    "account_name": article_info['account_name'],
-                                    "gh_id": article_info['gh_id'],
-                                    "title": article_info['article_title'],
-                                    "wx_sn": wx_sn,
-                                    "publish_date": functions.timestamp_to_str(publish_timestamp),
-                                    "error_detail": error_detail,
-                                },
-                                mention=False
-                            )
-                            aiditApi.delete_articles(
-                                gh_id=article_info['gh_id'],
-                                title=article_info['article_title']
-                            )
-
-    def get_account_info(self, gh_id: str) -> int:
-        """
-        通过 gh_id查询账号信息的最新发布时间
-        :param gh_id:
-        :return:
-        """
-        sql = f"""
-            SELECT MAX(publish_timestamp)
-            FROM {ARTICLE_TABLE}
-            WHERE ghId = '{gh_id}';
-            """
-        result = self.piaoquan_crawler_db_client.fetch(sql)
-        if result:
-            return result[0][0]
-        else:
-            # 新号,抓取周期定位抓取时刻往前推30天
-            return int(time.time()) - const.NEW_ACCOUNT_CRAWL_PERIOD
-
-    def check_single_account(self, account_item: Dict) -> bool:
-        """
-        校验每个账号是否更新
-        :param account_item:
-        :return: True / False
-        """
-        gh_id = account_item['ghId']
-        account_type = account_item['account_type']
-        today_str = datetime.today().strftime("%Y-%m-%d")
-        today_date_time = datetime.strptime(today_str, "%Y-%m-%d")
-        today_timestamp = today_date_time.timestamp()
-        sql = f"""
-                SELECT max(updateTime)
-                FROM {ARTICLE_TABLE}
-                WHERE ghId = '{gh_id}';
-                """
-        try:
-            latest_update_time = self.piaoquan_crawler_db_client.fetch(sql)[0][0]
-            # 判断该账号当天发布的文章是否被收集
-            if account_type in const.SUBSCRIBE_TYPE_SET:
-                if int(latest_update_time) > int(today_timestamp):
-                    return True
-                else:
-                    return False
-            else:
-                if int(latest_update_time) > int(today_timestamp) - 7 * 24 * 3600:
-                    return True
-                else:
-                    return False
-        except Exception as e:
-            print(e)
-            return False
-
-    def process_single_account(self, account_info: Dict, run_date: str):
-        """
-        处理单个账号
-        """
-        gh_id = account_info['ghId']
-        # 判断该账号当天是否有自动群发且没有无限流发表
-        select_sql = f"""
-            SELECT push_type
-            FROM long_articles_published_trace_id
-            WHERE gh_id = '{gh_id}' AND publish_timestamp > UNIX_TIMESTAMP('{run_date}');
-        """
-        response = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
-        UNLIMITED_PUSH = 3
-        if response:
-            unlimited_push_list = [item for item in response if item['push_type'] == UNLIMITED_PUSH]
-            if unlimited_push_list:
-                self.update_account_by_spider(account_info=account_info)
-            else:
-                print("By AIGC", account_info)
-                self.update_account_by_aigc(account_info=account_info, run_date=run_date)
-        else:
-            self.update_account_by_spider(account_info=account_info)
-
-    def update_publish_timestamp(self, article_info: Dict):
-        """
-        更新发布时间戳 && minigram 信息
-        :param article_info:
-        :return:
-        """
-        url = article_info['ContentUrl']
-        wx_sn = article_info['wx_sn']
-        try:
-            response = spider.get_article_text(url)
-            response_code = response['code']
-
-            if response_code == const.ARTICLE_DELETE_CODE:
-                publish_timestamp_s = const.DELETE_STATUS
-                root_source_id_list = []
-            elif response_code == const.ARTICLE_ILLEGAL_CODE:
-                publish_timestamp_s = const.ILLEGAL_STATUS
-                root_source_id_list = []
-            elif response_code == const.ARTICLE_SUCCESS_CODE:
-                data = response['data']['data']
-                publish_timestamp_ms = data['publish_timestamp']
-                publish_timestamp_s = int(publish_timestamp_ms / 1000)
-                mini_program = data.get('mini_program', [])
-                if mini_program:
-                    root_source_id_list = [
-                        urllib.parse.parse_qs(
-                            urllib.parse.unquote(i['path'])
-                        )['rootSourceId'][0]
-                        for i in mini_program
-                    ]
-                else:
-                    root_source_id_list = []
-            else:
-                publish_timestamp_s = const.UNKNOWN_STATUS
-                root_source_id_list = []
-        except Exception as e:
-            publish_timestamp_s = const.REQUEST_FAIL_STATUS
-            root_source_id_list = None
-            error_msg = traceback.format_exc()
-            print(e, error_msg)
-
-        update_sql = f"""
-                UPDATE {ARTICLE_TABLE}
-                SET publish_timestamp = %s, root_source_id_list = %s
-                WHERE wx_sn = %s;
-            """
-        self.piaoquan_crawler_db_client.save(
-            query=update_sql,
-            params=(
-                publish_timestamp_s,
-                json.dumps(root_source_id_list, ensure_ascii=False),
-                wx_sn
-            ))
-        if publish_timestamp_s == const.REQUEST_FAIL_STATUS:
-            return article_info
-        else:
-            return None
-
-    def update_job(self, biz_date: str = None):
-        """
-        执行更新任务
-        """
-        account_list = self.get_account_list()
-        if not biz_date:
-            biz_date = datetime.today().strftime('%Y-%m-%d')
-
-        # 处理订阅号
-        subscription_accounts = [i for i in account_list if i['account_type'] in const.SUBSCRIBE_TYPE_SET]
-        success_count = 0
-        fail_count = 0
-        for account in tqdm(subscription_accounts):
-            try:
-                self.process_single_account(account_info=account, run_date=biz_date)
-                success_count += 1
-                time.sleep(3)
-            except Exception as e:
-                fail_count += 1
-                log(
-                    task="updatePublishedMsgDaily",
-                    function="update_job",
-                    message="单个账号文章更新失败, 报错信息是: {}".format(e),
-                    status="fail",
-                )
-        log(
-            task="updatePublishedMsgDaily",
-            function="update_job",
-            message="订阅号更新完成",
-            data={
-                "success": success_count,
-                "fail": fail_count
-            }
-        )
-        if fail_count / (success_count + fail_count) > const.SUBSCRIBE_FAIL_RATE_THRESHOLD:
-            bot(
-                title="订阅号超过 {}% 的账号更新失败".format(int(const.SUBSCRIBE_FAIL_RATE_THRESHOLD * 100)),
-                detail={
-                    "success": success_count,
-                    "fail": fail_count,
-                    "failRate": fail_count / (success_count + fail_count)
-                }
-            )
-        bot(
-            title="更新每日发布文章任务完成通知(new)",
-            detail={
-                "msg": "订阅号更新完成",
-                "finish_time": datetime.today().__str__()
-            },
-            mention=False
-        )
-
-        # 服务号
-        server_accounts = [i for i in account_list if i['account_type'] == const.SERVICE_TYPE]
-        for account in tqdm(server_accounts):
-            try:
-                self.process_single_account(account_info=account, run_date=biz_date)
-                time.sleep(1)
-            except Exception as e:
-                print(e)
-        bot(
-            title="更新每日发布文章任务完成通知(new)",
-            detail={
-                "msg": "服务号更新完成",
-                "finish_time": datetime.today().__str__()
-            },
-            mention=False
-        )
-
-    def check_job(self, biz_date: str = None):
-        """
-        执行检查任务,check each account
-        """
-        if not biz_date:
-            biz_date = datetime.today().strftime('%Y-%m-%d')
-
-        account_list = self.get_account_list()
-        subscription_accounts = [i for i in account_list if i['account_type'] in const.SUBSCRIBE_TYPE_SET]
-        fail_list = []
-        # check and rework if fail
-        for sub_item in tqdm(subscription_accounts):
-            res = self.check_single_account(sub_item)
-            if not res:
-                self.process_single_account(sub_item, biz_date)
-
-        # check whether success and bot if fails
-        for sub_item in tqdm(subscription_accounts):
-            res = self.check_single_account(sub_item)
-            if not res:
-                # 去掉三个不需要查看的字段
-                sub_item.pop('account_type', None)
-                sub_item.pop('account_auth', None)
-                sub_item.pop('account_id', None)
-                fail_list.append(sub_item)
-        if fail_list:
-            try:
-                bot(
-                    title="更新当天发布文章,存在未更新的账号(new)",
-                    detail={
-                        "columns": generate_bot_columns(),
-                        "rows": fail_list
-                    },
-                    table=True
-                )
-            except Exception as e:
-                print("Timeout Error: {}".format(e))
-        else:
-            bot(
-                title="更新当天发布文章,所有账号均更新成功(new)",
-                mention=False,
-                detail={
-                    "msg": "校验任务完成",
-                    "finish_time": datetime.today().__str__()
-                }
-            )
-
-    def get_article_detail_job(self):
-        """
-        获取发布文章详情
-        :return:
-        """
-        select_sql = f"""
-            SELECT ContentUrl, wx_sn 
-            FROM {ARTICLE_TABLE}
-            WHERE publish_timestamp in {(const.DEFAULT_STATUS, const.REQUEST_FAIL_STATUS)};
-        """
-        article_list = self.piaoquan_crawler_db_client.fetch(select_sql, cursor_type=DictCursor)
-        for article in tqdm(article_list):
-            try:
-                self.update_publish_timestamp(article)
-            except Exception as e:
-                print(e)
-                error_msg = traceback.format_exc()
-                print(error_msg)
-        # check 一遍存在请求失败-1 && 0 的文章
-        select_sql = f"""
-                    SELECT ContentUrl, wx_sn 
-                    FROM {ARTICLE_TABLE}
-                    WHERE publish_timestamp in {(const.DEFAULT_STATUS, const.REQUEST_FAIL_STATUS)};
-                """
-        process_failed_articles = self.piaoquan_crawler_db_client.fetch(select_sql, cursor_type=DictCursor)
-        fail_list = []
-        if process_failed_articles:
-            for article in tqdm(process_failed_articles):
-                try:
-                    res = self.update_publish_timestamp(article)
-                    fail_list.append(res)
-                except Exception as e:
-                    print(e)
-                    error_msg = traceback.format_exc()
-                    print(error_msg)
-
-        # 通过msgId 来修改publish_timestamp
-        update_sql = f"""
-            UPDATE {ARTICLE_TABLE} oav 
-            JOIN (
-                SELECT ghId, appMsgId, MAX(publish_timestamp) AS publish_timestamp 
-                FROM {ARTICLE_TABLE} 
-                WHERE publish_timestamp > %s 
-                GROUP BY ghId, appMsgId
-                ) vv
-                ON oav.appMsgId = vv.appMsgId and oav.ghId = vv.ghId
-            SET oav.publish_timestamp = vv.publish_timestamp
-            WHERE oav.publish_timestamp <= %s;
-        """
-        self.piaoquan_crawler_db_client.save(
-            query=update_sql,
-            params=(0, 0)
-        )
-
-        # 若还是无 publish_timestamp,用update_time当作 publish_timestamp
-        update_sql_2 = f"""
-            UPDATE {ARTICLE_TABLE}
-            SET publish_timestamp = updateTime
-            WHERE publish_timestamp < %s;
-        """
-        self.piaoquan_crawler_db_client.save(
-            query=update_sql_2,
-            params=0
-        )
-        if fail_list:
-            bot(
-                title="更新文章任务,请求detail失败",
-                detail=fail_list
-            )