- """
- @author: luojunhui
- @desc: 更新文章的阅读详情
- """
- import json
- import time
- import traceback
- import urllib.parse
- from datetime import datetime
- from typing import Dict, List
- from pymysql.cursors import DictCursor
- from tqdm import tqdm
- from applications import aiditApi
- from applications import bot
- from applications import create_feishu_columns_sheet
- from applications import Functions
- from applications import log
- from applications import WeixinSpider
- from applications.const import updatePublishedMsgTaskConst
- from applications.db import DatabaseConnector
- from config import denet_config, long_articles_config, piaoquan_crawler_config
- ARTICLE_TABLE = "official_articles"
- const = updatePublishedMsgTaskConst()
- spider = WeixinSpider()
- functions = Functions()
- empty_dict = {}
def generate_bot_columns():
    """
    Generate the Feishu sheet columns for the alert table
    :return:
    """
    columns = [
        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="name", display_name="公众号名称"),
        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="ghId", display_name="ghId"),
        create_feishu_columns_sheet(sheet_type="number", sheet_name="follower_count", display_name="粉丝数"),
        create_feishu_columns_sheet(sheet_type="date", sheet_name="account_init_timestamp",
                                    display_name="账号接入系统时间"),
        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="using_status", display_name="利用状态")
    ]
    return columns

class UpdatePublishedArticlesReadDetail(object):
    """
    Update read details for the articles published each day
    """

    def __init__(self):
        self.aigc_db_client = None
        self.piaoquan_crawler_db_client = None
        self.long_articles_db_client = None

    def get_account_list(self) -> List[Dict]:
        """
        Fetch the accounts currently in publishing state from the aigc database
        :return: list of dicts shaped like:
            "name": line[0],
            "ghId": line[1],
            "follower_count": line[2],
            "account_init_time": int(line[3] / 1000),
            "account_type": line[4],  # 订阅号 (subscription) or 服务号 (service)
            "account_auth": line[5]
        """

        def get_account_status() -> Dict:
            """
            Fetch the experiment status of each account
            :return:
            """
            sql = """
                SELECT t1.account_id, t2.status
                FROM wx_statistics_group_source_account t1
                JOIN wx_statistics_group_source t2
                ON t1.group_source_name = t2.account_source_name;
            """
            account_status_list = self.aigc_db_client.fetch(sql, cursor_type=DictCursor)
            account_status = {account['account_id']: account['status'] for account in account_status_list}
            return account_status

        account_list_without_using_status = aiditApi.get_publish_account_from_aigc()
        account_status_dict = get_account_status()
        account_list = [
            {
                **item,
                # accounts in the "实验" (experiment) group are marked 0, all others 1
                'using_status': 0 if account_status_dict.get(item['account_id']) == '实验' else 1
            }
            for item in account_list_without_using_status
        ]
        return account_list

    def get_article_info_by_trace_id(self, trace_id: str) -> Dict:
        """
        Look up article info by trace_id
        """
        select_sql = f"""
            SELECT t1.gh_id, t1.account_name, t2.article_title
            FROM long_articles_match_videos t1
            JOIN long_articles_text t2
            ON t1.content_id = t2.content_id
            WHERE t1.trace_id = '{trace_id}';
        """
        article_info = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
        if article_info:
            return article_info[0]
        else:
            # return a fresh dict so callers can never mutate shared state
            return {}

    def init_database(self):
        """
        Initialize the database connections
        """
        try:
            self.piaoquan_crawler_db_client = DatabaseConnector(piaoquan_crawler_config)
            self.piaoquan_crawler_db_client.connect()
            self.aigc_db_client = DatabaseConnector(denet_config)
            self.aigc_db_client.connect()
            self.long_articles_db_client = DatabaseConnector(long_articles_config)
            self.long_articles_db_client.connect()
        except Exception as e:
            error_msg = traceback.format_exc()
            bot(
                title="更新文章任务连接数据库失败",
                detail={
                    "error": str(e),
                    "msg": error_msg
                }
            )
            return

    def insert_each_msg(self, account_info: Dict, msg_list: List[Dict]) -> None:
        """
        Write a batch of message groups into the database
        :param account_info:
        :param msg_list:
        :return:
        """
        gh_id = account_info['ghId']
        account_name = account_info['name']
        for info in msg_list:
            baseInfo = info.get("BaseInfo", {})
            appMsgId = info.get("AppMsg", {}).get("BaseInfo", {}).get("AppMsgId", None)
            createTime = info.get("AppMsg", {}).get("BaseInfo", {}).get("CreateTime", None)
            updateTime = info.get("AppMsg", {}).get("BaseInfo", {}).get("UpdateTime", None)
            Type = info.get("AppMsg", {}).get("BaseInfo", {}).get("Type", None)
            detail_article_list = info.get("AppMsg", {}).get("DetailInfo", [])
            if detail_article_list:
                for article in detail_article_list:
                    title = article.get("Title", None)
                    Digest = article.get("Digest", None)
                    ItemIndex = article.get("ItemIndex", None)
                    ContentUrl = article.get("ContentUrl", None)
                    SourceUrl = article.get("SourceUrl", None)
                    CoverImgUrl = article.get("CoverImgUrl", None)
                    CoverImgUrl_1_1 = article.get("CoverImgUrl_1_1", None)
                    CoverImgUrl_235_1 = article.get("CoverImgUrl_235_1", None)
                    ItemShowType = article.get("ItemShowType", None)
                    IsOriginal = article.get("IsOriginal", None)
                    ShowDesc = article.get("ShowDesc", None)
                    # parse the stat counts (views, likes, etc.) out of ShowDesc
                    show_stat = functions.show_desc_to_sta(ShowDesc)
                    ori_content = article.get("ori_content", None)
                    show_view_count = show_stat.get("show_view_count", 0)
                    show_like_count = show_stat.get("show_like_count", 0)
                    show_zs_count = show_stat.get("show_zs_count", 0)
                    show_pay_count = show_stat.get("show_pay_count", 0)
                    # the sn query parameter uniquely identifies a WeChat article
                    wx_sn = ContentUrl.split("&sn=")[1].split("&")[0] if ContentUrl else None
                    status = account_info['using_status']
                    info_tuple = (
                        gh_id,
                        account_name,
                        appMsgId,
                        title,
                        Type,
                        createTime,
                        updateTime,
                        Digest,
                        ItemIndex,
                        ContentUrl,
                        SourceUrl,
                        CoverImgUrl,
                        CoverImgUrl_1_1,
                        CoverImgUrl_235_1,
                        ItemShowType,
                        IsOriginal,
                        ShowDesc,
                        ori_content,
                        show_view_count,
                        show_like_count,
                        show_zs_count,
                        show_pay_count,
                        wx_sn,
                        json.dumps(baseInfo, ensure_ascii=False),
                        functions.str_to_md5(title),
                        status
                    )
                    self.insert_each_article(
                        info_tuple=info_tuple,
                        show_view_count=show_view_count,
                        show_like_count=show_like_count,
                        wx_sn=wx_sn
                    )

    def insert_each_article(self, info_tuple, show_view_count, show_like_count, wx_sn):
        """
        Insert a single article; if the insert fails (e.g. the wx_sn already
        exists), fall back to updating its view and like counts
        """
        try:
            insert_sql = f"""
                INSERT INTO {ARTICLE_TABLE}
                (ghId, accountName, appMsgId, title, Type, createTime, updateTime, Digest, ItemIndex, ContentUrl, SourceUrl, CoverImgUrl, CoverImgUrl_1_1, CoverImgUrl_255_1, ItemShowType, IsOriginal, ShowDesc, ori_content, show_view_count, show_like_count, show_zs_count, show_pay_count, wx_sn, baseInfo, title_md5, status)
                VALUES
                (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
            """
            self.piaoquan_crawler_db_client.save(query=insert_sql, params=info_tuple)
            log(
                task="updatePublishedMsgDaily",
                function="insert_each_msg",
                message="插入文章数据成功",
                data={
                    "info": info_tuple
                }
            )
        except Exception:
            try:
                update_sql = f"""
                    UPDATE {ARTICLE_TABLE}
                    SET show_view_count = %s, show_like_count = %s
                    WHERE wx_sn = %s;
                """
                self.piaoquan_crawler_db_client.save(
                    query=update_sql,
                    params=(show_view_count, show_like_count, wx_sn)
                )
                log(
                    task="updatePublishedMsgDaily",
                    function="insert_each_msg",
                    message="更新文章数据成功",
                    data={
                        "wxSn": wx_sn,
                        "likeCount": show_like_count,
                        "viewCount": show_view_count
                    }
                )
            except Exception as e:
                log(
                    task="updatePublishedMsgDaily",
                    function="insert_each_msg",
                    message="更新文章失败, 报错原因是: {}".format(e),
                    status="fail"
                )

    def update_account_by_spider(self, account_info: Dict, cursor=None):
        """
        Update a single account through the spider interface
        :param account_info:
        :param cursor:
        :return: None
        """
        gh_id = account_info['ghId']
        latest_update_time = self.get_account_info(gh_id)
        response = spider.update_msg_list(ghId=gh_id, index=cursor)
        if not response:
            log(
                task="updatePublishedMsgDaily",
                function="update_account_by_spider",
                status="fail",
                message="账号更新请求爬虫接口失败",
                data=account_info
            )
            return
        msg_list = response.get("data", {}).get("data", [])
        if msg_list:
            # verify that the fetched articles actually belong to this account
            last_article_in_this_msg = msg_list[-1]
            last_time_stamp_in_this_msg = last_article_in_this_msg['AppMsg']['BaseInfo']['UpdateTime']
            last_url = last_article_in_this_msg['AppMsg']['DetailInfo'][0]['ContentUrl']
            resdata = spider.get_account_by_url(last_url)
            check_id = resdata['data'].get('data', {}).get('wx_gh')
            if check_id == gh_id:
                self.insert_each_msg(
                    account_info=account_info,
                    msg_list=msg_list
                )
                # pagination is currently disabled; latest_update_time and
                # last_time_stamp_in_this_msg are kept for when it is re-enabled
                # if last_time_stamp_in_this_msg > latest_update_time:
                #     next_cursor = response['data']['next_cursor']
                #     return self.update_account_by_spider(
                #         account_info=account_info,
                #         cursor=next_cursor
                #     )
                log(
                    task="updatePublishedMsgDaily",
                    function="update_each_account",
                    message="账号文章更新成功",
                    data=response
                )
        else:
            log(
                task="updatePublishedMsgDaily",
                function="update_each_account",
                message="账号文章更新失败",
                status="fail",
                data=response
            )
        return

    def update_account_by_aigc(self, account_info: Dict, run_date: str):
        """
        Update the articles of a single account through the aigc trace records
        """
        gh_id = account_info['ghId']
        # articles published within the last 3 days that have not been deleted
        select_sql = f"""
            SELECT trace_id, wx_sn, published_url, publish_timestamp, root_source_id_list, create_timestamp
            FROM long_articles_published_trace_id
            WHERE gh_id = '{gh_id}' AND publish_timestamp > UNIX_TIMESTAMP(DATE_SUB('{run_date}', INTERVAL 3 DAY)) AND delete_status = 0;
        """
        result = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
        for article in result:
            trace_id = article['trace_id']
            wx_sn = article['wx_sn']
            published_url = article['published_url']
            publish_timestamp = article['publish_timestamp']
            article_info = spider.get_article_text(content_link=published_url, is_cache=False, is_count=True)
            response_code = article_info['code']
            match response_code:
                case const.ARTICLE_SUCCESS_CODE:
                    response_data = article_info['data']['data']
                    title = response_data['title']
                    article_url = response_data['content_link']
                    show_view_count = response_data['view_count']
                    show_like_count = response_data['like_count']
                    show_zs_count = 0
                    show_pay_count = 0
                    wx_sn = article_url.split("&sn=")[1].split("&")[0] if article_url else None
                    app_msg_id = article_url.split("&mid=")[1].split("&")[0] if article_url else None
                    status = account_info['using_status']
                    info_tuple = (
                        gh_id,
                        account_info['name'],
                        app_msg_id,
                        title,
                        "9",  # Type is hard-coded to "9" for records recovered via aigc
                        article['create_timestamp'],
                        response_data['update_timestamp'],
                        None,
                        response_data['item_index'],
                        response_data['content_link'],
                        None,
                        None,
                        None,
                        None,
                        None,
                        response_data.get("is_original", None),
                        None,
                        None,
                        show_view_count,
                        show_like_count,
                        show_zs_count,
                        show_pay_count,
                        wx_sn,
                        None,
                        functions.str_to_md5(title),
                        status
                    )
                    self.insert_each_article(
                        info_tuple=info_tuple,
                        show_view_count=show_view_count,
                        show_like_count=show_like_count,
                        wx_sn=wx_sn
                    )
                case const.ARTICLE_DELETE_CODE:
                    log(
                        task="updatePublishedMsgDaily",
                        function="update_account_by_aigc",
                        message="文章被删除",
                        data={
                            "ghId": gh_id,
                            "publishedUrl": published_url
                        }
                    )
                case const.ARTICLE_ILLEGAL_CODE:
                    article_detail = self.get_article_info_by_trace_id(trace_id)
                    if article_detail:
                        error_detail = article_info.get("msg")
                        insert_sql = f"""
                            INSERT IGNORE INTO illegal_articles
                            (gh_id, account_name, title, wx_sn, publish_date, illegal_reason)
                            VALUES
                            (%s, %s, %s, %s, %s, %s);
                        """
                        affected_rows = self.long_articles_db_client.save(
                            query=insert_sql,
                            params=(
                                article_detail['gh_id'],
                                article_detail['account_name'],
                                article_detail['article_title'],
                                wx_sn,
                                functions.timestamp_to_str(publish_timestamp),
                                error_detail
                            )
                        )
                        # INSERT IGNORE affects 0 rows for duplicates, so the
                        # alert and deletion fire only on first detection
                        if affected_rows:
                            bot(
                                title="文章违规告警(new task)",
                                detail={
                                    "account_name": article_detail['account_name'],
                                    "gh_id": article_detail['gh_id'],
                                    "title": article_detail['article_title'],
                                    "wx_sn": wx_sn,
                                    "publish_date": functions.timestamp_to_str(publish_timestamp),
                                    "error_detail": error_detail,
                                },
                                mention=False
                            )
                            aiditApi.delete_articles(
                                gh_id=article_detail['gh_id'],
                                title=article_detail['article_title']
                            )

    def get_account_info(self, gh_id: str) -> int:
        """
        Query the latest publish timestamp of an account by gh_id
        :param gh_id:
        :return:
        """
        sql = f"""
            SELECT MAX(publish_timestamp)
            FROM {ARTICLE_TABLE}
            WHERE ghId = '{gh_id}';
        """
        result = self.piaoquan_crawler_db_client.fetch(sql)
        if result and result[0][0]:
            return result[0][0]
        else:
            # new account: the crawl window starts NEW_ACCOUNT_CRAWL_PERIOD (30 days) before now
            return int(time.time()) - const.NEW_ACCOUNT_CRAWL_PERIOD

    def check_single_account(self, account_item: Dict) -> bool:
        """
        Check whether an account has been updated
        :param account_item:
        :return: True / False
        """
        gh_id = account_item['ghId']
        account_type = account_item['account_type']
        today_str = datetime.today().strftime("%Y-%m-%d")
        today_date_time = datetime.strptime(today_str, "%Y-%m-%d")
        today_timestamp = today_date_time.timestamp()
        sql = f"""
            SELECT MAX(updateTime)
            FROM {ARTICLE_TABLE}
            WHERE ghId = '{gh_id}';
        """
        try:
            latest_update_time = self.piaoquan_crawler_db_client.fetch(sql)[0][0]
            # check whether the articles the account published today have been collected:
            # subscription accounts must have an update after today's midnight;
            # other accounts are allowed a 7-day window
            if account_type in const.SUBSCRIBE_TYPE_SET:
                return int(latest_update_time) > int(today_timestamp)
            else:
                return int(latest_update_time) > int(today_timestamp) - 7 * 24 * 3600
        except Exception as e:
            print(e)
            return False

    def process_single_account(self, account_info: Dict, run_date: str):
        """
        Process a single account
        """
        gh_id = account_info['ghId']
        # check whether the account has an automatic group push today and
        # whether any of today's publishes went out as unlimited-flow pushes
        select_sql = f"""
            SELECT push_type
            FROM long_articles_published_trace_id
            WHERE gh_id = '{gh_id}' AND publish_timestamp > UNIX_TIMESTAMP('{run_date}');
        """
        response = self.long_articles_db_client.fetch(select_sql, cursor_type=DictCursor)
        UNLIMITED_PUSH = 3  # push_type value for unlimited-flow pushes
        if response:
            unlimited_push_list = [item for item in response if item['push_type'] == UNLIMITED_PUSH]
            if unlimited_push_list:
                self.update_account_by_spider(account_info=account_info)
            else:
                print("By AIGC", account_info)
                self.update_account_by_aigc(account_info=account_info, run_date=run_date)
        else:
            self.update_account_by_spider(account_info=account_info)

    def update_publish_timestamp(self, article_info: Dict):
        """
        Update the publish timestamp && mini-program info
        :param article_info:
        :return:
        """
        url = article_info['ContentUrl']
        wx_sn = article_info['wx_sn']
        try:
            response = spider.get_article_text(url)
            response_code = response['code']
            if response_code == const.ARTICLE_DELETE_CODE:
                publish_timestamp_s = const.DELETE_STATUS
                root_source_id_list = []
            elif response_code == const.ARTICLE_ILLEGAL_CODE:
                publish_timestamp_s = const.ILLEGAL_STATUS
                root_source_id_list = []
            elif response_code == const.ARTICLE_SUCCESS_CODE:
                data = response['data']['data']
                publish_timestamp_ms = data['publish_timestamp']
                publish_timestamp_s = int(publish_timestamp_ms / 1000)
                mini_program = data.get('mini_program', [])
                if mini_program:
                    # the rootSourceId is carried in the url-encoded mini-program path
                    root_source_id_list = [
                        urllib.parse.parse_qs(
                            urllib.parse.unquote(i['path'])
                        )['rootSourceId'][0]
                        for i in mini_program
                    ]
                else:
                    root_source_id_list = []
            else:
                publish_timestamp_s = const.UNKNOWN_STATUS
                root_source_id_list = []
        except Exception as e:
            publish_timestamp_s = const.REQUEST_FAIL_STATUS
            root_source_id_list = None
            error_msg = traceback.format_exc()
            print(e, error_msg)
        update_sql = f"""
            UPDATE {ARTICLE_TABLE}
            SET publish_timestamp = %s, root_source_id_list = %s
            WHERE wx_sn = %s;
        """
        self.piaoquan_crawler_db_client.save(
            query=update_sql,
            params=(
                publish_timestamp_s,
                json.dumps(root_source_id_list, ensure_ascii=False),
                wx_sn
            ))
        # return the article so that callers can collect failed requests
        if publish_timestamp_s == const.REQUEST_FAIL_STATUS:
            return article_info
        else:
            return None

    def update_job(self, biz_date: str = None):
        """
        Run the update task
        """
        account_list = self.get_account_list()
        if not biz_date:
            biz_date = datetime.today().strftime('%Y-%m-%d')
        # subscription accounts
        subscription_accounts = [i for i in account_list if i['account_type'] in const.SUBSCRIBE_TYPE_SET]
        success_count = 0
        fail_count = 0
        for account in tqdm(subscription_accounts):
            try:
                self.process_single_account(account_info=account, run_date=biz_date)
                success_count += 1
                time.sleep(3)
            except Exception as e:
                fail_count += 1
                log(
                    task="updatePublishedMsgDaily",
                    function="update_job",
                    message="单个账号文章更新失败, 报错信息是: {}".format(e),
                    status="fail",
                )
        log(
            task="updatePublishedMsgDaily",
            function="update_job",
            message="订阅号更新完成",
            data={
                "success": success_count,
                "fail": fail_count
            }
        )
        total_count = success_count + fail_count
        if total_count and fail_count / total_count > const.SUBSCRIBE_FAIL_RATE_THRESHOLD:
            bot(
                title="订阅号超过 {}% 的账号更新失败".format(int(const.SUBSCRIBE_FAIL_RATE_THRESHOLD * 100)),
                detail={
                    "success": success_count,
                    "fail": fail_count,
                    "failRate": fail_count / total_count
                }
            )
        bot(
            title="更新每日发布文章任务完成通知(new)",
            detail={
                "msg": "订阅号更新完成",
                "finish_time": str(datetime.today())
            },
            mention=False
        )
        # service accounts
        server_accounts = [i for i in account_list if i['account_type'] == const.SERVICE_TYPE]
        for account in tqdm(server_accounts):
            try:
                self.process_single_account(account_info=account, run_date=biz_date)
                time.sleep(1)
            except Exception as e:
                print(e)
        bot(
            title="更新每日发布文章任务完成通知(new)",
            detail={
                "msg": "服务号更新完成",
                "finish_time": str(datetime.today())
            },
            mention=False
        )

    def check_job(self, biz_date: str = None):
        """
        Run the check task: verify each account and retry the ones that failed
        """
        if not biz_date:
            biz_date = datetime.today().strftime('%Y-%m-%d')
        account_list = self.get_account_list()
        subscription_accounts = [i for i in account_list if i['account_type'] in const.SUBSCRIBE_TYPE_SET]
        fail_list = []
        # first pass: re-process any account that has not been updated
        for sub_item in tqdm(subscription_accounts):
            res = self.check_single_account(sub_item)
            if not res:
                self.process_single_account(sub_item, biz_date)
        # second pass: collect the accounts that still fail and alert via bot
        for sub_item in tqdm(subscription_accounts):
            res = self.check_single_account(sub_item)
            if not res:
                # drop three fields that are not needed in the alert table
                sub_item.pop('account_type', None)
                sub_item.pop('account_auth', None)
                sub_item.pop('account_id', None)
                fail_list.append(sub_item)
        if fail_list:
            try:
                bot(
                    title="更新当天发布文章,存在未更新的账号(new)",
                    detail={
                        "columns": generate_bot_columns(),
                        "rows": fail_list
                    },
                    table=True
                )
            except Exception as e:
                print("Timeout Error: {}".format(e))
        else:
            bot(
                title="更新当天发布文章,所有账号均更新成功(new)",
                mention=False,
                detail={
                    "msg": "校验任务完成",
                    "finish_time": str(datetime.today())
                }
            )

    def get_article_detail_job(self):
        """
        Fetch details for published articles
        :return:
        """
        select_sql = f"""
            SELECT ContentUrl, wx_sn
            FROM {ARTICLE_TABLE}
            WHERE publish_timestamp IN {(const.DEFAULT_STATUS, const.REQUEST_FAIL_STATUS)};
        """
        article_list = self.piaoquan_crawler_db_client.fetch(select_sql, cursor_type=DictCursor)
        for article in tqdm(article_list):
            try:
                self.update_publish_timestamp(article)
            except Exception as e:
                print(e)
                error_msg = traceback.format_exc()
                print(error_msg)
        # second pass over articles still in request-failed (-1) or default (0) state
        select_sql = f"""
            SELECT ContentUrl, wx_sn
            FROM {ARTICLE_TABLE}
            WHERE publish_timestamp IN {(const.DEFAULT_STATUS, const.REQUEST_FAIL_STATUS)};
        """
        process_failed_articles = self.piaoquan_crawler_db_client.fetch(select_sql, cursor_type=DictCursor)
        fail_list = []
        if process_failed_articles:
            for article in tqdm(process_failed_articles):
                try:
                    res = self.update_publish_timestamp(article)
                    # update_publish_timestamp returns None on success
                    if res:
                        fail_list.append(res)
                except Exception as e:
                    print(e)
                    error_msg = traceback.format_exc()
                    print(error_msg)
        # backfill publish_timestamp from sibling rows with the same ghId + appMsgId
        update_sql = f"""
            UPDATE {ARTICLE_TABLE} oav
            JOIN (
                SELECT ghId, appMsgId, MAX(publish_timestamp) AS publish_timestamp
                FROM {ARTICLE_TABLE}
                WHERE publish_timestamp > %s
                GROUP BY ghId, appMsgId
            ) vv
            ON oav.appMsgId = vv.appMsgId AND oav.ghId = vv.ghId
            SET oav.publish_timestamp = vv.publish_timestamp
            WHERE oav.publish_timestamp <= %s;
        """
        self.piaoquan_crawler_db_client.save(
            query=update_sql,
            params=(0, 0)
        )
        # if publish_timestamp is still missing, fall back to updateTime
        update_sql_2 = f"""
            UPDATE {ARTICLE_TABLE}
            SET publish_timestamp = updateTime
            WHERE publish_timestamp < %s;
        """
        self.piaoquan_crawler_db_client.save(
            query=update_sql_2,
            params=(0,)
        )
        if fail_list:
            bot(
                title="更新文章任务,请求detail失败",
                detail=fail_list
            )
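

# A minimal driver sketch, not part of the original module: it assumes this
# file is run as a script by an external scheduler. init_database() must be
# called before any job; all names used below are defined in this file, and
# the job order follows the class's own flow (update, verify, backfill).
if __name__ == "__main__":
    task = UpdatePublishedArticlesReadDetail()
    task.init_database()
    task.update_job()
    task.check_job()
    task.get_article_detail_job()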
|