import time, json import traceback import urllib.parse from tqdm.asyncio import tqdm from .recycle_daily_publish_articles import UpdateRootSourceIdAndUpdateTimeTask from .recycle_daily_publish_articles import Const from applications.crawler.wechat import get_article_list_from_account from applications.crawler.wechat import get_article_detail from applications.pipeline import insert_outside_article_into_recycle_pool from applications.api import feishu_robot account_name_set = { "念念私语", "发现趣论奇闻", "一晴方春", "生活技巧悦读", "妙招百科享生活", "精选问候祝福寄语", "生活实用妙招收录", "生活妙计通", "逸事趣闻说", "天天一起跳广场舞", "零点生活志", "居家生活实录", "生活妙招实录", "生活妙招点子库", "生活百科巧事通", "早晨送你暖心祝福", "生活情感感悟", "家庭百科大全", "清晨早安温馨问候", "经典祝福语大全集", "日常技巧全书", "无忧生活锦囊", "家庭妙方实录", "生活技巧宝藏库", "妙招集锦全书", "乐享技巧馆", "暖心问候语录", "乐享技巧汇", "巧手理想家", "品质生活有妙招", "生活妙招万家通", "生活妙招大赏", "温暖祝福语大全", "好愿祝福语录", "精选日常祝福语", "邻家生活点滴", "创新生活妙招百科", "小技巧生活录", "妙生活家手记", "生活窍门事事通", "巧手技巧百科", "生活技巧小支招", "趣生活百科", "生活妙用技巧合集", "日常祝福语大全", "妙招达人养成记", "妙生活锦囊", "生活百科一点知", "生活妙计科普馆", "巧妈生活妙招助手", "小窍门生活录", "好生活点点通", "实用技巧馆", "生活情感肆读", "生活技巧小奥秘", "送温暖祝福精选", "生活妙招选集", "每日祝福语问候", "日常生活点子库", "每日好友祝福集", "节气祝福问候录", "趣味生活铺子", "居家生活妙招大全", "落日情绪屋", "邻家生活技巧", "每日精选祝福语录", "邻家生活有妙招", "妙招集合录", "多学生活技巧", "祝福问候手册", "点滴生活坊", "早安心语合集", "微情话语录", "邻家妙招知识宝典", "生活妙招好能手", "节庆祝福语大全", "精选早安祝福合集", "节日问候语大全", "居家生活妙招技巧", "精选早安问候语合集", "治愈情感宝典", "生活小妙知", "生活妙想指南", "技巧达人站", "幸福语录精选", "技巧生活手册", "早安祝福语集", "家居生活小窍门全集", "祝福问候大集锦", "乐活研究社", "实用生活365", "无忧生活小支招", "家有妙计百变通", "生活技巧智慧库", "点滴乐活小妙招", "早安问候精选祝福", "生活妙招巧思汇", "热门广场舞大全", "中老年妙招大全", "技巧百变生活馆", "生活巧手指南", "有妙招享生活", "巧手生活录", "生活技巧我知道", "每日早晚安祝福语录", "情暖祝福语录", "有趣生活妙招屋", "家居生活趣用", "日常生活妙招百科", "灵巧生活一点通", "拾遗情感铺", "广场歌舞热榜", "邻里妙方百科", "妙招技巧帮", "经典广场舞热榜", "早安祝福集大全", "祝福语每日送", "情感情报库", "生活妙招技巧汇", "实用妙招宝", "巧思收集录", "日用妙招点点通", "家庭技巧生活录", "每日早安祝福集", "清晨祝福合集", "技巧生活集萃", "生活妙思巧手集", "晨间送祝语", "便捷小妙招", "趣招一点通", "妙招收集馆", "情感慢读", "安心祝福集", "生活技巧点子库", "懂点技巧吧", "技巧能手妙招库", "事事妙招集锦", "祝福语合集", "技巧生活录", "生活妙思小帮手", "暖心祝福寄语录", "生活妙计百宝库", "每日祝福语选集", "巧手常识集锦", "日常祝福问候语", "日常问候心愿语录", "小窍门宝典", "情感阅读舍", "精选日常祝福", "生活小机智", "生活乐享君", "早上好心情祝福", "幸福小窍门", "绸缪情感铺", "晨间暖心语录", "生活妙方宝典", "生活妙招点通铺", "美好微祝福语录", "生活最有招", "祝福语温暖问候集锦", "节日问候温馨祝福", "每日幸福语录集锦", "生活巧招点子库", "生活情感课堂", "情感避风湾", "实用生活小方法", "什锦生活录", "晨间祝福语精选", "巧居生活妙招站", "巧知生活集", "日常妙招收集录", "技巧生活百知", "居家小妙术", "时光说情感", "好愿祝福温馨问候", "最美祝愿问候", "生活妙招乐享", "情绪解忧屋", "生活妙招空间站", "乐活技巧馆", "如意祝福选集", "日常技巧大讲堂", "三分钟技巧集", "情感生活百态", "早安问候语精选大全", "招招妙招百事通", "经典热门广场舞曲", "日常生活技能大全", "一点儿窍门馆", "邻家生活技巧合集", "每日送祝福手册", "早上好常用祝福问候", "妙招巧生活杂货铺", "生活全知通", "友友刷刷看", "退休也乐呵", "奇闻怪异集", "退休养老攻略", "竹边生活记", "诡秘奇闻记", "趣味生活简记", "生活墨记", "企退老人心声", "生活静语", "生活趣时光", "生活向暖", "老年退休小贴士", "退休乐时光", "生活趣谈会", "生活百科闲谈", "生活志记", "退休生活报告", "奇闻趣世界", "日常饮食百科", "趣享生活时光", "趣读奇闻汇", "生活点滴小栈", "浮光生活记", "生活解忧坊", "生活多彩时光", "退休生活那些事", "生活茶话集", "美好生活闲谈", "民间奇闻集", "生活百味记", "悠享生活指南", "杂谈异闻社", "饮食智慧屋", "漫谈奇闻社", "退休生活驿站", "幸福养身大全", "生活念记", "生活栖风", "生活沐暖", "生活絮事记", "生活集韵", "轻享生活记", "幽巷奇闻谈", "饮食匠心录", "生活畅聊集", "鉴赏奇闻集", "生活拾碎光", "企业退休杂谈", "智慧退休计划", "老年退休指南", "饮食创意坊", "趣闻奇谈汇", "生活漫读集", "生活趣事乐园", "慢品生活味", "企退老年乐园", "生活悠闲记", "生活饮食百科", "畅享饮食集", "幸福生活乐园", "退休知识宝典", "生活流年记", "生活知百味", "生活感悟集", "精致饮食指南", "生活知识宝典", "\\N", "沧桑时光生活", "自得美好生活", "生活暖暖舒心", "每日技巧集", "温暖问候精选", "百惠优生活", "生活爱问百科", "技巧便生活", "生活一点知", "精选祝福问候大全", "巧手生活碎片", "巧思知识库", "温暖祝福寄语", "智巧日常百宝库", "招招巧手汇", "早安祝福精选手册", "每日祝福寄语", "生活技能百科全知", "生活妙招窍门指南", "美好生活智慧录", "生活妙招小工匠", "幸知情感书房", "便捷生活通", "心享好生活", "祝福语录问候精选", "解忧生活铺", "家庭妙招站", "智享生活巧招", "早上好祝福问候心语", "技巧达人生活馆", "暖心幸福语集", "养身百科常谈", "省时省力小技巧", "妙招干货合集", "万事生活通", "每日精选祝福寄语", "祝福语正能量问候大全", "生活妙手多", "妙生活集市录", "生活无忧大全", "巧手生活技巧指南", "窍门百事通", "生活锦囊妙集", "技巧生活大全", "知著书局", "生活妙招百家录", "邻里生活妙招宝典", "技巧百变馆", "趣招技巧生活通", "家庭窍门实录", "快乐祝福语录集", "合家欢乐祝福问候", "日常家居技能", "好友祝福正能量语录", "百科巧事万事通", "巧手生活百科", "巧手生活体验馆", "每日祝福语早安语录", "情感经典说", "纷云说", "家庭妙招优选", "生活能手妙招", "居家生活实用小妙招", "巧生活妙招馆", "妙招干货铺", "速学妙招录", "科普生活小帮手", "祝福语热门精选", "美满祝福语录", "畅想生活招", "技巧百科巧招", "经典情感祝福语录", "耀舟实用妙招汇", "祝福心语选集", "节日问候语选集", "居家技能全书", "祝福贺词精选", "一起跳个广场舞", "小窍门大帮手", "精选问候语送祝福", "生活妙招收录馆", "家用妙招技巧集", "邻家生活小妙招", "温馨问候语集", "真心祝福暖心问候", "技巧实用生活馆", "实用生活妙招全录", "百科妙招一点通", "妙招百科集", "花好月圆吉祥祝福", "妙招知识通", "创意生活技巧集", "颜夕漫读", "美好祝福日常集锦", "邻里技能宝典", "早上好暖心祝福语录", "妙享生活妙招社", "微看情感好文", "节日祝福常用问候语录", "暖心祝愿语录", "乐活妙招小帮手", "友友过来看", "静好生活社", "漫读生活指南", "中老年饮食杂谈", "生活之百科", "生活微光志", "生活图鉴", "生活漫记簿", "奇闻秘传", "生活轻描记", "退休生活百科", "生活拾光机", "日常慧窍门", "奇闻汐语", "家庭饮食宝典", "分享建康知识", "老年健康生活", "异闻奇谈录", "江湖奇闻记", "烟火慢生活", "奇闻逸事阁", "饮食养身秘诀", "慢享生活记", "生活山河集", "养老微光录", "甄选生活册", "居家生活指南", "老年退休手册", "生活知趣多", "流年新生活", "林下思忆", "忆往深情", "生活百科小常识", "春耕秋实录", "生活技巧锦囊大全", "趣享生活社", "每日圆满祝福", "微读情感驿站", "巧手来当家", "祝福问候早安手册", "日用窍门大全", "365天早安祝福问候", "趣事说奇闻", "技巧百科说", "趣闻奇谈录", "生活能手宝典", "日日送福语", "小妙招锦囊", "幸福生活事事通", "生活巧思屋", "温情祝福合集", "日常家居小智慧", "祝福语暖心语录精选", "生活妙招日志", "生活妙招因子", "早晚安正能量问候大全", "优选早安问候语", "巧手生活知识馆", "礼貌问候祝福语", "日常生活小工匠", "百科技巧生活屋", "美好祝福问候语集", "幸福问候祝福馆", "早安祝福温馨问候", "优享生活妙招社", "生活集事通", "邻家妙招集合录", "情感碎语", "一招妙生活", "日常祝福语录合集", "明月观尘", "技巧百科生活屋", "奇妙知识生活库", "祝福早安每日集锦", "暖心问候心语", "生活常识技能馆", "每日一祝福精选", "小技巧百科馆", "生活妙招物语", "妙招百科全录", "日常窍门百事通", "祝福节日问候馆", "巧手新生活", "退休时光宝典", "快乐开心最重要", "生活浅酌", "退休日常百科", "记录农村日常", "退休常识宝典", "闲谈养老生活", "退休老年乐园", "精选退休大全", "饮食趣发现", "精选退休录", "奇闻实记", "退休谈生活", "每日建康小妙招", "诡夜奇闻录", "每日饮食推荐", "生活闲语", "天天饮食搭配", "镜像奇闻录", "饮食慢品", "老年饮食合集", "妙趣生活集", "生活锦记", "生活行语", "退休好岁月", "生活智行记", "饮食新风尚", "经典家常食谱", "优选生活笔记", "趣谈奇闻集", "落月情绵", "安然若似", "看生活有妙招", "云露华浓", "悦读时光书房", "生活百科妙招通", "俏生活有妙招", "实用生活妙招指南", "妙招集锦百宝箱", "情感避风溏", "岁月乐活集", "奇闻妙趣谈", "小客精选团", "邻家妙招巧事通", "趣味生活指南", "生活技巧百招馆", "小妙招大智慧", "幸福语录祝语", "实用窍门馆", "优生活妙招学习馆", "邻家妙招实用技巧", "技能生活小贴士", "静听情感语录", "家居技能秘籍", "温馨祝福语早上好", "精选节日祝福语录", "美满祝福大全", "家庭日用妙招", "祝福贴心语录", "老年金色岁月", "退休都来看", "快乐金秋生活", "人老天地宽", "奇闻怪谈志", "异度奇闻录", "退休悦读汇", "养老生活感悟", "退休岁月杂谈", "生活小窍门推荐", "家庭饮食百科", "老年生活食谱", "奇闻幻彩集", "趣说奇闻汇", "退休铭记", "退休生活小贴士", "感受退休生活", "生活微光闲语", "智慧妙招生活", "甄选生活社", "生活识百味", "奇闻趣集", "每日妙招共享", "鉴赏生活日记", "生活窍门50条", "奇事异闻录", "奇闻好看", "秘探奇闻录", "记录退休秘籍", "家庭食谱合集", "与心浮沉", "唯美微情感", "春月不惜", "心怡趣论奇闻", "老年生活有依", "别样新生活", "居家幸福老人", "暖心情感屋", "点滴妙招乐生活", "日常祝福温馨问候", "生活必备技巧集", "小妙招大用途", "会点生活小妙招", "生活百科常识库", "巧思巧手汇聚集", "深情驿站", "早安祝福问候精选", "一招一生活", "温馨问候祝福语录", "每日问候暖心语录", "奇闻趣谈社", "生活妙招技巧通", "无忧巧生活", "妙招百事帮", "一招一巧生活馆", "老年生活愉快", "养老生活讲堂", "农村知识铺", "居家常用秘籍", "品质退休生活", "万象奇闻录", "退休养老之路", "名厨美食推荐", "记录养老生活", "中老年退休知识", "实用退休知识", "生活辰光记", "极光奇闻", "奇闻放大镜", "退休生活讲堂", "奇事百闻录", "退休生活新视角", "生活色彩斑斓", "迎风好生活", "闲庭信步生活", "生活其乐融融", "雾里云淡", "雨凉思情", "矜柔人生", "生活智慧妙招姐", "圆满祝福精选", "温馨问候语集锦", "实用生活技能合集", "生活趣事笔记", "生活解语", "奇谈趣闻社", "奇人见闻", "退休生活好帮手", "奇闻集锦", "退休老年社群", "奇闻故事栈", "企退生活日记", "分享生活小常识", "中老年美食圈", "退休生活常识", "精选退休美文", "休闲养老生活", "养老退休指南", "安享退休事", "悠闲退休时光", "夜谈奇闻志", "南山遗梦", "奇特人生", "奇闻集合", } class RecycleOutsideAccountArticlesTask(Const): def __init__(self, pool, log_client, date_string): self.pool = pool self.log_client = log_client self.date_string = date_string async def get_outside_accounts(self): query = """ select t2.group_source_name as account_source, t3.name as name, t3.gh_id as gh_id from wx_statistics_group_source t1 join wx_statistics_group_source_account t2 on t2.group_source_name = t1.account_source_name join publish_account t3 on t3.id = t2.account_id where t1.mode_type = '代运营服务号' and ( t2.group_source_name like '%云誉%' or t2.group_source_name like '%微小盟%' or t2.group_source_name like '%阿雅达%' or t2.group_source_name like '%创易%' ) and t3.status = 1 and t3.name != ''; """ return await self.pool.async_fetch(query=query, db_name="aigc") async def recycle_single_account(self, account): """recycle single account""" if account["name"] not in account_name_set: return query = """ select max(update_time) as publish_timestamp \ from outside_account_articles where gh_id = %s; """ response = await self.pool.async_fetch(query=query, params=(account["gh_id"],)) if response: max_publish_timestamp = response[0]["publish_timestamp"] else: max_publish_timestamp = int(time.time()) - self.NEW_ACCOUNT_CRAWL_PERIOD cursor = None while True: response = await get_article_list_from_account( account_id=account["gh_id"], index=cursor ) response_code = response["code"] match response_code: case self.ACCOUNT_FORBIDDEN_CODE: # await feishu_robot.bot( # title="发布账号封禁", # detail={ # "账号名称": account["name"], # "账号id": account["gh_id"], # }, # ) return case self.ARTICLE_SUCCESS_CODE: msg_list = response.get("data", {}).get("data", []) if not msg_list: return await insert_outside_article_into_recycle_pool( self.pool, self.log_client, msg_list, account ) # check last article last_article = msg_list[-1] last_publish_timestamp = last_article["AppMsg"]["BaseInfo"][ "UpdateTime" ] if last_publish_timestamp <= max_publish_timestamp: return cursor = response["data"].get("next_cursor") if not cursor: return case self.CRAWL_CRASH_CODE: await self.log_client.log( contents={ "task": "recycle_daily_publish_articles", "data": { "gh_id": account["gh_id"], }, "message": "爬虫挂掉", "status": "fail", } ) case _: return async def deal(self): subscription_accounts = await self.get_outside_accounts() for account in tqdm(subscription_accounts, desc="recycle each account"): try: await self.recycle_single_account(account) except Exception as e: print( f"{account['name']}\t{account['gh_id']}: recycle account error:", e ) class UpdateOutsideRootSourceIdAndUpdateTimeTask(UpdateRootSourceIdAndUpdateTimeTask): def __init__(self, pool, log_client): super().__init__(pool, log_client) async def get_outside_article_list_v2(self) -> list[dict]: query = """ select content_url, wx_sn from outside_account_articles where publish_timestamp in %s and account_name in %s order by update_time desc; """ article_list = await self.pool.async_fetch( query=query, params=(tuple([0, -1, -3]), tuple(account_name_set)) ) return article_list async def check_each_article(self, article: dict): url = article["content_url"] wx_sn = article["wx_sn"] try: response = await get_article_detail(url) response_code = response["code"] if response_code == self.ARTICLE_DELETE_CODE: publish_timestamp_s = self.DELETE_STATUS root_source_id_list = [] elif response_code == self.ARTICLE_ILLEGAL_CODE: publish_timestamp_s = self.ILLEGAL_STATUS root_source_id_list = [] elif response_code == self.ARTICLE_SUCCESS_CODE: data = response["data"]["data"] publish_timestamp_ms = data["publish_timestamp"] publish_timestamp_s = int(publish_timestamp_ms / 1000) mini_program = data.get("mini_program", []) if mini_program: root_source_id_list = [ urllib.parse.parse_qs(urllib.parse.unquote(i["path"])).get( "rootSourceId", [""] )[0] for i in mini_program ] else: root_source_id_list = [] else: publish_timestamp_s = self.UNKNOWN_STATUS root_source_id_list = [] except Exception as e: publish_timestamp_s = self.REQUEST_FAIL_STATUS root_source_id_list = None error_msg = traceback.format_exc() await self.log_client.log( contents={ "task": "get_official_article_detail", "data": { "url": url, "wx_sn": wx_sn, "error_msg": error_msg, "error": str(e), }, "function": "check_each_article", "status": "fail", } ) query = """ update outside_account_articles set publish_timestamp = %s, root_source_id_list = %s where wx_sn = %s; """ await self.pool.async_save( query=query, params=( publish_timestamp_s, json.dumps(root_source_id_list, ensure_ascii=False), wx_sn, ), ) if publish_timestamp_s == self.REQUEST_FAIL_STATUS: article["wx_sn"] = wx_sn return article else: return None async def fallback_mechanism(self): # 若还是无 publish_timestamp,用update_time当作 publish_timestamp update_sql_2 = f""" update outside_account_articles set publish_timestamp = update_time where publish_timestamp < %s; """ affected_rows_2 = await self.pool.async_save(query=update_sql_2, params=(0,)) if affected_rows_2: await feishu_robot.bot( title="执行兜底修改发布时间戳", detail={ # "通过msgId修改": affected_rows_1, "通过create_timestamp修改": affected_rows_2, }, mention=False, ) async def deal(self): task_list = await self.get_outside_article_list_v2() for task in tqdm(task_list, desc="get article detail step1: "): try: await self.check_each_article(task) except Exception as e: try: await self.log_client.log( contents={ "task": "get_official_article_detail_step1", "data": { "detail": { "url": task["ContentUrl"], "wx_sn": task["wx_sn"], }, "error_msg": traceback.format_exc(), "error": str(e), }, "function": "check_each_article", "status": "fail", } ) except Exception as e: print(e) print(traceback.format_exc())