瀏覽代碼

Merge branch 'feature/luojunhui/2025-11-17-fwhdata' of Server/LongArticleTaskServer into master

luojunhui 17 小時之前
父節點
當前提交
0f32b2cd28

+ 1 - 0
applications/pipeline/__init__.py

@@ -1,2 +1,3 @@
 from .data_recycle_pipeline import insert_article_into_recycle_pool
+from .data_recycle_pipeline import insert_outside_article_into_recycle_pool
 from .crawler_pipeline import CrawlerPipeline

+ 141 - 0
applications/pipeline/data_recycle_pipeline.py

@@ -4,6 +4,28 @@ from typing import List, Dict
 from applications.utils import show_desc_to_sta, str_to_md5
 
 
+# Parameterized INSERT for one row of `outside_account_articles`: 25 columns are
+# listed and 25 `%s` placeholders follow, bound positionally by the caller.
+# NOTE(review): the column is spelled `cover_img_url_255_1`, while the value bound
+# to it in this file is read from the "CoverImgUrl_235_1" field — confirm the
+# column name against the table schema.
+insert_outside_article_query = """
+    INSERT INTO outside_account_articles
+    (
+        wx_sn, gh_id, account_name, app_msg_id, title,
+        title_md5, publish_type, create_time, update_time,
+        digest, item_index, content_url,
+        source_url, cover_img_url, cover_img_url_1_1, cover_img_url_255_1,
+        item_show_type, is_original, show_desc, ori_content,
+        show_view_count, show_like_count, show_zs_count, show_pay_count,
+        base_info
+    )
+    VALUES
+    (
+        %s, %s, %s, %s, %s,
+        %s, %s, %s, %s, %s,
+        %s, %s, %s, %s, %s,
+        %s, %s, %s, %s, %s,
+        %s, %s, %s, %s, %s
+    );
+"""
+
+
 async def insert_article_into_recycle_pool(
     pool, log_client, msg_list: List[Dict], account_info: Dict
 ):
@@ -126,3 +148,122 @@ async def insert_article_into_recycle_pool(
                     "data": {"account_name": account_info["name"]},
                 }
             )
+
+
+async def insert_outside_article_into_recycle_pool(
+    pool, log_client, msg_list: List[Dict], account_info: Dict
+):
+    """Store every article of ``msg_list`` in ``outside_account_articles``.
+
+    Each article title is first registered in ``title_features`` (``insert
+    ignore``, version 2). The article row is then inserted; if that insert
+    raises, the view/like counters of the row with the same ``wx_sn`` are
+    updated instead, and a failure of that update is logged.
+
+    Args:
+        pool: database client; ``async_save(query=..., params=...)`` is awaited.
+        log_client: log client; ``log(contents=...)`` is awaited.
+        msg_list: message dicts, read through their ``BaseInfo`` and
+            ``AppMsg`` -> ``BaseInfo`` / ``DetailInfo`` keys.
+        account_info: account dict; ``gh_id`` and ``name`` are required.
+    """
+    # Loop-invariant statement, built once instead of once per article.
+    insert_title_query = """
+        insert ignore into title_features (title, title_md5, version)
+            values (%s, %s, %s);
+    """
+    for info in msg_list:
+        base_info = info.get("BaseInfo", {})
+        app_msg = info.get("AppMsg", {})
+        app_msg_base = app_msg.get("BaseInfo", {})
+        app_msg_id = app_msg_base.get("AppMsgId", None)
+        create_timestamp = app_msg_base.get("CreateTime", None)
+        update_timestamp = app_msg_base.get("UpdateTime", None)
+        publish_type = app_msg_base.get("Type", None)
+        detail_article_list = app_msg.get("DetailInfo", [])
+
+        if not detail_article_list:
+            await log_client.log(
+                contents={
+                    "function": "insert_article_into_recycle_pool",
+                    "status": "fail",
+                    "message": "account has no articles",
+                    "data": {"account_name": account_info["name"]},
+                }
+            )
+            continue
+
+        for article in detail_article_list:
+            title = article.get("Title", None)
+            # Must be the bare hash value: a trailing comma here would turn it
+            # into a 1-tuple and bind a tuple as the SQL parameter.
+            title_md5 = str_to_md5(title)
+            await pool.async_save(
+                query=insert_title_query, params=(title, title_md5, 2)
+            )
+
+            digest = article.get("Digest", None)
+            item_index = article.get("ItemIndex", None)
+            content_url = article.get("ContentUrl", None)
+            source_url = article.get("SourceUrl", None)
+            cover_img_url = article.get("CoverImgUrl", None)
+            cover_img_url_1_1 = article.get("CoverImgUrl_1_1", None)
+            cover_img_url_235_1 = article.get("CoverImgUrl_235_1", None)
+            item_show_type = article.get("ItemShowType", None)
+            is_original = article.get("IsOriginal", None)
+            show_desc = article.get("ShowDesc", None)
+            show_stat = show_desc_to_sta(show_desc)
+            ori_content = article.get("ori_content", None)
+            show_view_count = show_stat.get("show_view_count", 0)
+            show_like_count = show_stat.get("show_like_count", 0)
+            show_zs_count = show_stat.get("show_zs_count", 0)
+            show_pay_count = show_stat.get("show_pay_count", 0)
+
+            # Take the value of the "&sn=" query parameter. A URL without that
+            # marker yields None instead of raising IndexError, which used to
+            # abort the rest of the batch.
+            wx_sn = None
+            if content_url:
+                _, marker, tail = content_url.partition("&sn=")
+                if marker:
+                    wx_sn = tail.split("&")[0]
+
+            info_tuple = (
+                wx_sn,
+                account_info["gh_id"],
+                account_info["name"],
+                app_msg_id,
+                title,
+                title_md5,
+                publish_type,
+                create_timestamp,
+                update_timestamp,
+                digest,
+                item_index,
+                content_url,
+                source_url,
+                cover_img_url,
+                cover_img_url_1_1,
+                cover_img_url_235_1,
+                item_show_type,
+                is_original,
+                show_desc,
+                ori_content,
+                show_view_count,
+                show_like_count,
+                show_zs_count,
+                show_pay_count,
+                json.dumps(base_info, ensure_ascii=False),
+            )
+            try:
+                await pool.async_save(
+                    query=insert_outside_article_query,
+                    params=info_tuple,
+                )
+                await log_client.log(
+                    contents={
+                        "function": "insert_article_into_recycle_pool",
+                        "status": "success",
+                        "data": info_tuple,
+                    }
+                )
+                print("insert_article_into_recycle_pool success")
+
+            except Exception:
+                # Insert failed (presumably a duplicate wx_sn — confirm against
+                # the table's keys): refresh the counters of the existing row.
+                try:
+                    update_sql = """update outside_account_articles set show_view_count = %s, show_like_count=%s where wx_sn = %s;"""
+                    await pool.async_save(
+                        query=update_sql,
+                        params=(show_view_count, show_like_count, wx_sn),
+                    )
+                    print("update_article_into_recycle_pool success")
+
+                except Exception as update_error:
+                    await log_client.log(
+                        contents={
+                            "function": "insert_article_into_recycle_pool",
+                            "status": "fail",
+                            "message": "更新文章失败",
+                            "data": {
+                                "error": str(update_error),
+                                "content_link": content_url,
+                                "account_name": account_info["name"],
+                            },
+                        }
+                    )

+ 4 - 0
applications/tasks/data_recycle_tasks/__init__.py

@@ -3,6 +3,8 @@ from .recycle_daily_publish_articles import CheckDailyPublishArticlesTask
 from .recycle_daily_publish_articles import UpdateRootSourceIdAndUpdateTimeTask
 from .recycle_daily_publish_articles import RecycleFwhDailyPublishArticlesTask
 from .recycle_mini_program_detail import RecycleMiniProgramDetailTask
+from .recycle_outside_account_articles import RecycleOutsideAccountArticlesTask
+from .recycle_outside_account_articles import UpdateOutsideRootSourceIdAndUpdateTimeTask
 
 
 __all__ = [
@@ -11,4 +13,6 @@ __all__ = [
     "UpdateRootSourceIdAndUpdateTimeTask",
     "RecycleFwhDailyPublishArticlesTask",
     "RecycleMiniProgramDetailTask",
+    "RecycleOutsideAccountArticlesTask",
+    "UpdateOutsideRootSourceIdAndUpdateTimeTask",
 ]

+ 856 - 0
applications/tasks/data_recycle_tasks/recycle_outside_account_articles.py

@@ -0,0 +1,856 @@
+import time, json
+
+import traceback
+import urllib.parse
+from tqdm.asyncio import tqdm
+
+from .recycle_daily_publish_articles import UpdateRootSourceIdAndUpdateTimeTask
+from .recycle_daily_publish_articles import Const
+from applications.crawler.wechat import get_article_list_from_account
+from applications.crawler.wechat import get_article_detail
+from applications.pipeline import insert_outside_article_into_recycle_pool
+from applications.api import feishu_robot
+
+# Account names handled by this module: RecycleOutsideAccountArticlesTask skips
+# any account whose name is not in this set, and
+# UpdateOutsideRootSourceIdAndUpdateTimeTask filters its article query by it.
+# NOTE(review): one entry further down is the literal "\\N" — it looks like a
+# NULL marker left over from a database export rather than a real account name;
+# confirm and remove if so.
+account_name_set = {
+    "念念私语",
+    "发现趣论奇闻",
+    "一晴方春",
+    "生活技巧悦读",
+    "妙招百科享生活",
+    "精选问候祝福寄语",
+    "生活实用妙招收录",
+    "生活妙计通",
+    "逸事趣闻说",
+    "天天一起跳广场舞",
+    "零点生活志",
+    "居家生活实录",
+    "生活妙招实录",
+    "生活妙招点子库",
+    "生活百科巧事通",
+    "早晨送你暖心祝福",
+    "生活情感感悟",
+    "家庭百科大全",
+    "清晨早安温馨问候",
+    "经典祝福语大全集",
+    "日常技巧全书",
+    "无忧生活锦囊",
+    "家庭妙方实录",
+    "生活技巧宝藏库",
+    "妙招集锦全书",
+    "乐享技巧馆",
+    "暖心问候语录",
+    "乐享技巧汇",
+    "巧手理想家",
+    "品质生活有妙招",
+    "生活妙招万家通",
+    "生活妙招大赏",
+    "温暖祝福语大全",
+    "好愿祝福语录",
+    "精选日常祝福语",
+    "邻家生活点滴",
+    "创新生活妙招百科",
+    "小技巧生活录",
+    "妙生活家手记",
+    "生活窍门事事通",
+    "巧手技巧百科",
+    "生活技巧小支招",
+    "趣生活百科",
+    "生活妙用技巧合集",
+    "日常祝福语大全",
+    "妙招达人养成记",
+    "妙生活锦囊",
+    "生活百科一点知",
+    "生活妙计科普馆",
+    "巧妈生活妙招助手",
+    "小窍门生活录",
+    "好生活点点通",
+    "实用技巧馆",
+    "生活情感肆读",
+    "生活技巧小奥秘",
+    "送温暖祝福精选",
+    "生活妙招选集",
+    "每日祝福语问候",
+    "日常生活点子库",
+    "每日好友祝福集",
+    "节气祝福问候录",
+    "趣味生活铺子",
+    "居家生活妙招大全",
+    "落日情绪屋",
+    "邻家生活技巧",
+    "每日精选祝福语录",
+    "邻家生活有妙招",
+    "妙招集合录",
+    "多学生活技巧",
+    "祝福问候手册",
+    "点滴生活坊",
+    "早安心语合集",
+    "微情话语录",
+    "邻家妙招知识宝典",
+    "生活妙招好能手",
+    "节庆祝福语大全",
+    "精选早安祝福合集",
+    "节日问候语大全",
+    "居家生活妙招技巧",
+    "精选早安问候语合集",
+    "治愈情感宝典",
+    "生活小妙知",
+    "生活妙想指南",
+    "技巧达人站",
+    "幸福语录精选",
+    "技巧生活手册",
+    "早安祝福语集",
+    "家居生活小窍门全集",
+    "祝福问候大集锦",
+    "乐活研究社",
+    "实用生活365",
+    "无忧生活小支招",
+    "家有妙计百变通",
+    "生活技巧智慧库",
+    "点滴乐活小妙招",
+    "早安问候精选祝福",
+    "生活妙招巧思汇",
+    "热门广场舞大全",
+    "中老年妙招大全",
+    "技巧百变生活馆",
+    "生活巧手指南",
+    "有妙招享生活",
+    "巧手生活录",
+    "生活技巧我知道",
+    "每日早晚安祝福语录",
+    "情暖祝福语录",
+    "有趣生活妙招屋",
+    "家居生活趣用",
+    "日常生活妙招百科",
+    "灵巧生活一点通",
+    "拾遗情感铺",
+    "广场歌舞热榜",
+    "邻里妙方百科",
+    "妙招技巧帮",
+    "经典广场舞热榜",
+    "早安祝福集大全",
+    "祝福语每日送",
+    "情感情报库",
+    "生活妙招技巧汇",
+    "实用妙招宝",
+    "巧思收集录",
+    "日用妙招点点通",
+    "家庭技巧生活录",
+    "每日早安祝福集",
+    "清晨祝福合集",
+    "技巧生活集萃",
+    "生活妙思巧手集",
+    "晨间送祝语",
+    "便捷小妙招",
+    "趣招一点通",
+    "妙招收集馆",
+    "情感慢读",
+    "安心祝福集",
+    "生活技巧点子库",
+    "懂点技巧吧",
+    "技巧能手妙招库",
+    "事事妙招集锦",
+    "祝福语合集",
+    "技巧生活录",
+    "生活妙思小帮手",
+    "暖心祝福寄语录",
+    "生活妙计百宝库",
+    "每日祝福语选集",
+    "巧手常识集锦",
+    "日常祝福问候语",
+    "日常问候心愿语录",
+    "小窍门宝典",
+    "情感阅读舍",
+    "精选日常祝福",
+    "生活小机智",
+    "生活乐享君",
+    "早上好心情祝福",
+    "幸福小窍门",
+    "绸缪情感铺",
+    "晨间暖心语录",
+    "生活妙方宝典",
+    "生活妙招点通铺",
+    "美好微祝福语录",
+    "生活最有招",
+    "祝福语温暖问候集锦",
+    "节日问候温馨祝福",
+    "每日幸福语录集锦",
+    "生活巧招点子库",
+    "生活情感课堂",
+    "情感避风湾",
+    "实用生活小方法",
+    "什锦生活录",
+    "晨间祝福语精选",
+    "巧居生活妙招站",
+    "巧知生活集",
+    "日常妙招收集录",
+    "技巧生活百知",
+    "居家小妙术",
+    "时光说情感",
+    "好愿祝福温馨问候",
+    "最美祝愿问候",
+    "生活妙招乐享",
+    "情绪解忧屋",
+    "生活妙招空间站",
+    "乐活技巧馆",
+    "如意祝福选集",
+    "日常技巧大讲堂",
+    "三分钟技巧集",
+    "情感生活百态",
+    "早安问候语精选大全",
+    "招招妙招百事通",
+    "经典热门广场舞曲",
+    "日常生活技能大全",
+    "一点儿窍门馆",
+    "邻家生活技巧合集",
+    "每日送祝福手册",
+    "早上好常用祝福问候",
+    "妙招巧生活杂货铺",
+    "生活全知通",
+    "友友刷刷看",
+    "退休也乐呵",
+    "奇闻怪异集",
+    "退休养老攻略",
+    "竹边生活记",
+    "诡秘奇闻记",
+    "趣味生活简记",
+    "生活墨记",
+    "企退老人心声",
+    "生活静语",
+    "生活趣时光",
+    "生活向暖",
+    "老年退休小贴士",
+    "退休乐时光",
+    "生活趣谈会",
+    "生活百科闲谈",
+    "生活志记",
+    "退休生活报告",
+    "奇闻趣世界",
+    "日常饮食百科",
+    "趣享生活时光",
+    "趣读奇闻汇",
+    "生活点滴小栈",
+    "浮光生活记",
+    "生活解忧坊",
+    "生活多彩时光",
+    "退休生活那些事",
+    "生活茶话集",
+    "美好生活闲谈",
+    "民间奇闻集",
+    "生活百味记",
+    "悠享生活指南",
+    "杂谈异闻社",
+    "饮食智慧屋",
+    "漫谈奇闻社",
+    "退休生活驿站",
+    "幸福养身大全",
+    "生活念记",
+    "生活栖风",
+    "生活沐暖",
+    "生活絮事记",
+    "生活集韵",
+    "轻享生活记",
+    "幽巷奇闻谈",
+    "饮食匠心录",
+    "生活畅聊集",
+    "鉴赏奇闻集",
+    "生活拾碎光",
+    "企业退休杂谈",
+    "智慧退休计划",
+    "老年退休指南",
+    "饮食创意坊",
+    "趣闻奇谈汇",
+    "生活漫读集",
+    "生活趣事乐园",
+    "慢品生活味",
+    "企退老年乐园",
+    "生活悠闲记",
+    "生活饮食百科",
+    "畅享饮食集",
+    "幸福生活乐园",
+    "退休知识宝典",
+    "生活流年记",
+    "生活知百味",
+    "生活感悟集",
+    "精致饮食指南",
+    "生活知识宝典",
+    "\\N",
+    "沧桑时光生活",
+    "自得美好生活",
+    "生活暖暖舒心",
+    "每日技巧集",
+    "温暖问候精选",
+    "百惠优生活",
+    "生活爱问百科",
+    "技巧便生活",
+    "生活一点知",
+    "精选祝福问候大全",
+    "巧手生活碎片",
+    "巧思知识库",
+    "温暖祝福寄语",
+    "智巧日常百宝库",
+    "招招巧手汇",
+    "早安祝福精选手册",
+    "每日祝福寄语",
+    "生活技能百科全知",
+    "生活妙招窍门指南",
+    "美好生活智慧录",
+    "生活妙招小工匠",
+    "幸知情感书房",
+    "便捷生活通",
+    "心享好生活",
+    "祝福语录问候精选",
+    "解忧生活铺",
+    "家庭妙招站",
+    "智享生活巧招",
+    "早上好祝福问候心语",
+    "技巧达人生活馆",
+    "暖心幸福语集",
+    "养身百科常谈",
+    "省时省力小技巧",
+    "妙招干货合集",
+    "万事生活通",
+    "每日精选祝福寄语",
+    "祝福语正能量问候大全",
+    "生活妙手多",
+    "妙生活集市录",
+    "生活无忧大全",
+    "巧手生活技巧指南",
+    "窍门百事通",
+    "生活锦囊妙集",
+    "技巧生活大全",
+    "知著书局",
+    "生活妙招百家录",
+    "邻里生活妙招宝典",
+    "技巧百变馆",
+    "趣招技巧生活通",
+    "家庭窍门实录",
+    "快乐祝福语录集",
+    "合家欢乐祝福问候",
+    "日常家居技能",
+    "好友祝福正能量语录",
+    "百科巧事万事通",
+    "巧手生活百科",
+    "巧手生活体验馆",
+    "每日祝福语早安语录",
+    "情感经典说",
+    "纷云说",
+    "家庭妙招优选",
+    "生活能手妙招",
+    "居家生活实用小妙招",
+    "巧生活妙招馆",
+    "妙招干货铺",
+    "速学妙招录",
+    "科普生活小帮手",
+    "祝福语热门精选",
+    "美满祝福语录",
+    "畅想生活招",
+    "技巧百科巧招",
+    "经典情感祝福语录",
+    "耀舟实用妙招汇",
+    "祝福心语选集",
+    "节日问候语选集",
+    "居家技能全书",
+    "祝福贺词精选",
+    "一起跳个广场舞",
+    "小窍门大帮手",
+    "精选问候语送祝福",
+    "生活妙招收录馆",
+    "家用妙招技巧集",
+    "邻家生活小妙招",
+    "温馨问候语集",
+    "真心祝福暖心问候",
+    "技巧实用生活馆",
+    "实用生活妙招全录",
+    "百科妙招一点通",
+    "妙招百科集",
+    "花好月圆吉祥祝福",
+    "妙招知识通",
+    "创意生活技巧集",
+    "颜夕漫读",
+    "美好祝福日常集锦",
+    "邻里技能宝典",
+    "早上好暖心祝福语录",
+    "妙享生活妙招社",
+    "微看情感好文",
+    "节日祝福常用问候语录",
+    "暖心祝愿语录",
+    "乐活妙招小帮手",
+    "友友过来看",
+    "静好生活社",
+    "漫读生活指南",
+    "中老年饮食杂谈",
+    "生活之百科",
+    "生活微光志",
+    "生活图鉴",
+    "生活漫记簿",
+    "奇闻秘传",
+    "生活轻描记",
+    "退休生活百科",
+    "生活拾光机",
+    "日常慧窍门",
+    "奇闻汐语",
+    "家庭饮食宝典",
+    "分享建康知识",
+    "老年健康生活",
+    "异闻奇谈录",
+    "江湖奇闻记",
+    "烟火慢生活",
+    "奇闻逸事阁",
+    "饮食养身秘诀",
+    "慢享生活记",
+    "生活山河集",
+    "养老微光录",
+    "甄选生活册",
+    "居家生活指南",
+    "老年退休手册",
+    "生活知趣多",
+    "流年新生活",
+    "林下思忆",
+    "忆往深情",
+    "生活百科小常识",
+    "春耕秋实录",
+    "生活技巧锦囊大全",
+    "趣享生活社",
+    "每日圆满祝福",
+    "微读情感驿站",
+    "巧手来当家",
+    "祝福问候早安手册",
+    "日用窍门大全",
+    "365天早安祝福问候",
+    "趣事说奇闻",
+    "技巧百科说",
+    "趣闻奇谈录",
+    "生活能手宝典",
+    "日日送福语",
+    "小妙招锦囊",
+    "幸福生活事事通",
+    "生活巧思屋",
+    "温情祝福合集",
+    "日常家居小智慧",
+    "祝福语暖心语录精选",
+    "生活妙招日志",
+    "生活妙招因子",
+    "早晚安正能量问候大全",
+    "优选早安问候语",
+    "巧手生活知识馆",
+    "礼貌问候祝福语",
+    "日常生活小工匠",
+    "百科技巧生活屋",
+    "美好祝福问候语集",
+    "幸福问候祝福馆",
+    "早安祝福温馨问候",
+    "优享生活妙招社",
+    "生活集事通",
+    "邻家妙招集合录",
+    "情感碎语",
+    "一招妙生活",
+    "日常祝福语录合集",
+    "明月观尘",
+    "技巧百科生活屋",
+    "奇妙知识生活库",
+    "祝福早安每日集锦",
+    "暖心问候心语",
+    "生活常识技能馆",
+    "每日一祝福精选",
+    "小技巧百科馆",
+    "生活妙招物语",
+    "妙招百科全录",
+    "日常窍门百事通",
+    "祝福节日问候馆",
+    "巧手新生活",
+    "退休时光宝典",
+    "快乐开心最重要",
+    "生活浅酌",
+    "退休日常百科",
+    "记录农村日常",
+    "退休常识宝典",
+    "闲谈养老生活",
+    "退休老年乐园",
+    "精选退休大全",
+    "饮食趣发现",
+    "精选退休录",
+    "奇闻实记",
+    "退休谈生活",
+    "每日建康小妙招",
+    "诡夜奇闻录",
+    "每日饮食推荐",
+    "生活闲语",
+    "天天饮食搭配",
+    "镜像奇闻录",
+    "饮食慢品",
+    "老年饮食合集",
+    "妙趣生活集",
+    "生活锦记",
+    "生活行语",
+    "退休好岁月",
+    "生活智行记",
+    "饮食新风尚",
+    "经典家常食谱",
+    "优选生活笔记",
+    "趣谈奇闻集",
+    "落月情绵",
+    "安然若似",
+    "看生活有妙招",
+    "云露华浓",
+    "悦读时光书房",
+    "生活百科妙招通",
+    "俏生活有妙招",
+    "实用生活妙招指南",
+    "妙招集锦百宝箱",
+    "情感避风溏",
+    "岁月乐活集",
+    "奇闻妙趣谈",
+    "小客精选团",
+    "邻家妙招巧事通",
+    "趣味生活指南",
+    "生活技巧百招馆",
+    "小妙招大智慧",
+    "幸福语录祝语",
+    "实用窍门馆",
+    "优生活妙招学习馆",
+    "邻家妙招实用技巧",
+    "技能生活小贴士",
+    "静听情感语录",
+    "家居技能秘籍",
+    "温馨祝福语早上好",
+    "精选节日祝福语录",
+    "美满祝福大全",
+    "家庭日用妙招",
+    "祝福贴心语录",
+    "老年金色岁月",
+    "退休都来看",
+    "快乐金秋生活",
+    "人老天地宽",
+    "奇闻怪谈志",
+    "异度奇闻录",
+    "退休悦读汇",
+    "养老生活感悟",
+    "退休岁月杂谈",
+    "生活小窍门推荐",
+    "家庭饮食百科",
+    "老年生活食谱",
+    "奇闻幻彩集",
+    "趣说奇闻汇",
+    "退休铭记",
+    "退休生活小贴士",
+    "感受退休生活",
+    "生活微光闲语",
+    "智慧妙招生活",
+    "甄选生活社",
+    "生活识百味",
+    "奇闻趣集",
+    "每日妙招共享",
+    "鉴赏生活日记",
+    "生活窍门50条",
+    "奇事异闻录",
+    "奇闻好看",
+    "秘探奇闻录",
+    "记录退休秘籍",
+    "家庭食谱合集",
+    "与心浮沉",
+    "唯美微情感",
+    "春月不惜",
+    "心怡趣论奇闻",
+    "老年生活有依",
+    "别样新生活",
+    "居家幸福老人",
+    "暖心情感屋",
+    "点滴妙招乐生活",
+    "日常祝福温馨问候",
+    "生活必备技巧集",
+    "小妙招大用途",
+    "会点生活小妙招",
+    "生活百科常识库",
+    "巧思巧手汇聚集",
+    "深情驿站",
+    "早安祝福问候精选",
+    "一招一生活",
+    "温馨问候祝福语录",
+    "每日问候暖心语录",
+    "奇闻趣谈社",
+    "生活妙招技巧通",
+    "无忧巧生活",
+    "妙招百事帮",
+    "一招一巧生活馆",
+    "老年生活愉快",
+    "养老生活讲堂",
+    "农村知识铺",
+    "居家常用秘籍",
+    "品质退休生活",
+    "万象奇闻录",
+    "退休养老之路",
+    "名厨美食推荐",
+    "记录养老生活",
+    "中老年退休知识",
+    "实用退休知识",
+    "生活辰光记",
+    "极光奇闻",
+    "奇闻放大镜",
+    "退休生活讲堂",
+    "奇事百闻录",
+    "退休生活新视角",
+    "生活色彩斑斓",
+    "迎风好生活",
+    "闲庭信步生活",
+    "生活其乐融融",
+    "雾里云淡",
+    "雨凉思情",
+    "矜柔人生",
+    "生活智慧妙招姐",
+    "圆满祝福精选",
+    "温馨问候语集锦",
+    "实用生活技能合集",
+    "生活趣事笔记",
+    "生活解语",
+    "奇谈趣闻社",
+    "奇人见闻",
+    "退休生活好帮手",
+    "奇闻集锦",
+    "退休老年社群",
+    "奇闻故事栈",
+    "企退生活日记",
+    "分享生活小常识",
+    "中老年美食圈",
+    "退休生活常识",
+    "精选退休美文",
+    "休闲养老生活",
+    "养老退休指南",
+    "安享退休事",
+    "悠闲退休时光",
+    "夜谈奇闻志",
+    "南山遗梦",
+    "奇特人生",
+    "奇闻集合",
+}
+
+
+class RecycleOutsideAccountArticlesTask(Const):
+    """Crawl the article lists of outside accounts into ``outside_account_articles``."""
+
+    def __init__(self, pool, log_client, date_string):
+        self.pool = pool
+        self.log_client = log_client
+        self.date_string = date_string
+
+    async def get_outside_accounts(self):
+        """Return account_source / name / gh_id rows read from the ``aigc`` database."""
+        query = """
+            select
+                t2.group_source_name as account_source,
+                t3.name as name,
+                t3.gh_id as gh_id
+            from wx_statistics_group_source t1
+                join wx_statistics_group_source_account t2 on t2.group_source_name = t1.account_source_name
+                join publish_account t3 on t3.id = t2.account_id
+            where
+                t1.mode_type = '代运营服务号' and
+                (
+                    t2.group_source_name like '%云誉%'
+                        or t2.group_source_name like '%微小盟%'
+                        or t2.group_source_name like '%阿雅达%'
+                        or t2.group_source_name like '%创易%'
+                    )
+            and t3.status = 1 and t3.name != '';
+        """
+        return await self.pool.async_fetch(query=query, db_name="aigc")
+
+    async def recycle_single_account(self, account):
+        """Page through one account's article list and store each page.
+
+        Accounts whose name is not in ``account_name_set`` are ignored. Paging
+        stops when the account is forbidden, a page is empty, the last message
+        of a page is not newer than the newest stored ``update_time``, no next
+        cursor is returned, the crawler reports a crash, or an unknown
+        response code is received.
+
+        Args:
+            account: dict with at least ``name`` and ``gh_id``.
+        """
+        if account["name"] not in account_name_set:
+            return
+
+        query = """
+                select max(update_time) as publish_timestamp \
+                from outside_account_articles
+                where gh_id = %s;
+                """
+        response = await self.pool.async_fetch(query=query, params=(account["gh_id"],))
+        # MAX() over zero rows still returns one row whose value is NULL, so the
+        # value itself has to be checked, not just the presence of a row.
+        max_publish_timestamp = response[0]["publish_timestamp"] if response else None
+        if max_publish_timestamp is None:
+            max_publish_timestamp = int(time.time()) - self.NEW_ACCOUNT_CRAWL_PERIOD
+
+        cursor = None
+        while True:
+            response = await get_article_list_from_account(
+                account_id=account["gh_id"], index=cursor
+            )
+            response_code = response["code"]
+            match response_code:
+                case self.ACCOUNT_FORBIDDEN_CODE:
+                    # NOTE: the Feishu alert for a forbidden account is disabled
+                    # here; the account is simply skipped.
+                    return
+                case self.ARTICLE_SUCCESS_CODE:
+                    msg_list = response.get("data", {}).get("data", [])
+                    if not msg_list:
+                        return
+
+                    await insert_outside_article_into_recycle_pool(
+                        self.pool, self.log_client, msg_list, account
+                    )
+
+                    # Stop once the page reaches already-stored articles.
+                    last_article = msg_list[-1]
+                    last_publish_timestamp = last_article["AppMsg"]["BaseInfo"][
+                        "UpdateTime"
+                    ]
+                    if last_publish_timestamp <= max_publish_timestamp:
+                        return
+
+                    cursor = response["data"].get("next_cursor")
+                    if not cursor:
+                        return
+                case self.CRAWL_CRASH_CODE:
+                    await self.log_client.log(
+                        contents={
+                            "task": "recycle_daily_publish_articles",
+                            "data": {
+                                "gh_id": account["gh_id"],
+                            },
+                            "message": "爬虫挂掉",
+                            "status": "fail",
+                        }
+                    )
+                    # Give up on this account: staying in the loop would
+                    # re-request the same cursor without any delay or limit.
+                    return
+                case _:
+                    return
+
+    async def deal(self):
+        """Recycle every outside account in turn; one failure does not stop the rest."""
+        subscription_accounts = await self.get_outside_accounts()
+        for account in tqdm(subscription_accounts, desc="recycle each account"):
+            try:
+                await self.recycle_single_account(account)
+
+            except Exception as e:
+                print(
+                    f"{account['name']}\t{account['gh_id']}: recycle account error:", e
+                )
+
+
+
+class UpdateOutsideRootSourceIdAndUpdateTimeTask(UpdateRootSourceIdAndUpdateTimeTask):
+    """Fill ``publish_timestamp`` / ``root_source_id_list`` of outside account articles."""
+
+    def __init__(self, pool, log_client):
+        super().__init__(pool, log_client)
+
+    async def get_outside_article_list_v2(self) -> list[dict]:
+        """Return content_url / wx_sn of articles still waiting for details.
+
+        Selected rows have ``publish_timestamp`` in (0, -1, -3) and an
+        ``account_name`` contained in ``account_name_set``, newest first.
+        """
+        query = """
+                select content_url, wx_sn
+                from outside_account_articles where publish_timestamp in %s
+                    and account_name in %s
+                order by update_time desc;
+            """
+        article_list = await self.pool.async_fetch(
+            query=query, params=(tuple([0, -1, -3]), tuple(account_name_set))
+        )
+        return article_list
+
+    async def check_each_article(self, article: dict):
+        """Fetch one article's detail and store its publish time and root source ids.
+
+        Args:
+            article: row with ``content_url`` and ``wx_sn``.
+
+        Returns:
+            The ``article`` dict when the detail request raised (the row is
+            stored with ``REQUEST_FAIL_STATUS``), otherwise ``None``.
+        """
+        url = article["content_url"]
+        wx_sn = article["wx_sn"]
+        try:
+            response = await get_article_detail(url)
+            response_code = response["code"]
+
+            if response_code == self.ARTICLE_DELETE_CODE:
+                publish_timestamp_s = self.DELETE_STATUS
+                root_source_id_list = []
+            elif response_code == self.ARTICLE_ILLEGAL_CODE:
+                publish_timestamp_s = self.ILLEGAL_STATUS
+                root_source_id_list = []
+            elif response_code == self.ARTICLE_SUCCESS_CODE:
+                data = response["data"]["data"]
+                publish_timestamp_ms = data["publish_timestamp"]
+                publish_timestamp_s = int(publish_timestamp_ms / 1000)
+                mini_program = data.get("mini_program", [])
+                if mini_program:
+                    # Pull the rootSourceId query value out of each mini
+                    # program path; "" when the path does not carry one.
+                    root_source_id_list = [
+                        urllib.parse.parse_qs(urllib.parse.unquote(i["path"])).get(
+                            "rootSourceId", [""]
+                        )[0]
+                        for i in mini_program
+                    ]
+                else:
+                    root_source_id_list = []
+            else:
+                publish_timestamp_s = self.UNKNOWN_STATUS
+                root_source_id_list = []
+        except Exception as e:
+            publish_timestamp_s = self.REQUEST_FAIL_STATUS
+            root_source_id_list = None
+            error_msg = traceback.format_exc()
+            await self.log_client.log(
+                contents={
+                    "task": "get_official_article_detail",
+                    "data": {
+                        "url": url,
+                        "wx_sn": wx_sn,
+                        "error_msg": error_msg,
+                        "error": str(e),
+                    },
+                    "function": "check_each_article",
+                    "status": "fail",
+                }
+            )
+        query = """
+            update outside_account_articles set publish_timestamp = %s, root_source_id_list = %s
+            where wx_sn = %s;
+        """
+        await self.pool.async_save(
+            query=query,
+            params=(
+                publish_timestamp_s,
+                json.dumps(root_source_id_list, ensure_ascii=False),
+                wx_sn,
+            ),
+        )
+        if publish_timestamp_s == self.REQUEST_FAIL_STATUS:
+            return article
+        return None
+
+    async def fallback_mechanism(self):
+        """Use ``update_time`` as ``publish_timestamp`` for rows still below zero.
+
+        A Feishu notification is sent when at least one row was changed.
+        """
+        update_sql_2 = """
+            update outside_account_articles
+            set publish_timestamp = update_time
+            where publish_timestamp < %s;
+        """
+        affected_rows_2 = await self.pool.async_save(query=update_sql_2, params=(0,))
+        if affected_rows_2:
+            await feishu_robot.bot(
+                title="执行兜底修改发布时间戳",
+                detail={
+                    "通过create_timestamp修改": affected_rows_2,
+                },
+                mention=False,
+            )
+
+    async def deal(self):
+        """Check every pending article; failures are logged and do not stop the run."""
+        task_list = await self.get_outside_article_list_v2()
+        for task in tqdm(task_list, desc="get article detail step1: "):
+            try:
+                await self.check_each_article(task)
+            except Exception as e:
+                try:
+                    await self.log_client.log(
+                        contents={
+                            "task": "get_official_article_detail_step1",
+                            "data": {
+                                "detail": {
+                                    # Rows come from "select content_url, wx_sn":
+                                    # the key is "content_url", not "ContentUrl".
+                                    "url": task["content_url"],
+                                    "wx_sn": task["wx_sn"],
+                                },
+                                "error_msg": traceback.format_exc(),
+                                "error": str(e),
+                            },
+                            "function": "check_each_article",
+                            "status": "fail",
+                        }
+                    )
+                except Exception as e:
+                    print(e)
+                    print(traceback.format_exc())

+ 98 - 9
applications/tasks/llm_tasks/process_title.py

@@ -7,6 +7,7 @@ from typing import Optional, List, Dict, Tuple
 from applications.api import fetch_deepseek_completion
 from applications.utils import yield_batch
 from applications.tasks.llm_tasks.prompts import extract_article_features
+from applications.tasks.llm_tasks.prompts import extract_article_category
 from tqdm.asyncio import tqdm
 
 
@@ -623,15 +624,15 @@ class ExtractTitleFeatures(Const):
         self.aliyun_log = aliyun_log
         self.trace_id = trace_id
 
-    async def get_tasks(self, batch_size=100):
+    async def get_tasks(self, version: int, batch_size=100):
         query = """
             select id, title
             from title_features
-            where status = %s
+            where status = %s and version = %s
             limit %s;
         """
         return await self.pool.async_fetch(
-            query=query, params=(self.INIT_STATUS, batch_size)
+            query=query, params=(self.INIT_STATUS, version, batch_size)
         )
 
     async def update_status(self, title_id, ori_status, new_status):
@@ -681,21 +682,26 @@ class ExtractTitleFeatures(Const):
             ),
         )
 
-    async def deal(self, data):
-        batch_size = data.get("batch_size", 50)
-        task_list = await self.get_tasks(batch_size=batch_size)
+    async def set_category_for_each_title(self, title_id, category):
+        """Write ``category`` on one title row and flag the row as successful.
+
+        The statement only matches a row that is still in PROCESSING status.
+        Returns whatever ``pool.async_save`` returns.
+        """
+        bound_values = (
+            category,
+            self.SUCCESS_STATUS,
+            title_id,
+            self.PROCESSING_STATUS,
+        )
+        update_sql = """
+            UPDATE title_features
+            SET category = %s, status = %s
+            WHERE id = %s and status = %s;
+        """
+        return await self.pool.async_save(query=update_sql, params=bound_values)
 
+    async def get_title_features(self, task_list: list):
         title_list = [i["title"] for i in task_list]
         id_list = [i["id"] for i in task_list]
         title_id_map = {i["title"]: i["id"] for i in task_list}
-
         prompt = extract_article_features(title_list)
-
         # 设置状态为处理中
         await self.update_status_batch(
             id_list, self.INIT_STATUS, self.PROCESSING_STATUS
         )
-
         try:
             feature_dict = fetch_deepseek_completion(
                 model="default", prompt=prompt, output_type="json"
@@ -744,3 +750,86 @@ class ExtractTitleFeatures(Const):
 
             title_id = title_id_map[title]
             await self.set_feature_for_each_title(title_id, features)
+
+    async def get_title_category(self, task_list: list):
+        """Ask the LLM for a category per title and persist the answers.
+
+        The batch is moved from INIT to PROCESSING first. If the completion
+        call raises or returns an empty result, the whole batch is moved to
+        FAIL and the problem is logged. Otherwise each title present in the
+        response gets its category stored.
+
+        Args:
+            task_list: rows with ``id`` and ``title``.
+        """
+        # Nothing to classify: skip the status update and the LLM round-trip
+        # instead of sending a prompt that contains no titles.
+        if not task_list:
+            return
+
+        title_list = [i["title"] for i in task_list]
+        id_list = [i["id"] for i in task_list]
+        title_id_map = {i["title"]: i["id"] for i in task_list}
+        generate_category_prompt = extract_article_category(title_list)
+        # Mark the batch as being processed.
+        await self.update_status_batch(
+            id_list, self.INIT_STATUS, self.PROCESSING_STATUS
+        )
+        try:
+            category_dict = fetch_deepseek_completion(
+                model="DeepSeek-V3", prompt=generate_category_prompt, output_type="json"
+            )
+        except Exception as e:
+            await self.aliyun_log.log(
+                contents={
+                    "task": "extract_title_category",
+                    "function": "deal",
+                    "message": "fetch deepseek completion failed",
+                    "status": "fail",
+                    "data": {
+                        "error_message": str(e),
+                        "error_type": type(e).__name__,
+                        "traceback": traceback.format_exc(),
+                    },
+                }
+            )
+            await self.update_status_batch(
+                id_list, self.PROCESSING_STATUS, self.FAIL_STATUS
+            )
+            return
+        if not category_dict:
+            await self.aliyun_log.log(
+                contents={
+                    "task": "extract_title_category",
+                    "function": "deal",
+                    "message": "fetch deepseek completion return empty",
+                    "status": "fail",
+                    "data": {
+                        "error_message": "fetch deepseek completion return empty",
+                        "error_type": "EmptyResponseError",
+                        "traceback": traceback.format_exc(),
+                    },
+                }
+            )
+            await self.update_status_batch(
+                id_list, self.PROCESSING_STATUS, self.FAIL_STATUS
+            )
+            return
+
+        # NOTE(review): a title missing from the response is skipped and its row
+        # keeps the PROCESSING status — confirm whether something resets it.
+        for title in tqdm(title_list):
+            category = category_dict.get(title)
+            if not category:
+                continue
+
+            title_id = title_id_map[title]
+            await self.set_category_for_each_title(title_id, category)
+
+    async def deal(self, data):
+        """Load one batch of pending titles and run the processor for its version.
+
+        ``data`` may carry ``batch_size`` (default 50) and ``version``
+        (default 1). Version 1 extracts features, version 2 extracts
+        categories; any other version is only logged as unsupported.
+        """
+        version = data.get("version", 1)
+        pending = await self.get_tasks(
+            version=version, batch_size=data.get("batch_size", 50)
+        )
+
+        if version == 1:
+            await self.get_title_features(pending)
+        elif version == 2:
+            await self.get_title_category(pending)
+        else:
+            unsupported = {
+                "error_message": "version not supported",
+                "error_type": "VersionNotSupportedError",
+                "traceback": traceback.format_exc(),
+            }
+            await self.aliyun_log.log(
+                contents={
+                    "task": "extract_title_features",
+                    "function": "deal",
+                    "message": "version not supported",
+                    "status": "fail",
+                    "data": unsupported,
+                }
+            )

+ 139 - 0
applications/tasks/llm_tasks/prompts.py

@@ -279,3 +279,142 @@ def extract_article_features(title_list: list[str]) -> str:
     
     """
     return prompt + "\n".join(title_list)
+
+
+def extract_article_category(title_list: list[str]) -> str:
+    """Build the category-classification prompt for a batch of article titles.
+
+    The fixed instruction text lists 16 categories and asks for a JSON object
+    mapping each input title to one category; the titles are appended after
+    the instructions, one per line.
+    """
+    instruction = """
+你是一个稳定可靠的**文本分类与标签抽取助手**,专门根据「文章标题」判断内容类型,并为每个标题打上品类标签
+
+你的任务是:**对输入的每一个标题,完成内容品类分类
+请务必从下面 16 个品类中,选择一个最合适的品类输出,不要创造新的品类名称。
+
+### 品类定义
+1. **知识科普**  
+定义:以通俗易懂的方式普及科学、技术、健康、安全、生活常识、财产保护、医保政策、为人处事方式等内容,旨在提高公众的知识水平和认知能力。内容通常具有教育性和实用性,涵盖自然、社会、文化等多个领域。  
+标题示例:  
+- 我国存款最安全的五大银行,永远都不会倒闭,你知道是哪五家吗?  
+- 借条上不要写“这3个字”,不然变成一张废纸,否则用法律也没用  
+- 不能二次加热的3种食物!再次提醒:这3种食物吃不完最好扔掉  
+
+2. **军事历史**  
+定义:聚焦于历史上的军事事件、战争故事、军事策略、英雄人物等内容,旨在还原战争场景、探讨军事决策、揭示历史真相,并展现战争中的人物命运与历史影响。内容通常以叙事、分析或回忆的形式呈现,兼具历史深度和故事性。  
+标题示例:  
+- 对越作战永远失踪的332人,陵园没有墓碑,没有名字,只有烈士证  
+- 淮海大战丢失阵地,师长带头冲锋!最后出一口恶气:活捉敌最高指挥官  
+- 抗战时,一村民被敌拉去带路,半道回头忽发现:后面跟个游击队员  
+
+3. **家长里短**  
+定义:围绕家庭成员之间的关系、矛盾、情感、道德等展开的故事或讨论,内容常涉及婚姻、亲子、婆媳、兄弟姐妹等关系,或是人情往来、金钱纠纷、情感变化等内容,反映家庭生活中的温情、冲突与人性。  
+标题示例:  
+- 父母越老越能暴露家庭最真实的一面:当父母70岁,子女不该抱有这三种期待  
+- 老母亲分家产,给亲闺女30万,给养女一筐青菜,养女意外摔倒,看到筐子里的东西,瞬间愣住了  
+- 我花150一天雇了阿姨,两天后上班回来给她300,阿姨说我账算错了  
+
+4. **社会法治**  
+定义:聚焦社会事件、法律纠纷、法院判决、社会现象等内容,通常涉及道德、法律、公平正义等议题,旨在揭示社会问题、探讨法律规则或反映人性与社会现实。  
+标题示例:  
+- 山东,女子在小区捡到16万天价项链,业主悬赏3万找回,女子归还后,失主拒绝支付报酬,还说:要有格局,女子认为被骗,将失主告上法庭  
+- 陕西,女子22万买26层房,2年后,楼盘24层就已经封顶!开发商:你闹事造成100万损失,道歉才给赔偿!  
+- 上海:男子超市连续购买46枚过期咸鸭蛋,2天分46次交易,向厂家索赔金14万,法院判了!  
+
+5. **奇闻趣事**  
+定义:以猎奇、娱乐为主,涵盖罕见、奇特、有趣的事件、发现或故事,内容通常具有趣味性和话题性,能够引发读者的好奇心和讨论。  
+标题示例:  
+- 狗屎运?江西男子钓鱼时发现青鱼尸骸,扒开后捡到鸡蛋大小的青鱼石,网友:起码值几千!  
+- 内蒙古小伙河边捡到金牌,拒绝上交将其熔成金手镯,专家气愤不已  
+- 男子买了一辆废弃坦克,拆油箱时,他发现了一根又一根的金条……  
+
+6. **名人八卦**  
+定义:围绕名人的生活、言论、事件、八卦等内容展开,通常涉及娱乐圈、政界、历史人物等,旨在满足公众对名人隐私和动态的好奇心。  
+标题示例:  
+- 难怪王扶林说陈晓旭不够漂亮,看看他选的原黛玉候选人,那才叫美  
+- 心狠手辣的容嬷嬷年轻时是校花?看了照片后,网友直接闭嘴了!  
+- 李玉成终于说出实话,公开吐槽马玉琴年纪太大,结婚28年疑似后悔  
+
+7. **健康养生**  
+定义:关注健康、养生、疾病预防、生活习惯等方面的知识和建议,内容通常具有实用性和指导性,旨在帮助读者改善生活质量、提升健康水平。  
+标题示例:  
+- 72岁老人每天一个蒸苹果,半年后体检,看到指标变化让他乐开了花  
+- 40岁女子每天吃水煮蛋,一年后去体检,检查报告令医生都羡慕不已  
+- 2024年血糖新标准已公布,不再是3.9~6.1,你的血糖还不算高吗?  
+
+8. **情感故事**  
+定义:以人与人之间的情感交流、感人故事、情感经历为主题,内容通常充满温情、感动或反思,旨在引发读者的情感共鸣和思考。  
+标题示例:  
+- 男孩饭店吃饭,发现陌生女子和去世母亲很像,走过去说:我妈妈去世了,能抱一下我吗?  
+- 河南一女子直播时,被失散 32 年的父亲认出:闺女等着爸爸接你回家  
+- 1987年,江苏男子借好友一千元,25年后朋友成富豪还他1000万报恩  
+- 流浪狗跟着骑行夫妻跑了一百多公里,一直守护在女主身边,赶都赶不走,当男主得知原因后竟抱着狗狗大哭起来  
+
+9. **国家大事**  
+定义:涉及国家实力、科技发展、资源发现、国际合作等内容,通常以宏观视角展现国家的综合实力、科技成就或国际影响力,体现国家的崛起与发展。  
+标题示例:  
+- 我国在南极发现“海上粮仓”,储量高达10亿吨,世界各国眼红不已  
+- 我国贵州发现7000万吨宝藏,价值高达上万亿,多国求合作被拒绝  
+- 距我国3000公里,塞班岛明明归美国管辖,为何岛上大多是中国人?  
+
+10. **现代人物**  
+定义:聚焦活跃在21世纪后具有传奇色彩或巨大贡献的人物、事迹、成就等,内容通常充满戏剧性和启发性,旨在展现人物的非凡经历或历史贡献。  
+标题示例:  
+- 她曾狂贪国家上百亿,被发现时已经移居美国,最终还风光一时得善终  
+- 山东女子因坐月子无聊,破译美国2套绝密系统的密码,国家:奖励711万!  
+- 牺牲太大了!航天女英雄刘洋:结婚8年未生子,回地面后“消失”的她怎样了?  
+
+11. **怀旧时光**  
+定义:以回忆和怀旧为主题,涉及过去的历史、文化、生活、照片等内容,旨在唤起读者对过去时光的情感共鸣和怀念。  
+标题示例:  
+- 1975年“下馆子”的老照片,2元能吃些什么,勾起那段最难忘的时光  
+- 82年,北京老人捡回两张“破椅子”,遭家人数落,29年后拍出2300万  
+- 这张老照片第一次看到,邓颖超和李讷的罕见合影!  
+
+12. **政治新闻**  
+定义:聚焦政治事件、领导人动态、国际关系等内容,通常以新闻或分析的形式呈现,旨在揭示政治局势、政策变化或国际关系的动态。  
+标题示例:  
+- 中方外长行程有变,提前结束访欧匆匆回国,带回来一个好消息  
+- 宋庆龄在北京逝世后,远在美国的宋美龄只说了7个字,字字揪心!  
+- 庐山会议后,叶帅去劝彭德怀认个错,哭着说了一句心里话  
+
+13. **历史人物**  
+定义:聚焦于21世纪前具有重要影响的人物,包括他们的生平、事迹、成就、性格、趣事及其对历史进程的贡献。内容通常以传记、回忆录或历史分析的形式呈现,旨在还原人物的真实面貌并探讨其历史意义。  
+标题示例:  
+- 林彪去世后,蒋介石收到林彪与戴笠的一份密谈文件,看后拍桌大骂  
+- 张学良软禁时的一张实拍照片,头发秃顶,两眼无光,像个中年老头  
+- 1912年,孙中山和两个女儿罕见留影,面对镜头父女三人看起来很幸福  
+
+14. **社会现象**  
+定义:关注社会中出现的普遍现象、趋势或问题,通常涉及文化、经济、教育、民生等领域。内容以观察、分析或评论为主,旨在揭示现象背后的原因、影响及社会意义,引发公众的思考和讨论。  
+标题示例:  
+- 22年河南男子跳河救人,体力耗尽留遗言,被救女子猛然抓住他:一起走  
+- 浙江一老人刑满释放,靠蹬三轮为生,6年后,政府领导登门拜访:我们帮您分配工作  
+- 儿子收到清华通知书,父亲花5万请全村吃席,镇长看一眼竟说:这是假的  
+
+15. **财经科技**  
+定义:聚焦于经济、金融、投资及行业发展的分析与预测,涵盖未来经济趋势、资产价值变化、行业变革及个人理财策略等内容。可以提供前瞻性的财经视角和实用的理财建议,帮助其把握经济动态、优化财务规划并应对行业变化。  
+标题示例:  
+- 未来10年,现金和房子都将贬值,只有2样东西最值钱  
+- 外卖时代将被终结?一个全新行业正悄悄取代外卖,你准备好了吗?  
+- 准备存款的一定要知道,今明两年,定期存款要记住“4不存”  
+
+16. **节日祝福**
+定义:以节日为主题或者以祝福、喜悦、感谢、早上好、中午好、晚上好等情感为内容,旨在向表达节日的祝福和对其的情感支持。
+标题示例:  
+- 早上好/中午好/晚上好, 亲爱的朋友们! 
+- 春节快乐、除夕快乐!
+
+### 输出定义
+输入是一个标题列表,每一行表示一个标题
+输出结构为 json,json 的 key 是输入的标题,value 是品类信息。 json 格式如下:
+{
+    "title1": "category1",
+    "title2": "category2"
+}
+如果标题中包含半角双引号 ",请在 JSON 中进行转义,写成 \",确保整个 JSON 可以被正常解析。
+输出时,
+    只输出 json 内容本身,
+    不要包裹在```json或者markdown代码块中。
+    不要添加任何额外说明文字、注释或空行。
+### 输入标题说明
+    以下是需要分析的文字标题列表,每一行是一个标题。
+    输入的标题是:
+    """
+    # Titles go after the instructions, one per line.
+    joined_titles = "\n".join(title_list)
+    return f"{instruction}{joined_titles}"

+ 23 - 0
applications/tasks/task_handler.py

@@ -12,10 +12,14 @@ from applications.tasks.crawler_tasks import CrawlerGzhAccountArticles
 from applications.tasks.crawler_tasks import CrawlerGzhSearchArticles
 
 from applications.tasks.data_recycle_tasks import RecycleDailyPublishArticlesTask
+from applications.tasks.data_recycle_tasks import RecycleOutsideAccountArticlesTask
 from applications.tasks.data_recycle_tasks import CheckDailyPublishArticlesTask
 from applications.tasks.data_recycle_tasks import UpdateRootSourceIdAndUpdateTimeTask
 from applications.tasks.data_recycle_tasks import RecycleFwhDailyPublishArticlesTask
 from applications.tasks.data_recycle_tasks import RecycleMiniProgramDetailTask
+from applications.tasks.data_recycle_tasks import (
+    UpdateOutsideRootSourceIdAndUpdateTimeTask,
+)
 
 from applications.tasks.llm_tasks import TitleRewrite
 from applications.tasks.llm_tasks import ArticlePoolCategoryGeneration
@@ -216,5 +220,24 @@ class TaskHandler(TaskMapper):
         await task.deal(data=self.data)
         return self.TASK_SUCCESS_STATUS
 
+    # 回收外部账号文章
+    async def _recycle_outside_account_article_handler(self) -> int:
+        """Run ``RecycleOutsideAccountArticlesTask`` and report success.
+
+        Uses ``date_string`` from the request data when it is present and
+        non-empty, otherwise today's date formatted as ``YYYY-MM-DD``.
+        """
+        date_string = self.data.get("date_string")
+        if not date_string:
+            date_string = datetime.now().strftime("%Y-%m-%d")
+
+        recycle_task = RecycleOutsideAccountArticlesTask(
+            pool=self.db_client,
+            log_client=self.log_client,
+            date_string=date_string,
+        )
+        await recycle_task.deal()
+        return self.TASK_SUCCESS_STATUS
+
+    # 更新外部账号文章的root_source_id和update_time
+    async def _update_outside_account_article_root_source_id_and_update_time_handler(
+        self,
+    ) -> int:
+        """Run ``UpdateOutsideRootSourceIdAndUpdateTimeTask`` and report success."""
+        await UpdateOutsideRootSourceIdAndUpdateTimeTask(
+            pool=self.db_client,
+            log_client=self.log_client,
+        ).deal()
+        return self.TASK_SUCCESS_STATUS
+
 
 __all__ = ["TaskHandler"]

+ 4 - 0
applications/tasks/task_scheduler.py

@@ -197,6 +197,10 @@ class TaskScheduler(TaskHandler):
             "mini_program_detail_process": self._mini_program_detail_handler,
             # 提取标题特征
             "extract_title_features": self._extract_title_features_handler,
+            # 回收外部文章
+            "recycle_outside_account_articles": self._recycle_outside_account_article_handler,
+            # 更新外部账号文章的root_source_id和update_time
+            "update_outside_account_article_root_source_id": self._update_outside_account_article_root_source_id_and_update_time_handler,
         }
 
         if task_name not in handlers:

+ 0 - 24
test_prompt.py

@@ -1,24 +0,0 @@
-from applications.tasks.llm_tasks.prompts import extract_article_features
-from applications.api import fetch_deepseek_completion
-
-
-title_list = [
-    "他从副总理一下被撸成普通工人,事后反倒安慰妻子:我心里面有数",
-    "​莫言曾说:“谁都靠不住,除非你有用,在这个烟换烟,酒换酒的时代,生活万般苦,唯有自己渡。”",
-    "1948年国军排长带队投诚,谁知当他来到登记处时,文书却是大惊:张定元你不是在2年前就牺牲了吗",
-    "21年前抛弃侯耀文,转身嫁给师侄戴志诚的袁茵,如今活成了这样",
-    "国外一对夫妻住饭店晚上开窗通风,第二天醒来后,想不到竟有256只挤满房间,当场直接吓坏了",
-    "董卿说“娶谁都需要房子,娶谁都需要努力挣钱,但你永远都不要说,为了我而努力这种话,难道没有我,你就不需要给别人吗?“",
-    "原来九三阅兵的解说是他俩!豪迈大气庄重激昂,难怪全国人民喜爱",
-    "一颗白菜卖60多块钱!拯救韩国人今年的餐桌,全靠山东这个小城了!",
-    "63岁冯远征带71岁妻子回老家,两人牵手逛景点,梁丹妮短发好利落",
-    "我国最大的一块“飞地”,比7个上海市还要大,却不足2000人",
-]
-
-prompt = extract_article_features(title_list)
-print(prompt)
-res = fetch_deepseek_completion(model="default", prompt=prompt, output_type="json")
-
-import json
-
-print(json.dumps(res, ensure_ascii=False, indent=4))