#36 2025-11-14 提取标题的特征

Обединени
luojunhui обедини 2 ревизии от Server/feature/luojunhui/2025-11-14-article-features във Server/master преди 2 дни

+ 439 - 433
applications/ab_test/ab_accounts.py

@@ -1,445 +1,447 @@
 import json
 import random
 
+
 def get_total_avg_views(samples):
     total_views = 0
     for sample in samples:
-        total_views += sample['avg_view_count']
+        total_views += sample["avg_view_count"]
 
     return total_views
 
+
 def ab_():
     all_accounts = [
-      {
-        "account_name": "史君记",
-        "avg_view_count": 369.79,
-        "gh_id": "gh_0642b59afc37"
-      },
-      {
-        "account_name": "妙生活君",
-        "avg_view_count": 277.8,
-        "gh_id": "gh_0a476f9a531e"
-      },
-      {
-        "account_name": "幸福启示",
-        "avg_view_count": 1432.33,
-        "gh_id": "gh_0c89e11f8bf3"
-      },
-      {
-        "account_name": "乡土趣享",
-        "avg_view_count": 433.5,
-        "gh_id": "gh_0cf967cfd50c"
-      },
-      {
-        "account_name": "彤浩初说",
-        "avg_view_count": 85.62,
-        "gh_id": "gh_11705fdc238f"
-      },
-      {
-        "account_name": "情感美文一刻",
-        "avg_view_count": 639.33,
-        "gh_id": "gh_191df768625e"
-      },
-      {
-        "account_name": "天天学生活技巧",
-        "avg_view_count": 702.95,
-        "gh_id": "gh_192c9cf58b13"
-      },
-      {
-        "account_name": "都市佳人沁读",
-        "avg_view_count": 39.9,
-        "gh_id": "gh_1bab5d93bd6e"
-      },
-      {
-        "account_name": "农家田耕季",
-        "avg_view_count": 92.37,
-        "gh_id": "gh_1d4554168e20"
-      },
-      {
-        "account_name": "乐享生活小窍门",
-        "avg_view_count": 630.42,
-        "gh_id": "gh_1d887d61088c"
-      },
-      {
-        "account_name": "持家优选金点子",
-        "avg_view_count": 493.9,
-        "gh_id": "gh_209b7fe5374b"
-      },
-      {
-        "account_name": "老来生活家",
-        "avg_view_count": 432.71,
-        "gh_id": "gh_29074b51f2b7"
-      },
-      {
-        "account_name": "欢乐老来伴",
-        "avg_view_count": 154.8,
-        "gh_id": "gh_33a9d9e3172b"
-      },
-      {
-        "account_name": "灵波悦文",
-        "avg_view_count": 120.74,
-        "gh_id": "gh_34f15950b7f6"
-      },
-      {
-        "account_name": "郸英光语",
-        "avg_view_count": 145.27,
-        "gh_id": "gh_372413e12615"
-      },
-      {
-        "account_name": "看不够妙招",
-        "avg_view_count": 585.29,
-        "gh_id": "gh_3ed305b5817f"
-      },
-      {
-        "account_name": "趣史新看",
-        "avg_view_count": 169.33,
-        "gh_id": "gh_4018ad041f91"
-      },
-      {
-        "account_name": "博览史论",
-        "avg_view_count": 247.17,
-        "gh_id": "gh_405423725a6a"
-      },
-      {
-        "account_name": "都市佳人蕊读",
-        "avg_view_count": 40.89,
-        "gh_id": "gh_45fb5df25378"
-      },
-      {
-        "account_name": "岑山旭读",
-        "avg_view_count": 74.3,
-        "gh_id": "gh_47b80b9d7cb7"
-      },
-      {
-        "account_name": "汉史遍览",
-        "avg_view_count": 242.62,
-        "gh_id": "gh_4ecf7d95a248"
-      },
-      {
-        "account_name": "退休三缺一",
-        "avg_view_count": 147.31,
-        "gh_id": "gh_5250f7fc8db4"
-      },
-      {
-        "account_name": "老友闲谈",
-        "avg_view_count": 4578.35,
-        "gh_id": "gh_56ca3dae948c"
-      },
-      {
-        "account_name": "路边闲聊社",
-        "avg_view_count": 1611.01,
-        "gh_id": "gh_5ae65db96cb7"
-      },
-      {
-        "account_name": "趣享晚年",
-        "avg_view_count": 77.34,
-        "gh_id": "gh_5ce68993ebe0"
-      },
-      {
-        "account_name": "农耕笔记",
-        "avg_view_count": 71.85,
-        "gh_id": "gh_5e9a4b012aea"
-      },
-      {
-        "account_name": "幸福晚年知音",
-        "avg_view_count": 331.07,
-        "gh_id": "gh_6b7c2a257263"
-      },
-      {
-        "account_name": "舒誉轶文",
-        "avg_view_count": 80.43,
-        "gh_id": "gh_6c2110231fe0"
-      },
-      {
-        "account_name": "持家有方法",
-        "avg_view_count": 263.45,
-        "gh_id": "gh_6e1403eb5325"
-      },
-      {
-        "account_name": "理想俏生活",
-        "avg_view_count": 154.78,
-        "gh_id": "gh_711b6e027ae2"
-      },
-      {
-        "account_name": "幸福妙招合集",
-        "avg_view_count": 693.11,
-        "gh_id": "gh_72bace6b3059"
-      },
-      {
-        "account_name": "贝俊逸文",
-        "avg_view_count": 75.24,
-        "gh_id": "gh_74af7152fca8"
-      },
-      {
-        "account_name": "妙读奇闻",
-        "avg_view_count": 1330.23,
-        "gh_id": "gh_76dd62961c8c"
-      },
-      {
-        "account_name": "谈老来生活",
-        "avg_view_count": 287.37,
-        "gh_id": "gh_7cf7e0c83e65"
-      },
-      {
-        "account_name": "便捷生活好方法",
-        "avg_view_count": 830.7,
-        "gh_id": "gh_7e5818b2dd83"
-      },
-      {
-        "account_name": "鸿云念文",
-        "avg_view_count": 61.44,
-        "gh_id": "gh_8228478a9f38"
-      },
-      {
-        "account_name": "俏生活小妙招",
-        "avg_view_count": 165.11,
-        "gh_id": "gh_823fa1ec5323"
-      },
-      {
-        "account_name": "趣史说说",
-        "avg_view_count": 188.65,
-        "gh_id": "gh_875f2fa56e95"
-      },
-      {
-        "account_name": "田耕生活",
-        "avg_view_count": 156.06,
-        "gh_id": "gh_8799d3cf0ef2"
-      },
-      {
-        "account_name": "观文史说",
-        "avg_view_count": 129.26,
-        "gh_id": "gh_87c9d4ee12b9"
-      },
-      {
-        "account_name": "田地杂论",
-        "avg_view_count": 82.79,
-        "gh_id": "gh_89f0b7689c51"
-      },
-      {
-        "account_name": "俏持家达人",
-        "avg_view_count": 91.7,
-        "gh_id": "gh_90491df306bd"
-      },
-      {
-        "account_name": "窍门天天见",
-        "avg_view_count": 15.59,
-        "gh_id": "gh_91c214eb9b6a"
-      },
-      {
-        "account_name": "田耕享生活",
-        "avg_view_count": 110.86,
-        "gh_id": "gh_9334a5bc834a"
-      },
-      {
-        "account_name": "清怡素语",
-        "avg_view_count": 73.94,
-        "gh_id": "gh_93f1d2267689"
-      },
-      {
-        "account_name": "趣论史说",
-        "avg_view_count": 136.37,
-        "gh_id": "gh_96b6ee88c752"
-      },
-      {
-        "account_name": "妙招趣帮手",
-        "avg_view_count": 1243.62,
-        "gh_id": "gh_99b47070c8b5"
-      },
-      {
-        "account_name": "快乐精选集",
-        "avg_view_count": 704.73,
-        "gh_id": "gh_9eef14ad6c16"
-      },
-      {
-        "account_name": "史说新看",
-        "avg_view_count": 116.86,
-        "gh_id": "gh_9ffc91a17adc"
-      },
-      {
-        "account_name": "精选俏生活",
-        "avg_view_count": 313,
-        "gh_id": "gh_a2166b54ee1d"
-      },
-      {
-        "account_name": "畅聊奇闻",
-        "avg_view_count": 1162.86,
-        "gh_id": "gh_a2901d34f75b"
-      },
-      {
-        "account_name": "畅享福晚年",
-        "avg_view_count": 85.57,
-        "gh_id": "gh_a41bf1e2b0ab"
-      },
-      {
-        "account_name": "田地杂谈",
-        "avg_view_count": 161.26,
-        "gh_id": "gh_a57c73d76d65"
-      },
-      {
-        "account_name": "享暮年华乐",
-        "avg_view_count": 156.16,
-        "gh_id": "gh_a8652e07fbd6"
-      },
-      {
-        "account_name": "趣览史说",
-        "avg_view_count": 354.86,
-        "gh_id": "gh_a912ae49c81a"
-      },
-      {
-        "account_name": "奇看趣闻",
-        "avg_view_count": 215.24,
-        "gh_id": "gh_ab6f244386d8"
-      },
-      {
-        "account_name": "都市佳人伴渎",
-        "avg_view_count": 58.47,
-        "gh_id": "gh_abf5ded627ae"
-      },
-      {
-        "account_name": "一品趣闻",
-        "avg_view_count": 38.77,
-        "gh_id": "gh_ac6bba08dab7"
-      },
-      {
-        "account_name": "暮年享乐",
-        "avg_view_count": 152.79,
-        "gh_id": "gh_b1513a1766bf"
-      },
-      {
-        "account_name": "人生百事观",
-        "avg_view_count": 335.94,
-        "gh_id": "gh_b15de7c99912"
-      },
-      {
-        "account_name": "无忧生活小妙招",
-        "avg_view_count": 481.36,
-        "gh_id": "gh_b676b7ad9b74"
-      },
-      {
-        "account_name": "读史趣谈",
-        "avg_view_count": 1032.11,
-        "gh_id": "gh_b72f81fe2d65"
-      },
-      {
-        "account_name": "杰云若语",
-        "avg_view_count": 65.99,
-        "gh_id": "gh_bbcaf6794ba2"
-      },
-      {
-        "account_name": "奇趣百味生活",
-        "avg_view_count": 972.87,
-        "gh_id": "gh_bfe5b705324a"
-      },
-      {
-        "account_name": "无忧居家达人",
-        "avg_view_count": 135.06,
-        "gh_id": "gh_c128a013b0d0"
-      },
-      {
-        "account_name": "老友快乐谈",
-        "avg_view_count": 1105.02,
-        "gh_id": "gh_c5cdf60d9ab4"
-      },
-      {
-        "account_name": "优选持家派",
-        "avg_view_count": 78.64,
-        "gh_id": "gh_c8580bc45e36"
-      },
-      {
-        "account_name": "趣生活技巧",
-        "avg_view_count": 80.89,
-        "gh_id": "gh_c9ee53e1fbbc"
-      },
-      {
-        "account_name": "博阅历史",
-        "avg_view_count": 630.42,
-        "gh_id": "gh_cde3948134c5"
-      },
-      {
-        "account_name": "趣闻汉史",
-        "avg_view_count": 196.69,
-        "gh_id": "gh_cfb5c80fcd34"
-      },
-      {
-        "account_name": "生活指示录",
-        "avg_view_count": 614.78,
-        "gh_id": "gh_d49df5e974ca"
-      },
-      {
-        "account_name": "香茗史论",
-        "avg_view_count": 191.35,
-        "gh_id": "gh_d5f8b8f26704"
-      },
-      {
-        "account_name": "快乐生活新方向",
-        "avg_view_count": 165.26,
-        "gh_id": "gh_d6451c7c0b92"
-      },
-      {
-        "account_name": "趣闻时分秒",
-        "avg_view_count": 53.05,
-        "gh_id": "gh_d69ca56957ad"
-      },
-      {
-        "account_name": "持家有窍门",
-        "avg_view_count": 58.47,
-        "gh_id": "gh_d71bffcc25e1"
-      },
-      {
-        "account_name": "无忧自在生活",
-        "avg_view_count": 734.72,
-        "gh_id": "gh_dd4c857bbb36"
-      },
-      {
-        "account_name": "文史新看",
-        "avg_view_count": 67.28,
-        "gh_id": "gh_e41d5e19eb4e"
-      },
-      {
-        "account_name": "妙晚年日记",
-        "avg_view_count": 56.06,
-        "gh_id": "gh_e6c35aa5954c"
-      },
-      {
-        "account_name": "云景史记",
-        "avg_view_count": 62.88,
-        "gh_id": "gh_ece3fdd64622"
-      },
-      {
-        "account_name": "轻松妙生活家",
-        "avg_view_count": 131.23,
-        "gh_id": "gh_f0b83f5f8f07"
-      },
-      {
-        "account_name": "家有窍门大全",
-        "avg_view_count": 100.57,
-        "gh_id": "gh_f37296466d43"
-      },
-      {
-        "account_name": "趣谈暮年",
-        "avg_view_count": 1692.99,
-        "gh_id": "gh_f48d3a90fc6e"
-      },
-      {
-        "account_name": "都市退休蕊渎",
-        "avg_view_count": 83.45,
-        "gh_id": "gh_f5a39031d305"
-      },
-      {
-        "account_name": "快乐生活有方法",
-        "avg_view_count": 91.97,
-        "gh_id": "gh_f9eeba7eb5c7"
-      },
-      {
-        "account_name": "趣史汇看",
-        "avg_view_count": 201.81,
-        "gh_id": "gh_fb633e4a8635"
-      },
-      {
-        "account_name": "趣味生活达人1",
-        "avg_view_count": 437.26,
-        "gh_id": "gh_ff487cb5dab3"
-      }
+        {
+            "account_name": "史君记",
+            "avg_view_count": 369.79,
+            "gh_id": "gh_0642b59afc37",
+        },
+        {
+            "account_name": "妙生活君",
+            "avg_view_count": 277.8,
+            "gh_id": "gh_0a476f9a531e",
+        },
+        {
+            "account_name": "幸福启示",
+            "avg_view_count": 1432.33,
+            "gh_id": "gh_0c89e11f8bf3",
+        },
+        {
+            "account_name": "乡土趣享",
+            "avg_view_count": 433.5,
+            "gh_id": "gh_0cf967cfd50c",
+        },
+        {
+            "account_name": "彤浩初说",
+            "avg_view_count": 85.62,
+            "gh_id": "gh_11705fdc238f",
+        },
+        {
+            "account_name": "情感美文一刻",
+            "avg_view_count": 639.33,
+            "gh_id": "gh_191df768625e",
+        },
+        {
+            "account_name": "天天学生活技巧",
+            "avg_view_count": 702.95,
+            "gh_id": "gh_192c9cf58b13",
+        },
+        {
+            "account_name": "都市佳人沁读",
+            "avg_view_count": 39.9,
+            "gh_id": "gh_1bab5d93bd6e",
+        },
+        {
+            "account_name": "农家田耕季",
+            "avg_view_count": 92.37,
+            "gh_id": "gh_1d4554168e20",
+        },
+        {
+            "account_name": "乐享生活小窍门",
+            "avg_view_count": 630.42,
+            "gh_id": "gh_1d887d61088c",
+        },
+        {
+            "account_name": "持家优选金点子",
+            "avg_view_count": 493.9,
+            "gh_id": "gh_209b7fe5374b",
+        },
+        {
+            "account_name": "老来生活家",
+            "avg_view_count": 432.71,
+            "gh_id": "gh_29074b51f2b7",
+        },
+        {
+            "account_name": "欢乐老来伴",
+            "avg_view_count": 154.8,
+            "gh_id": "gh_33a9d9e3172b",
+        },
+        {
+            "account_name": "灵波悦文",
+            "avg_view_count": 120.74,
+            "gh_id": "gh_34f15950b7f6",
+        },
+        {
+            "account_name": "郸英光语",
+            "avg_view_count": 145.27,
+            "gh_id": "gh_372413e12615",
+        },
+        {
+            "account_name": "看不够妙招",
+            "avg_view_count": 585.29,
+            "gh_id": "gh_3ed305b5817f",
+        },
+        {
+            "account_name": "趣史新看",
+            "avg_view_count": 169.33,
+            "gh_id": "gh_4018ad041f91",
+        },
+        {
+            "account_name": "博览史论",
+            "avg_view_count": 247.17,
+            "gh_id": "gh_405423725a6a",
+        },
+        {
+            "account_name": "都市佳人蕊读",
+            "avg_view_count": 40.89,
+            "gh_id": "gh_45fb5df25378",
+        },
+        {
+            "account_name": "岑山旭读",
+            "avg_view_count": 74.3,
+            "gh_id": "gh_47b80b9d7cb7",
+        },
+        {
+            "account_name": "汉史遍览",
+            "avg_view_count": 242.62,
+            "gh_id": "gh_4ecf7d95a248",
+        },
+        {
+            "account_name": "退休三缺一",
+            "avg_view_count": 147.31,
+            "gh_id": "gh_5250f7fc8db4",
+        },
+        {
+            "account_name": "老友闲谈",
+            "avg_view_count": 4578.35,
+            "gh_id": "gh_56ca3dae948c",
+        },
+        {
+            "account_name": "路边闲聊社",
+            "avg_view_count": 1611.01,
+            "gh_id": "gh_5ae65db96cb7",
+        },
+        {
+            "account_name": "趣享晚年",
+            "avg_view_count": 77.34,
+            "gh_id": "gh_5ce68993ebe0",
+        },
+        {
+            "account_name": "农耕笔记",
+            "avg_view_count": 71.85,
+            "gh_id": "gh_5e9a4b012aea",
+        },
+        {
+            "account_name": "幸福晚年知音",
+            "avg_view_count": 331.07,
+            "gh_id": "gh_6b7c2a257263",
+        },
+        {
+            "account_name": "舒誉轶文",
+            "avg_view_count": 80.43,
+            "gh_id": "gh_6c2110231fe0",
+        },
+        {
+            "account_name": "持家有方法",
+            "avg_view_count": 263.45,
+            "gh_id": "gh_6e1403eb5325",
+        },
+        {
+            "account_name": "理想俏生活",
+            "avg_view_count": 154.78,
+            "gh_id": "gh_711b6e027ae2",
+        },
+        {
+            "account_name": "幸福妙招合集",
+            "avg_view_count": 693.11,
+            "gh_id": "gh_72bace6b3059",
+        },
+        {
+            "account_name": "贝俊逸文",
+            "avg_view_count": 75.24,
+            "gh_id": "gh_74af7152fca8",
+        },
+        {
+            "account_name": "妙读奇闻",
+            "avg_view_count": 1330.23,
+            "gh_id": "gh_76dd62961c8c",
+        },
+        {
+            "account_name": "谈老来生活",
+            "avg_view_count": 287.37,
+            "gh_id": "gh_7cf7e0c83e65",
+        },
+        {
+            "account_name": "便捷生活好方法",
+            "avg_view_count": 830.7,
+            "gh_id": "gh_7e5818b2dd83",
+        },
+        {
+            "account_name": "鸿云念文",
+            "avg_view_count": 61.44,
+            "gh_id": "gh_8228478a9f38",
+        },
+        {
+            "account_name": "俏生活小妙招",
+            "avg_view_count": 165.11,
+            "gh_id": "gh_823fa1ec5323",
+        },
+        {
+            "account_name": "趣史说说",
+            "avg_view_count": 188.65,
+            "gh_id": "gh_875f2fa56e95",
+        },
+        {
+            "account_name": "田耕生活",
+            "avg_view_count": 156.06,
+            "gh_id": "gh_8799d3cf0ef2",
+        },
+        {
+            "account_name": "观文史说",
+            "avg_view_count": 129.26,
+            "gh_id": "gh_87c9d4ee12b9",
+        },
+        {
+            "account_name": "田地杂论",
+            "avg_view_count": 82.79,
+            "gh_id": "gh_89f0b7689c51",
+        },
+        {
+            "account_name": "俏持家达人",
+            "avg_view_count": 91.7,
+            "gh_id": "gh_90491df306bd",
+        },
+        {
+            "account_name": "窍门天天见",
+            "avg_view_count": 15.59,
+            "gh_id": "gh_91c214eb9b6a",
+        },
+        {
+            "account_name": "田耕享生活",
+            "avg_view_count": 110.86,
+            "gh_id": "gh_9334a5bc834a",
+        },
+        {
+            "account_name": "清怡素语",
+            "avg_view_count": 73.94,
+            "gh_id": "gh_93f1d2267689",
+        },
+        {
+            "account_name": "趣论史说",
+            "avg_view_count": 136.37,
+            "gh_id": "gh_96b6ee88c752",
+        },
+        {
+            "account_name": "妙招趣帮手",
+            "avg_view_count": 1243.62,
+            "gh_id": "gh_99b47070c8b5",
+        },
+        {
+            "account_name": "快乐精选集",
+            "avg_view_count": 704.73,
+            "gh_id": "gh_9eef14ad6c16",
+        },
+        {
+            "account_name": "史说新看",
+            "avg_view_count": 116.86,
+            "gh_id": "gh_9ffc91a17adc",
+        },
+        {
+            "account_name": "精选俏生活",
+            "avg_view_count": 313,
+            "gh_id": "gh_a2166b54ee1d",
+        },
+        {
+            "account_name": "畅聊奇闻",
+            "avg_view_count": 1162.86,
+            "gh_id": "gh_a2901d34f75b",
+        },
+        {
+            "account_name": "畅享福晚年",
+            "avg_view_count": 85.57,
+            "gh_id": "gh_a41bf1e2b0ab",
+        },
+        {
+            "account_name": "田地杂谈",
+            "avg_view_count": 161.26,
+            "gh_id": "gh_a57c73d76d65",
+        },
+        {
+            "account_name": "享暮年华乐",
+            "avg_view_count": 156.16,
+            "gh_id": "gh_a8652e07fbd6",
+        },
+        {
+            "account_name": "趣览史说",
+            "avg_view_count": 354.86,
+            "gh_id": "gh_a912ae49c81a",
+        },
+        {
+            "account_name": "奇看趣闻",
+            "avg_view_count": 215.24,
+            "gh_id": "gh_ab6f244386d8",
+        },
+        {
+            "account_name": "都市佳人伴渎",
+            "avg_view_count": 58.47,
+            "gh_id": "gh_abf5ded627ae",
+        },
+        {
+            "account_name": "一品趣闻",
+            "avg_view_count": 38.77,
+            "gh_id": "gh_ac6bba08dab7",
+        },
+        {
+            "account_name": "暮年享乐",
+            "avg_view_count": 152.79,
+            "gh_id": "gh_b1513a1766bf",
+        },
+        {
+            "account_name": "人生百事观",
+            "avg_view_count": 335.94,
+            "gh_id": "gh_b15de7c99912",
+        },
+        {
+            "account_name": "无忧生活小妙招",
+            "avg_view_count": 481.36,
+            "gh_id": "gh_b676b7ad9b74",
+        },
+        {
+            "account_name": "读史趣谈",
+            "avg_view_count": 1032.11,
+            "gh_id": "gh_b72f81fe2d65",
+        },
+        {
+            "account_name": "杰云若语",
+            "avg_view_count": 65.99,
+            "gh_id": "gh_bbcaf6794ba2",
+        },
+        {
+            "account_name": "奇趣百味生活",
+            "avg_view_count": 972.87,
+            "gh_id": "gh_bfe5b705324a",
+        },
+        {
+            "account_name": "无忧居家达人",
+            "avg_view_count": 135.06,
+            "gh_id": "gh_c128a013b0d0",
+        },
+        {
+            "account_name": "老友快乐谈",
+            "avg_view_count": 1105.02,
+            "gh_id": "gh_c5cdf60d9ab4",
+        },
+        {
+            "account_name": "优选持家派",
+            "avg_view_count": 78.64,
+            "gh_id": "gh_c8580bc45e36",
+        },
+        {
+            "account_name": "趣生活技巧",
+            "avg_view_count": 80.89,
+            "gh_id": "gh_c9ee53e1fbbc",
+        },
+        {
+            "account_name": "博阅历史",
+            "avg_view_count": 630.42,
+            "gh_id": "gh_cde3948134c5",
+        },
+        {
+            "account_name": "趣闻汉史",
+            "avg_view_count": 196.69,
+            "gh_id": "gh_cfb5c80fcd34",
+        },
+        {
+            "account_name": "生活指示录",
+            "avg_view_count": 614.78,
+            "gh_id": "gh_d49df5e974ca",
+        },
+        {
+            "account_name": "香茗史论",
+            "avg_view_count": 191.35,
+            "gh_id": "gh_d5f8b8f26704",
+        },
+        {
+            "account_name": "快乐生活新方向",
+            "avg_view_count": 165.26,
+            "gh_id": "gh_d6451c7c0b92",
+        },
+        {
+            "account_name": "趣闻时分秒",
+            "avg_view_count": 53.05,
+            "gh_id": "gh_d69ca56957ad",
+        },
+        {
+            "account_name": "持家有窍门",
+            "avg_view_count": 58.47,
+            "gh_id": "gh_d71bffcc25e1",
+        },
+        {
+            "account_name": "无忧自在生活",
+            "avg_view_count": 734.72,
+            "gh_id": "gh_dd4c857bbb36",
+        },
+        {
+            "account_name": "文史新看",
+            "avg_view_count": 67.28,
+            "gh_id": "gh_e41d5e19eb4e",
+        },
+        {
+            "account_name": "妙晚年日记",
+            "avg_view_count": 56.06,
+            "gh_id": "gh_e6c35aa5954c",
+        },
+        {
+            "account_name": "云景史记",
+            "avg_view_count": 62.88,
+            "gh_id": "gh_ece3fdd64622",
+        },
+        {
+            "account_name": "轻松妙生活家",
+            "avg_view_count": 131.23,
+            "gh_id": "gh_f0b83f5f8f07",
+        },
+        {
+            "account_name": "家有窍门大全",
+            "avg_view_count": 100.57,
+            "gh_id": "gh_f37296466d43",
+        },
+        {
+            "account_name": "趣谈暮年",
+            "avg_view_count": 1692.99,
+            "gh_id": "gh_f48d3a90fc6e",
+        },
+        {
+            "account_name": "都市退休蕊渎",
+            "avg_view_count": 83.45,
+            "gh_id": "gh_f5a39031d305",
+        },
+        {
+            "account_name": "快乐生活有方法",
+            "avg_view_count": 91.97,
+            "gh_id": "gh_f9eeba7eb5c7",
+        },
+        {
+            "account_name": "趣史汇看",
+            "avg_view_count": 201.81,
+            "gh_id": "gh_fb633e4a8635",
+        },
+        {
+            "account_name": "趣味生活达人1",
+            "avg_view_count": 437.26,
+            "gh_id": "gh_ff487cb5dab3",
+        },
     ]
     total_avg_views = get_total_avg_views(all_accounts)
     print(total_avg_views)
@@ -451,8 +453,12 @@ def ab_():
         count += 1
         print(count)
         if 0.47 < sample_views / total_avg_views < 0.53:
-            print(json.dumps([i["account_name"] for i in samples], indent=4, ensure_ascii=False))
+            print(
+                json.dumps(
+                    [i["account_name"] for i in samples], indent=4, ensure_ascii=False
+                )
+            )
             break
 
 
-ab_()
+ab_()

+ 0 - 1
applications/ab_test/get_cover.py

@@ -27,7 +27,6 @@ class GetCoverService(Response):
                 return None
 
     async def fetch_cover_info(self, pool_name, channel_content_id: str):
-
         match pool_name:
             case "aigc":
                 fetch_response = await fetch_aigc_cover(self.pool, channel_content_id)

+ 0 - 2
applications/api/async_feishu_api.py

@@ -44,7 +44,6 @@ class Feishu:
 
 
 class FeishuSheetApi(Feishu):
-
     async def prepend_value(self, sheet_token, sheet_id, ranges, values):
         insert_value_url = "https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{}/values_prepend".format(
             sheet_token
@@ -83,7 +82,6 @@ class FeishuSheetApi(Feishu):
 
 
 class FeishuBotApi(Feishu):
-
     @classmethod
     def create_feishu_columns_sheet(
         cls,

+ 0 - 1
applications/api/elastic_search_api.py

@@ -7,7 +7,6 @@ from applications.config import es_index
 
 
 class AsyncElasticSearchClient:
-
     def __init__(self, index_=es_index):
         self.password = "nkvvASQuQ0XUGRq5OLvm"
         self.hosts = ["https://192.168.205.85:9200", "https://192.168.205.85:9300"]

+ 1 - 1
applications/config/task_chinese_name.py

@@ -16,5 +16,5 @@ name_map = {
     "check_kimi_balance": "检验kimi余额",
     "account_category_analysis": "账号品类分析",
     "mini_program_detail_process": "更新小程序信息",
-    "crawler_detail_analysis": "抓取详情分析"
+    "crawler_detail_analysis": "抓取详情分析",
 }

+ 3 - 1
applications/database/mysql_pools.py

@@ -65,7 +65,9 @@ class DatabaseManager:
             logging.error(f"Failed to fetch {query}: {str(e)}")
             return None
 
-    async def async_save(self, query, params, db_name="long_articles", batch: bool=False):
+    async def async_save(
+        self, query, params, db_name="long_articles", batch: bool = False
+    ):
         pool = self.pools[db_name]
         if not pool:
             await self.init_pools()

+ 0 - 1
applications/pipeline/crawler_pipeline.py

@@ -10,7 +10,6 @@ from applications.utils import CrawlerMetaAccount
 
 
 class CrawlerPipeline(AsyncApolloApi):
-
     MODEL_TABLE_MAP: Dict[str, Tuple[type[BaseModel], str]] = {
         "article": (CrawlerMetaArticle, "crawler_meta_article"),
         "account": (CrawlerMetaAccount, "crawler_candidate_account_pool"),

+ 13 - 5
applications/tasks/algorithm_tasks/account_category_analysis.py

@@ -21,6 +21,9 @@ class AccountCategoryConst:
     VIEW_COUNT_RATE_THRESHOLD = 0
     INDEX_MAX = 3
 
+    # MAX VALUE
+    MAX_VALUE = 0.5
+
 
 class AccountCategoryAnalysis(CategoryRegression, AccountCategoryConst):
     def __init__(self, pool, log_client, trace_id, date_string, data):
@@ -44,9 +47,11 @@ class AccountCategoryAnalysis(CategoryRegression, AccountCategoryConst):
             run_date = datetime.today()
 
         end_dt = (run_date - timedelta(1)).strftime("%Y%m%d")
-        return end_dt
+        begin_dt = (run_date - timedelta(61)).strftime("%Y%m%d")
+        return begin_dt, end_dt
 
     async def prepare_raw_data(self, end_dt, begin_dt: str = "20250401"):
+        begin_dt = "20250401"
         query = """
             select dt, gh_id, account_name, title, similarity, view_count_rate, category,
                     read_avg, read_avg_rate, first_pub_interval, `index`
@@ -57,7 +62,8 @@ class AccountCategoryAnalysis(CategoryRegression, AccountCategoryConst):
                 and read_avg > %s
                 and read_avg_rate between %s and %s
                 and view_count_rate > %s
-                and `index` < %s;
+                and `index` < %s
+                and account_name in ('生活慢时光', '美好时光阅读汇', '史趣探秘', '趣味生活漫谈', '趣味生活方式', '趣味生活漫时光')
             ;
         """
         fetch_response = await self.pool.async_fetch(
@@ -157,7 +163,9 @@ class AccountCategoryAnalysis(CategoryRegression, AccountCategoryConst):
                 print(
                     f"{account_id} {account_name} {category_name} {param:.3f} {p_value:.3f}"
                 )
-                truncate_param = round(max(min(param, 0.25), -0.25) * scale_factor, 6)
+                truncate_param = round(
+                    max(min(param, self.MAX_VALUE), -self.MAX_VALUE) * scale_factor, 6
+                )
                 current_record["category_map"][category_name] = truncate_param
 
             if (
@@ -177,8 +185,8 @@ class AccountCategoryAnalysis(CategoryRegression, AccountCategoryConst):
         await self.update_each_account(current_record)
 
     async def deal(self):
-        end_dt = self.init_execute_date()
-        raw_dataframe = await self.prepare_raw_data(end_dt)
+        begin_dt, end_dt = self.init_execute_date()
+        raw_dataframe = await self.prepare_raw_data(end_dt=end_dt, begin_dt=begin_dt)
 
         # prepare data for model
         pre_processed_dataframe = self.preprocess_data(raw_dataframe)

+ 0 - 1
applications/tasks/crawler_tasks/crawler_account_manager.py

@@ -49,7 +49,6 @@ class CrawlerAccountManager(CrawlerAccountManagerConst):
 
 
 class WeixinAccountManager(CrawlerAccountManager):
-
     def __init__(self, pool, aliyun_log, trace_id):
         super().__init__(pool, aliyun_log, trace_id)
         self.pool = pool

+ 8 - 16
applications/tasks/data_recycle_tasks/recycle_daily_publish_articles.py

@@ -37,15 +37,11 @@ class Const:
         "gh_ac43e43b253b",
         "gh_93e00e187787",
         "gh_080bb43aa0dc",
-        "gh_b1c71a0e7a85"
+        "gh_b1c71a0e7a85",
     ]
 
     # NOT USED SERVER ACCOUNT
-    NOT_USED_SERVER_ACCOUNT = {
-        "gh_84e744b16b3a",
-        "gh_5855bed97938",
-        "gh_61a72b720de3"
-    }
+    NOT_USED_SERVER_ACCOUNT = {"gh_84e744b16b3a", "gh_5855bed97938", "gh_61a72b720de3"}
 
     # 文章状态
     # 记录默认状态
@@ -76,7 +72,6 @@ class Const:
 
 
 class RecycleDailyPublishArticlesTask(Const):
-
     def __init__(self, pool, log_client, date_string):
         self.pool = pool
         self.log_client = log_client
@@ -196,13 +191,7 @@ class RecycleDailyPublishArticlesTask(Const):
         #     }
         #     for item in binding_accounts
         # ]
-        account_list = [
-            {
-                **item,
-                "using_status": 1
-            }
-            for item in binding_accounts
-        ]
+        account_list = [{**item, "using_status": 1} for item in binding_accounts]
 
         # 订阅号
         subscription_accounts = [
@@ -223,7 +212,6 @@ class RecycleDailyPublishArticlesTask(Const):
 
 
 class CheckDailyPublishArticlesTask(RecycleDailyPublishArticlesTask):
-
     async def check_account(self, account: dict, date_string: str) -> bool:
         """check account data"""
         query = """
@@ -541,7 +529,11 @@ class RecycleFwhDailyPublishArticlesTask(Const):
         fetch_response = await self.pool.async_fetch(
             query=fetch_query, db_name="piaoquan_crawler"
         )
-        gh_id_list = [i["gzh_id"] for i in fetch_response if i["gzh_id"] not in self.NOT_USED_SERVER_ACCOUNT]
+        gh_id_list = [
+            i["gzh_id"]
+            for i in fetch_response
+            if i["gzh_id"] not in self.NOT_USED_SERVER_ACCOUNT
+        ]
         return gh_id_list
 
     async def get_stat_published_articles(self, gh_id):

+ 3 - 2
applications/tasks/data_recycle_tasks/recycle_mini_program_detail.py

@@ -37,7 +37,6 @@ class MiniProgramConst:
 
 
 class RecycleMiniProgramDetailBase(MiniProgramConst):
-
     def __init__(self, pool, log_client, trace_id):
         self.pool = pool
         self.log_client = log_client
@@ -52,7 +51,9 @@ class RecycleMiniProgramDetailBase(MiniProgramConst):
                 "image_url": "",
                 "nike_name": "票圈 l 3亿人喜欢的视频平台",
                 "root_source_id": item["root_source_id"],
-                "video_id": item["video_id"] if item["video_id"] else self.DEFAULT_VIDEO_ID,
+                "video_id": item["video_id"]
+                if item["video_id"]
+                else self.DEFAULT_VIDEO_ID,
                 "service_type": "0",
                 "title": "",
                 "type": "card",

+ 2 - 0
applications/tasks/llm_tasks/__init__.py

@@ -1,9 +1,11 @@
 from .process_title import TitleRewrite
 from .process_title import ArticlePoolCategoryGeneration
+from .process_title import ExtractTitleFeatures
 from .candidate_account_process import CandidateAccountQualityScoreRecognizer
 
 __all__ = [
     "TitleRewrite",
     "CandidateAccountQualityScoreRecognizer",
     "ArticlePoolCategoryGeneration",
+    "ExtractTitleFeatures",
 ]

+ 0 - 1
applications/tasks/llm_tasks/candidate_account_process.py

@@ -9,7 +9,6 @@ from applications.utils import ci_lower
 
 
 class CandidateAccountProcessConst:
-
     INIT_STATUS = 0
     PROCESSING_STATUS = 1
     SUCCESS_STATUS = 2

+ 94 - 2
applications/tasks/llm_tasks/process_title.py

@@ -1,3 +1,4 @@
+import json
 import time
 import traceback
 
@@ -5,6 +6,8 @@ from typing import Optional, List, Dict, Tuple
 
 from applications.api import fetch_deepseek_completion
 from applications.utils import yield_batch
+from applications.tasks.llm_tasks.prompts import extract_article_features
+from tqdm.asyncio import tqdm
 
 
 class Const:
@@ -307,7 +310,6 @@ class VideoPoolCategoryGeneration:
 
 
 class ArticlePoolCategoryGeneration(TitleProcess):
-
     def __init__(self, pool, aliyun_log, trace_id):
         super().__init__(pool, aliyun_log, trace_id)
 
@@ -475,7 +477,6 @@ class ArticlePoolCategoryGeneration(TitleProcess):
 
 
 class TitleRewrite(TitleProcess):
-
     async def roll_back_blocked_tasks(self):
         """
         rollback blocked tasks
@@ -614,3 +615,94 @@ class TitleRewrite(TitleProcess):
 
         for article in task_list:
             await self.rewrite_each_article(article)
+
+
+class ExtractTitleFeatures(Const):
+    """Extract structured features for article titles with an LLM.
+
+    Rows are read from the ``title_features`` table, moved from
+    ``INIT_STATUS`` to ``PROCESSING_STATUS``, sent to the model in a single
+    prompt, and written back with ``SUCCESS_STATUS`` once their features
+    have been stored.
+    """
+
+    def __init__(self, pool, aliyun_log, trace_id):
+        self.pool = pool
+        self.aliyun_log = aliyun_log
+        self.trace_id = trace_id
+
+    async def get_tasks(self, batch_size=100):
+        """Fetch at most ``batch_size`` rows whose status is ``INIT_STATUS``.
+
+        Returns whatever ``pool.async_fetch`` returns; ``deal`` indexes each
+        row with the keys ``"id"`` and ``"title"``.
+        """
+        query = """
+            select id, title
+            from title_features
+            where status = %s
+            limit %s;
+        """
+        return await self.pool.async_fetch(
+            query=query, params=(self.INIT_STATUS, batch_size)
+        )
+
+    async def update_status(self, title_id, ori_status, new_status):
+        """Move one row from ``ori_status`` to ``new_status``.
+
+        The ``status = ori_status`` guard makes the update a no-op when the
+        row is no longer in the expected state.
+        """
+        query = """
+            UPDATE title_features
+            SET status = %s
+            WHERE id = %s AND status = %s;
+        """
+        return await self.pool.async_save(
+            query=query,
+            params=(new_status, title_id, ori_status),
+        )
+
+    async def update_status_batch(self, title_id_list, ori_status, new_status):
+        """Move every row in ``title_id_list`` from ``ori_status`` to ``new_status``.
+
+        Returns 0 without touching the database when the list is empty,
+        because an empty tuple would render as ``id in ()``, which is not
+        valid SQL.
+        """
+        if not title_id_list:
+            return 0
+
+        query = """
+                UPDATE title_features
+                SET status = %s
+                WHERE id in %s
+                  AND status = %s;
+                """
+        return await self.pool.async_save(
+            query=query,
+            params=(new_status, tuple(title_id_list), ori_status),
+        )
+
+    async def set_feature_for_each_title(self, title_id, feature_dict):
+        """Persist the features of one title and mark the row as successful.
+
+        Args:
+            title_id: primary key of the ``title_features`` row.
+            feature_dict: the per-title object produced by the prompt built in
+                ``extract_article_features`` (keys ``category``, ``era``,
+                ``characters``, ``emotions`` and ``structure``).
+
+        Raises:
+            KeyError: if ``category``, ``era``, ``characters``, ``emotions`` or
+                ``structure.title_type`` is missing.
+        """
+        structure = feature_dict.get("structure") or {}
+        # The prompt asks the model for "is_person_event_comment"; the shorter
+        # "is_person_comment" is still accepted as a fallback so that the
+        # previously expected key keeps working.
+        person_comment_flag = structure.get(
+            "is_person_event_comment", structure.get("is_person_comment")
+        )
+
+        query = """
+            UPDATE title_features
+            SET category = %s, era = %s, characters = %s, emotions = %s,
+                is_strong_reverse = %s, is_person_comment = %s, title_type = %s,
+                status = %s
+            WHERE id = %s and status = %s;
+        """
+        return await self.pool.async_save(
+            query=query,
+            params=(
+                feature_dict["category"],
+                feature_dict["era"],
+                json.dumps(feature_dict["characters"], ensure_ascii=False),
+                json.dumps(feature_dict["emotions"], ensure_ascii=False),
+                1 if structure.get("is_strong_reverse") else 0,
+                1 if person_comment_flag else 0,
+                structure["title_type"],
+                self.SUCCESS_STATUS,
+                title_id,
+                self.PROCESSING_STATUS,
+            ),
+        )
+
+    async def deal(self, data):
+        """Process one batch of pending titles.
+
+        Args:
+            data: task payload; ``data["batch_size"]`` (default 50) caps the
+                number of rows handled in this run.
+
+        Returns:
+            A summary dict with ``total`` (rows fetched), ``success`` (rows
+            written) and ``failed_ids`` (rows for which no usable features
+            came back).
+
+        Raises:
+            Exception: whatever ``fetch_deepseek_completion`` raises, after
+                the batch has been rolled back to ``INIT_STATUS``.
+        """
+        # Local import keeps this block self-contained.
+        import asyncio
+
+        batch_size = data.get("batch_size", 50)
+        task_list = await self.get_tasks(batch_size=batch_size)
+        if not task_list:
+            # Nothing pending: skip the LLM call and the (invalid) empty IN ().
+            return {"total": 0, "success": 0, "failed_ids": []}
+
+        id_list = [task["id"] for task in task_list]
+        # Send each distinct title once, keeping the original order.
+        title_list = list(dict.fromkeys(task["title"] for task in task_list))
+
+        prompt = extract_article_features(title_list)
+
+        # Mark the batch as in progress before the slow model call.
+        await self.update_status_batch(
+            id_list, self.INIT_STATUS, self.PROCESSING_STATUS
+        )
+
+        try:
+            # fetch_deepseek_completion is a blocking call; run it in a worker
+            # thread so the event loop stays responsive while waiting.
+            feature_dict = await asyncio.to_thread(
+                fetch_deepseek_completion,
+                model="default",
+                prompt=prompt,
+                output_type="json",
+            )
+        except Exception:
+            # Do not leave the batch stranded in PROCESSING_STATUS.
+            await self.update_status_batch(
+                id_list, self.PROCESSING_STATUS, self.INIT_STATUS
+            )
+            raise
+
+        if not isinstance(feature_dict, dict):
+            # No usable answer at all: release the batch for a later retry.
+            await self.update_status_batch(
+                id_list, self.PROCESSING_STATUS, self.INIT_STATUS
+            )
+            return {"total": len(task_list), "success": 0, "failed_ids": id_list}
+
+        success_count = 0
+        failed_ids = []
+        # Iterate rows rather than titles so duplicate titles are all written.
+        for task in tqdm(task_list):
+            features = feature_dict.get(task["title"])
+            if not isinstance(features, dict) or not features:
+                # NOTE(review): rows without features stay in PROCESSING_STATUS,
+                # as before; decide whether they should be failed or retried.
+                failed_ids.append(task["id"])
+                continue
+
+            try:
+                await self.set_feature_for_each_title(task["id"], features)
+            except (KeyError, TypeError, AttributeError):
+                # A malformed feature object must not abort the whole batch.
+                failed_ids.append(task["id"])
+                continue
+            success_count += 1
+
+        return {
+            "total": len(task_list),
+            "success": success_count,
+            "failed_ids": failed_ids,
+        }

+ 281 - 0
applications/tasks/llm_tasks/prompts.py

@@ -0,0 +1,281 @@
+def extract_article_features(title_list: list[str]) -> str:
+    """Build the LLM prompt that classifies and tags a list of article titles.
+
+    Args:
+        title_list: the titles to analyse.
+
+    Returns:
+        The fixed instruction text followed by the titles, one per line. The
+        instructions ask the model for a JSON object keyed by the original
+        title, each value holding ``category``, ``era``, ``characters``,
+        ``emotions`` and ``structure``.
+    """
+    prompt = """
+你是一个稳定可靠的**文本分类与标签抽取助手**,专门根据「文章标题」判断内容类型,并为每个标题打上多维度的内容标签,用于公众号内容运营与算法分析。
+
+你的任务是:**对输入的每一个标题,完成内容品类分类 + 多维标签标注,并以 JSON 格式输出结果。**
+
+---
+
+## 一、任务说明
+
+给定若干文章标题,你需要对每个标题完成以下两部分工作:
+
+1. **一级内容品类分类(必选 1 个)**  
+   - 从预先给定的 15 个品类中,选择最合适的一个,作为该标题的主类目。  
+   - 不允许创造新类目,只能从这 15 个中选择。
+
+2. **多维度内容标签标注(可多选)**  
+   - 时代(`era`)  
+   - 主角类型(`characters`)  
+   - 情绪倾向(`emotions`)  
+   - 结构与形式(`structure`)
+
+---
+
+## 二、一级品类定义(15 类)
+
+请务必从下面 15 个品类中,选择一个最合适的品类作为 `"category"` 字段输出,不要创造新的品类名称。
+
+1. **知识科普**  
+定义:以通俗易懂的方式普及科学、技术、健康、安全、生活常识、财产保护、医保政策、为人处事方式等内容,旨在提高公众的知识水平和认知能力。内容通常具有教育性和实用性,涵盖自然、社会、文化等多个领域。  
+标题示例:  
+- 我国存款最安全的五大银行,永远都不会倒闭,你知道是哪五家吗?  
+- 借条上不要写“这3个字”,不然变成一张废纸,否则用法律也没用  
+- 不能二次加热的3种食物!再次提醒:这3种食物吃不完最好扔掉  
+
+2. **军事历史**  
+定义:聚焦于历史上的军事事件、战争故事、军事策略、英雄人物等内容,旨在还原战争场景、探讨军事决策、揭示历史真相,并展现战争中的人物命运与历史影响。内容通常以叙事、分析或回忆的形式呈现,兼具历史深度和故事性。  
+标题示例:  
+- 对越作战永远失踪的332人,陵园没有墓碑,没有名字,只有烈士证  
+- 淮海大战丢失阵地,师长带头冲锋!最后出一口恶气:活捉敌最高指挥官  
+- 抗战时,一村民被敌拉去带路,半道回头忽发现:后面跟个游击队员  
+
+3. **家长里短**  
+定义:围绕家庭成员之间的关系、矛盾、情感、道德等展开的故事或讨论,内容常涉及婚姻、亲子、婆媳、兄弟姐妹等关系,或是人情往来、金钱纠纷、情感变化等内容,反映家庭生活中的温情、冲突与人性。  
+标题示例:  
+- 父母越老越能暴露家庭最真实的一面:当父母70岁,子女不该抱有这三种期待  
+- 老母亲分家产,给亲闺女30万,给养女一筐青菜,养女意外摔倒,看到筐子里的东西,瞬间愣住了  
+- 我花150一天雇了阿姨,两天后上班回来给她300,阿姨说我账算错了  
+
+4. **社会法治**  
+定义:聚焦社会事件、法律纠纷、法院判决、社会现象等内容,通常涉及道德、法律、公平正义等议题,旨在揭示社会问题、探讨法律规则或反映人性与社会现实。  
+标题示例:  
+- 山东,女子在小区捡到16万天价项链,业主悬赏3万找回,女子归还后,失主拒绝支付报酬,还说:要有格局,女子认为被骗,将失主告上法庭  
+- 陕西,女子22万买26层房,2年后,楼盘24层就已经封顶!开发商:你闹事造成100万损失,道歉才给赔偿!  
+- 上海:男子超市连续购买46枚过期咸鸭蛋,2天分46次交易,向厂家索赔金14万,法院判了!  
+
+5. **奇闻趣事**  
+定义:以猎奇、娱乐为主,涵盖罕见、奇特、有趣的事件、发现或故事,内容通常具有趣味性和话题性,能够引发读者的好奇心和讨论。  
+标题示例:  
+- 狗屎运?江西男子钓鱼时发现青鱼尸骸,扒开后捡到鸡蛋大小的青鱼石,网友:起码值几千!  
+- 内蒙古小伙河边捡到金牌,拒绝上交将其熔成金手镯,专家气愤不已  
+- 男子买了一辆废弃坦克,拆油箱时,他发现了一根又一根的金条……  
+
+6. **名人八卦**  
+定义:围绕名人的生活、言论、事件、八卦等内容展开,通常涉及娱乐圈、政界、历史人物等,旨在满足公众对名人隐私和动态的好奇心。  
+标题示例:  
+- 难怪王扶林说陈晓旭不够漂亮,看看他选的原黛玉候选人,那才叫美  
+- 心狠手辣的容嬷嬷年轻时是校花?看了照片后,网友直接闭嘴了!  
+- 李玉成终于说出实话,公开吐槽马玉琴年纪太大,结婚28年疑似后悔  
+
+7. **健康养生**  
+定义:关注健康、养生、疾病预防、生活习惯等方面的知识和建议,内容通常具有实用性和指导性,旨在帮助读者改善生活质量、提升健康水平。  
+标题示例:  
+- 72岁老人每天一个蒸苹果,半年后体检,看到指标变化让他乐开了花  
+- 40岁女子每天吃水煮蛋,一年后去体检,检查报告令医生都羡慕不已  
+- 2024年血糖新标准已公布,不再是3.9~6.1,你的血糖还不算高吗?  
+
+8. **情感故事**  
+定义:以人与人之间的情感交流、感人故事、情感经历为主题,内容通常充满温情、感动或反思,旨在引发读者的情感共鸣和思考。  
+标题示例:  
+- 男孩饭店吃饭,发现陌生女子和去世母亲很像,走过去说:我妈妈去世了,能抱一下我吗?  
+- 河南一女子直播时,被失散 32 年的父亲认出:闺女等着爸爸接你回家  
+- 1987年,江苏男子借好友一千元,25年后朋友成富豪还他1000万报恩  
+- 流浪狗跟着骑行夫妻跑了一百多公里,一直守护在女主身边,赶都赶不走,当男主得知原因后竟抱着狗狗大哭起来  
+
+9. **国家大事**  
+定义:涉及国家实力、科技发展、资源发现、国际合作等内容,通常以宏观视角展现国家的综合实力、科技成就或国际影响力,体现国家的崛起与发展。  
+标题示例:  
+- 我国在南极发现“海上粮仓”,储量高达10亿吨,世界各国眼红不已  
+- 我国贵州发现7000万吨宝藏,价值高达上万亿,多国求合作被拒绝  
+- 距我国3000公里,塞班岛明明归美国管辖,为何岛上大多是中国人?  
+
+10. **现代人物**  
+定义:聚焦活跃在21世纪后具有传奇色彩或巨大贡献的人物、事迹、成就等,内容通常充满戏剧性和启发性,旨在展现人物的非凡经历或历史贡献。  
+标题示例:  
+- 她曾狂贪国家上百亿,被发现时已经移居美国,最终还风光一时得善终  
+- 山东女子因坐月子无聊,破译美国2套绝密系统的密码,国家:奖励711万!  
+- 牺牲太大了!航天女英雄刘洋:结婚8年未生子,回地面后“消失”的她怎样了?  
+
+11. **怀旧时光**  
+定义:以回忆和怀旧为主题,涉及过去的历史、文化、生活、照片等内容,旨在唤起读者对过去时光的情感共鸣和怀念。  
+标题示例:  
+- 1975年“下馆子”的老照片,2元能吃些什么,勾起那段最难忘的时光  
+- 82年,北京老人捡回两张“破椅子”,遭家人数落,29年后拍出2300万  
+- 这张老照片第一次看到,邓颖超和李讷的罕见合影!  
+
+12. **政治新闻**  
+定义:聚焦政治事件、领导人动态、国际关系等内容,通常以新闻或分析的形式呈现,旨在揭示政治局势、政策变化或国际关系的动态。  
+标题示例:  
+- 中方外长行程有变,提前结束访欧匆匆回国,带回来一个好消息  
+- 宋庆龄在北京逝世后,远在美国的宋美龄只说了7个字,字字揪心!  
+- 庐山会议后,叶帅去劝彭德怀认个错,哭着说了一句心里话  
+
+13. **历史人物**  
+定义:聚焦于21世纪前具有重要影响的人物,包括他们的生平、事迹、成就、性格、趣事及其对历史进程的贡献。内容通常以传记、回忆录或历史分析的形式呈现,旨在还原人物的真实面貌并探讨其历史意义。  
+标题示例:  
+- 林彪去世后,蒋介石收到林彪与戴笠的一份密谈文件,看后拍桌大骂  
+- 张学良软禁时的一张实拍照片,头发秃顶,两眼无光,像个中年老头  
+- 1912年,孙中山和两个女儿罕见留影,面对镜头父女三人看起来很幸福  
+
+14. **社会现象**  
+定义:关注社会中出现的普遍现象、趋势或问题,通常涉及文化、经济、教育、民生等领域。内容以观察、分析或评论为主,旨在揭示现象背后的原因、影响及社会意义,引发公众的思考和讨论。  
+标题示例:  
+- 22年河南男子跳河救人,体力耗尽留遗言,被救女子猛然抓住他:一起走  
+- 浙江一老人刑满释放,靠蹬三轮为生,6年后,政府领导登门拜访:我们帮您分配工作  
+- 儿子收到清华通知书,父亲花5万请全村吃席,镇长看一眼竟说:这是假的  
+
+15. **财经科技**  
+定义:聚焦于经济、金融、投资及行业发展的分析与预测,涵盖未来经济趋势、资产价值变化、行业变革及个人理财策略等内容。可以提供前瞻性的财经视角和实用的理财建议,帮助其把握经济动态、优化财务规划并应对行业变化。  
+标题示例:  
+- 未来10年,现金和房子都将贬值,只有2样东西最值钱  
+- 外卖时代将被终结?一个全新行业正悄悄取代外卖,你准备好了吗?  
+- 准备存款的一定要知道,今明两年,定期存款要记住“4不存”  
+
+---
+
+## 三、二级标签体系(多维标签)
+
+在确定一级品类后,请根据标题内容,继续打上以下多维标签。
+
+### 1. 时代标签(`era`,三选一)
+
+只允许从下面三个字符串中选择一个作为 `"era"` 值:
+
+- `"当下"`  
+  - 主要讲当代社会、近 20 年左右的现实生活、现代人物、当下事件等。
+- `"80-90回忆"`  
+  - 标题中明显带有 80、90 年代、童年往事、老物件等“年代感”回忆氛围。
+- `"历史往事"`  
+  - 更久远的历史时期,如古代、民国、抗战、解放前等明显历史语境。
+
+无法精确判断时,按以下原则处理:
+
+- 若是“回忆以前生活,但时间界定不清”,偏向 `"80-90回忆"`;  
+- 若明显是「历史人物/历史事件/古代故事」,选 `"历史往事"`;  
+- 纯当代社会新闻、情感故事、现实生活 → `"当下"`。
+
+### 2. 主角类型(`characters`,数组,可多选)
+
+只允许从以下 5 个标签中选择一个或多个,组成数组:
+
+- `"名人"`:明星、名将、著名政治人物、知名科学家、企业家等公众人物。  
+- `"普通人"`:日常生活中的普通个体,如“男子”“女子”“老人”“小伙”等,无明显名人属性。  
+- `"家庭"`:故事核心围绕家庭关系(父母子女、夫妻、婆媳、兄弟姐妹等)。  
+- `"老年人"`:标题中明确出现老年群体,如“七旬老人”“老母亲”“大爷”“大妈”等。  
+- `"年轻人"`:标题主角为青年群体,如“大学生”“小伙子”“姑娘”“打工人”“90后”等。
+
+要求:
+
+- `characters` 必须至少包含 1 个标签。  
+- 可以多选,例如:  
+  - 老年家庭故事 → `["老年人", "家庭"]`;  
+  - 名人家庭矛盾 → `["名人", "家庭"]`。  
+
+### 3. 情绪倾向(`emotions`,数组,可多选)
+
+从以下 7 个标签中选择,描述文章整体的情绪风格或读者预期感受:
+
+- `"爽文"`:  
+  - 读完让人感到痛快、打脸成功、报复成功、逆袭、出气等。
+- `"反转"`:  
+  - 标题或内容暗示前后强烈反差,结局出人意料,有明显剧情反转。
+- `"暖心"`:  
+  - 氛围温暖、感动、有善意或被治愈的感觉。
+- `"心酸"`:  
+  - 让人感到辛酸、无奈、遗憾、苦涩等复杂情绪。
+- `"愤怒"`:  
+  - 主要呈现不公、欺骗、恶意伤害、权力滥用等,让读者容易产生愤慨、气愤情绪。
+- `"知识型"`:  
+  - 以传递知识、经验、技巧为主,强调“学到东西”,而非以情绪起伏为主。
+- `"民族认同"`:  
+  - 标题中包含民族,爱国相关的内容,强调民族认同、文化传承等。
+
+要求:
+
+- `emotions` 必须至少包含 1 个标签。  
+- 可以多选,例如:  
+  - 又反转又暖心 → `["反转", "暖心"]`;  
+  - 又心酸又愤怒 → `["心酸", "愤怒"]`。  
+
+### 4. 结构与形式标签(`structure`)
+
+请根据标题形式与暗示的叙事结构,判断以下三个字段:
+
+1. `is_strong_reverse`(是否强反转,布尔值)  
+   - `true`:标题明显体现出强烈的前后反差、剧情翻转或结局意外。  
+   - `false`:未体现明显反转结构。
+
+2. `is_person_event_comment`(是否“人物 + 事件 + 评论”结构,布尔值)  
+   - `true`:内容模式倾向于“讲一个人物 → 发生一个事件 → 对该事件进行评价/反思/启示”。  
+   - `false`:不符合该结构,或从标题中难以判断。
+
+3. `title_type`(标题类型,四选一)  
+   只允许从以下四个字符串中选择一个作为标题类型:
+
+   - `"疑问句"`:标题以问号、提问形式出现,包含明显发问。  
+   - `"感叹句"`:标题以感叹号或明显的惊叹语气出现。  
+   - `"冷静陈述"`:标题以平铺直叙的形式说明事实,没有明显疑问或感叹。  
+   - `"三段式标题"`:  
+     - 标题明显由三部分组成,中间用符号或短句分割(如“主标题 + 副标题 + 评论/总结”,或用破折号、顿号、分号等分成三段)。
+
+若标题同时符合多种情况,请按照最突出的特点选择其一。
+
+---
+
+## 四、输出格式要求(非常重要)
+
+1. **最终输出必须是合法的 JSON,不要使用 markdown 代码块,不要添加任何多余文字说明。**  
+2. JSON 的整体结构为:  
+
+- 最外层是一个对象(dictionary),**key 为原始标题字符串,value 为该标题的标签结果对象**。  
+
+每个标题对应的 value 对象包含以下字段:
+
+- `"category"`:字符串,一级品类名称,必须从上述 15 个中选一。  
+- `"era"`:字符串,只能是 `"当下"`、`"80-90回忆"` 或 `"历史往事"`。  
+- `"characters"`:数组,元素为主角标签字符串,只能从 `["名人","普通人","家庭","老年人","年轻人"]` 中选择,至少 1 个。  
+- `"emotions"`:数组,元素为情绪标签字符串,只能从 `["爽文","反转","暖心","心酸","愤怒","知识型","民族认同"]` 中选择,至少 1 个。  
+- `"structure"`:对象,包含 3 个字段:  
+  - `"is_strong_reverse"`:布尔值 `true` 或 `false`;  
+  - `"is_person_event_comment"`:布尔值 `true` 或 `false`;  
+  - `"title_type"`:字符串,只能是 `"疑问句"`、`"感叹句"`、`"冷静陈述"` 或 `"三段式标题"`。
+
+3. 示例输出格式如下(仅示例,不代表真实分类结果):
+
+```json
+{
+  "男孩饭店吃饭,发现陌生女子和去世母亲很像,走过去说:我妈妈去世了,能抱一下我吗?": {
+    "category": "情感故事",
+    "era": "当下",
+    "characters": ["普通人", "家庭"],
+    "emotions": ["暖心", "心酸"],
+    "structure": {
+      "is_strong_reverse": true,
+      "is_person_event_comment": true,
+      "title_type": "感叹句"
+    }
+  },
+  "未来10年,现金和房子都将贬值,只有2样东西最值钱": {
+    "category": "财经科技",
+    "era": "当下",
+    "characters": ["普通人"],
+    "emotions": ["知识型"],
+    "structure": {
+      "is_strong_reverse": false,
+      "is_person_event_comment": false,
+      "title_type": "冷静陈述"
+    }
+  }
+}
+```
+4.如果标题中包含半角双引号 ",请在 JSON 中进行转义,写成 \",确保整个 JSON 可以被正常解析。
+5.输出时,
+    只输出 json 内容本身,
+    不要包裹在```json或者markdown代码块中。
+    不要添加任何额外说明文字、注释或空行。
+### 五、输入标题说明
+    以下是需要分析的文字标题列表,每一行是一个标题。
+    输入的标题是:
+    
+    """
+    return prompt + "\n".join(title_list)

+ 1 - 5
applications/tasks/monitor_tasks/gzh_article_monitor.py

@@ -31,7 +31,6 @@ class MonitorConst:
 
 
 class OutsideGzhArticlesManager(MonitorConst):
-
     def __init__(self, pool):
         self.pool = pool
 
@@ -70,7 +69,6 @@ class OutsideGzhArticlesManager(MonitorConst):
 
 
 class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
-
     async def fetch_outside_account_list(self):
         query = f"""
             select 
@@ -106,8 +104,7 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
                 print(f"crawler failed: {account['account_name']}")
         except Exception as e:
             print(
-                f"crawler failed: account_name: {account['account_name']}\n"
-                f"error: {e}\n"
+                f"crawler failed: account_name: {account['account_name']}\nerror: {e}\n"
             )
 
     async def save_each_msg_to_db(self, msg: dict, account: dict):
@@ -178,7 +175,6 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
 
 
 class OutsideGzhArticlesMonitor(OutsideGzhArticlesManager):
-
     async def fetch_article_list_to_check(self):
         publish_timestamp_threshold = int(time.time()) - self.MONITOR_CYCLE
         fetch_query = f"""

+ 0 - 1
applications/tasks/monitor_tasks/task_processing_monitor.py

@@ -5,7 +5,6 @@ from applications.tasks.task_mapper import TaskMapper
 
 
 class TaskProcessingMonitor(TaskMapper):
-
     def __init__(self, pool):
         self.pool = pool
 

+ 11 - 1
applications/tasks/task_handler.py

@@ -20,6 +20,8 @@ from applications.tasks.data_recycle_tasks import RecycleMiniProgramDetailTask
 from applications.tasks.llm_tasks import TitleRewrite
 from applications.tasks.llm_tasks import ArticlePoolCategoryGeneration
 from applications.tasks.llm_tasks import CandidateAccountQualityScoreRecognizer
+from applications.tasks.llm_tasks import ExtractTitleFeatures
+
 
 from applications.tasks.monitor_tasks import check_kimi_balance
 from applications.tasks.monitor_tasks import GetOffVideos
@@ -202,9 +204,17 @@ class TaskHandler(TaskMapper):
 
     # 更新小程序裂变信息
     async def _mini_program_detail_handler(self) -> int:
-        task = RecycleMiniProgramDetailTask(pool=self.db_client, log_client=self.log_client, trace_id=self.trace_id)
+        task = RecycleMiniProgramDetailTask(
+            pool=self.db_client, log_client=self.log_client, trace_id=self.trace_id
+        )
         await task.deal(params=self.data)
         return self.TASK_SUCCESS_STATUS
 
+    # 提取标题特征
+    async def _extract_title_features_handler(self) -> int:
+        """Run one ExtractTitleFeatures batch with this handler's payload.
+
+        Returns ``TASK_SUCCESS_STATUS`` once ``deal`` has finished.
+        """
+        extractor = ExtractTitleFeatures(
+            pool=self.db_client,
+            aliyun_log=self.log_client,
+            trace_id=self.trace_id,
+        )
+        await extractor.deal(data=self.data)
+        return self.TASK_SUCCESS_STATUS
+
 
 __all__ = ["TaskHandler"]

+ 0 - 1
applications/tasks/task_mapper.py

@@ -23,7 +23,6 @@ class Const:
 
 
 class TaskMapper(Const):
-
     def get_task_config(self, task_name) -> dict:
         match task_name:
             case "check_kimi_balance":

+ 2 - 0
applications/tasks/task_scheduler.py

@@ -195,6 +195,8 @@ class TaskScheduler(TaskHandler):
             "crawler_detail_analysis": self._crawler_article_analysis_handler,
             # 小程序裂变信息处理
             "mini_program_detail_process": self._mini_program_detail_handler,
+            # 提取标题特征
+            "extract_title_features": self._extract_title_features_handler,
         }
 
         if task_name not in handlers:

+ 3 - 3
applications/utils/async_tasks.py

@@ -9,10 +9,10 @@ async def run_tasks_with_asyncio_task_group(
     task_list: List[Any],
     handler: Callable[[Any], Coroutine[Any, Any, None]],
     *,
-    description: str = None, # 任务介绍
+    description: str = None,  # 任务介绍
     unit: str,
-    max_concurrency: int = 20, # 最大并发数
-    fail_fast: bool = False, # 是否遇到错误就退出整个tasks
+    max_concurrency: int = 20,  # 最大并发数
+    fail_fast: bool = False,  # 是否遇到错误就退出整个tasks
 ) -> Dict[str, Any]:
     """using asyncio.TaskGroup to process I/O-intensive tasks"""
     if not task_list:

+ 0 - 1
applications/utils/common.py

@@ -112,7 +112,6 @@ def extract_root_source_id(path: str) -> dict:
 
 
 def show_desc_to_sta(show_desc):
-
     def decode_show_v(show_v):
         """
 

+ 0 - 1
applications/utils/response.py

@@ -1,5 +1,4 @@
 class Response:
-
     @classmethod
     def success_response(cls, data):
         return {"code": 0, "status": "success", "data": data}

+ 0 - 1
routes/blueprint.py

@@ -10,7 +10,6 @@ server_blueprint = Blueprint("api", __name__, url_prefix="/api")
 
 
 def server_routes(pools, log_service):
-
     @server_blueprint.route("/get_cover", methods=["POST"])
     async def get_cover():
         params = await request.get_json()

+ 24 - 0
test_prompt.py

@@ -0,0 +1,24 @@
+"""Manual smoke check for the title-feature prompt.
+
+Run this file directly to print the generated prompt and the model's JSON
+answer for a handful of sample titles. The model call lives behind a
+``__main__`` guard so that merely importing the module (for example during
+pytest collection, which picks up ``test_*.py`` files) performs no network
+request.
+"""
+import json
+
+from applications.tasks.llm_tasks.prompts import extract_article_features
+from applications.api import fetch_deepseek_completion
+
+
+title_list = [
+    "他从副总理一下被撸成普通工人,事后反倒安慰妻子:我心里面有数",
+    "​莫言曾说:“谁都靠不住,除非你有用,在这个烟换烟,酒换酒的时代,生活万般苦,唯有自己渡。”",
+    "1948年国军排长带队投诚,谁知当他来到登记处时,文书却是大惊:张定元你不是在2年前就牺牲了吗",
+    "21年前抛弃侯耀文,转身嫁给师侄戴志诚的袁茵,如今活成了这样",
+    "国外一对夫妻住饭店晚上开窗通风,第二天醒来后,想不到竟有256只挤满房间,当场直接吓坏了",
+    "董卿说“娶谁都需要房子,娶谁都需要努力挣钱,但你永远都不要说,为了我而努力这种话,难道没有我,你就不需要给别人吗?“",
+    "原来九三阅兵的解说是他俩!豪迈大气庄重激昂,难怪全国人民喜爱",
+    "一颗白菜卖60多块钱!拯救韩国人今年的餐桌,全靠山东这个小城了!",
+    "63岁冯远征带71岁妻子回老家,两人牵手逛景点,梁丹妮短发好利落",
+    "我国最大的一块“飞地”,比7个上海市还要大,却不足2000人",
+]
+
+
+def main():
+    """Print the prompt for ``title_list`` and the model's parsed response."""
+    prompt = extract_article_features(title_list)
+    print(prompt)
+    res = fetch_deepseek_completion(model="default", prompt=prompt, output_type="json")
+    print(json.dumps(res, ensure_ascii=False, indent=4))
+
+
+if __name__ == "__main__":
+    main()