罗俊辉 8 месяцев назад
Родитель
Сommit
035b4b8df3

+ 6 - 5
applications/__init__.py

@@ -1,10 +1,11 @@
 """
 @author: luojunhui
 """
-from .aidit_api import AIDTApi
-from .denet_mysql import DeNetMysql
-from .ad_mysql import AdMySQL
-from .pq_mysql import PQMySQL
+from .aiditApi import AIDTApi
+from .denetMysql import DeNetMysql
+from .adMysql import AdMySQL
+from .pqMysql import PQMySQL
 from .functions import Functions
 from .data_works import ODPSApi
-from .wx_spider_api import WeixinSpider
+from .wxSpiderApi import WeixinSpider
+from .algApi import AlgApi

+ 0 - 0
applications/ad_mysql.py → applications/adMysql.py


+ 0 - 0
applications/aidit_api.py → applications/aiditApi.py


+ 29 - 0
applications/algApi.py

@@ -0,0 +1,29 @@
+"""
+@author: luojunhui
+"""
+import requests
+
+
+class AlgApi(object):
+    """
+    Client for the scoring ("algorithm") HTTP service.
+    """
+    # NOTE(review): the endpoint is a hard-coded raw IP; consider moving it to config.
+    SCORE_LIST_URL = "http://61.48.133.26:6060/score_list"
+
+    @classmethod
+    def getScoreList(cls, accountName, title_list, timeout=60):
+        """
+        Request scores for a batch of titles against one account.
+
+        :param accountName: account nickname; sent as the single element of
+            ``account_nickname_list``
+        :param title_list: list of titles; sent as ``text_list``
+        :param timeout: seconds to wait for the service. Without a timeout
+            ``requests`` waits indefinitely, so a stalled server would block
+            the caller forever.
+        :return: the decoded JSON body of the response
+        """
+        body = {
+            "account_nickname_list": [accountName],
+            "text_list": title_list,
+            "max_time": None,
+            "min_time": None,
+            "interest_type": "avg",
+            "sim_type": "mean",
+            "rate": 0.1
+        }
+        response = requests.post(url=cls.SCORE_LIST_URL, json=body, timeout=timeout)
+        return response.json()
+

+ 37 - 0
applications/decoratorApi.py

@@ -0,0 +1,37 @@
+"""
+@author: luojunhui
+"""
+import time
+
+
+def retryOnNone(max_retries=10, wait_seconds=1):
+    """
+    Build a decorator that re-invokes a function until its response carries data.
+
+    A response counts as successful when ``response['data']`` is not None.
+    A response that is None, is not subscriptable, or has no ``'data'`` key is
+    treated as a miss and retried instead of raising.
+
+    :param max_retries: maximum number of calls to the wrapped function
+    :param wait_seconds: pause between two consecutive attempts, in seconds
+    :return: a decorator; the decorated function returns the first successful
+        response, or None when every attempt missed
+    """
+    import functools  # local import keeps this block self-contained
+
+    def decorator(func):
+        """
+        :param func: the function whose result should be retried
+        :return: the retrying wrapper around ``func``
+        """
+
+        @functools.wraps(func)  # keep the wrapped function's name and docstring
+        def wrapper(*args, **kwargs):
+            """
+            :param args: positional arguments forwarded to ``func``
+            :param kwargs: keyword arguments forwarded to ``func``
+            :return: the first response with data, else None
+            """
+            for attempt in range(max_retries):
+                response = func(*args, **kwargs)
+                try:
+                    has_data = response['data'] is not None
+                except (TypeError, KeyError):
+                    has_data = False
+                if has_data:
+                    return response
+                # no point in sleeping once the last attempt has failed
+                if attempt < max_retries - 1:
+                    time.sleep(wait_seconds)
+            return None
+
+        return wrapper
+
+    return decorator

+ 84 - 0
applications/denetMysql.py

@@ -0,0 +1,84 @@
+"""
+@author: luojunhui
+"""
+import pymysql
+from config import planConfigDict
+
+
+class DeNetMysql(object):
+    """
+    Helper around one shared pymysql connection to the ``aigc-admin-prod`` database.
+    """
+    # NOTE(review): host and credentials are hard-coded in source control; they
+    # should be loaded from configuration or a secret store.
+    # The connection is opened once, when this class body executes, and is
+    # shared by every classmethod below.
+    connection = pymysql.connect(
+        host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # database host (intranet address)
+        port=3306,  # port
+        user="crawler_admin",  # MySQL user name
+        passwd="cyber#crawler_2023",  # MySQL password
+        db="aigc-admin-prod",  # database name
+        charset="utf8mb4"  # connection character set
+    )
+
+    @classmethod
+    def getUnEmptyPlan(cls):
+        """
+        List the ids of non-empty crawler plans that are known to the config.
+
+        :return: ids of ``crawler_plan`` rows whose name contains '腾讯互选-',
+            whose ``crawler_total_num`` is positive, and for which
+            ``planConfigDict`` holds a truthy entry
+        """
+        sql = "select id, name from crawler_plan where name like '%腾讯互选-%' and crawler_total_num > 0;"
+        # the context manager closes the cursor even if the query fails
+        with cls.connection.cursor() as cursor:
+            cursor.execute(sql)
+            rows = cursor.fetchall()
+        plan_ids = [row[0] for row in rows]
+        return [plan_id for plan_id in plan_ids if planConfigDict.get(plan_id)]
+
+    @classmethod
+    def update(cls, sql, params):
+        """
+        Execute one write statement and commit it.
+
+        :param sql: statement with ``%s`` placeholders
+        :param params: values bound to the placeholders
+        :return: None
+        :raises Exception: whatever the driver raised; the transaction is
+            rolled back first so the shared connection is not left mid-transaction
+        """
+        try:
+            with cls.connection.cursor() as cursor:
+                cursor.execute(sql, params)
+            cls.connection.commit()
+        except Exception:
+            cls.connection.rollback()
+            raise
+
+    @classmethod
+    def select(cls, sql, params=None):
+        """
+        Run a query and fetch every row.
+
+        :param sql: query text, optionally with ``%s`` placeholders
+        :param params: optional values bound to the placeholders; prefer this
+            over formatting values into ``sql``
+        :return: all result rows as returned by ``cursor.fetchall()``
+        """
+        with cls.connection.cursor() as cursor:
+            cursor.execute(sql, params)
+            return cursor.fetchall()
+
+
+# from tqdm import tqdm
+#
+# D = DeNetMysql()
+# select_sql = "select article_id, link from crawler_meta_article"
+# data_list = D.select(select_sql)
+
+
+# def update_single(line):
+#     """
+#
+#     :return:
+#     """
+#     aid = line[0]
+#     url = line[1]
+#     index = url.split("idx=")[1].split("&")[0]
+#     update_sql = f"""
+#             update crawler_meta_article
+#             set article_index = %s
+#             where article_id = %s;
+#         """
+#     D.update(sql=update_sql, params=(index, aid))
+#     # print("success   :{}".format(aid))
+#
+#
+# for line in tqdm(data_list):
+#     update_single(line)

+ 0 - 33
applications/denet_mysql.py

@@ -1,33 +0,0 @@
-"""
-@author: luojunhui
-"""
-import pymysql
-from config import planConfigDict
-
-
-class DeNetMysql(object):
-    """
-     Mysql Server
-    """
-    connection = pymysql.connect(
-        host="rm-t4na9qj85v7790tf84o.mysql.singapore.rds.aliyuncs.com",  # 数据库IP地址,内网地址
-        port=3306,  # 端口号
-        user="crawler_readonly",  # mysql用户名
-        passwd="cyber#crawler_2023",  # mysql用户登录密码
-        db="aigc-admin-prod",  # 数据库名
-        charset="utf8mb4"  # 如果数据库里面的文本是utf8编码的,charset指定是utf8
-    )
-
-    @classmethod
-    def getUnEmptyPlan(cls):
-        """
-        :return:
-        """
-        sql = "select id, name from crawler_plan where name like '%腾讯互选-%' and crawler_total_num > 0;"
-        cursor = cls.connection.cursor()
-        cursor.execute(sql)
-        data = cursor.fetchall()
-        result = [list(line)[0] for line in data]
-        # print(result)
-        result = [i for i in result if planConfigDict.get(i)]
-        return result

+ 75 - 0
applications/functions.py

@@ -1,6 +1,8 @@
 """
 @author: luojunhui
 """
+from datetime import datetime, timezone
+import hashlib
 import requests
 import pymysql
 
@@ -142,6 +144,79 @@ class Functions(object):
                 return True
         return False
 
+    @classmethod
+    def show_desc_to_sta(cls, show_desc):
+        """
+        Convert a ShowDesc statistics string into numeric counters.
+
+        The string is read as label/value pairs: pairs are separated by the
+        two-character sequence U+2004 U+2005, and a label is separated from
+        its value by U+2006. A value may carry a Chinese magnitude suffix
+        (千 = 1e3, 万 = 1e4, 亿 = 1e8); every '+' is dropped before parsing.
+
+        :param show_desc: raw ShowDesc text
+        :return: dict with show_view_count, show_like_count, show_pay_count
+            and show_zs_count; labels that do not occur count as 0
+        :raises ValueError: if a pair is malformed or a value is not numeric
+        """
+        unit_exponents = (('千', 'e3'), ('万', 'e4'), ('亿', 'e8'))
+        label_to_field = {
+            '阅读': 'show_view_count',  # article
+            '看过': 'show_view_count',  # image-text post
+            '观看': 'show_view_count',  # video
+            '赞': 'show_like_count',
+            '付费': 'show_pay_count',
+            '赞赏': 'show_zs_count',
+        }
+
+        def decode_show_v(show_v):
+            """
+            :param show_v: value text such as '4177' or '1.0万'
+            :return: the value as an int
+            """
+            for unit, exponent in unit_exponents:
+                show_v = show_v.replace(unit, exponent)
+            # Parse numerically instead of eval(): the text comes from scraped
+            # pages and must never be executed as code.
+            try:
+                return int(show_v)
+            except ValueError:
+                return int(float(show_v))
+
+        def decode_show_k(show_k):
+            """
+            :param show_k: label text such as '阅读'
+            :return: the counter name for the label, or 'show_unknown'
+            """
+            if show_k not in label_to_field:
+                print(f'error from decode_show_k, show_k not found: {show_k}')
+            return label_to_field.get(show_k, 'show_unknown')
+
+        show_desc = show_desc.replace('+', '')
+        sta = {}
+        for show_kv in show_desc.split('\u2004\u2005'):
+            if not show_kv:
+                continue
+            show_k, show_v = show_kv.split('\u2006')
+            sta[decode_show_k(show_k)] = decode_show_v(show_v)
+        return {
+            'show_view_count': sta.get('show_view_count', 0),
+            'show_like_count': sta.get('show_like_count', 0),
+            'show_pay_count': sta.get('show_pay_count', 0),
+            'show_zs_count': sta.get('show_zs_count', 0),
+        }
+
+    @classmethod
+    def generateGzhId(cls, url):
+        """
+        Derive an MD5 identifier for an article from its link.
+
+        The ``biz``, ``idx`` and ``sn`` query values are cut out of the raw URL
+        text, joined as ``biz-idx-sn`` and hashed.
+
+        :param url: article link containing ``biz=``, ``&idx=`` and ``&sn=``
+        :return: hexadecimal MD5 digest of ``"<biz>-<idx>-<sn>"``
+        :raises IndexError: if one of the three markers is missing from ``url``
+        """
+
+        def value_after(marker):
+            # text between the first occurrence of `marker` and the next '&'
+            return url.split(marker)[1].split("&")[0]
+
+        parts = (value_after("biz="), value_after("&idx="), value_after("&sn="))
+        return hashlib.md5("-".join(parts).encode()).hexdigest()
 
+    @classmethod
+    def time_stamp_to_str(cls, timestamp):
+        """
+        Format a Unix timestamp as a local-time string.
+
+        :param timestamp: seconds since the epoch
+        :return: the moment rendered in the system's local time zone as
+            'YYYY-MM-DD HH:MM:SS'
+        """
+        # fromtimestamp(..., tz=utc) yields the same aware datetime as the
+        # utcfromtimestamp().replace(tzinfo=utc) pair, without calling
+        # utcfromtimestamp(), which is deprecated since Python 3.12.
+        local_dt = datetime.fromtimestamp(timestamp, tz=timezone.utc).astimezone()
+        return local_dt.strftime('%Y-%m-%d %H:%M:%S')

+ 0 - 0
applications/pq_mysql.py → applications/pqMysql.py


+ 135 - 0
applications/spiderTool.py

@@ -0,0 +1,135 @@
+"""
+@author: luojunhui
+"""
+import time
+import datetime
+
+from applications import WeixinSpider, Functions, PQMySQL, DeNetMysql
+
+
+class SpiderTools(object):
+    """
+    Shared entry point for crawling long-form WeChat articles.
+    """
+    spider_client = WeixinSpider()
+    function = Functions()
+    pq_mysql_client = PQMySQL()
+    denet_mysql_client = DeNetMysql()
+
+    @classmethod
+    def _fetchMsgPage(cls, gh_id, index):
+        """
+        Fetch one page of an account's message list.
+
+        The account id is passed positionally because the two original call
+        sites disagreed on its keyword (``gh_id=`` vs ``ghId=``), so one of
+        them could not match the signature of ``update_msg_list``.
+        NOTE(review): assumes the account id is the first parameter - confirm.
+
+        :param gh_id: account id
+        :param index: page cursor, or None for the first page
+        :return: tuple ``(response, msg_list)``; ``msg_list`` is None when the
+            response is missing or carries no data
+        """
+        response = cls.spider_client.update_msg_list(gh_id, index=index)
+        # The response may be None (presumably when the spider call is wrapped
+        # by the retry decorator and every attempt failed) - treat as no data.
+        payload = (response or {}).get("data") or {}
+        return response, payload.get("data")
+
+    @classmethod
+    def searchEachAccountArticlesSinglePage(cls, gh_id, category):
+        """
+        Crawl only the first page of an account and store its articles.
+
+        :param gh_id: account id
+        :param category: category stored with every article
+        :return: None
+        """
+        _, msg_list = cls._fetchMsgPage(gh_id, None)
+        if not msg_list:
+            print("No more data")
+            return
+        cls.updateDataIntoMysql(
+            gh_id=gh_id,
+            category=category,
+            mode="account",
+            article_list=msg_list
+        )
+        cls.updateLatestAccountTimeStamp(gh_id=gh_id)
+
+    @classmethod
+    def searchEachAccountArticlesAllData(cls, gh_id, category, latest_time_stamp, index=None):
+        """
+        Crawl an account page by page until the stop timestamp is reached.
+
+        Paging continues while the last message of the current page has an
+        ``UpdateTime`` greater than ``latest_time_stamp``.
+
+        :param gh_id: account id
+        :param category: category stored with every article
+        :param latest_time_stamp: Unix timestamp at which paging stops
+        :param index: cursor of the page to start from, None for the first page
+        :return: None
+        """
+        # Iterate instead of recursing once per page, so a long history
+        # cannot exhaust the interpreter's recursion limit.
+        while True:
+            response, msg_list = cls._fetchMsgPage(gh_id, index)
+            if not msg_list:
+                print("No more data")
+                return
+            cls.updateDataIntoMysql(
+                gh_id=gh_id, category=category, article_list=msg_list, mode="account"
+            )
+            last_time_stamp_in_this_msg = msg_list[-1]["AppMsg"]["BaseInfo"]["UpdateTime"]
+            if latest_time_stamp >= last_time_stamp_in_this_msg:
+                # record the most recent crawl time
+                cls.updateLatestAccountTimeStamp(gh_id=gh_id)
+                return
+            index = response["data"]["next_cursor"]
+
+    @classmethod
+    def updateDataIntoMysql(cls, gh_id, category, mode, article_list):
+        """
+        Insert every article of the given messages into ``crawler_meta_article``.
+
+        A failure on one article is printed and skipped so the remaining
+        articles are still processed.
+
+        :param gh_id: account id, stored as ``out_account_id``
+        :param category: category stored with every article
+        :param mode: crawl mode stored with every article
+        :param article_list: message objects, each carrying its articles under
+            ``["AppMsg"]["DetailInfo"]``
+        :return: None
+        """
+        # the statement does not depend on the article, so build it once
+        insert_sql = """
+            insert into crawler_meta_article
+            (platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt, description, publish_time, crawler_time, status, unique_index)
+            VALUES
+            (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+        """
+        for article_obj in article_list:
+            for obj in article_obj["AppMsg"]["DetailInfo"]:
+                try:
+                    show_stat = cls.function.show_desc_to_sta(obj["ShowDesc"])
+                    cls.denet_mysql_client.update(
+                        sql=insert_sql,
+                        params=(
+                            "weixin",
+                            mode,
+                            category,
+                            gh_id,
+                            obj['ItemIndex'],
+                            obj["Title"],
+                            obj["ContentUrl"],
+                            show_stat.get("show_view_count", 0),
+                            show_stat.get("show_like_count", 0),
+                            obj["Digest"],
+                            obj["send_time"],
+                            int(time.time()),
+                            1,
+                            cls.function.generateGzhId(obj["ContentUrl"]),
+                        ),
+                    )
+                except Exception as e:
+                    # deliberate best effort: report and continue with the next article
+                    print(e)
+
+    @classmethod
+    def updateLatestAccountTimeStamp(cls, gh_id):
+        """
+        Store the account's newest ``publish_time`` as its latest update time.
+
+        Does nothing when no article is stored for the account.
+
+        :param gh_id: account id
+        :return: None
+        """
+        # NOTE(review): gh_id is formatted straight into the statement, as in
+        # the original; bind it as a parameter if it can come from untrusted input.
+        select_sql = f"""
+            SELECT publish_time 
+            From crawler_meta_article 
+            WHERE out_account_id = '{gh_id}'
+            ORDER BY publish_time DESC LIMIT 1;
+        """
+        result = cls.denet_mysql_client.select(select_sql)
+        if not result:
+            return
+        time_stamp = result[0][0]
+        # fromtimestamp() converts to local time. The previous
+        # utcfromtimestamp(...).astimezone() treated the naive UTC value as
+        # if it were already local, so the stored string was UTC wall-clock time.
+        dt_string = datetime.datetime.fromtimestamp(time_stamp).strftime('%Y-%m-%d %H:%M:%S')
+        update_sql = """
+            update long_articles_accounts
+            set latest_update_time = %s
+            where account_id = %s;
+        """
+        cls.pq_mysql_client.update(sql=update_sql, params=(dt_string, gh_id))

+ 1 - 35
applications/wx_spider_api.py → applications/wxSpiderApi.py

@@ -2,41 +2,9 @@
 @author: luojunhui
 """
 import json
-import time
 import requests
 
-
-def retryOnNone():
-    """
-    基于None类型数据的重试装饰器
-    :return:
-    """
-
-    def decorator(func):
-        """
-        :param func:
-        :return:
-        """
-        max_retries = 10
-        wait_seconds = 1
-
-        def wrapper(*args, **kwargs):
-            """
-
-            :param args:
-            :param kwargs:
-            :return:
-            """
-            for attempt in range(max_retries):
-                response = func(*args, **kwargs)
-                if response['data'] is not None:
-                    return response
-                time.sleep(wait_seconds)
-            return None
-
-        return wrapper
-
-    return decorator
+from applications.decoratorApi import retryOnNone
 
 
 class WeixinSpider(object):
@@ -115,5 +83,3 @@ class WeixinSpider(object):
             json={"content_link": content_url}
         )
         return response.json()
-
-

+ 572 - 0
dev/read.json

@@ -0,0 +1,572 @@
+{
+    "code": 0,
+    "msg": null,
+    "data": {
+        "has_more": true,
+        "next_cursor": "CAMQHhiCgOC1BiAEOJTbhK8GQAFIAQ==",
+        "data": [
+            {
+                "BaseInfo": {
+                    "MsgId": 1000000030,
+                    "MsgType": 49,
+                    "DateTime": 1723334432,
+                    "Status": 2,
+                    "FuncFlag": 34816,
+                    "UniqueId": "1000000030",
+                    "NextOffset": 21
+                },
+                "AppMsg": {
+                    "BaseInfo": {
+                        "AppMsgId": 2247485304,
+                        "CreateTime": 1723334429,
+                        "UpdateTime": 1723334642,
+                        "Type": 9,
+                        "BigPic": 0
+                    },
+                    "DetailInfo": [
+                        {
+                            "Title": "中国发出“逐客令”:1分钱也不会援助,俄罗斯:这就对了",
+                            "Digest": "近期,中东地区的紧张局势再次引起全球关注。以色列与巴勒斯坦之间的冲突加剧,人员伤亡和财产损失的消息不断传出。",
+                            "ItemIndex": 1,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485304&idx=1&sn=a3a9e207b3437e16517c1632e304e424&chksm=c24342c7fda20f4453402bf010f4df5e3322bf99ef85998d109469165b8f47424114bcda9a48&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJenfRXiaXvPEGWuiaib9HBgwfpfMkWJx9jdkMiaevY9cJDpQl5SqrzKv23dYXRfG7WeicryWfxt2GI7WIQ/640?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "",
+                            "CoverImgUrl_235_1": "",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 1.0万  赞 40  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "",
+                            "CoverImgUrl_16_9_640": "",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJenfRXiaXvPEGWuiaib9HBgwfpfMkWJx9jdkMiaevY9cJDpQl5SqrzKv23dYXRfG7WeicryWfxt2GI7WIQ/640?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 235,
+                                "height_hint": 100
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1723334489,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "“丧良心啊!”侄子50000元卖给叔叔10头母猪,到家不久后全部死亡,叔叔向侄子索赔,侄子:你自己养猪不当怪我?",
+                            "Digest": "在寒冷的2023年冬天,60多岁的老李决定在山区承包土地开办养猪场谋生。这个计划本应为他老年生活带来一线希望",
+                            "ItemIndex": 2,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485304&idx=2&sn=dc063a8a64cf42f3c4638ba3a00b3148&chksm=c2d6038a077e1f24bdb4b5480afaa17d42a06063eab886dbfde33a1540c063c2b9d3bb4e08c5&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJenfRXiaXvPEGWuiaib9HBgwfpv8YJibaSt7w9HIQp4aEIfWuA7vC1ib0SEkSApveehZf6qlT2zwObVcAQ/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "",
+                            "CoverImgUrl_235_1": "",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 4177  赞 10  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "",
+                            "CoverImgUrl_16_9_640": "",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJenfRXiaXvPEGWuiaib9HBgwfpv8YJibaSt7w9HIQp4aEIfWuA7vC1ib0SEkSApveehZf6qlT2zwObVcAQ/300?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1723334489,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "男子被狗咬后将狗砍死被索赔,法院判决让人不懂:不算正当防卫?",
+                            "Digest": "辽宁发生了一起引发广泛讨论的宠物狗伤人事件。李先生是一名普通市民,勤劳工作,生活平静。然而,一天凌晨,当李先",
+                            "ItemIndex": 3,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485304&idx=3&sn=0522a5634074cd7b0fe3891969a8bfd5&chksm=c28f2d33cb03e00b5c75bc23bdabf44cd7d770cc053ecb4a55bf63d9261f1a7190b2000e3f92&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJenfRXiaXvPEGWuiaib9HBgwfpu1HkMAzKibawFrj1Cjb3ZolCn2sX3zAYfUJokwVUXk06ZtxWCMW8qaw/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "",
+                            "CoverImgUrl_235_1": "",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 345  赞 3  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "",
+                            "CoverImgUrl_16_9_640": "",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJenfRXiaXvPEGWuiaib9HBgwfpu1HkMAzKibawFrj1Cjb3ZolCn2sX3zAYfUJokwVUXk06ZtxWCMW8qaw/300?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1723334489,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "男子20年前存3000元,到期19万,结果取钱时银行不认账!",
+                            "Digest": "银行与客户之间的不对等关系长期以来一直备受诟病。银行素以其单方面的霸王条款闻名,而客户在与银行的纠纷中通常处",
+                            "ItemIndex": 4,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485304&idx=4&sn=bbb7584a304eaa9128488e26a44273f8&chksm=c2bb5af1d2bb8413a496401ebc325c16fb1a30efe2271d2e4c4cb4a8ad909bdcec2ca3623dc7&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/sz_mmbiz_png/3QB9iabBaTJenfRXiaXvPEGWuiaib9HBgwfp2p9ZBFziaKnzSMuTfs1l4RHzz13jibwCAEpiaFjvetTibVC7Mzkf19YmWA/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_1_1": "",
+                            "CoverImgUrl_235_1": "",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 217  赞 4  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "",
+                            "CoverImgUrl_16_9_640": "",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/sz_mmbiz_png/3QB9iabBaTJenfRXiaXvPEGWuiaib9HBgwfp2p9ZBFziaKnzSMuTfs1l4RHzz13jibwCAEpiaFjvetTibVC7Mzkf19YmWA/300?wxtype=png&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1723334489,
+                            "finder_export_id": ""
+                        }
+                    ]
+                }
+            },
+            {
+                "BaseInfo": {
+                    "MsgId": 1000000001,
+                    "MsgType": 49,
+                    "DateTime": 1723248025,
+                    "Status": 2,
+                    "FuncFlag": 34816,
+                    "UniqueId": "1000000001",
+                    "NextOffset": 29
+                },
+                "AppMsg": {
+                    "BaseInfo": {
+                        "AppMsgId": 2247483655,
+                        "CreateTime": 1723248022,
+                        "UpdateTime": 1723271958,
+                        "Type": 9,
+                        "BigPic": 0
+                    },
+                    "DetailInfo": [
+                        {
+                            "Title": "金正恩乘专列过江,为何故意绕开中国?原因有两个,值得我们警惕",
+                            "Digest": "国际局势总体上趋于稳定,但在局部纷争不断。尤其是中东地区因为资源丰富,而成为冲突最为严重的地区之一。随着多年",
+                            "ItemIndex": 1,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247483655&idx=1&sn=17e19e189ecbc799b3e8292271f1aa28&chksm=c26621234962af9537576dcba44cbcd28d1f5b2570181c95542a0c7a18e43ded41e953f7d8e4&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/sz_mmbiz_png/3QB9iabBaTJeZvqVtPEBA4yzEIrZmCkCow3Mxamtz1gHPTV5eW5ESVCEWJUOrSAqjhcIuVT8CfiaNzbMVvj8ic6TA/640?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_1_1": "",
+                            "CoverImgUrl_235_1": "",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 1982  赞 4  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "",
+                            "CoverImgUrl_16_9_640": "",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/sz_mmbiz_png/3QB9iabBaTJeZvqVtPEBA4yzEIrZmCkCow3Mxamtz1gHPTV5eW5ESVCEWJUOrSAqjhcIuVT8CfiaNzbMVvj8ic6TA/640?wxtype=png&wxfrom=0",
+                                "width_hint": 235,
+                                "height_hint": 100
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1723248037,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "领导借我12万,半年后只还了10万,老婆死活不让我索要,我被提拔后,他问我:钱还清了吗?真相太让人羞愧了!",
+                            "Digest": "故事的主人公是男主角,他在职场上有着出色的表现和较高的地位,但在处理人情关系上显得有些懵懂。这个故事发生在他",
+                            "ItemIndex": 2,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247483655&idx=2&sn=0e7b1226810421c5cb828ed687b4bf54&chksm=c271f588ed1352de34fa7bdaf174e61ee3df7e51a67fe1c57072e2db430f281e7ce1358b2997&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJeZvqVtPEBA4yzEIrZmCkCoAOl2C9j5hfmOrcB0p4SprUYlgpKy9qvBdM1o8cLcibUWABQZ110iabiaQ/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "",
+                            "CoverImgUrl_235_1": "",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 35  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "",
+                            "CoverImgUrl_16_9_640": "",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJeZvqVtPEBA4yzEIrZmCkCoAOl2C9j5hfmOrcB0p4SprUYlgpKy9qvBdM1o8cLcibUWABQZ110iabiaQ/300?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1723248037,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "离职后发现工资卡里多了5万,以为发项目奖金了,找财务核实,结果财务说:这是公司领导安排给你的补偿,10年员工离职有补贴!",
+                            "Digest": "事情是这样的,投稿人离职半个月后,某天突然发现自己的工资卡里多了5万块钱。她当时的第一反应是,这可能是之前一",
+                            "ItemIndex": 3,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247483655&idx=3&sn=d49b8f53b0e1eb5df4e571ba0229ce3c&chksm=c21a4be76792e1b7650ae6736e3bf5153e248fe3e69925b4413287b19b2a20114f8ed652c5dc&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJeZvqVtPEBA4yzEIrZmCkCo8iayDzONqH9UGiagO989OoUuV9qtccyMIiaUHiaSHuKXl4O79adSUibBUOg/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "",
+                            "CoverImgUrl_235_1": "",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 43  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "",
+                            "CoverImgUrl_16_9_640": "",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJeZvqVtPEBA4yzEIrZmCkCo8iayDzONqH9UGiagO989OoUuV9qtccyMIiaUHiaSHuKXl4O79adSUibBUOg/300?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1723248037,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "过于离谱,同事请丧假,理由父亲去世。后勤主任开车100多公里奔丧,拿着一大包的烧纸金银斗,还拿着大大的花圈,结果人家爹开门了",
+                            "Digest": "事情的起因源于一位同事请丧假,理由是父亲去世。然而,当后勤主任亲自驱车100多公里前去奔丧时,却发现开门的竟",
+                            "ItemIndex": 4,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247483655&idx=4&sn=efbddec581a24a1184e38942b2fc0975&chksm=c27c6fdcfdf252ecb2c9781b7e933e272aa5453455065ca0bf9450aa65c58d158a9962ae7325&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJeZvqVtPEBA4yzEIrZmCkCoBdCqtwsnsdzOgUQOWr0W4lrh6GSe0ibyoVOB6hzRszwpOjk1UloVia4g/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "",
+                            "CoverImgUrl_235_1": "",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 34  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "",
+                            "CoverImgUrl_16_9_640": "",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/sz_mmbiz_jpg/3QB9iabBaTJeZvqVtPEBA4yzEIrZmCkCoBdCqtwsnsdzOgUQOWr0W4lrh6GSe0ibyoVOB6hzRszwpOjk1UloVia4g/300?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1723248037,
+                            "finder_export_id": ""
+                        }
+                    ]
+                }
+            },
+            {
+                "BaseInfo": {
+                    "MsgId": 2247485242,
+                    "DateTime": 1709256084,
+                    "UniqueId": "2247485242",
+                    "NextOffset": 1
+                },
+                "AppMsg": {
+                    "BaseInfo": {
+                        "AppMsgId": 2247485242,
+                        "CreateTime": 1709256083,
+                        "UpdateTime": 1709256108,
+                        "Type": 10002,
+                        "BigPic": 0
+                    },
+                    "DetailInfo": [
+                        {
+                            "Title": "全国仅此一副对联,堪称“神联”,写尽了人生!",
+                            "Digest": "",
+                            "ItemIndex": 1,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485242&idx=1&sn=c654f1b6b129908f0d5f27b3c2195a79&chksm=c3118e39f466072f4cfdda448e4cae267af6211713d3f933a7c02104e42596a1eb666b8f87fc&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X80KlaTYSCD9xic9gXYaReZO94xeUpYLJvjbic8BAWawILGLUZnKXWU5qTZ1riascpsh1sBN8X3Y6g6g/640?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X80KlaTYSCD9xic9gXYaReZO94xeUpYLJvjbic8BAWawILGLUZnKXWU5qTZ1riascpsh1sBN8X3Y6g6g/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X80KlaTYSCD9xic9gXYaReZO94xeUpYLJvjbic8BAWawILGLUZnKXWU5qTZ1riascpsh1sBN8X3Y6g6g/640?wxtype=jpeg&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 420  赞 2  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X80KlaTYSCD9xic9gXYaReZO94xeUpYLJvjbic8BAWawILGLUZnKXWU5qTZ1riascpsh1sBN8X3Y6g6g/640?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X80KlaTYSCD9xic9gXYaReZO94xeUpYLJvjbic8BAWawILGLUZnKXWU5qTZ1riascpsh1sBN8X3Y6g6g/640?wxtype=jpeg&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X80KlaTYSCD9xic9gXYaReZO94xeUpYLJvjbic8BAWawILGLUZnKXWU5qTZ1riascpsh1sBN8X3Y6g6g/640?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 235,
+                                "height_hint": 100
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1709256084,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "姓氏数量排名表,第一名万万没想到是他",
+                            "Digest": "",
+                            "ItemIndex": 2,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485242&idx=2&sn=10dd7ab91bc7f2be40aac1eed5fe17d8&chksm=c3118e39f466072f09643dbbf424d0318501c967ae8e8531d74f58dc8746c61a4794ed68a682&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibvP7z3oVibwSGFKiaAIjuYsl7W72WIhxVrZzyS7SA3p9icicdDxnngJk2GGAqUIfewBAPg5sNoZe4YLw/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibvP7z3oVibwSGFKiaAIjuYsl7W72WIhxVrZzyS7SA3p9icicdDxnngJk2GGAqUIfewBAPg5sNoZe4YLw/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibvP7z3oVibwSGFKiaAIjuYsl7W72WIhxVrZzyS7SA3p9icicdDxnngJk2GGAqUIfewBAPg5sNoZe4YLw/300?wxtype=png&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 491  赞 3  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibvP7z3oVibwSGFKiaAIjuYsl7W72WIhxVrZzyS7SA3p9icicdDxnngJk2GGAqUIfewBAPg5sNoZe4YLw/640?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibvP7z3oVibwSGFKiaAIjuYsl7W72WIhxVrZzyS7SA3p9icicdDxnngJk2GGAqUIfewBAPg5sNoZe4YLw/640?wxtype=png&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibvP7z3oVibwSGFKiaAIjuYsl7W72WIhxVrZzyS7SA3p9icicdDxnngJk2GGAqUIfewBAPg5sNoZe4YLw/300?wxtype=png&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1709256084,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "武则天墓前61个无头石人",
+                            "Digest": "",
+                            "ItemIndex": 3,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485242&idx=3&sn=981f376b7611a8500d41c13f83bead6a&chksm=c3118e39f466072fef4aa2e624a8a1bebcd51a99fb76a893358d587c8eb21b9fe572fc822cfc&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X8FSQqnQYqfG5JHJKEMsHfNqgoOUITL8TG1aIqHj8Q8eDVEvjPYZblhpMB7iafqx3eDH87659Ay2QQ/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X8FSQqnQYqfG5JHJKEMsHfNqgoOUITL8TG1aIqHj8Q8eDVEvjPYZblhpMB7iafqx3eDH87659Ay2QQ/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X8FSQqnQYqfG5JHJKEMsHfNqgoOUITL8TG1aIqHj8Q8eDVEvjPYZblhpMB7iafqx3eDH87659Ay2QQ/300?wxtype=jpeg&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 456  赞 7  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X8FSQqnQYqfG5JHJKEMsHfNqgoOUITL8TG1aIqHj8Q8eDVEvjPYZblhpMB7iafqx3eDH87659Ay2QQ/640?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X8FSQqnQYqfG5JHJKEMsHfNqgoOUITL8TG1aIqHj8Q8eDVEvjPYZblhpMB7iafqx3eDH87659Ay2QQ/640?wxtype=jpeg&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8X8FSQqnQYqfG5JHJKEMsHfNqgoOUITL8TG1aIqHj8Q8eDVEvjPYZblhpMB7iafqx3eDH87659Ay2QQ/300?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1709256084,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "惊!这四个姓氏的人可能不是“炎黄子孙”?!有你吗?",
+                            "Digest": "",
+                            "ItemIndex": 4,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485242&idx=4&sn=e3c362142b685a63402c655a78fa2d1d&chksm=c3118e39f466072f8b1e27f602589c0a0de94ae4c1952761879c8627482f9afbadd87c9fe2e6&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibRicUib7OSI438DtNoibIBNOResdvdhUln0MiaRDz9VG4fibfNiaqkRqZYMYU4aTCIqn02w3vXrw8yfGTA/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibRicUib7OSI438DtNoibIBNOResdvdhUln0MiaRDz9VG4fibfNiaqkRqZYMYU4aTCIqn02w3vXrw8yfGTA/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibRicUib7OSI438DtNoibIBNOResdvdhUln0MiaRDz9VG4fibfNiaqkRqZYMYU4aTCIqn02w3vXrw8yfGTA/300?wxtype=png&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 624  赞 2  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibRicUib7OSI438DtNoibIBNOResdvdhUln0MiaRDz9VG4fibfNiaqkRqZYMYU4aTCIqn02w3vXrw8yfGTA/640?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibRicUib7OSI438DtNoibIBNOResdvdhUln0MiaRDz9VG4fibfNiaqkRqZYMYU4aTCIqn02w3vXrw8yfGTA/640?wxtype=png&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibRicUib7OSI438DtNoibIBNOResdvdhUln0MiaRDz9VG4fibfNiaqkRqZYMYU4aTCIqn02w3vXrw8yfGTA/300?wxtype=png&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1709256084,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "我走了,你咋办?",
+                            "Digest": "",
+                            "ItemIndex": 5,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485242&idx=5&sn=d7e5f60c08e9d0c24f3630d760bb7468&chksm=c3118e39f466072f257a3c176caf313f3cbf11ce3856d59e2756abac94f4bde489a17ba81892&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8XibNcZAWLMRSE5gBl6QrwVcfIXt2Kcjnh4IpXHx3l0vJMndP0Yq9miaTVF6tlUl1EZrgMJhupA8nksA/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8XibNcZAWLMRSE5gBl6QrwVcfIXt2Kcjnh4IpXHx3l0vJMndP0Yq9miaTVF6tlUl1EZrgMJhupA8nksA/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8XibNcZAWLMRSE5gBl6QrwVcfIXt2Kcjnh4IpXHx3l0vJMndP0Yq9miaTVF6tlUl1EZrgMJhupA8nksA/300?wxtype=jpeg&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 1442  赞 8  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8XibNcZAWLMRSE5gBl6QrwVcfIXt2Kcjnh4IpXHx3l0vJMndP0Yq9miaTVF6tlUl1EZrgMJhupA8nksA/640?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8XibNcZAWLMRSE5gBl6QrwVcfIXt2Kcjnh4IpXHx3l0vJMndP0Yq9miaTVF6tlUl1EZrgMJhupA8nksA/640?wxtype=jpeg&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8XibNcZAWLMRSE5gBl6QrwVcfIXt2Kcjnh4IpXHx3l0vJMndP0Yq9miaTVF6tlUl1EZrgMJhupA8nksA/300?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1709256084,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "这3个姓氏的人都是他后裔",
+                            "Digest": "",
+                            "ItemIndex": 6,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485242&idx=6&sn=7d5591ba3c4a3fa5e9e0429032030c98&chksm=c3118e39f466072f682c28ef220b5716607ad9db727e1a3329fe0a605fc7d67743ccce3580da&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibnbEIVjW9DpxOQDYwMgDIR39pekeAdNIkkQuGXSFyiakTVLIgZter0ibPPxdv5aCFMAATaZ8o1YZIg/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibnbEIVjW9DpxOQDYwMgDIR39pekeAdNIkkQuGXSFyiakTVLIgZter0ibPPxdv5aCFMAATaZ8o1YZIg/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibnbEIVjW9DpxOQDYwMgDIR39pekeAdNIkkQuGXSFyiakTVLIgZter0ibPPxdv5aCFMAATaZ8o1YZIg/300?wxtype=png&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 1099  赞 8  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibnbEIVjW9DpxOQDYwMgDIR39pekeAdNIkkQuGXSFyiakTVLIgZter0ibPPxdv5aCFMAATaZ8o1YZIg/640?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibnbEIVjW9DpxOQDYwMgDIR39pekeAdNIkkQuGXSFyiakTVLIgZter0ibPPxdv5aCFMAATaZ8o1YZIg/640?wxtype=png&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8XibnbEIVjW9DpxOQDYwMgDIR39pekeAdNIkkQuGXSFyiakTVLIgZter0ibPPxdv5aCFMAATaZ8o1YZIg/300?wxtype=png&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1709256084,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "曹操说了10个字,司马懿说了9个字",
+                            "Digest": "",
+                            "ItemIndex": 7,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485242&idx=7&sn=9c578610a63fd1e3bd8c75d6263dda3f&chksm=c3118e39f466072fc04fcf9dcc2e72e1eac3c47b9e11e076a6adc27b5a6df5d57beb5080681e&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8X8BiaWzfp60F8knxA3KkqSKRwibgEn2qYxvkc8K11e37InZAqUXfkqOyTAyWT7dqYibiaIfDwmkdDaK6Q/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8X8BiaWzfp60F8knxA3KkqSKRwibgEn2qYxvkc8K11e37InZAqUXfkqOyTAyWT7dqYibiaIfDwmkdDaK6Q/300?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8X8BiaWzfp60F8knxA3KkqSKRwibgEn2qYxvkc8K11e37InZAqUXfkqOyTAyWT7dqYibiaIfDwmkdDaK6Q/300?wxtype=png&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 1347  赞 5  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8X8BiaWzfp60F8knxA3KkqSKRwibgEn2qYxvkc8K11e37InZAqUXfkqOyTAyWT7dqYibiaIfDwmkdDaK6Q/640?wxtype=png&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8X8BiaWzfp60F8knxA3KkqSKRwibgEn2qYxvkc8K11e37InZAqUXfkqOyTAyWT7dqYibiaIfDwmkdDaK6Q/640?wxtype=png&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_png/iarplicwGO8X8BiaWzfp60F8knxA3KkqSKRwibgEn2qYxvkc8K11e37InZAqUXfkqOyTAyWT7dqYibiaIfDwmkdDaK6Q/300?wxtype=png&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1709256084,
+                            "finder_export_id": ""
+                        },
+                        {
+                            "Title": "嘉庆不听乾隆临终嘱咐,坚持要抄家和珅!",
+                            "Digest": "",
+                            "ItemIndex": 8,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485242&idx=8&sn=fde5761068d3f9db04a8b85df34e4dec&chksm=c3118e39f466072f8236e997543e3b624a0c850818ba4f5dec8ae0ed5daffd232dc24d104636&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_jpg/DGknWGISzkPSmvv4mMFl35ZDibRlBeA6Ijpj38HnMn7O90RicvibXwhMfL5JXB8FVnHQTWDJraeKKRqHvpp8s7wQw/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_jpg/DGknWGISzkPSmvv4mMFl35ZDibRlBeA6Ijpj38HnMn7O90RicvibXwhMfL5JXB8FVnHQTWDJraeKKRqHvpp8s7wQw/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_jpg/DGknWGISzkPSmvv4mMFl35ZDibRlBeA6Ijpj38HnMn7O90RicvibXwhMfL5JXB8FVnHQTWDJraeKKRqHvpp8s7wQw/300?wxtype=jpeg&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 795  赞 7  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_jpg/DGknWGISzkPSmvv4mMFl35ZDibRlBeA6Ijpj38HnMn7O90RicvibXwhMfL5JXB8FVnHQTWDJraeKKRqHvpp8s7wQw/640?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_jpg/DGknWGISzkPSmvv4mMFl35ZDibRlBeA6Ijpj38HnMn7O90RicvibXwhMfL5JXB8FVnHQTWDJraeKKRqHvpp8s7wQw/640?wxtype=jpeg&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_jpg/DGknWGISzkPSmvv4mMFl35ZDibRlBeA6Ijpj38HnMn7O90RicvibXwhMfL5JXB8FVnHQTWDJraeKKRqHvpp8s7wQw/300?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 1,
+                                "height_hint": 1
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1709256084,
+                            "finder_export_id": ""
+                        }
+                    ]
+                }
+            },
+            {
+                "BaseInfo": {
+                    "MsgId": 1000000002,
+                    "MsgType": 49,
+                    "DateTime": 1708327900,
+                    "Status": 2,
+                    "FuncFlag": 71335938,
+                    "UniqueId": "1000000002",
+                    "NextOffset": 30
+                },
+                "AppMsg": {
+                    "BaseInfo": {
+                        "AppMsgId": 2247485087,
+                        "CreateTime": 1708327899,
+                        "UpdateTime": 1723280437,
+                        "Type": 9,
+                        "BigPic": 0
+                    },
+                    "DetailInfo": [
+                        {
+                            "Title": "三国十大军师排名,诸葛亮仅排第六",
+                            "Digest": "",
+                            "ItemIndex": 1,
+                            "ContentUrl": "http://mp.weixin.qq.com/s?__biz=Mzk0NTc0MTkzMg==&mid=2247485087&idx=1&sn=1e80b68a9352869ddd801bd33e7190fd&chksm=c2e198681d0e4d6f773e04c12a823d58d70264d0597e785e2b135dc29affeddeb2ca36c35e31&scene=126&sessionid=1679649075#rd",
+                            "SourceUrl": "",
+                            "CoverImgUrl": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8Xib19ZbsrXROxUcccOcNf3UQBicFZDcoVU78bOnahQaKdPtiaga8JV2amdS0IDJ4JOqjHaYWsoYk9FtQ/640?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_1_1": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8Xib19ZbsrXROxUcccOcNf3UQBicFZDcoVU78bOnahQaKdPtiaga8JV2amdS0IDJ4JOqjHaYWsoYk9FtQ/300?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_235_1": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8Xib19ZbsrXROxUcccOcNf3UQBicFZDcoVU78bOnahQaKdPtiaga8JV2amdS0IDJ4JOqjHaYWsoYk9FtQ/640?wxtype=jpeg&wxfrom=0",
+                            "ItemShowType": 0,
+                            "IsOriginal": 0,
+                            "ShowDesc": "阅读 293  赞 3  ",
+                            "CanReward": 0,
+                            "IsPaySubscribe": 0,
+                            "CoverImgUrl_16_9": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8Xib19ZbsrXROxUcccOcNf3UQBicFZDcoVU78bOnahQaKdPtiaga8JV2amdS0IDJ4JOqjHaYWsoYk9FtQ/640?wxtype=jpeg&wxfrom=0",
+                            "CoverImgUrl_16_9_640": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8Xib19ZbsrXROxUcccOcNf3UQBicFZDcoVU78bOnahQaKdPtiaga8JV2amdS0IDJ4JOqjHaYWsoYk9FtQ/640?wxtype=jpeg&wxfrom=0",
+                            "ori_content": "",
+                            "SuggestedCoverImg": {
+                                "url": "https://mmbiz.qpic.cn/mmbiz_jpg/iarplicwGO8Xib19ZbsrXROxUcccOcNf3UQBicFZDcoVU78bOnahQaKdPtiaga8JV2amdS0IDJ4JOqjHaYWsoYk9FtQ/640?wxtype=jpeg&wxfrom=0",
+                                "width_hint": 235,
+                                "height_hint": 100
+                            },
+                            "featured_info": {
+                                "status": 0
+                            },
+                            "send_time": 1708327912,
+                            "finder_export_id": ""
+                        }
+                    ]
+                }
+            }
+        ]
+    }
+}
+

+ 26 - 0
dev/spider.py

@@ -0,0 +1,26 @@
+"""
+@author: luojunhui
+"""
+import pandas as pd
+
+from tqdm import tqdm
+
+from applications import PQMySQL
+
+
+db = PQMySQL()
+# The statement is the same for every row, so it is built once; values are bound per call.
+update_sql = """
+    UPDATE long_articles_accounts
+    SET account_position_read_avg = %s
+    where account_name = %s and account_position = %s;
+    """
+# Each spreadsheet row is read positionally: account name, account position, read average.
+rows = pd.read_excel("mean_cache.xlsx").values.tolist()
+for row in tqdm(rows):
+    account_name, position, avg_read = row[0], row[1], row[2]
+    db.update(update_sql, params=(avg_read, account_name, position))
+    print("successful")
+
+

+ 60 - 0
dev/task_analysis.py

@@ -0,0 +1,60 @@
+"""
+@author: luojunhui
+"""
+import json
+import pandas as pd
+
+# Load the per-account baseline and index it by "<accountName>_<position>".
+with open("AccountInfo.json", encoding="utf-8") as f:
+    account_base_info = json.load(f)
+
+new_d = {
+    "{}_{}".format(value['accountName'], value['position']): value['readAvg']
+    for value in account_base_info.values()
+}
+
+
+df = pd.read_excel("非实验数据.xlsx")
+L = []
+a = 0  # rows whose reads exceed 1.3x the baseline
+b = 0  # rows at or below 1.3x the baseline
+for line in df.values.tolist():
+    dt, account_name, title, link, pos, read = line[:6]
+    avg_read = new_d.get("{}_{}".format(account_name, pos), 0)
+    if not avg_read:
+        # No baseline (or a zero baseline) for this account/position:
+        # the ratio is undefined, so the row is left out of the statistics.
+        continue
+    avg_times = read / avg_read
+    if avg_times > 1.3:
+        is_up = 1
+        a += 1
+    else:
+        is_up = 0
+        b += 1
+    strategy = line[-7]
+    # NOTE: the experiment/control split below is disabled; every row is labelled as control.
+    # if "军事" in strategy or "历史" in strategy:
+    #     ac_t = "实验"
+    # else:
+    ac_t = "对照"
+    record = [dt, account_name, title, link, pos, read, avg_read, avg_times, is_up, ac_t, strategy]
+    L.append(record)
+    print(record)
+
+total = a + b
+print(b)
+print(a)
+print(total)
+if total:
+    print(a / total)
+else:
+    # Without this guard an input with no usable rows ends in ZeroDivisionError.
+    print("no rows matched a known read average")
+# NOTE: spreadsheet export of L is disabled; kept for reference.
+# out_df = pd.DataFrame(
+#     L,
+#     columns=['日期', '账号名称', '标题', '链接', '文章位置', '阅读量', '阅读均值', '阅读均值倍数', '是否晋升', '实验/对照', '冷启品类']
+# )
+#
+# out_df.to_excel("result.xlsx", index=False)
+
+
+

+ 1 - 0
spider/__init__.py

@@ -1,3 +1,4 @@
 """
 @author: luojunhui
 """
+from .weixinCategoryCrawler import weixinCategory

+ 52 - 0
spider/weixinAssociationCrawler.py

@@ -0,0 +1,52 @@
+"""
+@author: luojunhui
+微信联想抓取
+"""
+import json
+
+from tqdm import tqdm
+
+from applications import PQMySQL
+from applications.spiderTool import SpiderTools
+
+
+class weixinAssociation(object):
+    """
+    Crawler for accounts whose category is 'association'.
+    """
+    pq_mysql_client = PQMySQL()
+    spider_tool = SpiderTools()
+
+    @classmethod
+    def getAssociationAccounts(cls):
+        """
+        Select the distinct gh_id values of in-use accounts in the 'association' category.
+        :return: one list per selected row
+        """
+        select_sql = """
+        SELECT distinct(gh_id)
+        FROM long_articles_accounts
+        where is_using = 1 and account_category = 'association';"""
+        rows = cls.pq_mysql_client.select(select_sql)
+        return [list(row) for row in rows]
+
+    @classmethod
+    def deal(cls):
+        """
+        Entry point: hand every association account to
+        spider_tool.searchEachAccountArticlesSinglePage.
+        :return:
+        """
+        for account_row in tqdm(cls.getAssociationAccounts()):
+            # The first column of each row is the gh_id.
+            cls.spider_tool.searchEachAccountArticlesSinglePage(
+                gh_id=account_row[0],
+                category="association"
+            )
+
+
+
+
+if __name__ == "__main__":
+    # Run the crawl only when this file is executed as a script, so that
+    # importing the module does not start it as a side effect.
+    w = weixinAssociation()
+    w.deal()

+ 99 - 17
spider/weixinCategoryCrawler.py

@@ -2,10 +2,12 @@
 @author: luojunhui
 抓取全局品类文章
 """
-import json
+
 import time
 
-from applications import WeixinSpider
+from tqdm import tqdm
+
+from applications import WeixinSpider, Functions, DeNetMysql
 
 
 class weixinCategory(object):
@@ -13,9 +15,10 @@ class weixinCategory(object):
     微信全局品类账号抓取
     """
 
-    def __init__(self, spider_client):
-        self.spider_client = spider_client
+    def __init__(self):
+        # The constructor takes no arguments and builds its own collaborators.
+        # Despite its name, spider_client is the DeNetMysql client used for the SQL calls.
+        self.spider_client = DeNetMysql()
         self.spider = WeixinSpider()
+        self.function = Functions()
 
     def getAccountList(self):
         """
@@ -32,30 +35,109 @@ class weixinCategory(object):
                 "platform": i[1],
                 "account_name": i[2],
                 "category": i[3],
-                "latest_timestamp": i[4]
-            } for i in account_tuple
+                "latest_timestamp": i[4],
+            }
+            for i in account_tuple
         ]
         return result
 
-    def update_data_into_mysql(self, msg_list):
+    def updateDataIntoMysql(self, gh_id, category, article_list):
         """
         将数据更新到数据库
         :return:
         """
-        for obj in msg_list['data']['data']:
-            print(json.dumps(obj, ensure_ascii=False, indent=4))
+        # Insert one crawler_meta_article row for every article found under each
+        # message's ["AppMsg"]["DetailInfo"] list.
+        #   gh_id:        stored in the out_account_id column
+        #   category:     stored in the category column
+        #   article_list: messages, each read as article_obj["AppMsg"]["DetailInfo"]
+        for article_obj in article_list:
+            detail_article_list = article_obj["AppMsg"]["DetailInfo"]
+            for obj in detail_article_list:
+                try:
+                    # show_desc_to_sta turns the ShowDesc display string into a mapping;
+                    # counts missing from it default to 0.
+                    show_stat = self.function.show_desc_to_sta(obj["ShowDesc"])
+                    show_view_count = show_stat.get("show_view_count", 0)
+                    show_like_count = show_stat.get("show_like_count", 0)
+                    insert_sql = f"""
+                        insert into crawler_meta_article
+                        (platform, mode, category, out_account_id, article_index, title, link, read_cnt, like_cnt, description, publish_time, crawler_time, status, unique_index)
+                        VALUES 
+                        (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+                    """
+                    # Values are bound in the same order as the column list above;
+                    # crawler_time is the current Unix time and unique_index is derived
+                    # from the article link.
+                    self.spider_client.update(
+                        sql=insert_sql,
+                        params=(
+                            "weixin",
+                            "account",
+                            category,
+                            gh_id,
+                            obj['ItemIndex'],
+                            obj["Title"],
+                            obj["ContentUrl"],
+                            show_view_count,
+                            show_like_count,
+                            obj["Digest"],
+                            obj["send_time"],
+                            int(time.time()),
+                            1,
+                            self.function.generateGzhId(obj["ContentUrl"]),
+                        ),
+                    )
+                except Exception as e:
+                    # Best effort: a failing article is printed and skipped so the
+                    # remaining articles are still processed.
+                    print(e)
+
+    def updateLatestAccountTimeStamp(self, gh_id):
+        """
+        Copy the newest stored publish_time of an account into
+        long_article_accounts_outside.latest_article_timestamp.
+
+        :param gh_id: account id, matched against crawler_meta_article.out_account_id
+            and long_article_accounts_outside.account_id
+        :return: None
+        """
+        # NOTE(review): gh_id is interpolated directly into this statement. The
+        # signature of select() is not visible here, so the query text is left
+        # unchanged; bind gh_id as a parameter if select() supports it.
+        select_sql = f"""
+        SELECT publish_time 
+        From crawler_meta_article 
+        WHERE out_account_id = '{gh_id}'
+        ORDER BY publish_time DESC LIMIT 1;
+        """
+        result = self.spider_client.select(select_sql)
+        if not result:
+            # No stored article for this account: there is no timestamp to record,
+            # and indexing result[0][0] would raise IndexError.
+            print("No article found for {}, latest timestamp not updated".format(gh_id))
+            return
+        time_stamp = result[0][0]
+        update_sql = """
+            update long_article_accounts_outside
+            set latest_article_timestamp = %s
+            where account_id = %s;
+        """
+        self.spider_client.update(sql=update_sql, params=(time_stamp, gh_id))
 
-    def updateEachAccountArticles(self, gh_id, latest_time_stamp):
+    def updateEachAccountArticles(self, gh_id, category, latest_time_stamp, index=None):
         """
         更新账号文章
         :return:
         """
-        index = None
-        msg_list = self.spider.update_msg_list(ghId=gh_id, index=index)
-        latest_time_stamp_in_this_msg = msg_list['data']['data'][-1]['BaseInfo']
-        print(latest_time_stamp_in_this_msg)
+        # Fetch one page of messages for gh_id, store its articles, then decide
+        # whether to request the next page.
+        #   category:          passed through to updateDataIntoMysql
+        #   latest_time_stamp: compared with the UpdateTime of the page's last message
+        #   index:             cursor given to update_msg_list; None on the initial call,
+        #                      the previous response's next_cursor on recursive calls
+        response = self.spider.update_msg_list(ghId=gh_id, index=index)
+        msg_list = response.get("data", {}).get("data")
+        if msg_list:
+            last_article_in_this_msg = msg_list[-1]
+            self.updateDataIntoMysql(
+                gh_id=gh_id, category=category, article_list=msg_list
+            )
+            last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"]["BaseInfo"]["UpdateTime"]
+            if latest_time_stamp < last_time_stamp_in_this_msg:
+                # The last message of this page is still newer than the stored
+                # timestamp, so keep paging; one recursion level is added per page.
+                # NOTE(review): presumably pages arrive newest-first - confirm.
+                next_cursor = response["data"]["next_cursor"]
+                return self.updateEachAccountArticles(
+                    gh_id=gh_id,
+                    latest_time_stamp=latest_time_stamp,
+                    category=category,
+                    index=next_cursor,
+                )
+            else:
+                # Update the account's latest crawl timestamp.
+                self.updateLatestAccountTimeStamp(gh_id=gh_id)
+        else:
+            # An empty or missing page ends the crawl without touching the stored timestamp.
+            print("No more data")
 
 
-if __name__ == '__main__':
-    wc = weixinCategory(spider_client="123")
-    wc.updateEachAccountArticles("gh_ddafea4bcc29", latest_time_stamp=1)
+if __name__ == "__main__":
+    wxCategory = weixinCategory()
+    # Crawl every account in turn; a failure on one account is reported and skipped.
+    for account in tqdm(wxCategory.getAccountList()):
+        try:
+            wxCategory.updateEachAccountArticles(
+                gh_id=account["gh_id"],
+                category=account["category"],
+                latest_time_stamp=account["latest_timestamp"],
+            )
+        except Exception as error:
+            print(f"fail because of {error}")

+ 227 - 0
spider/weixinRelativeAccountCrawler.py

@@ -0,0 +1,227 @@
+"""
+@author: luojunhui
+获取微信相关账号文章下载
+"""
+
+import datetime
+import json
+import time
+
+from pandas import DataFrame
+
+from applications import PQMySQL, WeixinSpider, AlgApi, Functions
+from config import accountBaseInfo
+
+
+class weixinRelationAccountGoodArticles(object):
+    """
+    Helpers for finding WeChat accounts related to the in-house accounts.
+
+    The methods read an account's stored articles, keep the ones with
+    above-average views, search WeChat by title, and write the accounts and
+    articles that turn up into MySQL.
+    """
+
+    # Shared clients; they are created once, when the class body is executed.
+    pq_mysql_client = PQMySQL()
+    wx_spider = WeixinSpider()
+    function = Functions()
+    spider_client = PQMySQL()
+    nlp = AlgApi()
+
+    @classmethod
+    def findInnerAccount(cls):
+        """
+        Collect the in-house account ids from ``accountBaseInfo``.
+
+        Every key is taken without its last two characters and duplicates
+        are dropped.
+        :return: list of distinct ids (built from a set, so order is not defined)
+        """
+        return list({key[:-2] for key in accountBaseInfo})
+
+    @classmethod
+    def initAccount(cls, gh_id, account_name):
+        """
+        Insert the account into ``long_articles_accounts`` once per position 1..8.
+
+        A failing insert is printed and does not stop the remaining positions.
+        :param gh_id: account id to register
+        :param account_name: display name of the account
+        :return: None
+        """
+        insert_sql = """
+                INSERT INTO long_articles_accounts
+                (gh_id, account_source, account_name, account_position, account_category, whether_inner_account, is_using)
+                values 
+                (%s, %s, %s, %s, %s, %s, %s);
+            """
+        for position in range(1, 9):
+            row = (gh_id, "weixin", account_name, position, "association", 0, 1)
+            try:
+                cls.pq_mysql_client.update(sql=insert_sql, params=row)
+            except Exception as error:
+                print(error)
+        print("账号初始化完成")
+
+    @classmethod
+    def putIntoAssociationGraph(cls, gh_id, account_name, source_title, source_account):
+        """
+        Record an associated account in the association table.
+
+        The row is stamped with the current local time and ``is_using`` = 1.
+        A failing insert is printed, not raised.
+        :param gh_id: id of the associated account
+        :param account_name: name of the associated account
+        :param source_title: title stored as the source article title
+        :param source_account: account the association started from
+        :return: None
+        """
+        insert_sql = """
+            INSERT INTO long_articles_assiciation_accounts
+            (account_outside_id, accout_name, source_article_title, source_account, association_time, is_using)
+            values 
+            (%s, %s, %s, %s, %s, %s);
+        """
+        associated_at = str(datetime.datetime.now())
+        row = (gh_id, account_name, source_title, source_account, associated_at, 1)
+        try:
+            cls.pq_mysql_client.update(sql=insert_sql, params=row)
+        except Exception as error:
+            print(error)
+
+    @classmethod
+    def getEachAccountArticle(cls, account_id):
+        """
+        Load the stored articles of one account as a DataFrame.
+
+        :param account_id: value matched against ``ghId`` in ``official_articles_v2``
+        :return: DataFrame with columns title, Type, updateTime, ItemIndex, show_view_count
+        """
+        column_names = ["title", "Type", "updateTime", "ItemIndex", "show_view_count"]
+        # NOTE(review): account_id is interpolated straight into the SQL text;
+        # switch to bound parameters if the client's select() supports them.
+        select_sql = f"""
+                SELECT title, Type, updateTime, ItemIndex, show_view_count
+                FROM official_articles_v2
+                WHERE ghId = '{account_id}';
+            """
+        rows = cls.pq_mysql_client.select(select_sql)
+        return DataFrame(rows, columns=column_names)
+
+    @classmethod
+    def filterGoodArticle(cls, article_data_frame):
+        """
+        Pick the titles whose view count beats the mean by more than 10%.
+
+        :param article_data_frame: DataFrame with ``show_view_count`` and ``title`` columns
+        :return: list of titles with show_view_count > 1.1 * mean(show_view_count)
+        """
+        view_counts = article_data_frame["show_view_count"]
+        threshold = view_counts.mean() * 1.1
+        above_average = article_data_frame[view_counts > threshold]
+        return above_average["title"].values.tolist()
+
+    @classmethod
+    def searchGoodArticlesAccounts(cls, source_account, source_title, base_score=None):
+        """
+        Search WeChat by title and resolve the account behind each result.
+
+        :param source_account: account name passed to the scoring API
+        :param source_title: title used as the search query
+        :param base_score: currently unused (the score filter is disabled)
+        :return: list of ``[result title, account info]`` pairs; empty when nothing is found
+        """
+        response = cls.wx_spider.search_articles(source_title)
+        article_list = response["data"]["data"]
+        if not article_list:
+            return []
+        title_list = [article["title"] for article in article_list]
+        score_response = cls.nlp.getScoreList(
+            accountName=source_account, title_list=title_list
+        )
+        title_score_list = score_response[source_account]["score_list"]
+        account_list = []
+        # The comparison against base_score is switched off for now, so every
+        # scored result is kept regardless of its score.
+        for position, _score in enumerate(title_score_list):
+            article_obj = article_list[position]
+            account_info = cls.wx_spider.get_account_by_url(
+                content_url=article_obj["url"]
+            )
+            account_list.append([article_obj["title"], account_info])
+        return account_list
+
+    @classmethod
+    def insertIntoDataBase(cls, gh_id, article_list):
+        """
+        Store every detail article of the given messages in ``crawler_meta_article``.
+
+        A failure on one article is printed and the remaining articles are
+        still processed.
+        :param gh_id: account id written as ``out_account_id``
+        :param article_list: messages, each holding ``AppMsg.DetailInfo`` entries
+        :return: None
+        """
+        insert_sql = """
+                                insert into crawler_meta_article
+                                (platform, mode, category, out_account_id, title, link, read_cnt, like_cnt, description, publish_time, crawler_time, status, unique_index)
+                                VALUES 
+                                (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
+                            """
+        for article_obj in article_list:
+            for detail in article_obj["AppMsg"]["DetailInfo"]:
+                try:
+                    show_stat = cls.function.show_desc_to_sta(detail["ShowDesc"])
+                    row = (
+                        "weixin",
+                        "account",
+                        "association",
+                        gh_id,
+                        detail["Title"],
+                        detail["ContentUrl"],
+                        show_stat.get("show_view_count", 0),
+                        show_stat.get("show_like_count", 0),
+                        detail["Digest"],
+                        detail["send_time"],
+                        int(time.time()),
+                        1,
+                        cls.function.generateGzhId(detail["ContentUrl"]),
+                    )
+                    cls.spider_client.update(sql=insert_sql, params=row)
+                except Exception as error:
+                    print(error)
+
+    @classmethod
+    def searchResultFilter(cls, filter_type, info_list):
+        """
+        Filter search results (placeholder).
+
+        :param info_list: list of items to filter
+        :param filter_type: 'account' for account filtering, 'article' for article filtering
+        :return: nothing yet; the method has no body and returns None
+        """
+        # TODO: filtering is not implemented; only the signature exists so far.
+
+
+# Script entry point: walk in-house accounts, search WeChat with their good
+# article titles, and register every account found through the search.
+if __name__ == "__main__":
+    weixin = weixinRelationAccountGoodArticles()
+    # Load the in-house accounts.
+    inner_account_list = weixin.findInnerAccount()
+    # The [:1] slices limit the run to the first account and its first good article.
+    for source_account in inner_account_list[:1]:
+        accountArticlesDataFrame = weixin.getEachAccountArticle(
+            account_id=source_account
+        )
+        goodArticles = weixin.filterGoodArticle(accountArticlesDataFrame)
+        for title in goodArticles[:1]:
+            account_list = weixin.searchGoodArticlesAccounts(
+                source_account=source_account, source_title=title
+            )
+            for associated_account in account_list:
+                # NOTE(review): element 0 is the title of the search result, not
+                # the title that was searched for; confirm which one belongs in
+                # the association table.
+                source_title = associated_account[0]
+                associated_account_info = associated_account[1]
+                account_name = associated_account_info["data"]["data"]["account_name"]
+                gh_id = associated_account_info["data"]["data"]["wx_gh"]
+                # Register the account; the class method is named initAccount
+                # (calling init_account raised AttributeError).
+                weixin.initAccount(gh_id=gh_id, account_name=account_name)
+                weixin.putIntoAssociationGraph(
+                    gh_id=gh_id,
+                    account_name=account_name,
+                    source_account=source_account,
+                    source_title=source_title,
+                )

+ 3 - 7
stratrgy/distribution.py

@@ -81,12 +81,6 @@ class ArticleDistribution(object):
     def association_split(cls, article_list):
         """
         联想类型文章分发逻辑
-        {
-        'url': 'http://mp.weixin.qq.com/s?__biz=MzkxOTUzMTYwNg==&mid=2247490482&idx=1&sn=14553e013cbc15b0448332935f7835df&chksm=c06ad7c3e89bcaf88fda88eaafd66295fc82d1ecea66ab3fd5df5393932a01ad85d11565f9f6&scene=126&sessionid=1679649075#rd',
-        'title': '不和中国合作了?俄罗斯穾然宣布:取消1.7万亿合作项目,绝不接受中国技术',
-        'url_md5': '4dd9ed803305a4ca53139443ec311b27',
-        'id': 'http://mp.weixin.qq.com/s?__biz=MzkxOTUzMTYwNg==&mid=2247490482&idx=1&sn=14553e013cbc15b0448332935f7835df&chksm=c06ad7c3e89bcaf88fda88eaafd66295fc82d1ecea66ab3fd5df5393932a01ad85d11565f9f6&scene=126&sessionid=1679649075#rd'
-        }
         :param article_list:
         :return:
         """
@@ -96,6 +90,7 @@ class ArticleDistribution(object):
             link = article['url']
             url_md5 = article['url_md5']
             title = article['title']
+            c_id = article['id']
             title_match_list = cls.findArticleScoreList(url_md5)
             title_match_list = sorted(title_match_list, key=lambda x: x[0], reverse=True)
             # print("标题:\t", title)
@@ -107,7 +102,8 @@ class ArticleDistribution(object):
                 account_gh_id = account_name_map[account_name]
                 if cls.account_position_dict.get(account_gh_id):
                     try:
-                        channel_content_id = cls.Spider.get_article_text(link)['data']['data']['channel_content_id']
+                        # channel_content_id = cls.Spider.get_article_text(link)['data']['data']['channel_content_id']
+                        channel_content_id = c_id
                     except:
                         print(link)
                         channel_content_id = url_md5

+ 41 - 49
tasks/task2.py

@@ -21,7 +21,7 @@ class ColdStartTask(object):
     Fun = Functions()
     Spider = WeixinSpider()
     D = ArticleDistribution()
-    pool3 = "autoArticlePoolLevel3"
+    pool3 = "autoArticlePoolLevel1"
 
     @classmethod
     def generate_account_dict(cls):
@@ -68,15 +68,7 @@ class ColdStartTask(object):
         获取高分享的文章list
         :return:
         """
-        sql = f"""
-            select content_channel_id, content_link, title 
-            from cold_start_article_pool
-            where category = '{category}' and status = 1
-            order by view_count DESC, publish_time_stamp DESC
-            limit {limit_count};
-        """
-        result = cls.PqMysql.select(sql)
-        return result
+
 
     @classmethod
     def splitCategoryToAccount(cls, cate_list):
@@ -161,16 +153,16 @@ class ColdStartTask(object):
         for account in tqdm(account_article_dict):
             date_str = datetime.datetime.today().strftime("%Y-%m-%d")
             print(account, date_str, json.dumps(account_article_dict[account], ensure_ascii=False))
-            # insert_sql = f"""
-            # INSERT INTO article_pre_distribute_account
-            # (gh_id, date, article_list)
-            # VALUES
-            # (%s, %s, %s);
-            # """
-            # try:
-            #     PQMySQL.update(sql=insert_sql, params=(account, date_str, json.dumps(account_article_dict[account], ensure_ascii=False)))
-            # except Exception as e:
-            #     print("插入出现问题----{}".format(e))
+            insert_sql = f"""
+            INSERT INTO article_pre_distribute_account
+            (gh_id, date, article_list)
+            VALUES
+            (%s, %s, %s);
+            """
+            try:
+                PQMySQL.update(sql=insert_sql, params=(account, date_str, json.dumps(account_article_dict[account], ensure_ascii=False)))
+            except Exception as e:
+                print("插入出现问题----{}".format(e))
 
         print("成功更新完成")
 
@@ -233,7 +225,7 @@ class ColdStartTask(object):
             from association_articles
             where status = 1 and content_length > 500
             order by publish_timestamp
-            DESC limit 10000;
+            DESC limit 10000 offset 10000;
         """
         temp_list = cls.PqMysql.select(sql)
         id_tuple = tuple([i[0] for i in temp_list])
@@ -265,8 +257,8 @@ class ColdStartTask(object):
                     "url": i[3],
                     "title": i[2],
                     "url_md5": i[5],
-                    "id": i[3]
-                    # "id": cls.Spider.get_article_text(i[3])['data']['data']['channel_content_id']
+                    # "id": i[3]
+                    "id": cls.Spider.get_article_text(i[3])['data']['data']['channel_content_id']
                 }
             except:
                 o = {
@@ -285,34 +277,34 @@ class ColdStartTask(object):
         :return:
         """
         # 获取6个品类的数据
-        association_list = cls.findAssociationArticlesDaily()
-        cls.D.association_split(association_list)
-        # category_list = cls.findCategoryArticlesDaily()
+        # association_list = cls.findAssociationArticlesDaily()
+        # cls.D.association_split(association_list)
+        category_list = cls.findCategoryArticlesDaily()
         # d_list = category_list + association_list
         # # # 预分配账号
-        # cls.splitCategoryToAccount(association_list)
+        cls.splitCategoryToAccount(category_list)
         # #
-        # try:
-        #     army = [i for i in category_list if i['cate'] == '军事政法']
-        #     cls.AidApi.updateArticleIntoCrawlerPlan(
-        #         plan_id=plan_id,
-        #         plan_name="军事政法类冷启",
-        #         plan_tag=cls.pool3,
-        #         url_list=[i['url'] for i in army]
-        #     )
-        # except Exception as e:
-        #     print("error--{}".format(e))
-        #
-        # try:
-        #     history = [i for i in category_list if i['cate'] == '宗教历史']
-        #     cls.AidApi.updateArticleIntoCrawlerPlan(
-        #         plan_id=plan_id,
-        #         plan_name="宗教历史类冷启",
-        #         plan_tag=cls.pool3,
-        #         url_list=[i['url'] for i in history]
-        #     )
-        # except Exception as e:
-        #     print("error--{}".format(e))
+        try:
+            army = [i for i in category_list if i['cate'] == '军事政法']
+            cls.AidApi.updateArticleIntoCrawlerPlan(
+                plan_id=plan_id,
+                plan_name="军事政法类冷启",
+                plan_tag=cls.pool3,
+                url_list=[i['url'] for i in army]
+            )
+        except Exception as e:
+            print("error--{}".format(e))
+
+        try:
+            history = [i for i in category_list if i['cate'] == '宗教历史']
+            cls.AidApi.updateArticleIntoCrawlerPlan(
+                plan_id=plan_id,
+                plan_name="宗教历史类冷启",
+                plan_tag=cls.pool3,
+                url_list=[i['url'] for i in history]
+            )
+        except Exception as e:
+            print("error--{}".format(e))
         # #
         # # try:
         # #     news = [i for i in category_list if i['cate'] == '新闻媒体']
@@ -360,7 +352,7 @@ class ColdStartTask(object):
         # #
         # cls.AidApi.updateArticleIntoCrawlerPlan(
         #     plan_id=plan_id,
-        #     plan_name="文章账号联想冷启",
+        #     plan_name="文章账号联想冷启--0805",
         #     plan_tag=cls.pool3,
         #     url_list=[i['url'] for i in association_list]
         # )

+ 83 - 0
tasks/task6.py

@@ -0,0 +1,83 @@
+"""
+@author: luojunhui
+计算账号的阅读均值倍数
+"""
+import json
+
+from pandas import DataFrame
+
+from applications import DeNetMysql
+from applications import AIDTApi
+
+# Module-wide DeNetMysql client; the query helpers below call D.select().
+D = DeNetMysql()
+
+
+def get_accounts():
+    """
+    Fetch the account ids stored under the '军事政法' category.
+
+    :return: list holding the first column (account_id) of every selected row
+    """
+    sql = """select account_id from long_article_accounts_outside where category = '军事政法';"""
+    return [row[0] for row in D.select(sql)]
+
+
+def get_account_avg():
+    """
+    Rank crawled articles by read count relative to their account's average.
+
+    ``avg.json`` is read as a mapping from account id to average read count.
+    Each article of each account from ``get_accounts()`` gets the score
+    ``read_cnt / average``. Articles are sorted by that score (highest first)
+    and dropped when the title contains "农历" or "节", the score is below 1.3,
+    the title is shorter than 15 characters, or the read count is below 1000.
+
+    :return: list of ``[title, link, read_cnt, avg_score]`` for the kept articles
+    """
+    with open("avg.json", encoding="utf-8") as f:
+        avg_dict = json.load(f)
+
+    scored_articles = []
+    for account in get_accounts():
+        select_sql = f"""
+        select title, read_cnt, link from crawler_meta_article
+        where out_account_id = '{account}';
+        """
+        result_list = D.select(select_sql)
+        try:
+            avg_read = avg_dict[account]
+            for title, read_cnt, link in result_list:
+                scored_articles.append([title, link, read_cnt, read_cnt / avg_read])
+        except (KeyError, TypeError, ValueError, ZeroDivisionError):
+            # No usable average (or an unusable row) for this account: skip the
+            # rest of it. Only data errors are caught, so Ctrl-C still works.
+            continue
+
+    ranked = sorted(scored_articles, reverse=True, key=lambda x: x[3])
+    LL = [
+        line
+        for line in ranked
+        if not (
+            "农历" in line[0]
+            or "节" in line[0]
+            or line[3] < 1.3
+            or len(line[0]) < 15
+            or line[2] < 1000
+        )
+    ]
+    # The export / crawler-plan upload below is currently disabled.
+    # df = DataFrame(LL, columns=["title", "link", "read", "read_avg"])
+    # df.to_excel("test.xlsx", index=False)
+    # url_list = [i[1] for i in LL]
+    # try:
+    #     AIDTApi().updateArticleIntoCrawlerPlan(
+    #         plan_id=None,
+    #         plan_name="军事政法类冷启-0805-new",
+    #         plan_tag="autoArticlePoolLevel1",
+    #         url_list=url_list
+    #     )
+    # except Exception as e:
+    #     print("error--{}".format(e))
+    return LL
+
+
+# There is no __main__ guard, so this also runs when the module is imported.
+get_account_avg()

+ 23 - 0
tasks/task6_dev.py

@@ -0,0 +1,23 @@
+"""
+@author: luojunhui
+"""
+import json
+from applications import DeNetMysql
+
+D = DeNetMysql()
+
+# The statement is identical for every account, so it is built once.
+sql = """
+    UPDATE long_article_accounts_outside
+    SET read_avg = %s 
+    where account_id = %s;
+    """
+
+with open("avg.json", encoding="utf-8") as f:
+    data = json.load(f)
+
+# Write each account's value from avg.json into its read_avg column.
+for key, read_avg in data.items():
+    print(key)
+    print(read_avg)
+    D.update(sql=sql, params=(read_avg, key))