|  | @@ -12,7 +12,8 @@ from tqdm import tqdm
 | 
											
												
													
														|  |  from datetime import datetime
 |  |  from datetime import datetime
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  from config import accountBaseInfo
 |  |  from config import accountBaseInfo
 | 
											
												
													
														|  | -from applications import PQMySQL, WeixinSpider, Functions
 |  | 
 | 
											
												
													
														|  | 
 |  | +from applications import PQMySQL
 | 
											
												
													
														|  | 
 |  | +from tasks.task4 import update_articles
 | 
											
												
													
														|  |  from applications.decoratorApi import retryOnTimeout
 |  |  from applications.decoratorApi import retryOnTimeout
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |  
 |  |  
 | 
											
										
											
												
													
														|  | @@ -22,8 +23,6 @@ class UpdateMsgDaily(object):
 | 
											
												
													
														|  |      """
 |  |      """
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |      db_client = PQMySQL()
 |  |      db_client = PQMySQL()
 | 
											
												
													
														|  | -    spider = WeixinSpider()
 |  | 
 | 
											
												
													
														|  | -    functions = Functions()
 |  | 
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |      @classmethod
 |  |      @classmethod
 | 
											
												
													
														|  |      def getAccountIdDict(cls):
 |  |      def getAccountIdDict(cls):
 | 
											
										
											
												
													
														|  | @@ -72,189 +71,6 @@ class UpdateMsgDaily(object):
 | 
											
												
													
														|  |          }
 |  |          }
 | 
											
												
													
														|  |          requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
 |  |          requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -    @classmethod
 |  | 
 | 
											
												
													
														|  | -    def findAccountLatestUpdateTime(cls, gh_id):
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        获取账号的最近更新id
 |  | 
 | 
											
												
													
														|  | -        :param gh_id:
 |  | 
 | 
											
												
													
														|  | -        :return:
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        sql = f"""
 |  | 
 | 
											
												
													
														|  | -            select accountName, updateTime 
 |  | 
 | 
											
												
													
														|  | -            from official_articles_v2 
 |  | 
 | 
											
												
													
														|  | -            where ghId = '{gh_id}' 
 |  | 
 | 
											
												
													
														|  | -            order by updateTime DESC;
 |  | 
 | 
											
												
													
														|  | -            """
 |  | 
 | 
											
												
													
														|  | -        result = cls.db_client.select(sql)
 |  | 
 | 
											
												
													
														|  | -        if result:
 |  | 
 | 
											
												
													
														|  | -            account_name, update_time = result[0]
 |  | 
 | 
											
												
													
														|  | -            return {"update_time": update_time, "account_type": "history"}
 |  | 
 | 
											
												
													
														|  | -        else:
 |  | 
 | 
											
												
													
														|  | -            return {
 |  | 
 | 
											
												
													
														|  | -                "update_time": int(time.time()) - 30 * 24 * 60 * 60,
 |  | 
 | 
											
												
													
														|  | -                "account_type": "new",
 |  | 
 | 
											
												
													
														|  | -            }
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -    @classmethod
 |  | 
 | 
											
												
													
														|  | -    def updateMsgList(cls, gh_id, account_name, msg_list):
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        把消息数据更新到数据库中
 |  | 
 | 
											
												
													
														|  | -        :param account_name:
 |  | 
 | 
											
												
													
														|  | -        :param gh_id:
 |  | 
 | 
											
												
													
														|  | -        :param msg_list:
 |  | 
 | 
											
												
													
														|  | -        :return:
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        for info in msg_list:
 |  | 
 | 
											
												
													
														|  | -            baseInfo = info.get("BaseInfo", {})
 |  | 
 | 
											
												
													
														|  | -            appMsgId = info.get("AppMsg", {}).get("BaseInfo", {}).get("AppMsgId", None)
 |  | 
 | 
											
												
													
														|  | -            createTime = (
 |  | 
 | 
											
												
													
														|  | -                info.get("AppMsg", {}).get("BaseInfo", {}).get("CreateTime", None)
 |  | 
 | 
											
												
													
														|  | -            )
 |  | 
 | 
											
												
													
														|  | -            updateTime = (
 |  | 
 | 
											
												
													
														|  | -                info.get("AppMsg", {}).get("BaseInfo", {}).get("UpdateTime", None)
 |  | 
 | 
											
												
													
														|  | -            )
 |  | 
 | 
											
												
													
														|  | -            Type = info.get("AppMsg", {}).get("BaseInfo", {}).get("Type", None)
 |  | 
 | 
											
												
													
														|  | -            detail_article_list = info.get("AppMsg", {}).get("DetailInfo", [])
 |  | 
 | 
											
												
													
														|  | -            if detail_article_list:
 |  | 
 | 
											
												
													
														|  | -                for article in detail_article_list:
 |  | 
 | 
											
												
													
														|  | -                    title = article.get("Title", None)
 |  | 
 | 
											
												
													
														|  | -                    Digest = article.get("Digest", None)
 |  | 
 | 
											
												
													
														|  | -                    ItemIndex = article.get("ItemIndex", None)
 |  | 
 | 
											
												
													
														|  | -                    ContentUrl = article.get("ContentUrl", None)
 |  | 
 | 
											
												
													
														|  | -                    SourceUrl = article.get("SourceUrl", None)
 |  | 
 | 
											
												
													
														|  | -                    CoverImgUrl = article.get("CoverImgUrl", None)
 |  | 
 | 
											
												
													
														|  | -                    CoverImgUrl_1_1 = article.get("CoverImgUrl_1_1", None)
 |  | 
 | 
											
												
													
														|  | -                    CoverImgUrl_235_1 = article.get("CoverImgUrl_235_1", None)
 |  | 
 | 
											
												
													
														|  | -                    ItemShowType = article.get("ItemShowType", None)
 |  | 
 | 
											
												
													
														|  | -                    IsOriginal = article.get("IsOriginal", None)
 |  | 
 | 
											
												
													
														|  | -                    ShowDesc = article.get("ShowDesc", None)
 |  | 
 | 
											
												
													
														|  | -                    show_stat = cls.functions.show_desc_to_sta(ShowDesc)
 |  | 
 | 
											
												
													
														|  | -                    ori_content = article.get("ori_content", None)
 |  | 
 | 
											
												
													
														|  | -                    show_view_count = show_stat.get("show_view_count", 0)
 |  | 
 | 
											
												
													
														|  | -                    show_like_count = show_stat.get("show_like_count", 0)
 |  | 
 | 
											
												
													
														|  | -                    show_zs_count = show_stat.get("show_zs_count", 0)
 |  | 
 | 
											
												
													
														|  | -                    show_pay_count = show_stat.get("show_pay_count", 0)
 |  | 
 | 
											
												
													
														|  | -                    wx_sn = (
 |  | 
 | 
											
												
													
														|  | -                        ContentUrl.split("&sn=")[1].split("&")[0]
 |  | 
 | 
											
												
													
														|  | -                        if ContentUrl
 |  | 
 | 
											
												
													
														|  | -                        else None
 |  | 
 | 
											
												
													
														|  | -                    )
 |  | 
 | 
											
												
													
														|  | -                    info_tuple = (
 |  | 
 | 
											
												
													
														|  | -                        gh_id,
 |  | 
 | 
											
												
													
														|  | -                        account_name,
 |  | 
 | 
											
												
													
														|  | -                        appMsgId,
 |  | 
 | 
											
												
													
														|  | -                        title,
 |  | 
 | 
											
												
													
														|  | -                        Type,
 |  | 
 | 
											
												
													
														|  | -                        createTime,
 |  | 
 | 
											
												
													
														|  | -                        updateTime,
 |  | 
 | 
											
												
													
														|  | -                        Digest,
 |  | 
 | 
											
												
													
														|  | -                        ItemIndex,
 |  | 
 | 
											
												
													
														|  | -                        ContentUrl,
 |  | 
 | 
											
												
													
														|  | -                        SourceUrl,
 |  | 
 | 
											
												
													
														|  | -                        CoverImgUrl,
 |  | 
 | 
											
												
													
														|  | -                        CoverImgUrl_1_1,
 |  | 
 | 
											
												
													
														|  | -                        CoverImgUrl_235_1,
 |  | 
 | 
											
												
													
														|  | -                        ItemShowType,
 |  | 
 | 
											
												
													
														|  | -                        IsOriginal,
 |  | 
 | 
											
												
													
														|  | -                        ShowDesc,
 |  | 
 | 
											
												
													
														|  | -                        ori_content,
 |  | 
 | 
											
												
													
														|  | -                        show_view_count,
 |  | 
 | 
											
												
													
														|  | -                        show_like_count,
 |  | 
 | 
											
												
													
														|  | -                        show_zs_count,
 |  | 
 | 
											
												
													
														|  | -                        show_pay_count,
 |  | 
 | 
											
												
													
														|  | -                        wx_sn,
 |  | 
 | 
											
												
													
														|  | -                        json.dumps(baseInfo, ensure_ascii=False),
 |  | 
 | 
											
												
													
														|  | -                    )
 |  | 
 | 
											
												
													
														|  | -                    try:
 |  | 
 | 
											
												
													
														|  | -                        insert_sql = f"""
 |  | 
 | 
											
												
													
														|  | -                                    INSERT INTO official_articles_v2
 |  | 
 | 
											
												
													
														|  | -                                    (ghId, accountName, appMsgId, title, Type, createTime, updateTime, Digest, ItemIndex, ContentUrl, SourceUrl, CoverImgUrl, CoverImgUrl_1_1, CoverImgUrl_255_1, ItemShowType, IsOriginal, ShowDesc, ori_content, show_view_count, show_like_count, show_zs_count, show_pay_count, wx_sn, baseInfo)
 |  | 
 | 
											
												
													
														|  | -                                    values
 |  | 
 | 
											
												
													
														|  | -                                    (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
 |  | 
 | 
											
												
													
														|  | -                                    """
 |  | 
 | 
											
												
													
														|  | -                        cls.db_client.update(sql=insert_sql, params=info_tuple)
 |  | 
 | 
											
												
													
														|  | -                    except Exception as e:
 |  | 
 | 
											
												
													
														|  | -                        try:
 |  | 
 | 
											
												
													
														|  | -                            update_sql = f"""
 |  | 
 | 
											
												
													
														|  | -                                    UPDATE official_articles_v2
 |  | 
 | 
											
												
													
														|  | -                                    SET show_view_count = %s, show_like_count=%s
 |  | 
 | 
											
												
													
														|  | -                                    WHERE wx_sn = %s;
 |  | 
 | 
											
												
													
														|  | -                                    """
 |  | 
 | 
											
												
													
														|  | -                            cls.db_client.update(
 |  | 
 | 
											
												
													
														|  | -                                sql=update_sql,
 |  | 
 | 
											
												
													
														|  | -                                params=(show_view_count, show_like_count, wx_sn),
 |  | 
 | 
											
												
													
														|  | -                            )
 |  | 
 | 
											
												
													
														|  | -                        except Exception as e:
 |  | 
 | 
											
												
													
														|  | -                            print("失败-{}".format(e))
 |  | 
 | 
											
												
													
														|  | -                            continue
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  | -    @classmethod
 |  | 
 | 
											
												
													
														|  | -    def getAccountArticleList(cls, gh_id, last_update_time, cursor=None):
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        输入ghid获取账号的文章list
 |  | 
 | 
											
												
													
														|  | -        :return:
 |  | 
 | 
											
												
													
														|  | -        """
 |  | 
 | 
											
												
													
														|  | -        try:
 |  | 
 | 
											
												
													
														|  | -            response = cls.spider.update_msg_list(ghId=gh_id, index=cursor)
 |  | 
 | 
											
												
													
														|  | -        except Exception as e:
 |  | 
 | 
											
												
													
														|  | -            response = {
 |  | 
 | 
											
												
													
														|  | -                "error": str(e),
 |  | 
 | 
											
												
													
														|  | -                "info": "更新文章接口请求失败",
 |  | 
 | 
											
												
													
														|  | -                "gh_id": gh_id,
 |  | 
 | 
											
												
													
														|  | -                "time": datetime.now().__str__()
 |  | 
 | 
											
												
													
														|  | -            }
 |  | 
 | 
											
												
													
														|  | -            # 之后可以考虑抛出阿里云日志
 |  | 
 | 
											
												
													
														|  | -            print(response)
 |  | 
 | 
											
												
													
														|  | -            return
 |  | 
 | 
											
												
													
														|  | -        msg_list = response.get("data", {}).get("data")
 |  | 
 | 
											
												
													
														|  | -        if msg_list:
 |  | 
 | 
											
												
													
														|  | -            last_article_in_this_msg = msg_list[-1]
 |  | 
 | 
											
												
													
														|  | -            last_time_stamp_in_this_msg = last_article_in_this_msg["AppMsg"][
 |  | 
 | 
											
												
													
														|  | -                "BaseInfo"
 |  | 
 | 
											
												
													
														|  | -            ]["UpdateTime"]
 |  | 
 | 
											
												
													
														|  | -            last_url = last_article_in_this_msg["AppMsg"]["DetailInfo"][0]["ContentUrl"]
 |  | 
 | 
											
												
													
														|  | -            # 校验是否抓到的是同一个账号
 |  | 
 | 
											
												
													
														|  | -            try:
 |  | 
 | 
											
												
													
														|  | -                resdata = cls.spider.get_account_by_url(last_url)
 |  | 
 | 
											
												
													
														|  | -            except Exception as e:
 |  | 
 | 
											
												
													
														|  | -                resdata = {
 |  | 
 | 
											
												
													
														|  | -                    "error": str(e),
 |  | 
 | 
											
												
													
														|  | -                    "info": "通过链接获取账号信息失败",
 |  | 
 | 
											
												
													
														|  | -                    "gh_id": gh_id,
 |  | 
 | 
											
												
													
														|  | -                    "time": datetime.now().__str__()
 |  | 
 | 
											
												
													
														|  | -                }
 |  | 
 | 
											
												
													
														|  | -                print(resdata)
 |  | 
 | 
											
												
													
														|  | -                return
 |  | 
 | 
											
												
													
														|  | -            check_name = resdata["data"].get("data", {}).get("account_name")
 |  | 
 | 
											
												
													
														|  | -            check_id = resdata["data"].get("data", {}).get("wx_gh")
 |  | 
 | 
											
												
													
														|  | -            if check_id == gh_id:
 |  | 
 | 
											
												
													
														|  | -                cls.updateMsgList(gh_id, check_name, msg_list)
 |  | 
 | 
											
												
													
														|  | -                if last_time_stamp_in_this_msg > last_update_time:
 |  | 
 | 
											
												
													
														|  | -                    next_cursor = response["data"]["next_cursor"]
 |  | 
 | 
											
												
													
														|  | -                    return cls.getAccountArticleList(
 |  | 
 | 
											
												
													
														|  | -                        gh_id=gh_id,
 |  | 
 | 
											
												
													
														|  | -                        last_update_time=last_update_time,
 |  | 
 | 
											
												
													
														|  | -                        cursor=next_cursor,
 |  | 
 | 
											
												
													
														|  | -                    )
 |  | 
 | 
											
												
													
														|  | -            else:
 |  | 
 | 
											
												
													
														|  | -                response = {
 |  | 
 | 
											
												
													
														|  | -                    "code": 1002,
 |  | 
 | 
											
												
													
														|  | -                    "info": "抓取时候账号校验失败",
 |  | 
 | 
											
												
													
														|  | -                    "error": None,
 |  | 
 | 
											
												
													
														|  | -                    "gh_id": gh_id,
 |  | 
 | 
											
												
													
														|  | -                    "time_stamp": datetime.now().__str__(),
 |  | 
 | 
											
												
													
														|  | -                }
 |  | 
 | 
											
												
													
														|  | -                print(response)
 |  | 
 | 
											
												
													
														|  | -        else:
 |  | 
 | 
											
												
													
														|  | -            response = {
 |  | 
 | 
											
												
													
														|  | -                "code": 1003,
 |  | 
 | 
											
												
													
														|  | -                "info": "账号为抓取到内容",
 |  | 
 | 
											
												
													
														|  | -                "error": None,
 |  | 
 | 
											
												
													
														|  | -                "gh_id": gh_id,
 |  | 
 | 
											
												
													
														|  | -                "time_stamp": datetime.now().__str__(),
 |  | 
 | 
											
												
													
														|  | -            }
 |  | 
 | 
											
												
													
														|  | -            print(response)
 |  | 
 | 
											
												
													
														|  | -
 |  | 
 | 
											
												
													
														|  |      @classmethod
 |  |      @classmethod
 | 
											
												
													
														|  |      def checkEachAccount(cls, gh_id):
 |  |      def checkEachAccount(cls, gh_id):
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
										
											
												
													
														|  | @@ -285,14 +101,11 @@ class UpdateMsgDaily(object):
 | 
											
												
													
														|  |          更新文章任务
 |  |          更新文章任务
 | 
											
												
													
														|  |          :return:
 |  |          :return:
 | 
											
												
													
														|  |          """
 |  |          """
 | 
											
												
													
														|  | -        account_list = cls.getAccountIdDict()
 |  | 
 | 
											
												
													
														|  | 
 |  | +        account_dict = cls.getAccountIdDict()
 | 
											
												
													
														|  | 
 |  | +        account_list = list(account_dict.keys())
 | 
											
												
													
														|  |          for account_id in tqdm(account_list):
 |  |          for account_id in tqdm(account_list):
 | 
											
												
													
														|  | -            account_info = cls.findAccountLatestUpdateTime(account_id)
 |  | 
 | 
											
												
													
														|  | -            latest_time = account_info["update_time"]
 |  | 
 | 
											
												
													
														|  |              try:
 |  |              try:
 | 
											
												
													
														|  | -                cls.getAccountArticleList(
 |  | 
 | 
											
												
													
														|  | -                    gh_id=account_id, last_update_time=latest_time
 |  | 
 | 
											
												
													
														|  | -                )
 |  | 
 | 
											
												
													
														|  | 
 |  | +                update_articles(gh_id=account_id)
 | 
											
												
													
														|  |              except Exception as e:
 |  |              except Exception as e:
 | 
											
												
													
														|  |                  response = {
 |  |                  response = {
 | 
											
												
													
														|  |                      "code": 1001,
 |  |                      "code": 1001,
 | 
											
										
											
												
													
														|  | @@ -337,8 +150,17 @@ def job_with_thread(job_func):
 | 
											
												
													
														|  |  if __name__ == "__main__":
 |  |  if __name__ == "__main__":
 | 
											
												
													
														|  |      UMD = UpdateMsgDaily()
 |  |      UMD = UpdateMsgDaily()
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  | -    schedule.every().day.at("21:00").do(job_with_thread, UMD.updateJob)
 |  | 
 | 
											
												
													
														|  | -    schedule.every().day.at("21:30").do(job_with_thread, UMD.checkJob)
 |  | 
 | 
											
												
													
														|  | 
 |  | +    try:
 | 
											
												
													
														|  | 
 |  | +        schedule.every().day.at("21:00").do(job_with_thread, UMD.updateJob)
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    except Exception as error:
 | 
											
												
													
														|  | 
 |  | +        UMD.bot(account_list=["更新文章定时任务异常终止", str(error)])
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    try:
 | 
											
												
													
														|  | 
 |  | +        schedule.every().day.at("21:30").do(job_with_thread, UMD.checkJob)
 | 
											
												
													
														|  | 
 |  | +
 | 
											
												
													
														|  | 
 |  | +    except Exception as error:
 | 
											
												
													
														|  | 
 |  | +        UMD.bot(account_list=['校验账号任务异常终止', str(error)])
 | 
											
												
													
														|  |  
 |  |  
 | 
											
												
													
														|  |      while True:
 |  |      while True:
 | 
											
												
													
														|  |          schedule.run_pending()
 |  |          schedule.run_pending()
 |