wangkun há 2 anos
pai
commit
03a7da521a
3 ficheiros alterados com 123 adições e 75 exclusões
  1. 4 1
      README.md
  2. 19 1
      main/feishu_lib.py
  3. 100 73
      main/run_bot.py

+ 4 - 1
README.md

@@ -26,6 +26,9 @@ https://git.yishihui.com/Server/crawler_bot.git
 
 
 #### 需求
+2022/10/20
+1. 增加: 西瓜视频报警
+
 2022/10/19
 1. 小年糕3种爬取方式分别报警
 
@@ -36,5 +39,5 @@ https://git.yishihui.com/Server/crawler_bot.git
 1. 增加监控渠道: 公众号 / 微视
 
 2022/8/10
-1. 每隔六小时,检查一次已下载表 
+1. 定期检查一次已下载表 
 2. 已下载表的最新一条数据抓取时间,距当前时间超过 24 小时,则触发机器人报警,发送飞书报警消息

+ 19 - 1
main/feishu_lib.py

@@ -32,6 +32,8 @@ class Feishu:
     crawler_weiqun_video = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc?"
     # 视频号爬虫表
     crawler_shipinhao = 'https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?'
+    # 西瓜视频
+    crawler_xigua = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?'
 
     # 手机号
     wangkun = "13426262515"
@@ -65,6 +67,8 @@ class Feishu:
             return "shtcnoKThNquYRweaylMFVyo9Hc"
         elif crawler == 'shipinhao':
             return 'shtcn9rOdZRAGFbRkWpn7hqEHGc'
+        elif crawler == 'xigua':
+            return 'shtcnvOpx2P8vBXiV91Ot1MKIw8'
 
     # 获取飞书api token
     @classmethod
@@ -370,27 +374,41 @@ class Feishu:
                 content = "小年糕_播放量_已下载表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=c85k1C"
 
+            elif crawler == 'xigua_video':
+                content = '西瓜视频_用户主页_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=e075e9'
+            elif crawler == 'xigua_little_video':
+                content = '西瓜视频_小视频_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=hDSDnv'
+
             elif crawler == "music_album":
                 content = "音乐相册爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g"
+
             elif crawler == "bszf":
                 content = "本山祝福爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb"
+
             elif crawler == "kuaishou":
                 content = "快手爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf"
+
             elif crawler == "gzh":
                 content = "公众号爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA"
+
             elif crawler == "weiqun":
                 content = "微群爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc"
+
             elif crawler == "weishi":
                 content = "微视爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh"
+
             elif crawler == "shipinhao":
                 content = "视频号爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc"
+
             else:
                 content = "小年糕爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh"
@@ -437,7 +455,7 @@ class Feishu:
 
 
 if __name__ == "__main__":
-    Feishu.bot("bot", "xiaoniangao_hour", "嘿。嘿嘿。。嘿嘿嘿")
+    Feishu.bot("bot", "xigua_little_video", "嘿。嘿嘿。。嘿嘿嘿")
     # Feishu.get_userid("kuaishou", "huxinxue")
     
     pass

+ 100 - 73
main/run_bot.py

@@ -12,6 +12,78 @@ from main.feishu_lib import Feishu
 
 
 class Bot:
+    # # 获取各个爬虫的 feeds 表
+    # @classmethod
+    # def get_feeds_sheet(cls, log_type, crawler, sheet):
+    #     try:
+    #         if crawler == "kanyikan" and sheet == "recommend":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "kanyikan", "SdCHOM")
+    #         elif crawler == "kanyikan" and sheet == "moment":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "kanyikan", "tGqZMX")
+    #         elif crawler == "xiaoniangao" and sheet == "hour":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "ba0da4")
+    #         elif crawler == "xiaoniangao" and sheet == "person":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "k6ldje")
+    #         elif crawler == "music_album" and sheet == "recommend":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
+    #         elif crawler == "bszf" and sheet == "recommend":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "bszf", "CcHgO7")
+    #         elif crawler == "kuaishou" and sheet == "recommend":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")
+    #         elif crawler == "kuaishou" and sheet == "follow":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb")
+    #         elif crawler == "gzh" and sheet == "recommend":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
+    #         elif crawler == "weishi" and sheet == "recommend":
+    #             feeds_sheet = Feishu.get_values_batch(log_type, "weishi", "O7fCzr")
+    #         else:
+    #             feeds_sheet = "请输入{crawler}和{sheet}"
+    #
+    #         return feeds_sheet
+    #     except Exception as e:
+    #         Common.logger(log_type).error("get_feeds_sheet异常:{}", e)
+    #
+    # # feeds_sheet表报警:连续 2 小时无数据
+    # @classmethod
+    # def rebot_feeds_sheet(cls, log_type, crawler, sheet):
+    #     """
+    #     每隔一分钟获取一次表数据的数量:
+    #         1.中途有数据时,退出此次监控
+    #         2.连续2小时无数据时,触发机器人报警
+    #     """
+    #     # kanyikan_recommend_sheet = Feishu.get_values_batch(log_type, "kanyikan", "SdCHOM")
+    #     # kanyikan_moment_sheet = Feishu.get_values_batch(log_type, "kanyikan", "tGqZMX")
+    #     # xiaoniangao_hour_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "ba0da4")
+    #     # xiaoniangao_person_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "k6ldje")
+    #     # music_album_recommend_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
+    #     # bszf_recommend_sheet = Feishu.get_values_batch(log_type, "bszf", "CcHgO7")
+    #     # kuaishou_recommend_sheet = Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")
+    #     # kuaishou_follow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb")
+    #     # gzh_recommend_sheet = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
+    #
+    #     for i in range(120):
+    #         if len(cls.get_feeds_sheet(log_type, crawler, sheet)) > 1:
+    #             break
+    #         else:
+    #             time.sleep(60)
+    #             if i == 119 and crawler == "kanyikan" and sheet == "recommend":
+    #                 Feishu.bot(log_type, "kanyikan", "看一看推荐榜表,已经 2 小时无数据了😤")
+    #             elif i == 119 and crawler == "kanyikan" and sheet == "moment":
+    #                 Feishu.bot(log_type, "kanyikan", "看一看朋友圈表,已经 2 小时无数据了😤")
+    #             elif i == 119 and crawler == "xiaoniangao" and sheet == "person":
+    #                 Feishu.bot(log_type, "xiaoniangao", "小年糕用户主页表,已经 2 小时无数据了😤")
+    #             elif i == 119 and crawler == "music_album" \
+    #                     and sheet == "recommend" and datetime.datetime.now().hour < 13:
+    #                 Feishu.bot(log_type, "music_album", "音乐相册推荐表,已经 2 小时无数据了😤")
+    #             elif i == 119 and crawler == "bszf" and sheet == "recommend" and datetime.datetime.now().hour < 13:
+    #                 Feishu.bot(log_type, "bszf", "本山祝福推荐表,已经 2 小时无数据了😤")
+    #             elif i == 119 and crawler == "kuaishou" and sheet == "recommend":
+    #                 Feishu.bot(log_type, "kuaishou", "快手推荐表,已经 2 小时无数据了😤")
+    #             elif i == 119 and crawler == "kuaishou" and sheet == "follow":
+    #                 Feishu.bot(log_type, "kuaishou", "快手关注表,已经 2 小时无数据了😤")
+    #             elif i == 119 and crawler == "gzh" and sheet == "recommend":
+    #                 Feishu.bot(log_type, "gzh", "公众号推荐表,已经 2 小时无数据了😤")
+
     # 获取各个爬虫表最新一条抓取时间
     @classmethod
     def get_first_time(cls, log_type, crawler):
@@ -32,6 +104,17 @@ class Bot:
                 first_download_time = sheet[1][5]
                 first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
 
+            elif crawler == 'xigua_video':
+                sheet = Feishu.get_values_batch(log_type, "xigua", "e075e9")
+                # 已下载表,最新一条视频抓取时间
+                first_download_time = sheet[1][5]
+                first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
+            elif crawler == 'xigua_little_video':
+                sheet = Feishu.get_values_batch(log_type, "xigua", "hDSDnv")
+                # 已下载表,最新一条视频抓取时间
+                first_download_time = sheet[1][5]
+                first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
+
             elif crawler == "kanyikan":
                 sheet = Feishu.get_values_batch(log_type, "kanyikan", "20ce0c")
                 # 已下载表,最新一条视频抓取时间
@@ -90,78 +173,6 @@ class Bot:
         except Exception as e:
             Common.logger(log_type).error("get_first_time异常:{}\n", e)
 
-    # 获取各个爬虫的 feeds 表
-    @classmethod
-    def get_feeds_sheet(cls, log_type, crawler, sheet):
-        try:
-            if crawler == "kanyikan" and sheet == "recommend":
-                feeds_sheet = Feishu.get_values_batch(log_type, "kanyikan", "SdCHOM")
-            elif crawler == "kanyikan" and sheet == "moment":
-                feeds_sheet = Feishu.get_values_batch(log_type, "kanyikan", "tGqZMX")
-            elif crawler == "xiaoniangao" and sheet == "hour":
-                feeds_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "ba0da4")
-            elif crawler == "xiaoniangao" and sheet == "person":
-                feeds_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "k6ldje")
-            elif crawler == "music_album" and sheet == "recommend":
-                feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
-            elif crawler == "bszf" and sheet == "recommend":
-                feeds_sheet = Feishu.get_values_batch(log_type, "bszf", "CcHgO7")
-            elif crawler == "kuaishou" and sheet == "recommend":
-                feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")
-            elif crawler == "kuaishou" and sheet == "follow":
-                feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb")
-            elif crawler == "gzh" and sheet == "recommend":
-                feeds_sheet = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
-            elif crawler == "weishi" and sheet == "recommend":
-                feeds_sheet = Feishu.get_values_batch(log_type, "weishi", "O7fCzr")
-            else:
-                feeds_sheet = "请输入{crawler}和{sheet}"
-
-            return feeds_sheet
-        except Exception as e:
-            Common.logger(log_type).error("get_feeds_sheet异常:{}", e)
-
-    # feeds_sheet表报警:连续 2 小时无数据
-    @classmethod
-    def rebot_feeds_sheet(cls, log_type, crawler, sheet):
-        """
-        每隔一分钟获取一次表数据的数量:
-            1.中途有数据时,退出此次监控
-            2.连续2小时无数据时,触发机器人报警
-        """
-        # kanyikan_recommend_sheet = Feishu.get_values_batch(log_type, "kanyikan", "SdCHOM")
-        # kanyikan_moment_sheet = Feishu.get_values_batch(log_type, "kanyikan", "tGqZMX")
-        # xiaoniangao_hour_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "ba0da4")
-        # xiaoniangao_person_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "k6ldje")
-        # music_album_recommend_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
-        # bszf_recommend_sheet = Feishu.get_values_batch(log_type, "bszf", "CcHgO7")
-        # kuaishou_recommend_sheet = Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")
-        # kuaishou_follow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb")
-        # gzh_recommend_sheet = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
-
-        for i in range(120):
-            if len(cls.get_feeds_sheet(log_type, crawler, sheet)) > 1:
-                break
-            else:
-                time.sleep(60)
-                if i == 119 and crawler == "kanyikan" and sheet == "recommend":
-                    Feishu.bot(log_type, "kanyikan", "看一看推荐榜表,已经 2 小时无数据了😤")
-                elif i == 119 and crawler == "kanyikan" and sheet == "moment":
-                    Feishu.bot(log_type, "kanyikan", "看一看朋友圈表,已经 2 小时无数据了😤")
-                elif i == 119 and crawler == "xiaoniangao" and sheet == "person":
-                    Feishu.bot(log_type, "xiaoniangao", "小年糕用户主页表,已经 2 小时无数据了😤")
-                elif i == 119 and crawler == "music_album" \
-                        and sheet == "recommend" and datetime.datetime.now().hour < 13:
-                    Feishu.bot(log_type, "music_album", "音乐相册推荐表,已经 2 小时无数据了😤")
-                elif i == 119 and crawler == "bszf" and sheet == "recommend" and datetime.datetime.now().hour < 13:
-                    Feishu.bot(log_type, "bszf", "本山祝福推荐表,已经 2 小时无数据了😤")
-                elif i == 119 and crawler == "kuaishou" and sheet == "recommend":
-                    Feishu.bot(log_type, "kuaishou", "快手推荐表,已经 2 小时无数据了😤")
-                elif i == 119 and crawler == "kuaishou" and sheet == "follow":
-                    Feishu.bot(log_type, "kuaishou", "快手关注表,已经 2 小时无数据了😤")
-                elif i == 119 and crawler == "gzh" and sheet == "recommend":
-                    Feishu.bot(log_type, "gzh", "公众号推荐表,已经 2 小时无数据了😤")
-
     # 触发机器人报警:超过24小时没有新入库的视频
     @classmethod
     def robot_download_sheet(cls, log_type, crawler, duration):
@@ -188,6 +199,16 @@ class Bot:
                 Feishu.bot(log_type, crawler, "小年糕_播放量_已下载表,超过24小时没有新视频入库了😤")
                 Common.logger(log_type).warning("小年糕_播放量_已下载表,超过24小时没有新视频入库了😤\n")
 
+            # 西瓜视频
+            elif crawler == "xigua_video" and (
+                    int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
+                Feishu.bot(log_type, crawler, "西瓜视频_用户主页_已下载表,超过24小时没有新视频入库了😤")
+                Common.logger(log_type).warning("西瓜视频_用户主页_已下载表,超过24小时没有新视频入库了😤\n")
+            elif crawler == "xigua_little_video" and (
+                    int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
+                Feishu.bot(log_type, crawler, "西瓜视频_小视频_已下载表,超过24小时没有新视频入库了😤")
+                Common.logger(log_type).warning("西瓜视频_小视频_已下载表,超过24小时没有新视频入库了😤\n")
+
             # 音乐相册爬虫报警
             elif crawler == "music_album" and (
                     int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
@@ -247,6 +268,10 @@ class Bot:
                 Bot.robot_download_sheet("bot", "xiaoniangao_person", duration)
                 Bot.robot_download_sheet("bot", "xiaoniangao_play", duration)
 
+                Common.logger("bot").info("监控西瓜视频已下载表")
+                Bot.robot_download_sheet("bot", "xigua_video", duration)
+                # Bot.robot_download_sheet("bot", "xigua_little_video", duration)
+
                 Common.logger("bot").info("监控本山祝福已下载表")
                 Bot.robot_download_sheet("bot", "bszf", duration)
 
@@ -276,5 +301,7 @@ class Bot:
 
 
 if __name__ == "__main__":
-    # Bot.robot_download_sheet("bot", "gzh", 10)
+    # Bot.robot_download_sheet("bot", "xigua_video", 1)
     Bot.main()
+
+    pass