wangkun 2 years ago
parent
commit
c54289803b
3 changed files with 88 additions and 13 deletions
  1. 3 0
      README.md
  2. 59 12
      main/feishu_lib.py
  3. 26 1
      main/run_bot.py

+ 3 - 0
README.md

@@ -26,6 +26,9 @@ https://git.yishihui.com/Server/crawler_bot.git
 
 
 #### 需求
+2022/11/03
+1. 增加: 知乎 PC 端爬虫报警
+
 2022/10/20
 1. 增加: 西瓜视频报警
 

+ 59 - 12
main/feishu_lib.py

@@ -34,12 +34,8 @@ class Feishu:
     crawler_shipinhao = 'https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc?'
     # 西瓜视频
     crawler_xigua = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?'
-
-    # 手机号
-    wangkun = "13426262515"
-    gaonannan = "18501180073"
-    xinxin = "15546206651"
-    huxinxue = "18832292015"
+    # 知乎 PC 端
+    crawler_zhihu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?'
 
     # 飞书路径token
     @classmethod
@@ -69,6 +65,8 @@ class Feishu:
             return 'shtcn9rOdZRAGFbRkWpn7hqEHGc'
         elif crawler == 'xigua':
             return 'shtcnvOpx2P8vBXiV91Ot1MKIw8'
+        elif crawler == 'zhihu':
+            return 'shtcnkGPBmGsjaqapgzouuj8MXe'
 
     # 获取飞书api token
     @classmethod
@@ -334,14 +332,23 @@ class Feishu:
                 "Authorization": "Bearer " + cls.get_token(log_type),
                 "Content-Type": "application/json; charset=utf-8"
             }
+            # 手机号
+            wangkun = "13426262515"
+            gaonannan = "18501180073"
+            xinxin = "15546206651"
+            huxinxue = "18832292015"
+            wuchaoyue = "15712941385"
+
             if username == "wangkun":
-                username = cls.wangkun
+                username = wangkun
             elif username == "gaonannan":
-                username = cls.gaonannan
+                username = gaonannan
             elif username == "xinxin":
-                username = cls.xinxin
+                username = xinxin
             elif username == "huxinxue":
-                username = cls.huxinxue
+                username = huxinxue
+            elif username == "wuchaoyue":
+                username = wuchaoyue
             data = {"mobiles": [username]}
             urllib3.disable_warnings()
             r = requests.get(url=url, headers=headers, params=data, verify=False, proxies=proxies)
@@ -363,58 +370,98 @@ class Feishu:
             if crawler == "kanyikan":
                 content = "看一看爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "gaonannan")) + "></at>\n"
 
             elif crawler == "xiaoniangao_hour":
                 content = "小年糕_小时级_已下载表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "gaonannan")) + "></at>\n"
             elif crawler == "xiaoniangao_person":
                 content = "小年糕_用户主页_已下载表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=Wu0CeL"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "gaonannan")) + "></at>\n"
             elif crawler == "xiaoniangao_play":
                 content = "小年糕_播放量_已下载表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=c85k1C"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "gaonannan")) + "></at>\n"
 
             elif crawler == 'xigua_video':
                 content = '西瓜视频_用户主页_已下载表'
                 sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=e075e9'
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "wuchaoyue")) + "></at>\n"
             elif crawler == 'xigua_little_video':
                 content = '西瓜视频_小视频_已下载表'
                 sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnvOpx2P8vBXiV91Ot1MKIw8?sheet=hDSDnv'
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "wuchaoyue")) + "></at>\n"
+
+            elif crawler == 'zhihu_hot':
+                content = '知乎_热门_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?sheet=8871e3'
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "huxinxue")) + "></at>\n"
+            elif crawler == 'zhihu_follow':
+                content = '知乎_定向_已下载表'
+                sheet_url = 'https://w42nne6hzg.feishu.cn/sheets/shtcnkGPBmGsjaqapgzouuj8MXe?sheet=4MGuux'
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "huxinxue")) + "></at>\n"
 
             elif crawler == "music_album":
                 content = "音乐相册爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnT6zvmfsYe1g0iv4pt7855g"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "gaonannan")) + "></at>\n"
 
             elif crawler == "bszf":
                 content = "本山祝福爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "gaonannan")) + "></at>\n"
 
             elif crawler == "kuaishou_follow":
                 content = "快手_用户主页_已下载表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?sheet=fYdA8F"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "xinxin")) + "></at>\n"
             elif crawler == "kuaishou_recommend":
                 content = "快手_推荐榜_已下载表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnICEfaw9llDNQkKgdymM1xf?sheet=3cd128"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "xinxin")) + "></at>\n"
 
             elif crawler == "gzh":
                 content = "公众号爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnexNXnpDLHhARw0QdiwbYuA"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "huxinxue")) + "></at>\n"
 
             elif crawler == "weiqun":
                 content = "微群爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnoKThNquYRweaylMFVyo9Hc"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "xinxin")) + "></at>\n"
 
             elif crawler == "weishi":
                 content = "微视爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "xinxin")) + "></at>\n"
 
             elif crawler == "shipinhao":
                 content = "视频号爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcn9rOdZRAGFbRkWpn7hqEHGc"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at> <at id=" + str(
+                    cls.get_userid(log_type, "xinxin")) + "></at>\n"
 
             else:
                 content = "小年糕爬虫表"
                 sheet_url = "https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh"
+                users = "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at>\n"
 
             data = json.dumps({
                 "msg_type": "interactive",
@@ -426,7 +473,7 @@ class Feishu:
                     "elements": [{
                         "tag": "div",
                         "text": {
-                            "content": "\n<at id=" + str(cls.get_userid(log_type, "wangkun")) + "></at>\n" + text,
+                            "content": users + text,
                             "tag": "lark_md"
                         }
                     }, {
@@ -458,7 +505,7 @@ class Feishu:
 
 
 if __name__ == "__main__":
-    Feishu.bot("bot", "kuaishou_recommend", "嘿。嘿嘿。。嘿嘿嘿")
+    Feishu.bot("bot", "zhihu_hot", "嘿。嘿嘿。。嘿嘿嘿")
     # Feishu.get_userid("kuaishou", "huxinxue")
     
     pass

+ 26 - 1
main/run_bot.py

@@ -115,6 +115,17 @@ class Bot:
                 first_download_time = sheet[1][5]
                 first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
 
+            elif crawler == 'zhihu_hot':
+                sheet = Feishu.get_values_batch(log_type, "zhihu", "8871e3")
+                # 已下载表,最新一条视频抓取时间
+                first_download_time = sheet[1][5]
+                first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
+            elif crawler == 'zhihu_follow':
+                sheet = Feishu.get_values_batch(log_type, "zhihu", "4MGuux")
+                # 已下载表,最新一条视频抓取时间
+                first_download_time = sheet[1][5]
+                first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
+
             elif crawler == "kanyikan":
                 sheet = Feishu.get_values_batch(log_type, "kanyikan", "20ce0c")
                 # 已下载表,最新一条视频抓取时间
@@ -214,6 +225,16 @@ class Bot:
                 Feishu.bot(log_type, crawler, "西瓜视频_小视频_已下载表,超过24小时没有新视频入库了😤")
                 Common.logger(log_type).warning("西瓜视频_小视频_已下载表,超过24小时没有新视频入库了😤\n")
 
+            # 知乎
+            elif crawler == "zhihu_hot" and (
+                    int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
+                Feishu.bot(log_type, crawler, "知乎_热门_已下载表,超过24小时没有新视频入库了😤")
+                Common.logger(log_type).warning("知乎_热门_已下载表,超过24小时没有新视频入库了😤\n")
+            elif crawler == "zhihu_follow" and (
+                    int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
+                Feishu.bot(log_type, crawler, "知乎_定向_已下载表,超过24小时没有新视频入库了😤")
+                Common.logger(log_type).warning("知乎_定向_已下载表,超过24小时没有新视频入库了😤\n")
+
             # 音乐相册爬虫报警
             elif crawler == "music_album" and (
                     int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
@@ -280,6 +301,10 @@ class Bot:
                 Bot.robot_download_sheet("bot", "xigua_video", duration)
                 # Bot.robot_download_sheet("bot", "xigua_little_video", duration)
 
+                Common.logger('bot').info('监控知乎已下载表')
+                Bot.robot_download_sheet("bot", "zhihu_hot", duration)
+                Bot.robot_download_sheet("bot", "zhihu_follow", duration)
+
                 Common.logger("bot").info("监控本山祝福已下载表")
                 Bot.robot_download_sheet("bot", "bszf", duration)
 
@@ -311,7 +336,7 @@ class Bot:
 
 if __name__ == "__main__":
     # Bot.robot_download_sheet("bot", "kuaishou_recommend", 1)
-    # Bot.robot_download_sheet("bot", "kuaishou_follow", 1)
+    # Bot.robot_download_sheet("bot", "zhihu_follow", 1)
     Bot.main()
 
     pass