# run_bot.py
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/8/9
  4. # import datetime
  5. import datetime
  6. import os
  7. import sys
  8. import time
  9. sys.path.append(os.getcwd())
  10. from main.common import Common
  11. from main.feishu_lib import Feishu
  12. class Bot:
  13. # 获取各个爬虫表最新一条抓取时间
  14. @classmethod
  15. def get_first_time(cls, log_type, crawler):
  16. try:
  17. if crawler == "xiaoniangao_hour":
  18. sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "yatRv2")
  19. # 已下载表,最新一条视频抓取时间
  20. first_download_time = sheet[1][5]
  21. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  22. elif crawler == "xiaoniangao_person":
  23. sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "Wu0CeL")
  24. # 已下载表,最新一条视频抓取时间
  25. first_download_time = sheet[1][5]
  26. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  27. elif crawler == "xiaoniangao_play":
  28. sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "c85k1C")
  29. # 已下载表,最新一条视频抓取时间
  30. first_download_time = sheet[1][5]
  31. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  32. elif crawler == "kanyikan":
  33. sheet = Feishu.get_values_batch(log_type, "kanyikan", "20ce0c")
  34. # 已下载表,最新一条视频抓取时间
  35. first_download_time = sheet[1][5]
  36. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  37. elif crawler == "music_album":
  38. sheet = Feishu.get_values_batch(log_type, "music_album", "f5a76e")
  39. # 已下载表,最新一条视频抓取时间
  40. first_download_time = sheet[1][5]
  41. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  42. elif crawler == "bszf":
  43. sheet = Feishu.get_values_batch(log_type, "bszf", "440018")
  44. # 已下载表,最新一条视频抓取时间
  45. first_download_time = sheet[1][4]
  46. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  47. elif crawler == "kuaishou":
  48. sheet = Feishu.get_values_batch(log_type, "kuaishou", "3cd128")
  49. # 已下载表,最新一条视频抓取时间
  50. first_download_time = sheet[1][5]
  51. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  52. elif crawler == "gzh":
  53. sheet = Feishu.get_values_batch(log_type, "gzh", "fCs3BT")
  54. # 已下载表,最新一条视频抓取时间
  55. first_download_time = sheet[1][3]
  56. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  57. elif crawler == "weiqun":
  58. sheet = Feishu.get_values_batch(log_type, "weiqun", "3cd128")
  59. # 已下载表,最新一条视频抓取时间
  60. first_download_time = sheet[1][5]
  61. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  62. elif crawler == "weishi":
  63. sheet = Feishu.get_values_batch(log_type, "weishi", "caa3fa")
  64. # 已下载表,最新一条视频抓取时间
  65. first_download_time = sheet[1][5]
  66. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  67. elif crawler == "shipinhao":
  68. sheet = Feishu.get_values_batch(log_type, "shipinhao", "c77cf9")
  69. # 已下载表,最新一条视频抓取时间
  70. first_download_time = sheet[1][5]
  71. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  72. else:
  73. sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "yatRv2")
  74. # 已下载表,最新一条视频抓取时间
  75. first_download_time = sheet[1][5]
  76. first_download_time = int(time.mktime(time.strptime(first_download_time, "%Y/%m/%d %H:%M:%S")))
  77. return first_download_time
  78. except Exception as e:
  79. Common.logger(log_type).error("get_first_time异常:{}\n", e)
  80. # 获取各个爬虫的 feeds 表
  81. @classmethod
  82. def get_feeds_sheet(cls, log_type, crawler, sheet):
  83. try:
  84. if crawler == "kanyikan" and sheet == "recommend":
  85. feeds_sheet = Feishu.get_values_batch(log_type, "kanyikan", "SdCHOM")
  86. elif crawler == "kanyikan" and sheet == "moment":
  87. feeds_sheet = Feishu.get_values_batch(log_type, "kanyikan", "tGqZMX")
  88. elif crawler == "xiaoniangao" and sheet == "hour":
  89. feeds_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "ba0da4")
  90. elif crawler == "xiaoniangao" and sheet == "person":
  91. feeds_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "k6ldje")
  92. elif crawler == "music_album" and sheet == "recommend":
  93. feeds_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
  94. elif crawler == "bszf" and sheet == "recommend":
  95. feeds_sheet = Feishu.get_values_batch(log_type, "bszf", "CcHgO7")
  96. elif crawler == "kuaishou" and sheet == "recommend":
  97. feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")
  98. elif crawler == "kuaishou" and sheet == "follow":
  99. feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb")
  100. elif crawler == "gzh" and sheet == "recommend":
  101. feeds_sheet = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
  102. elif crawler == "weishi" and sheet == "recommend":
  103. feeds_sheet = Feishu.get_values_batch(log_type, "weishi", "O7fCzr")
  104. else:
  105. feeds_sheet = "请输入{crawler}和{sheet}"
  106. return feeds_sheet
  107. except Exception as e:
  108. Common.logger(log_type).error("get_feeds_sheet异常:{}", e)
  109. # feeds_sheet表报警:连续 2 小时无数据
  110. @classmethod
  111. def rebot_feeds_sheet(cls, log_type, crawler, sheet):
  112. """
  113. 每隔一分钟获取一次表数据的数量:
  114. 1.中途有数据时,退出此次监控
  115. 2.连续2小时无数据时,触发机器人报警
  116. """
  117. # kanyikan_recommend_sheet = Feishu.get_values_batch(log_type, "kanyikan", "SdCHOM")
  118. # kanyikan_moment_sheet = Feishu.get_values_batch(log_type, "kanyikan", "tGqZMX")
  119. # xiaoniangao_hour_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "ba0da4")
  120. # xiaoniangao_person_sheet = Feishu.get_values_batch(log_type, "xiaoniangao", "k6ldje")
  121. # music_album_recommend_sheet = Feishu.get_values_batch(log_type, "music_album", "69UxPo")
  122. # bszf_recommend_sheet = Feishu.get_values_batch(log_type, "bszf", "CcHgO7")
  123. # kuaishou_recommend_sheet = Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")
  124. # kuaishou_follow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb")
  125. # gzh_recommend_sheet = Feishu.get_values_batch(log_type, "gzh", "zWKFGb")
  126. for i in range(120):
  127. if len(cls.get_feeds_sheet(log_type, crawler, sheet)) > 1:
  128. break
  129. else:
  130. time.sleep(60)
  131. if i == 119 and crawler == "kanyikan" and sheet == "recommend":
  132. Feishu.bot(log_type, "kanyikan", "看一看推荐榜表,已经 2 小时无数据了😤")
  133. elif i == 119 and crawler == "kanyikan" and sheet == "moment":
  134. Feishu.bot(log_type, "kanyikan", "看一看朋友圈表,已经 2 小时无数据了😤")
  135. elif i == 119 and crawler == "xiaoniangao" and sheet == "person":
  136. Feishu.bot(log_type, "xiaoniangao", "小年糕用户主页表,已经 2 小时无数据了😤")
  137. elif i == 119 and crawler == "music_album" \
  138. and sheet == "recommend" and datetime.datetime.now().hour < 13:
  139. Feishu.bot(log_type, "music_album", "音乐相册推荐表,已经 2 小时无数据了😤")
  140. elif i == 119 and crawler == "bszf" and sheet == "recommend" and datetime.datetime.now().hour < 13:
  141. Feishu.bot(log_type, "bszf", "本山祝福推荐表,已经 2 小时无数据了😤")
  142. elif i == 119 and crawler == "kuaishou" and sheet == "recommend":
  143. Feishu.bot(log_type, "kuaishou", "快手推荐表,已经 2 小时无数据了😤")
  144. elif i == 119 and crawler == "kuaishou" and sheet == "follow":
  145. Feishu.bot(log_type, "kuaishou", "快手关注表,已经 2 小时无数据了😤")
  146. elif i == 119 and crawler == "gzh" and sheet == "recommend":
  147. Feishu.bot(log_type, "gzh", "公众号推荐表,已经 2 小时无数据了😤")
  148. # 触发机器人报警:超过24小时没有新入库的视频
  149. @classmethod
  150. def robot_download_sheet(cls, log_type, crawler, duration):
  151. """
  152. 已下载视频表:超过24小时没有新入库的视频
  153. """
  154. try:
  155. # 看一看爬虫报警
  156. if crawler == "kanyikan" and (int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  157. Feishu.bot(log_type, crawler, "看一看已下载表,超过24小时没有新视频入库了😤")
  158. Common.logger(log_type).warning("看一看已下载表,超过24小时没有新视频入库了😤\n")
  159. # 小年糕爬虫报警
  160. elif crawler == "xiaoniangao_hour" and (
  161. int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  162. Feishu.bot(log_type, crawler, "小年糕_小时级_已下载表,超过24小时没有新视频入库了😤")
  163. Common.logger(log_type).warning("小年糕_小时级_已下载表,超过24小时没有新视频入库了😤\n")
  164. elif crawler == "xiaoniangao_person" and (
  165. int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  166. Feishu.bot(log_type, crawler, "小年糕_用户主页_已下载表,超过24小时没有新视频入库了😤")
  167. Common.logger(log_type).warning("小年糕_用户主页_已下载表,超过24小时没有新视频入库了😤\n")
  168. elif crawler == "xiaoniangao_play" and (
  169. int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  170. Feishu.bot(log_type, crawler, "小年糕_播放量_已下载表,超过24小时没有新视频入库了😤")
  171. Common.logger(log_type).warning("小年糕_播放量_已下载表,超过24小时没有新视频入库了😤\n")
  172. # 音乐相册爬虫报警
  173. elif crawler == "music_album" and (
  174. int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  175. Feishu.bot(log_type, crawler, "音乐相册已下载表,超过24小时没有新视频入库了😤")
  176. Common.logger(log_type).warning("音乐相册已下载表,超过24小时没有新视频入库了😤\n")
  177. # 本山祝福爬虫报警
  178. elif crawler == "bszf" and (int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  179. Feishu.bot(log_type, crawler, "本山祝福已下载表,超过24小时没有新视频入库了😤")
  180. Common.logger(log_type).warning("本山祝福已下载表,超过24小时没有新视频入库了😤\n")
  181. # 快手爬虫报警
  182. elif crawler == "kuaishou" and (int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  183. Feishu.bot(log_type, crawler, "快手已下载表,超过24小时没有新视频入库了😤")
  184. Common.logger(log_type).warning("快手已下载表,超过24小时没有新视频入库了😤\n")
  185. # 公众号爬虫报警
  186. elif crawler == "gzh" and (int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  187. Feishu.bot(log_type, crawler, "公众号已下载表,超过24小时没有新视频入库了😤")
  188. Common.logger(log_type).warning("公众号已下载表,超过24小时没有新视频入库了😤\n")
  189. # 微群视频爬虫报警
  190. elif crawler == "weiqun" and (int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  191. Feishu.bot(log_type, crawler, "微群视频已下载表,超过24小时没有新视频入库了😤")
  192. Common.logger(log_type).warning("微群视频已下载表,超过24小时没有新视频入库了😤\n")
  193. # 微视爬虫报警
  194. elif crawler == "weishi" and (int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  195. Feishu.bot(log_type, crawler, "微视已下载表,超过24小时没有新视频入库了😤")
  196. Common.logger(log_type).warning("微视已下载表,超过24小时没有新视频入库了😤\n")
  197. # 视频号爬虫报警
  198. elif crawler == "shipinhao" and (int(time.time()) - cls.get_first_time(log_type, crawler) > int(duration)):
  199. Feishu.bot(log_type, crawler, "视频号已下载表,超过24小时没有新视频入库了😤")
  200. Common.logger(log_type).warning("视频号已下载表,超过24小时没有新视频入库了😤\n")
  201. except Exception as e:
  202. Common.logger(log_type).error("robot_alarm异常:{}", e)
  203. # 监控运行入口
  204. @classmethod
  205. def main(cls):
  206. """
  207. 每隔六小时,检查一次已下载表;
  208. 已下载表的最新一条数据抓取时间,距当前时间超过 24 小时,则触发机器人报警,发送飞书报警消息
  209. """
  210. # 已下载表,超过 24 小时无新视频入库报警
  211. duration = 3600 * 24
  212. while True:
  213. if datetime.datetime.now().hour == 10:
  214. Common.logger("bot").info("监控看一看已下载表")
  215. Bot.robot_download_sheet("bot", "kanyikan", duration)
  216. Common.logger("bot").info("监控小年糕已下载表")
  217. Bot.robot_download_sheet("bot", "xiaoniangao_hour", duration)
  218. Bot.robot_download_sheet("bot", "xiaoniangao_person", duration)
  219. Bot.robot_download_sheet("bot", "xiaoniangao_play", duration)
  220. Common.logger("bot").info("监控本山祝福已下载表")
  221. Bot.robot_download_sheet("bot", "bszf", duration)
  222. Common.logger("bot").info("监控快手已下载表")
  223. Bot.robot_download_sheet("bot", "kuaishou", duration)
  224. Common.logger("bot").info("监控微视已下载表")
  225. Bot.robot_download_sheet("bot", "weishi", duration)
  226. Common.logger("bot").info("监控视频号已下载表")
  227. Bot.robot_download_sheet("bot", "shipinhao", duration)
  228. # Common.logger("bot").info("监控公众号已下载表")
  229. # Bot.robot_download_sheet("bot", "gzh", duration)
  230. # Common.logger("bot").info("监控音乐相册已下载表")
  231. # Bot.robot_download_sheet("bot", "music_album", duration)
  232. # Common.logger("bot").info("监控微群视频已下载表")
  233. # Bot.robot_download_sheet("bot", "weiqun", duration)
  234. Common.del_logs("bot")
  235. Common.logger("bot").info("休眠{}小时", 24-datetime.datetime.now().hour)
  236. time.sleep(3600 * (24-datetime.datetime.now().hour))
  237. else:
  238. pass
  239. if __name__ == "__main__":
  240. # Bot.robot_download_sheet("bot", "gzh", 10)
  241. Bot.main()