wangkun hace 2 años
padre
commit
e522376300
Se han modificado 2 ficheros con 82 adiciones y 87 borrados
  1. 61 71
      main/follow_list.py
  2. 21 16
      main/run_follow.py

+ 61 - 71
main/follow_list.py

@@ -17,6 +17,8 @@ proxies = {"http": None, "https": None}
 
 
 class Follow:
+    # 已抓取视频数量
+    get_video_count = []
     # 小程序:关注列表翻页参数
     follow_pcursor = ""
     # 小程序:个人主页视频列表翻页参数
@@ -83,7 +85,7 @@ class Follow:
 
     # 从小程序中,关注用户列表同步至云文档
     @classmethod
-    def get_sub_or_fans_list(cls, log_type):
+    def get_follow_users_to_feishu(cls, log_type):
         try:
             follow_list = []
             follow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "2OLxLr")
@@ -158,11 +160,11 @@ class Follow:
             cls.follow_pcursor = r.json()["pcursor"]
             # 翻页,直至到底了
             if cls.follow_pcursor != "no_more":
-                cls.get_sub_or_fans_list(log_type)
+                cls.get_follow_users_to_feishu(log_type)
             else:
                 Common.logger(log_type).info("从小程序中同步关注用户至云文档完成\n")
         except Exception as e:
-            Common.logger(log_type).error("从小程序中,关注用户列表同步至云文档异常:{}", e)
+            Common.logger(log_type).error("从小程序中,关注用户列表同步至云文档异常:{}\n", e)
 
     # 从云文档获取关注用户列表
     @classmethod
@@ -172,17 +174,14 @@ class Follow:
             if len(follow_sheet) == 1:
                 Common.logger(log_type).info("暂无关注用户")
             else:
-                follow_list = []
-                nick_list = []
+                follow_dict = {}
                 for i in range(1, len(follow_sheet)):
                     uid = follow_sheet[i][0]
                     nick = follow_sheet[i][1]
-                    nick_list.append(nick)
-                    follow_list.append(uid)
-                Common.logger(log_type).info("关注用户列表:{}", nick_list)
-                return follow_list
+                    follow_dict[nick] = uid
+                return follow_dict
         except Exception as e:
-            Common.logger(log_type).error("从云文档获取关注用户列表异常:{}", e)
+            Common.logger(log_type).error("从云文档获取关注用户列表异常:{}\n", e)
 
     # 从云文档获取取消关注用户列表
     @classmethod
@@ -270,7 +269,7 @@ class Follow:
 
     # 获取个人主页视频
     @classmethod
-    def get_videos_from_person(cls, log_type, uid):
+    def get_user_videos(cls, log_type, uid):
         try:
             time.sleep(1)
             url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/profile?"
@@ -322,10 +321,11 @@ class Follow:
             if "feeds" not in r.json():
                 # Feishu.bot(log_type, "follow:get_videos_from_person:"+r.text)
                 Common.logger(log_type).warning("response:{}", r.text)
+            elif len(r.json()["feeds"]) == 0:  # "feeds" is a list; comparing it to 0 was always False
+                Common.logger(log_type).warning("用户主页无视频\n")
+                return  # stop here so an empty page does not fall through to the paging recursion
             else:
                 feeds = r.json()["feeds"]
-                if len(feeds) == 0:
-                    Common.logger(log_type).warning("用户主页无视频\n")
                 for i in range(len(feeds)):
                     # 视频标题过滤话题及处理特殊字符
                     kuaishou_title = feeds[i]["caption"]
@@ -458,13 +458,16 @@ class Follow:
                             or user_id == "0" \
                             or video_title == "":
                         Common.logger(log_type).info("无效视频\n")
-                    # 视频发布时间 <= 7 天
-                    elif int(time.time()) - int(int(video_send_time) / 1000) > 604800:
-                        Common.logger("follow").info("发布时间:{},超过7天\n", time.strftime(
-                            "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
-                        cls.person_pcursor = ""
-                        break
+                    # # 视频发布时间 <= 7 天
+                    # elif int(time.time()) - int(int(video_send_time) / 1000) > 604800:
+                    #     Common.logger("follow").info("发布时间:{},超过7天\n", time.strftime(
+                    #         "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
+                    #     cls.person_pcursor = ""
+                    #     break
                     # 判断敏感词
+                    elif cls.download_rule(video_duration, video_width, video_height, video_play_cnt,
+                                           video_like_cnt, video_share_cnt) is False:
+                        Common.logger(log_type).info("不满足下载规则:{}\n".format(kuaishou_title))  # placeholder added so the title is actually logged
                     elif any(word if word in kuaishou_title else False for word in cls.sensitive_words()) is True:
                         Common.logger(log_type).info("视频已中敏感词:{}\n".format(kuaishou_title))
                     # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
@@ -499,34 +502,37 @@ class Follow:
                         time.sleep(1)
                         Feishu.update_values("follow", "kuaishou", "wW5cyb", "A2:P2", values)
                         Common.logger("follow").info("添加视频至follow_feeds成功:{}\n", video_title)
-
-                # 翻页
-                cls.person_pcursor = r.json()["pcursor"]
-                # 视频发布时间 <= 7 天
-                if int(time.time()) - int(cls.send_time) <= 604800:
-                    cls.get_videos_from_person(log_type, uid)
-                else:
-                    cls.person_pcursor = ""
-                    return
+                        cls.get_video_count.append(video_id)
+
+                        # 抓取足够多数量的视频
+                        if len(cls.get_video_count) >= 2:
+                            Common.logger(log_type).info('已抓取{}:{}条视频\n', user_name, len(cls.get_video_count))
+                            cls.person_pcursor = ""
+                            cls.get_video_count = []
+                            return
+                if len(cls.get_video_count) < 2:
+                    # 翻页
+                    cls.person_pcursor = r.json()["pcursor"]
+                    cls.get_user_videos(log_type, uid)
 
         except Exception as e:
-            # Feishu.bot(log_type, "follow:get_videos_from_person异常" + format(e))
             Common.logger(log_type).error("get_videos_from_person异常:{}\n", e)
 
     # 获取所有关注列表的用户视频
     @classmethod
-    def get_videos_from_follow(cls, log_type):
+    def get_videos_from_follow(cls, log_type, env):
         try:
-            # 已关注的用户列表 uids
-            uid_sheet = Feishu.get_values_batch(log_type, "kuaishou", "2OLxLr")
-            for i in range(1, len(uid_sheet)):
-                uid = uid_sheet[i][0]
-                nick = uid_sheet[i][1]
-                Common.logger(log_type).info("获取用户:{}主页视频\n", nick)
-                cls.get_videos_from_person(log_type, uid)
-                time.sleep(random.randint(3, 5))
+            user_list = cls.get_follow_users(log_type)
+            if not user_list:  # covers both an empty dict and None (no-users / error paths appear to return nothing)
+                Common.logger(log_type).warning('用户ID列表为空\n')
+            else:
+                for k, v in user_list.items():  # k = nickname, v = user id
+                    Common.logger(log_type).info('正在获取 {} 主页视频\n', k)
+                    cls.get_user_videos(log_type, str(v))
+                    cls.run_download_publish(log_type, env)  # download/publish right after each user's crawl
+                    time.sleep(random.randint(10, 30))  # random pause between users
         except Exception as e:
-            Common.logger(log_type).error("get_videos_from_follow异常:{}", e)
+            Common.logger(log_type).error('get_videos_from_follow异常:{}\n', e)
 
     # 下载/上传
     @classmethod
@@ -552,23 +558,6 @@ class Follow:
 
                 Common.logger(log_type).info("正在判断第{}行,视频:{}", i + 1, download_video_title)
 
-                # Common.logger(log_type).info("download_video_id:{}", download_video_id)
-                # Common.logger(log_type).info("download_video_title:{}", download_video_title)
-                # Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
-                # Common.logger(log_type).info("download_video_comment_cnt:{}", download_video_comment_cnt)
-                # Common.logger(log_type).info("download_video_like_cnt:{}", download_video_like_cnt)
-                # Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
-                # Common.logger(log_type).info("download_video_duration:{}", download_video_duration)
-                # Common.logger(log_type).info("download_video_resolution:{}", download_video_resolution)
-                # Common.logger(log_type).info("download_video_width:{}", download_video_width)
-                # Common.logger(log_type).info("download_video_height:{}", download_video_height)
-                # Common.logger(log_type).info("download_video_send_time:{}", download_video_send_time)
-                # Common.logger(log_type).info("download_user_name:{}", download_user_name)
-                # Common.logger(log_type).info("download_user_id:{}", download_user_id)
-                # Common.logger(log_type).info("download_head_url:{}", download_head_url)
-                # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
-                # Common.logger(log_type).info("download_video_url:{}", download_video_url)
-
                 # 过滤空行及空标题视频
                 if download_video_id is None \
                         or download_video_id == "" \
@@ -578,15 +567,15 @@ class Follow:
                     Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
                     Common.logger(log_type).warning("标题为空或空行,删除成功\n")
                     return
-                # 下载规则
-                elif cls.download_rule(
-                        download_video_duration, download_video_resolution.split("*")[0],
-                        download_video_resolution.split("*")[-1], download_video_play_cnt, download_video_like_cnt,
-                        download_video_share_cnt) is False:
-                    # 删除行或列,可选 ROWS、COLUMNS
-                    Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
-                    Common.logger(log_type).info("不满足下载规则:{},删除成功\n", download_video_title)
-                    return
+                # # 下载规则
+                # elif cls.download_rule(
+                #         download_video_duration, download_video_resolution.split("*")[0],
+                #         download_video_resolution.split("*")[-1], download_video_play_cnt, download_video_like_cnt,
+                #         download_video_share_cnt) is False:
+                #     # 删除行或列,可选 ROWS、COLUMNS
+                #     Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
+                #     Common.logger(log_type).info("不满足下载规则:{},删除成功\n", download_video_title)
+                #     return
                 # 从已下载视频表中去重
                 elif download_video_id in [j for m in Feishu.get_values_batch(
                         log_type, "kuaishou", "3cd128") for j in m]:
@@ -659,8 +648,7 @@ class Follow:
                     Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
                     return
         except Exception as e:
-            # Feishu.bot(log_type, "follow:download_publish异常" + format(e))
-            Common.logger(log_type).error("download_publish异常:{}", e)
+            Common.logger(log_type).error("download_publish异常:{}\n", e)
 
     # 执行下载/上传
     @classmethod
@@ -669,15 +657,17 @@ class Follow:
             while True:
                 follow_feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "wW5cyb")
                 if len(follow_feeds_sheet) == 1:
-                    Common.logger(log_type).info("下载/上传完成")
+                    Common.logger(log_type).info("下载/上传完成\n")
                     break
                 else:
                     cls.download_publish(log_type, env)
         except Exception as e:
-            Common.logger(log_type).error("run_download_publish异常:{}", e)
+            Common.logger(log_type).error("run_download_publish异常:{}\n", e)
 
 
 if __name__ == "__main__":
-    # Follow.get_videos_from_follow("follow")
     # Follow.follow_unfollow("follow", "follow", "687090964")
-    Follow.get_videos_from_person("follow", "2413743952")
+    # Follow.get_videos_from_person("follow", "2413743952")
+    # Follow.get_follow_users_to_feishu('follow')
+    # Follow.get_follow_users('follow')
+    Follow.get_videos_from_follow('follow', 'prod')  # env is now a required argument; 'prod' matches run_follow.py

+ 21 - 16
main/run_follow.py

@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2022/7/11
+import datetime
 import os
+import random
 import sys
 import time
 sys.path.append(os.getcwd())
@@ -11,23 +13,26 @@ from main.follow_list import Follow
 
 class Main:
     @classmethod
-    def main(cls):
+    def main(cls, env):
         while True:
-            Common.logger("follow").info("开始抓取个人主页视频\n")
-            # 获取所有关注列表的用户视频
-            Follow.get_videos_from_follow("follow")
-            # 下载/上传
-            Common.logger("follow").info("开始下载/上传视频")
-            Follow.run_download_publish("follow", "prod")
-            # 清除日志
-            Common.del_logs("follow")
-            Common.logger("follow").info("休眠1小时\n")
-            Follow.follow_pcursor = ""
-            Follow.person_pcursor = ""
-            Follow.send_time = 0
-            time.sleep(3600)
+            if datetime.datetime.now().hour >= 1:
+                Common.logger("follow").info("获取关注人列表\n")
+                Follow.get_follow_users('follow')
+
+                Common.logger("follow").info("开始抓取个人主页视频\n")
+                Follow.get_videos_from_follow("follow", env)
+
+                # Clear logs
+                Common.del_logs("follow")
+
+                Common.logger("follow").info("休眠{}小时\n", 24-datetime.datetime.now().hour)
+                Follow.follow_pcursor = ""
+                Follow.person_pcursor = ""
+                Follow.send_time = 0
+                time.sleep(3600*(24-datetime.datetime.now().hour))  # hours -> seconds, matching the "休眠{}小时" log line
+            else:
+                time.sleep(60)  # before 01:00: poll once a minute instead of busy-spinning in the while loop
 
 
 if __name__ == "__main__":
-    main = Main()
-    main.main()
+    Main.main('prod')