wangkun vor 2 Jahren
Ursprung
Commit
ee3f5c47e0
4 geänderte Dateien mit 61 neuen und 35 gelöschten Zeilen
  1. 3 0
      README.md
  2. 52 33
      main/follow_list.py
  3. 2 2
      main/recommend_list.py
  4. 4 0
      main/run_follow.py

+ 3 - 0
README.md

@@ -17,6 +17,9 @@
 
 
 #### 需求
+2022/9/29
+1. 关注榜抓取 100 条/天
+2. 关注榜抓取 1 条/人/次
 
 2022/9/16
 1. 快手关注列表爬取门槛调整:

+ 52 - 33
main/follow_list.py

@@ -7,18 +7,17 @@ import sys
 import time
 import requests
 import urllib3
-
 sys.path.append(os.getcwd())
 from main.common import Common
 from main.feishu_lib import Feishu
 from main.publish import Publish
-
 proxies = {"http": None, "https": None}
 
 
 class Follow:
     # 已抓取视频数量
-    get_video_count = []
+    get_person_video_count = []
+    get_all_video_count = []
     # 小程序:关注列表翻页参数
     follow_pcursor = ""
     # 小程序:个人主页视频列表翻页参数
@@ -83,6 +82,23 @@ class Follow:
         else:
             return False
 
+    # Delete the followed-user rows from the Feishu sheet "2OLxLr", one row per fetch, until a single row is left
+    @classmethod
+    def del_follow_user_from_feishu(cls, log_type):
+        try:
+            while True:  # the sheet is re-read after every single-row deletion
+                follow_sheet = Feishu.get_values_batch(log_type, "kuaishou", "2OLxLr")
+                if len(follow_sheet) == 1:  # one row remaining (presumably the header row; confirm)
+                    Common.logger(log_type).info('删除完成\n')
+                    return
+                else:
+                    for i in range(1, len(follow_sheet)):  # NOTE(review): a 0-row sheet skips this loop and spins forever; confirm it cannot occur
+                        Feishu.dimension_range(log_type, "kuaishou", "2OLxLr", 'ROWS', i+1, i+1)  # i is always 1 here, so row 2 is the one passed
+                        time.sleep(0.5)
+                        break  # only the first iteration ever runs; the outer loop then reloads the sheet
+        except Exception as e:
+            Common.logger(log_type).error('del_follow_user_from_feishu异常:{}', e)
+
     # 从小程序中,关注用户列表同步至云文档
     @classmethod
     def get_follow_users_to_feishu(cls, log_type):
@@ -178,7 +194,10 @@ class Follow:
                 for i in range(1, len(follow_sheet)):
                     uid = follow_sheet[i][0]
                     nick = follow_sheet[i][1]
-                    follow_dict[nick] = uid
+                    if uid is None or nick is None:
+                        pass
+                    else:
+                        follow_dict[nick] = uid
                 return follow_dict
         except Exception as e:
             Common.logger(log_type).error("从云文档获取关注用户列表异常:{}\n", e)
@@ -277,8 +296,8 @@ class Follow:
                 "content-type": "application/json",
                 "Accept-Encoding": "gzip,compress,br,deflate",
                 "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) '
-                              'AppleWebKit/605.1.15 (KHTML, like Gecko)'
-                              ' Mobile/15E148 MicroMessenger/8.0.25(0x1800192a) NetType/WIFI Language/zh_CN',
+                              'AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
+                              'MicroMessenger/8.0.26(0x18001a34) NetType/WIFI Language/zh_CN',
                 "Referer": str(cls.Referer),
             }
             params = {
@@ -287,7 +306,6 @@ class Follow:
             }
             cookies = {
                 "did": str(cls.did),
-                "preMinaVersion": "v3.114.0",
                 "sid": "kuaishou.wechat.app",
                 "appId": "ks_wechat_small_app_2",
                 "clientid": "13",
@@ -295,7 +313,7 @@ class Follow:
                 "kpn": "WECHAT_SMALL_APP",
                 "kpf": "OUTSIDE_IOS_H5",
                 "language": "zh_CN",
-                "smallAppVersion": "v3.129.0",
+                "smallAppVersion": "v3.131.0",
                 "mod": "iPhone(11<iPhone12%2C1>)",
                 "sys": "iOS%2014.7.1",
                 'wechatVersion': '8.0.26',
@@ -433,18 +451,11 @@ class Follow:
 
                     Common.logger(log_type).info("video_title:{}".format(video_title))
                     Common.logger(log_type).info("user_name:{}".format(user_name))
-                    Common.logger(log_type).info("video_id:{}".format(video_id))
                     Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
                     Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
-                    Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
-                    # Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
                     Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
-                    # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
                     Common.logger(log_type).info("video_send_time:{}".format(
                         time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
-                    # Common.logger(log_type).info("user_id:{}".format(user_id))
-                    # Common.logger(log_type).info("head_url:{}".format(head_url))
-                    # Common.logger(log_type).info("cover_url:{}".format(cover_url))
                     Common.logger(log_type).info("video_url:{}".format(video_url))
 
                     # 过滤无效视频
@@ -502,15 +513,20 @@ class Follow:
                         time.sleep(1)
                         Feishu.update_values("follow", "kuaishou", "wW5cyb", "A2:T2", values)
                         Common.logger("follow").info("添加视频至follow_feeds成功:{}\n", video_title)
-                        cls.get_video_count.append(video_id)
+                        cls.get_person_video_count.append(video_id)
 
                         # 抓取足够多数量的视频
-                        if len(cls.get_video_count) >= 2:
-                            Common.logger(log_type).info('已抓取{}:{}条视频\n', user_name, len(cls.get_video_count))
+                        if len(cls.get_person_video_count) >= 1:
+                            Common.logger(log_type).info('已抓取{}:{}条视频\n', user_name, len(cls.get_person_video_count))
                             cls.person_pcursor = ""
-                            cls.get_video_count = []
+                            cls.get_person_video_count = []
                             return
-                if len(cls.get_video_count) < 2:
+                if r.json()["pcursor"] == 'no_more':
+                    Common.logger(log_type).info('没有更多作品了\n')
+                    return
+                elif len(cls.get_person_video_count) < 1:
+                    Common.logger(log_type).info('休眠 10-20 秒,翻页')
+                    time.sleep(random.randint(10, 20))
                     # 翻页
                     cls.person_pcursor = r.json()["pcursor"]
                     cls.get_user_videos(log_type, uid)
@@ -528,9 +544,16 @@ class Follow:
             else:
                 for k, v in user_list.items():
                     Common.logger(log_type).info('正在获取 {} 主页视频\n', k)
+                    cls.person_pcursor = ""
                     cls.get_user_videos(log_type, str(v))
                     cls.run_download_publish(log_type, env)
-                    time.sleep(random.randint(10, 30))
+                    if len(cls.get_all_video_count) >= 100:
+                        cls.get_all_video_count = []
+                        Common.logger(log_type).info('今日已抓取{}条视频\n', len(cls.get_all_video_count))
+                        return
+                    else:
+                        Common.logger(log_type).info('随机休眠 10-30 秒\n')
+                        time.sleep(random.randint(10, 30))
         except Exception as e:
             Common.logger(log_type).error('get_videos_from_follow异常:{}\n', e)
 
@@ -567,15 +590,6 @@ class Follow:
                     Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
                     Common.logger(log_type).warning("标题为空或空行,删除成功\n")
                     return
-                # # 下载规则
-                # elif cls.download_rule(
-                #         download_video_duration, download_video_resolution.split("*")[0],
-                #         download_video_resolution.split("*")[-1], download_video_play_cnt, download_video_like_cnt,
-                #         download_video_share_cnt) is False:
-                #     # 删除行或列,可选 ROWS、COLUMNS
-                #     Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
-                #     Common.logger(log_type).info("不满足下载规则:{},删除成功\n", download_video_title)
-                #     return
                 # 从已下载视频表中去重
                 elif str(download_video_id) in [j for m in Feishu.get_values_batch(
                         log_type, "kuaishou", "3cd128") for j in m]:
@@ -616,9 +630,8 @@ class Follow:
                     our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
                     Common.logger(log_type).info("视频上传完成:{}", download_video_title)
 
-                    # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
-                    Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
                     # 视频ID工作表,插入首行
+                    time.sleep(1)
                     Feishu.insert_columns(log_type, "kuaishou", "3cd128", "ROWS", 1, 2)
                     # 视频ID工作表,首行写入数据
                     upload_time = int(time.time())
@@ -642,6 +655,8 @@ class Follow:
                                str(download_video_url)]]
                     time.sleep(1)
                     Feishu.update_values(log_type, "kuaishou", "3cd128", "E2:Z2", values)
+                    cls.get_all_video_count.append(download_video_id)
+                    Common.logger(log_type).info("保存视频ID至已下载云文档成功:{}", download_video_title)
 
                     # 删除行或列,可选 ROWS、COLUMNS
                     Feishu.dimension_range(log_type, "kuaishou", "wW5cyb", "ROWS", i + 1, i + 1)
@@ -671,4 +686,8 @@ if __name__ == "__main__":
     # Follow.get_videos_from_person("follow", "2413743952")
     # Follow.get_follow_users_to_feishu('follow')
     # Follow.get_follow_users('follow')
-    Follow.get_videos_from_follow('follow', 'dev')
+    # Follow.get_videos_from_follow('follow', 'dev')
+    # Follow.del_follow_user_from_feishu('follow')
+    Follow.get_user_videos('follow', '240529022')
+
+    pass

+ 2 - 2
main/recommend_list.py

@@ -392,9 +392,8 @@ class KuaiShou:
                     our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
                     Common.logger(log_type).info("视频上传完成:{}", download_video_title)
 
-                    # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
-                    Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
                     # 视频ID工作表,插入首行
+                    time.sleep(1)
                     Feishu.insert_columns(log_type, "kuaishou", "3cd128", "ROWS", 1, 2)
                     # 视频ID工作表,首行写入数据
                     upload_time = int(time.time())
@@ -418,6 +417,7 @@ class KuaiShou:
                                str(download_video_url)]]
                     time.sleep(1)
                     Feishu.update_values(log_type, "kuaishou", "3cd128", "E2:Z2", values)
+                    Common.logger(log_type).info("视频保存至云文档成功:{}", download_video_title)
 
                     # 删除行或列,可选 ROWS、COLUMNS
                     time.sleep(1)

+ 4 - 0
main/run_follow.py

@@ -16,6 +16,10 @@ class Main:
     def main(cls, env):
         while True:
             if datetime.datetime.now().hour >= 1:
+                # Common.logger("follow").info("同步关注人列表\n")
+                # Follow.del_follow_user_from_feishu('follow')
+                # Follow.get_follow_users_to_feishu('follow')
+
                 Common.logger("follow").info("获取关注人列表\n")
                 Follow.get_follow_users('follow')