瀏覽代碼

Merge branch 'lierqiang'

lierqiang 2 年之前
父節點
當前提交
2a88bcb8bf
共有 1 個文件被更改,包括 15 次插入和 16 次刪除
  1. 15 16
      kuaishou/kuaishou_follow/kuaishou_follow.py

+ 15 - 16
kuaishou/kuaishou_follow/kuaishou_follow.py

@@ -354,9 +354,8 @@ class Follow:
     #         Common.logger(log_type, crawler).error(f"get_cookie:{e}\n")
 
     @classmethod
-    def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
+    def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=""):
             download_cnt_1, download_cnt_2 = 0, 0
-            pcursor = ""
 
             while True:
                 rule_dict_1 = cls.get_rule(log_type, crawler, 1)
@@ -381,7 +380,7 @@ class Follow:
                     "operationName": "visionProfilePhotoList",
                     "variables": {
                         "userId": out_uid,
-                        "pcursor": "",
+                        "pcursor": pcursor,
                         "page": "profile"
                     },
                     "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
@@ -571,7 +570,7 @@ class Follow:
 
                         if video_title == "" or video_url == "":
                             Common.logger(log_type, crawler).info("无效视频\n")
-                            break
+                            continue
                         elif rule_1 is True:
                             if download_cnt_1 < int(
                                     rule_dict_1['download_cnt'].replace("=", "")[-1].replace("<", "")[-1].replace(">",
@@ -608,10 +607,10 @@ class Follow:
                             Common.logger(log_type, crawler).info("不满足下载规则\n")
                             # Common.logger(log_type, crawler).info(f"feeds: {feeds}\n")
 
-                    # if pcursor == "no_more":
-                    #     Common.logger(log_type, crawler).info("已经到底了,没有更多内容了\n")
-                    #     return
-                Common.logger(log_type, crawler).info(f"作者:{out_uid},第一页抓取完毕,退出\n")
+                    if pcursor == "no_more":
+                        Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
+                        return
+                cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=pcursor)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
 
@@ -767,14 +766,14 @@ class Follow:
 
 if __name__ == "__main__":
     pass
-    # Follow.get_videoList(log_type="follow",
-    #                      crawler="kuaishou",
-    #                      strategy="定向爬虫策略",
-    #                      our_uid="6282431",
-    #                      out_uid="3x3wsqjsxu5j6de",
-    #                      oss_endpoint="out",
-    #                      env="dev",
-    #                      machine="local")
+    Follow.get_videoList(log_type="follow",
+                         crawler="kuaishou",
+                         strategy="定向爬虫策略",
+                         our_uid="54719554",
+                         out_uid="3xv5xwkfm9y9n86",
+                         oss_endpoint="out",
+                         env="dev",
+                         machine="local")
 
     # print(Follow.get_out_user_info("follow", "kuaishou", "3xgh4ja9be3wcaw"))
     # print(Follow.get_out_user_info("follow", "kuaishou", "3x5wgjhfc7tx8ue"))