lierqiang 2 роки тому
батько
коміт
8ce2746a02
1 змінених файлів з 33 додано та 37 видалено
  1. 33 37
      kuaishou/kuaishou_follow/kuaishou_follow.py

+ 33 - 37
kuaishou/kuaishou_follow/kuaishou_follow.py

@@ -122,6 +122,7 @@ class KuaiShouFollow:
     def get_out_user_info(cls, log_type, crawler, out_uid):
         try:
             url = "https://www.kuaishou.com/graphql"
+
             payload = json.dumps({
                 "operationName": "visionProfile",
                 "variables": {
@@ -130,20 +131,17 @@ class KuaiShouFollow:
                 "query": "query visionProfile($userId: String) {\n  visionProfile(userId: $userId) {\n    result\n    hostName\n    userProfile {\n      ownerCount {\n        fan\n        photo\n        follow\n        photo_public\n        __typename\n      }\n      profile {\n        gender\n        user_name\n        user_id\n        headurl\n        user_text\n        user_profile_bg_url\n        __typename\n      }\n      isFollowing\n      __typename\n    }\n    __typename\n  }\n}\n"
             })
             headers = {
-                'Accept-Language': 'zh-CN,zh;q=0.9',
-                'Connection': 'keep-alive',
-                'Cookie': 'kpf=PC_WEB; clientid=3; did=web_3f264336f6a6c191cd36fb15e87ab708; userId=1321650328; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqAB7bjkVapctEeh2Vh6_w9YmSXFnxBUvy3dAIZZmpyd9sAJciYB48W_Ch7rN3r1mhKtJCtBGFqMX-cTy1RoGLLmaKpwQTvxjew1nsH6JacRsJf6qB0N273lzzmGeXQPxb-MVwqtoyvxL8bLJ0DcldtHb1Q36U4efpRFse9WYLL9PtlsEprI6xORB6a009HLlRKiKMzma5s_nhdwr5xt1QwnphoStVKEb-xUGkLo9u0A7O3lj4AGIiDTIZw_4BbSmp0oOBtTtItbuywLAU3zSIErl1q6F5AW8SgFMAE; kuaishou.server.web_ph=0efe610176ceb53a303c1256cd0f6ff0325e; kpn=KUAISHOU_VISION',
+                'Accept': '*/*',
+                'Content-Type': 'application/json',
                 'Origin': 'https://www.kuaishou.com',
+                'Cookie': 'did=web_b16e6ef862b22f8276ebec7ac73b5a72; clientid=3; kpf=PC_WEB; kpn=KUAISHOU_VISION',
+                'Content-Length': '552',
+                'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
+                'Host': 'www.kuaishou.com',
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
                 'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
-                'Sec-Fetch-Dest': 'empty',
-                'Sec-Fetch-Mode': 'cors',
-                'Sec-Fetch-Site': 'same-origin',
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
-                'accept': '*/*',
-                'content-type': 'application/json',
-                'sec-ch-ua': '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
-                'sec-ch-ua-mobile': '?0',
-                'sec-ch-ua-platform': '"macOS"'
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Connection': 'keep-alive'
             }
             urllib3.disable_warnings()
             s = requests.session()
@@ -359,14 +357,13 @@ class KuaiShouFollow:
     def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=""):
         download_cnt_1, download_cnt_2 = 0, 0
 
-        while True:
-            rule_dict_1 = cls.get_rule(log_type, crawler, 1)
-            rule_dict_2 = cls.get_rule(log_type, crawler, 2)
-            if rule_dict_1 is None or rule_dict_2 is None:
-                Common.logger(log_type, crawler).warning(f"rule_dict is None, 10秒后重试")
-                time.sleep(10)
-            else:
-                break
+        rule_dict_1 = cls.get_rule(log_type, crawler, 1)
+        rule_dict_2 = cls.get_rule(log_type, crawler, 2)
+        if rule_dict_1 is None or rule_dict_2 is None:
+            Common.logger(log_type, crawler).warning(f"rule_dict is None, 10秒后重试")
+            time.sleep(10)
+        else:
+            return
 
         try:
             if download_cnt_1 >= int(
@@ -378,29 +375,28 @@ class KuaiShouFollow:
                 return
 
             url = "https://www.kuaishou.com/graphql"
+
             payload = json.dumps({
                 "operationName": "visionProfilePhotoList",
                 "variables": {
                     "userId": out_uid,
-                    "pcursor": pcursor,
+                    "pcursor": "",
                     "page": "profile"
                 },
                 "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
             })
-
             headers = {
-                'User-Agent': get_random_user_agent('pc'),
                 'Accept': '*/*',
-                'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
-                'Accept-Encoding': 'gzip, deflate, br',
-                'Referer': f'https://www.kuaishou.com/profile/{out_uid}',
-                'content-type': 'application/json',
+                'Content-Type': 'application/json',
                 'Origin': 'https://www.kuaishou.com',
-                'Connection': 'keep-alive',
-                'Cookie': 'kpf=PC_WEB; clientid=3; did=web_3f264336f6a6c191cd36fb15e87ab708; kpn=KUAISHOU_VISION',
-                'Sec-Fetch-Dest': 'empty',
-                'Sec-Fetch-Mode': 'cors',
-                'Sec-Fetch-Site': 'same-origin'
+                'Cookie': 'did=web_b16e6ef862b22f8276ebec7ac73b5a72; clientid=3; kpf=PC_WEB; kpn=KUAISHOU_VISION',
+                'Content-Length': '1244',
+                'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
+                'Host': 'www.kuaishou.com',
+                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
+                'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
+                'Accept-Encoding': 'gzip, deflate, br',
+                'Connection': 'keep-alive'
             }
             urllib3.disable_warnings()
             s = requests.session()
@@ -609,11 +605,11 @@ class KuaiShouFollow:
                         Common.logger(log_type, crawler).info("不满足下载规则\n")
                         # Common.logger(log_type, crawler).info(f"feeds: {feeds}\n")
 
-                if pcursor == "no_more":
-                    Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
-                    return
-            cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine,
-                              pcursor=pcursor)
+                # if pcursor == "no_more":
+                #     Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
+                #     return
+            # cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine,
+            #                   pcursor=pcursor)
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")