Commit 904ffc2843 by lierqiang (2 years ago)
1 file changed, 7 insertions(+), 27 deletions(-)
    kuaishou/kuaishou_follow/kuaishou_follow.py  (+7 / -27)
kuaishou/kuaishou_follow/kuaishou_follow.py

@@ -300,13 +300,6 @@ class KuaiShouFollow:
         if rule_dict_1 is None or rule_dict_2 is None:
             Common.logger(log_type, crawler).warning(f"rule_dict is None")
             return
-        # if download_cnt_1 >= int(
-        #         rule_dict_1['download_cnt'].replace("=", "")[-1].replace("<", "")[-1].replace(">", "")[
-        #             -1]) and download_cnt_2 >= int(
-        #     rule_dict_2['download_cnt'].replace("=", "")[-1].replace("<", "")[-1].replace(">", "")[-1]):
-        #     Common.logger(log_type, crawler).info(
-        #         f"规则1已下载{download_cnt_1}条视频,规则2已下载{download_cnt_2}条视频\n")
-        #     return
 
         url = "https://www.kuaishou.com/graphql"
         payload = json.dumps({
@@ -319,29 +312,16 @@ class KuaiShouFollow:
             "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
         })
         headers = {
-            'User-Agent':  get_random_user_agent('pc'),
-            'Accept': '*/*',
-            'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
-            'Accept-Encoding': 'gzip, deflate, br',
+            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
             'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
             'content-type': 'application/json',
             'Origin': 'https://www.kuaishou.com',
-            'Connection': 'keep-alive',
-            'Cookie': 'kpf=PC_WEB; clientid=3; did=web_f03efdd5416bbc78fdc7cad0757d333a; kpn=KUAISHOU_VISION',
-            'Sec-Fetch-Dest': 'empty',
-            'Sec-Fetch-Mode': 'cors',
-            'Sec-Fetch-Site': 'same-origin'
+            'Cookie': 'kpf=PC_WEB; clientid=3; did=web_d5284fed9a5802fe42bf9d1a396596ed; kpn=KUAISHOU_VISION',
+
         }
 
         try:
-            urllib3.disable_warnings()
-            s = requests.session()
-            # max_retries=3 重试3次
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False,
-                              timeout=10)
-            response.close()
+            response = requests.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False, timeout=10)
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
             return
@@ -699,9 +679,9 @@ class KuaiShouFollow:
             except Exception as e:
                 Common.logger(log_type, crawler).info(f"用户:{user_name}, 抓取异常:{e}\n")
                 continue
-            # sleep_time = 120
-            # Common.logger(log_type, crawler).info(f"休眠{sleep_time}秒\n")
-            # time.sleep(sleep_time)
+            sleep_time = random.randint(1,3)
+            Common.logger(log_type, crawler).info(f"休眠{sleep_time}秒\n")
+            time.sleep(sleep_time)
 
 
 if __name__ == "__main__":