wangkun 2 роки тому
батько
коміт
dd5034e4f9

BIN
kuaishou/.DS_Store


+ 40 - 5
kuaishou/kuaishou_follow/insert_videos.py

@@ -22,13 +22,14 @@ class Insert:
             kuaishou_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             # Common.logger(log_type, crawler).info(f"kuaishou_sheet:{kuaishou_sheet}")
             for i in range(1, len(kuaishou_sheet)):
-            # for i in range(1, 3):
+                # for i in range(1, 3):
                 if kuaishou_sheet[i][5] is None:
                     continue
                 if kuaishou_sheet[i][9] is None:
                     video_id = int(time.time())
                 else:
-                    video_id = kuaishou_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/", "").replace("/info", "")
+                    video_id = kuaishou_sheet[i][9].replace("https://admin.piaoquantv.com/cms/post-detail/",
+                                                            "").replace("/info", "")
                 if video_id == "None":
                     continue
                 video_id = int(video_id)
@@ -47,7 +48,9 @@ class Insert:
                 share_cnt = int(kuaishou_sheet[i][13])
                 # collection_cnt = 0
                 comment_cnt = int(kuaishou_sheet[i][11])
-                crawler_rule = json.dumps({"play_cnt": 5000, "comment_cnt": 0, "like_cnt": 5000, "share_cnt": 1000, "duration": 40, "publish_time": 7, "video_width": 0, "video_height": 0})
+                crawler_rule = json.dumps(
+                    {"play_cnt": 5000, "comment_cnt": 0, "like_cnt": 5000, "share_cnt": 1000, "duration": 40,
+                     "publish_time": 7, "video_width": 0, "video_height": 0})
                 width = int(kuaishou_sheet[i][15].split("*")[0])
                 height = int(kuaishou_sheet[i][15].split("*")[1])
 
@@ -173,9 +176,41 @@ class Insert:
         print(type(out_uid))
         print(out_uid)
 
+    @classmethod
+    def random_cookies(cls):  # builds random kuaishou_server_web_st / kuaishou_server_web_ph strings and prints them; nothing is returned
+        kuaishou_server_web_st="ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABaRXtfRHlzKlQVj0Nm" \
+                               "_M1G2wrIN1p6g3UTwfqfez6rkLVj6mPNt3RBAsLkyemMpvTLerPw0h41Q0lowqcImvIv5dlSGDEpQoj" \
+                               "-VTAmOR2Suzm8vCRakG7XziAWyI0PXJKhvdXms" \
+                               "-9Giy" \
+                               "_4TnoniB49Oo3m7qXjXVBCzybcWS5BO90OLkhD30GYmGEnBBvkBI2oErJy3mNbafQdBQ6SxSUHhoS" \
+                               "-1Rj5" \
+                               "-IBBNoxoIePYcxZFs4oIiCvaT7sRn" \
+                               "-zrF7X2ClPhfNh6lgClmH8MUjXszUfY_TPLCgFMAE"  # hard-coded sample value; reassigned below before it is ever read
+        kuaishou_server_web_ph="1b62b98fc28bc23a42cd85240e1fd6025983"  # likewise reassigned below before it is ever read
+        kuaishou_server_web_st_1 = ''.join(random.sample(string.ascii_letters + string.digits, 53))  # random.sample draws without replacement, so no character repeats inside one sample
+        kuaishou_server_web_st_2 = ''.join(random.sample(string.ascii_letters + string.digits, 58))+''.join(random.sample(string.ascii_letters + string.digits, 20))  # two samples joined: 58 + 20 characters
+        kuaishou_server_web_st_3 = ''.join(random.sample(string.ascii_letters + string.digits, 37))
+        kuaishou_server_web_st_4 = ''.join(random.sample(string.ascii_letters + string.digits, 4))
+        kuaishou_server_web_st_5 = ''.join(random.sample(string.ascii_letters + string.digits, 56))+''.join(random.sample(string.ascii_letters + string.digits, 20))  # two samples joined: 56 + 20 characters
+        kuaishou_server_web_st_6 = ''.join(random.sample(string.ascii_letters + string.digits, 4))
+        kuaishou_server_web_st_7 = ''.join(random.sample(string.ascii_letters + string.digits, 28))
+        kuaishou_server_web_st_8 = ''.join(random.sample(string.ascii_letters + string.digits, 40))
+        kuaishou_server_web_st = f"{kuaishou_server_web_st_1}" \
+                                 f"_{kuaishou_server_web_st_2}" \
+                                 f"-{kuaishou_server_web_st_3}" \
+                                 f"-{kuaishou_server_web_st_4}" \
+                                 f"_{kuaishou_server_web_st_5}" \
+                                 f"-{kuaishou_server_web_st_6}" \
+                                 f"-{kuaishou_server_web_st_7}" \
+                                 f"-{kuaishou_server_web_st_8}"  # eight random segments joined with "_" / "-"; NOTE(review): segment lengths appear to mirror the hard-coded sample above - confirm
+
+        kuaishou_server_web_ph = ''.join(random.sample(string.ascii_letters + string.digits, 36))  # 36 random alphanumeric characters, none repeated
+        print(f"kuaishou_server_web_st:{kuaishou_server_web_st}")  # results go to stdout only
+        print(f"kuaishou_server_web_ph:{kuaishou_server_web_ph}")
 
 if __name__ == "__main__":
-    Insert.insert_video_from_feishu_to_mysql("insert-prod", "kuaishou", "prod", "local")
+    # Insert.insert_video_from_feishu_to_mysql("insert-prod", "kuaishou", "prod", "local")
     # Insert.get_sheet()
     # Insert.random_out_uid()
-    pass
+    Insert.random_cookies()  # the only active call after this change; it prints generated values to stdout
+    pass  # no-op following the call above

+ 60 - 47
kuaishou/kuaishou_follow/kuaishou_follow.py

@@ -119,29 +119,31 @@ class Follow:
             payload = json.dumps({
                 "operationName": "visionProfile",
                 "variables": {
-                    "userId": out_uid
+                    "userId": str(out_uid)
                 },
                 "query": "query visionProfile($userId: String) {\n  visionProfile(userId: $userId) {\n    result\n    hostName\n    userProfile {\n      ownerCount {\n        fan\n        photo\n        follow\n        photo_public\n        __typename\n      }\n      profile {\n        gender\n        user_name\n        user_id\n        headurl\n        user_text\n        user_profile_bg_url\n        __typename\n      }\n      isFollowing\n      __typename\n    }\n    __typename\n  }\n}\n"
             })
             headers = {
-                'Cookie': f'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; userId={"".join(str(random.choice(range(1, 10))) for _ in range(10))}; kpn=KUAISHOU_VISION; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABE4wGjnJauApJelOpl9Xqo8TVDAyra7Pvo0rZtVgMSZxgVuw4Z6P2UtHv_CHOk2Ne2el1hdE_McCptWs8tRdtYlhXFlVOu8rQX7CwexzOBudJAfB3lDN8LPc4o4qHNwqFxy5J5j_WzdllbqMmaDUK9yUxX6XA-JFezzq9jvBwtGv7_hzB7pFrUcH39z0EYOQaZo5lDl-pE09Gw7wr8NvlZRoSdWlbobCW6oJxuQLJTUr9oj_uIiBhkeb1psaIIc3VwfYQ1UfvobrXAP_WpnRabE_3UZUBOygFMAE; kuaishou.server.web_ph=2b981e2051d7130c977fd31df97fe6f5ad54',
+                # 'Cookie': f'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; userId={"".join(str(random.choice(range(1, 10))) for _ in range(10))}; kpn=KUAISHOU_VISION; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABE4wGjnJauApJelOpl9Xqo8TVDAyra7Pvo0rZtVgMSZxgVuw4Z6P2UtHv_CHOk2Ne2el1hdE_McCptWs8tRdtYlhXFlVOu8rQX7CwexzOBudJAfB3lDN8LPc4o4qHNwqFxy5J5j_WzdllbqMmaDUK9yUxX6XA-JFezzq9jvBwtGv7_hzB7pFrUcH39z0EYOQaZo5lDl-pE09Gw7wr8NvlZRoSdWlbobCW6oJxuQLJTUr9oj_uIiBhkeb1psaIIc3VwfYQ1UfvobrXAP_WpnRabE_3UZUBOygFMAE; kuaishou.server.web_ph=2b981e2051d7130c977fd31df97fe6f5ad54',
+                'Cookie': f'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; userId=3352428474; kpn=KUAISHOU_VISION; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABaRXtfRHlzKlQVj0Nm_M1G2wrIN1p6g3UTwfqfez6rkLVj6mPNt3RBAsLkyemMpvTLerPw0h41Q0lowqcImvIv5dlSGDEpQoj-VTAmOR2Suzm8vCRakG7XziAWyI0PXJKhvdXms-9Giy_4TnoniB49Oo3m7qXjXVBCzybcWS5BO90OLkhD30GYmGEnBBvkBI2oErJy3mNbafQdBQ6SxSUHhoS-1Rj5-IBBNoxoIePYcxZFs4oIiCvaT7sRn-zrF7X2ClPhfNh6lgClmH8MUjXszUfY_TPLCgFMAE; kuaishou.server.web_ph=1b62b98fc28bc23a42cd85240e1fd6025983',
                 'Referer': f'https://www.kuaishou.com/profile/{out_uid}',
                 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
                 'content-type': 'application/json',
-                # 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
-                # 'Cache-Control': 'no-cache',
-                # 'Connection': 'keep-alive',
-                # 'Origin': 'https://www.kuaishou.com',
-                # 'Pragma': 'no-cache',
-                # 'Sec-Fetch-Dest': 'empty',
-                # 'Sec-Fetch-Mode': 'cors',
-                # 'Sec-Fetch-Site': 'same-origin',
-                # 'accept': '*/*',
-                # 'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
-                # 'sec-ch-ua-mobile': '?0',
-                # 'sec-ch-ua-platform': '"macOS"'
+                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+                'Cache-Control': 'no-cache',
+                'Connection': 'keep-alive',
+                'Origin': 'https://www.kuaishou.com',
+                'Pragma': 'no-cache',
+                'Sec-Fetch-Dest': 'empty',
+                'Sec-Fetch-Mode': 'cors',
+                'Sec-Fetch-Site': 'same-origin',
+                'accept': '*/*',
+                'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
+                'sec-ch-ua-mobile': '?0',
+                'sec-ch-ua-platform': '"macOS"'
             }
             response = requests.post(url=url, headers=headers, data=payload)
+            Common.logger(log_type, crawler).info(f"get_out_user_info_response:{response.text}")
             if response.status_code != 200:
                 Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.text}\n")
                 return
@@ -156,33 +158,42 @@ class Follow:
                 return
             else:
                 userProfile = response.json()['data']['visionProfile']['userProfile']
-                out_user_dict = {}
-                if 'ownerCount' not in userProfile:
-                    out_user_dict['out_fans'] = 0
-                    out_user_dict['out_fans'] = 0
-                elif 'fan' not in userProfile['ownerCount']:
-                    out_user_dict['out_fans'] = 0
-                elif 'follow' not in userProfile['ownerCount']:
-                    out_user_dict['out_fans'] = 0
-                else:
+                Common.logger(log_type, crawler).info(f"userProfile:{userProfile}")
+
+                try:
                     out_fans_str = str(userProfile['ownerCount']['fan'])
+                except Exception:
+                    out_fans_str = "0"
+
+                try:
                     out_follow_str = str(userProfile['ownerCount']['follow'])
-                    if "万" in out_fans_str:
-                        out_user_dict['out_fans'] = int(float(out_fans_str.split("万")[0]) * 10000)
-                    else:
-                        out_user_dict['out_fans'] = int(out_fans_str.replace(",", ""))
-                    if "万" in out_follow_str:
-                        out_user_dict['out_follow'] = int(float(out_follow_str.split("万")[0]) * 10000)
-                    else:
-                        out_user_dict['out_follow'] = int(out_follow_str.replace(",", ""))
+                except Exception:
+                    out_follow_str = "0"
 
-                if 'profile' not in userProfile:
-                    out_user_dict['out_avatar_url'] = ''
-                elif 'headurl' not in userProfile['profile']:
-                    out_user_dict['out_avatar_url'] = ''
-                else:
-                    out_user_dict['out_avatar_url'] = userProfile['profile']['headurl']
+                try:
+                    out_avatar_url = userProfile['profile']['headurl']
+                except Exception:
+                    out_avatar_url = ""
+
+                Common.logger(log_type, crawler).info(f"out_fans_str:{out_fans_str}")
+                Common.logger(log_type, crawler).info(f"out_follow_str:{out_follow_str}")
+                Common.logger(log_type, crawler).info(f"out_avatar_url:{out_avatar_url}")
 
+                if "万" in out_fans_str:
+                    out_fans = int(float(out_fans_str.split("万")[0]) * 10000)
+                else:
+                    out_fans = int(out_fans_str.replace(",", ""))
+                if "万" in out_follow_str:
+                    out_follow = int(float(out_follow_str.split("万")[0]) * 10000)
+                else:
+                    out_follow = int(out_follow_str.replace(",", ""))
+
+                out_user_dict = {
+                    "out_fans": out_fans,
+                    "out_follow": out_follow,
+                    "out_avatar_url": out_avatar_url
+                }
+                Common.logger(log_type, crawler).info(f"out_user_dict:{out_user_dict}")
                 return out_user_dict
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
@@ -197,8 +208,8 @@ class Follow:
                     Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
                     continue
                 our_user_list = []
-                for i in range(1, len(user_sheet)):
-                # for i in range(1, 2):
+                # for i in range(1, len(user_sheet)):
+                for i in range(1, 2):
                     out_uid = user_sheet[i][2]
                     user_name = user_sheet[i][3]
                     our_uid = user_sheet[i][6]
@@ -633,15 +644,17 @@ if __name__ == "__main__":
     # print(Follow.filter_words("follow", "kuaishou"))
     # print(Follow.random_title("follow", "kuaishou"))
     # Follow.get_user_list("follow", "kuaishou", "2OLxLr", "dev", "local")
-    Follow.get_videoList(log_type="follow",
-                         crawler="kuaishou",
-                         strategy="定向爬虫策略",
-                         our_uid="6282431",
-                         out_uid="3xws7ydsnmp5mgq",
-                         oss_endpoint="out",
-                         env="dev",
-                         machine="local")
+    # Follow.get_videoList(log_type="follow",
+    #                      crawler="kuaishou",
+    #                      strategy="定向爬虫策略",
+    #                      our_uid="6282431",
+    #                      out_uid="3xws7ydsnmp5mgq",
+    #                      oss_endpoint="out",
+    #                      env="dev",
+    #                      machine="local")
     # Follow.get_rule("follow", "kuaishou", 1)
     # Follow.get_rule("follow", "kuaishou", 2)
+    print(Follow.get_out_user_info("follow", "kuaishou", "3xgh4ja9be3wcaw"))
+    print(Follow.get_out_user_info("follow", "kuaishou", "3x5wgjhfc7tx8ue"))
 
     pass

BIN
kuaishou/logs/.DS_Store


+ 3 - 1
weixinzhishu/weixinzhishu_main/weixinzhishu_test.py

@@ -70,6 +70,7 @@ class Test:
 
         word_list = cls.get_words(log_type, crawler)
         for i in range(len(word_list)):
+            Common.logger(log_type, crawler).info(f"热词: {word_list[i]}")
             url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
             payload = json.dumps({
                 "openid": openid,
@@ -101,7 +102,7 @@ class Test:
                 # 写飞书
                 if word_list[i] in [x for y in Feishu.get_values_batch(log_type, crawler, "5011a2") for x in y]:
                     Common.logger(log_type, crawler).info("该词已存在")
-                    break
+                    continue
                 Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
                 time.sleep(0.5)
                 Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2",
@@ -112,6 +113,7 @@ class Test:
                 Common.logger(log_type, crawler).info("写入飞书成功\n")
             elif response.json()['code'] != 0:
                 Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
+                continue
             else:
                 time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
                 for x in range(len(time_index)):