wangkun 2 years ago
parent
commit
af4c689447

+ 8 - 7
kuaishou/kuaishou_follow/kuaishou_follow.py

@@ -335,15 +335,16 @@ class Follow:
                     },
                     "query": "fragment photoContent on PhotoEntity {\n  id\n  duration\n  caption\n  originCaption\n  likeCount\n  viewCount\n  realLikeCount\n  coverUrl\n  photoUrl\n  photoH265Url\n  manifest\n  manifestH265\n  videoResource\n  coverUrls {\n    url\n    __typename\n  }\n  timestamp\n  expTag\n  animatedCoverUrl\n  distance\n  videoRatio\n  liked\n  stereoType\n  profileUserTopPhoto\n  musicBlocked\n  __typename\n}\n\nfragment feedContent on Feed {\n  type\n  author {\n    id\n    name\n    headerUrl\n    following\n    headerUrls {\n      url\n      __typename\n    }\n    __typename\n  }\n  photo {\n    ...photoContent\n    __typename\n  }\n  canAddComment\n  llsid\n  status\n  currentPcursor\n  tags {\n    type\n    name\n    __typename\n  }\n  __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n  visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n    result\n    llsid\n    webPageArea\n    feeds {\n      ...feedContent\n      __typename\n    }\n    hostName\n    pcursor\n    __typename\n  }\n}\n"
                 })
-                get_cookie = cls.get_cookie(log_type, crawler, out_uid, machine)
-                if get_cookie is None:
-                    cookie = 'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABszwASP0eGT2ym0NUdGf1PTi8_gDYrDms9otf5FKMUrx0V7MOTT1hhvpVJijmgku8KYtu3a6g3X1hZODMpX0ebZUip44txWPoY3VRqbsnBKEOs9Qkmx3uLaX33nq8KevOKLoO0cIE8nfwMJISe_BtCHr22cbJkofI0xfJXBt_ZgPqJIqWsUwdgwKzqRYn47ROkIqzeDfEjbxaZxiXy22ZhhoSlCobbmtjkvjpY9x730BPP_C5IiAl9EBaCTyvMw5IIcnkmcjMeWC8w6LKzeFMCNhcqrfkgCgFMAE; kuaishou.server.web_ph=19ae269a54f619c49def39ef5b54ae6d4353; kpn=KUAISHOU_VISION'
-                else:
-                    cookie = get_cookie
-                Common.logger(log_type, crawler).info(f"cookie:{cookie}")
+                # get_cookie = cls.get_cookie(log_type, crawler, out_uid, machine)
+                # if get_cookie is None:
+                #     cookie = 'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABszwASP0eGT2ym0NUdGf1PTi8_gDYrDms9otf5FKMUrx0V7MOTT1hhvpVJijmgku8KYtu3a6g3X1hZODMpX0ebZUip44txWPoY3VRqbsnBKEOs9Qkmx3uLaX33nq8KevOKLoO0cIE8nfwMJISe_BtCHr22cbJkofI0xfJXBt_ZgPqJIqWsUwdgwKzqRYn47ROkIqzeDfEjbxaZxiXy22ZhhoSlCobbmtjkvjpY9x730BPP_C5IiAl9EBaCTyvMw5IIcnkmcjMeWC8w6LKzeFMCNhcqrfkgCgFMAE; kuaishou.server.web_ph=19ae269a54f619c49def39ef5b54ae6d4353; kpn=KUAISHOU_VISION'
+                # else:
+                #     cookie = get_cookie
+                # Common.logger(log_type, crawler).info(f"cookie:{cookie}")
                 headers = {
                     # 'Cookie': f'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; userId={"".join(str(random.choice(range(1, 10))) for _ in range(10))}; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABOLgYYcIJ5ilxU46Jc-HLWThY8sppX3V0htC_KhSGOzAjP2hAOdegzfkZGAxS5rf6rCBS487FkxfYzLkV__I6b1lK16rDjvv94Kkoo4z7mgf8y8rFgWoqrp81JAWTtx00y-wrc1XXPf9RAVQoET70wWaeNG2r5bxtZEiNwpK_zPi0ZdUo0BW13dFKfVssAy2xKYh0UlJ8VSd_vBvyMKSxVBoSf061Kc3w5Nem7YdpVBmH39ceIiBpiGioLzbZqlHiSbwkH_LhUhNXz3o7LITj098KUytk2CgFMAE; kuaishou.server.web_ph=f1033957981996a7d50e849a9ded4cf4adff; kpn=KUAISHOU_VISION',
-                    'Cookie': cookie,
+                    # 'Cookie': cookie,
+                    'Cookie': 'kpf=PC_WEB; clientid=3; did=web_e2901e1c5a13c60af81ba88bc7a3ee24; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABszwASP0eGT2ym0NUdGf1PTi8_gDYrDms9otf5FKMUrx0V7MOTT1hhvpVJijmgku8KYtu3a6g3X1hZODMpX0ebZUip44txWPoY3VRqbsnBKEOs9Qkmx3uLaX33nq8KevOKLoO0cIE8nfwMJISe_BtCHr22cbJkofI0xfJXBt_ZgPqJIqWsUwdgwKzqRYn47ROkIqzeDfEjbxaZxiXy22ZhhoSlCobbmtjkvjpY9x730BPP_C5IiAl9EBaCTyvMw5IIcnkmcjMeWC8w6LKzeFMCNhcqrfkgCgFMAE; kuaishou.server.web_ph=19ae269a54f619c49def39ef5b54ae6d4353; kpn=KUAISHOU_VISION',
                     'Referer': f'https://www.kuaishou.com/profile/{out_uid}',
                     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
                     'content-type': 'application/json',

+ 1 - 0
scheduling/crawler_scheduling.py

@@ -88,6 +88,7 @@ class Scheduling:
     # 资源分配
     @classmethod
     def resource_allocation(cls, log_type, crawler, env, machine):
+        'sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" xigua/nohup.log'
         pass
 
     # 写入任务队列