|
@@ -354,9 +354,8 @@ class Follow:
|
|
# Common.logger(log_type, crawler).error(f"get_cookie:{e}\n")
|
|
# Common.logger(log_type, crawler).error(f"get_cookie:{e}\n")
|
|
|
|
|
|
@classmethod
|
|
@classmethod
|
|
- def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
|
|
|
|
|
|
+ def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=""):
|
|
download_cnt_1, download_cnt_2 = 0, 0
|
|
download_cnt_1, download_cnt_2 = 0, 0
|
|
- pcursor = ""
|
|
|
|
|
|
|
|
while True:
|
|
while True:
|
|
rule_dict_1 = cls.get_rule(log_type, crawler, 1)
|
|
rule_dict_1 = cls.get_rule(log_type, crawler, 1)
|
|
@@ -381,7 +380,7 @@ class Follow:
|
|
"operationName": "visionProfilePhotoList",
|
|
"operationName": "visionProfilePhotoList",
|
|
"variables": {
|
|
"variables": {
|
|
"userId": out_uid,
|
|
"userId": out_uid,
|
|
- "pcursor": "",
|
|
|
|
|
|
+ "pcursor": pcursor,
|
|
"page": "profile"
|
|
"page": "profile"
|
|
},
|
|
},
|
|
"query": "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"
|
|
"query": "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"
|
|
@@ -571,7 +570,7 @@ class Follow:
|
|
|
|
|
|
if video_title == "" or video_url == "":
|
|
if video_title == "" or video_url == "":
|
|
Common.logger(log_type, crawler).info("无效视频\n")
|
|
Common.logger(log_type, crawler).info("无效视频\n")
|
|
- break
|
|
|
|
|
|
+ continue
|
|
elif rule_1 is True:
|
|
elif rule_1 is True:
|
|
if download_cnt_1 < int(
|
|
if download_cnt_1 < int(
|
|
rule_dict_1['download_cnt'].replace("=", "")[-1].replace("<", "")[-1].replace(">",
|
|
rule_dict_1['download_cnt'].replace("=", "")[-1].replace("<", "")[-1].replace(">",
|
|
@@ -608,10 +607,10 @@ class Follow:
|
|
Common.logger(log_type, crawler).info("不满足下载规则\n")
|
|
Common.logger(log_type, crawler).info("不满足下载规则\n")
|
|
# Common.logger(log_type, crawler).info(f"feeds: {feeds}\n")
|
|
# Common.logger(log_type, crawler).info(f"feeds: {feeds}\n")
|
|
|
|
|
|
- # if pcursor == "no_more":
|
|
|
|
- # Common.logger(log_type, crawler).info("已经到底了,没有更多内容了\n")
|
|
|
|
- # return
|
|
|
|
- Common.logger(log_type, crawler).info(f"作者:{out_uid},第一页抓取完毕,退出\n")
|
|
|
|
|
|
+ if pcursor == "no_more":
|
|
|
|
+ Common.logger(log_type, crawler).info(f"作者,{out_uid},已经到底了,没有更多内容了\n")
|
|
|
|
+ return
|
|
|
|
+ cls.get_videoList(log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=pcursor)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
|
|
Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
|
|
|
|
|
|
@@ -767,14 +766,14 @@ class Follow:
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
pass
|
|
pass
|
|
- # Follow.get_videoList(log_type="follow",
|
|
|
|
- # crawler="kuaishou",
|
|
|
|
- # strategy="定向爬虫策略",
|
|
|
|
- # our_uid="6282431",
|
|
|
|
- # out_uid="3x3wsqjsxu5j6de",
|
|
|
|
- # oss_endpoint="out",
|
|
|
|
- # env="dev",
|
|
|
|
- # machine="local")
|
|
|
|
|
|
+ Follow.get_videoList(log_type="follow",
|
|
|
|
+ crawler="kuaishou",
|
|
|
|
+ strategy="定向爬虫策略",
|
|
|
|
+ our_uid="54719554",
|
|
|
|
+ out_uid="3xv5xwkfm9y9n86",
|
|
|
|
+ oss_endpoint="out",
|
|
|
|
+ env="dev",
|
|
|
|
+ machine="local")
|
|
|
|
|
|
# print(Follow.get_out_user_info("follow", "kuaishou", "3xgh4ja9be3wcaw"))
|
|
# print(Follow.get_out_user_info("follow", "kuaishou", "3xgh4ja9be3wcaw"))
|
|
# print(Follow.get_out_user_info("follow", "kuaishou", "3x5wgjhfc7tx8ue"))
|
|
# print(Follow.get_out_user_info("follow", "kuaishou", "3x5wgjhfc7tx8ue"))
|