|
@@ -1,5 +1,6 @@
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
# @Time: 2023/11/07
|
|
# @Time: 2023/11/07
|
|
|
|
+import datetime
|
|
import os
|
|
import os
|
|
import random
|
|
import random
|
|
import sys
|
|
import sys
|
|
@@ -8,7 +9,6 @@ from datetime import date, timedelta
|
|
import requests
|
|
import requests
|
|
import json
|
|
import json
|
|
import urllib3
|
|
import urllib3
|
|
-from pandas import datetime
|
|
|
|
|
|
|
|
from common.feishu import Feishu
|
|
from common.feishu import Feishu
|
|
|
|
|
|
@@ -124,12 +124,12 @@ class KuaishouauthorScheduling:
|
|
},
|
|
},
|
|
"query": "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"
|
|
"query": "fragment photoContent on PhotoEntity {\n id\n duration\n caption\n originCaption\n likeCount\n viewCount\n commentCount\n realLikeCount\n coverUrl\n photoUrl\n photoH265Url\n manifest\n manifestH265\n videoResource\n coverUrls {\n url\n __typename\n }\n timestamp\n expTag\n animatedCoverUrl\n distance\n videoRatio\n liked\n stereoType\n profileUserTopPhoto\n musicBlocked\n __typename\n}\n\nfragment feedContent on Feed {\n type\n author {\n id\n name\n headerUrl\n following\n headerUrls {\n url\n __typename\n }\n __typename\n }\n photo {\n ...photoContent\n __typename\n }\n canAddComment\n llsid\n status\n currentPcursor\n tags {\n type\n name\n __typename\n }\n __typename\n}\n\nquery visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n ...feedContent\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"
|
|
})
|
|
})
|
|
- cookie = cls.get_cookie(log_type, crawler, env)["cookie"]
|
|
|
|
|
|
+ # cookie = cls.get_cookie(log_type, crawler, env)["cookie"]
|
|
headers = {
|
|
headers = {
|
|
'Accept': '*/*',
|
|
'Accept': '*/*',
|
|
'Content-Type': 'application/json',
|
|
'Content-Type': 'application/json',
|
|
'Origin': 'https://www.kuaishou.com',
|
|
'Origin': 'https://www.kuaishou.com',
|
|
- 'Cookie': cookie,
|
|
|
|
|
|
+ 'Cookie': "kpf=PC_WEB; clientid=3; did=web_9c6a04a4004fdb7c95a658a56ed275b6; userId=1299331643; kpn=KUAISHOU_VISION; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABCqafvOnP6y7cYgtjIf1e768WcRUF6GRpDnLIc4pmS75OjS3bmBA0dsvXr2gI-erIpf6QtiDmG9gJt8uCf6iphyIa71jOWlOucSzyJHjaqh1Mk6F_iJM3U1LERQ1SepV-7zggDAEkSkyx4e8IGw3Q4AFzt6CqODASMchfKZa504WyJQbsvW8fyCWQsAgkATxG26K7EAnVGjN2nH_eF2r1IRoSoJCKbxHIWXjzVWap_gGna5KjIiB5WaM9v01O0CeINbL0Mz0lGQlktiBTUUX26W5q-9Dc6igFMAE; kuaishou.server.web_ph=472bf55506210d228e60cb1489e8a127bb51",
|
|
'Content-Length': '1260',
|
|
'Content-Length': '1260',
|
|
'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
|
|
'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
|
|
'Host': 'www.kuaishou.com',
|
|
'Host': 'www.kuaishou.com',
|
|
@@ -143,7 +143,7 @@ class KuaishouauthorScheduling:
|
|
# max_retries=3 重试3次
|
|
# max_retries=3 重试3次
|
|
s.mount('http://', HTTPAdapter(max_retries=3))
|
|
s.mount('http://', HTTPAdapter(max_retries=3))
|
|
s.mount('https://', HTTPAdapter(max_retries=3))
|
|
s.mount('https://', HTTPAdapter(max_retries=3))
|
|
- response = s.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False, timeout=10)
|
|
|
|
|
|
+ response = s.post(url=url, headers=headers, data=payload, verify=False, timeout=10)
|
|
response.close()
|
|
response.close()
|
|
# Common.logger(log_type, crawler).info(f"response:{response.text}\n")
|
|
# Common.logger(log_type, crawler).info(f"response:{response.text}\n")
|
|
if response.status_code != 200:
|
|
if response.status_code != 200:
|
|
@@ -219,6 +219,7 @@ class KuaishouauthorScheduling:
|
|
message=f"已下载视频数:{cls.download_cnt}\n"
|
|
message=f"已下载视频数:{cls.download_cnt}\n"
|
|
)
|
|
)
|
|
return
|
|
return
|
|
|
|
+ user_name = feeds[i].get("author").get("name")
|
|
video_title = feeds[i].get("photo", {}).get("caption", random_title(log_type, crawler, env, text='title'))
|
|
video_title = feeds[i].get("photo", {}).get("caption", random_title(log_type, crawler, env, text='title'))
|
|
video_title = cls.video_title(log_type, crawler, env, video_title)
|
|
video_title = cls.video_title(log_type, crawler, env, video_title)
|
|
try:
|
|
try:
|
|
@@ -335,10 +336,10 @@ class KuaishouauthorScheduling:
|
|
video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
video_dict["strategy_type"] = log_type
|
|
video_dict["strategy_type"] = log_type
|
|
mq.send_msg(video_dict)
|
|
mq.send_msg(video_dict)
|
|
- current_time = datetime.now()
|
|
|
|
|
|
+ current_time = datetime.datetime.now()
|
|
timestamp = current_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
timestamp = current_time.strftime("%Y-%m-%d %H:%M:%S")
|
|
values = [[
|
|
values = [[
|
|
- link,
|
|
|
|
|
|
+ user_name,
|
|
video_id,
|
|
video_id,
|
|
video_title,
|
|
video_title,
|
|
publish_time_str,
|
|
publish_time_str,
|