|
@@ -644,6 +644,95 @@ def get_comment_cnt(item_id):
|
|
|
return response.json().get("total_number", 0)
|
|
|
|
|
|
|
|
|
def get_video_info(item_id, trace_id):
    """Fetch one Xigua video's metadata and flatten it into a crawler record.

    Sends a GET request to the ``mixVideo/information`` endpoint for
    ``item_id`` (routed through ``tunnel_proxies()``), then merges the
    returned video payload with the results of ``get_video_url`` and
    ``get_comment_cnt``.

    Args:
        item_id: Identifier sent as the ``mixId`` query parameter and stored,
            stringified, under ``gid`` in the result.
        trace_id: Accepted for call-site compatibility; not used by this
            function.

    Returns:
        A dict with the video's title, ids, counters, duration, dimensions,
        publish time, author fields, media URLs and a ``session`` tag, or
        ``None`` when the response is not HTTP 200, is not valid JSON, or
        does not contain a non-empty video payload.

    Raises:
        Errors raised by ``requests.get`` itself (for example on timeout)
        are not caught and propagate to the caller.
    """
    url = "https://www.ixigua.com/api/mixVideo/information?"
    headers = {
        "accept-encoding": "gzip, deflate",
        "accept-language": "zh-CN,zh-Hans;q=0.9",
        "user-agent": FakeUserAgent().random,
        "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
    }
    # NOTE(review): the token/signature values in `params` and `cookies` are
    # fixed literals; presumably they were captured from a browser session
    # and may expire -- confirm how they are meant to be refreshed.
    params = {
        "mixId": str(item_id),
        "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
        "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
        "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
        "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
        "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
    }
    cookies = {
        "ixigua-a-s": "1",
        "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
        "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
        "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
        "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
        "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
        "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
        "__ac_nonce": "06304878000964fdad287",
        "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
        "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
        "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
        "_tea_utm_cache_1300": "undefined",
        "support_avif": "false",
        "support_webp": "false",
        "xiguavideopcwebid": "7134967546256016900",
        "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
    }
    response = requests.get(
        url=url,
        headers=headers,
        params=params,
        cookies=cookies,
        proxies=tunnel_proxies(),
        timeout=5,
    )

    # Parse the body at most once, and only for a 200 response. A body that
    # is not JSON is treated like any other failed fetch instead of raising.
    payload = None
    if response.status_code == 200:
        try:
            payload = response.json()
        except ValueError:
            payload = None

    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, dict) or not data:
        # Covers a missing, empty, null or non-object "data" field.
        print("获取视频信息失败")
        return None

    # Walk data -> gidInformation -> packerData -> video, tolerating a
    # missing or null level at any step.
    video_info = data
    for key in ("gidInformation", "packerData", "video"):
        video_info = video_info.get(key) if isinstance(video_info, dict) else None
    if not isinstance(video_info, dict) or not video_info:
        return None

    video_detail = get_video_url(video_info)
    user_info = video_info.get("user_info") or {}
    publish_ts = int(video_info.get("video_publish_time", 0))

    return {
        "video_title": video_info.get("title", ""),
        "video_id": (video_info.get("videoResource") or {}).get("vid", ""),
        "gid": str(item_id),
        "play_cnt": int(video_info.get("video_watch_count", 0)),
        "like_cnt": int(video_info.get("video_like_count", 0)),
        "comment_cnt": int(get_comment_cnt(item_id)),
        # Share and favorite counts are not read from the response; they are
        # always reported as 0.
        "share_cnt": 0,
        "favorite_cnt": 0,
        "duration": int(video_info.get("video_duration", 0)),
        "video_width": int(video_detail["video_width"]),
        "video_height": int(video_detail["video_height"]),
        "publish_time_stamp": publish_ts,
        "publish_time_str": time.strftime(
            "%Y-%m-%d %H:%M:%S",
            time.localtime(publish_ts),
        ),
        "user_name": user_info.get("name", ""),
        "user_id": str(user_info.get("user_id", "")),
        "avatar_url": str(user_info.get("avatar_url", "")),
        "cover_url": video_info.get("poster_url", ""),
        "audio_url": video_detail["audio_url"],
        "video_url": video_detail["video_url"],
        "session": f"xigua-search-{int(time.time())}",
    }
|
|
|
+
|
|
|
+
|
|
|
class XiGuaAuthor:
|
|
|
def __init__(self, platform, mode, rule_dict, env, user_list):
|
|
|
self.platform = platform
|
|
@@ -657,7 +746,7 @@ class XiGuaAuthor:
|
|
|
def get_author_list(self):
|
|
|
# 每轮只抓取定量的数据,到达数量后自己退出
|
|
|
max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
|
|
|
- for user_dict in self.user_list[:1]:
|
|
|
+ for user_dict in self.user_list[1: 2]:
|
|
|
self.get_video_list(user_dict)
|
|
|
if self.download_count <= max_count:
|
|
|
self.get_video_list(user_dict)
|
|
@@ -720,7 +809,7 @@ class XiGuaAuthor:
|
|
|
print("无效视频")
|
|
|
return
|
|
|
# 获取视频信息
|
|
|
- video_dict = self.get_video_info(item_id=item_id, trace_id=trace_id)
|
|
|
+ video_dict = get_video_info(item_id=item_id, trace_id=trace_id)
|
|
|
video_dict["out_user_id"] = video_dict["user_id"]
|
|
|
video_dict["platform"] = self.platform
|
|
|
video_dict["strategy"] = self.mode
|
|
@@ -747,95 +836,6 @@ class XiGuaAuthor:
|
|
|
self.download_count += 1
|
|
|
print("成功发送 MQ 至 ETL")
|
|
|
|
|
|
- def get_video_info(self, item_id, trace_id):
|
|
|
- url = "https://www.ixigua.com/api/mixVideo/information?"
|
|
|
- headers = {
|
|
|
- "accept-encoding": "gzip, deflate",
|
|
|
- "accept-language": "zh-CN,zh-Hans;q=0.9",
|
|
|
- "user-agent": FakeUserAgent().random,
|
|
|
- "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
|
|
|
- }
|
|
|
- params = {
|
|
|
- "mixId": str(item_id),
|
|
|
- "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
|
|
|
- "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
|
|
|
- "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
|
|
|
- "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
|
|
|
- "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
|
|
|
- }
|
|
|
- cookies = {
|
|
|
- "ixigua-a-s": "1",
|
|
|
- "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
|
|
|
- "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
|
|
|
- "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
|
|
|
- "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
|
|
|
- "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
|
|
|
- "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
|
|
|
- "__ac_nonce": "06304878000964fdad287",
|
|
|
- "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
|
|
|
- "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
|
|
|
- "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
|
|
|
- "_tea_utm_cache_1300": "undefined",
|
|
|
- "support_avif": "false",
|
|
|
- "support_webp": "false",
|
|
|
- "xiguavideopcwebid": "7134967546256016900",
|
|
|
- "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
|
|
|
- }
|
|
|
- response = requests.get(
|
|
|
- url=url,
|
|
|
- headers=headers,
|
|
|
- params=params,
|
|
|
- cookies=cookies,
|
|
|
- proxies=tunnel_proxies(),
|
|
|
- timeout=5,
|
|
|
- )
|
|
|
- if (
|
|
|
- response.status_code != 200
|
|
|
- or "data" not in response.json()
|
|
|
- or response.json()["data"] == {}
|
|
|
- ):
|
|
|
- print("获取视频信息失败")
|
|
|
- return None
|
|
|
- else:
|
|
|
- video_info = (
|
|
|
- response.json()["data"]
|
|
|
- .get("gidInformation", {})
|
|
|
- .get("packerData", {})
|
|
|
- .get("video", {})
|
|
|
- )
|
|
|
- if video_info == {}:
|
|
|
- return None
|
|
|
- video_detail = get_video_url(video_info)
|
|
|
-
|
|
|
- video_dict = {
|
|
|
- "video_title": video_info.get("title", ""),
|
|
|
- "video_id": video_info.get("videoResource", {}).get("vid", ""),
|
|
|
- "gid": str(item_id),
|
|
|
- "play_cnt": int(video_info.get("video_watch_count", 0)),
|
|
|
- "like_cnt": int(video_info.get("video_like_count", 0)),
|
|
|
- "comment_cnt": int(get_comment_cnt(item_id)),
|
|
|
- "share_cnt": 0,
|
|
|
- "favorite_cnt": 0,
|
|
|
- "duration": int(video_info.get("video_duration", 0)),
|
|
|
- "video_width": int(video_detail["video_width"]),
|
|
|
- "video_height": int(video_detail["video_height"]),
|
|
|
- "publish_time_stamp": int(video_info.get("video_publish_time", 0)),
|
|
|
- "publish_time_str": time.strftime(
|
|
|
- "%Y-%m-%d %H:%M:%S",
|
|
|
- time.localtime(int(video_info.get("video_publish_time", 0))),
|
|
|
- ),
|
|
|
- "user_name": video_info.get("user_info", {}).get("name", ""),
|
|
|
- "user_id": str(video_info.get("user_info", {}).get("user_id", "")),
|
|
|
- "avatar_url": str(
|
|
|
- video_info.get("user_info", {}).get("avatar_url", "")
|
|
|
- ),
|
|
|
- "cover_url": video_info.get("poster_url", ""),
|
|
|
- "audio_url": video_detail["audio_url"],
|
|
|
- "video_url": video_detail["video_url"],
|
|
|
- "session": f"xigua-search-{int(time.time())}",
|
|
|
- }
|
|
|
- return video_dict
|
|
|
-
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
user_list = [
|
|
@@ -864,7 +864,7 @@ if __name__ == "__main__":
|
|
|
"mode": "author",
|
|
|
},
|
|
|
]
|
|
|
- rule = {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100000, 'max': 0}}
|
|
|
+ rule = {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100, 'max': 0}}
|
|
|
XGA = XiGuaAuthor(
|
|
|
platform="xigua",
|
|
|
mode="author",
|
|
@@ -873,3 +873,5 @@ if __name__ == "__main__":
|
|
|
user_list=user_list
|
|
|
)
|
|
|
XGA.get_author_list()
|
|
|
+ # item_id = "v0201ag10000cl4d7djc77u73eftvrcg"
|
|
|
+ # get_video_info(item_id=item_id, trace_id="ljh")
|