2 năm trước cách đây · 42d33c16c7
--- a/xigua/xigua_author/xigua_author.py
+++ b/xigua/xigua_author/xigua_author.py
@@ -655,7 +655,7 @@ class XiGuaAuthor:
 
				     def get_author_list(self):
			
 
				         # 每轮只抓取定量的数据，到达数量后自己退出
			
 
				         max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
			
 
				-        for user_dict in self.user_list[:1]:
			
 
				+        for user_dict in self.user_list:
			
 
				             if self.download_count <= max_count:
			
 
				                 self.get_video_list(user_dict)
			
 
				                 time.sleep(random.randint(1, 15))
			
--- a/xigua/xigua_author/xigua_author_test.py
+++ b/xigua/xigua_author/xigua_author_test.py
@@ -644,6 +644,95 @@ def get_comment_cnt(item_id):
 
				     return response.json().get("total_number", 0)
			
 
				 
			
 
				 
			
 
				+def get_video_info(item_id, trace_id):
			
 
				+    url = "https://www.ixigua.com/api/mixVideo/information?"
			
 
				+    headers = {
			
 
				+        "accept-encoding": "gzip, deflate",
			
 
				+        "accept-language": "zh-CN,zh-Hans;q=0.9",
			
 
				+        "user-agent": FakeUserAgent().random,
			
 
				+        "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
			
 
				+    }
			
 
				+    params = {
			
 
				+        "mixId": str(item_id),
			
 
				+        "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
			
 
				+                   "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
			
 
				+        "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
			
 
				+        "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
			
 
				+                      "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
			
 
				+    }
			
 
				+    cookies = {
			
 
				+        "ixigua-a-s": "1",
			
 
				+        "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
			
 
				+                   "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
			
 
				+        "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
			
 
				+                 "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
			
 
				+        "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
			
 
				+        "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
			
 
				+        "__ac_nonce": "06304878000964fdad287",
			
 
				+        "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
			
 
				+                          "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
			
 
				+        "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
			
 
				+        "_tea_utm_cache_1300": "undefined",
			
 
				+        "support_avif": "false",
			
 
				+        "support_webp": "false",
			
 
				+        "xiguavideopcwebid": "7134967546256016900",
			
 
				+        "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
			
 
				+    }
			
 
				+    response = requests.get(
			
 
				+        url=url,
			
 
				+        headers=headers,
			
 
				+        params=params,
			
 
				+        cookies=cookies,
			
 
				+        proxies=tunnel_proxies(),
			
 
				+        timeout=5,
			
 
				+    )
			
 
				+    if (
			
 
				+            response.status_code != 200
			
 
				+            or "data" not in response.json()
			
 
				+            or response.json()["data"] == {}
			
 
				+    ):
			
 
				+        print("获取视频信息失败")
			
 
				+        return None
			
 
				+    else:
			
 
				+        video_info = (
			
 
				+            response.json()["data"]
			
 
				+            .get("gidInformation", {})
			
 
				+            .get("packerData", {})
			
 
				+            .get("video", {})
			
 
				+        )
			
 
				+        if video_info == {}:
			
 
				+            return None
			
 
				+        video_detail = get_video_url(video_info)
			
 
				+        video_dict = {
			
 
				+            "video_title": video_info.get("title", ""),
			
 
				+            "video_id": video_info.get("videoResource", {}).get("vid", ""),
			
 
				+            "gid": str(item_id),
			
 
				+            "play_cnt": int(video_info.get("video_watch_count", 0)),
			
 
				+            "like_cnt": int(video_info.get("video_like_count", 0)),
			
 
				+            "comment_cnt": int(get_comment_cnt(item_id)),
			
 
				+            "share_cnt": 0,
			
 
				+            "favorite_cnt": 0,
			
 
				+            "duration": int(video_info.get("video_duration", 0)),
			
 
				+            "video_width": int(video_detail["video_width"]),
			
 
				+            "video_height": int(video_detail["video_height"]),
			
 
				+            "publish_time_stamp": int(video_info.get("video_publish_time", 0)),
			
 
				+            "publish_time_str": time.strftime(
			
 
				+                "%Y-%m-%d %H:%M:%S",
			
 
				+                time.localtime(int(video_info.get("video_publish_time", 0))),
			
 
				+            ),
			
 
				+            "user_name": video_info.get("user_info", {}).get("name", ""),
			
 
				+            "user_id": str(video_info.get("user_info", {}).get("user_id", "")),
			
 
				+            "avatar_url": str(
			
 
				+                video_info.get("user_info", {}).get("avatar_url", "")
			
 
				+            ),
			
 
				+            "cover_url": video_info.get("poster_url", ""),
			
 
				+            "audio_url": video_detail["audio_url"],
			
 
				+            "video_url": video_detail["video_url"],
			
 
				+            "session": f"xigua-search-{int(time.time())}",
			
 
				+        }
			
 
				+        return video_dict
			
 
				+
			
 
				+
			
 
				 class XiGuaAuthor:
			
 
				     def __init__(self, platform, mode, rule_dict, env, user_list):
			
 
				         self.platform = platform
			
@@ -657,7 +746,7 @@ class XiGuaAuthor:
 
				     def get_author_list(self):
			
 
				         # 每轮只抓取定量的数据，到达数量后自己退出
			
 
				         max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
			
 
				-        for user_dict in self.user_list[:1]:
			
 
				+        for user_dict in self.user_list[1: 2]:
			
 
				             self.get_video_list(user_dict)
			
 
				             if self.download_count <= max_count:
			
 
				                 self.get_video_list(user_dict)
			
@@ -720,7 +809,7 @@ class XiGuaAuthor:
 
				             print("无效视频")
			
 
				             return
			
 
				         # 获取视频信息
			
 
				-        video_dict = self.get_video_info(item_id=item_id, trace_id=trace_id)
			
 
				+        video_dict = get_video_info(item_id=item_id, trace_id=trace_id)
			
 
				         video_dict["out_user_id"] = video_dict["user_id"]
			
 
				         video_dict["platform"] = self.platform
			
 
				         video_dict["strategy"] = self.mode
			
@@ -747,95 +836,6 @@ class XiGuaAuthor:
 
				             self.download_count += 1
			
 
				             print("成功发送 MQ 至 ETL")
			
 
				 
			
 
				-    def get_video_info(self, item_id, trace_id):
			
 
				-        url = "https://www.ixigua.com/api/mixVideo/information?"
			
 
				-        headers = {
			
 
				-            "accept-encoding": "gzip, deflate",
			
 
				-            "accept-language": "zh-CN,zh-Hans;q=0.9",
			
 
				-            "user-agent": FakeUserAgent().random,
			
 
				-            "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
			
 
				-        }
			
 
				-        params = {
			
 
				-            "mixId": str(item_id),
			
 
				-            "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
			
 
				-                       "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
			
 
				-            "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
			
 
				-            "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
			
 
				-                          "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
			
 
				-        }
			
 
				-        cookies = {
			
 
				-            "ixigua-a-s": "1",
			
 
				-            "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
			
 
				-                       "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
			
 
				-            "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
			
 
				-                     "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
			
 
				-            "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
			
 
				-            "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
			
 
				-            "__ac_nonce": "06304878000964fdad287",
			
 
				-            "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
			
 
				-                              "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
			
 
				-            "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
			
 
				-            "_tea_utm_cache_1300": "undefined",
			
 
				-            "support_avif": "false",
			
 
				-            "support_webp": "false",
			
 
				-            "xiguavideopcwebid": "7134967546256016900",
			
 
				-            "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
			
 
				-        }
			
 
				-        response = requests.get(
			
 
				-            url=url,
			
 
				-            headers=headers,
			
 
				-            params=params,
			
 
				-            cookies=cookies,
			
 
				-            proxies=tunnel_proxies(),
			
 
				-            timeout=5,
			
 
				-        )
			
 
				-        if (
			
 
				-                response.status_code != 200
			
 
				-                or "data" not in response.json()
			
 
				-                or response.json()["data"] == {}
			
 
				-        ):
			
 
				-            print("获取视频信息失败")
			
 
				-            return None
			
 
				-        else:
			
 
				-            video_info = (
			
 
				-                response.json()["data"]
			
 
				-                .get("gidInformation", {})
			
 
				-                .get("packerData", {})
			
 
				-                .get("video", {})
			
 
				-            )
			
 
				-            if video_info == {}:
			
 
				-                return None
			
 
				-            video_detail = get_video_url(video_info)
			
 
				-
			
 
				-            video_dict = {
			
 
				-                "video_title": video_info.get("title", ""),
			
 
				-                "video_id": video_info.get("videoResource", {}).get("vid", ""),
			
 
				-                "gid": str(item_id),
			
 
				-                "play_cnt": int(video_info.get("video_watch_count", 0)),
			
 
				-                "like_cnt": int(video_info.get("video_like_count", 0)),
			
 
				-                "comment_cnt": int(get_comment_cnt(item_id)),
			
 
				-                "share_cnt": 0,
			
 
				-                "favorite_cnt": 0,
			
 
				-                "duration": int(video_info.get("video_duration", 0)),
			
 
				-                "video_width": int(video_detail["video_width"]),
			
 
				-                "video_height": int(video_detail["video_height"]),
			
 
				-                "publish_time_stamp": int(video_info.get("video_publish_time", 0)),
			
 
				-                "publish_time_str": time.strftime(
			
 
				-                    "%Y-%m-%d %H:%M:%S",
			
 
				-                    time.localtime(int(video_info.get("video_publish_time", 0))),
			
 
				-                ),
			
 
				-                "user_name": video_info.get("user_info", {}).get("name", ""),
			
 
				-                "user_id": str(video_info.get("user_info", {}).get("user_id", "")),
			
 
				-                "avatar_url": str(
			
 
				-                    video_info.get("user_info", {}).get("avatar_url", "")
			
 
				-                ),
			
 
				-                "cover_url": video_info.get("poster_url", ""),
			
 
				-                "audio_url": video_detail["audio_url"],
			
 
				-                "video_url": video_detail["video_url"],
			
 
				-                "session": f"xigua-search-{int(time.time())}",
			
 
				-            }
			
 
				-            return video_dict
			
 
				-
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     user_list = [
			
@@ -864,7 +864,7 @@ if __name__ == "__main__":
 
				             "mode": "author",
			
 
				         },
			
 
				     ]
			
 
				-    rule = {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100000, 'max': 0}}
			
 
				+    rule = {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100, 'max': 0}}
			
 
				     XGA = XiGuaAuthor(
			
 
				         platform="xigua",
			
 
				         mode="author",
			
@@ -873,3 +873,5 @@ if __name__ == "__main__":
 
				         user_list=user_list
			
 
				     )
			
 
				     XGA.get_author_list()
			
 
				+    # item_id = "v0201ag10000cl4d7djc77u73eftvrcg"
			
 
				+    # get_video_info(item_id=item_id, trace_id="ljh")