Browse Source

修改西瓜

罗俊辉 1 năm trước
mục cha
commit
2144e7203e

+ 3 - 2
common/feishu.py

@@ -147,8 +147,9 @@ class Feishu:
         :return:
         :return:
         """
         """
         url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
         url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
-        post_data = {"app_id": "cli_a13ad2afa438d00b",  # 这里账号密码是发布应用的后台账号及密码
-                     "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
+        post_data = {
+            "app_id": "cli_a13ad2afa438d00b",  # 这里账号密码是发布应用的后台账号及密码
+            "app_secret": "4tK9LY9VbiQlY5umhE42dclBFo6t4p5O"}
 
 
         try:
         try:
             urllib3.disable_warnings()
             urllib3.disable_warnings()

+ 17 - 2
shipinhao/shipinhao_author/shipinhao_scheduling.py

@@ -17,6 +17,9 @@ from common.public import clean_title
 
 
 
 
 def find_target_user(name, user_list):
 def find_target_user(name, user_list):
+    """
+    在搜索到到账号列表中找目标列表
+    """
     for obj in user_list:
     for obj in user_list:
         if obj["nickname"] == name:
         if obj["nickname"] == name:
             return obj
             return obj
@@ -25,8 +28,13 @@ def find_target_user(name, user_list):
     return False
     return False
 
 
 
 
-class ShiPinHaoAccount:
+class ShiPinHaoAccount(object):
+    """
+    视频号账号爬虫
+    """
     def __init__(self, platform, mode, rule_dict, user_dict, env):
     def __init__(self, platform, mode, rule_dict, user_dict, env):
+        self.cookie = None
+        self.token = None
         self.account_name = user_dict["link"]
         self.account_name = user_dict["link"]
         self.platform = platform
         self.platform = platform
         self.mode = mode
         self.mode = mode
@@ -37,6 +45,9 @@ class ShiPinHaoAccount:
         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
 
 
     def get_token_from_mysql(self):
     def get_token_from_mysql(self):
+        """
+        从mysql中读取token和cookie
+        """
         select_sql = (
         select_sql = (
             f"""SELECT config from crawler_config where source = '{self.platform}'; """
             f"""SELECT config from crawler_config where source = '{self.platform}'; """
         )
         )
@@ -71,7 +82,9 @@ class ShiPinHaoAccount:
             return False
             return False
 
 
     def get_account_id(self):
     def get_account_id(self):
-        # 读历史数据,如果存在 id,则直接返回 id
+        """
+        读历史数据,如果存在 id,则直接返回 id
+        """
         history_id = self.get_history_id()
         history_id = self.get_history_id()
         if history_id:
         if history_id:
             return history_id
             return history_id
@@ -119,7 +132,9 @@ class ShiPinHaoAccount:
                 return False
                 return False
 
 
     def get_account_videos(self):
     def get_account_videos(self):
+        """
         # 一个账号最多抓取 30 条数据
         # 一个账号最多抓取 30 条数据
+        """
         user_id = self.get_account_id()
         user_id = self.get_account_id()
         if user_id:
         if user_id:
             url = "https://mp.weixin.qq.com/cgi-bin/videosnap"
             url = "https://mp.weixin.qq.com/cgi-bin/videosnap"

+ 96 - 65
xigua/xigua_author/xigua_author.py

@@ -12,27 +12,14 @@ from fake_useragent import FakeUserAgent
 from common.mq import MQ
 from common.mq import MQ
 
 
 sys.path.append(os.getcwd())
 sys.path.append(os.getcwd())
-from common import AliyunLogger, PiaoQuanPipeline
 
 
-
-def tunnel_proxies():
-    # 隧道域名:端口号
-    tunnel = "q796.kdltps.com:15818"
-
-    # 用户名密码方式
-    username = "t17772369458618"
-    password = "5zqcjkmy"
-    tunnel_proxies = {
-        "http": "http://%(user)s:%(pwd)s@%(proxy)s/"
-                % {"user": username, "pwd": password, "proxy": tunnel},
-        "https": "http://%(user)s:%(pwd)s@%(proxy)s/"
-                 % {"user": username, "pwd": password, "proxy": tunnel},
-    }
-
-    return tunnel_proxies
+from common import AliyunLogger, PiaoQuanPipeline, tunnel_proxies
 
 
 
 
 def random_signature():
 def random_signature():
+    """
+    随机生成签名
+    """
     src_digits = string.digits  # string_数字
     src_digits = string.digits  # string_数字
     src_uppercase = string.ascii_uppercase  # string_大写字母
     src_uppercase = string.ascii_uppercase  # string_大写字母
     src_lowercase = string.ascii_lowercase  # string_小写字母
     src_lowercase = string.ascii_lowercase  # string_小写字母
@@ -62,6 +49,9 @@ def random_signature():
 
 
 
 
 def get_video_url(video_info):
 def get_video_url(video_info):
+    """
+    获取视频的链接
+    """
     video_url_dict = {}
     video_url_dict = {}
     # video_url
     # video_url
     if "videoResource" not in video_info:
     if "videoResource" not in video_info:
@@ -599,6 +589,9 @@ def get_video_url(video_info):
 
 
 
 
 def get_comment_cnt(item_id):
 def get_comment_cnt(item_id):
+    """
+    获取视频的评论数量
+    """
     url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
     url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
     params = {
     params = {
         "tab_index": "0",
         "tab_index": "0",
@@ -643,6 +636,9 @@ def get_comment_cnt(item_id):
 
 
 
 
 class XiGuaAuthor:
 class XiGuaAuthor:
+    """
+    西瓜账号爬虫
+    """
     def __init__(self, platform, mode, rule_dict, env, user_list):
     def __init__(self, platform, mode, rule_dict, env, user_list):
         self.platform = platform
         self.platform = platform
         self.mode = mode
         self.mode = mode
@@ -652,6 +648,37 @@ class XiGuaAuthor:
         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
         self.download_count = 0
         self.download_count = 0
 
 
+    def rule_maker(self, account):
+        """
+        通过不同的账号生成不同的规则
+        :param account: 输入的账号信息
+        {'play_cnt': {'min': 100000, 'max': 0}, 'period': {'min': 5, 'max': 5}}
+        """
+        flag = account.split("")
+        if flag == "V1":
+            rule_dict = {
+                "play_cnt": {"min": 50000, "max": 0},
+                'period': {"min": 15, "max": 15},
+                'special': 0.01
+            }
+            return rule_dict
+        elif flag == "V2":
+            rule_dict = {
+                "play_cnt": {"min": 10000, "max": 0},
+                'period': {"min": 7, "max": 7},
+                'special': 0.01
+            }
+            return rule_dict
+        elif flag == "V3":
+            rule_dict = {
+                "play_cnt": {"min": 5000, "max": 0},
+                'period': {"min": 3, "max": 3},
+                'special': 0.01
+            }
+            return rule_dict
+        else:
+            return self.rule_dict
+
     def get_author_list(self):
     def get_author_list(self):
         # 每轮只抓取定量的数据,到达数量后自己退出
         # 每轮只抓取定量的数据,到达数量后自己退出
         # max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
         # max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
@@ -743,6 +770,7 @@ class XiGuaAuthor:
                         )
                         )
 
 
     def process_video_obj(self, video_obj, user_dict):
     def process_video_obj(self, video_obj, user_dict):
+        new_rule = self.rule_maker(user_dict)
         trace_id = self.platform + str(uuid.uuid1())
         trace_id = self.platform + str(uuid.uuid1())
         item_id = video_obj.get("item_id", "")
         item_id = video_obj.get("item_id", "")
         if not item_id:
         if not item_id:
@@ -764,12 +792,13 @@ class XiGuaAuthor:
         video_dict["out_video_id"] = video_dict["video_id"]
         video_dict["out_video_id"] = video_dict["video_id"]
         video_dict["width"] = video_dict["video_width"]
         video_dict["width"] = video_dict["video_width"]
         video_dict["height"] = video_dict["video_height"]
         video_dict["height"] = video_dict["video_height"]
-        video_dict["crawler_rule"] = json.dumps(self.rule_dict)
+        video_dict["crawler_rule"] = json.dumps(new_rule)
         video_dict["user_id"] = user_dict["uid"]
         video_dict["user_id"] = user_dict["uid"]
         video_dict["publish_time"] = video_dict["publish_time_str"]
         video_dict["publish_time"] = video_dict["publish_time_str"]
         video_dict["strategy_type"] = self.mode
         video_dict["strategy_type"] = self.mode
         video_dict["update_time_stamp"] = int(time.time())
         video_dict["update_time_stamp"] = int(time.time())
-        if int(time.time()) - video_dict['publish_time_stamp'] > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000)):
+        if int(time.time()) - video_dict['publish_time_stamp'] > 3600 * 24 * int(
+                new_rule.get("period", {}).get("max", 1000)):
             if not video_obj['is_top']:
             if not video_obj['is_top']:
                 """
                 """
                 非置顶数据发布时间超过才退出
                 非置顶数据发布时间超过才退出
@@ -781,15 +810,14 @@ class XiGuaAuthor:
                     env=self.env,
                     env=self.env,
                     data=video_dict,
                     data=video_dict,
                     message="发布时间超过{}天".format(
                     message="发布时间超过{}天".format(
-                        int(self.rule_dict.get("period", {}).get("max", 1000))
+                        int(new_rule.get("period", {}).get("max", 1000))
                     ),
                     ),
                 )
                 )
                 return False
                 return False
-
         pipeline = PiaoQuanPipeline(
         pipeline = PiaoQuanPipeline(
             platform=self.platform,
             platform=self.platform,
             mode=self.mode,
             mode=self.mode,
-            rule_dict=self.rule_dict,
+            rule_dict=new_rule,
             env=self.env,
             env=self.env,
             item=video_dict,
             item=video_dict,
             trace_id=trace_id,
             trace_id=trace_id,
@@ -797,50 +825,53 @@ class XiGuaAuthor:
         title_flag = pipeline.title_flag()
         title_flag = pipeline.title_flag()
         repeat_flag = pipeline.repeat_video()
         repeat_flag = pipeline.repeat_video()
         if title_flag and repeat_flag:
         if title_flag and repeat_flag:
-            if int(video_dict['play_cnt']) >= int(self.rule_dict.get("play_cnt", {}).get("min", 100000)):
-                self.mq.send_msg(video_dict)
-                self.download_count += 1
-                AliyunLogger.logging(
-                    code="1002",
-                    platform=self.platform,
-                    mode=self.mode,
-                    env=self.env,
-                    data=video_dict,
-                    trace_id=trace_id,
-                    message="成功发送 MQ 至 ETL",
-                )
-                return True
-            else:
-                AliyunLogger.logging(
-                    code="2008",
-                    platform=self.platform,
-                    mode=self.mode,
-                    env=self.env,
-                    message="不满足特殊规则, 播放量",
-                    data=video_dict
-                )
-            if float(video_dict['like_cnt']) / float(video_dict['play_cnt']) >= 0.04:
-                self.mq.send_msg(video_dict)
-                self.download_count += 1
-                AliyunLogger.logging(
-                    code="1002",
-                    platform=self.platform,
-                    mode=self.mode,
-                    env=self.env,
-                    data=video_dict,
-                    trace_id=trace_id,
-                    message="成功发送 MQ 至 ETL",
-                )
-                return True
+            if new_rule.get("special"):
+                if int(video_dict['play_cnt']) >= int(new_rule.get("play_cnt", {}).get("min", 100000)):
+                    if float(video_dict['like_cnt']) / float(video_dict['play_cnt']) >= new_rule['special']:
+                        self.mq.send_msg(video_dict)
+                        self.download_count += 1
+                        AliyunLogger.logging(
+                            code="1002",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            data=video_dict,
+                            trace_id=trace_id,
+                            message="成功发送 MQ 至 ETL",
+                        )
+                        return True
+                    else:
+                        AliyunLogger.logging(
+                            code="2008",
+                            platform=self.platform,
+                            mode=self.mode,
+                            env=self.env,
+                            message="不满足特殊规则, 点赞量/播放量",
+                            data=video_dict
+                        )
             else:
             else:
-                AliyunLogger.logging(
-                    code="2008",
-                    platform=self.platform,
-                    mode=self.mode,
-                    env=self.env,
-                    message="不满足特殊规则, 点赞量/播放量",
-                    data=video_dict
-                )
+                if int(video_dict['play_cnt']) >= int(new_rule.get("play_cnt", {}).get("min", 100000)):
+                    self.mq.send_msg(video_dict)
+                    self.download_count += 1
+                    AliyunLogger.logging(
+                        code="1002",
+                        platform=self.platform,
+                        mode=self.mode,
+                        env=self.env,
+                        data=video_dict,
+                        trace_id=trace_id,
+                        message="成功发送 MQ 至 ETL",
+                    )
+                    return True
+                else:
+                    AliyunLogger.logging(
+                        code="2008",
+                        platform=self.platform,
+                        mode=self.mode,
+                        env=self.env,
+                        message="不满足特殊规则, 播放量",
+                        data=video_dict
+                    )
         return True
         return True
 
 
     def get_video_info(self, item_id, trace_id):
     def get_video_info(self, item_id, trace_id):

+ 0 - 968
xigua/xigua_author/xigua_author_test.py

@@ -1,968 +0,0 @@
-import json
-import re
-import os
-import random
-import sys
-import string
-import time
-import uuid
-import base64
-import requests
-from fake_useragent import FakeUserAgent
-
-sys.path.append(os.getcwd())
-
-
-class PiaoQuanPipelineTest:
-    def __init__(self, platform, mode, rule_dict, env, item, trace_id):
-        self.platform = platform
-        self.mode = mode
-        self.item = item
-        self.rule_dict = rule_dict
-        self.env = env
-        self.trace_id = trace_id
-
-    # 视频的发布时间限制, 属于是规则过滤
-    def publish_time_flag(self):
-        # 判断发布时间
-        publish_time_stamp = self.item["publish_time_stamp"]
-        update_time_stamp = self.item["update_time_stamp"]
-        if self.platform == "gongzhonghao":
-            if (
-                int(time.time()) - publish_time_stamp
-                > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
-            ) and (
-                int(time.time()) - update_time_stamp
-                > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
-            ):
-                message = "发布时间超过{}天".format(
-                    int(self.rule_dict.get("period", {}).get("max", 1000))
-                )
-                print(message)
-                return False
-        else:
-            if (
-                int(time.time()) - publish_time_stamp
-                > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
-            ):
-                message = "发布时间超过{}天".format(
-                    int(self.rule_dict.get("period", {}).get("max", 1000))
-                )
-                print(message)
-                return False
-        return True
-
-    # 视频标题是否满足需求
-    def title_flag(self):
-        title = self.item["video_title"]
-        cleaned_title = re.sub(r"[^\w]", " ", title)
-        # 敏感词
-        # 获取敏感词列表
-        sensitive_words = []
-        if any(word in cleaned_title for word in sensitive_words):
-            message = "标题中包含敏感词"
-            print(message)
-            return False
-        return True
-
-    # 视频基础下载规则
-    def download_rule_flag(self):
-        for key in self.item:
-            if self.rule_dict.get(key):
-                max_value = (
-                    int(self.rule_dict[key]["max"])
-                    if int(self.rule_dict[key]["max"]) > 0
-                    else 999999999999999
-                )
-                if key == "peroid": # peroid是抓取周期天数
-                    continue
-                else:
-                    flag = int(self.rule_dict[key]["min"]) <= int(self.item[key]) <= max_value
-                    if not flag:
-                        message = "{}: {} <= {} <= {}, {}".format(
-                            key,
-                            self.rule_dict[key]["min"],
-                            self.item[key],
-                            max_value,
-                            flag,
-                        )
-                        print(message)
-                        return flag
-            else:
-                continue
-        return True
-
-    # 按照某个具体平台来去重
-    # def repeat_video(self):
-    #     # sql = f""" select * from crawler_video where platform="公众号" and out_video_id="{video_id}"; """
-    #     out_id = self.item["out_video_id"]
-    #     sql = f""" select * from crawler_video where platform = "{self.platform}" and out_video_id="{out_id}"; """
-    #     repeat_video = MysqlHelper.get_values(
-    #         log_type=self.mode, crawler=self.platform, env=self.env, sql=sql, action=""
-    #     )
-    #     if repeat_video:
-    #         message = "重复的视频"
-    #         return False
-    #     return True
-
-    def process_item(self):
-        if not self.publish_time_flag():
-            # 记录相关日志
-            return False
-        if not self.title_flag():
-            # 记录相关日志
-            return False
-        # if not self.repeat_video():
-        #     # 记录相关日志
-        #     return False
-        if not self.download_rule_flag():
-            # 记录相关日志
-            return False
-        return True
-
-
-def tunnel_proxies():
-    # 隧道域名:端口号
-    tunnel = "q796.kdltps.com:15818"
-
-    # 用户名密码方式
-    username = "t17772369458618"
-    password = "5zqcjkmy"
-    tunnel_proxies = {
-        "http": "http://%(user)s:%(pwd)s@%(proxy)s/"
-                % {"user": username, "pwd": password, "proxy": tunnel},
-        "https": "http://%(user)s:%(pwd)s@%(proxy)s/"
-                 % {"user": username, "pwd": password, "proxy": tunnel},
-    }
-
-    return tunnel_proxies
-
-
-def random_signature():
-    src_digits = string.digits  # string_数字
-    src_uppercase = string.ascii_uppercase  # string_大写字母
-    src_lowercase = string.ascii_lowercase  # string_小写字母
-    digits_num = random.randint(1, 6)
-    uppercase_num = random.randint(1, 26 - digits_num - 1)
-    lowercase_num = 26 - (digits_num + uppercase_num)
-    password = (
-            random.sample(src_digits, digits_num)
-            + random.sample(src_uppercase, uppercase_num)
-            + random.sample(src_lowercase, lowercase_num)
-    )
-    random.shuffle(password)
-    new_password = "AAAAAAAAAA" + "".join(password)[10:-4] + "AAAB"
-    new_password_start = new_password[0:18]
-    new_password_end = new_password[-7:]
-    if new_password[18] == "8":
-        new_password = new_password_start + "w" + new_password_end
-    elif new_password[18] == "9":
-        new_password = new_password_start + "x" + new_password_end
-    elif new_password[18] == "-":
-        new_password = new_password_start + "y" + new_password_end
-    elif new_password[18] == ".":
-        new_password = new_password_start + "z" + new_password_end
-    else:
-        new_password = new_password_start + "y" + new_password_end
-    return new_password
-
-
-def get_video_url(video_info):
-    video_url_dict = {}
-    # video_url
-    if "videoResource" not in video_info:
-        video_url_dict["video_url"] = ""
-        video_url_dict["audio_url"] = ""
-        video_url_dict["video_width"] = 0
-        video_url_dict["video_height"] = 0
-
-    elif "dash_120fps" in video_info["videoResource"]:
-        if (
-                "video_list" in video_info["videoResource"]["dash_120fps"]
-                and "video_4" in video_info["videoResource"]["dash_120fps"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_4"
-            ]["backup_url_1"]
-            audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_4"
-            ]["backup_url_1"]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_4"
-            ]["vwidth"]
-            video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_4"
-            ]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["dash_120fps"]
-                and "video_3" in video_info["videoResource"]["dash_120fps"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_3"
-            ]["backup_url_1"]
-            audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_3"
-            ]["backup_url_1"]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_3"
-            ]["vwidth"]
-            video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_3"
-            ]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["dash_120fps"]
-                and "video_2" in video_info["videoResource"]["dash_120fps"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_2"
-            ]["backup_url_1"]
-            audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_2"
-            ]["backup_url_1"]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_2"
-            ]["vwidth"]
-            video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_2"
-            ]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["dash_120fps"]
-                and "video_1" in video_info["videoResource"]["dash_120fps"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_1"
-            ]["backup_url_1"]
-            audio_url = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_1"
-            ]["backup_url_1"]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_1"
-            ]["vwidth"]
-            video_height = video_info["videoResource"]["dash_120fps"]["video_list"][
-                "video_1"
-            ]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-
-        elif (
-                "dynamic_video" in video_info["videoResource"]["dash_120fps"]
-                and "dynamic_video_list"
-                in video_info["videoResource"]["dash_120fps"]["dynamic_video"]
-                and "dynamic_audio_list"
-                in video_info["videoResource"]["dash_120fps"]["dynamic_video"]
-                and len(
-            video_info["videoResource"]["dash_120fps"]["dynamic_video"][
-                "dynamic_video_list"
-            ]
-        )
-                != 0
-                and len(
-            video_info["videoResource"]["dash_120fps"]["dynamic_video"][
-                "dynamic_audio_list"
-            ]
-        )
-                != 0
-        ):
-            video_url = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["backup_url_1"]
-            audio_url = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
-                "dynamic_audio_list"
-            ][-1]["backup_url_1"]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["vwidth"]
-            video_height = video_info["videoResource"]["dash_120fps"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        else:
-            video_url_dict["video_url"] = ""
-            video_url_dict["audio_url"] = ""
-            video_url_dict["video_width"] = 0
-            video_url_dict["video_height"] = 0
-
-    elif "dash" in video_info["videoResource"]:
-        if (
-                "video_list" in video_info["videoResource"]["dash"]
-                and "video_4" in video_info["videoResource"]["dash"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["dash"]["video_list"]["video_4"][
-                "backup_url_1"
-            ]
-            audio_url = video_info["videoResource"]["dash"]["video_list"]["video_4"][
-                "backup_url_1"
-            ]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash"]["video_list"]["video_4"][
-                "vwidth"
-            ]
-            video_height = video_info["videoResource"]["dash"]["video_list"]["video_4"][
-                "vheight"
-            ]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["dash"]
-                and "video_3" in video_info["videoResource"]["dash"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["dash"]["video_list"]["video_3"][
-                "backup_url_1"
-            ]
-            audio_url = video_info["videoResource"]["dash"]["video_list"]["video_3"][
-                "backup_url_1"
-            ]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash"]["video_list"]["video_3"][
-                "vwidth"
-            ]
-            video_height = video_info["videoResource"]["dash"]["video_list"]["video_3"][
-                "vheight"
-            ]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["dash"]
-                and "video_2" in video_info["videoResource"]["dash"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["dash"]["video_list"]["video_2"][
-                "backup_url_1"
-            ]
-            audio_url = video_info["videoResource"]["dash"]["video_list"]["video_2"][
-                "backup_url_1"
-            ]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash"]["video_list"]["video_2"][
-                "vwidth"
-            ]
-            video_height = video_info["videoResource"]["dash"]["video_list"]["video_2"][
-                "vheight"
-            ]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["dash"]
-                and "video_1" in video_info["videoResource"]["dash"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["dash"]["video_list"]["video_1"][
-                "backup_url_1"
-            ]
-            audio_url = video_info["videoResource"]["dash"]["video_list"]["video_1"][
-                "backup_url_1"
-            ]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash"]["video_list"]["video_1"][
-                "vwidth"
-            ]
-            video_height = video_info["videoResource"]["dash"]["video_list"]["video_1"][
-                "vheight"
-            ]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-
-        elif (
-                "dynamic_video" in video_info["videoResource"]["dash"]
-                and "dynamic_video_list"
-                in video_info["videoResource"]["dash"]["dynamic_video"]
-                and "dynamic_audio_list"
-                in video_info["videoResource"]["dash"]["dynamic_video"]
-                and len(
-            video_info["videoResource"]["dash"]["dynamic_video"][
-                "dynamic_video_list"
-            ]
-        )
-                != 0
-                and len(
-            video_info["videoResource"]["dash"]["dynamic_video"][
-                "dynamic_audio_list"
-            ]
-        )
-                != 0
-        ):
-            video_url = video_info["videoResource"]["dash"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["backup_url_1"]
-            audio_url = video_info["videoResource"]["dash"]["dynamic_video"][
-                "dynamic_audio_list"
-            ][-1]["backup_url_1"]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["dash"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["vwidth"]
-            video_height = video_info["videoResource"]["dash"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        else:
-            video_url_dict["video_url"] = ""
-            video_url_dict["audio_url"] = ""
-            video_url_dict["video_width"] = 0
-            video_url_dict["video_height"] = 0
-
-    elif "normal" in video_info["videoResource"]:
-        if (
-                "video_list" in video_info["videoResource"]["normal"]
-                and "video_4" in video_info["videoResource"]["normal"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["normal"]["video_list"]["video_4"][
-                "backup_url_1"
-            ]
-            audio_url = video_info["videoResource"]["normal"]["video_list"]["video_4"][
-                "backup_url_1"
-            ]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["normal"]["video_list"][
-                "video_4"
-            ]["vwidth"]
-            video_height = video_info["videoResource"]["normal"]["video_list"][
-                "video_4"
-            ]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["normal"]
-                and "video_3" in video_info["videoResource"]["normal"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["normal"]["video_list"]["video_3"][
-                "backup_url_1"
-            ]
-            audio_url = video_info["videoResource"]["normal"]["video_list"]["video_3"][
-                "backup_url_1"
-            ]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["normal"]["video_list"][
-                "video_3"
-            ]["vwidth"]
-            video_height = video_info["videoResource"]["normal"]["video_list"][
-                "video_3"
-            ]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["normal"]
-                and "video_2" in video_info["videoResource"]["normal"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["normal"]["video_list"]["video_2"][
-                "backup_url_1"
-            ]
-            audio_url = video_info["videoResource"]["normal"]["video_list"]["video_2"][
-                "backup_url_1"
-            ]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["normal"]["video_list"][
-                "video_2"
-            ]["vwidth"]
-            video_height = video_info["videoResource"]["normal"]["video_list"][
-                "video_2"
-            ]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        elif (
-                "video_list" in video_info["videoResource"]["normal"]
-                and "video_1" in video_info["videoResource"]["normal"]["video_list"]
-        ):
-            video_url = video_info["videoResource"]["normal"]["video_list"]["video_1"][
-                "backup_url_1"
-            ]
-            audio_url = video_info["videoResource"]["normal"]["video_list"]["video_1"][
-                "backup_url_1"
-            ]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["normal"]["video_list"][
-                "video_1"
-            ]["vwidth"]
-            video_height = video_info["videoResource"]["normal"]["video_list"][
-                "video_1"
-            ]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-
-        elif (
-                "dynamic_video" in video_info["videoResource"]["normal"]
-                and "dynamic_video_list"
-                in video_info["videoResource"]["normal"]["dynamic_video"]
-                and "dynamic_audio_list"
-                in video_info["videoResource"]["normal"]["dynamic_video"]
-                and len(
-            video_info["videoResource"]["normal"]["dynamic_video"][
-                "dynamic_video_list"
-            ]
-        )
-                != 0
-                and len(
-            video_info["videoResource"]["normal"]["dynamic_video"][
-                "dynamic_audio_list"
-            ]
-        )
-                != 0
-        ):
-            video_url = video_info["videoResource"]["normal"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["backup_url_1"]
-            audio_url = video_info["videoResource"]["normal"]["dynamic_video"][
-                "dynamic_audio_list"
-            ][-1]["backup_url_1"]
-            if len(video_url) % 3 == 1:
-                video_url += "=="
-            elif len(video_url) % 3 == 2:
-                video_url += "="
-            elif len(audio_url) % 3 == 1:
-                audio_url += "=="
-            elif len(audio_url) % 3 == 2:
-                audio_url += "="
-            video_url = base64.b64decode(video_url).decode("utf8")
-            audio_url = base64.b64decode(audio_url).decode("utf8")
-            video_width = video_info["videoResource"]["normal"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["vwidth"]
-            video_height = video_info["videoResource"]["normal"]["dynamic_video"][
-                "dynamic_video_list"
-            ][-1]["vheight"]
-            video_url_dict["video_url"] = video_url
-            video_url_dict["audio_url"] = audio_url
-            video_url_dict["video_width"] = video_width
-            video_url_dict["video_height"] = video_height
-        else:
-            video_url_dict["video_url"] = ""
-            video_url_dict["audio_url"] = ""
-            video_url_dict["video_width"] = 0
-            video_url_dict["video_height"] = 0
-
-    else:
-        video_url_dict["video_url"] = ""
-        video_url_dict["audio_url"] = ""
-        video_url_dict["video_width"] = 0
-        video_url_dict["video_height"] = 0
-
-    return video_url_dict
-
-
-def get_comment_cnt(item_id):
-    """
-    Fetch the total comment count of one Xigua video.
-
-    Issues a GET to the ixigua ``tab_comments`` endpoint through
-    ``tunnel_proxies()`` with a 5 second timeout, using the fixed query
-    parameters, cookie and headers hard-coded below.
-
-    :param item_id: video id; sent as both ``group_id`` and ``item_id``
-    :return: the ``total_number`` field of the JSON response, or 0 when the
-        status code is not 200, the body is not a JSON object, or the field
-        is missing
-    """
-    url = "https://www.ixigua.com/tlb/comment/article/v5/tab_comments/?"
-    params = {
-        "tab_index": "0",
-        "count": "10",
-        "offset": "10",
-        "group_id": str(item_id),
-        "item_id": str(item_id),
-        "aid": "1768",
-        "msToken": "50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==",
-        "X-Bogus": "DFSzswVOyGtANVeWtCLMqR/F6q9U",
-        # NOTE(review): FakeUserAgent().random is used as the user-agent in
-        # get_video_info, so a user-agent string is presumably being sent as
-        # the signature here - confirm whether that is intended.
-        "_signature": FakeUserAgent().random,
-    }
-    headers = {
-        "authority": "www.ixigua.com",
-        "accept": "application/json, text/plain, */*",
-        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
-        "cache-control": "no-cache",
-        "cookie": "MONITOR_WEB_ID=67cb5099-a022-4ec3-bb8e-c4de6ba51dd0; passport_csrf_token=72b2574f3c99f8ba670e42df430218fd; passport_csrf_token_default=72b2574f3c99f8ba670e42df430218fd; sid_guard=c7472b508ea631823ba765a60cf8757f%7C1680867422%7C3024002%7CFri%2C+12-May-2023+11%3A37%3A04+GMT; uid_tt=c13f47d51767f616befe32fb3e9f485a; uid_tt_ss=c13f47d51767f616befe32fb3e9f485a; sid_tt=c7472b508ea631823ba765a60cf8757f; sessionid=c7472b508ea631823ba765a60cf8757f; sessionid_ss=c7472b508ea631823ba765a60cf8757f; sid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; ssid_ucp_v1=1.0.0-KGUzNWYxNmRkZGJiZjgxY2MzZWNkMTEzMTkwYjY1Yjg5OTY5NzVlNmMKFQiu3d-eqQIQ3oDAoQYYGCAMOAhACxoCaGwiIGM3NDcyYjUwOGVhNjMxODIzYmE3NjVhNjBjZjg3NTdm; odin_tt=b893608d4dde2e1e8df8cd5d97a0e2fbeafc4ca762ac72ebef6e6c97e2ed19859bb01d46b4190ddd6dd17d7f9678e1de; SEARCH_CARD_MODE=7168304743566296612_0; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=7Pux7s634-z8DYvCM20y7KigwH5u7Rh6D9C-RROpnT.aGMEcz6Vsxp.oai47wJqa4f86; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1683858689%7Ca5223fe1500578e01e138a0d71d6444692018296c4c24f5885af174a65873c95; ixigua-a-s=3; msToken=50-JJObWB07HfHs-BMJWT1eIDX3G-6lPSF_i-QwxBIXE9VVa-iN0jbEXR5pG2DKjXBmP299n6ZTuXzY-GAy968CCvouSAYIS4GzvGQT3pNlKNejr5G4-1g==; __ac_nonce=0645dcbf0005064517440; __ac_signature=_02B4Z6wo00f01FEGmAwAAIDBKchzCGqn-MBRJpyAAHAjieFC5GEg6gGiwz.I4PRrJl7f0GcixFrExKmgt6QI1i1S-dQyofPEj2ugWTCnmKUdJQv-wYuDofeKNe8VtMtZq2aKewyUGeKU-5Ud21; ixigua-a-s=3",
-        "pragma": "no-cache",
-        "referer": f"https://www.ixigua.com/{item_id}?logTag=3c5aa86a8600b9ab8540",
-        "sec-ch-ua": '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
-        "sec-ch-ua-mobile": "?0",
-        "sec-ch-ua-platform": '"macOS"',
-        "sec-fetch-dest": "empty",
-        "sec-fetch-mode": "cors",
-        "sec-fetch-site": "same-origin",
-        "tt-anti-token": "cBITBHvmYjEygzv-f9c78c1297722cf1f559c74b084e4525ce4900bdcf9e8588f20cc7c2e3234422",
-        "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.35",
-        "x-secsdk-csrf-token": "000100000001f8e733cf37f0cd255a51aea9a81ff7bc0c09490cfe41ad827c3c5c18ec809279175e4d9f5553d8a5",
-    }
-    response = requests.get(
-        url=url, headers=headers, params=params, proxies=tunnel_proxies(), timeout=5
-    )
-    try:
-        if response.status_code != 200:
-            return 0
-        try:
-            # Parse the body once instead of re-decoding it for every check.
-            payload = response.json()
-        except ValueError:
-            # Body was not valid JSON; treat it like any other failed lookup.
-            return 0
-    finally:
-        response.close()
-    if not isinstance(payload, dict):
-        return 0
-    return payload.get("total_number", 0)
-
-
-def get_video_info(item_id, trace_id):
-    """
-    Fetch metadata for one Xigua video and flatten it into a dict.
-
-    Calls the ixigua ``mixVideo/information`` endpoint through
-    ``tunnel_proxies()`` with a 5 second timeout, resolves the media urls and
-    dimensions with ``get_video_url`` and looks up the comment count with
-    ``get_comment_cnt``.
-
-    :param item_id: video id; sent as ``mixId`` and returned as ``gid``
-    :param trace_id: not used by this function; kept so existing callers
-        keep working
-    :return: dict describing the video, or None when the status code is not
-        200, the body is not a JSON object, ``data`` is missing or empty, or
-        no video entry is present
-    """
-    url = "https://www.ixigua.com/api/mixVideo/information?"
-    headers = {
-        "accept-encoding": "gzip, deflate",
-        "accept-language": "zh-CN,zh-Hans;q=0.9",
-        "user-agent": FakeUserAgent().random,
-        "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
-    }
-    params = {
-        "mixId": str(item_id),
-        "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
-                   "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
-        "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
-        "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
-                      "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
-    }
-    cookies = {
-        "ixigua-a-s": "1",
-        "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
-                   "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
-        "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
-                 "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
-        "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
-        "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
-        "__ac_nonce": "06304878000964fdad287",
-        "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
-                          "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
-        "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
-        "_tea_utm_cache_1300": "undefined",
-        "support_avif": "false",
-        "support_webp": "false",
-        "xiguavideopcwebid": "7134967546256016900",
-        "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
-    }
-    response = requests.get(
-        url=url,
-        headers=headers,
-        params=params,
-        cookies=cookies,
-        proxies=tunnel_proxies(),
-        timeout=5,
-    )
-
-    # Decode the body once; a non-JSON body counts as a failed request
-    # instead of raising out of the crawler.
-    payload = None
-    if response.status_code == 200:
-        try:
-            payload = response.json()
-        except ValueError:
-            payload = None
-    data = payload.get("data") if isinstance(payload, dict) else None
-    if not isinstance(data, dict) or not data:
-        print("获取视频信息失败")
-        return None
-
-    video_info = (
-        data
-        .get("gidInformation", {})
-        .get("packerData", {})
-        .get("video", {})
-    )
-    if not video_info:
-        return None
-
-    video_detail = get_video_url(video_info)
-    user_info = video_info.get("user_info", {})
-    # Converted once and reused for both the timestamp and its string form.
-    publish_time_stamp = int(video_info.get("video_publish_time", 0))
-    video_dict = {
-        "video_title": video_info.get("title", ""),
-        "video_id": video_info.get("videoResource", {}).get("vid", ""),
-        "gid": str(item_id),
-        "play_cnt": int(video_info.get("video_watch_count", 0)),
-        "like_cnt": int(video_info.get("video_like_count", 0)),
-        "comment_cnt": int(get_comment_cnt(item_id)),
-        "share_cnt": 0,
-        "favorite_cnt": 0,
-        "duration": int(video_info.get("video_duration", 0)),
-        "video_width": int(video_detail["video_width"]),
-        "video_height": int(video_detail["video_height"]),
-        "publish_time_stamp": publish_time_stamp,
-        "publish_time_str": time.strftime(
-            "%Y-%m-%d %H:%M:%S",
-            time.localtime(publish_time_stamp),
-        ),
-        "user_name": user_info.get("name", ""),
-        "user_id": str(user_info.get("user_id", "")),
-        "avatar_url": str(user_info.get("avatar_url", "")),
-        "cover_url": video_info.get("poster_url", ""),
-        "audio_url": video_detail["audio_url"],
-        "video_url": video_detail["video_url"],
-        "session": f"xigua-search-{int(time.time())}",
-    }
-    return video_dict
-
-
-class XiGuaAuthor:
-    """
-    Crawl the videos published by a list of Xigua author accounts.
-
-    Each video found is enriched with ``get_video_info`` and passed through
-    ``PiaoQuanPipelineTest``; videos accepted by the pipeline are printed and
-    counted in ``download_count``.
-    """
-
-    def __init__(self, platform, mode, rule_dict, env, user_list):
-        """
-        :param platform: platform name; stored on every video and used as the
-            prefix of its trace id
-        :param mode: crawl mode; stored as ``strategy`` and ``strategy_type``
-        :param rule_dict: crawl rules; ``videos_cnt.min`` caps one round and
-            the whole dict is handed to the pipeline
-        :param env: environment name handed to the pipeline
-        :param user_list: list of author dicts; ``link`` and ``uid`` are read
-        """
-        self.platform = platform
-        self.mode = mode
-        self.rule_dict = rule_dict
-        self.env = env
-        self.user_list = user_list
-        # No MQ client is created in this variant; accepted videos are only
-        # printed and counted.
-        self.download_count = 0
-
-    @staticmethod
-    def _extract_user_id(link):
-        """Return the author id from a home-page link, dropping any trailing path or query."""
-        tail = link.replace("https://www.ixigua.com/home/", "")
-        return tail.split("/", 1)[0].split("?", 1)[0]
-
-    def get_author_list(self):
-        """
-        Crawl every author in ``user_list`` until the round quota is exceeded.
-
-        The quota is ``rule_dict["videos_cnt"]["min"]`` (300 when absent).
-        Each author is crawled exactly once, with a random 1-15 second pause
-        before moving on to the next one.
-        """
-        # Only a bounded number of videos is collected per round.
-        max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
-        for user_dict in self.user_list:
-            self.get_video_list(user_dict)
-            if self.download_count > max_count:
-                print("本轮已经抓取足够数量的视频,已经自动退出")
-                return
-            time.sleep(random.randint(1, 15))
-
-    def get_video_list(self, user_dict):
-        """
-        Page through one author's video list, 30 items per request.
-
-        Stops when a request fails, the response cannot be read, or a page
-        comes back empty. Every listed video goes to ``process_video_obj``.
-
-        :param user_dict: author dict; ``link`` supplies the author id
-        """
-        offset = 0
-        signature = random_signature()
-        url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
-        # Links may carry a trailing "/?query" part; only the id is sent.
-        user_id = self._extract_user_id(user_dict["link"])
-        headers = {
-            "referer": f"https://www.ixigua.com/home/{user_id}/video/?preActiveKey=hotsoon&list_entrance=userdetail",
-            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
-        }
-        while True:
-            params = {
-                "to_user_id": str(user_id),
-                "offset": str(offset),
-                "limit": "30",
-                "maxBehotTime": "0",
-                "order": "new",
-                "isHome": "0",
-                "_signature": signature,
-            }
-            response = requests.get(
-                url=url,
-                headers=headers,
-                params=params,
-                proxies=tunnel_proxies(),
-                timeout=5,
-            )
-            offset += 30
-            if "data" not in response.text or response.status_code != 200:
-                print(f"get_videoList:{response.text}\n")
-                return
-            try:
-                feeds = response.json()["data"]["videoList"]
-            except (ValueError, KeyError, TypeError):
-                # The text mentioned "data" but the expected structure is absent.
-                print(f"get_videoList:{response.text}\n")
-                return
-            if not feeds:
-                print("没有更多数据啦~\n")
-                return
-            for video_obj in feeds:
-                # Debug output kept from the original; .get avoids a KeyError
-                # when the flag is missing.
-                print(video_obj.get("is_top"))
-                self.process_video_obj(video_obj, user_dict)
-
-    def process_video_obj(self, video_obj, user_dict):
-        """
-        Enrich one listed video and run it through the pipeline.
-
-        :param video_obj: one entry of the author's video list; ``item_id`` is read
-        :param user_dict: author dict; ``uid`` is stored as the video's ``user_id``
-        """
-        trace_id = self.platform + str(uuid.uuid1())
-        item_id = video_obj.get("item_id", "")
-        if not item_id:
-            print("无效视频")
-            return
-        # Fetch the video details; get_video_info returns None on failure.
-        video_dict = get_video_info(item_id=item_id, trace_id=trace_id)
-        if video_dict is None:
-            print("无效视频")
-            return
-        video_dict["out_user_id"] = video_dict["user_id"]
-        video_dict["platform"] = self.platform
-        video_dict["strategy"] = self.mode
-        video_dict["out_video_id"] = video_dict["video_id"]
-        video_dict["width"] = video_dict["video_width"]
-        video_dict["height"] = video_dict["video_height"]
-        video_dict["crawler_rule"] = json.dumps(self.rule_dict)
-        video_dict["user_id"] = user_dict["uid"]
-        video_dict["publish_time"] = video_dict["publish_time_str"]
-        video_dict["strategy_type"] = self.mode
-        video_dict["update_time_stamp"] = int(time.time())
-        pipeline = PiaoQuanPipelineTest(
-            platform=self.platform,
-            mode=self.mode,
-            rule_dict=self.rule_dict,
-            env=self.env,
-            item=video_dict,
-            trace_id=trace_id,
-        )
-        flag = pipeline.process_item()
-        if flag:
-            print(json.dumps(video_dict, ensure_ascii=False, indent=4))
-            # Nothing is sent to MQ in this variant; the video is only counted.
-            self.download_count += 1
-            print("成功发送 MQ 至 ETL")
-
-
-if __name__ == "__main__":
-    # Manual run: crawl one hard-coded Xigua author with an empty rule set.
-    demo_author = dict(
-        uid=6267140,
-        source="xigua",
-        link="https://www.ixigua.com/home/113976532286319/?list_entrance=anyVideo",
-        nick_name="云姐犹记",
-        avatar_url="",
-        mode="author",
-    )
-    # Example of a populated rule_dict:
-    # {'period': {'min': 30, 'max': 30}, 'duration': {'min': 20, 'max': 0}, 'play_cnt': {'min': 100, 'max': 0}}
-    crawler = XiGuaAuthor(
-        env="prod",
-        mode="author",
-        platform="xigua",
-        rule_dict={},
-        user_list=[demo_author],
-    )
-    crawler.get_author_list()
-    # A single video can be checked on its own with, e.g.:
-    # get_video_info(item_id="v0201ag10000cl4d7djc77u73eftvrcg", trace_id="ljh")