Переглянути джерело

西瓜搜索——逆向完成

罗俊辉 1 рік тому
батько
коміт
aefdfb0a9e
1 змінених файлів з 711 додано та 0 видалено
  1. 711 0
      spider/crawler_online/xigua_search.py

+ 711 - 0
spider/crawler_online/xigua_search.py

@@ -0,0 +1,711 @@
+"""
+西瓜视频搜索爬虫
+"""
+import os
+import sys
+import json
+import time
+import uuid
+import random
+import base64
+import asyncio
+import aiohttp
+import urllib.parse
+
+import requests
+from lxml import etree
+
+sys.path.append(os.getcwd())
+
+from application.items import VideoItem
+from application.pipeline import PiaoQuanPipeline
+from application.common.messageQueue import MQ
+from application.common.proxies import tunnel_proxies
+from application.common.log import AliyunLogger
+
+
+def _decode_padded_base64(encoded):
+    """
+    Decode a base64 string whose trailing ``=`` padding may be missing.
+
+    :param encoded: base64 text, with or without its padding
+    :return: the decoded payload as a utf8 string
+    """
+    # Valid base64 text has a length that is a multiple of 4, so the number
+    # of missing "=" characters has to be computed modulo 4 (not modulo 3).
+    padded = encoded + "=" * (-len(encoded) % 4)
+    return base64.b64decode(padded).decode("utf8")
+
+
+def _build_video_url_dict(video_entry, audio_entry):
+    """
+    Build the result dict of ``get_video_url`` from two stream entries.
+
+    :param video_entry: dict holding "backup_url_1", "vwidth" and "vheight"
+    :param audio_entry: dict holding "backup_url_1"
+    :return: dict with "video_url", "audio_url", "video_width", "video_height"
+    """
+    # Each link is padded and decoded on its own, so the audio link is handled
+    # correctly no matter what the video link looked like.
+    return {
+        "video_url": _decode_padded_base64(video_entry["backup_url_1"]),
+        "audio_url": _decode_padded_base64(audio_entry["backup_url_1"]),
+        "video_width": video_entry["vwidth"],
+        "video_height": video_entry["vheight"],
+    }
+
+
+def get_video_url(video_info):
+    """
+    Extract the video / audio links and the frame size of one video.
+
+    Only the first container found in ``video_info["videoResource"]`` is
+    inspected, looking for "dash_120fps", "dash" and "normal" in that order.
+    Inside that container the "video_list" entries are checked in the order
+    "video_4", "video_3", "video_2", "video_1"; the first one present supplies
+    both the video link and the audio link.  When none of them is present, the
+    last items of "dynamic_video_list" and "dynamic_audio_list" (both under
+    "dynamic_video") are used, provided both lists are non-empty.
+
+    :param video_info: dict describing one video; may lack "videoResource"
+    :return: dict with the keys "video_url", "audio_url", "video_width" and
+        "video_height"; the urls are "" and the sizes are 0 when no usable
+        stream is found
+    :raises KeyError: if the selected entry lacks "backup_url_1", "vwidth"
+        or "vheight"
+    """
+    empty_result = {
+        "video_url": "",
+        "audio_url": "",
+        "video_width": 0,
+        "video_height": 0,
+    }
+    if "videoResource" not in video_info:
+        return empty_result
+
+    resource = video_info["videoResource"]
+    for container_key in ("dash_120fps", "dash", "normal"):
+        if container_key in resource:
+            container = resource[container_key]
+            break
+    else:
+        return empty_result
+
+    # NOTE: once a container key is present, later containers are never
+    # consulted, even if this one turns out to hold no usable stream.
+    video_list = container.get("video_list") or {}
+    for entry_key in ("video_4", "video_3", "video_2", "video_1"):
+        if entry_key in video_list:
+            entry = video_list[entry_key]
+            return _build_video_url_dict(entry, entry)
+
+    dynamic_video = container.get("dynamic_video") or {}
+    dynamic_video_list = dynamic_video.get("dynamic_video_list")
+    dynamic_audio_list = dynamic_video.get("dynamic_audio_list")
+    if dynamic_video_list and dynamic_audio_list:
+        return _build_video_url_dict(dynamic_video_list[-1], dynamic_audio_list[-1])
+
+    return empty_result
+
+
+class XiGuaSearch(object):
+    """
+    Keyword search crawler for ixigua.com.
+
+    ``search`` fetches the search result page for a keyword, collects the
+    result links and calls ``get_video_info`` for each of them concurrently.
+    ``get_video_info`` fills a ``VideoItem`` from the video detail API and
+    prints the produced item as JSON.
+    """
+
+    # Seconds to wait for the search result page before giving up.
+    REQUEST_TIMEOUT = 30
+
+    def __init__(self, platform, mode, rule_dict, user_list, env="prod"):
+        """
+        :param platform: platform identifier, stored on every produced item
+        :param mode: crawl strategy, stored on every produced item
+        :param rule_dict: crawl rules (stored, not used by the methods below)
+        :param user_list: users (stored, not used by the methods below)
+        :param env: suffix of the MQ topic name, "prod" by default
+        """
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.user_list = user_list
+        self.env = env
+        self.download_cnt = 0
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
+        self.expire_flag = False
+        self.aliyun_log = AliyunLogger(platform=self.platform, mode=self.mode)
+
+    async def search(self, keyword):
+        """
+        Search ixigua.com for ``keyword`` and process every result found.
+
+        :param keyword: search keyword; it is url-quoted before use
+        :return: None
+        """
+        keyword = urllib.parse.quote(keyword)
+        # NOTE(review): there is no "?" between the path and the
+        # "ab_name=...&fss=..." suffix - confirm this is the intended url.
+        base_url = "https://www.ixigua.com/search/{}/ab_name=search&fss=input".format(
+            keyword
+        )
+        # NOTE(review): the cookie below is a hard-coded session value; it
+        # presumably expires and should come from configuration - confirm.
+        headers = {
+            "authority": "www.ixigua.com",
+            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
+            "accept-language": "zh,en;q=0.9,zh-CN;q=0.8",
+            "cache-control": "max-age=0",
+            "cookie": "ixigua-a-s=1; support_webp=true; support_avif=true; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=Ur23fgYD2pMJOvi1BpILyfaobg8wA7IhGwmQx260ULRa8Dvjaxc5ZA63BUIP-6Vi473f; ttwid=1%7CNtTtSp4Iej-v0nWtepdZH3d3Ts6uGNMFzTN20ps1cdo%7C1708236945%7Cc1f301c64aa3bf69cdaa41f28856e2bb7b7eed16583f8c92d50cffa2d9944fc6; msToken=rr418opQf04vm8n9s8FAGdr1AoCUsvAOGKSDPbBEfwVS1sznxxZCvcZTI93qVz5uAXlX9yRwcKlNQZ4wMro2DmlHw5yWHAVeKr_SzgO1KtVVnjUMTUNEux_cq1-EIkI=",
+            "sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
+            "sec-ch-ua-mobile": "?0",
+            "sec-ch-ua-platform": '"macOS"',
+            "sec-fetch-dest": "document",
+            "sec-fetch-mode": "navigate",
+            "sec-fetch-site": "none",
+            "sec-fetch-user": "?1",
+            "upgrade-insecure-requests": "1",
+            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+        }
+        # requests is a blocking library: run it in the default executor so
+        # the event loop is not stalled, and bound the wait with a timeout.
+        loop = asyncio.get_running_loop()
+        basic_response = await loop.run_in_executor(
+            None,
+            lambda: requests.get(
+                url=base_url, headers=headers, timeout=self.REQUEST_TIMEOUT
+            ),
+        )
+        html = etree.HTML(basic_response.text)
+        result = html.xpath(
+            '//a[@class="HorizontalFeedCard__coverWrapper disableZoomAnimation"]/@href'
+        )
+        print(result)
+        async with aiohttp.ClientSession() as session:
+            # Each href is trimmed by one leading and two trailing characters
+            # to obtain the id (presumably "/<id>" plus a 2-char suffix -
+            # verify against the live page).
+            tasks = [self.get_video_info(session, page_id[1:-2]) for page_id in result]
+            await asyncio.gather(*tasks)
+
+    async def get_video_info(self, session, page_id):
+        """
+        Fetch the detail of one video and print the produced item as JSON.
+
+        :param session: aiohttp client session used for the request
+        :param page_id: id of the video page, sent as the "mixId" parameter
+        :return: None
+        """
+        url = "https://www.ixigua.com/api/mixVideo/information?"
+        headers = {
+            "accept-encoding": "gzip, deflate",
+            "accept-language": "zh-CN,zh-Hans;q=0.9",
+            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
+            "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
+        }
+        # NOTE(review): tokens, signatures and cookies below are hard-coded
+        # captured values; presumably they expire - confirm how to refresh.
+        params = {
+            "mixId": str(page_id),
+            "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
+            "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
+            "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
+            "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
+            "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
+        }
+        cookies = {
+            "ixigua-a-s": "1",
+            "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
+            "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
+            "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
+            "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
+            "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
+            "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
+            "__ac_nonce": "06304878000964fdad287",
+            "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
+            "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
+            "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
+            "_tea_utm_cache_1300": "undefined",
+            "support_avif": "false",
+            "support_webp": "false",
+            "xiguavideopcwebid": "7134967546256016900",
+            "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
+        }
+        async with session.get(
+            url, headers=headers, params=params, cookies=cookies
+        ) as response:
+            payload = await response.json()
+            # A missing or null "data" is treated like every other missing
+            # level below: it yields an empty dict instead of raising.
+            video_info = (
+                (payload.get("data") or {})
+                .get("gidInformation", {})
+                .get("packerData", {})
+                .get("video", {})
+            )
+            # Resolve the stream links once and reuse them for both fields.
+            video_url_dict = get_video_url(video_info)
+            video_id = video_info.get("videoResource", {}).get("vid", "")
+            user_info = video_info.get("user_info", {})
+            publish_time_stamp = int(video_info.get("video_publish_time", 0))
+
+            item = VideoItem()
+            item.add_video_info("video_title", video_info.get("title", ""))
+            item.add_video_info("video_id", video_id)
+            item.add_video_info("play_cnt", int(video_info.get("video_watch_count", 0)))
+            item.add_video_info("like_cnt", int(video_info.get("video_like_count", 0)))
+            item.add_video_info("duration", int(video_info.get("video_duration", 0)))
+            item.add_video_info("publish_time_stamp", publish_time_stamp)
+            item.add_video_info(
+                "publish_time_str",
+                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp)),
+            )
+            item.add_video_info("user_name", user_info.get("name", ""))
+            item.add_video_info("user_id", str(user_info.get("user_id", "")))
+            item.add_video_info("avatar_url", str(user_info.get("avatar_url", "")))
+            item.add_video_info("cover_url", video_info.get("poster_url", ""))
+            item.add_video_info("audio_url", video_url_dict["audio_url"])
+            item.add_video_info("video_url", video_url_dict["video_url"])
+            item.add_video_info("session", "xigua-search-{}".format(int(time.time())))
+            item.add_video_info("out_video_id", video_id)
+            item.add_video_info("platform", self.platform)
+            item.add_video_info("strategy", self.mode)
+            mq_obj = item.produce_item()
+            print(json.dumps(mq_obj, ensure_ascii=False, indent=4))
+
+
+if __name__ == "__main__":
+    # Manual run: search one keyword with placeholder constructor arguments.
+    S = XiGuaSearch(platform=1, mode=2, rule_dict=3, user_list=1)
+    # asyncio.run creates the event loop, runs the coroutine and closes the
+    # loop afterwards; it replaces the deprecated get_event_loop() pattern.
+    asyncio.run(S.search("春节"))