|
- """
- 西瓜视频搜索爬虫
- """
- import os
- import sys
- import json
- import time
- import uuid
- import random
- import base64
- import asyncio
- import aiohttp
- import urllib.parse
- import requests
- from lxml import etree
- sys.path.append(os.getcwd())
- from application.items import VideoItem
- from application.pipeline import PiaoQuanPipeline
- from application.common.messageQueue import MQ
- from application.common.proxies import tunnel_proxies
- from application.common.log import AliyunLogger
# Resource kinds under video_info["videoResource"], in lookup order.
_RESOURCE_PRIORITY = ("dash_120fps", "dash", "normal")
# Entries of a resource's "video_list", in lookup order.
_VIDEO_QUALITY_PRIORITY = ("video_4", "video_3", "video_2", "video_1")


def _empty_video_url_dict():
    """Return a fresh result dict describing "no playable stream found"."""
    return {"video_url": "", "audio_url": "", "video_width": 0, "video_height": 0}


def _decode_backup_url(encoded):
    """Decode a base64-encoded URL whose trailing "=" padding may be missing."""
    # Base64 text must be a multiple of 4 characters long; restore whatever
    # padding was stripped before decoding.
    padded = encoded + "=" * (-len(encoded) % 4)
    return base64.b64decode(padded).decode("utf8")


def _select_stream_entries(resource):
    """Return the (video_entry, audio_entry) pair to use from *resource*, or None."""
    video_list = resource.get("video_list") or {}
    for quality in _VIDEO_QUALITY_PRIORITY:
        if quality in video_list:
            # "video_list" entries supply both URLs from the same "backup_url_1".
            entry = video_list[quality]
            return entry, entry

    dynamic_video = resource.get("dynamic_video") or {}
    dynamic_videos = dynamic_video.get("dynamic_video_list") or []
    dynamic_audios = dynamic_video.get("dynamic_audio_list") or []
    if dynamic_videos and dynamic_audios:
        # The last element of each dynamic list is the one used.
        return dynamic_videos[-1], dynamic_audios[-1]
    return None


def get_video_url(video_info):
    """
    Extract the decoded video/audio URLs and the frame size from a video payload.

    Only the first of "dash_120fps", "dash", "normal" that is present under
    ``video_info["videoResource"]`` is consulted. Inside it, the first of
    "video_4" .. "video_1" found in "video_list" is used; failing that, the
    last entries of "dynamic_video_list" / "dynamic_audio_list" are used when
    both lists are non-empty. The "backup_url_1" values are base64 text and
    are decoded to UTF-8 strings.

    :param video_info: dict that may contain a "videoResource" mapping
    :return: dict with keys "video_url", "audio_url", "video_width" and
        "video_height"; empty strings and zeros when no stream is found
    :raises KeyError: if the selected entry lacks "backup_url_1", "vwidth"
        or "vheight"
    :raises binascii.Error: if a "backup_url_1" value is not valid base64
    """
    resources = video_info.get("videoResource") or {}

    # Only the highest-priority resource kind that is present is inspected;
    # there is no fall-through to lower-priority kinds when it has no stream.
    resource_key = next((key for key in _RESOURCE_PRIORITY if key in resources), None)
    if resource_key is None:
        return _empty_video_url_dict()

    entries = _select_stream_entries(resources[resource_key] or {})
    if entries is None:
        return _empty_video_url_dict()

    video_entry, audio_entry = entries
    return {
        "video_url": _decode_backup_url(video_entry["backup_url_1"]),
        "audio_url": _decode_backup_url(audio_entry["backup_url_1"]),
        "video_width": video_entry["vwidth"],
        "video_height": video_entry["vheight"],
    }
class XiGuaSearch(object):
    """
    Keyword search crawler for www.ixigua.com.

    ``search`` loads the search result page for a keyword and then fetches the
    detail payload of every result concurrently through ``get_video_info``.
    """

    # Seconds to wait for the search page before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, platform, mode, rule_dict, user_list, env="prod"):
        """
        Store the crawl configuration and create the MQ / logger helpers.

        :param platform: stored and written to each item as "platform"
        :param mode: stored and written to each item as "strategy"
        :param rule_dict: stored as-is; not read by the methods of this class
        :param user_list: stored as-is; not read by the methods of this class
        :param env: suffix of the MQ topic name ("topic_crawler_etl_" + env)
        """
        self.platform = platform
        self.mode = mode
        self.rule_dict = rule_dict
        self.user_list = user_list
        self.env = env
        self.download_cnt = 0
        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
        self.expire_flag = False
        self.aliyun_log = AliyunLogger(platform=self.platform, mode=self.mode)

    async def search(self, keyword):
        """
        Search for *keyword* and fetch the info of every result on the page.

        :param keyword: search term; it is URL-quoted before being used
        :return: None
        """
        keyword = urllib.parse.quote(keyword)
        base_url = "https://www.ixigua.com/search/{}/ab_name=search&fss=input".format(
            keyword
        )
        # NOTE(review): hard-coded browser headers, including a captured
        # cookie; presumably these need refreshing when the session expires.
        headers = {
            "authority": "www.ixigua.com",
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh,en;q=0.9,zh-CN;q=0.8",
            "cache-control": "max-age=0",
            "cookie": "ixigua-a-s=1; support_webp=true; support_avif=true; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; tt_scid=Ur23fgYD2pMJOvi1BpILyfaobg8wA7IhGwmQx260ULRa8Dvjaxc5ZA63BUIP-6Vi473f; ttwid=1%7CNtTtSp4Iej-v0nWtepdZH3d3Ts6uGNMFzTN20ps1cdo%7C1708236945%7Cc1f301c64aa3bf69cdaa41f28856e2bb7b7eed16583f8c92d50cffa2d9944fc6; msToken=rr418opQf04vm8n9s8FAGdr1AoCUsvAOGKSDPbBEfwVS1sznxxZCvcZTI93qVz5uAXlX9yRwcKlNQZ4wMro2DmlHw5yWHAVeKr_SzgO1KtVVnjUMTUNEux_cq1-EIkI=",
            "sec-ch-ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"macOS"',
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "none",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        }
        # requests is synchronous: run it in the default executor so the event
        # loop is not blocked, and bound the wait with a timeout.
        loop = asyncio.get_running_loop()
        basic_response = await loop.run_in_executor(
            None,
            lambda: requests.get(
                url=base_url, headers=headers, timeout=self.REQUEST_TIMEOUT
            ),
        )
        html = etree.HTML(basic_response.text)
        result = html.xpath(
            '//a[@class="HorizontalFeedCard__coverWrapper disableZoomAnimation"]/@href'
        )
        print(result)
        async with aiohttp.ClientSession() as session:
            # Each href is trimmed by its first character and its last two
            # characters to obtain the id -- presumably a leading "/" plus a
            # fixed suffix; TODO confirm against a live page.
            tasks = [self.get_video_info(session, page_id[1:-2]) for page_id in result]
            await asyncio.gather(*tasks)

    async def get_video_info(self, session, page_id):
        """
        Fetch one video's detail payload, build a VideoItem and print it as JSON.

        The produced item is only printed; this method does not publish
        anything to ``self.mq``.

        :param session: aiohttp.ClientSession used for the request
        :param page_id: video home page id, sent as the "mixId" query parameter
        :return: None
        """
        url = "https://www.ixigua.com/api/mixVideo/information?"
        headers = {
            "accept-encoding": "gzip, deflate",
            "accept-language": "zh-CN,zh-Hans;q=0.9",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
        }
        # NOTE(review): msToken / X-Bogus / _signature and the cookies below
        # are hard-coded values; verify they are still accepted by the API.
        params = {
            "mixId": str(page_id),
            "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC"
            "NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
            "X-Bogus": "DFSzswVupYTANCJOSBk0P53WxM-r",
            "_signature": "_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px"
            "fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94",
        }
        cookies = {
            "ixigua-a-s": "1",
            "msToken": "IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB"
            "NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA",
            "ttwid": "1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7"
            "6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8",
            "tt_scid": "QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3",
            "MONITOR_WEB_ID": "0a49204a-7af5-4e96-95f0-f4bafb7450ad",
            "__ac_nonce": "06304878000964fdad287",
            "__ac_signature": "_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb"
            "FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8",
            "ttcid": "e56fabf6e85d4adf9e4d91902496a0e882",
            "_tea_utm_cache_1300": "undefined",
            "support_avif": "false",
            "support_webp": "false",
            "xiguavideopcwebid": "7134967546256016900",
            "xiguavideopcwebid.sig": "xxRww5R1VEMJN_dQepHorEu_eAc",
        }
        async with session.get(
            url, headers=headers, params=params, cookies=cookies
        ) as response:
            payload = await response.json()

        video_info = (
            payload["data"]
            .get("gidInformation", {})
            .get("packerData", {})
            .get("video", {})
        )
        # Values used for more than one field are computed once.
        video_resource = video_info.get("videoResource", {})
        user_info = video_info.get("user_info", {})
        publish_time_stamp = int(video_info.get("video_publish_time", 0))
        stream_urls = get_video_url(video_info)

        item = VideoItem()
        item.add_video_info("video_title", video_info.get("title", ""))
        item.add_video_info("video_id", video_resource.get("vid", ""))
        item.add_video_info("play_cnt", int(video_info.get("video_watch_count", 0)))
        item.add_video_info("like_cnt", int(video_info.get("video_like_count", 0)))
        item.add_video_info("duration", int(video_info.get("video_duration", 0)))
        item.add_video_info("publish_time_stamp", publish_time_stamp)
        item.add_video_info(
            "publish_time_str",
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp)),
        )
        item.add_video_info("user_name", user_info.get("name", ""))
        item.add_video_info("user_id", str(user_info.get("user_id", "")))
        item.add_video_info("avatar_url", str(user_info.get("avatar_url", "")))
        item.add_video_info("cover_url", video_info.get("poster_url", ""))
        item.add_video_info("audio_url", stream_urls["audio_url"])
        item.add_video_info("video_url", stream_urls["video_url"])
        item.add_video_info("session", "xigua-search-{}".format(int(time.time())))
        item.add_video_info("out_video_id", video_resource.get("vid", ""))
        item.add_video_info("platform", self.platform)
        item.add_video_info("strategy", self.mode)

        mq_obj = item.produce_item()
        print(json.dumps(mq_obj, ensure_ascii=False, indent=4))
if __name__ == "__main__":
    # Manual run: search one keyword and print every item that is produced.
    # asyncio.run creates and closes its own event loop, which avoids calling
    # asyncio.get_event_loop() when no loop is running.
    searcher = XiGuaSearch(platform=1, mode=2, rule_dict=3, user_list=1)
    asyncio.run(searcher.search("春节"))
|