123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- import base64
- import configparser
- import os
- import re
- import sys
- import time
- import requests
- from common import Common
- from common.pq_utility import PQ
- sys.path.append(os.getcwd())
- from common.url_manage import urlManage
- from common.aliyun_oss_uploading import Oss
- from common.userAgent import get_random_user_agent
- config = configparser.ConfigParser()
- config.read('/root/single_video_crawler/config.ini') # NOTE: hard-coded absolute path; replace it with the path to your own config file
class xiguaVideo():
    """Fetch a Xigua (ixigua.com) video page, download the video and publish it.

    All methods are class/static methods; the class carries no instance state.
    """

    # Number of attempts for each retried network step.
    _MAX_ATTEMPTS = 3
    # (connect, read) timeout in seconds, so a stalled connection cannot hang forever.
    _REQUEST_TIMEOUT = (10, 60)
    # Chunk size used when streaming the video body to disk.
    _DOWNLOAD_CHUNK_SIZE = 1024 * 1024

    @classmethod
    def download_video(cls, video_url, video_path_url):
        """Download ``video_url`` into the local file ``video_path_url``.

        The request is sent with ``range: bytes=0-``, so only a
        ``206 Partial Content`` response is accepted as success. The body is
        streamed to disk in chunks instead of being buffered in memory.

        Args:
            video_url: Direct URL of the video file.
            video_path_url: Local path the video is written to.

        Returns:
            True once the file has been written, False if every attempt
            failed (non-206 status or a network error).
        """
        # The headers never change between attempts, so build them once.
        headers = {
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'range': 'bytes=0-',
            'referer': 'https://www.ixigua.com/',
            'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sec-fetch-dest': 'video',
            'sec-fetch-mode': 'no-cors',
            'sec-fetch-site': 'same-site',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        }
        for _ in range(cls._MAX_ATTEMPTS):
            try:
                with requests.get(
                    video_url,
                    headers=headers,
                    stream=True,
                    timeout=cls._REQUEST_TIMEOUT,
                ) as response:
                    if response.status_code != 206:
                        continue
                    with open(video_path_url, "wb") as file:
                        for chunk in response.iter_content(chunk_size=cls._DOWNLOAD_CHUNK_SIZE):
                            file.write(chunk)
                return True
            except requests.RequestException as e:
                # A network failure is retried; drop any partially written file first.
                Common.logger().info(f'报错信息:{e}')
                cls._remove_file(video_path_url)
        return False

    @classmethod
    def get_videoList(cls, vx_message, channel):
        """Resolve a shared Xigua link, then download and publish the video.

        Args:
            vx_message: Indexable message record. Element 1 is the shared
                link; element 3 is forwarded unchanged to
                ``PQ.insert_piaoquantv`` (its meaning is not visible here).
            channel: Passed through to ``urlManage.url_manage``.

        Returns:
            None on success, otherwise a human-readable error message string.
            Unexpected exceptions are logged and reported as a message too.
        """
        try:
            data_link = vx_message[1]
            time.sleep(1)

            content_id = ''
            for _ in range(cls._MAX_ATTEMPTS):
                content_id = urlManage.url_manage(data_link, channel)
                if content_id:
                    break
            # Any falsy id (not only '') means resolution failed; otherwise a
            # None id would be formatted into the page URL below.
            if not content_id:
                return "无法获取到视频ID"

            page_url = f'https://www.ixigua.com/{content_id}'
            for _ in range(cls._MAX_ATTEMPTS):
                headers = {
                    'Cookie': 'ttwid=1%7C7j8Kxe-NNRFurQ2qk8x48WH3FToSbPDi1_YAU1IC2ws%7C1711524169%7C845d7775e8226786557408dc77df0ba8a8f7b18bdba3c37b693438dc6049ad95;',
                    'Referer': page_url,
                    # A fresh random user agent is picked for every attempt.
                    'User-Agent': get_random_user_agent()
                }
                try:
                    response = requests.get(page_url, headers=headers, timeout=cls._REQUEST_TIMEOUT)
                except requests.RequestException as e:
                    Common.logger().info(f'报错信息:{e}')
                    continue
                if response.status_code != 200:
                    continue
                response.encoding = "utf-8"
                html_data = response.text

                title_content = cls._extract_title(html_data)
                print(title_content)

                video_url = cls._extract_video_url(html_data)
                if not video_url:
                    return '无法获取视频链接'
                return cls._download_and_publish(video_url, title_content, vx_message)

            # Every attempt failed; report it instead of silently returning
            # None, which callers cannot tell apart from success.
            return '无法获取视频链接'
        except Exception as e:
            Common.logger().info(f'报错信息:{e}')
            return f"处理报错,报错信息{e}"

    @classmethod
    def _download_and_publish(cls, video_url, title_content, vx_message):
        """Download the video, upload it to OSS and publish it in-site.

        Returns None on success or an error message string. The local file is
        removed on every exit path, including exceptions.
        """
        # Random id used both as the OSS id and as the local file name.
        video_id = urlManage.random_id()
        video_path_url = config['PATHS']['VIDEO_OSS_PATH'] + video_id + ".mp4"
        try:
            if not cls.download_video(video_url, video_path_url):
                return "视频下载失败"
            upload_result = Oss.video_sync_upload_oss(video_path_url, video_id)
            oss_object_key = upload_result.get("oss_object_key")
            target = vx_message[3]
            Common.logger().info(f'准备发送站内参数:{oss_object_key},{title_content},{target}')
            piaoquantv = PQ.insert_piaoquantv(oss_object_key, title_content, target)
            # Only an explicit False is treated as failure, as before;
            # PQ's full return contract is not visible from this file.
            if piaoquantv == False:  # noqa: E712
                return "视频发送到站内失败"
            return None
        finally:
            cls._remove_file(video_path_url)

    @staticmethod
    def _extract_title(html_data):
        """Return the page title up to the first " - " separator, or ''."""
        title_match = re.search(r'<title[^>]*>(.*?)</title>', html_data)
        if not title_match:
            return ''
        return title_match.group(1).split(" - ")[0]

    @staticmethod
    def _extract_video_url(html_data):
        """Return the decoded ``main_url`` found in the page, or None if absent.

        The value is base64-encoded in the page. The capture group is decoded
        directly, so no surrounding quotes reach the decoder.
        """
        url_match = re.search(r'"main_url":"(.*?)"', html_data)
        if url_match is None:
            return None
        decoded_data = base64.b64decode(url_match.group(1))
        try:
            return decoded_data.decode('utf-8')
        except UnicodeDecodeError:
            # Fall back to a lossless single-byte decoding if it is not UTF-8.
            return decoded_data.decode('latin-1')

    @staticmethod
    def _remove_file(path):
        """Delete ``path`` if it exists as a regular file."""
        if os.path.isfile(path):
            os.remove(path)
|