# xigua_video.py

  1. import base64
  2. import configparser
  3. import os
  4. import re
  5. import sys
  6. import time
  7. import requests
  8. from common import Common
  9. from common.pq_utility import PQ
  10. sys.path.append(os.getcwd())
  11. from common.url_manage import urlManage
  12. from common.aliyun_oss_uploading import Oss
  13. from common.userAgent import get_random_user_agent
  14. config = configparser.ConfigParser()
  15. config.read('/root/single_video_crawler/config.ini') # 替换为您的配置文件路径
  16. class xiguaVideo():
  17. # 获取西瓜 标题+视频链接
  18. @classmethod
  19. def download_video(cls, video_url, video_path_url):
  20. for i in range(3):
  21. payload = {}
  22. headers = {
  23. 'accept-language': 'zh-CN,zh;q=0.9',
  24. 'cache-control': 'no-cache',
  25. 'pragma': 'no-cache',
  26. 'range': 'bytes=0-',
  27. 'referer': 'https://www.ixigua.com/',
  28. 'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
  29. 'sec-ch-ua-mobile': '?0',
  30. 'sec-ch-ua-platform': '"macOS"',
  31. 'sec-fetch-dest': 'video',
  32. 'sec-fetch-mode': 'no-cors',
  33. 'sec-fetch-site': 'same-site',
  34. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
  35. }
  36. response = requests.request("GET", video_url, headers=headers, data=payload)
  37. # 检查响应状态码是否为200
  38. if response.status_code == 206:
  39. # 以二进制写入模式打开文件
  40. with open(f"{video_path_url}", "wb") as file:
  41. # 将响应内容写入文件
  42. file.write(response.content)
  43. return True
  44. return False
  45. # 获取西瓜 标题+视频链接
  46. @classmethod
  47. def get_videoList(cls, vx_message, channel):
  48. try:
  49. data_link = vx_message[1]
  50. time.sleep(1)
  51. content_id = ''
  52. for i in range(3):
  53. content_id = urlManage.url_manage(data_link, channel)
  54. if content_id:
  55. break
  56. if content_id == '':
  57. return "无法获取到视频ID"
  58. for i in range(3):
  59. headers = {
  60. 'Cookie': 'ttwid=1%7C7j8Kxe-NNRFurQ2qk8x48WH3FToSbPDi1_YAU1IC2ws%7C1711524169%7C845d7775e8226786557408dc77df0ba8a8f7b18bdba3c37b693438dc6049ad95;',
  61. 'Referer': f'https://www.ixigua.com/{content_id}',
  62. 'User-Agent': get_random_user_agent()
  63. }
  64. url = f'https://www.ixigua.com/{content_id}'
  65. response = requests.get(url, headers=headers)
  66. status_code = response.status_code
  67. if status_code != 200:
  68. continue
  69. response.encoding = "utf-8"
  70. try:
  71. html_data = response.text
  72. title_match = re.search(r'<title[^>]*>(.*?)</title>', html_data)
  73. except Exception:
  74. continue
  75. if title_match:
  76. title_content = title_match.group(1)
  77. title_content = title_content.split(" - ")[0]
  78. else:
  79. title_content = ''
  80. print(title_content)
  81. try:
  82. mainUrl = re.search(r'("main_url":")(.*?)"', html_data)[0]
  83. except Exception:
  84. return '无法获取视频链接'
  85. mainUrl = mainUrl.split(":")[1]
  86. decoded_data = base64.b64decode(mainUrl)
  87. try:
  88. # 尝试使用utf-8解码
  89. video_url = decoded_data.decode('utf-8')
  90. except UnicodeDecodeError:
  91. # 如果utf-8解码失败,尝试使用其他编码方式
  92. video_url = decoded_data.decode('latin-1')
  93. # 随机生成视频oss_id
  94. video_id = urlManage.random_id()
  95. video_path_url = config['PATHS']['VIDEO_OSS_PATH'] + video_id+".mp4"
  96. status = cls.download_video(video_url, video_path_url)
  97. if status == False:
  98. return "视频下载失败"
  99. oss_object_key = Oss.video_sync_upload_oss(video_path_url, video_id)
  100. # 获取 oss 视频地址
  101. oss_object_key = oss_object_key.get("oss_object_key")
  102. Common.logger().info(f'准备发送站内参数:{oss_object_key},{title_content},{vx_message[3]}')
  103. piaoquantv = PQ.insert_piaoquantv(oss_object_key, title_content, vx_message[3])
  104. if piaoquantv == False:
  105. return "视频发送到站内失败"
  106. if os.path.isfile(video_path_url):
  107. os.remove(video_path_url)
  108. return
  109. except Exception as e:
  110. Common.logger().info(f'报错信息:{e}')
  111. return f"处理报错,报错信息{e}"