# xigua_video.py
import base64
import configparser
import logging
import os
import re
import sys
import time

import requests

# Extend the import path before the project-local imports below; presumably
# this is what makes the `common` package resolvable when the script is
# launched from the repository root.
sys.path.append(os.getcwd())

from common.aliyun_oss_uploading import Oss
from common.pq_utility import PQ
from common.url_manage import urlManage
from common.userAgent import get_random_user_agent
  13. config = configparser.ConfigParser()
  14. config.read('/root/single_video_crawler/config.ini') # 替换为您的配置文件路径
  15. class xiguaVideo():
  16. # 获取西瓜 标题+视频链接
  17. @classmethod
  18. def download_video(cls, video_url, video_path_url):
  19. for i in range(3):
  20. payload = {}
  21. headers = {
  22. 'accept-language': 'zh-CN,zh;q=0.9',
  23. 'cache-control': 'no-cache',
  24. 'pragma': 'no-cache',
  25. 'range': 'bytes=0-',
  26. 'referer': 'https://www.ixigua.com/',
  27. 'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
  28. 'sec-ch-ua-mobile': '?0',
  29. 'sec-ch-ua-platform': '"macOS"',
  30. 'sec-fetch-dest': 'video',
  31. 'sec-fetch-mode': 'no-cors',
  32. 'sec-fetch-site': 'same-site',
  33. 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
  34. }
  35. response = requests.request("GET", video_url, headers=headers, data=payload)
  36. # 检查响应状态码是否为200
  37. if response.status_code == 206:
  38. # 以二进制写入模式打开文件
  39. with open(f"{video_path_url}", "wb") as file:
  40. # 将响应内容写入文件
  41. file.write(response.content)
  42. return True
  43. return False
  44. # 获取西瓜 标题+视频链接
  45. @classmethod
  46. def get_videoList(cls, vx_message, channel):
  47. try:
  48. data_link = vx_message[1]
  49. time.sleep(1)
  50. content_id = urlManage.url_manage(data_link, channel)
  51. if content_id:
  52. headers = {
  53. 'Cookie': 'ttwid=1%7C7j8Kxe-NNRFurQ2qk8x48WH3FToSbPDi1_YAU1IC2ws%7C1711524169%7C845d7775e8226786557408dc77df0ba8a8f7b18bdba3c37b693438dc6049ad95;',
  54. 'Referer': f'https://www.ixigua.com/{content_id}',
  55. 'User-Agent': get_random_user_agent()
  56. }
  57. url = f'https://www.ixigua.com/{content_id}'
  58. response = requests.get(url, headers=headers)
  59. status_code = response.status_code
  60. if status_code != 200:
  61. return "cookie过期"
  62. response.encoding = "utf-8"
  63. try:
  64. html_data = response.text
  65. title_match = re.search(r'<title[^>]*>(.*?)</title>', html_data)
  66. except Exception:
  67. return "cookie过期"
  68. if title_match:
  69. title_content = title_match.group(1)
  70. title_content = title_content.split(" - ")[0]
  71. else:
  72. title_content = ''
  73. print(title_content)
  74. try:
  75. mainUrl = re.search(r'("main_url":")(.*?)"', html_data)[0]
  76. except Exception:
  77. return '无法获取视频链接'
  78. mainUrl = mainUrl.split(":")[1]
  79. decoded_data = base64.b64decode(mainUrl)
  80. try:
  81. # 尝试使用utf-8解码
  82. video_url = decoded_data.decode('utf-8')
  83. except UnicodeDecodeError:
  84. # 如果utf-8解码失败,尝试使用其他编码方式
  85. video_url = decoded_data.decode('latin-1')
  86. # 随机生成视频oss_id
  87. video_id = urlManage.random_id()
  88. video_path_url = config['PATHS']['VIDEO_OSS_PATH'] + video_id+".mp4"
  89. status = cls.download_video(video_url, video_path_url)
  90. if status == False:
  91. return "视频下载失败"
  92. oss_object_key = Oss.video_sync_upload_oss(video_path_url, video_id)
  93. status = oss_object_key.get("status")
  94. if status != 200:
  95. return "发送OSS失败"
  96. # 获取 oss 视频地址
  97. oss_object_key = oss_object_key.get("oss_object_key")
  98. piaoquantv = PQ.insert_piaoquantv(oss_object_key, title_content, vx_message[3])
  99. if piaoquantv == False:
  100. return "发送账号失败"
  101. if os.path.isfile(video_path_url):
  102. os.remove(video_path_url)
  103. return
  104. else:
  105. return "无法获取视频ID"
  106. except Exception :
  107. return "URL处理失败"