gongzhonghao_video.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. import configparser
  2. import re
  3. import os
  4. import sys
  5. import requests
  6. import json
  7. from urllib.parse import urlparse
  8. sys.path.append(os.getcwd())
  9. from common import Oss, Common
  10. from common.pq_utility import PQ
  11. from common.url_manage import urlManage
# Module-wide settings, read once at import time. The class below looks up
# config['PATHS']['VIDEO_OSS_PATH'] to decide where downloaded videos are staged.
# NOTE: ConfigParser.read() silently skips files it cannot open, so a wrong
# path only surfaces later as a KeyError on the first lookup.
config = configparser.ConfigParser()
config.read('/root/single_video_crawler/config.ini')  # replace with your config file path
# config.read('/Users/tzld/Desktop/single_video_crawler/config.ini')
  15. class gongzhonghaoVdieo():
  16. @classmethod
  17. def download_video(cls, video_url, video_path_url):
  18. for i in range(3):
  19. headers = {
  20. 'Accept': '*/*',
  21. 'Accept-Encoding': 'identity;q=1, *;q=0',
  22. 'Accept-Language': 'zh-CN,zh;q=0.9',
  23. 'Cache-Control': 'no-cache',
  24. 'Connection': 'keep-alive',
  25. 'Host': urlparse(video_url).netloc,
  26. 'Origin': 'https://mp.weixin.qq.com',
  27. 'Pragma': 'no-cache',
  28. 'Range': 'bytes=0-',
  29. 'Referer': 'https://mp.weixin.qq.com/',
  30. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
  31. }
  32. response = requests.request("GET", video_url, headers=headers)
  33. # 检查响应状态码是否为200
  34. if response.status_code == 206:
  35. # 以二进制写入模式打开文件
  36. with open(f"{video_path_url}", "wb") as file:
  37. # 将响应内容写入文件
  38. file.write(response.content)
  39. return True
  40. return False
  41. @classmethod
  42. def get_link(cls, video_id):
  43. url = "https://h5vv.video.qq.com/getinfo?vid={}&platform=101001&charge=0&otype=json&defn=shd".format(
  44. video_id
  45. )
  46. headers = {
  47. "Host": "h5vv.video.qq.com",
  48. "xweb_xhr": "1",
  49. "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF XWEB/30817",
  50. "Content-Type": "application/x-www-form-urlencoded",
  51. "Accept": "*/*",
  52. "Sec-Fetch-Site": "cross-site",
  53. "Sec-Fetch-Mode": "cors",
  54. "Sec-Fetch-Dest": "empty",
  55. "Referer": "https://servicewechat.com/wx5fcd817f3f80aece/3/page-frame.html",
  56. "Accept-Language": "en",
  57. }
  58. response = requests.get(url, headers=headers)
  59. result = json.loads(response.text.replace("QZOutputJson=", "")[:-1])
  60. vl = result["vl"]["vi"][0]
  61. key = vl["fvkey"]
  62. name = vl["fn"]
  63. folder = vl["ul"]["ui"][0]["url"]
  64. video_url = folder + name + "?vkey=" + key
  65. return video_url
  66. @classmethod
  67. def get_js(cls, link):
  68. payload = {}
  69. headers = {
  70. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
  71. }
  72. response = requests.request("GET", link, headers=headers, data=payload)
  73. js_code = response.content.decode()
  74. return js_code
  75. @classmethod
  76. def get_url(cls, js_code):
  77. pattern = re.compile(r"url: \('(.*?)'\)")
  78. urls = pattern.findall(js_code)
  79. if urls:
  80. return urls[0]
  81. else:
  82. match = re.search(r'target_url\s*:\s*"(.*?)"', js_code)
  83. # 提取匹配到的 URL
  84. url = match.group(1) if match else None
  85. return url
  86. @classmethod
  87. def get_videoList(cls, vx_message, channel):
  88. try:
  89. mp4_link = ''
  90. video_id = ''
  91. data_link = vx_message[1]
  92. data = json.loads(data_link)
  93. url = data.get('url', '')
  94. title = data.get('title', '')
  95. for i in range(3):
  96. js_code = cls.get_js(url)
  97. regex = r"video_id:\s*'([^']*)'"
  98. match = re.search(regex, js_code)
  99. video_id = match.group(1) if match else None
  100. if video_id:
  101. mp4_link = cls.get_link(video_id)
  102. if mp4_link:
  103. break
  104. else:
  105. video_id = urlManage.random_id()
  106. mp4_link = cls.get_url(js_code)
  107. mp4_link = mp4_link.replace("\\x26amp;", "&")
  108. if mp4_link:
  109. break
  110. if mp4_link:
  111. video_path_url = config['PATHS']['VIDEO_OSS_PATH'] + video_id + ".mp4"
  112. status = cls.download_video(mp4_link, video_path_url)
  113. if status == False:
  114. return "视频下载失败"
  115. oss_object_key = Oss.video_sync_upload_oss(video_path_url, video_id)
  116. oss_object_key.get("status")
  117. # 获取 oss 视频地址
  118. oss_object_key = oss_object_key.get("oss_object_key")
  119. Common.logger().info(f'准备发送站内参数:{oss_object_key},{title},{vx_message[3]}')
  120. piaoquantv = PQ.insert_piaoquantv(oss_object_key, title, vx_message[3])
  121. if piaoquantv == False:
  122. return "视频发送到站内失败"
  123. if os.path.isfile(video_path_url):
  124. os.remove(video_path_url)
  125. else:
  126. return "无法获取到视频下载链接"
  127. except Exception as e:
  128. Common.logger().info(f'报错信息:{e}')
  129. return f"处理报错,报错信息{e}"