# gongzhonghao_video.py (3.7 KB)
  1. import configparser
  2. import re
  3. import os
  4. import sys
  5. import requests
  6. import json
  7. from urllib.parse import urlparse
  8. sys.path.append(os.getcwd())
  9. from common import Oss, Common
  10. from common.pq_utility import PQ
  11. from common.url_manage import urlManage
  12. config = configparser.ConfigParser()
  13. config.read('/root/single_video_crawler/config.ini') # 替换为您的配置文件路径
  14. class gongzhonghaoVdieo():
  15. @classmethod
  16. def download_video(cls, video_url, video_path_url):
  17. for i in range(3):
  18. headers = {
  19. 'Accept': '*/*',
  20. 'Accept-Encoding': 'identity;q=1, *;q=0',
  21. 'Accept-Language': 'zh-CN,zh;q=0.9',
  22. 'Cache-Control': 'no-cache',
  23. 'Connection': 'keep-alive',
  24. 'Host': urlparse(video_url).netloc,
  25. 'Origin': 'https://mp.weixin.qq.com',
  26. 'Pragma': 'no-cache',
  27. 'Range': 'bytes=0-',
  28. 'Referer': 'https://mp.weixin.qq.com/',
  29. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
  30. }
  31. response = requests.request("GET", video_url, headers=headers)
  32. # 检查响应状态码是否为200
  33. if response.status_code == 206:
  34. # 以二进制写入模式打开文件
  35. with open(f"{video_path_url}", "wb") as file:
  36. # 将响应内容写入文件
  37. file.write(response.content)
  38. return True
  39. return False
  40. @classmethod
  41. def get_url(cls, link):
  42. payload = {}
  43. headers = {
  44. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
  45. }
  46. response = requests.request("GET", link, headers=headers, data=payload)
  47. js_code = response.content.decode()
  48. if js_code:
  49. pattern = re.compile(r"url: \('(.*?)'\)")
  50. urls = pattern.findall(js_code)
  51. if urls:
  52. return urls[0]
  53. else:
  54. return None
  55. else:
  56. return None
  57. @classmethod
  58. def get_videoList(cls, vx_message, channel):
  59. try:
  60. data_link = vx_message[1]
  61. data = json.loads(data_link)
  62. url = data.get('url', '')
  63. mp4_link = cls.get_url(url)
  64. mp4_link = mp4_link.replace("\\x26amp;", "&")
  65. if mp4_link:
  66. title = data.get('title', '')
  67. # 随机生成视频oss_id
  68. video_id = urlManage.random_id()
  69. video_path_url = config['PATHS']['VIDEO_OSS_PATH'] + video_id + ".mp4"
  70. status = cls.download_video(mp4_link, video_path_url)
  71. if status == False:
  72. return "视频下载失败"
  73. oss_object_key = Oss.video_sync_upload_oss(video_path_url, video_id)
  74. oss_object_key.get("status")
  75. # 获取 oss 视频地址
  76. oss_object_key = oss_object_key.get("oss_object_key")
  77. Common.logger().info(f'准备发送站内参数:{oss_object_key},{title},{vx_message[3]}')
  78. piaoquantv = PQ.insert_piaoquantv(oss_object_key, title, vx_message[3])
  79. if piaoquantv == False:
  80. return "视频发送到站内失败"
  81. if os.path.isfile(video_path_url):
  82. os.remove(video_path_url)
  83. else:
  84. return "无法获取到视频ID"
  85. except Exception as e:
  86. Common.logger().info(f'报错信息:{e}')
  87. return f"处理报错,报错信息{e}"