download.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. """
  2. 下载视频
  3. """
  4. import os
  5. import json
  6. import time
  7. import asyncio
  8. from hashlib import md5
  9. import datetime
  10. import httpx
  11. import requests
  12. class VideoDownloader(object):
  13. """
  14. 视频下载功能
  15. """
  16. def __init__(self, video_obj):
  17. self.platform = video_obj['platform']
  18. self.video_id = video_obj['video_id']
  19. self.video_url = video_obj['video_url']
  20. self.cover_url = video_obj['cover_url']
  21. self.proxy = {
  22. "http://": "http://t17772369458618:5zqcjkmy@q796.kdltps.com:15818/",
  23. "https://": "http://t17772369458618:5zqcjkmy@q796.kdltps.com:15818/"
  24. }
  25. self.max_retry = 5
  26. def generate_video_path(self):
  27. """
  28. 通过视频信息生成唯一视频地址
  29. :return:
  30. """
  31. index = "{}-{}".format(self.platform, self.video_id)
  32. index = md5(index.encode()).hexdigest()
  33. temp_dir = "/Users/luojunhui/cyber/automatic_crawler"
  34. file_name = "{}.mp4".format(index)
  35. date_info = datetime.datetime.today().strftime("%Y%m%d")
  36. video_path = os.path.join(temp_dir, date_info, file_name)
  37. if os.path.exists(video_path):
  38. return
  39. else:
  40. os.makedirs(os.path.dirname(video_path), exist_ok=True)
  41. return video_path
  42. async def download_video(self):
  43. """
  44. download video from the web
  45. :return:
  46. """
  47. if self.platform == "fuqiwang":
  48. download_path = self.generate_video_path()
  49. if download_path:
  50. headers = {
  51. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156',
  52. 'Accept-Encoding': 'identity;q=1, *;q=0',
  53. 'Accept': '*/*',
  54. 'Sec-Fetch-Site': 'cross-site',
  55. 'Sec-Fetch-Mode': 'no-cors',
  56. 'Sec-Fetch-Dest': 'video',
  57. 'Referer': 'https://servicewechat.com/wxa1431c6e7acdd32d/2/page-frame.html',
  58. 'Accept-Language': 'en-US,en;q=0.9',
  59. 'Range': 'bytes=0-',
  60. }
  61. async with httpx.AsyncClient(http2=True, proxies=self.proxy, headers=headers) as client:
  62. try:
  63. response = await client.get(self.video_url, headers=headers)
  64. if response.status_code == 206:
  65. with open(download_path, "wb") as f:
  66. f.write(response.content)
  67. else:
  68. for _ in range(self.max_retry):
  69. response = await client.get(self.video_url, headers=headers, follow_redirects=True)
  70. if response.status_code == 206:
  71. with open(download_path, "wb") as f:
  72. f.write(response.content)
  73. break
  74. except httpx.HTTPError as e:
  75. print(f"An error occurred while downloading: {e}")
  76. else:
  77. print("视频已经存在")
  78. def get_by_request(self):
  79. """
  80. req
  81. :return:
  82. """
  83. download_path = self.generate_video_path()
  84. headers = {
  85. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.6(0x13080610) XWEB/1156',
  86. 'Accept-Encoding': 'identity;q=1, *;q=0',
  87. 'Accept': '*/*',
  88. 'Sec-Fetch-Site': 'cross-site',
  89. 'Sec-Fetch-Mode': 'no-cors',
  90. 'Sec-Fetch-Dest': 'video',
  91. 'Referer': 'https://servicewechat.com/wxa1431c6e7acdd32d/2/page-frame.html',
  92. 'Accept-Language': 'en-US,en;q=0.9',
  93. 'Range': 'bytes=0-',
  94. }
  95. r = requests.get(
  96. url=self.video_url,
  97. headers=headers,
  98. proxies=self.proxy
  99. )
  100. print(r.status_code)
  101. with open("test.mp4", "wb") as f:
  102. f.write(r.content)
  103. async def main(video_obj):
  104. """
  105. 异步执行函数
  106. :param video_obj:
  107. :return:
  108. """
  109. downloader = VideoDownloader(video_obj)
  110. await downloader.download_video()
  111. if __name__ == '__main__':
  112. video_o = {
  113. "update_time": 1709784300,
  114. "platform": "fuqiwang",
  115. "video_id": 142599,
  116. "title": "🔴3·8妇女节,最美的祝福,送给全天下的女神!",
  117. "type": 1,
  118. "video_type": 2,
  119. "cover_url": "https://znl-video-bos.cdn.bcebos.com/c6f12b49992ef638342065439f55b444/65e93632/picture/20240306/b8b0c1cc262c2394f111650c9f82e35a_thumb.jpg",
  120. "video_url": "https://znl-video-bos.cdn.bcebos.com/e368801a814c548e443835086d37caaf/65e93632/video/20240306/820ee1498e3ed2a59d37aed54d39ae95_1.mp4",
  121. }
  122. VideoDownloader(video_obj=video_o).get_by_request()
  123. # asyncio.run(main(video_obj=video_o))