__init__.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. """
  2. @author: luojunhui
  3. """
import asyncio
import os
from functools import partial
from hashlib import md5
from uuid import uuid4

import aiofiles
import aiohttp
import oss2
import requests
from fake_useragent import FakeUserAgent
  12. async def download_cover(file_path, platform, cover_url):
  13. """
  14. 下载视频封面
  15. :param platform:
  16. :param cover_url:
  17. :param file_path:
  18. :return:
  19. """
  20. headers = request_header(platform=platform, url=cover_url, download_type="cover")
  21. response = requests.get(url=cover_url, headers=headers)
  22. if b"<html>" in response.content:
  23. return None
  24. elif response.status_code != 200:
  25. return None
  26. else:
  27. with open(file_path, "wb") as f:
  28. f.write(response.content)
  29. return file_path
  30. def request_header(platform, url, download_type="video"):
  31. """
  32. 请求头
  33. :return:
  34. """
  35. if platform == "xg_search":
  36. if "v9-xg-web-pc.ixigua.com" in url:
  37. headers = {
  38. "Accept": "*/*",
  39. "Accept-Language": "zh-CN,zh;q=0.9",
  40. "Host": "v9-xg-web-pc.ixigua.com",
  41. "User-Agent": FakeUserAgent().chrome,
  42. "Origin": "https://www.ixigua.com/",
  43. "Referer": "https://www.ixigua.com/"
  44. }
  45. elif "v3-xg-web-pc.ixigua.com" in url:
  46. headers = {
  47. "Accept": "*/*",
  48. "Accept-Language": "zh-CN,zh;q=0.9",
  49. "Host": "v3-xg-web-pc.ixigua.com",
  50. "User-Agent": FakeUserAgent().chrome,
  51. "Origin": "https://www.ixigua.com/",
  52. "Referer": "https://www.ixigua.com/"
  53. }
  54. elif download_type == "cover":
  55. headers = {
  56. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  57. 'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
  58. 'Cache-Control': 'max-age=0',
  59. 'Proxy-Connection': 'keep-alive',
  60. 'Upgrade-Insecure-Requests': '1',
  61. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
  62. }
  63. else:
  64. headers = {
  65. "Accept": "*/*",
  66. "Accept-Language": "zh-CN,zh;q=0.9",
  67. "Host": "v3-xg-web-pc.ixigua.com",
  68. "User-Agent": FakeUserAgent().chrome,
  69. "Origin": "https://www.ixigua.com/",
  70. "Referer": "https://www.ixigua.com/"
  71. }
  72. elif platform == "baidu_search":
  73. headers = {
  74. "Accept": "*/*",
  75. "Accept-Language": "zh-CN,zh;q=0.9",
  76. "User-Agent": FakeUserAgent().chrome,
  77. }
  78. elif platform == "wx_search":
  79. headers = {
  80. "Accept": "*/*",
  81. "Accept-Language": "zh-CN,zh;q=0.9",
  82. "User-Agent": FakeUserAgent().chrome,
  83. "Origin": "https://mp.weixin.qq.com",
  84. "Referer": "https://mp.weixin.qq.com"
  85. }
  86. elif platform == "dy_search":
  87. headers = {
  88. 'accept': '*/*',
  89. 'accept-language': 'en,zh;q=0.9,zh-CN;q=0.8',
  90. 'priority': 'i',
  91. 'range': 'bytes=0-',
  92. 'referer': 'https://v11-coldf.douyinvod.com/',
  93. 'user-agent': FakeUserAgent().chrome
  94. }
  95. else:
  96. headers = {}
  97. return headers
  98. async def download_video(file_path, platform, video_url, download_type="video"):
  99. """
  100. :param download_type:
  101. :param video_url:
  102. :param platform:
  103. :param file_path:
  104. :return:
  105. """
  106. headers = request_header(platform=platform, url=video_url, download_type=download_type)
  107. if os.path.exists(file_path):
  108. file_size = os.path.getsize(file_path)
  109. headers["Range"] = f"bytes={file_size}-"
  110. else:
  111. file_size = 0
  112. async with aiohttp.ClientSession() as session:
  113. async with session.get(video_url, headers=headers) as response:
  114. if response.status in [200, 206]:
  115. if file_size > 0:
  116. async with aiofiles.open(file_path, "ab+") as f:
  117. # 以1MB为单位分块下载
  118. async for chunk in response.content.iter_chunked(1024 * 1024):
  119. await f.write(chunk)
  120. else:
  121. async with aiofiles.open(file_path, "wb") as f:
  122. # 以1MB为单位分块下载
  123. async for chunk in response.content.iter_chunked(1024 * 1024):
  124. await f.write(chunk)
  125. else:
  126. print("下载失败")
  127. return False
  128. return file_path
  129. def generate_video_path(platform, video_id):
  130. """
  131. 通过视频信息生成唯一视频地址
  132. :return:
  133. """
  134. index = "{}-{}-{}".format(platform, video_id, uuid4())
  135. index = md5(index.encode()).hexdigest()
  136. file_name = "{}.mp4".format(index)
  137. cover_name = "{}.png".format(index)
  138. file_path = os.path.join(os.getcwd(), "static", file_name)
  139. cover_path = os.path.join(os.getcwd(), "static", cover_name)
  140. return file_path, cover_path
  141. async def upload_to_oss(local_video_path, download_type):
  142. """
  143. 把视频上传到 oss
  144. :return:
  145. """
  146. oss_video_key = "long_articles/{}/".format(download_type) + str(uuid4())
  147. access_key_id = "LTAIP6x1l3DXfSxm"
  148. access_key_secret = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
  149. endpoint = "oss-cn-hangzhou.aliyuncs.com"
  150. bucket_name = "art-pubbucket"
  151. bucket = oss2.Bucket(
  152. oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name
  153. )
  154. bucket.put_object_from_file(key=oss_video_key, filename=local_video_path)
  155. return oss_video_key