__init__.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. """
  2. @author: luojunhui
  3. """
  4. import os
  5. import oss2
  6. import aiohttp
  7. import aiofiles
  8. import requests
  9. from hashlib import md5
  10. from uuid import uuid4
  11. from fake_useragent import FakeUserAgent
  12. async def download_cover(file_path, platform, cover_url):
  13. """
  14. 下载视频封面
  15. :param platform:
  16. :param cover_url:
  17. :param file_path:
  18. :return:
  19. """
  20. headers = request_header(platform=platform, url=cover_url, download_type="cover")
  21. response = requests.get(url=cover_url, headers=headers)
  22. if b"<html>" in response.content:
  23. return None
  24. elif response.status_code != 200:
  25. return None
  26. else:
  27. with open(file_path, "wb") as f:
  28. f.write(response.content)
  29. return file_path
  30. def request_header(platform, url, download_type="video"):
  31. """
  32. 请求头
  33. :return:
  34. """
  35. if platform == "xg_search":
  36. if "v9-xg-web-pc.ixigua.com" in url:
  37. headers = {
  38. "Accept": "*/*",
  39. "Accept-Language": "zh-CN,zh;q=0.9",
  40. "Host": "v9-xg-web-pc.ixigua.com",
  41. "User-Agent": FakeUserAgent().chrome,
  42. "Origin": "https://www.ixigua.com/",
  43. "Referer": "https://www.ixigua.com/"
  44. }
  45. elif "v3-xg-web-pc.ixigua.com" in url:
  46. headers = {
  47. "Accept": "*/*",
  48. "Accept-Language": "zh-CN,zh;q=0.9",
  49. "Host": "v3-xg-web-pc.ixigua.com",
  50. "User-Agent": FakeUserAgent().chrome,
  51. "Origin": "https://www.ixigua.com/",
  52. "Referer": "https://www.ixigua.com/"
  53. }
  54. elif download_type == "cover":
  55. headers = {
  56. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  57. 'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
  58. 'Cache-Control': 'max-age=0',
  59. 'Proxy-Connection': 'keep-alive',
  60. 'Upgrade-Insecure-Requests': '1',
  61. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
  62. }
  63. else:
  64. headers = {
  65. "Accept": "*/*",
  66. "Accept-Language": "zh-CN,zh;q=0.9",
  67. "Host": "v3-xg-web-pc.ixigua.com",
  68. "User-Agent": FakeUserAgent().chrome,
  69. "Origin": "https://www.ixigua.com/",
  70. "Referer": "https://www.ixigua.com/"
  71. }
  72. elif platform == "baidu_search":
  73. headers = {
  74. "Accept": "*/*",
  75. "Accept-Language": "zh-CN,zh;q=0.9",
  76. "User-Agent": FakeUserAgent().chrome,
  77. }
  78. elif platform == "wx_search":
  79. headers = {
  80. "Accept": "*/*",
  81. "Accept-Language": "zh-CN,zh;q=0.9",
  82. "User-Agent": FakeUserAgent().chrome,
  83. "Origin": "https://mp.weixin.qq.com",
  84. "Referer": "https://mp.weixin.qq.com"
  85. }
  86. elif platform == "dy_search":
  87. headers = {
  88. 'accept': '*/*',
  89. 'accept-language': 'en,zh;q=0.9,zh-CN;q=0.8',
  90. 'priority': 'i',
  91. 'range': 'bytes=0-',
  92. 'referer': 'https://v11-coldf.douyinvod.com/',
  93. 'user-agent': FakeUserAgent().chrome
  94. }
  95. else:
  96. headers = {}
  97. return headers
  98. async def download_video(file_path, platform, video_url, download_type="video"):
  99. """
  100. :param download_type:
  101. :param video_url:
  102. :param platform:
  103. :param file_path:
  104. :return:
  105. """
  106. headers = request_header(platform=platform, url=video_url, download_type=download_type)
  107. if os.path.exists(file_path):
  108. file_size = os.path.getsize(file_path)
  109. headers["Range"] = f"bytes={file_size}-"
  110. else:
  111. file_size = 0
  112. async with aiohttp.ClientSession() as session:
  113. async with session.get(video_url, headers=headers) as response:
  114. if response.status in [200, 206]:
  115. if file_size > 0:
  116. async with aiofiles.open(file_path, "ab+") as f:
  117. # 以1MB为单位分块下载
  118. async for chunk in response.content.iter_chunked(1024 * 1024):
  119. await f.write(chunk)
  120. else:
  121. async with aiofiles.open(file_path, "wb") as f:
  122. # 以1MB为单位分块下载
  123. async for chunk in response.content.iter_chunked(1024 * 1024):
  124. await f.write(chunk)
  125. else:
  126. print(response.status)
  127. return file_path
  128. def generate_video_path(platform, video_id):
  129. """
  130. 通过视频信息生成唯一视频地址
  131. :return:
  132. """
  133. index = "{}-{}-{}".format(platform, video_id, uuid4())
  134. index = md5(index.encode()).hexdigest()
  135. file_name = "{}.mp4".format(index)
  136. cover_name = "{}.png".format(index)
  137. file_path = os.path.join(os.getcwd(), "static", file_name)
  138. cover_path = os.path.join(os.getcwd(), "static", cover_name)
  139. return file_path, cover_path
  140. async def upload_to_oss(local_video_path, download_type):
  141. """
  142. 把视频上传到 oss
  143. :return:
  144. """
  145. oss_video_key = "long_articles/{}/".format(download_type) + str(uuid4())
  146. access_key_id = "LTAIP6x1l3DXfSxm"
  147. access_key_secret = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
  148. endpoint = "oss-cn-hangzhou.aliyuncs.com"
  149. bucket_name = "art-pubbucket"
  150. bucket = oss2.Bucket(
  151. oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name
  152. )
  153. bucket.put_object_from_file(key=oss_video_key, filename=local_video_path)
  154. return oss_video_key