__init__.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. """
  2. @author: luojunhui
  3. """
  4. import os
  5. import oss2
  6. import asyncio
  7. import aiohttp
  8. import aiofiles
  9. import requests
  10. from hashlib import md5
  11. from uuid import uuid4
  12. from fake_useragent import FakeUserAgent
  13. async def is_empty(file_path: str) -> bool:
  14. """
  15. 判断文件size
  16. """
  17. # 判断文件是否大于10kb, 若小于10 kb,认为该视频文件为空
  18. TEN_KB = 1024 * 10
  19. if os.path.getsize(file_path) > TEN_KB:
  20. return False
  21. return True
  22. async def download_cover(file_path, platform, cover_url):
  23. """
  24. 下载视频封面
  25. :param platform:
  26. :param cover_url:
  27. :param file_path:
  28. :return:
  29. """
  30. headers = request_header(platform=platform, url=cover_url, download_type="cover")
  31. response = requests.get(url=cover_url, headers=headers)
  32. if b"<html>" in response.content:
  33. return None
  34. elif response.status_code != 200:
  35. return None
  36. else:
  37. with open(file_path, "wb") as f:
  38. f.write(response.content)
  39. return file_path
  40. def request_header(platform, url, download_type="video"):
  41. """
  42. 请求头
  43. :return:
  44. """
  45. if platform == "xg_search":
  46. if "v9-xg-web-pc.ixigua.com" in url:
  47. headers = {
  48. "Accept": "*/*",
  49. "Accept-Language": "zh-CN,zh;q=0.9",
  50. "Host": "v9-xg-web-pc.ixigua.com",
  51. "User-Agent": FakeUserAgent().chrome,
  52. "Origin": "https://www.ixigua.com/",
  53. "Referer": "https://www.ixigua.com/"
  54. }
  55. elif "v3-xg-web-pc.ixigua.com" in url:
  56. headers = {
  57. "Accept": "*/*",
  58. "Accept-Language": "zh-CN,zh;q=0.9",
  59. "Host": "v3-xg-web-pc.ixigua.com",
  60. "User-Agent": FakeUserAgent().chrome,
  61. "Origin": "https://www.ixigua.com/",
  62. "Referer": "https://www.ixigua.com/"
  63. }
  64. elif download_type == "cover":
  65. headers = {
  66. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
  67. 'Accept-Language': 'en,zh;q=0.9,zh-CN;q=0.8',
  68. 'Cache-Control': 'max-age=0',
  69. 'Proxy-Connection': 'keep-alive',
  70. 'Upgrade-Insecure-Requests': '1',
  71. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36'
  72. }
  73. else:
  74. headers = {
  75. "Accept": "*/*",
  76. "Accept-Language": "zh-CN,zh;q=0.9",
  77. "Host": "v3-xg-web-pc.ixigua.com",
  78. "User-Agent": FakeUserAgent().chrome,
  79. "Origin": "https://www.ixigua.com/",
  80. "Referer": "https://www.ixigua.com/"
  81. }
  82. elif platform == "baidu_search":
  83. headers = {
  84. "Accept": "*/*",
  85. "Accept-Language": "zh-CN,zh;q=0.9",
  86. "User-Agent": FakeUserAgent().chrome,
  87. }
  88. elif platform == "wx_search":
  89. headers = {
  90. "Accept": "*/*",
  91. "Accept-Language": "zh-CN,zh;q=0.9",
  92. "User-Agent": FakeUserAgent().chrome,
  93. "Origin": "https://mp.weixin.qq.com",
  94. "Referer": "https://mp.weixin.qq.com"
  95. }
  96. elif platform == "dy_search":
  97. headers = {
  98. 'accept': '*/*',
  99. 'accept-language': 'en,zh;q=0.9,zh-CN;q=0.8',
  100. 'priority': 'i',
  101. 'range': 'bytes=0-',
  102. 'referer': 'https://v11-coldf.douyinvod.com/',
  103. 'user-agent': FakeUserAgent().chrome
  104. }
  105. else:
  106. headers = {}
  107. return headers
  108. async def download_video(file_path, platform, video_url, download_type="video"):
  109. """
  110. :param download_type:
  111. :param video_url:
  112. :param platform:
  113. :param file_path:
  114. :return:
  115. """
  116. headers = request_header(platform=platform, url=video_url, download_type=download_type)
  117. max_retries = 3 # 设置最大重试次数
  118. retries = 0 # 初始化重试次数
  119. tunnel = "l901.kdltps.com:15818"
  120. username = "t11983523373311"
  121. password = "mtuhdr2z"
  122. proxy_auth = aiohttp.BasicAuth(username, password)
  123. while retries < max_retries:
  124. if os.path.exists(file_path):
  125. file_size = os.path.getsize(file_path)
  126. if file_size > 0:
  127. headers["Range"] = f"bytes={file_size}-"
  128. else:
  129. # 文件存在但大小为0,删除文件以便重新下载
  130. os.remove(file_path)
  131. file_size = 0
  132. else:
  133. file_size = 0
  134. # start download
  135. async with aiohttp.ClientSession() as session:
  136. async with session.get(video_url, headers=headers, proxy_auth=proxy_auth, proxy='http://'+tunnel) as response:
  137. if response.status in [200, 206]:
  138. if file_size > 0:
  139. async with aiofiles.open(file_path, "ab+") as f:
  140. # 以1MB为单位分块下载
  141. async for chunk in response.content.iter_chunked(1024 * 1024):
  142. await f.write(chunk)
  143. else:
  144. async with aiofiles.open(file_path, "wb") as f:
  145. # 以1MB为单位分块下载
  146. async for chunk in response.content.iter_chunked(1024 * 1024):
  147. await f.write(chunk)
  148. # 判断文件是否为空, 若为空则继续重试
  149. if await is_empty(file_path):
  150. await asyncio.sleep(3)
  151. retries += 1
  152. if retries >= max_retries:
  153. return False
  154. else:
  155. return file_path
  156. else:
  157. # 下载失败,等待3秒后重试
  158. await asyncio.sleep(3)
  159. retries += 1
  160. if retries >= max_retries:
  161. print(f"下载失败,已达到最大重试次数:{max_retries}")
  162. return False
  163. def generate_video_path(platform, video_id):
  164. """
  165. 通过视频信息生成唯一视频地址
  166. :return:
  167. """
  168. index = "{}-{}-{}".format(platform, video_id, uuid4())
  169. index = md5(index.encode()).hexdigest()
  170. file_name = "{}.mp4".format(index)
  171. cover_name = "{}.png".format(index)
  172. file_path = os.path.join(os.getcwd(), "static", file_name)
  173. cover_path = os.path.join(os.getcwd(), "static", cover_name)
  174. return file_path, cover_path
  175. async def upload_to_oss(local_video_path, download_type):
  176. """
  177. 把视频上传到 oss
  178. :return:
  179. """
  180. oss_video_key = "long_articles/{}/".format(download_type) + str(uuid4())
  181. access_key_id = "LTAIP6x1l3DXfSxm"
  182. access_key_secret = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon"
  183. endpoint = "oss-cn-hangzhou.aliyuncs.com"
  184. bucket_name = "art-pubbucket"
  185. bucket = oss2.Bucket(
  186. oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name
  187. )
  188. bucket.put_object_from_file(key=oss_video_key, filename=local_video_path)
  189. return oss_video_key