|
@@ -14,11 +14,13 @@ from common.url_manage import urlManage
|
|
|
|
|
|
config = configparser.ConfigParser()
|
|
|
config.read('/root/single_video_crawler/config.ini') # Replace with the path to your configuration file
|
|
|
+# config.read('/Users/tzld/Desktop/single_video_crawler/config.ini')
|
|
|
class gongzhonghaoVdieo():
|
|
|
|
|
|
@classmethod
|
|
|
def download_video(cls, video_url, video_path_url):
|
|
|
for i in range(3):
|
|
|
+
|
|
|
headers = {
|
|
|
'Accept': '*/*',
|
|
|
'Accept-Encoding': 'identity;q=1, *;q=0',
|
|
@@ -32,7 +34,6 @@ class gongzhonghaoVdieo():
|
|
|
'Referer': 'https://mp.weixin.qq.com/',
|
|
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
|
|
|
}
|
|
|
-
|
|
|
response = requests.request("GET", video_url, headers=headers)
|
|
|
# Check that the response status code is 206 (Partial Content)
|
|
|
if response.status_code == 206:
|
|
@@ -43,38 +44,81 @@ class gongzhonghaoVdieo():
|
|
|
return True
|
|
|
return False
|
|
|
|
|
|
@classmethod
def get_link(cls, video_id, timeout=30):
    """Resolve a Tencent Video ``vid`` to a direct download URL.

    Calls the ``h5vv.video.qq.com/getinfo`` endpoint, removes the
    ``QZOutputJson=...;`` wrapper from the reply, and builds the URL as
    ``<first folder url><file name>?vkey=<fvkey>``.

    Args:
        video_id: The ``vid`` value to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The download URL, or ``None`` when the reply is not valid JSON or
        does not contain the expected ``vl``/``vi``/``ul`` fields.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    url = "https://h5vv.video.qq.com/getinfo"
    # Passing the query as ``params`` lets requests URL-encode the vid.
    params = {
        "vid": video_id,
        "platform": "101001",
        "charge": "0",
        "otype": "json",
        "defn": "shd",
    }
    headers = {
        "Host": "h5vv.video.qq.com",
        "xweb_xhr": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF XWEB/30817",
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "*/*",
        "Sec-Fetch-Site": "cross-site",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Dest": "empty",
        "Referer": "https://servicewechat.com/wx5fcd817f3f80aece/3/page-frame.html",
        "Accept-Language": "en",
    }
    # Without a timeout a stalled connection would block the crawler forever.
    response = requests.get(url, params=params, headers=headers, timeout=timeout)

    # The reply looks like ``QZOutputJson={...};`` - strip the wrapper
    # explicitly instead of blindly dropping the last character.
    body = response.text.strip()
    prefix = "QZOutputJson="
    if body.startswith(prefix):
        body = body[len(prefix):]
    body = body.rstrip(";").strip()

    try:
        result = json.loads(body)
        info = result["vl"]["vi"][0]
        key = info["fvkey"]
        name = info["fn"]
        folder = info["ul"]["ui"][0]["url"]
        return folder + name + "?vkey=" + key
    except (ValueError, KeyError, IndexError, TypeError):
        # Error replies carry no usable video entry; report "no link" so
        # the caller's ``if mp4_link:`` check can handle it.
        return None
|
|
|
|
|
|
@classmethod
def get_js(cls, link, timeout=30):
    """Download the page at ``link`` and return its source as text.

    Args:
        link: URL of the page to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The response body decoded as UTF-8. Undecodable bytes are replaced
        rather than raising, so one bad byte does not discard the page.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    }
    # A GET needs no request body, and a timeout keeps a stalled
    # connection from hanging the crawler.
    response = requests.get(link, headers=headers, timeout=timeout)
    js_code = response.content.decode("utf-8", errors="replace")
    return js_code
|
|
|
+
|
|
|
@classmethod
def get_url(cls, js_code):
    """Extract the video URL embedded in a page's script source.

    Two patterns are tried in order:

    1. ``url: ('...')`` - the first occurrence wins.
    2. ``target_url: "..."`` - used only when the first is absent.

    Args:
        js_code: Page source text to search. May be empty or ``None``.

    Returns:
        The captured URL string, or ``None`` when neither pattern matches
        or there is no text to search.
    """
    if not js_code:
        return None

    # re.search stops at the first hit, which is all that is needed here.
    match = re.search(r"url: \('(.*?)'\)", js_code)
    if match:
        return match.group(1)

    # Fall back to the alternative key and return the matched URL, if any.
    match = re.search(r'target_url\s*:\s*"(.*?)"', js_code)
    return match.group(1) if match else None
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
def get_videoList(cls, vx_message, channel):
|
|
|
try:
|
|
|
+ mp4_link = ''
|
|
|
+ video_id = ''
|
|
|
data_link = vx_message[1]
|
|
|
data = json.loads(data_link)
|
|
|
url = data.get('url', '')
|
|
|
- mp4_link = cls.get_url(url)
|
|
|
- mp4_link = mp4_link.replace("\\x26amp;", "&")
|
|
|
+ title = data.get('title', '')
|
|
|
+ for i in range(3):
|
|
|
+ js_code = cls.get_js(url)
|
|
|
+ regex = r"video_id:\s*'([^']*)'"
|
|
|
+ match = re.search(regex, js_code)
|
|
|
+ video_id = match.group(1) if match else None
|
|
|
+ if video_id:
|
|
|
+ mp4_link = cls.get_link(video_id)
|
|
|
+ if mp4_link:
|
|
|
+ break
|
|
|
+ else:
|
|
|
+ video_id = urlManage.random_id()
|
|
|
+ mp4_link = cls.get_url(js_code)
|
|
|
+ mp4_link = mp4_link.replace("\\x26amp;", "&")
|
|
|
+ if mp4_link:
|
|
|
+ break
|
|
|
+
|
|
|
if mp4_link:
|
|
|
- title = data.get('title', '')
|
|
|
- # 随机生成视频oss_id
|
|
|
- video_id = urlManage.random_id()
|
|
|
video_path_url = config['PATHS']['VIDEO_OSS_PATH'] + video_id + ".mp4"
|
|
|
status = cls.download_video(mp4_link, video_path_url)
|
|
|
if status == False:
|
|
@@ -90,7 +134,7 @@ class gongzhonghaoVdieo():
|
|
|
if os.path.isfile(video_path_url):
|
|
|
os.remove(video_path_url)
|
|
|
else:
|
|
|
- return "无法获取到视频ID"
|
|
|
+ return "无法获取到视频下载链接"
|
|
|
except Exception as e:
|
|
|
Common.logger().info(f'报错信息:{e}')
|
|
|
return f"处理报错,报错信息{e}"
|