|
@@ -2,6 +2,7 @@ import datetime
|
|
|
import json
|
|
|
import os
|
|
|
import random
|
|
|
+import re
|
|
|
import sys
|
|
|
import time
|
|
|
import uuid
|
|
@@ -156,13 +157,79 @@ class GZXHAuthor:
|
|
|
return video_url
|
|
|
|
|
|
def get_wechat_gh(self, link: str):
    """Look up the ``wx_gh`` value for an article link.

    Posts ``{"content_link": link}`` as JSON to the crawler
    ``account_info`` endpoint and returns
    ``response["data"]["data"]["wx_gh"]`` from the first reply whose
    ``code`` equals 0. Up to three attempts are made, and every attempt is
    preceded by a one-second pause.

    Args:
        link: Article URL forwarded to the service as ``content_link``.

    Returns:
        The ``wx_gh`` value reported by the service, or ``None`` when none
        of the attempts produced a successful reply.
    """
    # Request parameters never change between attempts, so build them once.
    url = "http://8.217.190.241:8888/crawler/wei_xin/account_info"
    headers = {'Content-Type': 'application/json'}
    payload = json.dumps({"content_link": link})

    for _ in range(3):
        time.sleep(1)
        try:
            # The timeout keeps a stalled service from blocking forever.
            response = requests.post(
                url, headers=headers, data=payload, timeout=30
            ).json()
            if response['code'] == 0:
                return response['data']['data']['wx_gh']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # A transport failure, a non-JSON body or an unexpected payload
            # shape uses up one attempt instead of aborting the retry loop.
            continue
    return None
|
|
|
+
|
|
|
+
|
|
|
def get_js(self, link):
    """Download ``link`` and return the response body as text.

    The request is sent with a desktop Chrome ``User-Agent`` header and the
    raw response bytes are decoded with ``bytes.decode()`` defaults
    (UTF-8, strict).

    Args:
        link: URL of the page to fetch.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: On connection failure or timeout.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    }
    # Without a timeout a stalled server would block the caller indefinitely.
    response = requests.get(link, headers=headers, timeout=15)
    return response.content.decode()
|
|
|
+
|
|
|
+
|
|
|
def get_link(self, video_id):
    """Build a playable URL for ``video_id`` from the ``getinfo`` endpoint.

    Requests ``https://h5vv.video.qq.com/getinfo`` for the given ``vid`` and
    assembles ``<folder url><file name>?vkey=<fvkey>`` from the first entry
    of ``vl.vi`` in the reply.

    Args:
        video_id: Value substituted into the ``vid`` query parameter.

    Returns:
        The assembled video URL, or ``None`` when the reply cannot be parsed
        or lacks the expected fields.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    url = "https://h5vv.video.qq.com/getinfo?vid={}&platform=101001&charge=0&otype=json&defn=shd".format(
        video_id
    )
    headers = {
        "Host": "h5vv.video.qq.com",
        "xweb_xhr": "1",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF XWEB/30817",
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept": "*/*",
        "Sec-Fetch-Site": "cross-site",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Dest": "empty",
        "Referer": "https://servicewechat.com/wx5fcd817f3f80aece/3/page-frame.html",
        "Accept-Language": "en",
    }
    response = requests.get(url, headers=headers, timeout=15)

    # The body is expected to look like ``QZOutputJson={...};``. Strip the
    # wrapper explicitly instead of blindly dropping the last character, so
    # trailing whitespace or a missing ';' does not corrupt the JSON.
    body = response.text.strip()
    prefix = "QZOutputJson="
    if body.startswith(prefix):
        body = body[len(prefix):]
    body = body.rstrip().rstrip(";")

    try:
        result = json.loads(body)
        vl = result["vl"]["vi"][0]
        key = vl["fvkey"]
        name = vl["fn"]
        folder = vl["ul"]["ui"][0]["url"]
        return folder + name + "?vkey=" + key
    except (ValueError, KeyError, IndexError, TypeError):
        # Error replies carry no "vl" section; report "no link" so callers
        # can test the result for truthiness.
        return None
|
|
|
+
|
|
|
def get_url(self, js_code):
    """Extract a URL embedded in page source text.

    Two patterns are tried in order: ``url: ('<value>')`` first, then
    ``target_url: "<value>"``. The captured value of the first pattern
    that matches is returned.

    Args:
        js_code: Page or script source to search.

    Returns:
        The captured URL string, or ``None`` when neither pattern matches.
    """
    primary = re.search(r"url: \('(.*?)'\)", js_code)
    if primary is not None:
        return primary.group(1)

    # Fall back to the target_url form and pull out the matched URL.
    fallback = re.search(r'target_url\s*:\s*"(.*?)"', js_code)
    if fallback is None:
        return None
    return fallback.group(1)
|
|
|
+
|
|
|
+
|
|
|
def get_video_url(self, url: str):
    """Resolve the video URL for an article page.

    The page is fetched with ``get_js`` up to three times. On each attempt:

    * if the source contains ``video_id: '<id>'``, the id is passed to
      ``get_link``;
    * otherwise ``get_url`` extracts a URL from the source, and every
      ``\\x26amp;`` sequence in it is replaced by ``&``.

    Args:
        url: Address of the article page.

    Returns:
        The first non-empty video URL found, or ``None`` if all attempts
        come back empty.
    """
    video_id_pattern = re.compile(r"video_id:\s*'([^']*)'")
    for _ in range(3):
        js_code = self.get_js(url)
        match = video_id_pattern.search(js_code)
        video_id = match.group(1) if match else None
        if video_id:
            mp4_link = self.get_link(video_id)
        else:
            mp4_link = self.get_url(js_code)
            # get_url returns None when nothing matches, so only unescape
            # an actual string.
            if mp4_link:
                mp4_link = mp4_link.replace("\\x26amp;", "&")
        if mp4_link:
            return mp4_link
    return None
|
|
|
|
|
|
|
|
|
|
|
@@ -247,9 +314,12 @@ class GZXHAuthor:
|
|
|
date_time_obj = datetime.strptime(publish_time_str, date_format)
|
|
|
publish_time_stamp = int(date_time_obj.timestamp())
|
|
|
article_url = article.get("url", "")
|
|
|
+ if article_url:
|
|
|
+ video_url = self.get_video_url(article_url)
|
|
|
+
|
|
|
video_id = wechat_gh + str(int(date_time_obj.timestamp()))
|
|
|
cover_url = article.get("head_pic", "")
|
|
|
- video_url = self.get_video_url(article_url)
|
|
|
+ # video_url = self.get_video_url(article_url)
|
|
|
video_dict = {
|
|
|
"user_name": user_name,
|
|
|
"video_id": video_id,
|