Explorar o código

增加公众号下载成功率

zhangyong hai 1 ano
pai
achega
79814481cf
Modificáronse 2 ficheiros con 61 adicións e 17 borrados
  1. 1 1
      config.ini
  2. 60 16
      single_video/gongzhonghao/gongzhonghao_video.py

+ 1 - 1
config.ini

@@ -1,5 +1,5 @@
 [PATHS]
 VIDEO_OSS_PATH = /root/single_video_crawler/single_video/oss_video/
-
+;VIDEO_OSS_PATH = /Users/tzld/Desktop/single_video_crawler/single_video/oss_video/
 
 

+ 60 - 16
single_video/gongzhonghao/gongzhonghao_video.py

@@ -14,11 +14,13 @@ from common.url_manage import urlManage
 
 config = configparser.ConfigParser()
 config.read('/root/single_video_crawler/config.ini')  # 替换为您的配置文件路径
+# config.read('/Users/tzld/Desktop/single_video_crawler/config.ini')
 class gongzhonghaoVdieo():
 
     @classmethod
     def download_video(cls, video_url, video_path_url):
         for i in range(3):
+
             headers = {
                 'Accept': '*/*',
                 'Accept-Encoding': 'identity;q=1, *;q=0',
@@ -32,7 +34,6 @@ class gongzhonghaoVdieo():
                 'Referer': 'https://mp.weixin.qq.com/',
                 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
             }
-
             response = requests.request("GET", video_url, headers=headers)
             # 检查响应状态码是否为200
             if response.status_code == 206:
@@ -43,38 +44,81 @@ class gongzhonghaoVdieo():
                 return True
         return False
 
+    @classmethod
+    def get_link(cls, video_id):
+        url = "https://h5vv.video.qq.com/getinfo?vid={}&platform=101001&charge=0&otype=json&defn=shd".format(
+            video_id
+        )
+        headers = {
+            "Host": "h5vv.video.qq.com",
+            "xweb_xhr": "1",
+            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF XWEB/30817",
+            "Content-Type": "application/x-www-form-urlencoded",
+            "Accept": "*/*",
+            "Sec-Fetch-Site": "cross-site",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Dest": "empty",
+            "Referer": "https://servicewechat.com/wx5fcd817f3f80aece/3/page-frame.html",
+            "Accept-Language": "en",
+        }
+        response = requests.get(url, headers=headers)
+        result = json.loads(response.text.replace("QZOutputJson=", "")[:-1])
+        vl = result["vl"]["vi"][0]
+        key = vl["fvkey"]
+        name = vl["fn"]
+        folder = vl["ul"]["ui"][0]["url"]
+        video_url = folder + name + "?vkey=" + key
+        return video_url
 
     @classmethod
-    def get_url(cls, link):
+    def get_js(cls, link):
         payload = {}
         headers = {
             'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
         }
         response = requests.request("GET", link, headers=headers, data=payload)
         js_code = response.content.decode()
-        if js_code:
-            pattern = re.compile(r"url: \('(.*?)'\)")
-            urls = pattern.findall(js_code)
-            if urls:
-                return urls[0]
-            else:
-                return None
+        return js_code
+
+    @classmethod
+    def get_url(cls, js_code):
+        pattern = re.compile(r"url: \('(.*?)'\)")
+        urls = pattern.findall(js_code)
+        if urls:
+            return urls[0]
         else:
-            return None
+            match = re.search(r'target_url\s*:\s*"(.*?)"', js_code)
+            # 提取匹配到的 URL
+            url = match.group(1) if match else None
+            return url
 
 
     @classmethod
     def get_videoList(cls, vx_message, channel):
         try:
+            mp4_link = ''
+            video_id = ''
             data_link = vx_message[1]
             data = json.loads(data_link)
             url = data.get('url', '')
-            mp4_link = cls.get_url(url)
-            mp4_link = mp4_link.replace("\\x26amp;", "&")
+            title = data.get('title', '')
+            for i in range(3):
+                js_code = cls.get_js(url)
+                regex = r"video_id:\s*'([^']*)'"
+                match = re.search(regex, js_code)
+                video_id = match.group(1) if match else None
+                if video_id:
+                    mp4_link = cls.get_link(video_id)
+                    if mp4_link:
+                        break
+                else:
+                    video_id = urlManage.random_id()
+                    mp4_link = cls.get_url(js_code)
+                    mp4_link = mp4_link.replace("\\x26amp;", "&")
+                    if mp4_link:
+                        break
+
             if mp4_link:
-                title = data.get('title', '')
-                # 随机生成视频oss_id
-                video_id = urlManage.random_id()
                 video_path_url = config['PATHS']['VIDEO_OSS_PATH'] + video_id + ".mp4"
                 status = cls.download_video(mp4_link, video_path_url)
                 if status == False:
@@ -90,7 +134,7 @@ class gongzhonghaoVdieo():
                 if os.path.isfile(video_path_url):
                     os.remove(video_path_url)
             else:
-                return "无法获取到视频ID"
+                return "无法获取到视频下载链接"
         except Exception as e:
             Common.logger().info(f'报错信息:{e}')
             return f"处理报错,报错信息{e}"