il y a 1 an · e8c1d96e89
--- a/gongzhongxinhao/gongzhongxinhao/gongzhongxinhao_author.py
+++ b/gongzhongxinhao/gongzhongxinhao/gongzhongxinhao_author.py
@@ -2,6 +2,7 @@ import datetime
 
				 import json
			
 
				 import os
			
 
				 import random
			
 
				+import re
			
 
				 import sys
			
 
				 import time
			
 
				 import uuid
			
@@ -156,13 +157,79 @@ class GZXHAuthor:
 
				         return video_url
			
 
				 
			
 
				     def get_wechat_gh(self, link: str):
			
 
				-        url = "http://8.217.190.241:8888/crawler/wei_xin/account_info"
			
 
				-        payload = json.dumps({"content_link": link})
			
 
				-        headers = {'Content-Type': 'application/json'}
			
 
				-        response = requests.request("POST", url, headers=headers, data=payload).json()
			
 
				-        if response['code'] == 0:
			
 
				-            wx_gh = response['data']['data']['wx_gh']
			
 
				-        return wx_gh
			
 
				+        for i in range(3):
			
 
				+            time.sleep(1)
			
 
				+            url = "http://8.217.190.241:8888/crawler/wei_xin/account_info"
			
 
				+            payload = json.dumps({"content_link": link})
			
 
				+            headers = {'Content-Type': 'application/json'}
			
 
				+            response = requests.request("POST", url, headers=headers, data=payload).json()
			
 
				+            if response['code'] == 0:
			
 
				+                wx_gh = response['data']['data']['wx_gh']
			
 
				+                return wx_gh
			
 
				+
			
 
				+
			
 
    def get_js(self, link):
        """Fetch ``link`` with a desktop-browser User-Agent and return its body.

        Args:
            link: URL to download.

        Returns:
            The response body decoded with ``bytes.decode()`` defaults (UTF-8).

        Raises:
            requests.RequestException: On connection failure or when the
                server does not answer within the timeout.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
        }
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(link, headers=headers, timeout=30)
        return response.content.decode()
			
 
				+
			
 
				+
			
 
    def get_link(self, video_id):
        """Build a playable URL for ``video_id`` from the ``getinfo`` endpoint.

        The endpoint answers with JSON wrapped as ``QZOutputJson={...};``. The
        wrapper is removed, the first entry of ``vl.vi`` is read, and the URL
        is assembled as ``<first ul.ui url><fn>?vkey=<fvkey>``.

        Args:
            video_id: Value substituted into the ``vid`` query parameter.

        Returns:
            The assembled video URL, or ``None`` when the response is not
            valid JSON or lacks the expected fields.

        Raises:
            requests.RequestException: On connection failure or timeout.
        """
        url = "https://h5vv.video.qq.com/getinfo?vid={}&platform=101001&charge=0&otype=json&defn=shd".format(
            video_id
        )
        headers = {
            "Host": "h5vv.video.qq.com",
            "xweb_xhr": "1",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF XWEB/30817",
            "Content-Type": "application/x-www-form-urlencoded",
            "Accept": "*/*",
            "Sec-Fetch-Site": "cross-site",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Referer": "https://servicewechat.com/wx5fcd817f3f80aece/3/page-frame.html",
            "Accept-Language": "en",
        }
        response = requests.get(url, headers=headers, timeout=30)

        # Strip the ``QZOutputJson=`` prefix and the trailing ``;`` without
        # blindly chopping the last character, which would corrupt the JSON
        # if the body ends with whitespace or has no terminator.
        prefix = "QZOutputJson="
        body = response.text.strip()
        if body.startswith(prefix):
            body = body[len(prefix):]
        body = body.rstrip(";")

        try:
            result = json.loads(body)
            vl = result["vl"]["vi"][0]
            key = vl["fvkey"]
            name = vl["fn"]
            folder = vl["ul"]["ui"][0]["url"]
        except (ValueError, KeyError, IndexError, TypeError):
            # Callers test the result for truthiness, so report "no link"
            # instead of raising on an unexpected payload.
            return None
        return folder + name + "?vkey=" + key
			
 
				+
			
 
    def get_url(self, js_code):
        """Extract a URL embedded in page source.

        Looks first for a ``url: ('...')`` occurrence and returns the quoted
        text of the earliest one. If there is none, falls back to the first
        ``target_url: "..."`` occurrence.

        Args:
            js_code: Page / script text to scan.

        Returns:
            The captured URL string (possibly empty), or ``None`` when neither
            pattern is present.
        """
        primary = re.search(r"url: \('(.*?)'\)", js_code)
        if primary is not None:
            return primary.group(1)

        # Extract the matched URL from the fallback pattern, if any.
        fallback = re.search(r'target_url\s*:\s*"(.*?)"', js_code)
        if fallback is None:
            return None
        return fallback.group(1)
			
 
				+
			
 
				+
			
 
    def get_video_url(self, url: str):
        """Resolve the video URL for an article page.

        Makes up to three attempts. Each attempt downloads the page with
        ``get_js`` and searches it for ``video_id: '...'``. When a non-empty
        id is found the link comes from ``get_link``; otherwise it comes from
        ``get_url``, with every ``\\x26amp;`` sequence replaced by ``&``.

        Args:
            url: Article page URL.

        Returns:
            The first non-empty link obtained, or ``None`` when all attempts
            come back empty.
        """
        for _ in range(3):
            js_code = self.get_js(url)
            match = re.search(r"video_id:\s*'([^']*)'", js_code)
            video_id = match.group(1) if match else None
            if video_id:
                mp4_link = self.get_link(video_id)
            else:
                mp4_link = self.get_url(js_code)
                # get_url returns None when the page has no embedded URL;
                # only unescape when there is something to unescape.
                if mp4_link:
                    mp4_link = mp4_link.replace("\\x26amp;", "&")
            if mp4_link:
                return mp4_link
        return None
			
 
				 
			
 
				 
			
 
				 
			
@@ -247,9 +314,12 @@ class GZXHAuthor:
 
				         date_time_obj = datetime.strptime(publish_time_str, date_format)
			
 
				         publish_time_stamp = int(date_time_obj.timestamp())
			
 
				         article_url = article.get("url", "")
			
 
				+        if article_url:
			
 
				+            video_url = self.get_video_url(article_url)
			
 
				+
			
 
				         video_id = wechat_gh + str(int(date_time_obj.timestamp()))
			
 
				         cover_url = article.get("head_pic", "")
			
 
				-        video_url = self.get_video_url(article_url)
			
 
				+        # video_url = self.get_video_url(article_url)
			
 
				         video_dict = {
			
 
				             "user_name": user_name,
			
 
				             "video_id": video_id,