zhangyong 6 tháng trước
mục cha
commit
fd6c9981cc
1 tập tin đã thay đổi với 13 bổ sung và 7 xóa
  1. 13 7
      carry_video/carry_video.py

+ 13 - 7
carry_video/carry_video.py

@@ -25,7 +25,15 @@ class CarryViode:
 
     def get_text_dy_video(self,url):
         try:
-            if "&vid=" not in url:
+            if "&vid=" in url:
+                parsed_url = urlparse(url)
+                params = parse_qs(parsed_url.query)
+                video_id = params.get('vid', [None])[0]
+            elif "?modal_id=" in url:
+                parsed_url = urlparse(url)
+                params = parse_qs(parsed_url.query)
+                video_id = params.get('modal_id', [None])[0]
+            else:
                 headers = {
                     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
                               'q=0.8,application/signed-exchange;v=b3;q=0.7',
@@ -38,10 +46,6 @@ class CarryViode:
                 response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout=30)
                 location = response.headers.get('Location', None)
                 video_id = re.search(r'/video/(\d+)/?', location.split('?')[0] if location else url).group(1)
-            else:
-                parsed_url = urlparse(url)
-                params = parse_qs(parsed_url.query)
-                video_id = params.get('vid', [None])[0]
             url = "http://8.217.192.46:8889/crawler/dou_yin/detail"
             payload = json.dumps({
                 "content_id": str(video_id)
@@ -199,14 +203,16 @@ class CarryViode:
         tag_transport_channel = None
         try:
             url = data['video_url']
-            if "&vid=" not in url and data['tag_transport_channel'] != "抖音":
+            if "&vid=" in url or "?modal_id=" in url:
+                host = urlparse(url).netloc
+            else:
                 msg = html.unescape(url).split('?')[0]
                 pattern = re.search(r'https?://[^\s<>"\'\u4e00-\u9fff]+', msg)
                 if not pattern:
                     in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
                     return
                 url = pattern.group()
-            host = urlparse(url).netloc
+                host = urlparse(url).netloc
             if host in ['v.douyin.com', 'www.douyin.com', 'www.iesdouyin.com']:
                 tag_transport_channel = "抖音"
                 logger.info(f"[+] {url}开始获取抖音视频链接")