Parcourir la source

请求增加代理,gemini增加失败重试

zhangliang il y a 5 mois
Parent
commit
54f8b560e3
1 fichier modifié avec 61 ajouts et 56 suppressions
  1. 61 56
      utils/dy_ks_get_url.py

+ 61 - 56
utils/dy_ks_get_url.py

@@ -80,67 +80,72 @@ class Dy_KS:
             except Exception as e:
                 retry_count += 1
                 logger.error(f"[+] 抖音{url}获取视频链接失败,失败信息{e} \n {traceback.format_exc()}")
-                time.sleep(1)
+                time.sleep(2)
         return None, None, None
 
     @classmethod
     def get_text_ks_video(cls,url):
-        try:
-            headers = {
-                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
-                          'q=0.8,application/signed-exchange;v=b3;q=0.7',
-                'Accept-Language': 'zh-CN,zh;q=0.9',
-                'Cache-Control': 'no-cache',
-                'Pragma': 'no-cache',
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
-                              'Chrome/127.0.0.0 Safari/537.36',
-            }
-            response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout= 30,
-                                        proxies=cls.proxies)
-            logger.info(f"请求{url}响应:{response}")
-            location = response.headers.get('Location', None)
-            if location == "https://kuaishou.com/":
-                return "作品不存在", None, None
-            # video_id = re.search(r'/(f|photo|short-video|long-video)/(.*)/?',
-            #                      location.split('?')[0] if location else url).group(2)
-            match = re.search(r'/(f|photo|short-video|long-video)/(.*)/?',
-                              location.split('?')[0] if location else url)
-            if match:
-                video_id = match.group(2)
-            else:
-                parts = url.rstrip('/').split('/')
-                if parts:
-                    video_id = parts[-1]
-            logger.info(f"[+]提取到的视频ID=={video_id}")
-            url = "http://8.217.192.46:8889/crawler/kuai_shou/detail"
-            if not video_id or not video_id.strip():
-                return None, None, None
-            payload = json.dumps({
-                "content_id": str(video_id)
-            })
-            headers = {
-                'Content-Type': 'application/json'
-            }
-            time.sleep(random.uniform(10, 50))
-            response = requests.request("POST", url, headers=headers, data=payload, timeout= 30)
-            logger.info(f"请求{url}响应:{response.json()}")
-            response = response.json()
-            code = response["code"]
-            if code == 0:
-                data = response["data"]["data"]
-                content_type = data['content_type']
-                if content_type == 'note':
-                    return "note","note",None
-                video_url = data["video_url_list"][0]["video_url"]
-                original_title = data["title"]
-                return video_url, original_title, video_id
-            elif code == 27006:
-                if "作品不存在" in response['msg'] or "内容不存在" in response['msg'] or "私密作品" in response['msg'] or "该作品仅允许关注者查看" in response['msg']:
+        max_retries = 3
+        retry_count = 0
+        while retry_count < max_retries:
+            try:
+                headers = {
+                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
+                              'q=0.8,application/signed-exchange;v=b3;q=0.7',
+                    'Accept-Language': 'zh-CN,zh;q=0.9',
+                    'Cache-Control': 'no-cache',
+                    'Pragma': 'no-cache',
+                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
+                                  'Chrome/127.0.0.0 Safari/537.36',
+                }
+                response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout= 30,
+                                            proxies=cls.proxies)
+                logger.info(f"请求{url}响应:{response}")
+                location = response.headers.get('Location', None)
+                if location == "https://kuaishou.com/":
                     return "作品不存在", None, None
-            time.sleep(3)
-        except Exception as e:
-            logger.error(f"[+] 快手{url}获取视频链接失败,失败信息{e} \n {traceback.format_exc()}")
-            return None, None,None
+                # video_id = re.search(r'/(f|photo|short-video|long-video)/(.*)/?',
+                #                      location.split('?')[0] if location else url).group(2)
+                match = re.search(r'/(f|photo|short-video|long-video)/(.*)/?',
+                                  location.split('?')[0] if location else url)
+                if match:
+                    video_id = match.group(2)
+                else:
+                    parts = url.rstrip('/').split('/')
+                    if parts:
+                        video_id = parts[-1]
+                logger.info(f"[+]提取到的视频ID=={video_id}")
+                url = "http://8.217.192.46:8889/crawler/kuai_shou/detail"
+                if not video_id or not video_id.strip():
+                    return None, None, None
+                payload = json.dumps({
+                    "content_id": str(video_id)
+                })
+                headers = {
+                    'Content-Type': 'application/json'
+                }
+                time.sleep(random.uniform(10, 50))
+                response = requests.request("POST", url, headers=headers, data=payload, timeout= 30)
+                logger.info(f"请求{url}响应:{response.json()}")
+                response = response.json()
+                code = response["code"]
+                if code == 0:
+                    data = response["data"]["data"]
+                    content_type = data['content_type']
+                    if content_type == 'note':
+                        return "note","note",None
+                    video_url = data["video_url_list"][0]["video_url"]
+                    original_title = data["title"]
+                    return video_url, original_title, video_id
+                elif code == 27006:
+                    if "作品不存在" in response['msg'] or "内容不存在" in response['msg'] or "私密作品" in response['msg'] or "该作品仅允许关注者查看" in response['msg']:
+                        return "作品不存在", None, None
+                time.sleep(3)
+            except Exception as e:
+                retry_count += 1
+                logger.error(f"[+] 快手{url}获取视频链接失败,失败信息{e} \n {traceback.format_exc()}")
+                time.sleep(2)
+        return None, None,None
 
     @classmethod
     def get_text_hksp_video(cls, url):