|
@@ -25,7 +25,15 @@ class CarryViode:
|
|
|
|
|
|
def get_text_dy_video(self,url):
|
|
|
try:
|
|
|
- if "&vid=" not in url:
|
|
|
+ if "&vid=" in url:
|
|
|
+ parsed_url = urlparse(url)
|
|
|
+ params = parse_qs(parsed_url.query)
|
|
|
+ video_id = params.get('vid', [None])[0]
|
|
|
+ elif "?modal_id=" in url:
|
|
|
+ parsed_url = urlparse(url)
|
|
|
+ params = parse_qs(parsed_url.query)
|
|
|
+ video_id = params.get('modal_id', [None])[0]
|
|
|
+ else:
|
|
|
headers = {
|
|
|
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
|
|
|
'q=0.8,application/signed-exchange;v=b3;q=0.7',
|
|
@@ -38,10 +46,6 @@ class CarryViode:
|
|
|
response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout=30)
|
|
|
location = response.headers.get('Location', None)
|
|
|
video_id = re.search(r'/video/(\d+)/?', location.split('?')[0] if location else url).group(1)
|
|
|
- else:
|
|
|
- parsed_url = urlparse(url)
|
|
|
- params = parse_qs(parsed_url.query)
|
|
|
- video_id = params.get('vid', [None])[0]
|
|
|
url = "http://8.217.192.46:8889/crawler/dou_yin/detail"
|
|
|
payload = json.dumps({
|
|
|
"content_id": str(video_id)
|
|
@@ -199,14 +203,16 @@ class CarryViode:
|
|
|
tag_transport_channel = None
|
|
|
try:
|
|
|
url = data['video_url']
|
|
|
- if "&vid=" not in url and data['tag_transport_channel'] != "抖音":
|
|
|
+ if "&vid=" in url or "?modal_id=" in url:
|
|
|
+ host = urlparse(url).netloc
|
|
|
+ else:
|
|
|
msg = html.unescape(url).split('?')[0]
|
|
|
pattern = re.search(r'https?://[^\s<>"\'\u4e00-\u9fff]+', msg)
|
|
|
if not pattern:
|
|
|
in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
|
|
|
return
|
|
|
url = pattern.group()
|
|
|
- host = urlparse(url).netloc
|
|
|
+ host = urlparse(url).netloc
|
|
|
if host in ['v.douyin.com', 'www.douyin.com', 'www.iesdouyin.com']:
|
|
|
tag_transport_channel = "抖音"
|
|
|
logger.info(f"[+] {url}开始获取抖音视频链接")
|