import html
import json
import os
import random
import re
import time
import uuid
import requests
from datetime import datetime
from urllib.parse import urlparse, parse_qs
from loguru import logger
from utils.aliyun_log import AliyunLogger
from utils.feishu_utils import Feishu
class Dy_KS:
    """Resolve Douyin / Kuaishou share links into direct video URLs.

    All methods are classmethods; the class holds no instance state.  Video
    details are fetched by POSTing ``{"content_id": <id>}`` to the detail
    endpoints below and reading ``code``, ``msg`` and ``data.data`` from the
    JSON reply.
    """

    # Detail endpoints queried with the extracted content id.
    _DY_DETAIL_API = "http://8.217.192.46:8889/crawler/dou_yin/detail"
    _KS_DETAIL_API = "http://8.217.192.46:8889/crawler/kuai_shou/detail"

    # Browser-like headers sent when following a share link's redirect.
    _BROWSER_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
                  'q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Pragma': 'no-cache',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/127.0.0.0 Safari/537.36',
    }
    _JSON_HEADERS = {'Content-Type': 'application/json'}

    # Patterns locating the content id inside the (redirected) link.
    _DY_ID_PATTERN = re.compile(r'/video/(\d+)/?')
    _KS_ID_PATTERN = re.compile(r'/(f|photo|short-video|long-video)/(.*)/?')

    # Hosts recognised by get_video_url for each platform.
    _DY_HOSTS = ('v.douyin.com', 'www.douyin.com', 'www.iesdouyin.com')
    _KS_HOSTS = ('v.kuaishou.com', 'www.kuaishou.com', 'v.m.chenzhongtech.com', 'creater.eozatvmq.com')

    @classmethod
    def _redirect_target(cls, url):
        """Return the text in which the content id should be searched.

        Issues a GET without following redirects.  When the response carries a
        ``Location`` header, that value with its query string removed is
        returned; otherwise ``url`` itself is returned unchanged.
        """
        response = requests.request(url=url, method='GET', headers=cls._BROWSER_HEADERS,
                                    allow_redirects=False, timeout=30)
        location = response.headers.get('Location', None)
        return location.split('?')[0] if location else url

    @staticmethod
    def _match_group(pattern, text, group):
        """Return ``group`` of the first ``pattern`` match in ``text``.

        Raises:
            ValueError: if ``pattern`` does not match, so callers get a
                readable message instead of an AttributeError on ``None``.
        """
        match = pattern.search(text)
        if match is None:
            raise ValueError(f"no content id found in {text!r}")
        return match.group(group)

    @classmethod
    def _resolve_dy_video_id(cls, url):
        """Extract the Douyin video id from ``url``.

        The id is read from the ``vid`` / ``modal_id`` query parameter when the
        link carries one (may be ``None`` if the parameter is blank); otherwise
        it is taken from the ``/video/<digits>`` segment of the redirect target.
        """
        if "&vid=" in url:
            return parse_qs(urlparse(url).query).get('vid', [None])[0]
        if "?modal_id=" in url:
            return parse_qs(urlparse(url).query).get('modal_id', [None])[0]
        return cls._match_group(cls._DY_ID_PATTERN, cls._redirect_target(url), 1)

    @classmethod
    def get_text_dy_video(cls, url):
        """Fetch the video URL and title for a Douyin link.

        Args:
            url: Douyin share link or page URL.

        Returns:
            A 3-tuple:
            * ``(video_url, title, video_id)`` on success;
            * ``("作品不存在", None, None)`` when the API reports code 22002
              with the "deleted or inaccessible" message;
            * ``(None, None, None)`` when the id is blank or all attempts fail.
        """
        max_retries = 3
        video_id = None
        for _ in range(max_retries):
            try:
                # The caller's ``url`` is never overwritten, so a retry
                # re-processes the real link and the log shows the real link.
                if video_id is None:
                    video_id = cls._resolve_dy_video_id(url)
                    if not video_id or not video_id.strip():
                        return None, None, None
                payload = json.dumps({
                    "content_id": str(video_id)
                })
                response = requests.request("POST", cls._DY_DETAIL_API, headers=cls._JSON_HEADERS,
                                            data=payload, timeout=60).json()
                code = response["code"]
                if code == 0:
                    data = response["data"]["data"]
                    video_url = data["video_url_list"][0]["video_url"]
                    original_title = data["title"]
                    return video_url, original_title, video_id
                msg = response.get("msg") or ""
                if code == 22002 and '抖音内容已被删除或无法访问' in msg:
                    return "作品不存在", None, None
                # Any other code counts as a failed attempt so the loop is
                # always bounded by max_retries.
                failure = f"code={code}, msg={msg}"
            except Exception as e:
                failure = e
            logger.error(f"[+] 抖音{url}获取视频链接失败,失败信息{failure}")
            time.sleep(1)
        return None, None, None

    @classmethod
    def get_text_ks_video(cls, url):
        """Fetch the video URL and title for a Kuaishou link.

        Args:
            url: Kuaishou share link or page URL.

        Returns:
            A 3-tuple:
            * ``(video_url, title, video_id)`` on success;
            * ``("note", "note", None)`` when the content type is ``note``;
            * ``("作品不存在", None, None)`` when the API reports code 27006
              with a "does not exist" / "private" message;
            * ``(None, None, None)`` on any other failure.
        """
        try:
            target = cls._redirect_target(url)
            # The greedy ``(.*)`` also swallows a trailing slash; drop it so
            # the id sent to the API is clean.
            video_id = cls._match_group(cls._KS_ID_PATTERN, target, 2).rstrip('/')
            if not video_id or not video_id.strip():
                return None, None, None
            payload = json.dumps({
                "content_id": str(video_id)
            })
            # Randomised pause before calling the API (kept from the original).
            time.sleep(random.uniform(10, 50))
            response = requests.request("POST", cls._KS_DETAIL_API, headers=cls._JSON_HEADERS,
                                        data=payload, timeout=30).json()
            code = response["code"]
            if code == 0:
                data = response["data"]["data"]
                if data['content_type'] == 'note':
                    # Three values, like every other return path, so callers
                    # can always unpack the result the same way.
                    return "note", "note", None
                video_url = data["video_url_list"][0]["video_url"]
                original_title = data["title"]
                return video_url, original_title, video_id
            if code == 27006:
                msg = response.get("msg") or ""
                if "作品不存在" in msg or "内容不存在" in msg or "私密作品" in msg:
                    return "作品不存在", None, None
            time.sleep(3)
        except Exception as e:
            logger.error(f"[+] 快手{url}获取视频链接失败,失败信息{e}")
        return None, None, None

    @classmethod
    def get_video_url(cls, data, principal):
        """Dispatch a task's link to the matching platform resolver.

        Args:
            data: mapping that provides ``video_url`` (link or text containing
                a link) and ``name`` (used when logging unsupported links).
            principal: value forwarded to ``AliyunLogger.logging``.

        Returns:
            A 4-tuple ``(url, title, video_id, channel)`` where ``channel`` is
            ``"抖音"`` or ``"快手"``.  ``url``/``title``/``video_id`` may be
            ``None`` when the resolver failed.  Sentinel results, each padded
            with three ``None``: ``"重新处理"`` (no link found, image note, or
            unexpected error), ``"链接不是抖/快"`` (unsupported host) and
            ``"作品不存在"`` (content removed).
        """
        try:
            url = data['video_url']
            if "&vid=" in url or "?modal_id=" in url:
                host = urlparse(url).netloc
            else:
                # Share text may be HTML-escaped and wrapped in other words:
                # unescape, drop the query string, then pull out the first URL.
                msg = html.unescape(url).split('?')[0]
                pattern = re.search(r'https?://[^\s<>"\'\u4e00-\u9fff]+', msg)
                if not pattern:
                    return "重新处理", None, None, None
                url = pattern.group()
                host = urlparse(url).netloc
            if host in cls._DY_HOSTS:
                tag_transport_channel = "抖音"
                logger.info(f"[+] {url}开始获取抖音视频链接")
                url, original_title, video_id = cls.get_text_dy_video(url=url)
            elif host in cls._KS_HOSTS:
                tag_transport_channel = "快手"
                logger.info(f"[+] {url}开始获取快手视频链接")
                url, original_title, video_id = cls.get_text_ks_video(url=url)
            else:
                logger.error(f"[+] {url}该链接不是抖/快 不做处理")
                AliyunLogger.logging(data["name"], principal, "", data["video_url"],
                                     "不是抖/快不做处理", "1001", str(data))
                return "链接不是抖/快", None, None, None
            if url == "作品不存在":
                return "作品不存在", None, None, None
            if url == "note":
                # Image notes previously reached callers as "重新处理" (via an
                # unpack error on a 2-tuple); keep that result explicitly.
                # NOTE(review): consider a dedicated sentinel for notes.
                return "重新处理", None, None, None
            return url, original_title, video_id, tag_transport_channel
        except Exception as e:
            logger.info(f"[+] 获取视频链接异常{e}")
            return "重新处理", None, None, None