from datetime import datetime
import json
import os
import random
import re
import sys

import requests

# Make modules in the current working directory importable.
sys.path.append(os.getcwd())


class urlManage():
    """Helpers for pulling a content id out of a shared video link."""

    # Randomly generate an id
    @classmethod
    def random_id(cls):
        """Return a numeric string id: current timestamp plus 5 random digits.

        The result is ``YYYYmmddHHMMSS`` (local, naive time) followed by a
        random integer in ``[10000, 99999]``, i.e. 19 digits in total.
        Uniqueness is not guaranteed: two calls within the same second can
        draw the same random suffix.
        """
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        suffix = random.randint(10000, 99999)
        return "{}{}".format(timestamp, suffix)

    @classmethod
    def get_content_id(cls, link, channel):
        """Request ``link`` and extract the content id from the response URL.

        Args:
            link: Share URL to request.
            channel: One of ``"douyin"``, ``"xigua"`` or ``"kuaishou"``; it
                selects the pattern used to locate the id in the URL.

        Returns:
            The captured id as a string, or ``None`` when the channel is not
            one of the supported values or the URL does not match the
            channel's pattern.

        Raises:
            requests.RequestException: Not caught here; any error raised by
                ``requests.get`` (timeout, connection failure, ...) propagates
                to the caller.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1"
        }
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept for backward compatibility -- confirm whether it is still
        # required before relying on the returned URL.
        resp = requests.get(link, headers=headers, timeout=10, verify=False)
        # resp.url is presumably the address reached after any redirects
        # (requests follows redirects for GET by default).
        url = resp.url
        if not url:
            return None

        if channel in ("douyin", "xigua"):
            pattern = r'/(\d+)/\?'
        elif channel == "kuaishou":
            pattern = r'/photo/(\w+)\?'
        else:
            return None

        match = re.search(pattern, url)
        return match.group(1) if match else None

    @classmethod
    def extract_link(cls, data_link):
        """Return the first http(s) URL found in ``data_link``, else ``None``.

        ``data_link`` is first parsed as JSON. If it decodes to an object,
        the URL is searched for in its ``"content"`` field. If it is not
        valid JSON, or decodes to something other than an object (a bare
        number, list, ...), the raw input itself is searched instead.

        Args:
            data_link: A JSON document with a ``"content"`` field, or plain
                text containing a link.

        Returns:
            The first ``http://`` or ``https://`` URL (up to the next
            whitespace), or ``None`` when there is no URL or the text to
            search is not a string.
        """
        try:
            json_data = json.loads(data_link)
        except (json.decoder.JSONDecodeError, TypeError):
            # Not JSON (or not a type json.loads accepts): use the input as is.
            content = data_link
        else:
            if isinstance(json_data, dict):
                content = json_data.get('content', '')
            else:
                # Valid JSON but not an object (e.g. "12345" decodes to an
                # int), so there is no "content" field to read.
                content = data_link

        # re.search needs a str; a null/numeric "content" or a non-text
        # input means there is no link to extract.
        if not isinstance(content, str):
            return None

        # Use a regular expression to extract the link
        link = re.search(r'https?://\S+', content)
        return link.group() if link else None

    @classmethod
    def url_manage(cls, data_link, channel):
        """Extract the link from ``data_link`` and resolve it to a content id.

        Args:
            data_link: Raw text or JSON document passed to ``extract_link``.
            channel: Channel name passed to ``get_content_id``.

        Returns:
            The content id string, or ``None`` when no link is found or no
            id can be extracted from it.
        """
        link = cls.extract_link(data_link)
        if not link:
            return None
        # Normalise any falsy result (e.g. an empty capture) to None.
        return cls.get_content_id(link, channel) or None