1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- from datetime import datetime
- import os
- import random
- import re
- import sys
- import requests
- import json
- sys.path.append(os.getcwd())
class urlManage():
    """Utilities for extracting a content id from a shared video link.

    All helpers are classmethods, so the class is used as a namespace and
    never needs to be instantiated.
    """

    # Channel name -> regex applied to the resolved URL. Each pattern has
    # exactly one capture group, which is the content id.
    _ID_PATTERNS = {
        "douyin": r'/(\d+)/\?',
        "xigua": r'/(\d+)/\?',
        "kuaishou": r'/photo/(\w+)\?',
    }

    # Mobile Safari User-Agent sent with the request that resolves the link.
    _USER_AGENT = (
        "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) "
        "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 "
        "Mobile/15E148 Safari/604.1"
    )

    @classmethod
    def random_id(cls):
        """Generate a random numeric id string.

        Returns:
            A 19-character string: the current naive local time formatted
            as ``YYYYmmddHHMMSS`` followed by a random integer in
            ``[10000, 99999]``. Uniqueness is not guaranteed.
        """
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        suffix = random.randint(10000, 99999)
        return "{}{}".format(timestamp, suffix)

    @classmethod
    def get_content_id(cls, link, channel):
        """Resolve ``link`` over HTTP and extract the content id from its URL.

        Args:
            link: URL to request.
            channel: One of ``"douyin"``, ``"xigua"`` or ``"kuaishou"``.

        Returns:
            The id captured by the channel's pattern, or ``None`` when the
            channel is unsupported, the response carries no URL, or the URL
            does not match the pattern.

        Raises:
            requests.RequestException: propagated unchanged when the HTTP
                request fails (timeout, connection error, ...).
        """
        # Reject unsupported channels before doing any network I/O; the
        # result for them is None regardless of what the request returns.
        pattern = cls._ID_PATTERNS.get(channel) if isinstance(channel, str) else None
        if pattern is None:
            return None

        headers = {"User-Agent": cls._USER_AGENT}
        # NOTE(review): verify=False disables TLS certificate verification.
        # Kept as in the original; confirm this is intentional.
        resp = requests.get(link, headers=headers, timeout=10, verify=False)

        # resp.url is the URL of the response actually received, i.e. the
        # one the patterns are written against.
        resolved_url = resp.url
        if not resolved_url:
            return None

        match = re.search(pattern, resolved_url)
        return match.group(1) if match else None

    @classmethod
    def extract_link(cls, data_link):
        """Return the first ``http(s)://`` link found in ``data_link``.

        ``data_link`` may be a JSON object with a ``content`` field, a JSON
        string, or plain text. Any other input is searched as raw text when
        it is a string.

        Args:
            data_link: Raw text or JSON-encoded text that may contain a link.

        Returns:
            The first run of non-whitespace characters starting with
            ``http://`` or ``https://``, or ``None`` when there is no link
            or no searchable text.
        """
        try:
            parsed = json.loads(data_link)
        except json.decoder.JSONDecodeError:
            # Not JSON at all: search the raw text.
            content = data_link
        except TypeError:
            # json.loads rejects non str/bytes input such as None.
            return None
        else:
            if isinstance(parsed, dict):
                content = parsed.get('content', '')
            elif isinstance(parsed, str):
                content = parsed
            else:
                # Valid JSON but not an object or string (number, list, ...):
                # there is no "content" field, so fall back to the raw text.
                content = data_link

        # A null / numeric "content" field or bytes input cannot be searched
        # with a str pattern.
        if not isinstance(content, str):
            return None

        # Extract the link with a regular expression.
        found = re.search(r'https?://\S+', content)
        return found.group() if found else None

    @classmethod
    def url_manage(cls, data_link, channel):
        """Extract a link from ``data_link`` and resolve it to a content id.

        Args:
            data_link: Raw text or JSON-encoded text that may contain a link.
            channel: Channel name passed through to ``get_content_id``.

        Returns:
            The content id, or ``None`` when no link is found or no id can
            be extracted from the resolved URL.
        """
        link = cls.extract_link(data_link)
        if not link:
            return None
        return cls.get_content_id(link, channel)
|