|
@@ -124,10 +124,13 @@ class Dy_KS:
|
|
|
host = urlparse(url).netloc
|
|
|
logger.info(f"[+] host=={host}")
|
|
|
else:
|
|
|
- msg = html.unescape(url).split('?')[0]
|
|
|
- pattern = re.search(r'https?://[^\s<>"\'\u4e00-\u9fff]+', msg)
|
|
|
+ # msg = html.unescape(url).split('?')[0]
|
|
|
+ # pattern = re.search(r'https?://[^\s<>"\'\u4e00-\u9fff]+', msg)
|
|
|
+ msg = html.unescape(url)
|
|
|
+ pattern = re.search(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+(/[-\w._~:/#[\]@!$&()*+,;=]*)', msg)
|
|
|
logger.info(f"[+] pattern == {pattern}")
|
|
|
- if not pattern:
|
|
|
+ if pattern is None:
|
|
|
+ logger.error(f"[+] {url} 提取 url失败")
|
|
|
return "重新处理",None,None,None
|
|
|
url = pattern.group()
|
|
|
host = urlparse(url).netloc
|