|
@@ -5,7 +5,6 @@ import datetime
|
|
import difflib
|
|
import difflib
|
|
import json
|
|
import json
|
|
import os
|
|
import os
|
|
-import random
|
|
|
|
import shutil
|
|
import shutil
|
|
import sys
|
|
import sys
|
|
import time
|
|
import time
|
|
@@ -13,6 +12,7 @@ from hashlib import md5
|
|
|
|
|
|
import requests
|
|
import requests
|
|
import urllib3
|
|
import urllib3
|
|
|
|
+from requests.adapters import HTTPAdapter
|
|
from selenium.webdriver import DesiredCapabilities
|
|
from selenium.webdriver import DesiredCapabilities
|
|
from selenium.webdriver.chrome.service import Service
|
|
from selenium.webdriver.chrome.service import Service
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.common.by import By
|
|
@@ -116,24 +116,35 @@ class GongzhonghaoFollow2:
|
|
"ajax": "1",
|
|
"ajax": "1",
|
|
}
|
|
}
|
|
urllib3.disable_warnings()
|
|
urllib3.disable_warnings()
|
|
- r = requests.get(url=url, headers=headers, params=params, verify=False)
|
|
+ s = requests.session()
|
|
|
|
+
|
|
|
|
+ s.mount('http://', HTTPAdapter(max_retries=3))
|
|
|
|
+ s.mount('https://', HTTPAdapter(max_retries=3))
|
|
|
|
+ r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
|
|
|
|
+ r.close()
|
|
if r.json()["base_resp"]["err_msg"] == "invalid session":
|
|
if r.json()["base_resp"]["err_msg"] == "invalid session":
|
|
- Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
|
|
+ Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
|
|
Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
|
|
Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
|
|
|
|
+ Common.logger(log_type, crawler).warning(
|
|
|
|
+ f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
|
|
Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
|
|
time.sleep(60 * 10)
|
|
time.sleep(60 * 10)
|
|
continue
|
|
continue
|
|
if r.json()["base_resp"]["err_msg"] == "freq control":
|
|
if r.json()["base_resp"]["err_msg"] == "freq control":
|
|
- Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
|
|
+ Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
|
|
Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
|
|
Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
|
|
|
|
+ Common.logger(log_type, crawler).warning(
|
|
|
|
+ f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
|
|
Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
|
|
time.sleep(60 * 10)
|
|
time.sleep(60 * 10)
|
|
continue
|
|
continue
|
|
if "list" not in r.json() or len(r.json()["list"]) == 0:
|
|
if "list" not in r.json() or len(r.json()["list"]) == 0:
|
|
- Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
|
|
+ Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
|
|
Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
|
|
Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
|
|
|
|
+ Common.logger(log_type, crawler).warning(
|
|
|
|
+ f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
|
|
Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
|
|
time.sleep(60 * 10)
|
|
time.sleep(60 * 10)
|
|
@@ -239,17 +250,26 @@ class GongzhonghaoFollow2:
|
|
"ajax": "1",
|
|
"ajax": "1",
|
|
}
|
|
}
|
|
urllib3.disable_warnings()
|
|
urllib3.disable_warnings()
|
|
- r = requests.get(url=url, headers=headers, params=params, verify=False)
|
|
+ s = requests.session()
|
|
|
|
+
|
|
|
|
+ s.mount('http://', HTTPAdapter(max_retries=3))
|
|
|
|
+ s.mount('https://', HTTPAdapter(max_retries=3))
|
|
|
|
+ r = s.get(url=url, headers=headers, params=params, verify=False, proxies=Common.tunnel_proxies(), timeout=5)
|
|
|
|
+ r.close()
|
|
if r.json()["base_resp"]["err_msg"] == "invalid session":
|
|
if r.json()["base_resp"]["err_msg"] == "invalid session":
|
|
- Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
|
|
+ Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
|
|
- Common.logger(log_type, crawler).info(f"get_videoList:{r.text}")
|
|
+ Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}")
|
|
|
|
+ Common.logger(log_type, crawler).warning(
|
|
|
|
+ f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 过期啦\n")
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
|
|
Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
|
|
time.sleep(60 * 10)
|
|
time.sleep(60 * 10)
|
|
continue
|
|
continue
|
|
if r.json()["base_resp"]["err_msg"] == "freq control":
|
|
if r.json()["base_resp"]["err_msg"] == "freq control":
|
|
- Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
|
|
+ Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
|
|
Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
|
|
Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
|
|
|
|
+ Common.logger(log_type, crawler).warning(
|
|
|
|
+ f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
|
|
Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
|
|
time.sleep(60 * 10)
|
|
time.sleep(60 * 10)
|
|
@@ -257,6 +277,8 @@ class GongzhonghaoFollow2:
|
|
if 'app_msg_list' not in r.json():
|
|
if 'app_msg_list' not in r.json():
|
|
Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
|
|
Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
|
|
Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
|
|
Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
|
|
|
|
+ Common.logger(log_type, crawler).warning(
|
|
|
|
+ f"公众号_2:{token_dict['gzh_name']}, 更换日期:{token_dict['gzh_time']} 频控啦\n")
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
if 20 >= datetime.datetime.now().hour >= 10:
|
|
Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
|
|
Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
|
|
time.sleep(60 * 10)
|
|
time.sleep(60 * 10)
|
|
@@ -330,8 +352,8 @@ class GongzhonghaoFollow2:
|
|
return
|
|
return
|
|
cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
|
|
cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
|
|
|
|
|
|
- Common.logger(log_type, crawler).info('随机休眠 60*3-60*8 秒\n')
|
|
+ Common.logger(log_type, crawler).info('休眠 60 秒\n')
|
|
- time.sleep(random.randint(60*5, 60*10))
|
|
+ time.sleep(60)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
|
|
Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
|
|
|
|
|
|
@@ -500,8 +522,8 @@ class GongzhonghaoFollow2:
|
|
Common.logger(log_type, crawler).info(f'获取 {user_name} 公众号视频\n')
|
|
Common.logger(log_type, crawler).info(f'获取 {user_name} 公众号视频\n')
|
|
cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
|
|
cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
|
|
cls.begin = 0
|
|
cls.begin = 0
|
|
- Common.logger(log_type, crawler).info('随机休眠 60*5, 60*10 秒\n')
|
|
+ Common.logger(log_type, crawler).info('休眠 60 秒\n')
|
|
- time.sleep(random.randint(60*5, 60*10))
|
|
+ time.sleep(60)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
Common.logger(log_type, crawler).info(f'get_all_videos异常:{e}\n')
|
|
Common.logger(log_type, crawler).info(f'get_all_videos异常:{e}\n')
|
|
|
|
|