|
@@ -301,6 +301,63 @@ class Follow:
|
|
|
else:
|
|
|
return video_title
|
|
|
|
|
|
@classmethod
def get_cookie(cls, log_type, crawler, out_uid, machine):
    """Fetch the Cookie header Chrome sends to www.kuaishou.com.

    Opens ``https://www.kuaishou.com/profile/{out_uid}`` in headless Chrome
    with performance logging enabled, then scans the captured performance
    log for the first entry whose request headers contain a ``Cookie`` and
    whose ``Host`` is ``www.kuaishou.com``.

    Args:
        log_type: Passed through to ``Common.logger``.
        crawler: Passed through to ``Common.logger``.
        out_uid: Profile id interpolated into the profile URL.
        machine: Selects the chromedriver binary: ``"aliyun"`` uses the
            driver found by Selenium itself, ``"macpro"`` / ``"macair"``
            use their own hard-coded paths, anything else uses the
            default local path.

    Returns:
        The cookie string, or ``None`` when no matching log entry exists
        or when any step fails (the failure is logged, not raised).
    """
    # Chromedriver locations for the machines that need an explicit binary.
    chromedriver_paths = {
        "macpro": '/Users/lieyunye/Downloads/chromedriver_v107/chromedriver',
        "macair": '/Users/piaoquan/Downloads/chromedriver_v108/chromedriver',
    }
    default_chromedriver = '/Users/wangkun/Downloads/chromedriver/chromedriver_v109/chromedriver'

    try:
        # Enable the performance log so request headers can be read back.
        # Work on a copy: DesiredCapabilities.CHROME is a shared class-level
        # dict and must not be modified in place.
        ca = DesiredCapabilities.CHROME.copy()
        ca["goog:loggingPrefs"] = {"performance": "ALL"}

        # Run without opening a browser window.
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument("headless")
        chrome_options.add_argument(
            'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
        chrome_options.add_argument("--no-sandbox")

        # Driver initialisation.
        driver_kwargs = {"desired_capabilities": ca, "options": chrome_options}
        if machine != "aliyun":
            driver_kwargs["service"] = Service(
                chromedriver_paths.get(machine, default_chromedriver))
        driver = webdriver.Chrome(**driver_kwargs)

        try:
            driver.implicitly_wait(10)
            # Open the user's profile page.
            driver.get(f'https://www.kuaishou.com/profile/{out_uid}')
            time.sleep(1)
            logs = driver.get_log("performance")
        finally:
            # Always shut the browser down, even when loading the page or
            # reading the log fails; otherwise the Chrome process leaks.
            driver.quit()

        for line in logs:
            try:
                msg = json.loads(line['message'])
            except (KeyError, TypeError, ValueError):
                # One malformed entry must not abort the whole scan.
                continue

            payload = msg.get('message') if isinstance(msg, dict) else None
            params = payload.get('params') if isinstance(payload, dict) else None
            headers = params.get('headers') if isinstance(params, dict) else None
            if not isinstance(headers, dict) or 'Cookie' not in headers:
                continue
            # .get(): an entry without a Host header is skipped instead of
            # raising KeyError and discarding every later entry.
            if headers.get('Host') != 'www.kuaishou.com':
                continue
            return headers['Cookie']

        # No request to www.kuaishou.com carried a Cookie header.
        return None
    except Exception as e:
        Common.logger(log_type, crawler).error(f"get_cookie:{e}\n")
        return None
|
|
|
+
|
|
|
@classmethod
|
|
|
def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
|
|
|
try:
|
|
@@ -710,64 +767,9 @@ class Follow:
|
|
|
oss_endpoint=oss_endpoint,
|
|
|
env=env,
|
|
|
machine=machine)
|
|
|
- time.sleep(3)
|
|
|
-
|
|
|
- @classmethod
|
|
|
- def get_cookie(cls, log_type, crawler, out_uid, machine):
|
|
|
- try:
|
|
|
- # 打印请求配置
|
|
|
- ca = DesiredCapabilities.CHROME
|
|
|
- ca["goog:loggingPrefs"] = {"performance": "ALL"}
|
|
|
-
|
|
|
- # 不打开浏览器运行
|
|
|
- chrome_options = webdriver.ChromeOptions()
|
|
|
- chrome_options.add_argument("headless")
|
|
|
- chrome_options.add_argument(
|
|
|
- f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
|
|
|
- chrome_options.add_argument("--no-sandbox")
|
|
|
-
|
|
|
- # driver初始化
|
|
|
- if machine == "aliyun":
|
|
|
- driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
|
|
|
- elif machine == "macpro":
|
|
|
- driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
|
|
|
- service=Service('/Users/lieyunye/Downloads/chromedriver_v107/chromedriver'))
|
|
|
- elif machine == "macair":
|
|
|
- driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
|
|
|
- service=Service('/Users/piaoquan/Downloads/chromedriver_v108/chromedriver'))
|
|
|
- else:
|
|
|
- driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
|
|
|
- '/Users/wangkun/Downloads/chromedriver/chromedriver_v109/chromedriver'))
|
|
|
-
|
|
|
- driver.implicitly_wait(10)
|
|
|
- # print('打开个人主页')
|
|
|
- driver.get(f'https://www.kuaishou.com/profile/{out_uid}')
|
|
|
- time.sleep(1)
|
|
|
-
|
|
|
- # print('解析cookies')
|
|
|
- logs = driver.get_log("performance")
|
|
|
- # Common.logger(log_type, crawler).info('已获取logs:{}\n', logs)
|
|
|
- # print('退出浏览器')
|
|
|
- driver.quit()
|
|
|
- for line in logs:
|
|
|
- msg = json.loads(line['message'])
|
|
|
- # Common.logger(log_type, crawler).info(f"{msg}\n\n")
|
|
|
- if 'message' not in msg:
|
|
|
- pass
|
|
|
- elif 'params' not in msg['message']:
|
|
|
- pass
|
|
|
- elif 'headers' not in msg['message']['params']:
|
|
|
- pass
|
|
|
- elif 'Cookie' not in msg['message']['params']['headers']:
|
|
|
- pass
|
|
|
- elif msg['message']['params']['headers']['Host'] != 'www.kuaishou.com':
|
|
|
- pass
|
|
|
- else:
|
|
|
- cookie = msg['message']['params']['headers']['Cookie']
|
|
|
- # Common.logger(log_type, crawler).info(f"{cookie}")
|
|
|
- return cookie
|
|
|
- except Exception as e:
|
|
|
- Common.logger(log_type, crawler).error(f"get_cookie:{e}\n")
|
|
|
+ sleep_time = 120
|
|
|
+ Common.logger(log_type, crawler).info(f"休眠{sleep_time}秒")
|
|
|
+ time.sleep(sleep_time)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|