|
@@ -15,10 +15,10 @@ import json
|
|
|
import random
|
|
|
# import emoji
|
|
|
import requests
|
|
|
-from selenium import webdriver
|
|
|
-from selenium.webdriver.chrome.service import Service
|
|
|
-from selenium.webdriver.common.by import By
|
|
|
-from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
+# from selenium import webdriver
|
|
|
+# from selenium.webdriver.chrome.service import Service
|
|
|
+# from selenium.webdriver.common.by import By
|
|
|
+# from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
|
|
|
sys.path.append(os.getcwd())
|
|
|
from common.common import Common
|
|
@@ -57,62 +57,62 @@ class YoutubeFollow:
|
|
|
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
|
|
|
}
|
|
|
|
|
|
- @classmethod
|
|
|
- def get_browse_id(cls, log_type, crawler, out_user_id, machine):
|
|
|
- """
|
|
|
- 获取每个用户的 browse_id
|
|
|
- :param log_type: 日志
|
|
|
- :param crawler: 哪款爬虫
|
|
|
- :param out_user_id: 站外用户 UID
|
|
|
- :param machine: 部署机器,阿里云填写 aliyun / aliyun_hk,线下分别填写 macpro,macair,local
|
|
|
- :return: browse_id
|
|
|
- """
|
|
|
- try:
|
|
|
- # 打印请求配置
|
|
|
- ca = DesiredCapabilities.CHROME
|
|
|
- ca["goog:loggingPrefs"] = {"performance": "ALL"}
|
|
|
-
|
|
|
- # 不打开浏览器运行
|
|
|
- chrome_options = webdriver.ChromeOptions()
|
|
|
- chrome_options.add_argument("--headless")
|
|
|
- chrome_options.add_argument(
|
|
|
- '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
|
|
|
- chrome_options.add_argument("--no-sandbox")
|
|
|
-
|
|
|
- # driver初始化
|
|
|
- if machine == 'aliyun' or machine == 'aliyun_hk':
|
|
|
- driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
|
|
|
- elif machine == 'macpro':
|
|
|
- driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
|
|
|
- service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
|
|
|
- elif machine == 'macair':
|
|
|
- driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
|
|
|
- service=Service('/Users/piaoquan/Downloads/chromedriver'))
|
|
|
- else:
|
|
|
- driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
|
|
|
- '/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
|
|
|
-
|
|
|
- driver.implicitly_wait(10)
|
|
|
- url = f'https://www.youtube.com/{out_user_id}/videos'
|
|
|
- driver.get(url)
|
|
|
- # driver.save_screenshot("./1.png")
|
|
|
- # 向上滑动 1000 个像素
|
|
|
- # driver.execute_script('window.scrollBy(0, 2000)')
|
|
|
- # driver.save_screenshot("./2.png")
|
|
|
- time.sleep(3)
|
|
|
- accept_btns = driver.find_elements(By.XPATH, '//span[text()="全部接受"]')
|
|
|
- accept_btns_eng = driver.find_elements(By.XPATH, '//span[text()="Accept all"]')
|
|
|
- if len(accept_btns) != 0:
|
|
|
- accept_btns[0].click()
|
|
|
- time.sleep(2)
|
|
|
- elif len(accept_btns_eng) != 0:
|
|
|
- accept_btns_eng[0].click()
|
|
|
- time.sleep(2)
|
|
|
- browse_id = driver.find_element(By.XPATH, '//meta[@itemprop="channelId"]').get_attribute('content')
|
|
|
- driver.quit()
|
|
|
- return browse_id
|
|
|
- except Exception as e:
|
|
|
- Common.logger(log_type, crawler).error(f'get_browse_id异常:{e}\n')
|
|
|
+ # @classmethod
|
|
|
+ # def get_browse_id(cls, log_type, crawler, out_user_id, machine):
|
|
|
+ # """
|
|
|
+ # 获取每个用户的 browse_id
|
|
|
+ # :param log_type: 日志
|
|
|
+ # :param crawler: 哪款爬虫
|
|
|
+ # :param out_user_id: 站外用户 UID
|
|
|
+ # :param machine: 部署机器,阿里云填写 aliyun / aliyun_hk,线下分别填写 macpro,macair,local
|
|
|
+ # :return: browse_id
|
|
|
+ # """
|
|
|
+ # try:
|
|
|
+ # # 打印请求配置
|
|
|
+ # ca = DesiredCapabilities.CHROME
|
|
|
+ # ca["goog:loggingPrefs"] = {"performance": "ALL"}
|
|
|
+ #
|
|
|
+ # # 不打开浏览器运行
|
|
|
+ # chrome_options = webdriver.ChromeOptions()
|
|
|
+ # chrome_options.add_argument("--headless")
|
|
|
+ # chrome_options.add_argument(
|
|
|
+ # '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
|
|
|
+ # chrome_options.add_argument("--no-sandbox")
|
|
|
+ #
|
|
|
+ # # driver初始化
|
|
|
+ # if machine == 'aliyun' or machine == 'aliyun_hk':
|
|
|
+ # driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
|
|
|
+ # elif machine == 'macpro':
|
|
|
+ # driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
|
|
|
+ # service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
|
|
|
+ # elif machine == 'macair':
|
|
|
+ # driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
|
|
|
+ # service=Service('/Users/piaoquan/Downloads/chromedriver'))
|
|
|
+ # else:
|
|
|
+ # driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
|
|
|
+ # '/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
|
|
|
+ #
|
|
|
+ # driver.implicitly_wait(10)
|
|
|
+ # url = f'https://www.youtube.com/{out_user_id}/videos'
|
|
|
+ # driver.get(url)
|
|
|
+ # # driver.save_screenshot("./1.png")
|
|
|
+ # # 向上滑动 1000 个像素
|
|
|
+ # # driver.execute_script('window.scrollBy(0, 2000)')
|
|
|
+ # # driver.save_screenshot("./2.png")
|
|
|
+ # time.sleep(3)
|
|
|
+ # accept_btns = driver.find_elements(By.XPATH, '//span[text()="全部接受"]')
|
|
|
+ # accept_btns_eng = driver.find_elements(By.XPATH, '//span[text()="Accept all"]')
|
|
|
+ # if len(accept_btns) != 0:
|
|
|
+ # accept_btns[0].click()
|
|
|
+ # time.sleep(2)
|
|
|
+ # elif len(accept_btns_eng) != 0:
|
|
|
+ # accept_btns_eng[0].click()
|
|
|
+ # time.sleep(2)
|
|
|
+ # browse_id = driver.find_element(By.XPATH, '//meta[@itemprop="channelId"]').get_attribute('content')
|
|
|
+ # driver.quit()
|
|
|
+ # return browse_id
|
|
|
+ # except Exception as e:
|
|
|
+ # Common.logger(log_type, crawler).error(f'get_browse_id异常:{e}\n')
|
|
|
|
|
|
@classmethod
|
|
|
def get_out_user_info(cls, log_type, crawler, browse_id, out_user_id):
|
|
@@ -177,7 +177,7 @@ class YoutubeFollow:
|
|
|
try:
|
|
|
user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
|
|
|
user_list = []
|
|
|
- for i in range(271, len(user_sheet)):
|
|
|
+ for i in range(1, len(user_sheet)):
|
|
|
out_uid = user_sheet[i][2]
|
|
|
user_name = user_sheet[i][3]
|
|
|
browse_id = user_sheet[i][5]
|