lierqiang 2 år sedan
förälder
incheckning
60d7e5dfe5

+ 56 - 60
kuaishou/kuaishou_follow/kuaishou_follow.py

@@ -13,10 +13,6 @@ import json
 
 
 import urllib3
 import urllib3
 from requests.adapters import HTTPAdapter
 from requests.adapters import HTTPAdapter
-from selenium import webdriver
-from selenium.webdriver import DesiredCapabilities
-from selenium.webdriver.chrome.service import Service
-
 sys.path.append(os.getcwd())
 sys.path.append(os.getcwd())
 from common.common import Common
 from common.common import Common
 from common.feishu import Feishu
 from common.feishu import Feishu
@@ -303,62 +299,62 @@ class Follow:
         else:
         else:
             return video_title
             return video_title
 
 
-    @classmethod
-    def get_cookie(cls, log_type, crawler, out_uid, machine):
-        try:
-            # 打印请求配置
-            ca = DesiredCapabilities.CHROME
-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
-
-            # 不打开浏览器运行
-            chrome_options = webdriver.ChromeOptions()
-            chrome_options.add_argument("headless")
-            chrome_options.add_argument(
-                f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-            chrome_options.add_argument("--no-sandbox")
-
-            # driver初始化
-            if machine == "aliyun":
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-            elif machine == "macpro":
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-                                          service=Service('/Users/lieyunye/Downloads/chromedriver_v107/chromedriver'))
-            elif machine == "macair":
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-                                          service=Service('/Users/piaoquan/Downloads/chromedriver_v108/chromedriver'))
-            else:
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
-                    '/Users/wangkun/Downloads/chromedriver/chromedriver_v109/chromedriver'))
-
-            driver.implicitly_wait(10)
-            # print('打开个人主页')
-            driver.get(f'https://www.kuaishou.com/profile/{out_uid}')
-            time.sleep(1)
-
-            # print('解析cookies')
-            logs = driver.get_log("performance")
-            # Common.logger(log_type, crawler).info('已获取logs:{}\n', logs)
-            # print('退出浏览器')
-            driver.quit()
-            for line in logs:
-                msg = json.loads(line['message'])
-                # Common.logger(log_type, crawler).info(f"{msg}\n\n")
-                if 'message' not in msg:
-                    pass
-                elif 'params' not in msg['message']:
-                    pass
-                elif 'headers' not in msg['message']['params']:
-                    pass
-                elif 'Cookie' not in msg['message']['params']['headers']:
-                    pass
-                elif msg['message']['params']['headers']['Host'] != 'www.kuaishou.com':
-                    pass
-                else:
-                    cookie = msg['message']['params']['headers']['Cookie']
-                    # Common.logger(log_type, crawler).info(f"{cookie}")
-                    return cookie
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_cookie:{e}\n")
+    # @classmethod
+    # def get_cookie(cls, log_type, crawler, out_uid, machine):
+    #     try:
+    #         # 打印请求配置
+    #         ca = DesiredCapabilities.CHROME
+    #         ca["goog:loggingPrefs"] = {"performance": "ALL"}
+    #
+    #         # 不打开浏览器运行
+    #         chrome_options = webdriver.ChromeOptions()
+    #         chrome_options.add_argument("headless")
+    #         chrome_options.add_argument(
+    #             f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+    #         chrome_options.add_argument("--no-sandbox")
+    #
+    #         # driver初始化
+    #         if machine == "aliyun":
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+    #         elif machine == "macpro":
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
+    #                                       service=Service('/Users/lieyunye/Downloads/chromedriver_v107/chromedriver'))
+    #         elif machine == "macair":
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
+    #                                       service=Service('/Users/piaoquan/Downloads/chromedriver_v108/chromedriver'))
+    #         else:
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
+    #                 '/Users/wangkun/Downloads/chromedriver/chromedriver_v109/chromedriver'))
+    #
+    #         driver.implicitly_wait(10)
+    #         # print('打开个人主页')
+    #         driver.get(f'https://www.kuaishou.com/profile/{out_uid}')
+    #         time.sleep(1)
+    #
+    #         # print('解析cookies')
+    #         logs = driver.get_log("performance")
+    #         # Common.logger(log_type, crawler).info('已获取logs:{}\n', logs)
+    #         # print('退出浏览器')
+    #         driver.quit()
+    #         for line in logs:
+    #             msg = json.loads(line['message'])
+    #             # Common.logger(log_type, crawler).info(f"{msg}\n\n")
+    #             if 'message' not in msg:
+    #                 pass
+    #             elif 'params' not in msg['message']:
+    #                 pass
+    #             elif 'headers' not in msg['message']['params']:
+    #                 pass
+    #             elif 'Cookie' not in msg['message']['params']['headers']:
+    #                 pass
+    #             elif msg['message']['params']['headers']['Host'] != 'www.kuaishou.com':
+    #                 pass
+    #             else:
+    #                 cookie = msg['message']['params']['headers']['Cookie']
+    #                 # Common.logger(log_type, crawler).info(f"{cookie}")
+    #                 return cookie
+    #     except Exception as e:
+    #         Common.logger(log_type, crawler).error(f"get_cookie:{e}\n")
 
 
     @classmethod
     @classmethod
     def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
     def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):

+ 36 - 36
xigua/xigua_follow/xigua_follow.py

@@ -15,10 +15,10 @@ import requests
 import urllib3
 import urllib3
 from requests.adapters import HTTPAdapter
 from requests.adapters import HTTPAdapter
 
 
-from selenium.webdriver import DesiredCapabilities
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.common.by import By
-from selenium import webdriver
+# from selenium.webdriver import DesiredCapabilities
+# from selenium.webdriver.chrome.service import Service
+# from selenium.webdriver.common.by import By
+# from selenium import webdriver
 from lxml import etree
 from lxml import etree
 
 
 sys.path.append(os.getcwd())
 sys.path.append(os.getcwd())
@@ -225,38 +225,38 @@ class Follow:
             new_password = new_password_start + 'y' + new_password_end
             new_password = new_password_start + 'y' + new_password_end
         return new_password
         return new_password
 
 
-    @classmethod
-    def get_signature(cls, log_type, crawler, out_uid, machine):
-        try:
-            # 打印请求配置
-            ca = DesiredCapabilities.CHROME
-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
-
-            # 不打开浏览器运行
-            chrome_options = webdriver.ChromeOptions()
-            chrome_options.add_argument("--headless")
-            chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-            chrome_options.add_argument("--no-sandbox")
-
-            # driver初始化
-            if machine == 'aliyun' or machine == 'aliyun_hk':
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-            elif machine == 'macpro':
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-                                          service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
-            elif machine == 'macair':
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-                                          service=Service('/Users/piaoquan/Downloads/chromedriver'))
-            else:
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
-            driver.implicitly_wait(10)
-            driver.get(f'https://www.ixigua.com/home/{out_uid}/')
-            time.sleep(3)
-            data_src = driver.find_elements(By.XPATH, '//img[@class="tt-img BU-MagicImage tt-img-loaded"]')[1].get_attribute("data-src")
-            signature = data_src.split("x-signature=")[-1]
-            return signature
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_signature异常:{e}\n')
+    # @classmethod
+    # def get_signature(cls, log_type, crawler, out_uid, machine):
+    #     try:
+    #         # 打印请求配置
+    #         ca = DesiredCapabilities.CHROME
+    #         ca["goog:loggingPrefs"] = {"performance": "ALL"}
+    #
+    #         # 不打开浏览器运行
+    #         chrome_options = webdriver.ChromeOptions()
+    #         chrome_options.add_argument("--headless")
+    #         chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+    #         chrome_options.add_argument("--no-sandbox")
+    #
+    #         # driver初始化
+    #         if machine == 'aliyun' or machine == 'aliyun_hk':
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+    #         elif machine == 'macpro':
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
+    #                                       service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
+    #         elif machine == 'macair':
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
+    #                                       service=Service('/Users/piaoquan/Downloads/chromedriver'))
+    #         else:
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
+    #         driver.implicitly_wait(10)
+    #         driver.get(f'https://www.ixigua.com/home/{out_uid}/')
+    #         time.sleep(3)
+    #         data_src = driver.find_elements(By.XPATH, '//img[@class="tt-img BU-MagicImage tt-img-loaded"]')[1].get_attribute("data-src")
+    #         signature = data_src.split("x-signature=")[-1]
+    #         return signature
+    #     except Exception as e:
+    #         Common.logger(log_type, crawler).error(f'get_signature异常:{e}\n')
 
 
     # 获取视频详情
     # 获取视频详情
     @classmethod
     @classmethod

+ 61 - 61
youtube/youtube_follow/youtube_follow_api.py

@@ -15,10 +15,10 @@ import json
 import random
 import random
 # import emoji
 # import emoji
 import requests
 import requests
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.common.by import By
-from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+# from selenium import webdriver
+# from selenium.webdriver.chrome.service import Service
+# from selenium.webdriver.common.by import By
+# from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 
 
 sys.path.append(os.getcwd())
 sys.path.append(os.getcwd())
 from common.common import Common
 from common.common import Common
@@ -57,62 +57,62 @@ class YoutubeFollow:
         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
         'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
     }
     }
 
 
-    @classmethod
-    def get_browse_id(cls, log_type, crawler, out_user_id, machine):
-        """
-        获取每个用户的 browse_id
-        :param log_type: 日志
-        :param crawler: 哪款爬虫
-        :param out_user_id: 站外用户 UID
-        :param machine: 部署机器,阿里云填写 aliyun / aliyun_hk,线下分别填写 macpro,macair,local
-        :return: browse_id
-        """
-        try:
-            # 打印请求配置
-            ca = DesiredCapabilities.CHROME
-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
-
-            # 不打开浏览器运行
-            chrome_options = webdriver.ChromeOptions()
-            chrome_options.add_argument("--headless")
-            chrome_options.add_argument(
-                '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-            chrome_options.add_argument("--no-sandbox")
-
-            # driver初始化
-            if machine == 'aliyun' or machine == 'aliyun_hk':
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
-            elif machine == 'macpro':
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-                                          service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
-            elif machine == 'macair':
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
-                                          service=Service('/Users/piaoquan/Downloads/chromedriver'))
-            else:
-                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
-                    '/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
-
-            driver.implicitly_wait(10)
-            url = f'https://www.youtube.com/{out_user_id}/videos'
-            driver.get(url)
-            # driver.save_screenshot("./1.png")
-            # 向上滑动 1000 个像素
-            # driver.execute_script('window.scrollBy(0, 2000)')
-            # driver.save_screenshot("./2.png")
-            time.sleep(3)
-            accept_btns = driver.find_elements(By.XPATH, '//span[text()="全部接受"]')
-            accept_btns_eng = driver.find_elements(By.XPATH, '//span[text()="Accept all"]')
-            if len(accept_btns) != 0:
-                accept_btns[0].click()
-                time.sleep(2)
-            elif len(accept_btns_eng) != 0:
-                accept_btns_eng[0].click()
-                time.sleep(2)
-            browse_id = driver.find_element(By.XPATH, '//meta[@itemprop="channelId"]').get_attribute('content')
-            driver.quit()
-            return browse_id
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_browse_id异常:{e}\n')
+    # @classmethod
+    # def get_browse_id(cls, log_type, crawler, out_user_id, machine):
+    #     """
+    #     获取每个用户的 browse_id
+    #     :param log_type: 日志
+    #     :param crawler: 哪款爬虫
+    #     :param out_user_id: 站外用户 UID
+    #     :param machine: 部署机器,阿里云填写 aliyun / aliyun_hk,线下分别填写 macpro,macair,local
+    #     :return: browse_id
+    #     """
+    #     try:
+    #         # 打印请求配置
+    #         ca = DesiredCapabilities.CHROME
+    #         ca["goog:loggingPrefs"] = {"performance": "ALL"}
+    #
+    #         # 不打开浏览器运行
+    #         chrome_options = webdriver.ChromeOptions()
+    #         chrome_options.add_argument("--headless")
+    #         chrome_options.add_argument(
+    #             '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+    #         chrome_options.add_argument("--no-sandbox")
+    #
+    #         # driver初始化
+    #         if machine == 'aliyun' or machine == 'aliyun_hk':
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+    #         elif machine == 'macpro':
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
+    #                                       service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
+    #         elif machine == 'macair':
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
+    #                                       service=Service('/Users/piaoquan/Downloads/chromedriver'))
+    #         else:
+    #             driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service(
+    #                 '/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
+    #
+    #         driver.implicitly_wait(10)
+    #         url = f'https://www.youtube.com/{out_user_id}/videos'
+    #         driver.get(url)
+    #         # driver.save_screenshot("./1.png")
+    #         # 向上滑动 1000 个像素
+    #         # driver.execute_script('window.scrollBy(0, 2000)')
+    #         # driver.save_screenshot("./2.png")
+    #         time.sleep(3)
+    #         accept_btns = driver.find_elements(By.XPATH, '//span[text()="全部接受"]')
+    #         accept_btns_eng = driver.find_elements(By.XPATH, '//span[text()="Accept all"]')
+    #         if len(accept_btns) != 0:
+    #             accept_btns[0].click()
+    #             time.sleep(2)
+    #         elif len(accept_btns_eng) != 0:
+    #             accept_btns_eng[0].click()
+    #             time.sleep(2)
+    #         browse_id = driver.find_element(By.XPATH, '//meta[@itemprop="channelId"]').get_attribute('content')
+    #         driver.quit()
+    #         return browse_id
+    #     except Exception as e:
+    #         Common.logger(log_type, crawler).error(f'get_browse_id异常:{e}\n')
 
 
     @classmethod
     @classmethod
     def get_out_user_info(cls, log_type, crawler, browse_id, out_user_id):
     def get_out_user_info(cls, log_type, crawler, browse_id, out_user_id):
@@ -177,7 +177,7 @@ class YoutubeFollow:
         try:
         try:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             user_list = []
             user_list = []
-            for i in range(271, len(user_sheet)):
+            for i in range(1, len(user_sheet)):
                 out_uid = user_sheet[i][2]
                 out_uid = user_sheet[i][2]
                 user_name = user_sheet[i][3]
                 user_name = user_sheet[i][3]
                 browse_id = user_sheet[i][5]
                 browse_id = user_sheet[i][5]