wangkun 2 年之前
父節點
當前提交
06978e6786
共有 3 個文件被更改,包括 90 次插入和 91 次删除
  1. +2 -3
      main/run_shipinhao.py
  2. +1 -1
      xinshi/xinshi_app.py
  3. +87 -87
      xinshi/xinshi_pc.py

+ 2 - 3
main/run_shipinhao.py

@@ -16,9 +16,8 @@ class Main:
     def main(cls, env):
     def main(cls, env):
         while True:
         while True:
             if 16 >= datetime.datetime.now().hour >= 10:
             if 16 >= datetime.datetime.now().hour >= 10:
-                Common.logger('xinshi').info('开始抓取"新视-PC"内容\n')
-                XinshiPC.login('xinshi', env)
-
+                # Common.logger('xinshi').info('开始抓取"新视-PC"内容\n')
+                # XinshiPC.login('xinshi', env)
                 Recommend.run_recommend('recommend', env)
                 Recommend.run_recommend('recommend', env)
                 Common.del_logs('recommend')
                 Common.del_logs('recommend')
                 Common.logger('recommend').info('休眠{}小时\n', 24 - datetime.datetime.now().hour)
                 Common.logger('recommend').info('休眠{}小时\n', 24 - datetime.datetime.now().hour)

+ 1 - 1
xinshi/xinshi_app.py

@@ -46,7 +46,7 @@ class XinshiAPP:
                 'enableWebviewDetailsCollection': True,
                 'enableWebviewDetailsCollection': True,
                 'setWebContentsDebuggingEnabled': True,
                 'setWebContentsDebuggingEnabled': True,
                 # 'chromedriverExecutable': '/Users/wangkun/Downloads/chromedriver_v86/chromedriver',
                 # 'chromedriverExecutable': '/Users/wangkun/Downloads/chromedriver_v86/chromedriver',
-                'chromedriverExecutable': '/Users/piaoquan/Downloads/chromedriver',
+                'chromedriverExecutable': '/Users/lieyunye/Downloads/chromedriver_v86/chromedriver',
             }
             }
             driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
             driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
             driver.implicitly_wait(10)
             driver.implicitly_wait(10)

+ 87 - 87
xinshi/xinshi_pc.py

@@ -19,98 +19,98 @@ from xinshi.xinshi_app import XinshiAPP
 class XinshiPC:
 class XinshiPC:
     @classmethod
     @classmethod
     def login(cls, log_type, env):
     def login(cls, log_type, env):
+        # try:
+        # 打印请求配置
+        ca = DesiredCapabilities.CHROME
+        ca["goog:loggingPrefs"] = {"performance": "ALL"}
+
+        # 不打开浏览器运行
+        chrome_options = webdriver.ChromeOptions()
+        chrome_options.add_argument("headless")
+        chrome_options.add_argument(
+            f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+        chrome_options.add_argument("--no-sandbox")
+
+        # driver初始化
+        # Common.logger(log_type).info('初始化 webdriver')
+        # driver = webdriver.Chrome(desired_capabilities=ca)
+        # driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver_v106/chromedriver'))
+        driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/lieyunye/Downloads/chromedriver_v107/chromedriver'))
+
+        driver.implicitly_wait(10)
+        Common.logger(log_type).info('打开网页"新视-热门内容"')
+        driver.get('https://xs.newrank.cn/Material/faddish/recentHot')
+        driver.maximize_window()
+        driver.implicitly_wait(10)
+
+        time.sleep(1)
+        Common.logger(log_type).info('点击"登录/按钮"')
+        driver.find_element(By.XPATH, '//button[@class="ant-btn ant-btn-primary"]').click()
+
+        time.sleep(1)
+        Common.logger(log_type).info('点击"其他登录方式"')
+        driver.find_element(By.XPATH, '//span[@class ="_2XRFN1F6"]').click()
+
+        time.sleep(1)
+        Common.logger(log_type).info('输入手机号')
+        driver.find_element(By.XPATH, '//input[@class="_2DyE0cvF"]').send_keys('13426262515')
+        Common.logger(log_type).info('输入密码')
+        driver.find_element(By.XPATH, '//input[@placeholder="输入密码"]').send_keys('test111111')
+
+        time.sleep(1)
+        Common.logger(log_type).info('勾选"保持登录状态"')
+        driver.find_element(By.XPATH, '//input[@class="nrd-login-checkbox-input"]').click()
+        time.sleep(1)
+        Common.logger(log_type).info('点击"登录"')
+        driver.find_element(By.XPATH, '//button[@class="_3RtjFeM- _CH1sF8Xz _38DPDVRd"]').click()
+
+        # 滑块
         try:
         try:
-            # 打印请求配置
-            ca = DesiredCapabilities.CHROME
-            ca["goog:loggingPrefs"] = {"performance": "ALL"}
-
-            # 不打开浏览器运行
-            chrome_options = webdriver.ChromeOptions()
-            chrome_options.add_argument("headless")
-            chrome_options.add_argument(
-                f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-            chrome_options.add_argument("--no-sandbox")
-
-            # driver初始化
-            # Common.logger(log_type).info('初始化 webdriver')
-            # driver = webdriver.Chrome(desired_capabilities=ca)
-            # driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver_v106/chromedriver'))
-            driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/piaoquan/Downloads/chromedriver_v107/chromedriver'))
-
-            driver.implicitly_wait(10)
-            Common.logger(log_type).info('打开网页"新视-热门内容"')
-            driver.get('https://xs.newrank.cn/Material/faddish/recentHot')
-            driver.maximize_window()
-            driver.implicitly_wait(10)
-
+            slider = driver.find_element(By.XPATH, '//span[@class="nc_iconfont btn_slide"]')
+            slider_full = driver.find_element(By.XPATH, '//div[@class="scale_text slidetounlock"]')
+            Common.logger(log_type).info('拖动滑块')
             time.sleep(1)
             time.sleep(1)
-            Common.logger(log_type).info('点击"登录/按钮"')
-            driver.find_element(By.XPATH, '//button[@class="ant-btn ant-btn-primary"]').click()
+            """
+            解决特征识别的代码
+            script = 'Object.defineProperty(navigator, "webdriver", {get: () => false,});'
+            driver.execute_script(script)
+            如果不采取去除特征识别,即以下两行代码。则页面的滑块验证码在滑动后,会显示如下图的出错,从而阻止登录进行。
+            因为服务器识别到的selenium的特征。使用该两行代码更改了特征,即可以顺利通过识别。
+            一般是反爬虫机制,用selenium打开的浏览器,就算手动去滑动都不行。
+            """
+            script = 'Object.defineProperty(navigator, "webdriver", {get: () => false,});'
+            driver.execute_script(script)
+            ActionChains(driver).drag_and_drop_by_offset(
+                slider, slider_full.size['width'], -slider.size['height']).perform()
+        except NoSuchElementException:
+            Common.logger(log_type).info('没有滑块')
+            pass
+
+        # 登录成功,获取到头像
+        time.sleep(3)
+        try:
+            driver.find_element(By.XPATH, '//img[@class="_J1BGEmMJ"]')
+            Common.logger(log_type).info('登录成功\n')
+        except NoSuchElementException:
+            Common.logger(log_type).info('登录失败,重新登录\n')
+            driver.quit()
+            cls.login(log_type, env)
 
 
-            time.sleep(1)
-            Common.logger(log_type).info('点击"其他登录方式"')
-            driver.find_element(By.XPATH, '//span[@class ="_2XRFN1F6"]').click()
+        # 获取热门内容
+        cls.get_recenhot(log_type, driver, env)
+        Common.logger(log_type).info('新视-热门内容抓取完毕\n')
 
 
-            time.sleep(1)
-            Common.logger(log_type).info('输入手机号')
-            driver.find_element(By.XPATH, '//input[@class="_2DyE0cvF"]').send_keys('13426262515')
-            Common.logger(log_type).info('输入密码')
-            driver.find_element(By.XPATH, '//input[@placeholder="输入密码"]').send_keys('test111111')
+        # 获取十万推荐内容
+        cls.get_hundredthousand(log_type, driver, env)
+        Common.logger(log_type).info('新视-十万推荐内容抓取完毕\n')
 
 
-            time.sleep(1)
-            Common.logger(log_type).info('勾选"保持登录状态"')
-            driver.find_element(By.XPATH, '//input[@class="nrd-login-checkbox-input"]').click()
-            time.sleep(1)
-            Common.logger(log_type).info('点击"登录"')
-            driver.find_element(By.XPATH, '//button[@class="_3RtjFeM- _CH1sF8Xz _38DPDVRd"]').click()
-
-            # 滑块
-            try:
-                slider = driver.find_element(By.XPATH, '//span[@class="nc_iconfont btn_slide"]')
-                slider_full = driver.find_element(By.XPATH, '//div[@class="scale_text slidetounlock"]')
-                Common.logger(log_type).info('拖动滑块')
-                time.sleep(1)
-                """
-                解决特征识别的代码
-                script = 'Object.defineProperty(navigator, "webdriver", {get: () => false,});'
-                driver.execute_script(script)
-                如果不采取去除特征识别,即以下两行代码。则页面的滑块验证码在滑动后,会显示如下图的出错,从而阻止登录进行。
-                因为服务器识别到的selenium的特征。使用该两行代码更改了特征,即可以顺利通过识别。
-                一般是反爬虫机制,用selenium打开的浏览器,就算手动去滑动都不行。
-                """
-                script = 'Object.defineProperty(navigator, "webdriver", {get: () => false,});'
-                driver.execute_script(script)
-                ActionChains(driver).drag_and_drop_by_offset(
-                    slider, slider_full.size['width'], -slider.size['height']).perform()
-            except NoSuchElementException:
-                Common.logger(log_type).info('没有滑块')
-                pass
-
-            # 登录成功,获取到头像
-            time.sleep(3)
-            try:
-                driver.find_element(By.XPATH, '//img[@class="_J1BGEmMJ"]')
-                Common.logger(log_type).info('登录成功\n')
-            except NoSuchElementException:
-                Common.logger(log_type).info('登录失败,重新登录\n')
-                driver.quit()
-                cls.login(log_type, env)
-
-            # 获取热门内容
-            cls.get_recenhot(log_type, driver, env)
-            Common.logger(log_type).info('新视-热门内容抓取完毕\n')
-
-            # 获取十万推荐内容
-            cls.get_hundredthousand(log_type, driver, env)
-            Common.logger(log_type).info('新视-十万推荐内容抓取完毕\n')
-
-            time.sleep(5)
-            Common.logger(log_type).info('退出浏览器\n')
-            driver.close()
-            driver.quit()
-        except Exception as e:
-            Common.logger(log_type).error('XinshiPC异常,重启浏览器:{}\n', e)
-            cls.login(log_type, env)
+        time.sleep(5)
+        Common.logger(log_type).info('退出浏览器\n')
+        driver.close()
+        driver.quit()
+        # except Exception as e:
+        #     Common.logger(log_type).error('XinshiPC异常,重启浏览器:{}\n', e)
+        #     cls.login(log_type, env)
 
 
     @classmethod
     @classmethod
     def get_recenhot(cls, log_type, driver, env):
     def get_recenhot(cls, log_type, driver, env):