فهرست منبع

xiaoniangao_plus_提交_1010

luojunhui 1 سال پیش
والد
کامیت
c2b162ee06

+ 5 - 0
main/start_appium.sh

@@ -28,6 +28,11 @@ elif [ ${crawler} = "shipinhao" ] || [ ${log_type} = "search" ];then
   profile_path=/etc/profile
   profile_path=/etc/profile
   node_path=/usr/local/bin/node
   node_path=/usr/local/bin/node
   log_path=${piaoquan_crawler_dir}main/main_logs/start-appium-$(date +%Y-%m-%d).log
   log_path=${piaoquan_crawler_dir}main/main_logs/start-appium-$(date +%Y-%m-%d).log
+elif [ ${crawler} = "xngplus" ] ;then
+  piaoquan_crawler_dir=/Users/a123456/Desktop/spider/piaoquan_crawler/
+  profile_path=/.base_profile
+  node_path=/usr/local/bin/node
+  log_path=${piaoquan_crawler_dir}main/main_logs/process-mq-$(date +%Y-%m-%d).log
 else
 else
   piaoquan_crawler_dir=/Users/wangkun/Desktop/crawler/piaoquan_crawler/
   piaoquan_crawler_dir=/Users/wangkun/Desktop/crawler/piaoquan_crawler/
   profile_path=/etc/profile
   profile_path=/etc/profile

+ 34 - 3
xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_scheduling.py

@@ -5,11 +5,13 @@ import json
 import os
 import os
 import sys
 import sys
 import time
 import time
+import random
 from hashlib import md5
 from hashlib import md5
 
 
 from appium import webdriver
 from appium import webdriver
 from appium.webdriver.extensions.android.nativekey import AndroidKey
 from appium.webdriver.extensions.android.nativekey import AndroidKey
 from appium.webdriver.webdriver import WebDriver
 from appium.webdriver.webdriver import WebDriver
+# from appium.webdriver.common.touch_action import TouchAction
 from bs4 import BeautifulSoup
 from bs4 import BeautifulSoup
 from selenium.common.exceptions import NoSuchElementException
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
 from selenium.webdriver.common.by import By
@@ -26,6 +28,7 @@ class XiaoNianGaoPlusRecommend:
     download_cnt = 0
     download_cnt = 0
     element_list = []
     element_list = []
     i = 0
     i = 0
+    swipe_count = 0
 
 
     @classmethod
     @classmethod
     def start_wechat(cls, log_type, crawler, env, rule_dict, our_uid):
     def start_wechat(cls, log_type, crawler, env, rule_dict, our_uid):
@@ -60,6 +63,7 @@ class XiaoNianGaoPlusRecommend:
         }
         }
         driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
         driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
         driver.implicitly_wait(30)
         driver.implicitly_wait(30)
+        # action = TouchAction(driver)
 
 
         for i in range(120):
         for i in range(120):
             try:
             try:
@@ -100,7 +104,10 @@ class XiaoNianGaoPlusRecommend:
             try:
             try:
                 elements = driver.find_elements(By.XPATH, xpath)
                 elements = driver.find_elements(By.XPATH, xpath)
                 if elements:
                 if elements:
+                    # print(" driver find elements")
                     return elements
                     return elements
+                # else:
+                #     print(" driver not find elements")
             except NoSuchElementException:
             except NoSuchElementException:
                 pass
                 pass
 
 
@@ -108,7 +115,7 @@ class XiaoNianGaoPlusRecommend:
     def check_to_applet(cls, log_type, crawler, env, driver: WebDriver, xpath):
     def check_to_applet(cls, log_type, crawler, env, driver: WebDriver, xpath):
         time.sleep(1)
         time.sleep(1)
         webViews = driver.contexts
         webViews = driver.contexts
-        driver.switch_to.context(webViews[-1])
+        driver.switch_to.context(webViews[1])
         windowHandles = driver.window_handles
         windowHandles = driver.window_handles
         for handle in windowHandles:
         for handle in windowHandles:
             driver.switch_to.window(handle)
             driver.switch_to.window(handle)
@@ -133,6 +140,7 @@ class XiaoNianGaoPlusRecommend:
         size = driver.get_window_size()
         size = driver.get_window_size()
         driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
         driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
                      int(size["width"] * 0.5), int(size["height"] * 0.4), 200)
                      int(size["width"] * 0.5), int(size["height"] * 0.4), 200)
+        cls.swipe_count += 1
 
 
     @classmethod
     @classmethod
     def get_video_url(cls, log_type, crawler, driver: WebDriver, video_title_element, env):
     def get_video_url(cls, log_type, crawler, driver: WebDriver, video_title_element, env):
@@ -188,7 +196,11 @@ class XiaoNianGaoPlusRecommend:
             if len(video_list_elements) == 0:
             if len(video_list_elements) == 0:
                 for i in range(10):
                 for i in range(10):
                     Common.logger(log_type, crawler).info(f"向上滑动第{i + 1}次")
                     Common.logger(log_type, crawler).info(f"向上滑动第{i + 1}次")
+                    # scroll_down(action, driver)
+                    # print(f"向上滑动第{i + 1}次 s")
                     cls.swipe_up(driver)
                     cls.swipe_up(driver)
+                    # print(f"向上滑动第{i + 1}次 e")
+
                     time.sleep(0.5)
                     time.sleep(0.5)
                 continue
                 continue
 
 
@@ -315,7 +327,11 @@ class XiaoNianGaoPlusRecommend:
                         cls.download_cnt += 1
                         cls.download_cnt += 1
                         driver.press_keycode(AndroidKey.BACK)
                         driver.press_keycode(AndroidKey.BACK)
                         time.sleep(5)
                         time.sleep(5)
+                        # scroll_down(action, driver)
                         cls.swipe_up(driver)
                         cls.swipe_up(driver)
+                        if cls.swipe_count > 200:
+                            print("一共滑动超过200次")
+                            return
                 except Exception as e:
                 except Exception as e:
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                     Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
                     Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
@@ -325,10 +341,25 @@ class XiaoNianGaoPlusRecommend:
             page += 1
             page += 1
 
 
 
 
def scroll_down(action, driver: "WebDriver", pause=1):
    """Perform one vertical drag gesture to move between videos in the feed.

    The gesture presses at the horizontal centre of the screen, 25% of the
    way down, holds for a random 200-400 ms, drags to 75% of the way down,
    then releases.

    NOTE(review): the original (Chinese) docstring described this as
    "scroll down to enter the next video"; the finger itself travels from
    the upper quarter to the lower quarter of the screen -- confirm this is
    the intended direction for the target page.

    Args:
        action: Touch-action builder bound to ``driver``; must provide
            ``press``, ``wait``, ``move_to``, ``release`` and ``perform``
            (presumably an Appium ``TouchAction`` -- see the commented-out
            import in this module).
        driver: Appium driver, used only to read the window size.
        pause: Seconds to sleep before the gesture starts. Defaults to 1,
            which is the delay the function originally hard-coded.

    Returns:
        None.
    """
    time.sleep(pause)

    # Query the window size once: each call is a round-trip to the Appium
    # server, and a single snapshot keeps width and height consistent.
    window = driver.get_window_size()
    width = window['width']
    height = window['height']

    centre_x = int(0.5 * width)
    action.press(x=centre_x, y=int(0.25 * height))
    # Randomised hold time so consecutive swipes are not perfectly uniform.
    action.wait(ms=random.randint(200, 400))
    action.move_to(x=centre_x, y=int(0.75 * height))
    action.release()
    action.perform()
+
+
 if __name__ == "__main__":
 if __name__ == "__main__":
     rule_dict1 = {"period": {"min": 365, "max": 365},
     rule_dict1 = {"period": {"min": 365, "max": 365},
                   "duration": {"min": 30, "max": 1800},
                   "duration": {"min": 30, "max": 1800},
-                  "favorite_cnt": {"min": 0, "max": 0},
+                  "favorite_cnt": {"min": 999999, "max": 0},
                   "videos_cnt": {"min": 100, "max": 0},
                   "videos_cnt": {"min": 100, "max": 0},
-                  "share_cnt": {"min": 0, "max": 0}}
+                  "share_cnt": {"min": 999999, "max": 0}}
     XiaoNianGaoPlusRecommend.start_wechat("recommend", "xiaoniangao", "dev", rule_dict1, 6267141)
     XiaoNianGaoPlusRecommend.start_wechat("recommend", "xiaoniangao", "dev", rule_dict1, 6267141)

+ 1 - 1
xiaoniangaoplus/xiaoniangaoplus_main/run_xngplus_dev.py

@@ -15,7 +15,7 @@ def xiaoniangao_plus_recommend_main(log_type, crawler, env):
     XiaoNianGaoPlusRecommend.start_wechat(log_type=log_type,
     XiaoNianGaoPlusRecommend.start_wechat(log_type=log_type,
                                           crawler=crawler,
                                           crawler=crawler,
                                           rule_dict={"duration": {"min": 40, "max": 0},
                                           rule_dict={"duration": {"min": 40, "max": 0},
-                                                     "play_cnt": {"min": 20000, "max": 0},
+                                                     "play_cnt": {"min": 2000000, "max": 0},
                                                      "period": {"min": 60, "max": 60}},
                                                      "period": {"min": 60, "max": 60}},
                                           our_uid=6267140,
                                           our_uid=6267140,
                                           env=env)
                                           env=env)

+ 34 - 6
xiaoniangaoplus/xiaoniangaoplus_main/run_xngplus_recommend.py

@@ -6,12 +6,24 @@ import random
 from mq_http_sdk.mq_client import *
 from mq_http_sdk.mq_client import *
 from mq_http_sdk.mq_consumer import *
 from mq_http_sdk.mq_consumer import *
 from mq_http_sdk.mq_exception import MQExceptionBase
 from mq_http_sdk.mq_exception import MQExceptionBase
+import multiprocessing
 
 
 sys.path.append(os.getcwd())
 sys.path.append(os.getcwd())
 from common.public import get_consumer, ack_message, task_fun_mq
 from common.public import get_consumer, ack_message, task_fun_mq
 from common.common import Common
 from common.common import Common
 from common.scheduling_db import MysqlHelper
 from common.scheduling_db import MysqlHelper
-from xiaoniangaoplus.xiaoniangaoplus.xiaoniangao_plus_scheduling import XiaoNianGaoPlusRecommend
+# from xiaoniangaoplus.xiaoniangaoplus.xiaoniangao_plus_scheduling import XiaoNianGaoPlusRecommend
+from xiaoniangaoplus.xiaoniangaoplus.xiaoniangao_plus_scheduling2 import XiaoNianGaoPlusRecommend
+
+
def run(args1, args2, args3, args4, args5):
    """Process target: build a ``XiaoNianGaoPlusRecommend`` from five positional values.

    ``main`` passes this function to ``multiprocessing.Process`` with
    ``args=(log_type, crawler, env, rule_dict, our_uid)``, so the
    positional parameters map as follows:

    Args:
        args1: Forwarded as ``log_type``.
        args2: Forwarded as ``crawler``.
        args3: Forwarded as ``env``.
        args4: Forwarded as ``rule_dict``.
        args5: Forwarded as ``our_uid``.

    Returns:
        None; the constructed object is discarded.
    """
    # NOTE(review): the code this replaced called the classmethod
    # ``XiaoNianGaoPlusRecommend.start_wechat(...)``; here the class from
    # ``xiaoniangao_plus_scheduling2`` is instantiated instead -- presumably
    # its constructor starts the crawl; confirm against that module.
    crawl_kwargs = {
        "log_type": args1,
        "crawler": args2,
        "env": args3,
        "rule_dict": args4,
        "our_uid": args5,
    }
    XiaoNianGaoPlusRecommend(**crawl_kwargs)
 
 
 
 
 def main(log_type, crawler, topic_name, group_id, env):
 def main(log_type, crawler, topic_name, group_id, env):
@@ -75,11 +87,27 @@ def main(log_type, crawler, topic_name, group_id, env):
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
                 Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
-                XiaoNianGaoPlusRecommend.start_wechat(log_type=log_type,
-                                                      crawler=crawler,
-                                                      rule_dict=rule_dict,
-                                                      our_uid=our_uid,
-                                                      env=env)
+                process = multiprocessing.Process(
+                    target=run,
+                    args=(log_type, crawler, env, rule_dict, our_uid)
+                )
+                process.start()
+                print("进程开始")
+
+                while True:
+                    if not process.is_alive():
+                        print("正在重启")
+                        process.terminate()
+                        os.system("adb forward --remove-all")
+                        time.sleep(60)
+                        process = multiprocessing.Process(target=run, args=(log_type, crawler, env, rule_dict, our_uid))
+                        process.start()
+                    time.sleep(60)
+                # XiaoNianGaoPlusRecommend.start_wechat(log_type=log_type,
+                #                                       crawler=crawler,
+                #                                       rule_dict=rule_dict,
+                #                                       our_uid=our_uid,
+                #                                       env=env)
                 # Common.del_logs(log_type, crawler)
                 # Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
                 Common.logging(log_type, crawler, env, '抓取一轮结束\n')
                 Common.logging(log_type, crawler, env, '抓取一轮结束\n')