|  | @@ -5,16 +5,15 @@ import json
 | 
	
		
			
				|  |  |  import os
 | 
	
		
			
				|  |  |  import sys
 | 
	
		
			
				|  |  |  import time
 | 
	
		
			
				|  |  | +import uuid
 | 
	
		
			
				|  |  |  from hashlib import md5
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  from appium import webdriver
 | 
	
		
			
				|  |  |  from appium.webdriver.common.touch_action import TouchAction
 | 
	
		
			
				|  |  |  from appium.webdriver.extensions.android.nativekey import AndroidKey
 | 
	
		
			
				|  |  |  from appium.webdriver.webdriver import WebDriver
 | 
	
		
			
				|  |  | -from bs4 import BeautifulSoup
 | 
	
		
			
				|  |  |  from selenium.common.exceptions import NoSuchElementException
 | 
	
		
			
				|  |  |  from selenium.webdriver.common.by import By
 | 
	
		
			
				|  |  | -import multiprocessing
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  sys.path.append(os.getcwd())
 | 
	
		
			
				|  |  |  from common.mq import MQ
 | 
	
	
		
			
				|  | @@ -29,7 +28,7 @@ class ZMYXRecommend:
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def __init__(self, log_type, crawler, env, rule_dict, our_uid):
 | 
	
		
			
				|  |  |          self.mq = None
 | 
	
		
			
				|  |  | -        self.platform = "众妙音信"
 | 
	
		
			
				|  |  | +        self.platform = "zhongmiaoyinxin"
 | 
	
		
			
				|  |  |          self.download_cnt = 0
 | 
	
		
			
				|  |  |          self.element_list = []
 | 
	
		
			
				|  |  |          self.count = 0
 | 
	
	
		
			
				|  | @@ -69,32 +68,46 @@ class ZMYXRecommend:
 | 
	
		
			
				|  |  |              # "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
 | 
	
		
			
				|  |  |              'browserName': ''
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  | -        self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            AliyunLogger.logging(
 | 
	
		
			
				|  |  | +                code="3002",
 | 
	
		
			
				|  |  | +                platform=self.platform,
 | 
	
		
			
				|  |  | +                mode=self.log_type,
 | 
	
		
			
				|  |  | +                env=self.env,
 | 
	
		
			
				|  |  | +                message="appium 启动异常"
 | 
	
		
			
				|  |  | +            )
 | 
	
		
			
				|  |  | +            return
 | 
	
		
			
				|  |  |          self.driver.implicitly_wait(30)
 | 
	
		
			
				|  |  | +        wechat_flag = self.check_wechat()
 | 
	
		
			
				|  |  | +        if wechat_flag:
 | 
	
		
			
				|  |  | +            size = self.driver.get_window_size()
 | 
	
		
			
				|  |  | +            self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
 | 
	
		
			
				|  |  | +                              int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
 | 
	
		
			
				|  |  | +            time.sleep(1)
 | 
	
		
			
				|  |  | +            self.driver.find_elements(By.XPATH, '//*[@text="西瓜悦"]')[-1].click()
 | 
	
		
			
				|  |  | +            AliyunLogger.logging(
 | 
	
		
			
				|  |  | +                code="1000",
 | 
	
		
			
				|  |  | +                platform=self.platform,
 | 
	
		
			
				|  |  | +                env=self.env,
 | 
	
		
			
				|  |  | +                mode=self.log_type,
 | 
	
		
			
				|  |  | +                message="打开小程序西瓜悦成功"
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -        for i in range(120):
 | 
	
		
			
				|  |  | -            try:
 | 
	
		
			
				|  |  | -                if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
 | 
	
		
			
				|  |  | -                    print("启动微信成功")
 | 
	
		
			
				|  |  | -                    break
 | 
	
		
			
				|  |  | -                elif self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
 | 
	
		
			
				|  |  | -                    print("发现并关闭系统下拉菜单")
 | 
	
		
			
				|  |  | -                    self.driver.find_element(By.ID, "com.android.system:id/dismiss_view").click()
 | 
	
		
			
				|  |  | -                else:
 | 
	
		
			
				|  |  | -                    pass
 | 
	
		
			
				|  |  | -            except NoSuchElementException:
 | 
	
		
			
				|  |  | -                time.sleep(1)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -        size = self.driver.get_window_size()
 | 
	
		
			
				|  |  | -        self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
 | 
	
		
			
				|  |  | -                          int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
 | 
	
		
			
				|  |  | -        time.sleep(1)
 | 
	
		
			
				|  |  | -        self.driver.find_elements(By.XPATH, '//*[@text="西瓜悦"]')[-1].click()
 | 
	
		
			
				|  |  | -        print("打开小程序成功")
 | 
	
		
			
				|  |  | -        time.sleep(5)
 | 
	
		
			
				|  |  | -        self.get_videoList()
 | 
	
		
			
				|  |  | -        time.sleep(100)
 | 
	
		
			
				|  |  | -        self.driver.quit()
 | 
	
		
			
				|  |  | +            )
 | 
	
		
			
				|  |  | +            time.sleep(5)
 | 
	
		
			
				|  |  | +            self.get_videoList()
 | 
	
		
			
				|  |  | +            time.sleep(100)
 | 
	
		
			
				|  |  | +            self.driver.quit()
 | 
	
		
			
				|  |  | +        else:
 | 
	
		
			
				|  |  | +            AliyunLogger.logging(
 | 
	
		
			
				|  |  | +                code="3001",
 | 
	
		
			
				|  |  | +                platform=self.platform,
 | 
	
		
			
				|  |  | +                mode=self.log_type,
 | 
	
		
			
				|  |  | +                env=self.env,
 | 
	
		
			
				|  |  | +                message="打开微信异常"
 | 
	
		
			
				|  |  | +            )
 | 
	
		
			
				|  |  | +            return
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def search_elements(self, xpath):
 | 
	
		
			
				|  |  |          time.sleep(1)
 | 
	
	
		
			
				|  | @@ -109,6 +122,35 @@ class ZMYXRecommend:
 | 
	
		
			
				|  |  |              except NoSuchElementException:
 | 
	
		
			
				|  |  |                  pass
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    # 检查是否打开微信
 | 
	
		
			
    def check_wechat(self):
        """Poll the device until WeChat is detected, dismissing the notification shade if found.

        Makes up to 10 attempts. On each attempt:

        * If any element with id ``com.tencent.mm:id/f2s`` exists, a success
          entry (code ``1000``) is logged and ``True`` is returned. The code
          treats that element as the sign that WeChat has started.
        * Otherwise the system ``dismiss_view`` element is looked up. If it is
          present, the event is logged and the element is clicked to close the
          notification shade, then the next attempt starts immediately.
        * If the lookup raises ``NoSuchElementException``, the method sleeps
          10 seconds before the next attempt (on top of any implicit wait
          configured on the driver).

        Returns:
            bool: ``True`` once the WeChat element is found, ``False`` if it
            was not found within 10 attempts.
        """
        wechat_marker_id = "com.tencent.mm:id/f2s"
        dismiss_view_id = "com.android.systemui:id/dismiss_view"

        for attempt in range(1, 11):
            try:
                if self.driver.find_elements(By.ID, wechat_marker_id):
                    AliyunLogger.logging(
                        code="1000",
                        platform=self.platform,
                        mode=self.log_type,
                        env=self.env,
                        message="启动微信成功"
                    )
                    return True

                # find_element raises NoSuchElementException when the shade is
                # not showing, which sends us to the sleep-and-retry branch.
                dismiss_view = self.driver.find_element(By.ID, dismiss_view_id)
                print("发现并关闭系统下拉菜单")
                AliyunLogger.logging(
                    code="1000",
                    platform=self.platform,
                    mode=self.log_type,
                    env=self.env,
                    message="第{}次错误打开了通知栏".format(attempt)
                )
                # Click the element that was just located. The previous code
                # re-queried a different id ("com.android.system:..." without
                # the "ui" suffix), which could never match the element that
                # the guard had found, so the shade was never dismissed.
                dismiss_view.click()
            except NoSuchElementException:
                time.sleep(10)
        return False
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      def check_to_applet(self):
 | 
	
		
			
				|  |  |          while True:
 | 
	
		
			
				|  |  |              webview = self.driver.contexts
 | 
	
	
		
			
				|  | @@ -138,30 +180,76 @@ class ZMYXRecommend:
 | 
	
		
			
				|  |  |          window_size = self.driver.get_window_size()
 | 
	
		
			
				|  |  |          TouchAction(self.driver).tap(x=int(window_size['width'] * 0.5), y=int(window_size['height'] * 0.1)).perform()
 | 
	
		
			
				|  |  |  
 | 
	
		
			
    def get_video_url(self, video_element):
        """Open a video entry and read the ``src`` attribute of its player element.

        Clicks ``video_element``, waits 5 seconds, then switches through every
        window handle reported by the driver. In each window it looks for a
        ``wx-video`` element whose class is ``videoh``; on the first hit it
        reads that element's ``src`` attribute, calls ``self.close_ad()`` and
        returns the value.

        Args:
            video_element: The element to click in order to open the video.

        Returns:
            The ``src`` attribute value of the first matching player element,
            or ``None`` when no window contains one.
        """
        player_xpath = '//wx-video[@class="videoh"]'

        video_element.click()
        time.sleep(5)
        for window in self.driver.window_handles:
            self.driver.switch_to.window(window)
            time.sleep(1)
            try:
                player = self.driver.find_element(By.XPATH, player_xpath)
                source = player.get_attribute("src")
                self.close_ad()
                return source
            except NoSuchElementException:
                # Not in this window; pause briefly and try the next handle.
                time.sleep(1)
        # No window had the player element; callers test for None.
        return None
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |      def get_videoList(self):
 | 
	
		
			
				|  |  |          self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
 | 
	
		
			
				|  |  |          self.driver.implicitly_wait(20)
 | 
	
		
			
				|  |  | -        print("关闭广告")
 | 
	
		
			
				|  |  |          self.close_ad()
 | 
	
		
			
				|  |  | -        print("切换到 webview")
 | 
	
		
			
				|  |  | +        AliyunLogger.logging(
 | 
	
		
			
				|  |  | +            code="1000",
 | 
	
		
			
				|  |  | +            platform=self.platform,
 | 
	
		
			
				|  |  | +            mode=self.log_type,
 | 
	
		
			
				|  |  | +            env=self.env,
 | 
	
		
			
				|  |  | +            message="已经关闭广告"
 | 
	
		
			
				|  |  | +        )
 | 
	
		
			
				|  |  |          self.check_to_applet()
 | 
	
		
			
				|  |  | +        AliyunLogger.logging(
 | 
	
		
			
				|  |  | +            code="1000",
 | 
	
		
			
				|  |  | +            platform=self.platform,
 | 
	
		
			
				|  |  | +            mode=self.log_type,
 | 
	
		
			
				|  |  | +            env=self.env,
 | 
	
		
			
				|  |  | +            message="成功切换到 webview"
 | 
	
		
			
				|  |  | +        )
 | 
	
		
			
				|  |  |          index = 0
 | 
	
		
			
				|  |  |          while True:
 | 
	
		
			
				|  |  |              if self.search_elements('//*[@id="scrollContainer"]') is None:
 | 
	
		
			
				|  |  | -                print("窗口已销毁")
 | 
	
		
			
				|  |  | +                AliyunLogger.logging(
 | 
	
		
			
				|  |  | +                    code="3000",
 | 
	
		
			
				|  |  | +                    platform=self.platform,
 | 
	
		
			
				|  |  | +                    mode=self.log_type,
 | 
	
		
			
				|  |  | +                    env=self.env,
 | 
	
		
			
				|  |  | +                    message="窗口已销毁"
 | 
	
		
			
				|  |  | +                )
 | 
	
		
			
				|  |  |                  return
 | 
	
		
			
				|  |  | -            print("获取视频列表")
 | 
	
		
			
				|  |  |              video_elements = self.search_elements('//wx-view[@class="cover"]')
 | 
	
		
			
				|  |  |              if video_elements is None:
 | 
	
		
			
				|  |  | -                print("视频列表为空列表")
 | 
	
		
			
				|  |  | +                AliyunLogger.logging(
 | 
	
		
			
				|  |  | +                    code="2000",
 | 
	
		
			
				|  |  | +                    platform=self.platform,
 | 
	
		
			
				|  |  | +                    mode=self.log_type,
 | 
	
		
			
				|  |  | +                    env=self.env,
 | 
	
		
			
				|  |  | +                    message="视频列表为空列表"
 | 
	
		
			
				|  |  | +                )
 | 
	
		
			
				|  |  |                  return
 | 
	
		
			
				|  |  |              video_element_temp = video_elements[index:]
 | 
	
		
			
				|  |  |              if len(video_element_temp) == 0:
 | 
	
		
			
				|  |  | -                print("视频已经到底")
 | 
	
		
			
				|  |  | +                AliyunLogger.logging(
 | 
	
		
			
				|  |  | +                    code="2000",
 | 
	
		
			
				|  |  | +                    platform=self.platform,
 | 
	
		
			
				|  |  | +                    mode=self.log_type,
 | 
	
		
			
				|  |  | +                    env=self.env,
 | 
	
		
			
				|  |  | +                    message="视频已经到底"
 | 
	
		
			
				|  |  | +                )
 | 
	
		
			
				|  |  |                  return
 | 
	
		
			
				|  |  |              for i, video_element in enumerate(video_element_temp):
 | 
	
		
			
				|  |  |                  if video_element is None:
 | 
	
		
			
				|  |  |                      return
 | 
	
		
			
				|  |  | +                # 获取 trace_id, 并且把该 id 当做视频生命周期唯一索引
 | 
	
		
			
				|  |  | +                trace_id = self.crawler + str(uuid.uuid1())
 | 
	
		
			
				|  |  |                  self.download_cnt += 1
 | 
	
		
			
				|  |  |                  self.search_elements('//wx-view[@class="cover"]')
 | 
	
		
			
				|  |  |                  time.sleep(3)
 | 
	
	
		
			
				|  | @@ -198,20 +286,39 @@ class ZMYXRecommend:
 | 
	
		
			
				|  |  |                      mode=self.log_type,
 | 
	
		
			
				|  |  |                      item=video_dict,
 | 
	
		
			
				|  |  |                      rule_dict=self.rule_dict,
 | 
	
		
			
				|  |  | -                    env=self.env
 | 
	
		
			
				|  |  | +                    env=self.env,
 | 
	
		
			
				|  |  | +                    trace_id=trace_id
 | 
	
		
			
				|  |  |                  )
 | 
	
		
			
				|  |  |                  flag = pipeline.process_item()
 | 
	
		
			
				|  |  |                  if flag:
 | 
	
		
			
				|  |  |                      print(video_dict)
 | 
	
		
			
				|  |  | -                else:
 | 
	
		
			
				|  |  | -                    print("被规则过滤")
 | 
	
		
			
				|  |  | -                # self.mq.send_msg(video_dict)
 | 
	
		
			
				|  |  | -                # print(video_dict)
 | 
	
		
			
				|  |  | +                    video_url = self.get_video_url(video_element)
 | 
	
		
			
				|  |  | +                    if video_url is None:
 | 
	
		
			
				|  |  | +                        self.driver.press_keycode(AndroidKey.BACK)
 | 
	
		
			
				|  |  | +                    else:
 | 
	
		
			
				|  |  | +                        video_dict["video_url"] = video_url
 | 
	
		
			
				|  |  | +                        video_dict['strategy'] = self.log_type
 | 
	
		
			
				|  |  | +                        video_dict["out_user_id"] = ""
 | 
	
		
			
				|  |  | +                        video_dict["platform"] = self.crawler
 | 
	
		
			
				|  |  | +                        video_dict["crawler_rule"] = json.dumps(self.rule_dict)
 | 
	
		
			
				|  |  | +                        video_dict["user_id"] = self.our_uid
 | 
	
		
			
				|  |  | +                        video_dict["publish_time"] = video_dict["publish_time_str"]
 | 
	
		
			
				|  |  | +                        self.mq.send_msg(video_dict)
 | 
	
		
			
				|  |  | +                        AliyunLogger.logging(
 | 
	
		
			
				|  |  | +                            code="1002",
 | 
	
		
			
				|  |  | +                            platform=self.platform,
 | 
	
		
			
				|  |  | +                            mode=self.log_type,
 | 
	
		
			
				|  |  | +                            env=self.env,
 | 
	
		
			
				|  |  | +                            data=video_dict,
 | 
	
		
			
				|  |  | +                            trace_id=trace_id,
 | 
	
		
			
				|  |  | +                            message="成功发送 MQ 至 ETL",
 | 
	
		
			
				|  |  | +                        )
 | 
	
		
			
				|  |  | +                        self.driver.press_keycode(AndroidKey.BACK)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  def run():
 | 
	
		
			
				|  |  |      rule_dict1 = {}
 | 
	
		
			
				|  |  | -    ZMYXRecommend("recommend", "zhongmiaoyinxin", "dev", rule_dict1, 6267141)
 | 
	
		
			
				|  |  | +    ZMYXRecommend("recommend", "zhongmiaoyinxin", "prod", rule_dict1, 6267141)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  if __name__ == "__main__":
 |