zhufuquanzi_recommend.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/9/6
  4. import os
  5. import sys
  6. import time
  7. from appium import webdriver
  8. from appium.webdriver.webdriver import WebDriver
  9. from selenium.common import NoSuchElementException
  10. from selenium.webdriver.common.by import By
  11. sys.path.append(os.getcwd())
  12. from common.common import Common
  13. class ZhufuquanziRecommend:
  14. platform = "祝福圈子"
  15. download_cnt = 0
  16. i = 0
  17. @classmethod
  18. def start_wechat(cls, log_type, crawler, env, rule_dict):
  19. if env == "dev":
  20. chromedriverExecutable = "/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver"
  21. else:
  22. chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
  23. Common.logger(log_type, crawler).info("启动微信")
  24. Common.logging(log_type, crawler, env, '启动微信')
  25. caps = {
  26. "platformName": "Android",
  27. "devicesName": "Android",
  28. "platformVersion": "12",
  29. # "udid": "emulator-5554",
  30. "appPackage": "com.tencent.mm",
  31. "appActivity": ".ui.LauncherUI",
  32. "autoGrantPermissions": "true",
  33. "unicodekeyboard": True,
  34. "resetkeyboard": True,
  35. "noReset": True,
  36. "printPageSourceOnFailure": True,
  37. "newCommandTimeout": 6000,
  38. "aotomationName": "UiAutomator2",
  39. "showChromedriverLog": True,
  40. "enableWebviewDetailsCollection": True,
  41. "setWebContentsDebuggingEnabled": True,
  42. "recreateChromeDriverSessions": True,
  43. "chromedriverExecutable": chromedriverExecutable,
  44. "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
  45. }
  46. driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
  47. driver.implicitly_wait(30)
  48. for i in range(120):
  49. try:
  50. if driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
  51. Common.logger(log_type, crawler).info("微信启动成功")
  52. Common.logging(log_type, crawler, env, '微信启动成功')
  53. break
  54. elif driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
  55. Common.logger(log_type, crawler).info("发现并关闭系统下拉菜单")
  56. Common.logging(log_type, crawler, env, '发现并关闭系统下拉菜单')
  57. driver.find_element(By.ID, "com.android.system:id/dismiss_view").click()
  58. else:
  59. pass
  60. except NoSuchElementException:
  61. time.sleep(1)
  62. Common.logger(log_type, crawler).info("下滑,展示小程序选择面板")
  63. Common.logging(log_type, crawler, env, '下滑,展示小程序选择面板')
  64. size = driver.get_window_size()
  65. driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
  66. int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
  67. time.sleep(5)
  68. Common.logger(log_type, crawler).info('打开小程序"祝福圈子"')
  69. Common.logging(log_type, crawler, env, '打开小程序"祝福圈子"')
  70. driver.find_elements(By.XPATH, '//*[@text="祝福圈子"]')[-1].click()
  71. time.sleep(10)
  72. cls.get_videoList(log_type, crawler, driver, env, rule_dict)
  73. time.sleep(3)
  74. driver.quit()
  75. @classmethod
  76. def search_elements(cls, driver: WebDriver, xpath):
  77. time.sleep(1)
  78. windowHandles = driver.window_handles
  79. for handle in windowHandles:
  80. driver.switch_to.window(handle)
  81. time.sleep(1)
  82. try:
  83. elements = driver.find_elements(By.XPATH, xpath)
  84. if elements:
  85. return elements
  86. except NoSuchElementException:
  87. pass
  88. @classmethod
  89. def check_to_appplet(cls, log_type, crawler, env, driver: WebDriver, xpath):
  90. time.sleep(1)
  91. webviews = driver.contexts
  92. Common.logger(log_type, crawler).info(f"webviews:{webviews}")
  93. Common.logging(log_type, crawler, env, f"webviews:{webviews}")
  94. driver.switch_to.context(webviews[1])
  95. windowHandles = driver.window_handles
  96. for handle in windowHandles:
  97. driver.switch_to.window(handle)
  98. time.sleep(1)
  99. try:
  100. driver.find_element(By.XPATH, xpath)
  101. Common.logger(log_type, crawler).info("切换到小程序成功\n")
  102. Common.logging(log_type, crawler, env, '切换到小程序成功\n')
  103. return
  104. except NoSuchElementException:
  105. time.sleep(1)
  106. @classmethod
  107. def get_videoList(cls, log_type, crawler, driver: WebDriver, env, rule_dict):
  108. driver.implicitly_wait(20)
  109. cls.check_to_appplet(log_type=log_type,
  110. crawler=crawler,
  111. env=env,
  112. driver=driver,
  113. xpath='//*[@class="tags--tag tags--tag-0 tags--checked"]')
  114. time.sleep(3)
  115. index = 0
  116. while True:
  117. if cls.search_elements(driver, '//*[@class="bless--list"]') is None:
  118. Common.logger(log_type, crawler).info("窗口已销毁\n")
  119. Common.logging(log_type, crawler, env, '窗口已销毁\n')
  120. return
  121. video_list_elements = cls.search_elements(driver,
  122. '//*[@is="pages/discover/components/bless/dynamic/dynamic"]')
  123. if video_list_elements is None:
  124. Common.logger(log_type, crawler).warning(f"当前视频列表为空:{video_list_elements}")
  125. Common.logging(log_type, crawler, env, f"当前视频列表为空:{video_list_elements}")
  126. return
  127. video_list = video_list_elements[index:]
  128. if len(video_list) == 0 or video_list is None:
  129. Common.logger(log_type, crawler).info("到底啦~~~~~~~~~~\n")
  130. Common.logging(log_type, crawler, env, "到底啦~~~~~~~~~~\n")
  131. return
  132. for i, video_element in enumerate(video_list):
  133. if cls.download_cnt >= int(rule_dict.get("videos_cnt", {}).get("min", 20)):
  134. Common.logger(log_type, crawler).info(f"本轮已抓取视频数:{cls.download_cnt}")
  135. Common.logging(log_type, crawler, env, f"本轮已抓取视频数:{cls.download_cnt}")
  136. return
  137. if video_element is None:
  138. Common.logger(log_type, crawler).info("没有更多数据啦\n")
  139. Common.logging(log_type, crawler, env, "没有更多数据啦\n")
  140. return
  141. cls.i += 1
  142. Common.logger(log_type, crawler).info(f"拖动第{cls.i}条视频至屏幕中间")
  143. Common.logging(log_type, crawler, env, f"拖动第{cls.i}条视频至屏幕中间")
  144. time.sleep(3)
  145. driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})",
  146. video_element)
  147. cls.download_cnt += 1
  148. Common.logger(log_type, crawler).info("已抓取完一组,休眠 10 秒\n")
  149. Common.logging(log_type, crawler, env, "已抓取完一组,休眠 10 秒\n")
  150. time.sleep(10)
  151. index = index + len(video_list)
  152. if __name__ == "__main__":
  153. rule_dict1 = {"period": {"min": 365, "max": 365},
  154. "duration": {"min": 30, "max": 1800},
  155. "favorite_cnt": {"min": 5000, "max": 0},
  156. "videos_cnt": {"min": 10, "max": 20},
  157. "share_cnt": {"min": 1000, "max": 0}}
  158. ZhufuquanziRecommend.start_wechat("recommend", "zhufuquanzi", "dev", rule_dict1)