zhongmiaoyinxin_recommend_new.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. # 众妙音信接入后台
  2. # @Author: luojunhui
  3. # @Time: 2023/10/26
  4. import json
  5. import os
  6. import sys
  7. import time
  8. import uuid
  9. from hashlib import md5
  10. from appium import webdriver
  11. from appium.webdriver.common.touch_action import TouchAction
  12. from appium.webdriver.extensions.android.nativekey import AndroidKey
  13. from selenium.common.exceptions import NoSuchElementException
  14. from selenium.webdriver.common.by import By
  15. sys.path.append(os.getcwd())
  16. from common.mq import MQ
  17. from common.aliyun_log import AliyunLogger
  18. from common.pipeline import PiaoQuanPipeline
  class ZMYXRecommend:
      """Appium-driven crawler for the recommend feed of a WeChat mini program.

      Constructing an instance runs the whole crawl: it opens an Appium session
      against WeChat (``com.tencent.mm``), opens the mini program entry whose
      text is "西瓜悦", walks the video list inside the mini program webview,
      filters each video through ``PiaoQuanPipeline`` and publishes accepted
      videos to the ``topic_crawler_etl_<env>`` MQ topic. The Appium session is
      always closed before the constructor returns.

      Args:
          log_type: crawl mode; used as ``mode`` in logs and as the ``strategy``
              field of published videos.
          crawler: crawler name; used as trace-id prefix and ``platform`` field.
          env: ``"dev"`` selects the developer chromedriver path, anything else
              the production one; also used as the MQ topic suffix.
          rule_dict: filter rules handed to ``PiaoQuanPipeline``.
          our_uid: user id written into the ``user_id`` field of published videos.
      """

      # Element whose presence is treated as "WeChat has started".
      WECHAT_HOME_ID = "com.tencent.mm:id/f2s"
      # Element whose presence is treated as "the system pull-down panel is open".
      NOTIFICATION_DISMISS_ID = "com.android.systemui:id/dismiss_view"

      def __init__(self, log_type, crawler, env, rule_dict, our_uid):
          self.mq = None
          self.driver = None
          self.platform = "zhongmiaoyinxin"
          self.download_cnt = 0
          self.element_list = []
          self.count = 0
          self.swipe_count = 0
          self.log_type = log_type
          self.crawler = crawler
          self.env = env
          self.rule_dict = rule_dict
          self.our_uid = our_uid
          if self.env == "dev":
              chromedriver_executable = "/Users/luojunhui/Downloads/chromedriver_V111/chromedriver"
          else:
              chromedriver_executable = "/Users/piaoquan/Downloads/chromedriver"
          # Appium capabilities for driving WeChat.
          caps = {
              "platformName": "Android",  # mobile OS: Android / iOS
              # NOTE(review): Appium documents this capability as "deviceName";
              # confirm whether the "devicesName" spelling is intentional.
              "devicesName": "Android",
              # "udid": "emulator-5554",  # which device from `adb devices` to use
              # "platforVersion": "11",  # OS version of the device
              "appPackage": "com.tencent.mm",  # package of the app under test (WeChat)
              "appActivity": ".ui.LauncherUI",  # activity to launch
              # Let appium grant base permissions automatically (Android only);
              # per the original note this has no effect while noReset is True.
              "autoGrantPermissions": "true",
              "unicodekeyboard": True,  # use the built-in IME (needed for Chinese input)
              "resetkeyboard": True,  # restore the original IME afterwards
              "noReset": True,  # do not reset the app
              # Make the appium log record the full page source when an element is missing.
              "printPageSourceOnFailure": True,
              "newCommandTimeout": 6000,
              # Automation engine; UiAutomator2 is one of the Android engines.
              "automationName": "UiAutomator2",
              "showChromedriverLog": True,
              "enableWebviewDetailsCollection": True,
              "setWebContentsDebuggingEnabled": True,
              "recreateChromeDriverSessions": True,
              "chromedriverExecutable": chromedriver_executable,
              "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
              # "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
              "browserName": "",
          }
          try:
              self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
          except Exception:
              # No session was created, so there is nothing to clean up.
              self._log("3002", "appium 启动异常")
              return
          try:
              self.driver.implicitly_wait(30)
              self._open_applet_and_crawl()
          finally:
              # Always release the Appium session, including on errors and on
              # the branches where WeChat could not be opened.
              self.driver.quit()

      def _log(self, code, message, **extra):
          """Send one record to AliyunLogger tagged with this crawler's platform, mode and env."""
          AliyunLogger.logging(
              code=code,
              platform=self.platform,
              mode=self.log_type,
              env=self.env,
              message=message,
              **extra
          )

      def _open_applet_and_crawl(self):
          """Open the mini program from WeChat and crawl it, or log why that was not possible."""
          if self.check_wechat():
              size = self.driver.get_window_size()
              # Drag downwards on the WeChat screen before looking for the entry.
              self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
                                int(size['width'] * 0.5), int(size['height'] * 0.8), 100)
              time.sleep(1)
              self.driver.find_elements(By.XPATH, '//*[@text="西瓜悦"]')[-1].click()
              self._log("1000", "打开小程序西瓜悦成功")
              time.sleep(5)
              self.get_videoList()
              # NOTE(review): the purpose of this long pause before closing the
              # session is not evident from this file; kept as in the original.
              time.sleep(100)
          elif self.driver.find_elements(By.ID, self.NOTIFICATION_DISMISS_ID):
              # find_elements returns an empty list instead of raising, so the
              # final "else" branch below stays reachable.
              self._log("1000", "发现并关闭系统下拉菜单")
              size = self.driver.get_window_size()
              self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.8),
                                int(size['width'] * 0.5), int(size['height'] * 0.2), 200)
          else:
              self._log("3001", "打开微信异常")

      def search_elements(self, xpath):
          """Return the elements matching ``xpath`` from the first window that has any.

          Switches through every window handle in turn and leaves the driver on
          the window where the match was found. Returns ``None`` when no window
          contains a match.
          """
          time.sleep(1)
          for handle in self.driver.window_handles:
              self.driver.switch_to.window(handle)
              time.sleep(1)
              try:
                  elements = self.driver.find_elements(By.XPATH, xpath)
              except NoSuchElementException:
                  continue
              if elements:
                  return elements
          return None

      # Check whether WeChat has been opened.
      def check_wechat(self):
          """Poll up to 10 times for WeChat's home element; return True once it is found.

          If the system pull-down panel is showing instead, it is dismissed and
          the check is retried. Returns ``False`` when all attempts are used up.
          """
          for attempt in range(10):
              if self.driver.find_elements(By.ID, self.WECHAT_HOME_ID):
                  self._log("1000", "启动微信成功")
                  return True
              dismiss_buttons = self.driver.find_elements(By.ID, self.NOTIFICATION_DISMISS_ID)
              if dismiss_buttons:
                  print("发现并关闭系统下拉菜单")
                  self._log("1000", "第{}次错误打开了通知栏".format(attempt + 1))
                  # Click the element that was just located; the original looked
                  # it up again under a misspelled id and could never click it.
                  dismiss_buttons[0].click()
              else:
                  time.sleep(10)
          return False

      def check_to_applet(self):
          """Switch into the mini program webview and click the first navbar entry.

          Returns:
              True when the navbar entry was found and clicked in one of the
              webview windows, False otherwise (including when no second
              driver context exists).
          """
          contexts = self.driver.contexts
          if len(contexts) < 2:
              # Only one context is available, so there is no webview to enter.
              print("切换 webview 失败")
              return False
          self.driver.switch_to.context(contexts[1])
          for handle in self.driver.window_handles:
              self.driver.switch_to.window(handle)
              time.sleep(1)
              try:
                  self.driver.find_element(By.XPATH, '//*[@class="index--navbar-list"]/*[1]').click()
              except NoSuchElementException:
                  time.sleep(1)
                  continue
              print("切换 webview 成功")
              return True
          print("切换 webview 失败")
          return False

      def swipe_up(self):
          """Swipe the list upwards once and count the swipe."""
          self.search_elements('//*[@class="list-list--list"]')
          size = self.driver.get_window_size()
          self.driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
                            int(size["width"] * 0.5), int(size["height"] * 0.442), 200)
          self.swipe_count += 1

      def close_ad(self):
          """Tap the point at 50% width / 10% height of the screen (used to close an ad)."""
          window_size = self.driver.get_window_size()
          TouchAction(self.driver).tap(
              x=int(window_size['width'] * 0.5),
              y=int(window_size['height'] * 0.1),
          ).perform()

      def get_video_url(self, video_element):
          """Open ``video_element`` and return the ``src`` of the player, or ``None``.

          Every window handle is tried until one contains the ``wx-video``
          player element; ``None`` is returned when none does.
          """
          video_element.click()
          time.sleep(5)
          for handle in self.driver.window_handles:
              self.driver.switch_to.window(handle)
              time.sleep(1)
              try:
                  player = self.driver.find_element(By.XPATH, '//wx-video[@class="videoh"]')
              except NoSuchElementException:
                  time.sleep(1)
                  continue
              video_url = player.get_attribute("src")
              self.close_ad()
              return video_url
          return None

      @staticmethod
      def _parse_play_cnt(text):
          """Convert a displayed play count such as "1.2万" or "456" to an int.

          "万" means ten thousand. Text that cannot be parsed yields 0.
          """
          text = (text or "").strip()
          try:
              if "万" in text:
                  # round() guards against float artefacts such as 2.3 * 10000.
                  return int(round(float(text.split("万")[0]) * 10000))
              return int(float(text))
          except (ValueError, OverflowError):
              return 0

      def get_videoList(self):
          """Walk the video list in the mini program and publish accepted videos.

          Returns when the list container disappears, when no video elements are
          found, or when a pass over the list yields no elements beyond those
          already processed.
          """
          self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
          self.driver.implicitly_wait(20)
          self.close_ad()
          self._log("1000", "已经关闭广告")
          if not self.check_to_applet():
              # Do not report success (or keep scraping) when the switch failed.
              self._log("3000", "切换 webview 失败")
              return
          self._log("1000", "成功切换到 webview")
          index = 0  # number of list items already processed
          while True:
              if self.search_elements('//*[@id="scrollContainer"]') is None:
                  self._log("3000", "窗口已销毁")
                  return
              video_elements = self.search_elements('//wx-view[@class="cover"]')
              if video_elements is None:
                  self._log("2000", "视频列表为空列表")
                  return
              pending = video_elements[index:]
              if not pending:
                  self._log("2000", "视频已经到底")
                  return
              for offset, video_element in enumerate(pending):
                  if video_element is None:
                      return
                  self._process_video(video_element, index + offset)
              # Advance past what was just handled; without this the loop
              # re-scanned the same items forever.
              index += len(pending)

      def _process_video(self, video_element, position):
          """Scrape, filter and (if accepted) publish the video at list ``position``."""
          # The trace id is the unique key for this video's whole lifecycle.
          trace_id = self.crawler + str(uuid.uuid1())
          self._log("1001", "扫描到一条视频", trace_id=trace_id)
          self.download_cnt += 1
          # Called for its side effect of switching back to the list window.
          self.search_elements('//wx-view[@class="cover"]')
          time.sleep(3)
          self.driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})",
                                     video_element)
          # These XPaths start with "//", so the lookups are indexed by the
          # item's position in the whole list rather than within this element.
          video_title = video_element.find_elements(
              By.XPATH, '//wx-view[@class="playImgs"]')[position].text
          cover_url = video_element.find_elements(
              By.XPATH, '//wx-image[@class="coverImg"]')[position].get_attribute('src')
          play_cnt = self._parse_play_cnt(
              video_element.find_elements(
                  By.XPATH, '//wx-image[@class="coverImg"]/span/*[2]')[position].text
          )
          # The id is the MD5 of the title.
          out_video_id = md5(video_title.encode('utf8')).hexdigest()
          # The crawl time stands in for the publish time; read the clock once
          # so all derived fields agree.
          now = int(time.time())
          video_dict = {
              'video_title': video_title,
              'video_id': out_video_id,
              'out_video_id': out_video_id,
              'play_cnt': play_cnt,
              'comment_cnt': 0,
              'like_cnt': 0,
              'share_cnt': 0,
              'publish_time_stamp': now,
              'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(now)),
              'update_time_stamp': now,
              'user_name': "zhongmiaoyinxin",
              'user_id': "zhongmiaoyinxin",
              'avatar_url': cover_url,
              'cover_url': cover_url,
              'session': f"zhongmiaoyinxin-{now}",
          }
          pipeline = PiaoQuanPipeline(
              platform=self.crawler,
              mode=self.log_type,
              item=video_dict,
              rule_dict=self.rule_dict,
              env=self.env,
              trace_id=trace_id
          )
          if not pipeline.process_item():
              return
          print(video_dict)
          video_url = self.get_video_url(video_element)
          if video_url is not None:
              video_dict["video_url"] = video_url
              video_dict['strategy'] = self.log_type
              video_dict["out_user_id"] = ""
              video_dict["platform"] = self.crawler
              video_dict["crawler_rule"] = json.dumps(self.rule_dict)
              video_dict["user_id"] = self.our_uid
              video_dict["publish_time"] = video_dict["publish_time_str"]
              self.mq.send_msg(video_dict)
              self._log("1002", "成功发送 MQ 至 ETL", data=video_dict, trace_id=trace_id)
          # Leave the video page whether or not a URL was obtained.
          self.driver.press_keycode(AndroidKey.BACK)
  def run():
      """Run one "recommend" crawl of zhongmiaoyinxin in the prod environment.

      The crawl is performed by constructing ZMYXRecommend with an empty rule
      dict and uid 6267141; the instance itself is not kept.
      """
      empty_rules = {}
      ZMYXRecommend(
          log_type="recommend",
          crawler="zhongmiaoyinxin",
          env="prod",
          rule_dict=empty_rules,
          our_uid=6267141,
      )
  # Script entry point: start the crawl only when this file is executed
  # directly, not when it is imported.
  if __name__ == "__main__":
      run()