zhongmiaoyinxin_recommend_new.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. # 众妙音信接入后台
  2. # @Author: luojunhui
  3. # @Time: 2023/10/26
  4. import json
  5. import os
  6. import sys
  7. import time
  8. import uuid
  9. from hashlib import md5
  10. from appium import webdriver
  11. from appium.webdriver.common.touch_action import TouchAction
  12. from appium.webdriver.extensions.android.nativekey import AndroidKey
  13. from selenium.common.exceptions import NoSuchElementException
  14. from selenium.webdriver.common.by import By
  15. sys.path.append(os.getcwd())
  16. from common.mq import MQ
  17. from common.aliyun_log import AliyunLogger
  18. from common.pipeline import PiaoQuanPipeline
  19. class ZMYXRecommend:
  20. def __init__(self, log_type, crawler, env, rule_dict, our_uid):
  21. self.mq = None
  22. self.platform = "zhongmiaoyinxin"
  23. self.download_cnt = 0
  24. self.element_list = []
  25. self.count = 0
  26. self.swipe_count = 0
  27. self.log_type = log_type
  28. self.crawler = crawler
  29. self.env = env
  30. self.rule_dict = rule_dict
  31. self.our_uid = our_uid
  32. if self.env == "dev":
  33. chromedriverExecutable = "/Users/luojunhui/Downloads/chromedriver_V111/chromedriver"
  34. else:
  35. chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
  36. # 微信的配置文件
  37. caps = {
  38. "platformName": "Android", # 手机操作系统 Android / iOS
  39. "deviceName": "a0a65126", # 连接的设备名(模拟器或真机),安卓可以随便写
  40. # "udid": "emulator-5554", # 指定 adb devices 中的哪一台设备
  41. "platforVersion": "11", # 手机对应的系统版本
  42. "appPackage": "com.tencent.mm", # 被测APP的包名,乐活圈 Android
  43. "appActivity": ".ui.LauncherUI", # 启动的Activity名
  44. "autoGrantPermissions": "true", # 让 appium 自动授权 base 权限,
  45. # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
  46. "unicodekeyboard": True, # 使用自带输入法,输入中文时填True
  47. "resetkeyboard": True, # 执行完程序恢复原来输入法
  48. "noReset": True, # 不重置APP
  49. "printPageSourceOnFailure": True, # 找不到元素时,appium log 会完整记录当前页面的 pagesource
  50. "newCommandTimeout": 6000, # 初始等待时间
  51. "automationName": "UiAutomator2", # 使用引擎,默认为 Appium,
  52. # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
  53. "showChromedriverLog": True,
  54. 'enableWebviewDetailsCollection': True,
  55. 'setWebContentsDebuggingEnabled': True,
  56. 'recreateChromeDriverSessions': True,
  57. 'chromedriverExecutable': chromedriverExecutable,
  58. "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
  59. # "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
  60. 'browserName': ''
  61. }
  62. try:
  63. self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
  64. except:
  65. AliyunLogger.logging(
  66. code="3002",
  67. platform=self.platform,
  68. mode=self.log_type,
  69. env=self.env,
  70. message="appium 启动异常"
  71. )
  72. return
  73. self.driver.implicitly_wait(30)
  74. wechat_flag = self.check_wechat()
  75. if wechat_flag:
  76. size = self.driver.get_window_size()
  77. self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
  78. int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
  79. time.sleep(1)
  80. self.driver.find_elements(By.XPATH, '//*[@text="西瓜悦"]')[-1].click()
  81. AliyunLogger.logging(
  82. code="1000",
  83. platform=self.platform,
  84. env=self.env,
  85. mode=self.log_type,
  86. message="打开小程序西瓜悦成功"
  87. )
  88. time.sleep(5)
  89. self.get_videoList()
  90. time.sleep(100)
  91. self.driver.quit()
  92. else:
  93. AliyunLogger.logging(
  94. code="3001",
  95. platform=self.platform,
  96. mode=self.log_type,
  97. env=self.env,
  98. message="打开微信异常"
  99. )
  100. return
  101. def search_elements(self, xpath):
  102. time.sleep(1)
  103. windowHandles = self.driver.window_handles
  104. for handle in windowHandles:
  105. self.driver.switch_to.window(handle)
  106. time.sleep(1)
  107. try:
  108. elements = self.driver.find_elements(By.XPATH, xpath)
  109. if elements:
  110. return elements
  111. except NoSuchElementException:
  112. pass
  113. # 检查是否打开微信
  114. def check_wechat(self):
  115. for i in range(10):
  116. try:
  117. if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
  118. AliyunLogger.logging(
  119. code="1000",
  120. platform=self.platform,
  121. mode=self.log_type,
  122. env=self.env,
  123. message="启动微信成功"
  124. )
  125. return True
  126. elif self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
  127. print("发现并关闭系统下拉菜单")
  128. AliyunLogger.logging(
  129. code="1000",
  130. platform=self.platform,
  131. mode=self.log_type,
  132. env=self.env,
  133. message="第{}次错误打开了通知栏".format(i + 1)
  134. )
  135. self.driver.find_element(By.ID, "com.android.system:id/dismiss_view").click()
  136. else:
  137. pass
  138. except NoSuchElementException:
  139. time.sleep(10)
  140. return False
  141. def check_to_applet(self):
  142. while True:
  143. webview = self.driver.contexts
  144. self.driver.switch_to.context(webview[1])
  145. windowHandles = self.driver.window_handles
  146. for handle in windowHandles:
  147. self.driver.switch_to.window(handle)
  148. time.sleep(1)
  149. try:
  150. video_list = self.driver.find_element(By.XPATH, '//*[@class="index--navbar-list"]/*[1]')
  151. video_list.click()
  152. print("切换 webview 成功")
  153. return
  154. except NoSuchElementException:
  155. time.sleep(1)
  156. print("切换 webview 失败")
  157. break
  158. def swipe_up(self):
  159. self.search_elements('//*[@class="list-list--list"]')
  160. size = self.driver.get_window_size()
  161. self.driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
  162. int(size["width"] * 0.5), int(size["height"] * 0.442), 200)
  163. self.swipe_count += 1
  164. def close_ad(self):
  165. window_size = self.driver.get_window_size()
  166. TouchAction(self.driver).tap(x=int(window_size['width'] * 0.5), y=int(window_size['height'] * 0.1)).perform()
  167. def get_video_url(self, video_element):
  168. video_element.click()
  169. time.sleep(5)
  170. windowHandles = self.driver.window_handles
  171. for handle in windowHandles:
  172. self.driver.switch_to.window(handle)
  173. time.sleep(1)
  174. try:
  175. video_url_element = self.driver.find_element(By.XPATH, '//wx-video[@class="videoh"]')
  176. video_url = video_url_element.get_attribute("src")
  177. self.close_ad()
  178. return video_url
  179. except NoSuchElementException:
  180. time.sleep(1)
  181. def get_videoList(self):
  182. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  183. self.driver.implicitly_wait(20)
  184. self.close_ad()
  185. AliyunLogger.logging(
  186. code="1000",
  187. platform=self.platform,
  188. mode=self.log_type,
  189. env=self.env,
  190. message="已经关闭广告"
  191. )
  192. self.check_to_applet()
  193. AliyunLogger.logging(
  194. code="1000",
  195. platform=self.platform,
  196. mode=self.log_type,
  197. env=self.env,
  198. message="成功切换到 webview"
  199. )
  200. index = 0
  201. while True:
  202. if self.search_elements('//*[@id="scrollContainer"]') is None:
  203. AliyunLogger.logging(
  204. code="3000",
  205. platform=self.platform,
  206. mode=self.log_type,
  207. env=self.env,
  208. message="窗口已销毁"
  209. )
  210. return
  211. video_elements = self.search_elements('//wx-view[@class="cover"]')
  212. if video_elements is None:
  213. AliyunLogger.logging(
  214. code="2000",
  215. platform=self.platform,
  216. mode=self.log_type,
  217. env=self.env,
  218. message="视频列表为空列表"
  219. )
  220. return
  221. video_element_temp = video_elements[index:]
  222. if len(video_element_temp) == 0:
  223. AliyunLogger.logging(
  224. code="2000",
  225. platform=self.platform,
  226. mode=self.log_type,
  227. env=self.env,
  228. message="视频已经到底"
  229. )
  230. return
  231. for i, video_element in enumerate(video_element_temp):
  232. if video_element is None:
  233. return
  234. # 获取 trace_id, 并且把该 id 当做视频生命周期唯一索引
  235. trace_id = self.crawler + str(uuid.uuid1())
  236. AliyunLogger.logging(
  237. code="1001",
  238. platform=self.platform,
  239. mode=self.log_type,
  240. env=self.env,
  241. trace_id=trace_id,
  242. message="扫描到一条视频",
  243. )
  244. self.download_cnt += 1
  245. self.search_elements('//wx-view[@class="cover"]')
  246. time.sleep(3)
  247. self.driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})",
  248. video_element)
  249. video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="playImgs"]')[index + i].text
  250. cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="coverImg"]')[
  251. index + i].get_attribute('src')
  252. play_cnt = video_element.find_elements(By.XPATH, '//wx-image[@class="coverImg"]/span/*[2]')[
  253. index + i].text
  254. if "万" in play_cnt:
  255. play_cnt = int(play_cnt.split("万")[0]) * 10000
  256. out_video_id = md5(video_title.encode('utf8')).hexdigest()
  257. video_dict = {
  258. 'video_title': video_title,
  259. 'video_id': out_video_id,
  260. 'out_video_id': out_video_id,
  261. 'play_cnt': play_cnt,
  262. 'comment_cnt': 0,
  263. 'like_cnt': 0,
  264. 'share_cnt': 0,
  265. 'publish_time_stamp': int(time.time()),
  266. 'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
  267. 'update_time_stamp': int(time.time()),
  268. 'user_name': "zhongmiaoyinxin",
  269. 'user_id': "zhongmiaoyinxin",
  270. 'avatar_url': cover_url,
  271. 'cover_url': cover_url,
  272. 'session': f"zhongmiaoyinxin-{int(time.time())}"
  273. }
  274. pipeline = PiaoQuanPipeline(
  275. platform=self.crawler,
  276. mode=self.log_type,
  277. item=video_dict,
  278. rule_dict=self.rule_dict,
  279. env=self.env,
  280. trace_id=trace_id
  281. )
  282. flag = pipeline.process_item()
  283. if flag:
  284. print(video_dict)
  285. video_url = self.get_video_url(video_element)
  286. if video_url is None:
  287. self.driver.press_keycode(AndroidKey.BACK)
  288. else:
  289. video_dict["video_url"] = video_url
  290. video_dict['strategy'] = self.log_type
  291. video_dict["out_user_id"] = ""
  292. video_dict["platform"] = self.crawler
  293. video_dict["crawler_rule"] = json.dumps(self.rule_dict)
  294. video_dict["user_id"] = self.our_uid
  295. video_dict["publish_time"] = video_dict["publish_time_str"]
  296. self.mq.send_msg(video_dict)
  297. AliyunLogger.logging(
  298. code="1002",
  299. platform=self.platform,
  300. mode=self.log_type,
  301. env=self.env,
  302. data=video_dict,
  303. trace_id=trace_id,
  304. message="成功发送 MQ 至 ETL",
  305. )
  306. self.driver.press_keycode(AndroidKey.BACK)
  307. def run():
  308. rule_dict1 = {}
  309. ZMYXRecommend("recommend", "zhongmiaoyinxin", "prod", rule_dict1, 6267141)
  310. if __name__ == "__main__":
  311. run()