zhongmiaoyinxin_recommend_new.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. # 众妙音信接入后台
  2. # @Author: luojunhui
  3. # @Time: 2023/10/26
  4. import json
  5. import os
  6. import sys
  7. import time
  8. import uuid
  9. from hashlib import md5
  10. from appium import webdriver
  11. from appium.webdriver.common.touch_action import TouchAction
  12. from appium.webdriver.extensions.android.nativekey import AndroidKey
  13. from appium.webdriver.webdriver import WebDriver
  14. from selenium.common.exceptions import NoSuchElementException
  15. from selenium.webdriver.common.by import By
  16. sys.path.append(os.getcwd())
  17. from common.mq import MQ
  18. from common.aliyun_log import AliyunLogger
  19. from common.pipeline import PiaoQuanPipeline
  20. class ZMYXRecommend:
  21. env = None
  22. driver = None
  23. log_type = None
  24. def __init__(self, log_type, crawler, env, rule_dict, our_uid):
  25. self.platform = "zhongmiaoyinxin"
  26. self.download_cnt = 0
  27. self.element_list = []
  28. self.count = 0
  29. self.swipe_count = 0
  30. self.log_type = log_type
  31. self.crawler = crawler
  32. self.env = env
  33. self.rule_dict = rule_dict
  34. self.our_uid = our_uid
  35. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  36. if self.env == "dev":
  37. chromedriverExecutable = "/Users/luojunhui/Downloads/chromedriver_V111/chromedriver"
  38. else:
  39. chromedriverExecutable = '/Users/piaoquan/Downloads/chromedriver' # Mac 爬虫机器
  40. # 微信的配置文件
  41. caps = {
  42. "platformName": "Android", # 手机操作系统 Android / iOS
  43. "deviceName": "a0a65126", # 连接的设备名(模拟器或真机),安卓可以随便写
  44. # "udid": "emulator-5554", # 指定 adb devices 中的哪一台设备
  45. "platforVersion": "11", # 手机对应的系统版本
  46. "appPackage": "com.tencent.mm", # 被测APP的包名,乐活圈 Android
  47. "appActivity": ".ui.LauncherUI", # 启动的Activity名
  48. "autoGrantPermissions": "true", # 让 appium 自动授权 base 权限,
  49. # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
  50. "unicodekeyboard": True, # 使用自带输入法,输入中文时填True
  51. "resetkeyboard": True, # 执行完程序恢复原来输入法
  52. "noReset": True, # 不重置APP
  53. "printPageSourceOnFailure": True, # 找不到元素时,appium log 会完整记录当前页面的 pagesource
  54. "newCommandTimeout": 6000, # 初始等待时间
  55. "automationName": "UiAutomator2", # 使用引擎,默认为 Appium,
  56. # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
  57. "showChromedriverLog": True,
  58. 'enableWebviewDetailsCollection': True,
  59. 'setWebContentsDebuggingEnabled': True,
  60. 'recreateChromeDriverSessions': True,
  61. 'chromedriverExecutable': chromedriverExecutable,
  62. "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
  63. # "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
  64. 'browserName': ''
  65. }
  66. try:
  67. self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
  68. except:
  69. AliyunLogger.logging(
  70. code="3002",
  71. platform=self.platform,
  72. mode=self.log_type,
  73. env=self.env,
  74. message="appium 启动异常"
  75. )
  76. return
  77. self.driver.implicitly_wait(30)
  78. wechat_flag = self.check_wechat()
  79. if wechat_flag:
  80. size = self.driver.get_window_size()
  81. self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
  82. int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
  83. time.sleep(1)
  84. self.driver.find_elements(By.XPATH, '//*[@text="西瓜悦"]')[-1].click()
  85. AliyunLogger.logging(
  86. code="1000",
  87. platform=self.platform,
  88. env=self.env,
  89. mode=self.log_type,
  90. message="打开小程序西瓜悦成功"
  91. )
  92. time.sleep(5)
  93. self.get_videoList()
  94. time.sleep(100)
  95. self.driver.quit()
  96. else:
  97. AliyunLogger.logging(
  98. code="3001",
  99. platform=self.platform,
  100. mode=self.log_type,
  101. env=self.env,
  102. message="打开微信异常"
  103. )
  104. return
  105. def search_elements(self, xpath):
  106. time.sleep(1)
  107. windowHandles = self.driver.window_handles
  108. for handle in windowHandles:
  109. self.driver.switch_to.window(handle)
  110. time.sleep(1)
  111. try:
  112. elements = self.driver.find_elements(By.XPATH, xpath)
  113. if elements:
  114. return elements
  115. except NoSuchElementException:
  116. pass
  117. # 检查是否打开微信
  118. def check_wechat(self):
  119. for i in range(10):
  120. try:
  121. if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
  122. AliyunLogger.logging(
  123. code="1000",
  124. platform=self.platform,
  125. mode=self.log_type,
  126. env=self.env,
  127. message="启动微信成功"
  128. )
  129. return True
  130. elif self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
  131. print("发现并关闭系统下拉菜单")
  132. AliyunLogger.logging(
  133. code="1000",
  134. platform=self.platform,
  135. mode=self.log_type,
  136. env=self.env,
  137. message="第{}次错误打开了通知栏".format(i + 1)
  138. )
  139. self.driver.find_element(By.ID, "com.android.system:id/dismiss_view").click()
  140. else:
  141. pass
  142. except NoSuchElementException:
  143. time.sleep(10)
  144. return False
  145. def check_to_applet(self):
  146. while True:
  147. webview = self.driver.contexts
  148. self.driver.switch_to.context(webview[1])
  149. windowHandles = self.driver.window_handles
  150. for handle in windowHandles:
  151. self.driver.switch_to.window(handle)
  152. time.sleep(1)
  153. try:
  154. video_list = self.driver.find_element(By.XPATH, '//*[@class="index--navbar-list"]/*[1]')
  155. video_list.click()
  156. print("切换 webview 成功")
  157. return
  158. except NoSuchElementException:
  159. time.sleep(1)
  160. print("切换 webview 失败")
  161. break
  162. def swipe_up(self):
  163. self.search_elements('//*[@class="list-list--list"]')
  164. size = self.driver.get_window_size()
  165. self.driver.swipe(int(size["width"] * 0.5), int(size["height"] * 0.8),
  166. int(size["width"] * 0.5), int(size["height"] * 0.442), 200)
  167. self.swipe_count += 1
  168. def close_ad(self):
  169. window_size = self.driver.get_window_size()
  170. TouchAction(self.driver).tap(x=int(window_size['width'] * 0.5), y=int(window_size['height'] * 0.1)).perform()
  171. def get_video_url(self, video_element):
  172. video_element.click()
  173. time.sleep(5)
  174. windowHandles = self.driver.window_handles
  175. for handle in windowHandles:
  176. self.driver.switch_to.window(handle)
  177. time.sleep(1)
  178. try:
  179. video_url_element = self.driver.find_element(By.XPATH, '//wx-video[@class="videoh"]')
  180. video_url = video_url_element.get_attribute("src")
  181. self.close_ad()
  182. return video_url
  183. except NoSuchElementException:
  184. time.sleep(1)
  185. def get_videoList(self):
  186. self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
  187. self.driver.implicitly_wait(20)
  188. self.close_ad()
  189. AliyunLogger.logging(
  190. code="1000",
  191. platform=self.platform,
  192. mode=self.log_type,
  193. env=self.env,
  194. message="已经关闭广告"
  195. )
  196. self.check_to_applet()
  197. AliyunLogger.logging(
  198. code="1000",
  199. platform=self.platform,
  200. mode=self.log_type,
  201. env=self.env,
  202. message="成功切换到 webview"
  203. )
  204. index = 0
  205. while True:
  206. if self.search_elements('//*[@id="scrollContainer"]') is None:
  207. AliyunLogger.logging(
  208. code="3000",
  209. platform=self.platform,
  210. mode=self.log_type,
  211. env=self.env,
  212. message="窗口已销毁"
  213. )
  214. return
  215. video_elements = self.search_elements('//wx-view[@class="cover"]')
  216. if video_elements is None:
  217. AliyunLogger.logging(
  218. code="2000",
  219. platform=self.platform,
  220. mode=self.log_type,
  221. env=self.env,
  222. message="视频列表为空列表"
  223. )
  224. return
  225. video_element_temp = video_elements[index:]
  226. if len(video_element_temp) == 0:
  227. AliyunLogger.logging(
  228. code="2000",
  229. platform=self.platform,
  230. mode=self.log_type,
  231. env=self.env,
  232. message="视频已经到底"
  233. )
  234. return
  235. for i, video_element in enumerate(video_element_temp):
  236. if video_element is None:
  237. return
  238. # 获取 trace_id, 并且把该 id 当做视频生命周期唯一索引
  239. trace_id = self.crawler + str(uuid.uuid1())
  240. self.download_cnt += 1
  241. self.search_elements('//wx-view[@class="cover"]')
  242. time.sleep(3)
  243. self.driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})",
  244. video_element)
  245. video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="playImgs"]')[index + i].text
  246. cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="coverImg"]')[
  247. index + i].get_attribute('src')
  248. play_cnt = video_element.find_elements(By.XPATH, '//wx-image[@class="coverImg"]/span/*[2]')[
  249. index + i].text
  250. if "万" in play_cnt:
  251. play_cnt = int(play_cnt.split("万")[0]) * 10000
  252. out_video_id = md5(video_title.encode('utf8')).hexdigest()
  253. video_dict = {
  254. 'video_title': video_title,
  255. 'video_id': out_video_id,
  256. 'out_video_id': out_video_id,
  257. 'play_cnt': play_cnt,
  258. 'comment_cnt': 0,
  259. 'like_cnt': 0,
  260. 'share_cnt': 0,
  261. 'publish_time_stamp': int(time.time()),
  262. 'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
  263. 'update_time_stamp': int(time.time()),
  264. 'user_name': "zhongmiaoyinxin",
  265. 'user_id': "zhongmiaoyinxin",
  266. 'avatar_url': cover_url,
  267. 'cover_url': cover_url,
  268. 'session': f"zhongmiaoyinxin-{int(time.time())}"
  269. }
  270. pipeline = PiaoQuanPipeline(
  271. platform=self.crawler,
  272. mode=self.log_type,
  273. item=video_dict,
  274. rule_dict=self.rule_dict,
  275. env=self.env,
  276. trace_id=trace_id
  277. )
  278. flag = pipeline.process_item()
  279. if flag:
  280. print(video_dict)
  281. video_url = self.get_video_url(video_element)
  282. if video_url is None:
  283. self.driver.press_keycode(AndroidKey.BACK)
  284. else:
  285. video_dict["video_url"] = video_url
  286. video_dict['strategy'] = self.log_type
  287. video_dict["out_user_id"] = ""
  288. video_dict["platform"] = self.crawler
  289. video_dict["crawler_rule"] = json.dumps(self.rule_dict)
  290. video_dict["user_id"] = self.our_uid
  291. video_dict["publish_time"] = video_dict["publish_time_str"]
  292. self.mq.send_msg(video_dict)
  293. AliyunLogger.logging(
  294. code="1002",
  295. platform=self.platform,
  296. mode=self.log_type,
  297. env=self.env,
  298. data=video_dict,
  299. trace_id=trace_id,
  300. message="成功发送 MQ 至 ETL",
  301. )
  302. self.driver.press_keycode(AndroidKey.BACK)
  303. def run():
  304. rule_dict1 = {}
  305. ZMYXRecommend("recommend", "zhongmiaoyinxin", "prod", rule_dict1, 6267141)
  306. if __name__ == "__main__":
  307. run()
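    # Disabled watchdog, kept for reference: it ran `run` in a child process
    # and, whenever that process was no longer alive, cleared the adb port
    # forwards and started a new one. Re-enabling it requires
    # `import multiprocessing`. Indentation below is reconstructed.
    # process = multiprocessing.Process(
    #     target=run
    # )
    # process.start()
    # while True:
    #     if not process.is_alive():
    #         print("正在重启")
    #         process.terminate()
    #         time.sleep(60)
    #         os.system("adb forward --remove-all")
    #         process = multiprocessing.Process(target=run)
    #         process.start()
    #     time.sleep(60)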