shipinhao_recommend.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. # @Author: wangkun
  2. # @Time: 3月 02, 2022
  3. import os
  4. import sys
  5. import time
  6. from appium import webdriver
  7. from appium.webdriver.webdriver import WebDriver
  8. from selenium.common import NoSuchElementException
  9. from selenium.webdriver.common.by import By
  10. sys.path.append(os.getcwd())
  11. from crawler_shipinhao.main.common import Common
  12. from crawler_shipinhao.main.feishu_lib import Feishu
  13. from crawler_shipinhao.main.click_share_video import Click
  14. from crawler_shipinhao.main.get_url import GetUrl
  15. class Recommend:
  16. # 启动微信,并打开视频号
  17. @classmethod
  18. def start_wechat(cls, log_type):
  19. Common.logger(log_type).info('启动微信')
  20. caps = {
  21. "platformName": "Android", # 手机操作系统 Android / iOS
  22. "deviceName": "Android", # 连接的设备名(模拟器或真机),安卓可以随便写
  23. "platforVersion": "11", # 手机对应的系统版本(Android 11)
  24. "appPackage": "com.tencent.mm", # 被测APP的包名,乐活圈 Android
  25. "appActivity": ".ui.LauncherUI", # 启动的Activity名
  26. "autoGrantPermissions": "true", # 让 appium 自动授权 base 权限,
  27. # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
  28. "unicodekeyboard": True, # 使用自带输入法,输入中文时填True
  29. "resetkeyboard": True, # 执行完程序恢复原来输入法
  30. "noReset": True, # 不重置APP
  31. "printPageSourceOnFailure": True, # 找不到元素时,appium log 会完整记录当前页面的 pagesource
  32. "newCommandTimeout": 6000, # 初始等待时间
  33. "automationName": "UiAutomator2" # 使用引擎,默认为 Appium,
  34. # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
  35. }
  36. # global driver
  37. driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
  38. driver.implicitly_wait(10)
  39. Common.logger(log_type).info('点击发现TAB')
  40. driver.find_elements(By.ID, 'com.tencent.mm:id/f2s')[2].click()
  41. Common.logger(log_type).info('点击视频号\n')
  42. driver.find_elements(By.ID, 'com.tencent.mm:id/gv6')[1].click()
  43. time.sleep(5)
  44. cls.get_feeds(log_type, driver)
  45. Common.logger(log_type).info('休眠 3s')
  46. time.sleep(3)
  47. cls.quit(log_type, driver)
  48. # 退出 APP
  49. @classmethod
  50. def quit(cls, log_type, driver: WebDriver):
  51. driver.quit()
  52. Common.logger(log_type).info('退出 APP 成功\n')
  53. # 下载规则
  54. @staticmethod
  55. def download_rule(duration, like_cnt, share_cnt, favorite_cnt, comment_cnt):
  56. if int(duration) >= 10:
  57. if int(like_cnt) >= 0:
  58. if int(share_cnt) >= 0:
  59. if int(favorite_cnt) >= 0:
  60. if int(comment_cnt) >= 0:
  61. return True
  62. else:
  63. return False
  64. else:
  65. return False
  66. else:
  67. return False
  68. else:
  69. return False
  70. else:
  71. return False
  72. # 操作安卓手机,自己滑动首页视频,并获取视频信息
  73. @classmethod
  74. def get_feeds(cls, log_type, driver: WebDriver):
  75. driver.implicitly_wait(10)
  76. for i in range(10):
  77. # 关闭页面弹窗
  78. try:
  79. driver.find_element(By.XPATH, '//*[@text="我知道了"]')
  80. Common.logger(log_type).info('已关闭未成年模式弹窗')
  81. except NoSuchElementException:
  82. pass
  83. try:
  84. driver.find_element(By.ID, 'com.tencent.mm:id/dkf')
  85. Common.logger(log_type).info('直播视频,向上滑动页面\n')
  86. driver.swipe(500, 1000, 500, 300, 300)
  87. except NoSuchElementException:
  88. pass
  89. # Common.logger(log_type).info('暂停播放')
  90. pause_btn = driver.find_element(By.ID, 'com.tencent.mm:id/eh4')
  91. pause_btn.click()
  92. # 视频标题
  93. try:
  94. title_id = driver.find_element(By.ID, 'com.tencent.mm:id/ki5')
  95. video_title = title_id.get_attribute('name')
  96. except NoSuchElementException:
  97. video_title = ''
  98. # 点击播放器,获取视频时长
  99. start_time = driver.find_element(By.ID, 'com.tencent.mm:id/l59').get_attribute('name')
  100. start_time = int(start_time.split(':')[0])*60 + int(start_time.split(':')[-1])
  101. try:
  102. end_time = driver.find_element(By.ID, 'com.tencent.mm:id/l7i').get_attribute('name')
  103. except NoSuchElementException:
  104. end_time = driver.find_element(By.ID, 'com.tencent.mm:id/g73').get_attribute('name')
  105. end_time = int(end_time.split(':')[0]) * 60 + int(end_time.split(':')[-1])
  106. duration = start_time + end_time
  107. # 点赞
  108. like_id = driver.find_element(By.ID, 'com.tencent.mm:id/k04')
  109. like_cnt = like_id.get_attribute('name')
  110. if like_cnt == "" or like_cnt == "喜欢":
  111. like_cnt = 0
  112. elif '万' in like_cnt:
  113. like_cnt = float(like_cnt.split('万')[0]) * 10000
  114. elif '万+' in like_cnt:
  115. like_cnt = float(like_cnt.split('万+')[0]) * 10000
  116. else:
  117. like_cnt = float(like_cnt)
  118. # 分享
  119. share_id = driver.find_element(By.ID, 'com.tencent.mm:id/jhv')
  120. share_cnt = share_id.get_attribute('name')
  121. if share_cnt == "" or share_cnt == "转发":
  122. share_cnt = 0
  123. elif '万' in share_cnt:
  124. share_cnt = float(share_cnt.split('万')[0]) * 10000
  125. elif '万+' in share_cnt:
  126. share_cnt = float(share_cnt.split('万+')[0]) * 10000
  127. else:
  128. share_cnt = float(share_cnt)
  129. # 收藏
  130. favorite_id = driver.find_element(By.ID, 'com.tencent.mm:id/fnp')
  131. favorite_cnt = favorite_id.get_attribute('name')
  132. if favorite_cnt == "" or favorite_cnt == "收藏":
  133. favorite_cnt = 0
  134. elif '万' in favorite_cnt:
  135. favorite_cnt = float(favorite_cnt.split('万')[0]) * 10000
  136. elif '万+' in favorite_cnt:
  137. favorite_cnt = float(favorite_cnt.split('万+')[0]) * 10000
  138. else:
  139. favorite_cnt = float(favorite_cnt)
  140. # 评论
  141. comment_id = driver.find_element(By.ID, 'com.tencent.mm:id/bje')
  142. comment_cnt = comment_id.get_attribute('name')
  143. if comment_cnt == "" or comment_cnt == "评论":
  144. comment_cnt = 0
  145. elif '万' in comment_cnt:
  146. comment_cnt = float(comment_cnt.split('万')[0]) * 10000
  147. elif '万+' in comment_cnt:
  148. comment_cnt = float(comment_cnt.split('万+')[0]) * 10000
  149. else:
  150. comment_cnt = float(comment_cnt)
  151. # 用户名
  152. username_id = driver.find_element(By.ID, 'com.tencent.mm:id/hft')
  153. user_name = username_id.get_attribute('name')
  154. Common.logger(log_type).info('video_title:{}', video_title)
  155. Common.logger(log_type).info('duration:{}', duration)
  156. Common.logger(log_type).info('like_cnt:{}', like_cnt)
  157. Common.logger(log_type).info('share_cnt:{}', share_cnt)
  158. Common.logger(log_type).info('favorite_cnt:{}', favorite_cnt)
  159. Common.logger(log_type).info('comment_cnt:{}', comment_cnt)
  160. Common.logger(log_type).info('user_name:{}', user_name)
  161. # 判断无效视频
  162. if video_title == '' or user_name == '':
  163. Common.logger(log_type).info('向上滑动页面')
  164. driver.swipe(500, 1000, 500, 300, 300)
  165. Common.logger(log_type).info('无效视频\n')
  166. # 判断下载规则
  167. elif cls.download_rule(duration, like_cnt, share_cnt, favorite_cnt, comment_cnt) is False:
  168. Common.logger(log_type).info('向上滑动页面')
  169. driver.swipe(500, 1000, 500, 300, 300)
  170. Common.logger(log_type).info('不满足抓取规则\n')
  171. # 已下载表去重
  172. elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x in y]:
  173. Common.logger(log_type).info('向上滑动页面')
  174. driver.swipe(500, 1000, 500, 300, 300)
  175. Common.logger(log_type).info('视频已下载\n')
  176. # feeds 表去重
  177. elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy') for x in y]:
  178. Common.logger(log_type).info('向上滑动页面')
  179. driver.swipe(500, 1000, 500, 300, 300)
  180. Common.logger(log_type).info('视频已存在\n')
  181. # 分享给 windows 爬虫机
  182. else:
  183. share_id.click()
  184. driver.find_element(By.XPATH, '//*[@text="转发给朋友"]').click()
  185. driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
  186. driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
  187. Click.click_video(log_type, video_title)
  188. time.sleep(1)
  189. urls = GetUrl.get_url(log_type)
  190. if urls is None:
  191. Common.logger(log_type).info('未获取到视频 URL')
  192. else:
  193. video_url = urls[0]
  194. cover_url = urls[1]
  195. # 把视频信息写入飞书feeds文档
  196. Feishu.insert_columns(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 1, 2)
  197. get_feeds_time = int(time.time())
  198. values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
  199. '推荐榜',
  200. str(video_title),
  201. duration,
  202. like_cnt,
  203. share_cnt,
  204. favorite_cnt,
  205. comment_cnt,
  206. str(user_name),
  207. cover_url,
  208. video_url]]
  209. time.sleep(1)
  210. Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'A2:Z2', values)
  211. Common.logger(log_type).info('向上滑动页面')
  212. driver.swipe(500, 1000, 500, 300, 300)
  213. Common.logger(log_type).info('视频信息写入飞书文档成功\n')
  214. if __name__ == '__main__':
  215. Recommend.start_wechat('recommend')
  216. pass