shipinhao_recommend.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416
  1. # @Author: wangkun
  2. # @Time: 3月 02, 2022
  3. import datetime
  4. import os
  5. import sys
  6. import time
  7. from appium import webdriver
  8. from appium.webdriver.webdriver import WebDriver
  9. from selenium.common.exceptions import NoSuchElementException
  10. from selenium.webdriver.common.by import By
  11. sys.path.append(os.getcwd())
  12. from main.common import Common
  13. from shipinhao.shipinhao_publish import Publish
  14. from main.feishu_lib import Feishu
  15. class Recommend:
  16. # 当日已下载数量
  17. download_cnt = []
  18. # 下载/上传指定数量的视频
  19. @classmethod
  20. def run_recommend(cls, log_type, env):
  21. try:
  22. while True:
  23. if len(cls.download_cnt) >= 100 or datetime.datetime.now().hour == 17:
  24. Common.logger('recommend').info('已下载{}条视频\n', len(cls.download_cnt))
  25. cls.download_cnt = []
  26. return
  27. else:
  28. cls.start_wechat(log_type, env)
  29. except Exception as e:
  30. Common.logger(log_type).error('run_recommend异常:{}\n', e)
  31. # 启动微信,并打开视频号
  32. @classmethod
  33. def start_wechat(cls, log_type, env):
  34. try:
  35. Common.logger(log_type).info('启动微信')
  36. caps = {
  37. "platformName": "Android", # 手机操作系统 Android / iOS
  38. "deviceName": "Android", # 连接的设备名(模拟器或真机),安卓可以随便写
  39. "platforVersion": "11", # 手机对应的系统版本(Android 11)
  40. "appPackage": "com.tencent.mm", # 被测APP的包名,乐活圈 Android
  41. "appActivity": ".ui.LauncherUI", # 启动的Activity名
  42. "autoGrantPermissions": "true", # 让 appium 自动授权 base 权限,
  43. # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
  44. "unicodekeyboard": True, # 使用自带输入法,输入中文时填True
  45. "resetkeyboard": True, # 执行完程序恢复原来输入法
  46. "noReset": True, # 不重置APP
  47. "printPageSourceOnFailure": True, # 找不到元素时,appium log 会完整记录当前页面的 pagesource
  48. "newCommandTimeout": 6000, # 初始等待时间
  49. "automationName": "UiAutomator2" # 使用引擎,默认为 Appium,
  50. # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
  51. }
  52. driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
  53. driver.implicitly_wait(10)
  54. Common.logger(log_type).info('点击发现TAB')
  55. driver.find_elements(By.ID, 'com.tencent.mm:id/f2s')[2].click()
  56. Common.logger(log_type).info('点击视频号\n')
  57. driver.find_elements(By.ID, 'com.tencent.mm:id/gv6')[1].click()
  58. time.sleep(5)
  59. cls.get_feeds(log_type, driver, env)
  60. Common.logger(log_type).info('休眠 3s')
  61. time.sleep(3)
  62. cls.quit(log_type, driver)
  63. except Exception as e:
  64. Common.logger(log_type).error('start_wechat异常:{}\n', e)
  65. # 退出 APP
  66. @classmethod
  67. def quit(cls, log_type, driver: WebDriver):
  68. driver.quit()
  69. Common.logger(log_type).info('退出 APP 成功\n')
  70. # 下载规则
  71. @staticmethod
  72. def download_rule(duration, like_cnt, share_cnt, favorite_cnt, comment_cnt):
  73. if int(duration) >= 30:
  74. if int(like_cnt) >= 1000:
  75. if int(share_cnt) >= 0:
  76. if int(favorite_cnt) >= 0:
  77. if int(comment_cnt) >= 0:
  78. return True
  79. else:
  80. return False
  81. else:
  82. return False
  83. else:
  84. return False
  85. else:
  86. return False
  87. else:
  88. return False
  89. # 操作安卓手机,自己滑动首页视频,并获取视频信息
  90. @classmethod
  91. def get_feeds(cls, log_type, driver: WebDriver, env):
  92. try:
  93. for i in range(5):
  94. driver.implicitly_wait(10)
  95. # 视频标题
  96. try:
  97. title_id = driver.find_element(By.ID, 'com.tencent.mm:id/ki5')
  98. video_title = title_id.get_attribute('name').split('\n')[0].replace('#', '').strip()
  99. except NoSuchElementException:
  100. video_title = ''
  101. driver.swipe(10, 1600, 10, 300, 200)
  102. # 点击播放器,获取视频时长
  103. # Common.logger(log_type).info('暂停播放')
  104. pause_btn = driver.find_element(By.ID, 'com.tencent.mm:id/eh4')
  105. pause_btn.click()
  106. start_time = driver.find_element(By.ID, 'com.tencent.mm:id/l59').get_attribute('name')
  107. start_time = int(start_time.split(':')[0]) * 60 + int(start_time.split(':')[-1])
  108. try:
  109. end_time = driver.find_element(By.ID, 'com.tencent.mm:id/l7i').get_attribute('name')
  110. except NoSuchElementException:
  111. end_time = driver.find_element(By.ID, 'com.tencent.mm:id/g73').get_attribute('name')
  112. end_time = int(end_time.split(':')[0]) * 60 + int(end_time.split(':')[-1])
  113. duration = start_time + end_time
  114. # 点赞
  115. like_id = driver.find_element(By.ID, 'com.tencent.mm:id/k04')
  116. like_cnt = like_id.get_attribute('name')
  117. if like_cnt == "" or like_cnt == "喜欢":
  118. like_cnt = 0
  119. elif '万' in like_cnt:
  120. like_cnt = float(like_cnt.split('万')[0]) * 10000
  121. elif '万+' in like_cnt:
  122. like_cnt = float(like_cnt.split('万+')[0]) * 10000
  123. else:
  124. like_cnt = float(like_cnt)
  125. # 分享
  126. share_id = driver.find_element(By.ID, 'com.tencent.mm:id/jhv')
  127. share_cnt = share_id.get_attribute('name')
  128. if share_cnt == "" or share_cnt == "转发":
  129. share_cnt = 0
  130. elif '万' in share_cnt:
  131. share_cnt = float(share_cnt.split('万')[0]) * 10000
  132. elif '万+' in share_cnt:
  133. share_cnt = float(share_cnt.split('万+')[0]) * 10000
  134. else:
  135. share_cnt = float(share_cnt)
  136. # 收藏
  137. favorite_id = driver.find_element(By.ID, 'com.tencent.mm:id/fnp')
  138. favorite_cnt = favorite_id.get_attribute('name')
  139. if favorite_cnt == "" or favorite_cnt == "收藏":
  140. favorite_cnt = 0
  141. elif '万' in favorite_cnt:
  142. favorite_cnt = float(favorite_cnt.split('万')[0]) * 10000
  143. elif '万+' in favorite_cnt:
  144. favorite_cnt = float(favorite_cnt.split('万+')[0]) * 10000
  145. else:
  146. favorite_cnt = float(favorite_cnt)
  147. # 评论
  148. comment_id = driver.find_element(By.ID, 'com.tencent.mm:id/bje')
  149. comment_cnt = comment_id.get_attribute('name')
  150. if comment_cnt == "" or comment_cnt == "评论":
  151. comment_cnt = 0
  152. elif '万' in comment_cnt:
  153. comment_cnt = float(comment_cnt.split('万')[0]) * 10000
  154. elif '万+' in comment_cnt:
  155. comment_cnt = float(comment_cnt.split('万+')[0]) * 10000
  156. else:
  157. comment_cnt = float(comment_cnt)
  158. # 用户名
  159. username_id = driver.find_element(By.ID, 'com.tencent.mm:id/hft')
  160. user_name = username_id.get_attribute('name')
  161. Common.logger(log_type).info('video_title:{}', video_title)
  162. Common.logger(log_type).info('duration:{}', duration)
  163. Common.logger(log_type).info('like_cnt:{}', like_cnt)
  164. Common.logger(log_type).info('share_cnt:{}', share_cnt)
  165. Common.logger(log_type).info('favorite_cnt:{}', favorite_cnt)
  166. Common.logger(log_type).info('comment_cnt:{}', comment_cnt)
  167. Common.logger(log_type).info('user_name:{}', user_name)
  168. # 判断无效视频
  169. if video_title == '' or user_name == '':
  170. Common.logger(log_type).info('无效视频,滑动到下一个视频\n')
  171. driver.swipe(10, 1600, 10, 300, 200)
  172. # 判断下载规则
  173. elif cls.download_rule(duration, like_cnt, share_cnt, favorite_cnt, comment_cnt) is False:
  174. Common.logger(log_type).info('不满足抓取规则,滑动到下一个视频\n')
  175. driver.swipe(10, 1600, 10, 300, 200)
  176. # 过滤词库(视频标题)
  177. elif any(word if word in video_title else False for word in cls.filter_words(log_type)) is True:
  178. Common.logger(log_type).info('视频已中过滤词:{}\n', video_title)
  179. driver.swipe(10, 1600, 10, 300, 200)
  180. # 黑名单(用户名)
  181. elif any(word if word in user_name else False for word in cls.username_blacklist(log_type)) is True:
  182. Common.logger(log_type).info('用户名已中过滤词:{}\n', user_name)
  183. driver.swipe(10, 1600, 10, 300, 200)
  184. # 连续下载判断
  185. elif user_name in Feishu.get_values_batch('recommend', 'shipinhao', 'c77cf9')[1][14] \
  186. and user_name in Feishu.get_values_batch('recommend', 'shipinhao', 'c77cf9')[2][14]:
  187. Common.logger(log_type).info('该用户已连续下载2条视频,滑动到下一个视频\n')
  188. driver.swipe(10, 1600, 10, 300, 200)
  189. # 视频号推荐_已下载表
  190. elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x in
  191. y]:
  192. Common.logger(log_type).info('视频已下载,滑动到下一个视频\n')
  193. driver.swipe(10, 1600, 10, 300, 200)
  194. # 新视推荐_已下载表
  195. elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'WAG7Dq') for x in
  196. y]:
  197. Common.logger(log_type).info('视频已下载,滑动到下一个视频\n')
  198. driver.swipe(10, 1600, 10, 300, 200)
  199. # 新视热门_已下载表
  200. elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', '0i4jmV') for x
  201. in y]:
  202. Common.logger(log_type).info('视频已下载,滑动到下一个视频\n')
  203. driver.swipe(10, 1600, 10, 300, 200)
  204. # feeds 表去重
  205. elif str(video_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy') for x in
  206. y]:
  207. Common.logger(log_type).info('视频已存在,滑动到下一个视频\n')
  208. driver.swipe(10, 1600, 10, 300, 200)
  209. # 分享给 windows 爬虫机
  210. else:
  211. share_id.click()
  212. driver.find_element(By.XPATH, '//*[@text="转发给朋友"]').click()
  213. driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
  214. driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
  215. # 把视频信息写入飞书feeds文档
  216. Feishu.insert_columns(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 1, 2)
  217. get_feeds_time = int(time.time())
  218. values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
  219. '推荐榜',
  220. str(video_title),
  221. duration,
  222. like_cnt,
  223. share_cnt,
  224. favorite_cnt,
  225. comment_cnt,
  226. str(user_name)]]
  227. time.sleep(1)
  228. Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'A2:Z2', values)
  229. Common.logger(log_type).info('视频信息写入飞书文档成功\n')
  230. while True:
  231. if Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][11] is None:
  232. Common.logger(log_type).info('等待更新 URL 信息')
  233. time.sleep(10)
  234. else:
  235. Common.logger(log_type).info('URL 信息已更新\n')
  236. break
  237. cls.download_publish(log_type, env)
  238. driver.swipe(10, 1600, 10, 300, 200)
  239. except Exception as e:
  240. Common.logger(log_type).error('get_feeds异常,滑动到下一个视频\n', e)
  241. Feishu.dimension_range(log_type, "shipinhao", "FSDlBy", "ROWS", 2, 2)
  242. driver.swipe(10, 1600, 10, 300, 200)
  243. # 下载 、上传
  244. @classmethod
  245. def download_publish(cls, log_type, env):
  246. try:
  247. recommend_feeds_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')
  248. for i in range(1, len(recommend_feeds_sheet)):
  249. download_title = recommend_feeds_sheet[i][2].strip().replace('"', '') \
  250. .replace('“', '').replace('“', '…').replace("\n", "") \
  251. .replace("/", "").replace("\r", "").replace("#", "") \
  252. .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
  253. .replace(":", "").replace("*", "").replace("?", "") \
  254. .replace("?", "").replace('"', "").replace("<", "") \
  255. .replace(">", "").replace("|", "").replace(" ", "")
  256. download_duration = recommend_feeds_sheet[i][3]
  257. download_like_cnt = recommend_feeds_sheet[i][4]
  258. download_share_cnt = recommend_feeds_sheet[i][5]
  259. download_favorite_cnt = recommend_feeds_sheet[i][6]
  260. download_comment_cnt = recommend_feeds_sheet[i][7]
  261. download_username = recommend_feeds_sheet[i][8]
  262. download_head_url = recommend_feeds_sheet[i][9]
  263. download_cover_url = recommend_feeds_sheet[i][10]
  264. download_video_url = recommend_feeds_sheet[i][11]
  265. Common.logger(log_type).info("download_title:{}", download_title)
  266. Common.logger(log_type).info("download_username:{}", download_username)
  267. Common.logger(log_type).info("download_video_url:{}", download_video_url)
  268. if download_title is None or download_duration is None or download_video_url is None:
  269. Feishu.dimension_range(log_type, 'shipinhao', 'FSDlBy', 'ROWS', i + 1, i + 1)
  270. Common.logger(log_type).info('空行,删除成功\n')
  271. return
  272. elif str(download_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x
  273. in y]:
  274. Feishu.dimension_range(log_type, 'shipinhao', 'FSDlBy', 'ROWS', i + 1, i + 1)
  275. Common.logger(log_type).info('视频已下载,删除成功\n')
  276. return
  277. else:
  278. # 下载封面
  279. Common.download_method(log_type=log_type, text="cover",
  280. d_name=str(download_title), d_url=str(download_cover_url))
  281. # 下载视频
  282. Common.download_method(log_type=log_type, text="video",
  283. d_name=str(download_title), d_url=str(download_video_url))
  284. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  285. with open("./videos/" + download_title
  286. + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  287. f_a.write('shipinhao' + str(int(time.time())) + "\n" +
  288. str(download_title) + "\n" +
  289. str(download_duration) + "\n" +
  290. str(download_favorite_cnt) + "\n" +
  291. str(download_comment_cnt) + "\n" +
  292. str(download_like_cnt) + "\n" +
  293. str(download_share_cnt) + "\n" +
  294. str(1920 * 1080) + "\n" +
  295. str(int(time.time())) + "\n" +
  296. str(download_username) + "\n" +
  297. str(download_head_url) + "\n" +
  298. str(download_video_url) + "\n" +
  299. str(download_cover_url) + "\n" +
  300. "shipinhao")
  301. Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
  302. Common.logger(log_type).info("开始上传视频:{}".format(download_title))
  303. our_video_id = Publish.upload_and_publish(log_type, env, "play")
  304. if env == 'dev':
  305. our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(
  306. our_video_id) + "/info"
  307. else:
  308. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  309. Common.logger(log_type).info("视频上传完成:{}", our_video_link)
  310. # 视频ID工作表,插入首行
  311. Feishu.insert_columns(log_type, "shipinhao", "c77cf9", "ROWS", 1, 2)
  312. # 视频ID工作表,首行写入数据
  313. upload_time = int(time.time())
  314. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  315. "推荐榜",
  316. str(download_title),
  317. our_video_link,
  318. download_duration,
  319. download_like_cnt,
  320. download_share_cnt,
  321. download_favorite_cnt,
  322. download_comment_cnt,
  323. download_username,
  324. str(download_head_url),
  325. str(download_cover_url),
  326. str(download_video_url)]]
  327. time.sleep(1)
  328. Feishu.update_values(log_type, "shipinhao", "c77cf9", "F2:V2", values)
  329. cls.download_cnt.append(download_title)
  330. # 删除行或列,可选 ROWS、COLUMNS
  331. time.sleep(1)
  332. Feishu.dimension_range(log_type, "shipinhao", "FSDlBy", "ROWS", i + 1, i + 1)
  333. Common.logger(log_type).info("下载/上传成功,滑动到下一个视频")
  334. return
  335. except Exception as e:
  336. Feishu.dimension_range(log_type, "shipinhao", "FSDlBy", "ROWS", 2, 2)
  337. Common.logger(log_type).error('download_publish异常,删除视频信息成功:{}\n', e)
  338. # 过滤词库
  339. @classmethod
  340. def filter_words(cls, log_type):
  341. try:
  342. filter_words_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'PYennl')
  343. filter_words_list = []
  344. for x in filter_words_sheet:
  345. for y in x:
  346. if y is None:
  347. pass
  348. else:
  349. filter_words_list.append(y)
  350. return filter_words_list
  351. except Exception as e:
  352. Common.logger(log_type).error('filter_words异常:{}\n', e)
  353. # 用户名黑名单
  354. @classmethod
  355. def username_blacklist(cls, log_type):
  356. try:
  357. username_blacklist_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'nkKrdl')
  358. username_blacklist_list = []
  359. for x in username_blacklist_sheet:
  360. for y in x:
  361. if y is None:
  362. pass
  363. else:
  364. username_blacklist_list.append(y)
  365. return username_blacklist_list
  366. except Exception as e:
  367. Common.logger(log_type).error('filter_words异常:{}\n', e)
  368. if __name__ == '__main__':
  369. # print(Recommend.username_blacklist('recommend'))
  370. Recommend.run_recommend('recommend', 'prod')
  371. pass