# xinshi_app.py (26 KB)
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/10/26
  4. import difflib
  5. import os
  6. import sys
  7. import time
  8. from appium import webdriver
  9. from appium.webdriver.extensions.android.nativekey import AndroidKey
  10. from appium.webdriver.webdriver import WebDriver
  11. from selenium.common import NoSuchElementException
  12. from selenium.webdriver.common.by import By
  13. sys.path.append(os.getcwd())
  14. from main.common import Common
  15. from main.feishu_lib import Feishu
  16. from xinshi.xinshi_publish import Publish
  17. class XinshiAPP:
  18. i = 0
  19. @classmethod
  20. def start_wechat(cls, log_type, env):
  21. try:
  22. Common.logger(log_type).info('启动微信')
  23. caps = {
  24. "platformName": "Android", # 手机操作系统 Android / iOS
  25. "deviceName": "Android", # 连接的设备名(模拟器或真机),安卓可以随便写
  26. "platforVersion": "11", # 手机对应的系统版本(Android 11)
  27. "appPackage": "com.tencent.mm", # 被测APP的包名,乐活圈 Android
  28. "appActivity": ".ui.LauncherUI", # 启动的Activity名
  29. "autoGrantPermissions": True, # 让 appium 自动授权 base 权限,
  30. # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
  31. "unicodekeyboard": True, # 使用自带输入法,输入中文时填True
  32. "resetkeyboard": True, # 执行完程序恢复原来输入法
  33. "noReset": True, # 不重置APP
  34. "recreateChromeDriverSessions": True, # 切换到非 chrome-Driver 会 kill 掉 session,就不需要手动 kill 了
  35. "printPageSourceOnFailure": True, # 找不到元素时,appium log 会完整记录当前页面的 pagesource
  36. "newCommandTimeout": 6000, # 初始等待时间
  37. "automationName": "UiAutomator2", # 使用引擎,默认为 Appium,
  38. # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
  39. "showChromedriverLog": True,
  40. # "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
  41. "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
  42. 'enableWebviewDetailsCollection': True,
  43. 'setWebContentsDebuggingEnabled': True,
  44. # 'chromedriverExecutable': '/Users/wangkun/Downloads/chromedriver_v86/chromedriver',
  45. 'chromedriverExecutable': '/Users/lieyunye/Downloads/chromedriver_v86/chromedriver',
  46. }
  47. driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
  48. driver.implicitly_wait(10)
  49. time.sleep(5)
  50. # 搜索 新视-热门
  51. cls.search_video(log_type, driver, 'gO4Sn4', env)
  52. # 搜索 新视-推荐
  53. cls.search_video(log_type, driver, 'aOjaIU', env)
  54. except Exception as e:
  55. Common.logger(log_type).error('start_wechat异常,重启APP:{}\n', e)
  56. cls.start_wechat(log_type, env)
  57. @classmethod
  58. def search_element(cls, log_type, driver: WebDriver, element):
  59. try:
  60. windowHandles = driver.window_handles
  61. # 遍历所有的handles,找到当前页面所在的handle:如果pageSource有包含你想要的元素,就是所要找的handle
  62. # 小程序的页面来回切换也需要:遍历所有的handles,切换到元素所在的handle
  63. for handle in windowHandles:
  64. driver.switch_to.window(handle)
  65. time.sleep(3)
  66. if len(driver.find_elements(By.XPATH, element)) != 0:
  67. return driver.find_element(By.XPATH, element)
  68. else:
  69. pass
  70. except Exception as e:
  71. Common.logger(log_type).warning('search_element异常:{}\n', e)
  72. @classmethod
  73. def search_video(cls, log_type, driver: WebDriver, sheetid, env):
  74. try:
  75. sheet = Feishu.get_values_batch(log_type, 'shipinhao', sheetid)
  76. if len(sheet) == 1:
  77. Common.logger(log_type).info('暂无数据\n')
  78. return
  79. else:
  80. for i in range(1, len(sheet)):
  81. driver.implicitly_wait(10)
  82. try:
  83. video_title = sheet[i][2]
  84. video_user = sheet[i][3]
  85. Common.logger(log_type).info('点击搜索按钮')
  86. driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
  87. Common.logger(log_type).info('输入视频标题:{}', video_title)
  88. time.sleep(3)
  89. driver.find_element(By.ID, 'com.tencent.mm:id/cd7').clear().send_keys(video_title[:40])
  90. Common.logger(log_type).info('点击搜索')
  91. driver.press_keycode(AndroidKey.SEARCH)
  92. driver.find_element(By.ID, 'com.tencent.mm:id/lm0').click()
  93. Common.logger(log_type).info('切换到webview')
  94. webview = driver.contexts
  95. driver.switch_to.context(webview[1])
  96. time.sleep(5)
  97. video_list = cls.search_element(log_type, driver, '//div[@class="unit"]/*[2]')
  98. Common.logger(log_type).info('点击"视频"分类')
  99. video_list.click()
  100. time.sleep(8)
  101. # webview搜索并点击目标视频
  102. while True:
  103. cls.i += 1
  104. title_element = cls.search_element(
  105. log_type, driver,
  106. '//div[@class="double-rich double-rich_vertical"]'
  107. '/*['+str(cls.i)+']//div[@class="title ellipsis_2"]')
  108. driver.execute_script(
  109. "arguments[0].scrollIntoView({block:'center',inline:'center'})", title_element)
  110. download_title = title_element.text
  111. download_user = cls.search_element(
  112. log_type, driver,
  113. '//div[@class="double-rich double-rich_vertical"]'
  114. '/*[' + str(cls.i) + ']//div[@class="vc-source"]').text.split('\n')[0]
  115. Common.logger(log_type).info('title:{}', download_title)
  116. Common.logger(log_type).info('user:{}\n', download_user)
  117. # 标题相似度>=50%, 用户名相似度>=80%
  118. # if video_title[:30] in download_title and video_user in download_user:
  119. # if difflib.SequenceMatcher(None, video_title, download_title).quick_ratio() >= 0.5 \
  120. # and difflib.SequenceMatcher(None, video_user, download_user).quick_ratio() >= 0.8:
  121. if difflib.SequenceMatcher(None, video_user, download_user).quick_ratio() >= 0.8:
  122. Common.logger(log_type).info('点击进入该视频详情')
  123. title_element.click()
  124. # 获取视频播放数据及播放地址
  125. cls.get_video_info(log_type, driver, sheetid)
  126. break
  127. elif cls.search_element(log_type, driver, '//span[@class="page-end"]'):
  128. Feishu.dimension_range(log_type, 'shipinhao', sheetid, 'ROWS', i + 1, i + 1)
  129. Common.logger(log_type).info('未搜索到该视频,删除视频信息\n')
  130. break
  131. else:
  132. pass
  133. except Exception as e:
  134. Common.logger(log_type).error('获取视频列表数据异常,重启微信:{}\n', e)
  135. # cls.start_wechat(log_type, env)
  136. # 下载/上传
  137. cls.xinshi_download_publish(log_type, sheetid, env)
  138. except Exception as e:
  139. Common.logger(log_type).error('search_video异常:{}\n', e)
  140. @classmethod
  141. def get_video_info(cls, log_type, driver: WebDriver, sheetid):
  142. try:
  143. # webview = driver.contexts
  144. Common.logger(log_type).info('切回NATIVE_APP\n')
  145. driver.switch_to.context('NATIVE_APP')
  146. # 点击播放器,获取视频时长
  147. # Common.logger(log_type).info('暂停播放')
  148. pause_btn = driver.find_element(By.ID, 'com.tencent.mm:id/eh4')
  149. pause_btn.click()
  150. start_time = driver.find_element(By.ID, 'com.tencent.mm:id/l59').get_attribute('name')
  151. start_time = int(start_time.split(':')[0]) * 60 + int(start_time.split(':')[-1])
  152. try:
  153. end_time = driver.find_element(By.ID, 'com.tencent.mm:id/l7i').get_attribute('name')
  154. except NoSuchElementException:
  155. end_time = driver.find_element(By.ID, 'com.tencent.mm:id/g73').get_attribute('name')
  156. end_time = int(end_time.split(':')[0]) * 60 + int(end_time.split(':')[-1])
  157. duration = start_time + end_time
  158. # 点赞
  159. like_id = driver.find_element(By.ID, 'com.tencent.mm:id/k04')
  160. like_cnt = like_id.get_attribute('name')
  161. if like_cnt == "" or like_cnt == "喜欢":
  162. like_cnt = 0
  163. elif '万' in like_cnt:
  164. like_cnt = float(like_cnt.split('万')[0]) * 10000
  165. elif '万+' in like_cnt:
  166. like_cnt = float(like_cnt.split('万+')[0]) * 10000
  167. else:
  168. like_cnt = float(like_cnt)
  169. # 分享
  170. share_id = driver.find_element(By.ID, 'com.tencent.mm:id/jhv')
  171. share_cnt = share_id.get_attribute('name')
  172. if share_cnt == "" or share_cnt == "转发":
  173. share_cnt = 0
  174. elif '万' in share_cnt:
  175. share_cnt = float(share_cnt.split('万')[0]) * 10000
  176. elif '万+' in share_cnt:
  177. share_cnt = float(share_cnt.split('万+')[0]) * 10000
  178. else:
  179. share_cnt = float(share_cnt)
  180. # 收藏
  181. favorite_id = driver.find_element(By.ID, 'com.tencent.mm:id/fnp')
  182. favorite_cnt = favorite_id.get_attribute('name')
  183. if favorite_cnt == "" or favorite_cnt == "收藏":
  184. favorite_cnt = 0
  185. elif '万' in favorite_cnt:
  186. favorite_cnt = float(favorite_cnt.split('万')[0]) * 10000
  187. elif '万+' in favorite_cnt:
  188. favorite_cnt = float(favorite_cnt.split('万+')[0]) * 10000
  189. else:
  190. favorite_cnt = float(favorite_cnt)
  191. # 评论
  192. comment_id = driver.find_element(By.ID, 'com.tencent.mm:id/bje')
  193. comment_cnt = comment_id.get_attribute('name')
  194. if comment_cnt == "" or comment_cnt == "评论":
  195. comment_cnt = 0
  196. elif '万' in comment_cnt:
  197. comment_cnt = float(comment_cnt.split('万')[0]) * 10000
  198. elif '万+' in comment_cnt:
  199. comment_cnt = float(comment_cnt.split('万+')[0]) * 10000
  200. else:
  201. comment_cnt = float(comment_cnt)
  202. # 把视频信息写入飞书feeds文档
  203. values = [[duration,
  204. like_cnt,
  205. share_cnt,
  206. favorite_cnt,
  207. comment_cnt]]
  208. time.sleep(1)
  209. Feishu.update_values(log_type, 'shipinhao', sheetid, 'E2:I2', values)
  210. Common.logger(log_type).info('视频信息写入飞书文档成功\n')
  211. # 分享给 windows 爬虫机
  212. share_id.click()
  213. driver.find_element(By.XPATH, '//*[@text="转发给朋友"]').click()
  214. driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
  215. driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
  216. while True:
  217. if Feishu.get_values_batch(log_type, 'shipinhao', sheetid)[1][11] is None:
  218. Common.logger(log_type).info('等待更新 URL 信息')
  219. time.sleep(10)
  220. else:
  221. Common.logger(log_type).info('URL 信息已更新\n')
  222. break
  223. except Exception as e:
  224. Common.logger(log_type).error('get_video_info异常:{}\n', e)
  225. @classmethod
  226. def xinshi_download_publish(cls, log_type, sheetid, env):
  227. try:
  228. download_sheet = Feishu.get_values_batch(log_type, 'shipinhao', sheetid)
  229. for i in range(1, len(download_sheet)):
  230. download_title = download_sheet[i][2].strip().replace('"', '') \
  231. .replace('“', '').replace('“', '…').replace("\n", "") \
  232. .replace("/", "").replace("\r", "").replace("#", "") \
  233. .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
  234. .replace(":", "").replace("*", "").replace("?", "") \
  235. .replace("?", "").replace('"', "").replace("<", "") \
  236. .replace(">", "").replace("|", "").replace(" ", "")
  237. download_duration = download_sheet[i][4]
  238. download_like_cnt = download_sheet[i][5]
  239. download_share_cnt = download_sheet[i][6]
  240. download_favorite_cnt = download_sheet[i][7]
  241. download_comment_cnt = download_sheet[i][8]
  242. download_username = download_sheet[i][3]
  243. download_head_url = download_sheet[i][9]
  244. download_cover_url = download_sheet[i][10]
  245. download_video_url = download_sheet[i][11]
  246. Common.logger(log_type).info("download_title:{}", download_title)
  247. Common.logger(log_type).info("download_username:{}", download_username)
  248. Common.logger(log_type).info("download_video_url:{}", download_video_url)
  249. if download_title is None or download_duration is None or download_video_url is None:
  250. Feishu.dimension_range(log_type, 'shipinhao', sheetid, 'ROWS', i + 1, i + 1)
  251. Common.logger(log_type).info('空行,删除成功\n')
  252. return
  253. elif str(download_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x
  254. in y]:
  255. Feishu.dimension_range(log_type, 'shipinhao', sheetid, 'ROWS', i + 1, i + 1)
  256. Common.logger(log_type).info('视频已下载,删除成功\n')
  257. return
  258. elif str(download_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'WAG7Dq') for x
  259. in y]:
  260. Feishu.dimension_range(log_type, 'shipinhao', sheetid, 'ROWS', i + 1, i + 1)
  261. Common.logger(log_type).info('视频已下载,删除成功\n')
  262. return
  263. elif str(download_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', '0i4jmV') for x
  264. in y]:
  265. Feishu.dimension_range(log_type, 'shipinhao', sheetid, 'ROWS', i + 1, i + 1)
  266. Common.logger(log_type).info('视频已下载,删除成功\n')
  267. return
  268. elif str(download_title) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x
  269. in y]:
  270. Feishu.dimension_range(log_type, 'shipinhao', sheetid, 'ROWS', i + 1, i + 1)
  271. Common.logger(log_type).info('视频已存在,删除成功\n')
  272. return
  273. else:
  274. # 下载封面
  275. Common.download_method(log_type=log_type, text="cover",
  276. d_name=str(download_title), d_url=str(download_cover_url))
  277. # 下载视频
  278. Common.download_method(log_type=log_type, text="video",
  279. d_name=str(download_title), d_url=str(download_video_url))
  280. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  281. with open("./videos/" + download_title
  282. + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  283. f_a.write('shipinhao' + str(int(time.time())) + "\n" +
  284. str(download_title) + "\n" +
  285. str(download_duration) + "\n" +
  286. str(download_favorite_cnt) + "\n" +
  287. str(download_comment_cnt) + "\n" +
  288. str(download_like_cnt) + "\n" +
  289. str(download_share_cnt) + "\n" +
  290. str(1920 * 1080) + "\n" +
  291. str(int(time.time())) + "\n" +
  292. str(download_username) + "\n" +
  293. str(download_head_url) + "\n" +
  294. str(download_video_url) + "\n" +
  295. str(download_cover_url) + "\n" +
  296. "shipinhao")
  297. Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
  298. Common.logger(log_type).info("开始上传视频:{}".format(download_title))
  299. if env == 'dev' and sheetid == 'gO4Sn4':
  300. our_video_id = Publish.upload_and_publish(log_type, env, "xinshi_hot")
  301. our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/"+str(our_video_id)+"/info"
  302. # 视频ID工作表,插入首行
  303. Feishu.insert_columns(log_type, "shipinhao", '0i4jmV', "ROWS", 1, 2)
  304. # 视频ID工作表,首行写入数据
  305. upload_time = int(time.time())
  306. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  307. "新视-热门榜",
  308. str(download_title),
  309. our_video_link,
  310. download_duration,
  311. download_like_cnt,
  312. download_share_cnt,
  313. download_favorite_cnt,
  314. download_comment_cnt,
  315. download_username,
  316. str(download_head_url),
  317. str(download_cover_url),
  318. str(download_video_url)]]
  319. time.sleep(1)
  320. Feishu.update_values(log_type, "shipinhao", '0i4jmV', "F2:V2", values)
  321. # 删除行或列,可选 ROWS、COLUMNS
  322. time.sleep(1)
  323. Feishu.dimension_range(log_type, "shipinhao", sheetid, "ROWS", i + 1, i + 1)
  324. Common.logger(log_type).info("视频上传完成:{}\n", our_video_link)
  325. return
  326. elif env == 'dev' and sheetid == 'aOjaIU':
  327. our_video_id = Publish.upload_and_publish(log_type, env, "xinshi_recommend")
  328. our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/"+str(our_video_id)+"/info"
  329. # 视频ID工作表,插入首行
  330. Feishu.insert_columns(log_type, "shipinhao", 'WAG7Dq', "ROWS", 1, 2)
  331. # 视频ID工作表,首行写入数据
  332. upload_time = int(time.time())
  333. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  334. "新视-推荐榜",
  335. str(download_title),
  336. our_video_link,
  337. download_duration,
  338. download_like_cnt,
  339. download_share_cnt,
  340. download_favorite_cnt,
  341. download_comment_cnt,
  342. download_username,
  343. str(download_head_url),
  344. str(download_cover_url),
  345. str(download_video_url)]]
  346. time.sleep(1)
  347. Feishu.update_values(log_type, "shipinhao", 'WAG7Dq', "F2:V2", values)
  348. # 删除行或列,可选 ROWS、COLUMNS
  349. time.sleep(1)
  350. Feishu.dimension_range(log_type, "shipinhao", sheetid, "ROWS", i + 1, i + 1)
  351. Common.logger(log_type).info("视频上传完成:{}\n", our_video_link)
  352. return
  353. elif env == 'prod' and sheetid == 'gO4Sn4':
  354. our_video_id = Publish.upload_and_publish(log_type, env, "xinshi_hot")
  355. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/"+str(our_video_id)+"/info"
  356. # 视频ID工作表,插入首行
  357. Feishu.insert_columns(log_type, "shipinhao", '0i4jmV', "ROWS", 1, 2)
  358. # 视频ID工作表,首行写入数据
  359. upload_time = int(time.time())
  360. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  361. "新视-热门榜",
  362. str(download_title),
  363. our_video_link,
  364. download_duration,
  365. download_like_cnt,
  366. download_share_cnt,
  367. download_favorite_cnt,
  368. download_comment_cnt,
  369. download_username,
  370. str(download_head_url),
  371. str(download_cover_url),
  372. str(download_video_url)]]
  373. time.sleep(1)
  374. Feishu.update_values(log_type, "shipinhao", '0i4jmV', "F2:V2", values)
  375. # 删除行或列,可选 ROWS、COLUMNS
  376. time.sleep(1)
  377. Feishu.dimension_range(log_type, "shipinhao", sheetid, "ROWS", i + 1, i + 1)
  378. Common.logger(log_type).info("视频上传完成:{}\n", our_video_link)
  379. return
  380. elif env == 'prod' and sheetid == 'aOjaIU':
  381. our_video_id = Publish.upload_and_publish(log_type, env, "xinshi_recommend")
  382. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/"+str(our_video_id)+"/info"
  383. # 视频ID工作表,插入首行
  384. Feishu.insert_columns(log_type, "shipinhao", 'WAG7Dq', "ROWS", 1, 2)
  385. # 视频ID工作表,首行写入数据
  386. upload_time = int(time.time())
  387. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  388. "新视-热门榜",
  389. str(download_title),
  390. our_video_link,
  391. download_duration,
  392. download_like_cnt,
  393. download_share_cnt,
  394. download_favorite_cnt,
  395. download_comment_cnt,
  396. download_username,
  397. str(download_head_url),
  398. str(download_cover_url),
  399. str(download_video_url)]]
  400. time.sleep(1)
  401. Feishu.update_values(log_type, "shipinhao", 'WAG7Dq', "F2:V2", values)
  402. # 删除行或列,可选 ROWS、COLUMNS
  403. time.sleep(1)
  404. Feishu.dimension_range(log_type, "shipinhao", sheetid, "ROWS", i + 1, i + 1)
  405. Common.logger(log_type).info("视频上传完成:{}\n", our_video_link)
  406. return
  407. else:
  408. our_video_id = Publish.upload_and_publish(log_type, env, "xinshi_hot")
  409. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/"+str(our_video_id)+"/info"
  410. # 视频ID工作表,插入首行
  411. Feishu.insert_columns(log_type, "shipinhao", 'WAG7Dq', "ROWS", 1, 2)
  412. # 视频ID工作表,首行写入数据
  413. upload_time = int(time.time())
  414. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  415. "新视-热门榜",
  416. str(download_title),
  417. our_video_link,
  418. download_duration,
  419. download_like_cnt,
  420. download_share_cnt,
  421. download_favorite_cnt,
  422. download_comment_cnt,
  423. download_username,
  424. str(download_head_url),
  425. str(download_cover_url),
  426. str(download_video_url)]]
  427. time.sleep(1)
  428. Feishu.update_values(log_type, "shipinhao", 'WAG7Dq', "F2:V2", values)
  429. # 删除行或列,可选 ROWS、COLUMNS
  430. time.sleep(1)
  431. Feishu.dimension_range(log_type, "shipinhao", sheetid, "ROWS", i + 1, i + 1)
  432. Common.logger(log_type).info("视频上传完成:{}\n", our_video_link)
  433. return
  434. except Exception as e:
  435. Feishu.dimension_range(log_type, "shipinhao", "FSDlBy", "ROWS", 2, 2)
  436. Common.logger(log_type).error('download_publish异常,删除视频信息成功:{}\n', e)
  437. if __name__ == '__main__':
  438. XinshiAPP.start_wechat('xinshi', 'dev')
  439. pass