# shipinhao_get_url.py
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/9/1
  4. import json
  5. import os
  6. import sys
  7. import time
  8. # import atomacos
  9. from appium import webdriver
  10. from selenium.webdriver.common.by import By
  11. sys.path.append(os.getcwd())
  12. # from crawler_shipinhao.main.common import Common
  13. # from crawler_shipinhao.main.feishu_lib import Feishu
  14. from main.feishu_lib import Feishu
  15. from main.common import Common
  16. class GetUrl:
  17. @classmethod
  18. def click_video(cls, log_type, video_title):
  19. Common.logger(log_type).info('启动"微信"')
  20. desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
  21. driver = webdriver.Remote(
  22. command_executor='http://127.0.0.1:4723',
  23. desired_capabilities=desired_caps)
  24. driver.implicitly_wait(10)
  25. Common.logger(log_type).info('点击"聊天窗口"')
  26. print('点击"聊天窗口"')
  27. driver.find_element(By.NAME, '聊天').click()
  28. Common.logger(log_type).info('点击"爬虫群"')
  29. print('点击"爬虫群"')
  30. driver.find_element(By.NAME, '爬虫群').click()
  31. Common.logger(log_type).info('点击视频:{}', video_title)
  32. print(f'点击视频{video_title}')
  33. driver.find_elements(By.ID, '42.852020.3.487')[-1].click()
  34. # driver.find_element(By.NAME, video_title[:10]).click()
  35. # els = driver.find_elements(By.TAG_NAME, '按钮')
  36. # print(len(els))
  37. time.sleep(5)
  38. print('退出')
  39. driver.quit()
  40. # windows = driver.window_handles
  41. # driver.switch_to.window(windows[0])
  42. @classmethod
  43. def get_url(cls, log_type):
  44. try:
  45. # charles 抓包文件保存目录
  46. # charles_file_dir = r"./crawler_kanyikan_recommend/chlsfiles/"
  47. charles_file_dir = r"./chlsfiles/"
  48. if int(len(os.listdir(charles_file_dir))) == 1:
  49. Common.logger(log_type).info("未找到chlsfile文件,等待60s")
  50. time.sleep(60)
  51. else:
  52. # 目标文件夹下所有文件
  53. all_file = sorted(os.listdir(charles_file_dir))
  54. # 获取到目标文件
  55. old_file = all_file[-1]
  56. # 分离文件名与扩展名
  57. new_file = os.path.splitext(old_file)
  58. # 重命名文件后缀
  59. os.rename(os.path.join(charles_file_dir, old_file),
  60. os.path.join(charles_file_dir, new_file[0] + ".txt"))
  61. with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
  62. contents = json.load(f, strict=False)
  63. video_url_list = []
  64. cover_url_list = []
  65. if "finder.video.qq.com" in [text['host'] for text in contents]:
  66. for text in contents:
  67. if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
  68. video_url_list.append(text)
  69. elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20304/stodownload":
  70. cover_url_list.append(text)
  71. video_url = video_url_list[0]['host']+video_url_list[0]['path']+'?'+video_url_list[0]['query']
  72. cover_url = cover_url_list[0]['host']+cover_url_list[0]['path']+'?'+cover_url_list[0]['query']
  73. head_url = cover_url
  74. print(f'video_url:{video_url}')
  75. print(f'cover_url:{cover_url}')
  76. print(f'head_url:{head_url}')
  77. return video_url, cover_url, head_url
  78. else:
  79. Common.logger(log_type).info("未找到 url,10s后重新获取")
  80. time.sleep(10)
  81. cls.get_url(log_type)
  82. except Exception as e:
  83. Common.logger(log_type).exception("get_url异常:{}", e)
  84. return None
  85. @classmethod
  86. def write_url(cls, log_type):
  87. while True:
  88. if Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][11] is None:
  89. # Common.logger(log_type).info('开始点击分享的视频')
  90. print(f"开始点击分享的视频:{Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][2]}")
  91. cls.click_video(log_type, Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][2])
  92. time.sleep(60)
  93. Common.logger(log_type).info('获取视频头像/封面/播放地址')
  94. print('获取视频头像/封面/播放地址')
  95. urls = cls.get_url(log_type)
  96. Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'J2:L2',
  97. [['https://'+urls[2], 'https://'+urls[1], 'https://'+urls[0]]])
  98. Common.logger(log_type).info('视频地址信息写入飞书成功\n')
  99. break
  100. else:
  101. break
  102. if __name__ == '__main__':
  103. GetUrl.write_url('recommend')