shipinhao_get_url.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/9/1
  4. import json
  5. import os
  6. import sys
  7. import time
  8. # import atomacos
  9. from appium import webdriver
  10. from selenium.webdriver.common.by import By
  11. sys.path.append(os.getcwd())
  12. # from crawler_shipinhao.main.common import Common
  13. # from crawler_shipinhao.main.feishu_lib import Feishu
  14. from main.feishu_lib import Feishu
  15. from main.common import Common
  16. class GetUrl:
  17. @classmethod
  18. def click_video(cls, log_type, video_title):
  19. try:
  20. Common.logger(log_type).info('启动"微信"')
  21. desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
  22. driver = webdriver.Remote(
  23. command_executor='http://127.0.0.1:4723',
  24. desired_capabilities=desired_caps)
  25. driver.implicitly_wait(10)
  26. Common.logger(log_type).info('点击"聊天窗口"')
  27. # print('点击"聊天窗口"')
  28. driver.find_element(By.NAME, '聊天').click()
  29. Common.logger(log_type).info('点击"爬虫群"')
  30. # print('点击"爬虫群"')
  31. driver.find_element(By.NAME, '爬虫群').click()
  32. Common.logger(log_type).info('点击视频:{}', video_title)
  33. # print(f'点击视频{video_title}')
  34. driver.find_elements(By.ID, '42.852020.3.487')[-1].click()
  35. # 切换 windows 窗口
  36. # windows = driver.window_handles
  37. # driver.switch_to.window(windows[0])
  38. Common.logger(log_type).info('休眠 3s 后,退出微信')
  39. time.sleep(3)
  40. driver.quit()
  41. except Exception as e:
  42. Common.logger(log_type).error('click_video异常:{}', e)
  43. @classmethod
  44. def get_url(cls, log_type):
  45. try:
  46. # charles 抓包文件保存目录
  47. # charles_file_dir = r"./crawler_kanyikan_recommend/chlsfiles/"
  48. charles_file_dir = r"./chlsfiles/"
  49. if int(len(os.listdir(charles_file_dir))) == 1:
  50. Common.logger(log_type).info("未找到chlsfile文件,等待60s")
  51. time.sleep(60)
  52. else:
  53. # 目标文件夹下所有文件
  54. all_file = sorted(os.listdir(charles_file_dir))
  55. # 获取到目标文件
  56. old_file = all_file[-1]
  57. # 分离文件名与扩展名
  58. new_file = os.path.splitext(old_file)
  59. # 重命名文件后缀
  60. os.rename(os.path.join(charles_file_dir, old_file),
  61. os.path.join(charles_file_dir, new_file[0] + ".txt"))
  62. with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
  63. contents = json.load(f, strict=False)
  64. video_url_list = []
  65. cover_url_list = []
  66. if "finder.video.qq.com" in [text['host'] for text in contents]:
  67. for text in contents:
  68. if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
  69. video_url_list.append(text)
  70. elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20304/stodownload":
  71. cover_url_list.append(text)
  72. video_url = video_url_list[0]['host']+video_url_list[0]['path']+'?'+video_url_list[0]['query']
  73. cover_url = cover_url_list[0]['host']+cover_url_list[0]['path']+'?'+cover_url_list[0]['query']
  74. head_url = cover_url
  75. # print(f'video_url:{video_url}')
  76. # print(f'cover_url:{cover_url}')
  77. # print(f'head_url:{head_url}')
  78. return video_url, cover_url, head_url
  79. else:
  80. Common.logger(log_type).info("未找到 url,10s后重新获取")
  81. time.sleep(10)
  82. cls.get_url(log_type)
  83. except Exception as e:
  84. Common.logger(log_type).exception("get_url异常:{}", e)
  85. return None
  86. @classmethod
  87. def write_url(cls, log_type):
  88. try:
  89. while True:
  90. if Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][11] is None:
  91. # Common.logger(log_type).info('开始点击分享的视频')
  92. # print(f"开始点击分享的视频:{Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][2]}")
  93. cls.click_video(log_type, Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][2])
  94. Common.logger(log_type).info('等待 10s')
  95. time.sleep(10)
  96. Common.logger(log_type).info('获取视频头像/封面/播放地址')
  97. # print('获取视频头像/封面/播放地址')
  98. urls = cls.get_url(log_type)
  99. Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'J2:L2',
  100. [['https://'+urls[2], 'https://'+urls[1], 'https://'+urls[0]]])
  101. Common.logger(log_type).info('视频地址信息写入飞书成功\n')
  102. Common.del_charles_files('recommend')
  103. break
  104. else:
  105. Common.logger(log_type).info('视频已有地址信息,休眠 10s')
  106. time.sleep(10)
  107. break
  108. except Exception as e:
  109. # Feishu.dimension_range(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 2, 2)
  110. Common.logger(log_type).error('write_url异常:{}', e)
  111. @classmethod
  112. def run_get_url(cls, log_type):
  113. try:
  114. while True:
  115. if len(Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')) == 1:
  116. Common.logger(log_type).info('暂无需要获取地址的视频信息')
  117. time.sleep(30)
  118. break
  119. else:
  120. cls.write_url(log_type)
  121. except Exception as e:
  122. Common.logger(log_type).error('run_get_url异常:{}', e)
  123. if __name__ == '__main__':
  124. # GetUrl.write_url('recommend')
  125. # print(len(Feishu.get_values_batch('recommend', 'shipinhao', 'FSDlBy')))
  126. while True:
  127. GetUrl.run_get_url('recommend')
  128. time.sleep(1)