shipinhao_get_url.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/9/1
  4. import json
  5. import os
  6. import sys
  7. import time
  8. import psutil as psutil
  9. # import atomacos
  10. from appium import webdriver
  11. from selenium.webdriver.common.by import By
  12. sys.path.append(os.getcwd())
  13. # from crawler_shipinhao.main.common import Common
  14. # from crawler_shipinhao.main.feishu_lib import Feishu
  15. from main.feishu_lib import Feishu
  16. from main.common import Common
  17. class GetUrl:
  18. @classmethod
  19. def kill_pid(cls):
  20. os.system('chcp 65001') # 将cmd的显示字符编码从默认的GBK改为UTF-8
  21. list_process = list()
  22. pid_list = psutil.pids()
  23. for sub_pid in pid_list:
  24. try:
  25. process_info = psutil.Process(sub_pid)
  26. # print(process_info.name())
  27. if process_info.name() == 'WechatBrowser.exe' or process_info.name() == 'WeChatPlayer.exe':
  28. list_process.append(sub_pid)
  29. except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
  30. pass
  31. # print('\n')
  32. # print(list_process)
  33. for pid in list_process:
  34. os.system('taskkill /f /pid ' + str(pid))
  35. @classmethod
  36. def click_video(cls, log_type, video_title):
  37. try:
  38. Common.logger(log_type).info('启动"微信"')
  39. desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
  40. driver = webdriver.Remote(
  41. command_executor='http://127.0.0.1:4723',
  42. desired_capabilities=desired_caps)
  43. driver.implicitly_wait(10)
  44. Common.logger(log_type).info('点击"聊天窗口"')
  45. # print('点击"聊天窗口"')
  46. driver.find_element(By.NAME, '聊天').click()
  47. Common.logger(log_type).info('点击"爬虫群"')
  48. # print('点击"爬虫群"')
  49. driver.find_element(By.NAME, '爬虫群').click()
  50. Common.logger(log_type).info('点击视频:{}', video_title)
  51. # print(f'点击视频{video_title}')
  52. driver.find_elements(By.ID, '42.852020.3.487')[-1].click()
  53. Common.logger(log_type).info('退出视频号')
  54. time.sleep(2)
  55. cls.kill_pid()
  56. Common.logger(log_type).info('退出微信')
  57. driver.quit()
  58. except Exception as e:
  59. Common.logger(log_type).error('click_video异常:{}', e)
  60. @classmethod
  61. def get_url(cls, log_type):
  62. try:
  63. # charles 抓包文件保存目录
  64. # charles_file_dir = r"./crawler_kanyikan_recommend/chlsfiles/"
  65. charles_file_dir = r"./chlsfiles/"
  66. if int(len(os.listdir(charles_file_dir))) == 0:
  67. Common.logger(log_type).info("未找到chlsfile文件,等待2s")
  68. time.sleep(2)
  69. else:
  70. # 目标文件夹下所有文件
  71. all_file = sorted(os.listdir(charles_file_dir))
  72. # 获取到目标文件
  73. old_file = all_file[-1]
  74. # 分离文件名与扩展名
  75. new_file = os.path.splitext(old_file)
  76. # 重命名文件后缀
  77. os.rename(os.path.join(charles_file_dir, old_file),
  78. os.path.join(charles_file_dir, new_file[0] + ".txt"))
  79. with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
  80. contents = json.load(f, strict=False)
  81. video_url_list = []
  82. cover_url_list = []
  83. if "finder.video.qq.com" in [text['host'] for text in contents]:
  84. for text in contents:
  85. if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
  86. video_url_list.append(text)
  87. elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20304/stodownload":
  88. cover_url_list.append(text)
  89. video_url = video_url_list[0]['host']+video_url_list[0]['path']+'?'+video_url_list[0]['query']
  90. cover_url = cover_url_list[0]['host']+cover_url_list[0]['path']+'?'+cover_url_list[0]['query']
  91. head_url = cover_url
  92. # print(f'video_url:{video_url}')
  93. # print(f'cover_url:{cover_url}')
  94. # print(f'head_url:{head_url}')
  95. return video_url, cover_url, head_url
  96. else:
  97. Common.logger(log_type).info("未找到url")
  98. return '未找到url'
  99. except Exception as e:
  100. Common.logger(log_type).exception("get_url异常:{}", e)
  101. return None
  102. @classmethod
  103. def write_url(cls, log_type):
  104. try:
  105. while True:
  106. if Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][11] is None:
  107. # Common.logger(log_type).info('开始点击分享的视频')
  108. # print(f"开始点击分享的视频:{Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][2]}")
  109. Common.del_charles_files('recommend')
  110. cls.click_video(log_type, Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][2])
  111. Common.logger(log_type).info('等待 2s')
  112. time.sleep(2)
  113. Common.logger(log_type).info('获取视频头像/封面/播放地址')
  114. # print('获取视频头像/封面/播放地址')
  115. urls = cls.get_url(log_type)
  116. if urls == '未找到url':
  117. cls.write_url(log_type)
  118. else:
  119. Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'J2:L2',
  120. [['https://'+urls[2], 'https://'+urls[1], 'https://'+urls[0]]])
  121. Common.logger(log_type).info('视频地址信息写入飞书成功\n')
  122. Common.del_charles_files('recommend')
  123. break
  124. else:
  125. Common.logger(log_type).info('视频已有地址信息,休眠 10s')
  126. time.sleep(10)
  127. break
  128. except Exception as e:
  129. # Feishu.dimension_range(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 2, 2)
  130. Common.logger(log_type).error('write_url异常:{}', e)
  131. @classmethod
  132. def run_get_url(cls, log_type):
  133. try:
  134. while True:
  135. if len(Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')) == 1:
  136. Common.logger(log_type).info('暂无需要获取地址的视频信息')
  137. time.sleep(30)
  138. break
  139. else:
  140. cls.write_url(log_type)
  141. except Exception as e:
  142. Common.logger(log_type).error('run_get_url异常:{}', e)
  143. if __name__ == '__main__':
  144. # GetUrl.write_url('recommend')
  145. # print(len(Feishu.get_values_batch('recommend', 'shipinhao', 'FSDlBy')))
  146. while True:
  147. GetUrl.run_get_url('recommend')
  148. Common.del_logs('recommend')
  149. time.sleep(1)