shipinhao_get_url.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/9/1
  4. import json
  5. import os
  6. import sys
  7. import time
  8. import psutil as psutil
  9. # import atomacos
  10. from appium import webdriver
  11. from selenium.webdriver.common.by import By
  12. sys.path.append(os.getcwd())
  13. # from crawler_shipinhao.main.common import Common
  14. # from crawler_shipinhao.main.feishu_lib import Feishu
  15. from main.feishu_lib import Feishu
  16. from main.common import Common
  class GetUrl:
      """Capture WeChat Channels video/cover URLs and write them to Feishu.

      The workflow visible in this class is:

      1. ``run_get_url`` polls a Feishu sheet and calls ``write_url`` while the
         sheet has more than one row.
      2. ``write_url`` drives the WeChat desktop client through Appium
         (``click_video``) so that a video is opened, then parses the newest
         Charles capture file (``get_url``) and writes the URLs back to Feishu.

      All methods are classmethods and log through ``Common.logger(log_type)``.
      """

      @classmethod
      def kill_pid(cls):
          """Force-kill every running WeChat browser / player helper process.

          Windows only: relies on the ``chcp`` and ``taskkill`` commands.
          """
          # Switch the cmd code page from the default GBK to UTF-8.
          os.system('chcp 65001')
          target_names = ('WechatBrowser.exe', 'WeChatPlayer.exe')
          list_process = []
          for sub_pid in psutil.pids():
              try:
                  if psutil.Process(sub_pid).name() in target_names:
                      list_process.append(sub_pid)
              except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                  # The process exited or cannot be inspected; nothing to kill.
                  continue
          for pid in list_process:
              os.system('taskkill /f /pid ' + str(pid))

      @classmethod
      def click_video(cls, log_type):
          """Open WeChat via Appium and click the last message in the crawler group.

          After the click the method waits 5 seconds, kills the helper processes
          via ``kill_pid`` and quits the driver. Any exception is logged and
          swallowed.

          :param log_type: name passed to ``Common.logger`` to select the logger.
          """
          driver = None
          try:
              Common.logger(log_type).info('启动"微信"')
              desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
              driver = webdriver.Remote(
                  command_executor='http://127.0.0.1:4723',
                  desired_capabilities=desired_caps)
              driver.implicitly_wait(10)

              Common.logger(log_type).info('点击"聊天窗口"')
              driver.find_element(By.NAME, '聊天').click()

              Common.logger(log_type).info('点击"爬虫群"')
              driver.find_element(By.NAME, '爬虫群').click()

              Common.logger(log_type).info('点击视频')
              # The most recent element named '消息' is the one to open.
              driver.find_elements(By.NAME, '消息')[-1].click()

              Common.logger(log_type).info('退出视频号')
              # Presumably gives the player time to issue its requests so the
              # proxy can record them before the processes are killed -- confirm.
              time.sleep(5)
              cls.kill_pid()

              Common.logger(log_type).info('退出微信')
          except Exception as e:
              Common.logger(log_type).error('click_video异常:{}', e)
          finally:
              # Always release the Appium session, even when a step above failed;
              # otherwise every failed attempt leaves a session behind.
              if driver is not None:
                  try:
                      driver.quit()
                  except Exception as e:
                      Common.logger(log_type).error('click_video异常:{}', e)

      @classmethod
      def get_url(cls, log_type):
          """Extract the video and cover URLs from the newest Charles capture file.

          The newest file (last in sorted order) under ``./chlsfiles/`` is renamed
          to a ``.txt`` suffix and parsed as a JSON list of request records, each
          read through its ``host``, ``path`` and ``query`` keys.

          :param log_type: name passed to ``Common.logger`` to select the logger.
          :return: ``(video_url, cover_url, head_url)`` without a scheme prefix
              (``head_url`` equals ``cover_url``); the string ``'未找到url'`` when
              the capture contains no matching video or cover request; ``None``
              when the directory is empty or an exception occurred.
          """
          try:
              # Directory where Charles saves its capture files.
              charles_file_dir = r"./chlsfiles/"
              all_file = sorted(os.listdir(charles_file_dir))
              if not all_file:
                  Common.logger(log_type).info("未找到chlsfile文件,等待2s")
                  time.sleep(2)
                  return None

              # Pick the newest capture and give it a .txt extension.
              old_file = all_file[-1]
              stem = os.path.splitext(old_file)[0]
              txt_path = os.path.join(charles_file_dir, stem + ".txt")
              os.rename(os.path.join(charles_file_dir, old_file), txt_path)

              with open(txt_path, encoding='utf-8-sig', errors='ignore') as f:
                  contents = json.load(f, strict=False)

              video_url_list = []
              cover_url_list = []
              for text in contents:
                  if text["host"] != "finder.video.qq.com":
                      continue
                  if text["path"] == "/251/20302/stodownload":
                      video_url_list.append(text)
                  elif text["path"] == "/251/20304/stodownload":
                      cover_url_list.append(text)

              # Report "not found" explicitly instead of relying on an IndexError
              # from indexing an empty list below.
              if not video_url_list or not cover_url_list:
                  Common.logger(log_type).info("未找到url")
                  return '未找到url'

              video = video_url_list[0]
              cover = cover_url_list[0]
              video_url = video['host'] + video['path'] + '?' + video['query']
              cover_url = cover['host'] + cover['path'] + '?' + cover['query']
              head_url = cover_url
              return video_url, cover_url, head_url
          except Exception as e:
              Common.logger(log_type).exception("get_url异常:{}", e)
              return None

      @classmethod
      def write_url(cls, log_type):
          """Capture URLs for the pending video and write them to the Feishu sheet.

          While cell ``[1][11]`` of sheet ``'FSDlBy'`` or ``'4Y2e5n'`` is ``None``
          (presumably column L of row 2, i.e. the play URL -- confirm against
          ``Feishu.get_values_batch``), the method clears old capture files,
          triggers playback via ``click_video``, parses the capture via
          ``get_url`` and writes head/cover/video URLs to range ``J2:L2`` of
          sheet ``'FSDlBy'``. If the cell is already filled it sleeps 10 seconds
          and returns. Any exception is logged and swallowed.

          :param log_type: name passed to ``Common.logger`` and the Feishu helpers.
          """
          try:
              while True:
                  if Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][11] is None \
                          or Feishu.get_values_batch(log_type, 'shipinhao', '4Y2e5n')[1][11] is None:
                      Common.del_charles_files('recommend')
                      cls.click_video(log_type)
                      Common.logger(log_type).info('等待 2s')
                      time.sleep(2)
                      Common.logger(log_type).info('获取视频头像/封面/播放地址')
                      urls = cls.get_url(log_type)
                      if urls is None or urls == '未找到url':
                          # Nothing usable was captured: retry in this loop rather
                          # than recursing (unbounded stack growth) or indexing
                          # into None (TypeError).
                          continue
                      video_url, cover_url, head_url = urls
                      Feishu.update_values(
                          log_type, 'shipinhao', 'FSDlBy', 'J2:L2',
                          [['https://' + head_url, 'https://' + cover_url, 'https://' + video_url]])
                      Common.logger(log_type).info('视频地址信息写入飞书成功\n')
                      Common.del_charles_files('recommend')
                      break
                  else:
                      Common.logger(log_type).info('视频已有地址信息,休眠 10s')
                      time.sleep(10)
                      break
          except Exception as e:
              Common.logger(log_type).error('write_url异常:{}', e)

      @classmethod
      def run_get_url(cls, log_type):
          """Call ``write_url`` repeatedly until the Feishu sheet has a single row.

          When ``Feishu.get_values_batch`` returns exactly one row (presumably
          only the header -- confirm), the method logs that nothing is pending,
          sleeps 30 seconds and returns. Any exception is logged and swallowed.

          :param log_type: name passed to ``Common.logger`` and the Feishu helpers.
          """
          try:
              while True:
                  if len(Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')) == 1:
                      Common.logger(log_type).info('暂无需要获取地址的视频信息')
                      time.sleep(30)
                      break
                  cls.write_url(log_type)
          except Exception as e:
              Common.logger(log_type).error('run_get_url异常:{}', e)
  if __name__ == '__main__':
      # Script entry point: poll forever for the 'recommend' crawler, cleaning
      # up its logs after every pass and pausing one second between passes.
      crawler_name = 'recommend'
      while True:
          GetUrl.run_get_url(crawler_name)
          Common.del_logs(crawler_name)
          time.sleep(1)