get_search_key.py 6.6 KB

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/10
import json
import os
import sys
import time

import psutil
from appium import webdriver
from selenium.webdriver.common.by import By

sys.path.append(os.getcwd())  # make the project root importable before pulling in common.*
from common.common import Common
from common.feishu import Feishu  # Feishu helper used below; module path assumed, adjust to the project layout
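
# Overall flow (Windows host):
#   1. click_video()  - drives the WeChat desktop client through an Appium session and clicks the newest
#                       '消息' entry (presumably a shared 视频号 video) so Charles can capture its playback traffic.
#   2. get_url()      - reads the newest Charles session export from ./chlsfiles/, parses it as JSON and
#                       pulls the finder.video.qq.com video / cover URLs out of the captured requests.
#   3. write_url()    - writes the recovered URLs into the 'shipinhao' Feishu sheet (tab FSDlBy, row 2).
#   4. run_get_url()  - polls the sheet and repeats the cycle while there are rows waiting for URLs.
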
class ShipinhaoWindows:
    @classmethod
    def kill_pid(cls, log_type):
        try:
            os.system('chcp 65001')  # switch the cmd code page from the default GBK to UTF-8
            list_process = list()
            pid_list = psutil.pids()
            for sub_pid in pid_list:
                try:
                    process_info = psutil.Process(sub_pid)
                    if process_info.name() == 'WechatBrowser.exe' or process_info.name() == 'WeChatPlayer.exe':
                        list_process.append(sub_pid)
                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                    pass
            for pid in list_process:
                os.system('taskkill /f /pid ' + str(pid))
        except Exception as e:
            Common.logger(log_type).error('kill_pid异常:{}', e)
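
    # click_video: attaches to the WeChat desktop client via an Appium Remote session
    # (command_executor http://127.0.0.1:4723, assumed to be WinAppDriver or an Appium server with the
    # Windows driver), clicks the newest '消息' entry to start playback, waits 10 s so the traffic is
    # captured, then calls kill_pid() to close the embedded WechatBrowser/WeChatPlayer processes.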
    @classmethod
    def click_video(cls, log_type, crawler):
        try:
            Common.logger(log_type, crawler).info('启动"微信"')
            desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
            driver = webdriver.Remote(
                command_executor='http://127.0.0.1:4723',
                desired_capabilities=desired_caps)
            driver.implicitly_wait(10)

            # Common.logger(log_type).info('点击"聊天窗口"')
            # driver.find_element(By.NAME, '聊天').click()
            #
            # Common.logger(log_type).info('点击"爬虫群"')
            # driver.find_elements(By.NAME, '爬虫群')[0].click()

            Common.logger(log_type, crawler).info('点击微信指数')
            driver.find_elements(By.NAME, '消息')[-1].click()

            Common.logger(log_type, crawler).info('休眠 10 秒,退出微信指数')
            time.sleep(10)
            cls.kill_pid(log_type)

            Common.logger(log_type, crawler).info('退出微信')
            driver.quit()
        except Exception as e:
            Common.logger(log_type, crawler).error('click_video异常:{}', e)
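
    # get_url: expects Charles to drop its session exports (JSON format) into ./chlsfiles/. The newest file
    # (last in name order) is renamed to .txt, loaded with json.load, and every captured request is inspected;
    # entries whose host is finder.video.qq.com are split by path into video (/251/20302/stodownload) and
    # cover (/251/20304/stodownload) URLs. Each entry is assumed to carry 'host', 'path' and 'query' keys,
    # which is what the indexing below relies on.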
    @classmethod
    def get_url(cls, log_type):
        try:
            # directory where the Charles capture files are saved
            charles_file_dir = r"./chlsfiles/"

            if len(os.listdir(charles_file_dir)) == 0:
                Common.logger(log_type).info("未找到chlsfile文件,等待2s")
                time.sleep(2)
            else:
                # all files under the target directory, sorted by name
                all_file = sorted(os.listdir(charles_file_dir))
                # pick the target (last) file
                old_file = all_file[-1]
                # split the file name from its extension
                new_file = os.path.splitext(old_file)
                # rename the file so it carries a .txt suffix
                os.rename(os.path.join(charles_file_dir, old_file),
                          os.path.join(charles_file_dir, new_file[0] + ".txt"))

                with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
                    contents = json.load(f, strict=False)

                video_url_list = []
                cover_url_list = []
                if "finder.video.qq.com" in [text['host'] for text in contents]:
                    for text in contents:
                        if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
                            video_url_list.append(text)
                        elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20304/stodownload":
                            cover_url_list.append(text)

                    video_url = video_url_list[0]['host'] + video_url_list[0]['path'] + '?' + video_url_list[0]['query']
                    cover_url = cover_url_list[0]['host'] + cover_url_list[0]['path'] + '?' + cover_url_list[0]['query']
                    head_url = cover_url
                    # print(f'video_url:{video_url}')
                    # print(f'cover_url:{cover_url}')
                    # print(f'head_url:{head_url}')
                    return video_url, cover_url, head_url
                else:
                    Common.logger(log_type).info("未找到url")
                    return '未找到url'
        except Exception as e:
            Common.logger(log_type).exception("get_url异常:{}\n", e)
            return None
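
    # write_url: checks cell L2 (row index 1, column index 11) of the 'shipinhao' sheet tab FSDlBy; when it is
    # empty, it clears old Charles files, triggers a playback via click_video(), reads the captured URLs with
    # get_url() and writes them back to J2:L2 as [head_url, cover_url, video_url] with an https:// prefix.
    # When the cell already holds a value, the row is considered done and the method just sleeps.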
    @classmethod
    def write_url(cls, log_type):
        try:
            while True:
                if Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')[1][11] is None:
                    Common.del_charles_files('recommend')
                    cls.click_video(log_type, 'shipinhao')  # crawler name assumed to match the 'shipinhao' sheet key
                    Common.logger(log_type).info('等待 2s')
                    time.sleep(2)

                    Common.logger(log_type).info('获取视频头像/封面/播放地址')
                    urls = cls.get_url(log_type)
                    if urls == '未找到url':
                        time.sleep(1)
                        cls.write_url(log_type)
                    elif urls is None:
                        time.sleep(1)
                        cls.write_url(log_type)
                    else:
                        Feishu.update_values(log_type, 'shipinhao', 'FSDlBy', 'J2:L2',
                                             [['https://' + urls[2], 'https://' + urls[1], 'https://' + urls[0]]])
                        Common.logger(log_type).info('视频地址信息写入飞书成功\n')
                        Common.del_charles_files('recommend')
                        break
                else:
                    Common.logger(log_type).info('视频已有地址信息,休眠 10s')
                    time.sleep(10)
                    break
        except Exception as e:
            # Feishu.dimension_range(log_type, 'shipinhao', 'FSDlBy', 'ROWS', 2, 2)
            Common.logger(log_type).error('write_url异常:{}\n', e)
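
    # run_get_url: top-level polling loop. Feishu.get_values_batch returning a single row is read as
    # "header only, nothing pending", so the loop sleeps and exits; otherwise it calls write_url()
    # and re-checks the sheet.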
    @classmethod
    def run_get_url(cls, log_type):
        try:
            while True:
                if len(Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy')) == 1:
                    Common.logger(log_type).info('暂无需要获取地址的视频信息')
                    time.sleep(30)
                    break
                else:
                    cls.write_url(log_type)
        except Exception as e:
            Common.logger(log_type).error('run_get_url异常:{}\n', e)

if __name__ == '__main__':
    while True:
        ShipinhaoWindows.run_get_url('recommend')
        Common.del_logs('recommend')
        time.sleep(1)
        pass
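
# Runtime prerequisites (inferred from the calls above, not configured by this script):
#   - a WinAppDriver / Appium server with the Windows driver listening on http://127.0.0.1:4723
#   - the WeChat desktop client installed at C:\Program Files (x86)\Tencent\WeChat\WeChat.exe
#   - Charles capturing WeChat traffic and exporting its sessions as JSON files into ./chlsfiles/
#   - Feishu credentials configured for the Common/Feishu helpers so the 'shipinhao' sheet can be read and written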