  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/2/10
  4. """
  5. 部署机器: Windows 笔记本
  6. 获取微信指数小程序请求参数:search_key
  7. 1. 启动 WinAppDriver.exe
  8. 2. 启动 Charles.exe:
  9. 2.1 选中 Proxy - Windows Proxy
  10. 2.2 选中 Tools - Auto Save - Enable Auto Save
  11. 3. 启动 Python 脚本:
  12. 3.1 cd D:\piaoquan_crawler
  13. 3.2 python .\weixinzhishu\weixinzhishu_main\search_key.py
  14. 每 10 秒获取最新search_key,写入飞书: https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
  15. """
  16. import json
  17. import os
  18. import sys
  19. import time
  20. import psutil
  21. from appium import webdriver
  22. from selenium.webdriver.common.by import By
  23. sys.path.append(os.getcwd())
  24. from common.common import Common
  25. from common.feishu import Feishu
  26. class Searchkey:
  27. @classmethod
  28. def start_wechat(cls, log_type, crawler):
  29. try:
  30. Common.logger(log_type, crawler).info('启动"微信"')
  31. desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
  32. driver = webdriver.Remote(
  33. command_executor='http://127.0.0.1:4723',
  34. desired_capabilities=desired_caps)
  35. driver.implicitly_wait(10)
  36. Common.logger(log_type, crawler).info('选择对话人"微信同步助手"')
  37. driver.find_elements(By.NAME, '微信同步助手')[0].click()
  38. time.sleep(1)
  39. Common.logger(log_type, crawler).info('点击"微信指数"')
  40. driver.find_elements(By.NAME, '消息')[-1].click()
  41. time.sleep(1)
  42. Common.logger(log_type, crawler).info('退出微信')
  43. driver.quit()
  44. time.sleep(1)
  45. Common.logger(log_type, crawler).info('关闭微信指数')
  46. weixinzhishu_driver = cls.close_weixinzhishu(log_type, crawler)
  47. weixinzhishu_driver.find_elements(By.NAME, '关闭')[-1].click()
  48. except Exception as e:
  49. Common.logger(log_type, crawler).error(f'start_wechat异常:{e}\n')
  50. @classmethod
  51. def close_weixinzhishu(cls, log_type, crawler, app_name='微信指数'):
  52. """
  53. *通过名字找到windowsdriver
  54. *通过窗口名称,从桌面对象获取webdriver对象
  55. """
  56. new_caps = {'app': "Root"}
  57. try:
  58. new_driver = webdriver.Remote(command_executor='http://127.0.0.1:4723', desired_capabilities=new_caps)
  59. windowElement = new_driver.find_elements(By.NAME, app_name)
  60. if len(windowElement) != 0:
  61. newWindowHandle = hex(int(windowElement[0].get_attribute("NativeWindowHandle")))
  62. app_caps = {"appTopLevelWindow": newWindowHandle}
  63. app_driver = webdriver.Remote(command_executor='http://127.0.0.1:4723',
  64. desired_capabilities=app_caps)
  65. return app_driver
  66. except Exception as e:
  67. Common.logger(log_type, crawler).error(f"close_weixinzhishu异常:{e}\n")
  68. @classmethod
  69. def kill_pid(cls, log_type, crawler):
  70. try:
  71. os.system('chcp 65001') # 将cmd的显示字符编码从默认的GBK改为UTF-8
  72. list_process = list()
  73. pid_list = psutil.pids()
  74. for sub_pid in pid_list:
  75. try:
  76. process_info = psutil.Process(sub_pid)
  77. print(process_info)
  78. if process_info.name() == 'WeChatAppEx.exe' \
  79. or process_info.name() == 'WeChatOCR.exe' \
  80. or process_info.name() == 'WeChatPlayer.exe' \
  81. or process_info.name() == 'WeChatUtility.exe':
  82. list_process.append(sub_pid)
  83. except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
  84. pass
  85. for pid in list_process:
  86. os.system('taskkill /f /pid ' + str(pid))
  87. except Exception as e:
  88. Common.logger(log_type, crawler).error(f'kill_pid异常:{e}\n')
  89. @classmethod
  90. def get_search_key(cls, log_type, crawler):
  91. try:
  92. # charles 抓包文件保存目录
  93. chlsfile_path = f"./{crawler}/{crawler}_chlsfiles/"
  94. if len(os.listdir(chlsfile_path)) == 0:
  95. Common.logger(log_type, crawler).info("chlsfile文件夹为空,等待10s")
  96. cls.start_wechat(log_type, crawler)
  97. time.sleep(10)
  98. cls.get_search_key(log_type, crawler)
  99. else:
  100. Common.logger(log_type, crawler).info(f"chlsfile_list:{sorted(os.listdir(chlsfile_path))}")
  101. # 获取最新的 chlsfile
  102. chlsfile = sorted(os.listdir(chlsfile_path))[-1]
  103. # 分离文件名与扩展名
  104. new_file = os.path.splitext(chlsfile)
  105. # 重命名文件后缀
  106. os.rename(os.path.join(chlsfile_path, chlsfile),
  107. os.path.join(chlsfile_path, new_file[0] + ".txt"))
  108. with open(f"{chlsfile_path}{new_file[0]}.txt", encoding='utf-8-sig', errors='ignore') as f:
  109. contents = json.load(f, strict=False)
  110. if "search.weixin.qq.com" not in [text['host'] for text in contents]:
  111. return "未找到search_key"
  112. else:
  113. for content in contents:
  114. if content["host"] == "search.weixin.qq.com" and content["path"] == "/cgi-bin/wxaweb/wxindexgetusergroup":
  115. # print(f"content:{content}")
  116. text = content['request']['body']['text']
  117. search_key = json.loads(text)['search_key']
  118. openid = json.loads(text)['openid']
  119. return search_key, openid
  120. except Exception as e:
  121. Common.logger(log_type, crawler).exception(f"get_search_key异常:{e}\n")
  122. return None
  123. @classmethod
  124. def remove_file(cls, log_type, crawler):
  125. try:
  126. all_file_path = f"./{crawler}/{crawler}_chlsfiles/"
  127. if not os.path.exists(all_file_path):
  128. os.mkdir(all_file_path)
  129. all_file = os.listdir(f"./{crawler}/{crawler}_chlsfiles/")
  130. for file in all_file:
  131. os.remove(f"./{crawler}/{crawler}_chlsfiles/{file}")
  132. except Exception as e:
  133. Common.logger(log_type, crawler).error(f"remove_file异常:{e}\n")
  134. @classmethod
  135. def del_search_key_from_feishu(cls, log_type, crawler):
  136. try:
  137. sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
  138. if len(sheet) <= 21:
  139. # print('<=20行')
  140. return
  141. else:
  142. Feishu.dimension_range(log_type, crawler, 'sVL74k', 'ROWS', 22, 22)
  143. cls.del_search_key_from_feishu(log_type, crawler)
  144. except Exception as e:
  145. Common.logger(log_type, crawler).error(f"del_search_key_from_feishu异常:{e}\n")
  146. @classmethod
  147. def write_search_key_to_feishu(cls, log_type, crawler):
  148. Common.logger(log_type, crawler).info('清除 chlsfiles 文件夹')
  149. cls.remove_file(log_type, crawler)
  150. Common.logger(log_type, crawler).info('启动微信指数小程序')
  151. cls.start_wechat(log_type, crawler)
  152. Common.logger(log_type, crawler).info('获取 search_key')
  153. while True:
  154. search_key = cls.get_search_key(log_type, crawler)
  155. if search_key is None or search_key == "未找到search_key":
  156. time.sleep(3)
  157. Common.logger(log_type, crawler).info('未找到search_key,重启打开微信指数,获取 search_key')
  158. cls.start_wechat(log_type, crawler)
  159. cls.get_search_key(log_type, crawler)
  160. else:
  161. Common.logger(log_type, crawler).info(f'已获取 search_key,openid:{search_key}')
  162. Feishu.insert_columns(log_type, crawler, 'sVL74k', 'ROWS', 1, 2)
  163. time.sleep(1)
  164. time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
  165. Feishu.update_values(log_type, crawler, 'sVL74k', 'A2:Z2', [[time_str, search_key[0], search_key[-1]]])
  166. cls.del_search_key_from_feishu(log_type, crawler)
  167. Common.logger(log_type, crawler).info(f"search_key:{search_key}写入飞书表成功\n")
  168. return
  169. if __name__ == '__main__':
  170. while True:
  171. Searchkey.write_search_key_to_feishu('searchkey', 'weixinzhishu')
  172. Common.logger('searchkey', 'weixinzhishu').info('休眠 10 秒')
  173. time.sleep(10)
  174. # Searchkey.start_wechat('searchkey', 'weixinzhishu')