search_key.py 6.3 KB

# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/10
import json
import os
import sys
import time

from appium import webdriver
from selenium.webdriver.common.by import By

sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
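
# Overall flow: remove_file() clears old Charles capture files, start_wechat()
# drives the desktop WeChat client via Appium so the WeChat Index (微信指数)
# mini-program issues its request through the Charles proxy, get_search_key()
# extracts search_key from the exported .chlsj capture, and
# write_search_key_to_feishu() records it in a Feishu sheet.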


class Searchkey:
    @classmethod
    def start_wechat(cls, log_type, crawler):
        """Launch the desktop WeChat client via Appium and open the WeChat Index mini-program."""
        try:
            # Common.logger(log_type, crawler).info('Launching "WeChat"')
            # print('Launching "WeChat"')
            desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
            driver = webdriver.Remote(
                command_executor='http://127.0.0.1:4723',
                desired_capabilities=desired_caps)
            driver.implicitly_wait(10)
            # Common.logger(log_type).info('Click the chat window')
            # driver.find_element(By.NAME, '聊天').click()
            #
            # Common.logger(log_type).info('Click the crawler group chat')
            # driver.find_elements(By.NAME, '爬虫群')[0].click()
            # Common.logger(log_type, crawler).info('Click WeChat Index')
            driver.find_elements(By.NAME, '消息')[-1].click()
            # Common.logger(log_type, crawler).info('Sleep 3s, then quit WeChat')
            time.sleep(3)
            driver.quit()
        except Exception as e:
            Common.logger(log_type, crawler).error(f'start_wechat exception: {e}\n')
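
    # Assumptions behind start_wechat(): an Appium server is listening on
    # http://127.0.0.1:4723 and can drive the desktop WeChat executable named
    # by the 'app' capability (e.g. via a Windows UI-automation backend such
    # as WinAppDriver). Clicking the last element named '消息' relies on the
    # WeChat Index mini-program card being the most recent message; that
    # ordering is inferred from the commented-out navigation steps above.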

    @classmethod
    def get_search_key(cls, log_type, crawler):
        """Parse the newest Charles capture export and return its search_key."""
        try:
            # Directory where the Charles capture files are saved
            chlsfile_path = f"./{crawler}/{crawler}_chlsfiles/"
            # chlsfile_path = f"../weixinzhishu_chlsfiles/"
            all_file = os.listdir(chlsfile_path)
            chlsfile_list = []
            if len(all_file) == 0:
                Common.logger(log_type, crawler).info("chlsfile folder is empty, waiting 10s")
                cls.start_wechat(log_type, crawler)
                time.sleep(10)
                return cls.get_search_key(log_type, crawler)
            for file in all_file:
                # Keep only Charles JSON session exports (.chlsj); delete anything else
                if not file.endswith(".chlsj"):
                    os.remove(os.path.join(chlsfile_path, file))
                else:
                    chlsfile_list.append(file)
            if len(chlsfile_list) == 0:
                Common.logger(log_type, crawler).info('No chlsfile found, fetching again')
                cls.start_wechat(log_type, crawler)
                time.sleep(10)
                return cls.get_search_key(log_type, crawler)
            # Get the newest chlsfile; sort by modification time so "newest"
            # does not depend on os.listdir() ordering
            chlsfile_list.sort(key=lambda name: os.path.getmtime(os.path.join(chlsfile_path, name)))
            chlsfile = chlsfile_list[-1]
            # Split the file name from its extension
            new_file = os.path.splitext(chlsfile)
            # Rename the capture to .txt so it can be read as plain JSON
            os.rename(os.path.join(chlsfile_path, chlsfile),
                      os.path.join(chlsfile_path, new_file[0] + ".txt"))
            with open(f"{chlsfile_path}{new_file[0]}.txt", encoding='utf-8-sig', errors='ignore') as f:
                contents = json.load(f, strict=False)
            if "search.weixin.qq.com" not in [content.get('host') for content in contents]:
                return "search_key not found"
            for content in contents:
                if content.get("host") == "search.weixin.qq.com" \
                        and content.get("path") == "/cgi-bin/wxaweb/wxindexgetusergroup":
                    text = content['request']['body']['text']
                    return json.loads(text)['search_key']
            return "search_key not found"
        except Exception as e:
            Common.logger(log_type, crawler).exception(f"get_search_key exception: {e}\n")
            return None
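
    # A minimal sketch of the Charles .chlsj entry shape that get_search_key()
    # assumes (field names inferred from the access pattern above; a real
    # Charles JSON session export carries many more fields per transaction):
    #
    # [
    #   {
    #     "host": "search.weixin.qq.com",
    #     "path": "/cgi-bin/wxaweb/wxindexgetusergroup",
    #     "request": {
    #       "body": {
    #         "text": "{\"search_key\": \"...\"}"
    #       }
    #     }
    #   }
    # ]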

    @classmethod
    def remove_file(cls, log_type, crawler):
        """Empty the Charles capture folder, creating it first if needed."""
        try:
            all_file_path = f"./{crawler}/{crawler}_chlsfiles/"
            # makedirs with exist_ok also creates any missing parent directories
            os.makedirs(all_file_path, exist_ok=True)
            for file in os.listdir(all_file_path):
                os.remove(os.path.join(all_file_path, file))
        except Exception as e:
            Common.logger(log_type, crawler).error(f"remove_file exception: {e}\n")

    @classmethod
    def del_search_key_from_feishu(cls, log_type, crawler):
        """Trim the Feishu sheet so only the most recent search_key rows remain."""
        try:
            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
            Common.logger(log_type, crawler).info(f"sheet rows: {len(sheet)}")
            if len(sheet) <= 21:
                # 21 rows or fewer (header plus 20 entries): nothing to trim
                return
            # Delete row 22, then recurse until the sheet is back down to 21 rows
            Feishu.dimension_range(log_type, crawler, 'sVL74k', 'ROWS', 22, 22)
            cls.del_search_key_from_feishu(log_type, crawler)
        except Exception as e:
            Common.logger(log_type, crawler).error(f"del_search_key_from_feishu exception: {e}\n")
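
    # Assumed layout of Feishu sheet tab 'sVL74k', inferred from the calls in
    # this file: row 1 is a header, row 2 receives the newest
    # [timestamp, search_key] pair via insert_columns()/update_values(), and
    # del_search_key_from_feishu() deletes row 22 until only the header plus
    # the 20 most recent entries remain.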

    @classmethod
    def write_search_key_to_feishu(cls, log_type, crawler):
        """Fetch a fresh search_key and write it, with a timestamp, to the Feishu sheet."""
        Common.logger(log_type, crawler).info('Clearing the chlsfiles folder')
        cls.remove_file(log_type, crawler)
        Common.logger(log_type, crawler).info('Launching the WeChat Index mini-program')
        cls.start_wechat(log_type, crawler)
        Common.logger(log_type, crawler).info('Fetching search_key')
        while True:
            search_key = cls.get_search_key(log_type, crawler)
            if search_key is None or search_key == "search_key not found":
                time.sleep(3)
                Common.logger(log_type, crawler).info('search_key not found, relaunching WeChat Index to fetch it again')
                cls.start_wechat(log_type, crawler)
                # The next loop iteration calls get_search_key() again
            else:
                Common.logger(log_type, crawler).info(f'Got search_key: {search_key}')
                # Insert a new row 2 and write [timestamp, search_key] into it
                Feishu.insert_columns(log_type, crawler, 'sVL74k', 'ROWS', 1, 2)
                time.sleep(1)
                time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                Feishu.update_values(log_type, crawler, 'sVL74k', 'A2:B2', [[time_str, search_key]])
                cls.del_search_key_from_feishu(log_type, crawler)
                Common.logger(log_type, crawler).info(f"search_key: {search_key} written to the Feishu sheet\n")
                return


if __name__ == '__main__':
    # Searchkey.start_wechat('searchkey', 'weixinzhishu')
    Searchkey.write_search_key_to_feishu('searchkey', 'weixinzhishu')