# File: weixinzhishu_inner_sort.py
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/2/28
import json
import os
import sys
import time
from datetime import date, timedelta
from typing import Any, List, Optional, Tuple

import requests

# The project packages below are resolved relative to the working directory,
# so this path tweak has to run before they are imported.
sys.path.append(os.getcwd())
from common.common import Common
from common.feishu import Feishu
  13. class Test:
  14. # 获取微信 key / openid
  15. @classmethod
  16. def get_wechat_key(cls, log_type, crawler):
  17. """
  18. 获取微信 key / openid
  19. https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
  20. :param log_type: 日志名
  21. :param crawler: 哪款爬虫,填写:weixinzhishu
  22. :return: search_key, openid
  23. """
  24. try:
  25. # while True:
  26. sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
  27. # if sheet is None:
  28. # Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
  29. # time.sleep(10)
  30. # else:
  31. # break
  32. for i in range(len(sheet)):
  33. search_key = sheet[1][1]
  34. openid = sheet[1][2]
  35. return search_key, openid
  36. except Exception as e:
  37. Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
  38. @classmethod
  39. def get_words(cls, log_type, crawler):
  40. try:
  41. while True:
  42. sheet = Feishu.get_values_batch(log_type, crawler, 'SuhTA6')
  43. if sheet is None:
  44. Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
  45. time.sleep(10)
  46. else:
  47. break
  48. word_list = []
  49. for x in sheet:
  50. for y in x:
  51. if y is None:
  52. pass
  53. else:
  54. word_list.append(y)
  55. return word_list
  56. except Exception as e:
  57. Common.logger(log_type, crawler).error(f"get_words:{e}\n")
  58. @classmethod
  59. def get_score_test(cls, log_type, crawler):
  60. start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
  61. end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
  62. word_list = cls.get_words(log_type, crawler)
  63. for i in range(len(word_list)):
  64. Common.logger(log_type, crawler).info(f"热词: {word_list[i]}")
  65. while True:
  66. wechat_key = cls.get_wechat_key(log_type, crawler)
  67. if wechat_key is None:
  68. Common.logger(log_type, crawler).info(
  69. f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} auth 过期,休眠 10 秒,重新获取")
  70. time.sleep(10)
  71. continue
  72. search_key = wechat_key[0]
  73. openid = wechat_key[-1]
  74. url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
  75. payload = json.dumps({
  76. "openid": openid,
  77. "search_key": search_key,
  78. "cgi_name": "GetDefaultIndex",
  79. "start_ymd": start_ymd,
  80. "end_ymd": end_ymd,
  81. "query": word_list[i]
  82. })
  83. headers = {
  84. 'Host': 'search.weixin.qq.com',
  85. 'content-type': 'application/json',
  86. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
  87. 'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
  88. }
  89. response = requests.request("POST", url, headers=headers, data=payload)
  90. if response.json()['code'] == -10000:
  91. Common.logger(log_type, crawler).info(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒,重新获取")
  92. time.sleep(10)
  93. continue
  94. wechat_score_list = []
  95. word_wechat_score_dict = {
  96. "id": i+1,
  97. "word": word_list[i],
  98. "wechatScores": wechat_score_list,
  99. }
  100. if response.json()['code'] == -10002:
  101. Common.logger(log_type, crawler).info("该词暂未收录")
  102. # 写飞书
  103. if word_list[i] in [x for y in Feishu.get_values_batch(log_type, crawler, "2fP99U") for x in y]:
  104. Common.logger(log_type, crawler).info("该词已存在")
  105. continue
  106. Feishu.insert_columns(log_type, crawler, "2fP99U", "ROWS", 1, 2)
  107. time.sleep(0.5)
  108. Feishu.update_values(log_type, crawler, "2fP99U", "F2:Z2",
  109. [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
  110. word_list[i],
  111. "",
  112. "该词暂未收录"]])
  113. Common.logger(log_type, crawler).info("写入飞书成功\n")
  114. elif response.json()['code'] != 0:
  115. Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
  116. continue
  117. else:
  118. time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
  119. for x in range(len(time_index)):
  120. Common.logger(log_type, crawler).info(f"正在更新 {word_list[i]}")
  121. score_time = time_index[x]['time']
  122. score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
  123. score = time_index[x]['score']
  124. wechat_score_dict = {"score": score, "scoreDate": score_time_str}
  125. wechat_score_list.append(wechat_score_dict)
  126. Common.logger(log_type, crawler).info(f"wechat_score_dict:{wechat_score_dict}")
  127. Feishu.insert_columns(log_type, crawler, "2fP99U", "ROWS", 1, 2)
  128. time.sleep(0.5)
  129. Feishu.update_values(log_type, crawler, "2fP99U", "F2:Z2", [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
  130. word_list[i],
  131. score_time_str,
  132. score]])
  133. Common.logger(log_type, crawler).info("写入飞书成功\n")
  134. break
  135. if __name__ == "__main__":
  136. Test.get_score_test("inner-sort", "weixinzhishu")
  137. pass