get_weixinzhishu.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/2/10
  4. import os
  5. import sys
  6. import time
  7. from datetime import date, timedelta
  8. import requests
  9. import json
  10. sys.path.append(os.getcwd())
  11. from common.feishu import Feishu
  12. from common.common import Common
  13. class Weixinzhishu:
  14. pageNum = 1
  15. # 获取微信 key / openid
  16. @classmethod
  17. def get_wechat_key(cls, log_type, crawler):
  18. """
  19. 获取微信 key / openid
  20. https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
  21. :param log_type: 日志名
  22. :param crawler: 哪款爬虫,填写:weixinzhishu
  23. :return: search_key, openid
  24. """
  25. try:
  26. sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
  27. for i in range(len(sheet)):
  28. search_key = sheet[1][1]
  29. openid = sheet[1][2]
  30. return search_key, openid
  31. except Exception as e:
  32. Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
  33. # 获取热词
  34. @classmethod
  35. def get_word(cls, log_type, crawler, host):
  36. try:
  37. url = '/hot/word/getAllWords'
  38. params = {
  39. 'pageNum': cls.pageNum, # 第几页,默认1,int
  40. 'pageSize': 100 # 请求条目数,默认为100,int
  41. }
  42. response = requests.post(url=host+url, json=params)
  43. cls.pageNum += 1
  44. if response.status_code != 200:
  45. Common.logger(log_type, crawler).warning(f"get_word_response:{response.text}\n")
  46. elif response.json()['message'] != "success":
  47. Common.logger(log_type, crawler).warning(f"get_word_response:{response.json()}\n")
  48. else:
  49. word_list = response.json()['data']['words']
  50. return word_list
  51. except Exception as e:
  52. Common.logger(log_type, crawler).error(f"get_word:{e}\n")
  53. # 获取热词分数
  54. @classmethod
  55. def get_word_score(cls, log_type, crawler, word_id, word):
  56. """
  57. 获取热词分数
  58. :param log_type: 日志名
  59. :param crawler: 哪款爬虫,填写:weixinzhishu
  60. :param word_id: 热词 ID
  61. :param word: 热词
  62. :return: 热词 7 天指数,例如:
  63. {'id': 1,
  64. 'word': '消息',
  65. 'wechatScores': [
  66. {'score': 95521022, 'scoreDate': '2023-02-07'},
  67. {'score': 97315283, 'scoreDate': '2023-02-08'},
  68. {'score': 109845849, 'scoreDate': '2023-02-09'},
  69. {'score': 107089560, 'scoreDate': '2023-02-10'},
  70. {'score': 102658391, 'scoreDate': '2023-02-11'},
  71. {'score': 93843701, 'scoreDate': '2023-02-12'},
  72. {'score': 100211894, 'scoreDate': '2023-02-13'}]}
  73. """
  74. try:
  75. while True:
  76. wechat_key = cls.get_wechat_key(log_type, crawler)
  77. if wechat_key is None:
  78. Common.logger(log_type, crawler).info(f"wechat_key:{wechat_key}")
  79. time.sleep(10)
  80. continue
  81. search_key = wechat_key[0]
  82. openid = wechat_key[-1]
  83. start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
  84. end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
  85. url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
  86. payload = json.dumps({
  87. "openid": openid,
  88. "search_key": search_key,
  89. "cgi_name": "GetDefaultIndex",
  90. "start_ymd": start_ymd,
  91. "end_ymd": end_ymd,
  92. "query": word
  93. })
  94. headers = {
  95. 'Host': 'search.weixin.qq.com',
  96. 'content-type': 'application/json',
  97. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
  98. 'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
  99. }
  100. response = requests.request("POST", url=url, headers=headers, data=payload)
  101. wechat_score_list = []
  102. word_wechat_score_dict = {
  103. "id": word_id,
  104. "word": word,
  105. "wechatScores": wechat_score_list,
  106. }
  107. if response.json()['code'] == -10000:
  108. Common.logger(log_type, crawler).warning(f"response:{response.json()['msg']} 休眠 10 秒,重新获取")
  109. time.sleep(10)
  110. elif response.json()['code'] == -10002:
  111. Common.logger(log_type, crawler).info(f'{word}:该词暂未收录')
  112. return word_wechat_score_dict
  113. elif response.json()['code'] != 0:
  114. Common.logger(log_type, crawler).info(f'response:{response.text}\n')
  115. return word_wechat_score_dict
  116. else:
  117. time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
  118. for i in range(len(time_index)):
  119. score_time = time_index[i]['time']
  120. score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
  121. score = time_index[i]['score']
  122. wechat_score_dict = {"score": score, "scoreDate": score_time_str}
  123. wechat_score_list.append(wechat_score_dict)
  124. return word_wechat_score_dict
  125. except Exception as e:
  126. Common.logger(log_type, crawler).error(f"get_word_score异常:{e}\n")
  127. # 获取微信指数
  128. @classmethod
  129. def get_wechat_score(cls, log_type, crawler, host):
  130. """
  131. 获取微信指数
  132. :param log_type: 日志名
  133. :param crawler: 哪款爬虫
  134. :param host: 域名
  135. :return: 热词指数列表
  136. """
  137. score_num = 0
  138. while True:
  139. word_list = cls.get_word(log_type, crawler, host)
  140. if len(word_list) == 0:
  141. Common.logger(log_type, crawler).info(f"热词更新完毕\n")
  142. cls.pageNum = 1
  143. Common.logger(log_type, crawler).info(f"score_num: {score_num}")
  144. return
  145. else:
  146. wechat_score_data = []
  147. Common.logger(log_type, crawler).info(f"len(word_list):{len(word_list)}")
  148. for i in range(len(word_list)):
  149. word_id = word_list[i]['id']
  150. word = word_list[i]['word']
  151. Common.logger(log_type, crawler).info(f"word_id:{word_id}")
  152. Common.logger(log_type, crawler).info(f"word:{word}")
  153. word_score_dict = cls.get_word_score(log_type, crawler, word_id, word)
  154. Common.logger(log_type, crawler).info(f"word_score_dict:{word_score_dict}\n")
  155. wechat_score_data.append(word_score_dict)
  156. if word_score_dict['wechatScores'] is not None:
  157. score_num += len(word_score_dict['wechatScores'])
  158. Common.logger(log_type, crawler).info(f"wechat_score_data:{wechat_score_data}\n")
  159. cls.update_wechat_score(log_type, crawler, wechat_score_data, host)
  160. # 更新微信指数
  161. @classmethod
  162. def update_wechat_score(cls, log_type, crawler, data, host):
  163. """
  164. 更新热词微信指数
  165. :param log_type: 日志名
  166. :param crawler: 哪款爬虫
  167. :param data: 热词微信指数
  168. :param host: 域名
  169. :return: {"code":200, "message":"success"}
  170. """
  171. try:
  172. url = '/hot/word/updateWechatScore'
  173. params = {'data': data}
  174. response = requests.post(url=host+url, json=params)
  175. if response.status_code != 200:
  176. Common.logger(log_type, crawler).warning(f"update_wechat_score_response:{response.text}\n")
  177. elif response.json()["message"] != "success":
  178. Common.logger(log_type, crawler).warning(f"update_wechat_score_response:{response.json()}\n")
  179. else:
  180. Common.logger(log_type, crawler).info(f"更新热词微信指数:{response.json()['message']}\n")
  181. except Exception as e:
  182. Common.logger(log_type, crawler).error(f"update_wechat_score:{e}\n")
  183. @classmethod
  184. def get_score_test(cls, log_type, crawler, word_id, word):
  185. wechat_key = cls.get_wechat_key(log_type, crawler)
  186. search_key = wechat_key[0]
  187. openid = wechat_key[-1]
  188. end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
  189. start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
  190. url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
  191. payload = json.dumps({
  192. "openid": openid,
  193. "search_key": search_key,
  194. "cgi_name": "GetDefaultIndex",
  195. "start_ymd": start_ymd,
  196. "end_ymd": end_ymd,
  197. "query": word
  198. })
  199. headers = {
  200. 'Host': 'search.weixin.qq.com',
  201. 'content-type': 'application/json',
  202. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
  203. 'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
  204. }
  205. response = requests.request("POST", url, headers=headers, data=payload)
  206. wechat_score_list = []
  207. word_wechat_score_dict = {
  208. "id": word_id,
  209. "word": word,
  210. "wechatScores": wechat_score_list,
  211. }
  212. if response.json()['code'] == -10000:
  213. print(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒,重新获取")
  214. time.sleep(10)
  215. cls.get_score_test(log_type, crawler, word_id, word)
  216. elif response.json()['code'] == -10002:
  217. print("该词暂未收录")
  218. print(f"{word_wechat_score_dict}")
  219. elif response.json()['code'] != 0:
  220. print(f"{word_wechat_score_dict}")
  221. else:
  222. time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
  223. for i in range(len(time_index)):
  224. score_time = time_index[i]['time']
  225. score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
  226. score = time_index[i]['score']
  227. wechat_score_dict = {"score": score, "scoreDate": score_time_str}
  228. wechat_score_list.append(wechat_score_dict)
  229. print(f"wechat_score_dict:{wechat_score_dict}")
  230. print(word_wechat_score_dict)
  231. if __name__ == "__main__":
  232. Weixinzhishu.get_score_test('weixin', 'weixinzhishu', 1 , "根本")
  233. pass