weixinzhishu_out.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/2/28
  4. import json
  5. import os
  6. import sys
  7. import time
  8. from datetime import date, timedelta
  9. import requests
  10. sys.path.append(os.getcwd())
  11. from common.common import Common
  12. from common.feishu import Feishu
  13. proxies = {"http": None, "https": None}
  14. class Test:
  15. # 获取微信 key / openid
  16. @classmethod
  17. def get_wechat_key(cls, log_type, crawler):
  18. """
  19. 获取微信 key / openid
  20. https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
  21. :param log_type: 日志名
  22. :param crawler: 哪款爬虫,填写:weixinzhishu
  23. :return: search_key, openid
  24. """
  25. try:
  26. # while True:
  27. sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
  28. # if sheet is None:
  29. # Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
  30. # time.sleep(10)
  31. # else:
  32. # break
  33. for i in range(len(sheet)):
  34. search_key = sheet[1][1]
  35. openid = sheet[1][2]
  36. return search_key, openid
  37. except Exception as e:
  38. Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
  39. @classmethod
  40. def get_words(cls, log_type, crawler):
  41. try:
  42. while True:
  43. sheet = Feishu.get_values_batch(log_type, crawler, 'MvFi8s')
  44. if sheet is None:
  45. Common.logger(log_type, crawler).warning(f"获取热词sheet:{sheet} ,10秒钟后重试")
  46. time.sleep(10)
  47. else:
  48. break
  49. word_list = []
  50. for i in range(len(sheet)):
  51. word_dict = {
  52. "title": sheet[i][0],
  53. "word": sheet[i][1]
  54. }
  55. word_list.append(word_dict)
  56. return word_list
  57. except Exception as e:
  58. Common.logger(log_type, crawler).error(f"get_words:{e}\n")
  59. @classmethod
  60. def get_score_test(cls, log_type, crawler):
  61. start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
  62. end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
  63. word_list = cls.get_words(log_type, crawler)
  64. for i in range(len(word_list)):
  65. Common.logger(log_type, crawler).info(f"热词: {word_list[i]['word']}")
  66. while True:
  67. wechat_key = cls.get_wechat_key(log_type, crawler)
  68. if wechat_key is None:
  69. Common.logger(log_type, crawler).info(
  70. f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} auth 过期,休眠 10 秒,重新获取")
  71. time.sleep(10)
  72. continue
  73. search_key = wechat_key[0]
  74. openid = wechat_key[-1]
  75. url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
  76. payload = json.dumps({
  77. "openid": openid,
  78. "search_key": search_key,
  79. "cgi_name": "GetDefaultIndex",
  80. "start_ymd": start_ymd,
  81. "end_ymd": end_ymd,
  82. "query": word_list[i]['word']
  83. })
  84. headers = {
  85. 'Host': 'search.weixin.qq.com',
  86. 'content-type': 'application/json',
  87. 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
  88. 'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
  89. }
  90. response = requests.request("POST", url, headers=headers, data=payload, proxies=proxies)
  91. if response.json()['code'] == -10000:
  92. Common.logger(log_type, crawler).info(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒,重新获取")
  93. time.sleep(10)
  94. continue
  95. wechat_score_list = []
  96. word_wechat_score_dict = {
  97. "id": i+1,
  98. "word": word_list[i]['word'],
  99. "wechatScores": wechat_score_list,
  100. }
  101. if response.json()['code'] == -10002:
  102. Common.logger(log_type, crawler).info("该词暂未收录")
  103. # 写飞书
  104. # if word_list[i]['word'] in [x for y in Feishu.get_values_batch(log_type, crawler, "YVuVgQ") for x in y]:
  105. # Common.logger(log_type, crawler).info("该词已存在")
  106. # continue
  107. Feishu.insert_columns(log_type, crawler, "YVuVgQ", "ROWS", 1, 2)
  108. time.sleep(0.5)
  109. Feishu.update_values(log_type, crawler, "YVuVgQ", "F2:Z2",
  110. [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
  111. word_list[i]['title'],
  112. word_list[i]['word'],
  113. "",
  114. "该词暂未收录"]])
  115. Common.logger(log_type, crawler).info("写入飞书成功\n")
  116. elif response.json()['code'] != 0:
  117. Common.logger(log_type, crawler).warning(f"{word_wechat_score_dict}")
  118. continue
  119. else:
  120. time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
  121. for x in range(len(time_index)):
  122. Common.logger(log_type, crawler).info(f"正在更新 {word_list[i]['word']}")
  123. score_time = time_index[x]['time']
  124. score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
  125. score = time_index[x]['score']
  126. wechat_score_dict = {"score": score, "scoreDate": score_time_str}
  127. wechat_score_list.append(wechat_score_dict)
  128. Common.logger(log_type, crawler).info(f"wechat_score_dict:{wechat_score_dict}")
  129. Feishu.insert_columns(log_type, crawler, "YVuVgQ", "ROWS", 1, 2)
  130. time.sleep(1)
  131. Feishu.update_values(log_type, crawler, "YVuVgQ", "F2:Z2", [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
  132. word_list[i]['title'],
  133. word_list[i]['word'],
  134. score_time_str,
  135. score]])
  136. Common.logger(log_type, crawler).info("写入飞书成功\n")
  137. break
  138. Feishu.bot(log_type, "weixinzhishu_out", "微信指数_站外指数抓取完毕")
  139. if __name__ == "__main__":
  140. # print(Test.get_words("test", "weixinzhishu"))
  141. Test.get_score_test("out", "weixinzhishu")
  142. pass