|
@@ -0,0 +1,203 @@
|
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
|
+# @Author: wangkun
|
|
|
|
+# @Time: 2023/4/6
|
|
|
|
+import json
|
|
|
|
+import os
|
|
|
|
+import sys
|
|
|
|
+import time
|
|
|
|
+from datetime import date, timedelta
|
|
|
|
+import requests
|
|
|
|
+sys.path.append(os.getcwd())
|
|
|
|
+from common.common import Common
|
|
|
|
+from common.feishu import Feishu
|
|
|
|
+
|
|
|
|
+
|
|
|
|
class TodayScore:
    """Refresh the WeChat index scores of today's hot words.

    ``get_today_wechat_score`` drives the whole flow: it pages through today's
    hot words (``get_today_words``), queries the WeChat index of each word
    (``get_word_score``) and uploads every page of results
    (``update_today_wechat_score``).
    """

    # Paging cursor used by get_today_words. It is class-level (shared) state:
    # it advances after every successfully fetched page and is reset to 1 by
    # get_today_wechat_score when a pass ends.
    pageNum = 1

    # Seconds to wait on a single HTTP request; without a timeout `requests`
    # may block forever on a stalled connection.
    _REQUEST_TIMEOUT = 30

    # Hosts of the hot-word service for the "dev" environment and for every
    # other environment.
    _DEV_HOST = 'http://testhot-words-internal.piaoquantv.com'
    _PROD_HOST = 'http://hot-words-internal.piaoquantv.com'

    @classmethod
    def _hot_word_url(cls, env, endpoint):
        """Return the hot-word service URL of *endpoint* for environment *env*."""
        host = cls._DEV_HOST if env == "dev" else cls._PROD_HOST
        return f"{host}/hot/word/{endpoint}"

    @staticmethod
    def _format_score_date(score_time):
        """Turn a ``YYYYMMDD`` value (int or str) into ``'YYYY-MM-DD'``."""
        ymd = str(score_time)
        return f"{ymd[:4]}-{ymd[4:6]}-{ymd[6:]}"

    # Fetch the WeChat search key / openid
    @classmethod
    def get_wechat_key(cls, log_type, crawler):
        """
        Read the WeChat search key and openid from the Feishu sheet 'sVL74k'.
        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k

        The sheet is re-requested once per second for as long as
        ``Feishu.get_values_batch`` returns None.

        :param log_type: log name
        :param crawler: which crawler, e.g. weixinzhishu
        :return: ``(search_key, openid)`` taken from row index 1, columns 1
            and 2 of the sheet; None if the sheet cannot be read (for example
            when it has fewer rows/columns than expected), in which case the
            error is logged.
        """
        try:
            while True:
                sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
                if sheet is not None:
                    break
                time.sleep(1)
            credentials_row = sheet[1]
            return credentials_row[1], credentials_row[2]
        except Exception as e:
            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
            return None

    # Fetch hot words
    @classmethod
    def get_today_words(cls, log_type, crawler, env):
        """
        Fetch the page of today's hot words that ``cls.pageNum`` points at.

        ``cls.pageNum`` is advanced only after a page has been fetched
        successfully, so a failed request does not skip a page.

        :param log_type: log name
        :param crawler: which crawler
        :param env: environment; "dev" selects the test host, anything else
            the production host
        :return: the ``data.words`` value of the response on success; None
            when the request fails or the service does not answer "success"
            (the reason is logged).
        """
        try:
            url = cls._hot_word_url(env, "getTodayWords")
            params = {
                'pageNum': cls.pageNum,  # page index, int
                'pageSize': 100,  # number of entries requested per page, int
            }
            response = requests.post(url=url, json=params, timeout=cls._REQUEST_TIMEOUT)
            if response.status_code != 200:
                Common.logger(log_type, crawler).warning(f"get_today_words:{response.status_code}, {response.text}\n")
                return None
            body = response.json()
            if body['message'] != "success":
                Common.logger(log_type, crawler).warning(f"get_word_response:{response.status_code}, {body}\n")
                return None
            word_list = body['data']['words']
            cls.pageNum += 1
            return word_list
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_today_words:{e}\n")
            return None

    # Fetch the score of one hot word
    @classmethod
    def get_word_score(cls, log_type, crawler, word_id, word):
        """
        Query the WeChat index of *word* from seven days ago through today.

        :param log_type: log name
        :param crawler: which crawler, e.g. weixinzhishu
        :param word_id: hot word ID
        :param word: hot word
        :return: dict with the word's daily scores, for example:
        {'id': 1,
        'word': '消息',
        'wechatScores': [
        {'score': 95521022, 'scoreDate': '2023-02-07'},
        {'score': 97315283, 'scoreDate': '2023-02-08'},
        {'score': 109845849, 'scoreDate': '2023-02-09'},
        {'score': 107089560, 'scoreDate': '2023-02-10'},
        {'score': 102658391, 'scoreDate': '2023-02-11'},
        {'score': 93843701, 'scoreDate': '2023-02-12'},
        {'score': 100211894, 'scoreDate': '2023-02-13'}]}
        ``wechatScores`` is an empty list when the API answers with a
        non-zero code other than -10000. None is returned when the key is
        unavailable or any exception occurs (the reason is logged).
        """
        url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
        headers = {
            'Host': 'search.weixin.qq.com',
            'content-type': 'application/json',
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
            'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
        }
        try:
            while True:
                # The key is re-read on every attempt so that a key refreshed
                # in the sheet is picked up by the retry below.
                wechat_key = cls.get_wechat_key(log_type, crawler)
                if wechat_key is None:
                    Common.logger(log_type, crawler).warning(f"get_word_score:wechat key unavailable, skip word:{word}\n")
                    return None
                search_key, openid = wechat_key

                today = date.today()
                start_ymd = (today - timedelta(days=7)).strftime("%Y%m%d")
                end_ymd = today.strftime("%Y%m%d")
                payload = json.dumps({
                    "openid": openid,
                    "search_key": search_key,
                    "cgi_name": "GetDefaultIndex",
                    "start_ymd": start_ymd,
                    "end_ymd": end_ymd,
                    "query": word
                })
                response = requests.request("POST", url=url, headers=headers, data=payload,
                                            timeout=cls._REQUEST_TIMEOUT)
                result = response.json()  # parse the body once and reuse it
                code = result['code']

                word_wechat_score_dict = {
                    "id": word_id,
                    "word": word,
                    "wechatScores": [],
                }
                if code == -10000:
                    # NOTE(review): -10000 is treated as a retryable condition
                    # (presumably an expired/invalid key) -- confirm.
                    Common.logger(log_type, crawler).warning(f"response:{result['msg']} 休眠 5 秒,重新获取")
                    time.sleep(5)
                    continue
                if code == -10002:
                    # Per the log message below: the word is not indexed yet.
                    Common.logger(log_type, crawler).info(f'{word}:该词暂未收录')
                    return word_wechat_score_dict
                if code != 0:
                    Common.logger(log_type, crawler).info(f'response:{response.text}\n')
                    return word_wechat_score_dict

                time_indexes = result['content']['resp_list'][0]['indexes'][0]['time_indexes']
                word_wechat_score_dict["wechatScores"] = [
                    {"score": entry['score'], "scoreDate": cls._format_score_date(entry['time'])}
                    for entry in time_indexes
                ]
                return word_wechat_score_dict
        except Exception as e:
            Common.logger(log_type, crawler).error(f"get_word_score异常:{e}\n")
            return None

    # Upload WeChat index scores
    @classmethod
    def update_today_wechat_score(cls, log_type, crawler, wechat_score_data, env):
        """
        Upload hot-word WeChat index scores to the hot-word service.

        The outcome is only logged: the service is expected to answer with
        HTTP 200 and a JSON body whose "message" is "success".

        :param log_type: log name
        :param crawler: which crawler
        :param wechat_score_data: list of score dicts as returned by
            ``get_word_score``
        :param env: environment; "dev" selects the test host, anything else
            the production host
        :return: None
        """
        try:
            url = cls._hot_word_url(env, "updateWechatScore")
            params = {'data': wechat_score_data}
            response = requests.post(url=url, json=params, timeout=cls._REQUEST_TIMEOUT)
            if response.status_code != 200:
                Common.logger(log_type, crawler).warning(f"update_wechat_score_response:{response.status_code}, {response.text}\n")
                return
            body = response.json()
            if body["message"] != "success":
                Common.logger(log_type, crawler).warning(f"update_wechat_score_response:{response.status_code}, {body}\n")
            else:
                Common.logger(log_type, crawler).info(f"更新热词微信指数:{body['message']}\n")
        except Exception as e:
            Common.logger(log_type, crawler).error(f"update_today_wechat_score:{e}\n")

    # Fetch and upload WeChat index scores for all of today's words
    @classmethod
    def get_today_wechat_score(cls, log_type, crawler, env):
        """
        Fetch the WeChat index of every hot word of today and upload it.

        Words are processed page by page; each page's scores are uploaded in
        one batch. The pass ends when a page comes back empty or cannot be
        fetched, and ``cls.pageNum`` is then reset to 1.

        :param log_type: log name
        :param crawler: which crawler
        :param env: environment
        :return: None
        """
        score_num = 0
        while True:
            word_list = cls.get_today_words(log_type, crawler, env)
            if word_list is None:
                # The fetch failed (already logged by get_today_words); stop
                # instead of crashing on len(None).
                Common.logger(log_type, crawler).warning("get_today_wechat_score:failed to fetch today's words, stop\n")
                cls.pageNum = 1
                Common.logger(log_type, crawler).info(f"score_num: {score_num}")
                return
            if not word_list:
                Common.logger(log_type, crawler).info("今日热词更新完毕")
                cls.pageNum = 1
                Common.logger(log_type, crawler).info(f"score_num: {score_num}")
                return

            wechat_score_data = []
            Common.logger(log_type, crawler).info(f"len(word_list):{len(word_list)}")
            for hot_word in word_list:
                word_id = hot_word['id']
                word = hot_word['word']
                Common.logger(log_type, crawler).info(f"word_id:{word_id}")
                Common.logger(log_type, crawler).info(f"word:{word}")
                word_score_dict = cls.get_word_score(log_type, crawler, word_id, word)
                Common.logger(log_type, crawler).info(f"word_score_dict:{word_score_dict}\n")
                if word_score_dict is None:
                    # Lookup failed; never upload a null entry for this word.
                    continue
                wechat_score_data.append(word_score_dict)
                if word_score_dict['wechatScores'] is not None:
                    score_num += len(word_score_dict['wechatScores'])
            Common.logger(log_type, crawler).info(f"wechat_score_data:{wechat_score_data}\n")
            if wechat_score_data:
                cls.update_today_wechat_score(log_type, crawler, wechat_score_data, env)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: fetch one page of today's hot words and print it.
    today_words = TodayScore.get_today_words("today-score", "weixinzhishu", "dev")
    print(today_words)
    # Alternative manual check for a single word:
    # print(TodayScore.get_word_score("today-score", "weixinzhishu", 1, "加班"))
|