|
@@ -0,0 +1,107 @@
|
|
|
+import traceback
|
|
|
+
|
|
|
+from db_helper import MysqlHelper
|
|
|
+from log import Log
|
|
|
+
|
|
|
# Module-level instances shared by the functions in this file: `mysql_helper` runs
# the SQL statements they build and `log_` records their progress and errors.
+mysql_helper = MysqlHelper()
|
|
|
+log_ = Log()
|
|
|
+
|
|
|
+
|
|
|
def get_words(page_num, page_size):
    """
    Fetch one page of hot words from ``word.hot_word``, ordered by id.

    :param page_num: 1-based page number
    :param page_size: number of rows requested per page
    :return: list of ``{'id': ..., 'word': ...}`` dicts (empty when the page has no rows),
             or None when the paging arguments are invalid, the query helper returns
             None, or an exception is raised (the traceback is logged)
    """
    try:
        # Coerce before interpolating so that only integer literals can reach the SQL text;
        # a non-numeric argument raises here and is reported by the handler below.
        page_num = int(page_num)
        page_size = int(page_size)
        offset = (page_num - 1) * page_size
        if offset < 0 or page_size < 0:
            # A negative LIMIT offset/count is not valid SQL; fail the same way other errors do.
            log_.error(f"invalid paging arguments: page_num = {page_num}, page_size = {page_size}")
            return None
        sql = f"select id, word from word.hot_word order by id limit {offset}, {page_size};"
        data = mysql_helper.get_data(sql=sql)
        if data is None:
            return None
        return [{'id': id_, 'word': word} for id_, word in data]
    except Exception:
        log_.error(traceback.format_exc())
        return None
|
|
|
+
|
|
|
+
|
|
|
def _sql_number(value):
    """Render ``value`` as a numeric SQL literal; raise TypeError/ValueError if it is not a finite number."""
    if isinstance(value, int) and not isinstance(value, bool):
        return str(value)
    number = float(value)  # raises for None and for non-numeric text
    if number != number or number in (float('inf'), float('-inf')):
        raise ValueError(f"not a finite number: {value!r}")
    return repr(number)


def _sql_quoted(value):
    """Render ``value`` as a single-quoted SQL string literal; raise ValueError if it is missing or unsafe."""
    if value is None:
        raise ValueError("missing value")
    text = str(value)
    # A quote or backslash could terminate the literal early, so such values are rejected.
    if "'" in text or "\\" in text:
        raise ValueError(f"unsafe characters in {text!r}")
    return f"'{text}'"


def _collect_wechat_score_rows(data):
    """Flatten crawled items into one row per score entry, with values pre-rendered as SQL literals."""
    rows = []
    for item in data:
        wechat_scores = item.get('wechatScores')
        if not wechat_scores:
            continue
        for score_data in wechat_scores:
            try:
                rows.append({
                    'word_id': _sql_number(item.get('id')),
                    'word': item.get('word'),
                    'score': _sql_number(score_data.get('score')),
                    'score_date': _sql_quoted(score_data.get('scoreDate')),
                })
            except (TypeError, ValueError):
                # One unusable row must not break the whole batch statement it would be part of.
                log_.error(f"skip invalid wechat score row: word_id = {item.get('id')}, "
                           f"score_data = {score_data}")
    return rows


def _batches(rows, size=100):
    """Yield consecutive slices of ``rows`` holding at most ``size`` elements each."""
    for start in range(0, len(rows), size):
        yield rows[start:start + size]


def update_wechat_score_data(data):
    """
    Write crawled WeChat index scores into ``word.word_wechat_score``.

    Each score entry is looked up by (word_id, score_date); entries without an existing
    row are inserted and the others have their ``wechat_score`` updated. Writes are sent
    in batches of at most 100 rows.

    :param data: iterable of dicts with keys ``id``, ``word`` and ``wechatScores``, where
                 ``wechatScores`` is a list of dicts with keys ``score`` and ``scoreDate``.
                 Entries whose id/score is not numeric, or whose date is missing or
                 contains a quote/backslash, are logged and skipped.
    :return: None
    """
    if not data:
        log_.info("无需要更新的数据!")
        return

    # Parse the crawled data into flat rows.
    log_.info(f"data count = {len(data)}")
    wechat_score_data_list = _collect_wechat_score_rows(data)
    log_.info(f"wechat_score_data_list count = {len(wechat_score_data_list)}")

    # Split rows into those that already exist (update) and those that do not (insert).
    update_data = []
    insert_data = []
    for wechat_score_data in wechat_score_data_list:
        select_sql = (
            f"SELECT id FROM word.word_wechat_score "
            f"WHERE word_id = {wechat_score_data['word_id']} "
            f"AND score_date = {wechat_score_data['score_date']};"
        )
        res = mysql_helper.get_data(sql=select_sql)
        if res is None:
            # The lookup produced no result set at all; leave this row untouched.
            continue
        if len(res) == 0:
            insert_data.append(wechat_score_data)
        else:
            wechat_score_data['id'] = res[0][0]
            update_data.append(wechat_score_data)
    log_.info(f"update_data = {len(update_data)}")
    log_.info(f"insert_data = {len(insert_data)}")

    # Batched insert.
    if insert_data:
        for i, batch in enumerate(_batches(insert_data)):
            log_.info(f"insert i = {i}")
            insert_sql_values = ', '.join(
                f"({row['word_id']}, {row['score']}, {row['score_date']})" for row in batch
            )
            insert_sql = (
                f"insert into word.word_wechat_score (word_id, wechat_score, score_date) "
                f"values {insert_sql_values};"
            )
            mysql_helper.add_data(sql=insert_sql)
        log_.info(f"insert wechat score data finished! insert count = {len(insert_data)}")

    # Batched update: one statement per batch using CASE to map each id to its score.
    if update_data:
        for i, batch in enumerate(_batches(update_data)):
            log_.info(f"update i = {i}")
            update_sql_values = ' '.join(f"when {row['id']} then {row['score']}" for row in batch)
            # Join the ids explicitly so one id and many ids render the same way.
            update_ids = ', '.join(str(row['id']) for row in batch)
            update_sql = (
                f"update word.word_wechat_score set wechat_score = "
                f"case id {update_sql_values} end where id in ({update_ids});"
            )
            mysql_helper.add_data(sql=update_sql)
        log_.info(f"update wechat score data finished! update count = {len(update_data)}")
|
|
|
+
|
|
|
+
|
|
|
if __name__ == '__main__':
    # Manual check when run as a script: request page 8 with 100 words per page.
    get_words(page_num=8, page_size=100)
|