update_common_words.py 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243
  1. from feishu import FeiShuHelper
  2. from db_helper import MysqlHelper
  3. from config import set_config
  4. from log import Log
  5. config_, env = set_config()
  6. log_ = Log()
  7. mysql_helper = MysqlHelper()
  8. def add_words2mysql(sheet_name, source):
  9. """新增词到mysql数据库中"""
  10. sheet_info = config_.SHEET_INFO.get(sheet_name)
  11. # 获取词
  12. feishu_helper = FeiShuHelper()
  13. data = feishu_helper.get_data(spreadsheet_token=sheet_info.get('spreadsheet_token'),
  14. sheet_id=sheet_info.get('sheet_id'))
  15. words = [word for item in data for word in item if word is not None and word != '']
  16. words = list(set(words))
  17. log_.info(f"words count = {len(words)}")
  18. # 批量插入到mysql
  19. for i in range(len(words)//100+1):
  20. log_.info(f"i = {i}")
  21. words_list = words[i*100:(i+1)*100]
  22. if len(words_list) > 0:
  23. # 与数据库中的数据去重
  24. select_sql = f"select word from word.hot_word where word in {tuple(words_list)}"
  25. data = mysql_helper.get_data(sql=select_sql)
  26. exist_words = [item[0] for item in data]
  27. insert_words = list(set(words_list).difference(set(exist_words)))
  28. log_.info(f"words_list count = {len(words_list)}, "
  29. f"exist_words count = {len(exist_words)}, "
  30. f"insert_words count = {len(insert_words)}")
  31. if len(insert_words) > 0:
  32. # 拼接sql,插入数据库
  33. sql_values = ', '.join([f"('{word}', {source})" for word in insert_words])
  34. insert_sql = f"insert into word.hot_word (word, source) values {sql_values};"
  35. log_.info(f"insert_sql = {insert_sql}")
  36. mysql_helper.add_data(sql=insert_sql)
  37. if __name__ == '__main__':
  38. add_words2mysql(sheet_name='微信指数搜索常用词样本-人工标注站内高频关键词', source=1)