import sys from utils.feishu import FeiShuHelper from db_helper import MysqlHelper from config import set_config from log import Log config_, env = set_config() log_ = Log() mysql_helper = MysqlHelper() def add_words2mysql(sheet_name, source): """新增词到mysql数据库中""" sheet_info = config_.SHEET_INFO.get(sheet_name) # 获取词 feishu_helper = FeiShuHelper() data = feishu_helper.get_data(spreadsheet_token=sheet_info.get('spreadsheet_token'), sheet_id=sheet_info.get('sheet_id')) words = [word for item in data for word in item if word is not None and word != ''] words = list(set(words)) log_.info(f"words count = {len(words)}") # 批量插入到mysql for i in range(len(words)//100+1): log_.info(f"i = {i}") words_list = words[i*100:(i+1)*100] if len(words_list) > 0: # 与数据库中的数据去重 select_sql = f"select word from word.hot_word where word in {tuple(words_list)}" data = mysql_helper.get_data(sql=select_sql) exist_words = [item[0] for item in data] insert_words = list(set(words_list).difference(set(exist_words))) # log_.info(f"words_list = {words_list}, \n" # f"exist_words = {exist_words}, \n" # f"insert_words = {insert_words}") log_.info(f"words_list count = {len(words_list)}, " f"exist_words count = {len(exist_words)}, " f"insert_words count = {len(insert_words)}") if len(insert_words) > 0: # 拼接sql,插入数据库 sql_values = ', '.join([f"('{word}', {source})" for word in insert_words]) insert_sql = f"insert into word.hot_word (word, source) values {sql_values};" log_.info(f"insert_sql = {insert_sql}") mysql_helper.add_data(sql=insert_sql) if __name__ == '__main__': # add_words2mysql(sheet_name='微信指数搜索常用词样本-人工标注站内高频关键词', source=1) sheet_name, source = sys.argv[1], sys.argv[2] add_words2mysql(sheet_name, source)