1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import sys
- from utils.feishu import FeiShuHelper
- from db_helper import MysqlHelper
- from config import set_config
- from log import Log
- config_, env = set_config()
- log_ = Log()
- mysql_helper = MysqlHelper()
- def add_words2mysql(sheet_name, source):
- """新增词到mysql数据库中"""
- sheet_info = config_.SHEET_INFO.get(sheet_name)
- # 获取词
- feishu_helper = FeiShuHelper()
- data = feishu_helper.get_data(spreadsheet_token=sheet_info.get('spreadsheet_token'),
- sheet_id=sheet_info.get('sheet_id'))
- words = [word for item in data for word in item if word is not None and word != '']
- words = list(set(words))
- log_.info(f"words count = {len(words)}")
- # 批量插入到mysql
- for i in range(len(words)//100+1):
- log_.info(f"i = {i}")
- words_list = words[i*100:(i+1)*100]
- if len(words_list) > 0:
- # 与数据库中的数据去重
- select_sql = f"select word from word.hot_word where word in {tuple(words_list)}"
- data = mysql_helper.get_data(sql=select_sql)
- exist_words = [item[0] for item in data]
- insert_words = list(set(words_list).difference(set(exist_words)))
- # log_.info(f"words_list = {words_list}, \n"
- # f"exist_words = {exist_words}, \n"
- # f"insert_words = {insert_words}")
- log_.info(f"words_list count = {len(words_list)}, "
- f"exist_words count = {len(exist_words)}, "
- f"insert_words count = {len(insert_words)}")
- if len(insert_words) > 0:
- # 拼接sql,插入数据库
- sql_values = ', '.join([f"('{word}', {source})" for word in insert_words])
- insert_sql = f"insert into word.hot_word (word, source) values {sql_values};"
- log_.info(f"insert_sql = {insert_sql}")
- mysql_helper.add_data(sql=insert_sql)
- if __name__ == '__main__':
- # add_words2mysql(sheet_name='微信指数搜索常用词样本-人工标注站内高频关键词', source=1)
- sheet_name, source = sys.argv[1], sys.argv[2]
- add_words2mysql(sheet_name, source)
|