|
@@ -8,7 +8,7 @@ from log import Log
|
|
|
from utils.utils import get_data_from_odps
|
|
|
from words_func import word_cut
|
|
|
from db_helper import MysqlHelper
|
|
|
-config_ = set_config()
|
|
|
+config_, env = set_config()
|
|
|
log_ = Log()
|
|
|
mysql_helper = MysqlHelper()
|
|
|
features = ['title', 'source']
|
|
@@ -55,7 +55,7 @@ def data_check(project, table, now_date):
|
|
|
)
|
|
|
|
|
|
try:
|
|
|
- dt = datetime.datetime.strftime(now_date, '%Y%m%d%H')
|
|
|
+ dt = datetime.datetime.strftime(now_date, '%Y%m%d')
|
|
|
check_res = check_table_partition_exits(date=dt, project=project, table=table)
|
|
|
if check_res:
|
|
|
sql = f'select * from {project}.{table} where dt = {dt}'
|
|
@@ -97,12 +97,14 @@ def update_cut_words_result(text, source, words_list):
|
|
|
res = mysql_helper.get_data(sql=select_sql)
|
|
|
if res is None or len(res) == 0:
|
|
|
# 不存在,插入
|
|
|
- insert_sql = f"insert into word.cut_words_result (text, words, source) values '{text}', '{words}', {source};"
|
|
|
+ insert_sql = f"insert into word.cut_words_result (text, words, source) values ('{text}', '{words}', {source});"
|
|
|
+ log_.info(f"insert_sql = {insert_sql}")
|
|
|
mysql_helper.add_data(sql=insert_sql)
|
|
|
else:
|
|
|
# 存在,更新
|
|
|
update_sql = f"""update word.cut_words_result set words = '{words}', source = {source},
|
|
|
- update_time = {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} where id = {res[0][0]};"""
|
|
|
+ update_time = '{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}' where id = {res[0][0]};"""
|
|
|
+ log_.info(f"update_sql = {update_sql}")
|
|
|
mysql_helper.add_data(sql=update_sql)
|
|
|
|
|
|
|
|
@@ -121,7 +123,7 @@ def update_hot_word(words_list, source):
|
|
|
res = mysql_helper.get_data(sql=select_sql)
|
|
|
if res is None or len(res) == 0:
|
|
|
# 不存在,插入
|
|
|
- insert_sql = f"insert into word.hot_word (word, source) values '{word}', {source};"
|
|
|
+ insert_sql = f"insert into word.hot_word (word, source) values ('{word}', {source});"
|
|
|
mysql_helper.add_data(sql=insert_sql)
|
|
|
else:
|
|
|
# 存在,更新
|
|
@@ -129,7 +131,7 @@ def update_hot_word(words_list, source):
|
|
|
source = 3
|
|
|
update_sql = \
|
|
|
f"""update word.hot_word set source = {source},
|
|
|
- update_time = {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")} where id = {res[0][0]};"""
|
|
|
+ update_time = '{datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}' where id = {res[0][0]};"""
|
|
|
mysql_helper.add_data(sql=update_sql)
|
|
|
|
|
|
|
|
@@ -137,15 +139,18 @@ def data_update(project, table, now_date):
|
|
|
"""数据更新"""
|
|
|
# 获取站内外视频标题数据
|
|
|
df = get_title_data(project=project, table=table, now_date=now_date)
|
|
|
- df = df['tag'].astype(int)
|
|
|
+ df['source'] = df['source'].astype(int)
|
|
|
for source in [1, 2]:
|
|
|
df_temp = df[df['source'] == source]
|
|
|
title_list = df_temp['title'].to_list()
|
|
|
+ log_.info(f"source = {source}, count = {len(title_list)}")
|
|
|
for title in title_list:
|
|
|
+ log_.info(f"title: {title}")
|
|
|
if len(title) == 0:
|
|
|
return
|
|
|
# 1. 分词
|
|
|
words_list = word_cut(text=title)
|
|
|
+ log_.info(f"words_list: {words_list}")
|
|
|
# 2. 分词结果入库
|
|
|
update_cut_words_result(text=title, source=source, words_list=words_list)
|
|
|
# 3. 词入库
|