|
@@ -1,8 +1,6 @@
|
|
#coding utf-8
|
|
#coding utf-8
|
|
-from tqdm import tqdm
|
|
|
|
import sys
|
|
import sys
|
|
-import json
|
|
|
|
-
|
|
|
|
|
|
+import datetime
|
|
import traceback
|
|
import traceback
|
|
from threading import Timer
|
|
from threading import Timer
|
|
from tqdm import tqdm
|
|
from tqdm import tqdm
|
|
@@ -17,10 +15,8 @@ from feature import get_item_features
|
|
from lr_model import LrModel
|
|
from lr_model import LrModel
|
|
from utils import exe_sql
|
|
from utils import exe_sql
|
|
|
|
|
|
-
|
|
|
|
-if __name__ == "__main__":
|
|
|
|
|
|
+def update_offline_score_item(dt):
|
|
project = 'loghubods'
|
|
project = 'loghubods'
|
|
- datetime = sys.argv[1]
|
|
|
|
sql = """
|
|
sql = """
|
|
--odps sql
|
|
--odps sql
|
|
--********************************************************************--
|
|
--********************************************************************--
|
|
@@ -127,7 +123,7 @@ mid AS u_id
|
|
,round(if(i_ros_3month > 10.0, 10.0, i_ros_3month) / 10.0, 6) as i_ros_3month
|
|
,round(if(i_ros_3month > 10.0, 10.0, i_ros_3month) / 10.0, 6) as i_ros_3month
|
|
from
|
|
from
|
|
loghubods.user_video_features_data_final
|
|
loghubods.user_video_features_data_final
|
|
-where dt='{datetime}'
|
|
|
|
|
|
+where dt='{dt}'
|
|
and ad_ornot = '0'
|
|
and ad_ornot = '0'
|
|
and apptype != '13'
|
|
and apptype != '13'
|
|
), candidate_user as (
|
|
), candidate_user as (
|
|
@@ -223,7 +219,7 @@ and apptype != '13'
|
|
SELECT
|
|
SELECT
|
|
*
|
|
*
|
|
from candidate_item
|
|
from candidate_item
|
|
- """.format(datetime=datetime)
|
|
|
|
|
|
+ """.format(dt=dt)
|
|
# print(sql)
|
|
# print(sql)
|
|
data = exe_sql(project, sql)
|
|
data = exe_sql(project, sql)
|
|
print('sql done')
|
|
print('sql done')
|
|
@@ -232,8 +228,6 @@ from candidate_item
|
|
model_key = 'ad_out_v1'
|
|
model_key = 'ad_out_v1'
|
|
lr_model = LrModel('model/{}.json'.format(model_key))
|
|
lr_model = LrModel('model/{}.json'.format(model_key))
|
|
item_h_dict = {}
|
|
item_h_dict = {}
|
|
- k_col = 'i_id'
|
|
|
|
- dt = datetime
|
|
|
|
key_name_prefix = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}"
|
|
key_name_prefix = f"{config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM}{model_key}"
|
|
print(key_name_prefix)
|
|
print(key_name_prefix)
|
|
mean_item_h = 0.0
|
|
mean_item_h = 0.0
|
|
@@ -251,8 +245,8 @@ from candidate_item
|
|
count_item_h += 1
|
|
count_item_h += 1
|
|
# print(item_features)
|
|
# print(item_features)
|
|
# print(item_h)
|
|
# print(item_h)
|
|
- mean_item_h = mean_item_h / count_item_h
|
|
|
|
- item_h_dict['mean'] = mean_item_h
|
|
|
|
|
|
+ mean_item_h = mean_item_h / count_item_h
|
|
|
|
+ item_h_dict['mean'] = mean_item_h
|
|
print(mean_item_h)
|
|
print(mean_item_h)
|
|
print(count_item_h)
|
|
print(count_item_h)
|
|
k = 'mean'
|
|
k = 'mean'
|
|
@@ -260,3 +254,47 @@ from candidate_item
|
|
# with open('{}.json'.format(key_name_prefix), 'w') as fout:
|
|
# with open('{}.json'.format(key_name_prefix), 'w') as fout:
|
|
# json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
|
|
# json.dump(item_h_dict, fout, indent=2, ensure_ascii=False, sort_keys=True)
|
|
|
|
|
|
|
|
+
|
|
|
|
+def timer_check():
|
|
|
|
+ try:
|
|
|
|
+ project = config_.ad_model_data['ad_out_v1'].get('project')
|
|
|
|
+ table = config_.ad_model_data['ad_out_v1'].get('table')
|
|
|
|
+ now_date = datetime.datetime.today()
|
|
|
|
+ yesterday_date = now_date - datetime.timedelta(days=1)
|
|
|
|
+ now_dt = datetime.datetime.strftime(now_date, '%Y%m%d')
|
|
|
|
+ yesterday_dt = datetime.datetime.strftime(yesterday_date, '%Y%m%d')
|
|
|
|
+ log_.info(f"now_dt: {now_dt}")
|
|
|
|
+ log_.info(f"yesterday_dt: {yesterday_dt}")
|
|
|
|
+ now_min = datetime.datetime.now().minute
|
|
|
|
+ # 查看当前更新的数据是否已准备好
|
|
|
|
+ data_count = data_check(project=project, table=table, dt=yesterday_dt)
|
|
|
|
+ if data_count > 0:
|
|
|
|
+ log_.info('update_offline_score_item start! {}'.format(data_count))
|
|
|
|
+ # 数据准备好,进行更新
|
|
|
|
+ update_offline_score_item(dt=yesterday_dt)
|
|
|
|
+ log_.info('update_offline_score_item end!')
|
|
|
|
+ else:
|
|
|
|
+ # 数据没准备好,5分钟后重新检查
|
|
|
|
+ Timer(5 * 60, timer_check).start()
|
|
|
|
+
|
|
|
|
+ except Exception as e:
|
|
|
|
+ log_.error(f"用户广告跳出率预估离线item数据更新失败 exception: {e}, traceback: {traceback.format_exc()}")
|
|
|
|
+ send_msg_to_feishu(
|
|
|
|
+ webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
|
|
|
|
+ key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
|
|
|
|
+ msg_text=f"rov-offline{config_.ENV_TEXT} - 用户广告跳出率预估离线item数据更新失败\n"
|
|
|
|
+ f"exception: {e}\n"
|
|
|
|
+ f"traceback: {traceback.format_exc()}"
|
|
|
|
+ )
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+if __name__ == "__main__":
|
|
|
|
+ dt = sys.argv[1]
|
|
|
|
+ if len(sys.argv) > 2:
|
|
|
|
+ log_.info('## 手动更新:{}'.format(dt))
|
|
|
|
+ update_offline_score_item(dt)
|
|
|
|
+ else:
|
|
|
|
+ log_.info('## 自动更新')
|
|
|
|
+ timer_check()
|
|
|
|
+
|
|
|
|
+
|