|
@@ -0,0 +1,159 @@
|
|
|
# -*- coding: utf-8 -*-
|
|
|
+import sys
|
|
|
+import datetime
|
|
|
+import traceback
|
|
|
+from threading import Timer
|
|
|
+from tqdm import tqdm
|
|
|
+from utils import RedisHelper, data_check, get_feature_data, send_msg_to_feishu
|
|
|
+from config import set_config
|
|
|
+from log import Log
|
|
|
+from records_process import records_process
|
|
|
+
|
|
|
# Module-level singletons used by the functions in this module: the runtime
# configuration (the second value returned by set_config() is discarded),
# the project logger and the Redis client wrapper.
config_, _ = set_config()
log_ = Log()
redis_helper = RedisHelper()
|
|
|
+
|
|
|
+from feature import get_item_features as get_features
|
|
|
+from lr_model import LrModel
|
|
|
+from utils import exe_sql
|
|
|
+
|
|
|
# Model identifier: selects the weight file under model/ and is appended to
# the configured Redis key prefix.
model_key = 'ad_out_v1'
lr_model = LrModel(f'model/{model_key}.json')
# NOTE(review): not referenced anywhere else in the visible part of this file.
item_h_dict = {}
key_name_prefix = "{}{}".format(
    config_.KEY_NAME_PREFIX_AD_OUT_MODEL_SCORE_ITEM, model_key
)
print(key_name_prefix)
# Expiry passed to Redis for every stored score: one week
# (7 days * 24 hours * 3600 seconds).
expire_time = 7 * 24 * 3600
|
|
|
+
|
|
|
def process_and_store(row):
    """Score a single item record and write the score to Redis.

    Args:
        row: One query result record. It must support ``row['k']`` (the
            item key) and is passed unchanged to ``get_features``.

    The value stored under ``"<key_name_prefix>:<k>"`` is the model output
    rounded to 6 decimal places, with ``expire_time`` as its expiry.
    """
    # Read the key first so that a malformed record fails before any
    # feature extraction or model work is done.
    item_key = str(row['k'])
    score = lr_model.predict_h(get_features(row))
    redis_helper.set_data_to_redis(
        "{}:{}".format(key_name_prefix, item_key),
        round(score, 6),
        expire_time,
    )
|
|
|
+
|
|
|
def _build_item_feature_sql(dt):
    """Return the ODPS query that extracts item features for partition ``dt``.

    The query reads ``loghubods.alg_recsys_video_info`` for the given ``dt``
    partition, keeps rows with a non-empty ``videoid``, transforms the count
    style columns with ``ceil(log2(x + 1))``, caps the ratio columns at 10.0
    and divides them by 10, and exposes ``videoid`` both as ``i_id`` and as
    the key column ``k``.

    Args:
        dt: Partition value. Only ASCII digits are accepted because the
            value is spliced into the SQL text as a quoted literal.

    Raises:
        ValueError: If ``dt`` is empty or contains a non-digit character.
    """
    dt_text = str(dt)
    # dt may originate from the command line; refuse anything that could
    # terminate the quoted literal or otherwise alter the statement.
    if not dt_text or any(ch not in "0123456789" for ch in dt_text):
        raise ValueError("invalid dt partition value: {!r}".format(dt))

    return """
--odps sql
--********************************************************************--
--author:研发
--create time:2023-12-11 23:54:20
--********************************************************************--
with candidate_item as (
select
-- 基础特征_视频
videoid AS i_id
,uid AS i_up_id
-- ,tags as i_tag
-- ,title as i_title
,ceil(log2(length(title) + 1)) as i_title_len
,ceil(log2(total_time + 1)) as i_play_len
,ceil(log2(existence_days + 1)) as i_days_since_upload -- 发布时间(距离现在天数)
-- 基础特征_场景
-- ,apptype AS ctx_apptype
-- ,ctx_day AS ctx_day
-- ,ctx_week AS ctx_week
-- ,ctx_hour AS ctx_hour
-- ,ctx_region as ctx_region
-- ,ctx_city as ctx_city
-- 基础特征_交叉
-- ,ui_is_out as ui_is_out
-- ,i_play_len as playtime
-- ,IF(i_play_len > 1,'0','1') AS ui_is_out_new
-- ,rootmid AS ui_root_id
-- ,shareid AS ui_share_id
-- 统计特征_视频
,ceil(log2(i_1day_exp_cnt + 1)) as i_1day_exp_cnt
,ceil(log2(i_1day_click_cnt + 1)) as i_1day_click_cnt
,ceil(log2(i_1day_share_cnt + 1)) as i_1day_share_cnt
,ceil(log2(i_1day_return_cnt + 1)) as i_1day_return_cnt
,ceil(log2(i_3day_exp_cnt + 1)) as i_3day_exp_cnt
,ceil(log2(i_3day_click_cnt + 1)) as i_3day_click_cnt
,ceil(log2(i_3day_share_cnt + 1)) as i_3day_share_cnt
,ceil(log2(i_3day_return_cnt + 1)) as i_3day_return_cnt
,ceil(log2(i_7day_exp_cnt + 1)) as i_7day_exp_cnt
,ceil(log2(i_7day_click_cnt + 1)) as i_7day_click_cnt
,ceil(log2(i_7day_share_cnt + 1)) as i_7day_share_cnt
,ceil(log2(i_7day_return_cnt + 1)) as i_7day_return_cnt
,ceil(log2(i_3month_exp_cnt + 1)) as i_3month_exp_cnt
,ceil(log2(i_3month_click_cnt + 1)) as i_3month_click_cnt
,ceil(log2(i_3month_share_cnt + 1)) as i_3month_share_cnt
,ceil(log2(i_3month_return_cnt + 1)) as i_3month_return_cnt
,round(if(i_ctr_1day > 10.0, 10.0, i_ctr_1day) / 10.0, 6) as i_ctr_1day
,round(if(i_str_1day > 10.0, 10.0, i_str_1day) / 10.0, 6) as i_str_1day
,round(if(i_rov_1day > 10.0, 10.0, i_rov_1day) / 10.0, 6) as i_rov_1day
,round(if(i_ros_1day > 10.0, 10.0, i_ros_1day) / 10.0, 6) as i_ros_1day
,round(if(i_ctr_3day > 10.0, 10.0, i_ctr_3day) / 10.0, 6) as i_ctr_3day
,round(if(i_str_3day > 10.0, 10.0, i_str_3day) / 10.0, 6) as i_str_3day
,round(if(i_rov_3day > 10.0, 10.0, i_rov_3day) / 10.0, 6) as i_rov_3day
,round(if(i_ros_3day > 10.0, 10.0, i_ros_3day) / 10.0, 6) as i_ros_3day
,round(if(i_ctr_7day > 10.0, 10.0, i_ctr_7day) / 10.0, 6) as i_ctr_7day
,round(if(i_str_7day > 10.0, 10.0, i_str_7day) / 10.0, 6) as i_str_7day
,round(if(i_rov_7day > 10.0, 10.0, i_rov_7day) / 10.0, 6) as i_rov_7day
,round(if(i_ros_7day > 10.0, 10.0, i_ros_7day) / 10.0, 6) as i_ros_7day
,round(if(i_ctr_3month > 10.0, 10.0, i_ctr_3month) / 10.0, 6) as i_ctr_3month
,round(if(i_str_3month > 10.0, 10.0, i_str_3month) / 10.0, 6) as i_str_3month
,round(if(i_rov_3month > 10.0, 10.0, i_rov_3month) / 10.0, 6) as i_rov_3month
,round(if(i_ros_3month > 10.0, 10.0, i_ros_3month) / 10.0, 6) as i_ros_3month
from
loghubods.alg_recsys_video_info
where dt='{dt}'
and length(videoid) > 0
)
SELECT
i_id as k,
*
from candidate_item
""".format(dt=dt_text)


def update_offline_score_item(dt):
    """Recompute the offline item scores for partition ``dt``.

    Runs the item feature query against the ``loghubods`` project and hands
    the result to ``records_process`` together with ``process_and_store``
    as the per-record callback.

    Args:
        dt: Partition value to read; must consist of digits only.

    Raises:
        ValueError: If ``dt`` is not a digits-only value.
    """
    project = 'loghubods'
    sql = _build_item_feature_sql(dt)
    records = exe_sql(project, sql)
    log_.info('sql_done')
    # NOTE(review): max_size / num_workers are forwarded as-is; presumably a
    # batch size and a worker count -- confirm against records_process.
    records_process(records, process_and_store, max_size=50, num_workers=10)
|
|
|
+
|
|
|
def timer_check(dt):
    """Update the item scores once the source partition is available.

    Checks via ``data_check`` whether the configured source table has rows
    for the target partition. If it does, ``update_offline_score_item`` is
    run; otherwise the same check is re-scheduled five minutes later with a
    ``threading.Timer``. There is no upper bound on the number of retries.

    Args:
        dt: Partition to update, or ``None`` to use yesterday's date
            formatted as ``%Y%m%d``.

    Any exception raised while checking or updating is logged and reported
    to the Feishu robot; it is not re-raised.
    """
    try:
        source_cfg = config_.ad_model_data['ad_out_v1_item']
        project = source_cfg.get('project')
        table = source_cfg.get('table')

        now_date = datetime.datetime.today()
        now_dt = now_date.strftime('%Y%m%d')
        log_.info(f"now_dt: {now_dt}")

        # An explicitly supplied dt (manual run) takes precedence over the
        # default of yesterday's partition.
        if dt is not None:
            update_dt = dt
        else:
            update_dt = (now_date - datetime.timedelta(days=1)).strftime('%Y%m%d')
        log_.info(f"update_dt: {update_dt}")

        # Check whether the data for the target partition is ready.
        data_count = data_check(project=project, table=table, dt=update_dt)
        if data_count > 0:
            log_.info('update_offline_score_item start! {}'.format(data_count))
            # Data is ready: run the update.
            update_offline_score_item(dt=update_dt)
            log_.info('update_offline_score_item end!')
        else:
            # Data is not ready: check again in 5 minutes. The original dt
            # argument is forwarded so an automatic run re-derives the date.
            wait_seconds = 5 * 60
            log_.info('data not ready, wait {}s'.format(wait_seconds))
            Timer(wait_seconds, timer_check, args=(dt,)).start()

    except Exception as e:
        # Format the traceback once and reuse it for both the log entry and
        # the alert message.
        tb_text = traceback.format_exc()
        log_.error(f"用户广告跳出率预估离线item数据更新失败 exception: {e}, traceback: {tb_text}")
        send_msg_to_feishu(
            webhook=config_.FEISHU_ROBOT['server_robot'].get('webhook'),
            key_word=config_.FEISHU_ROBOT['server_robot'].get('key_word'),
            msg_text=f"rov-offline{config_.ENV_TEXT} - 用户广告跳出率预估离线item数据更新失败\n"
                     f"exception: {e}\n"
                     f"traceback: {tb_text}"
        )
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
    # An optional first command-line argument selects the partition to
    # update (manual run); without it the automatic default is used.
    cli_args = sys.argv[1:]
    dt = cli_args[0] if cli_args else None
    if dt is None:
        log_.info('## 自动更新')
    else:
        log_.info('## 手动更新:{}'.format(dt))
    timer_check(dt)
|
|
|
+
|
|
|
+
|