|
@@ -19,41 +19,21 @@ from utils import exe_sql
|
|
|
def update_offline_score_user(dt):
|
|
|
project = 'loghubods'
|
|
|
sql = """
|
|
|
---odps sql
|
|
|
+--odps sql
|
|
|
--********************************************************************--
|
|
|
--author:研发
|
|
|
---create time:2023-12-01 15:48:17
|
|
|
+--create time:2023-12-11 23:54:20
|
|
|
--********************************************************************--
|
|
|
-with candidate as (
|
|
|
+with candidate_user as (
|
|
|
select
|
|
|
-- 基础特征_用户
|
|
|
-mid AS u_id
|
|
|
+mids AS u_id
|
|
|
,machineinfo_brand AS u_brand
|
|
|
,machineinfo_model AS u_device
|
|
|
,SPLIT(machineinfo_system,' ')[0] AS u_system
|
|
|
,machineinfo_system AS u_system_ver
|
|
|
--- 基础特征_视频
|
|
|
-,videoid AS i_id
|
|
|
-,i_up_id AS i_up_id
|
|
|
-,tags as i_tag
|
|
|
-,title as i_title
|
|
|
-,ceil(log2(i_title_len + 1)) as i_title_len
|
|
|
-,ceil(log2(total_time + 1)) as i_play_len
|
|
|
-,ceil(log2(i_days_since_upload + 1)) as i_days_since_upload -- 发布时间(距离现在天数)
|
|
|
--- 基础特征_场景
|
|
|
-,apptype AS ctx_apptype
|
|
|
-,ctx_day AS ctx_day
|
|
|
-,ctx_week AS ctx_week
|
|
|
-,ctx_hour AS ctx_hour
|
|
|
-,ctx_region as ctx_region
|
|
|
-,ctx_city as ctx_city
|
|
|
--- 基础特征_交叉
|
|
|
-,ui_is_out as ui_is_out
|
|
|
-,i_play_len as playtime
|
|
|
--- ,IF(i_play_len > 1,'0','1') AS ui_is_out_new
|
|
|
-,rootmid AS ui_root_id
|
|
|
-,shareid AS ui_share_id
|
|
|
--- 统计特征_用户
|
|
|
+,province as ctx_region
|
|
|
+,city as ctx_city
|
|
|
,u_cycle_bucket_7days
|
|
|
,u_cycle_bucket_30days
|
|
|
,u_share_bucket_30days
|
|
@@ -89,135 +69,14 @@ mid AS u_id
|
|
|
,round(if(u_str_3month > 10.0, 10.0, u_str_3month) / 10.0, 6) as u_str_3month
|
|
|
,round(if(u_rov_3month > 10.0, 10.0, u_rov_3month) / 10.0, 6) as u_rov_3month
|
|
|
,round(if(u_ros_3month > 10.0, 10.0, u_ros_3month) / 10.0, 6) as u_ros_3month
|
|
|
--- 统计特征_视频
|
|
|
-,ceil(log2(i_1day_exp_cnt + 1)) as i_1day_exp_cnt
|
|
|
-,ceil(log2(i_1day_click_cnt + 1)) as i_1day_click_cnt
|
|
|
-,ceil(log2(i_1day_share_cnt + 1)) as i_1day_share_cnt
|
|
|
-,ceil(log2(i_1day_return_cnt + 1)) as i_1day_return_cnt
|
|
|
-,ceil(log2(i_3day_exp_cnt + 1)) as i_3day_exp_cnt
|
|
|
-,ceil(log2(i_3day_click_cnt + 1)) as i_3day_click_cnt
|
|
|
-,ceil(log2(i_3day_share_cnt + 1)) as i_3day_share_cnt
|
|
|
-,ceil(log2(i_3day_return_cnt + 1)) as i_3day_return_cnt
|
|
|
-,ceil(log2(i_7day_exp_cnt + 1)) as i_7day_exp_cnt
|
|
|
-,ceil(log2(i_7day_click_cnt + 1)) as i_7day_click_cnt
|
|
|
-,ceil(log2(i_7day_share_cnt + 1)) as i_7day_share_cnt
|
|
|
-,ceil(log2(i_7day_return_cnt + 1)) as i_7day_return_cnt
|
|
|
-,ceil(log2(i_3month_exp_cnt + 1)) as i_3month_exp_cnt
|
|
|
-,ceil(log2(i_3month_click_cnt + 1)) as i_3month_click_cnt
|
|
|
-,ceil(log2(i_3month_share_cnt + 1)) as i_3month_share_cnt
|
|
|
-,ceil(log2(i_3month_return_cnt + 1)) as i_3month_return_cnt
|
|
|
-,round(if(i_ctr_1day > 10.0, 10.0, i_ctr_1day) / 10.0, 6) as i_ctr_1day
|
|
|
-,round(if(i_str_1day > 10.0, 10.0, i_str_1day) / 10.0, 6) as i_str_1day
|
|
|
-,round(if(i_rov_1day > 10.0, 10.0, i_rov_1day) / 10.0, 6) as i_rov_1day
|
|
|
-,round(if(i_ros_1day > 10.0, 10.0, i_ros_1day) / 10.0, 6) as i_ros_1day
|
|
|
-,round(if(i_ctr_3day > 10.0, 10.0, i_ctr_3day) / 10.0, 6) as i_ctr_3day
|
|
|
-,round(if(i_str_3day > 10.0, 10.0, i_str_3day) / 10.0, 6) as i_str_3day
|
|
|
-,round(if(i_rov_3day > 10.0, 10.0, i_rov_3day) / 10.0, 6) as i_rov_3day
|
|
|
-,round(if(i_ros_3day > 10.0, 10.0, i_ros_3day) / 10.0, 6) as i_ros_3day
|
|
|
-,round(if(i_ctr_7day > 10.0, 10.0, i_ctr_7day) / 10.0, 6) as i_ctr_7day
|
|
|
-,round(if(i_str_7day > 10.0, 10.0, i_str_7day) / 10.0, 6) as i_str_7day
|
|
|
-,round(if(i_rov_7day > 10.0, 10.0, i_rov_7day) / 10.0, 6) as i_rov_7day
|
|
|
-,round(if(i_ros_7day > 10.0, 10.0, i_ros_7day) / 10.0, 6) as i_ros_7day
|
|
|
-,round(if(i_ctr_3month > 10.0, 10.0, i_ctr_3month) / 10.0, 6) as i_ctr_3month
|
|
|
-,round(if(i_str_3month > 10.0, 10.0, i_str_3month) / 10.0, 6) as i_str_3month
|
|
|
-,round(if(i_rov_3month > 10.0, 10.0, i_rov_3month) / 10.0, 6) as i_rov_3month
|
|
|
-,round(if(i_ros_3month > 10.0, 10.0, i_ros_3month) / 10.0, 6) as i_ros_3month
|
|
|
from
|
|
|
-loghubods.user_video_features_data_final
|
|
|
+loghubods.alg_recsys_user_info
|
|
|
where dt='{dt}'
|
|
|
-and ad_ornot = '0'
|
|
|
-and apptype != '13'
|
|
|
-), candidate_user as (
|
|
|
- SELECT
|
|
|
- u_id,
|
|
|
- max(u_brand) as u_brand,
|
|
|
- max(u_device) as u_device,
|
|
|
- max(u_system) as u_system,
|
|
|
- max(u_system_ver) as u_system_ver,
|
|
|
- max(ctx_region) as ctx_region,
|
|
|
- max(ctx_city) as ctx_city,
|
|
|
- max(u_cycle_bucket_7days) as u_cycle_bucket_7days,
|
|
|
- max(u_cycle_bucket_30days) as u_cycle_bucket_30days,
|
|
|
- max(u_share_bucket_30days) as u_share_bucket_30days,
|
|
|
- max(u_1day_exp_cnt) as u_1day_exp_cnt,
|
|
|
- max(u_1day_click_cnt) as u_1day_click_cnt,
|
|
|
- max(u_1day_share_cnt) as u_1day_share_cnt,
|
|
|
- max(u_1day_return_cnt) as u_1day_return_cnt,
|
|
|
- max(u_3day_exp_cnt) as u_3day_exp_cnt,
|
|
|
- max(u_3day_click_cnt) as u_3day_click_cnt,
|
|
|
- max(u_3day_share_cnt) as u_3day_share_cnt,
|
|
|
- max(u_3day_return_cnt) as u_3day_return_cnt,
|
|
|
- max(u_7day_exp_cnt) as u_7day_exp_cnt,
|
|
|
- max(u_7day_click_cnt) as u_7day_click_cnt,
|
|
|
- max(u_7day_share_cnt) as u_7day_share_cnt,
|
|
|
- max(u_7day_return_cnt) as u_7day_return_cnt,
|
|
|
- max(u_3month_exp_cnt) as u_3month_exp_cnt,
|
|
|
- max(u_3month_click_cnt) as u_3month_click_cnt,
|
|
|
- max(u_3month_share_cnt) as u_3month_share_cnt,
|
|
|
- max(u_3month_return_cnt) as u_3month_return_cnt,
|
|
|
- max(u_ctr_1day) as u_ctr_1day,
|
|
|
- max(u_str_1day) as u_str_1day,
|
|
|
- max(u_rov_1day) as u_rov_1day,
|
|
|
- max(u_ros_1day) as u_ros_1day,
|
|
|
- max(u_ctr_3day) as u_ctr_3day,
|
|
|
- max(u_str_3day) as u_str_3day,
|
|
|
- max(u_rov_3day) as u_rov_3day,
|
|
|
- max(u_ros_3day) as u_ros_3day,
|
|
|
- max(u_ctr_7day) as u_ctr_7day,
|
|
|
- max(u_str_7day) as u_str_7day,
|
|
|
- max(u_rov_7day) as u_rov_7day,
|
|
|
- max(u_ros_7day) as u_ros_7day,
|
|
|
- max(u_ctr_3month) as u_ctr_3month,
|
|
|
- max(u_str_3month) as u_str_3month,
|
|
|
- max(u_rov_3month) as u_rov_3month,
|
|
|
- max(u_ros_3month) as u_ros_3month
|
|
|
- FROM
|
|
|
- candidate
|
|
|
- group by u_id
|
|
|
-), candidate_item as (
|
|
|
- select
|
|
|
- i_id,
|
|
|
- max(i_up_id) as i_up_id,
|
|
|
- max(i_title_len) as i_title_len,
|
|
|
- max(i_play_len) as i_play_len,
|
|
|
- max(i_days_since_upload) as i_days_since_upload,
|
|
|
- max(i_1day_exp_cnt) as i_1day_exp_cnt,
|
|
|
- max(i_1day_click_cnt) as i_1day_click_cnt,
|
|
|
- max(i_1day_share_cnt) as i_1day_share_cnt,
|
|
|
- max(i_1day_return_cnt) as i_1day_return_cnt,
|
|
|
- max(i_3day_exp_cnt) as i_3day_exp_cnt,
|
|
|
- max(i_3day_click_cnt) as i_3day_click_cnt,
|
|
|
- max(i_3day_share_cnt) as i_3day_share_cnt,
|
|
|
- max(i_3day_return_cnt) as i_3day_return_cnt,
|
|
|
- max(i_7day_exp_cnt) as i_7day_exp_cnt,
|
|
|
- max(i_7day_click_cnt) as i_7day_click_cnt,
|
|
|
- max(i_7day_share_cnt) as i_7day_share_cnt,
|
|
|
- max(i_7day_return_cnt) as i_7day_return_cnt,
|
|
|
- max(i_3month_exp_cnt) as i_3month_exp_cnt,
|
|
|
- max(i_3month_click_cnt) as i_3month_click_cnt,
|
|
|
- max(i_3month_share_cnt) as i_3month_share_cnt,
|
|
|
- max(i_3month_return_cnt) as i_3month_return_cnt,
|
|
|
- max(i_ctr_1day) as i_ctr_1day,
|
|
|
- max(i_str_1day) as i_str_1day,
|
|
|
- max(i_rov_1day) as i_rov_1day,
|
|
|
- max(i_ros_1day) as i_ros_1day,
|
|
|
- max(i_ctr_3day) as i_ctr_3day,
|
|
|
- max(i_str_3day) as i_str_3day,
|
|
|
- max(i_rov_3day) as i_rov_3day,
|
|
|
- max(i_ros_3day) as i_ros_3day,
|
|
|
- max(i_ctr_7day) as i_ctr_7day,
|
|
|
- max(i_str_7day) as i_str_7day,
|
|
|
- max(i_rov_7day) as i_rov_7day,
|
|
|
- max(i_ros_7day) as i_ros_7day,
|
|
|
- max(i_ctr_3month) as i_ctr_3month,
|
|
|
- max(i_str_3month) as i_str_3month,
|
|
|
- max(i_rov_3month) as i_rov_3month,
|
|
|
- max(i_ros_3month) as i_ros_3month
|
|
|
- FROM
|
|
|
- candidate
|
|
|
- group by i_id
|
|
|
+and length(mids) > 0
|
|
|
+and (u_3month_share_cnt > 0 or u_7day_click_cnt > 0 or u_3day_exp_cnt > 0)
|
|
|
)
|
|
|
SELECT
|
|
|
+u_id as k,
|
|
|
*
|
|
|
from candidate_user
|
|
|
""".format(dt=dt)
|