WITH -- 用户拉活量分层 t_user_type AS ( SELECT DISTINCT type, openid FROM loghubods.mid_share_return_people_1year WHERE dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD') AND type IS NOT NULL AND type != 'S_ALL' AND type NOT REGEXP 'R50' ) -- 模型预估分数 ,t_score AS ( SELECT apptype ,videoid ,recommendtraceid ,scoresmap ,sortscore FROM ( SELECT apptype ,videoid ,recommendtraceid ,scoresmap ,sortscore ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid) AS rn FROM loghubods.statistics_log_hour WHERE dt = '${dt}' AND scoresmap IS NOT NULL ) WHERE rn = 1 ) -- 宽表 ,t_wide AS ( SELECT base.* ,CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户' WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50' WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330' ELSE e.type END AS user_type ,vt_head.merge_leve2 AS head_merge_leve2 ,vt_vid.merge_leve2 AS vid_merge_leve2 ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin ,e1.sortscore ,CASE WHEN CAST(hh AS INT) BETWEEN 0 AND 3 THEN '00-03' WHEN CAST(hh AS INT) BETWEEN 4 AND 7 THEN '04-07' WHEN CAST(hh AS INT) BETWEEN 8 AND 11 THEN '08-11' WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15' WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19' WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23' ELSE '-' END AS hh_bucket FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base LEFT JOIN t_user_type e ON SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid LEFT JOIN loghubods.video_merge_tag vt_head ON base.headvideoid = vt_head.videoid LEFT JOIN loghubods.video_merge_tag vt_vid ON base.vid = vt_vid.videoid LEFT JOIN t_score e1 ON base.apptype = e1.apptype AND base.vid = e1.videoid AND base.recomtraceid = e1.recommendtraceid WHERE base.dt = '${dt}' ) SELECT -- ==================== 维度列 ==================== CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END AS user_type ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END AS hh_bucket ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2 ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END AS vid_merge_leve2 -- ==================== 基础流量 ==================== ,COUNT(1) AS exposure_cnt ,COUNT(DISTINCT mid) AS exposure_uv ,COUNT(DISTINCT vid) AS vid_cnt ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4) AS exposure_per_user -- ==================== 分享 ==================== ,SUM(CAST(is_share AS BIGINT)) AS share_exposure_cnt ,SUM(CAST(share_cnt AS BIGINT)) AS share_cnt -- ==================== STR 指标 ==================== ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS str_real ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6) AS str_pred ,ROUND( (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0)) / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0) , 4) AS str_copc ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6) AS str_mae ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6) AS str_var -- ==================== ROSN 指标 ==================== ,ROUND(COALESCE( SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0) , 0), 6) AS rosn_real ,ROUND(COALESCE(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6) AS rosn_pred ,ROUND( (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)) / NULLIF(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0) , 4) AS rosn_copc ,ROUND(AVG(rosn_pred_origin), 6) AS rosn_pred_origin ,ROUND(AVG( CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT)) END ), 6) AS rosn_mae ,ROUND(VARIANCE( CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT) END ), 6) AS rosn_var -- ==================== ROVN 指标 ==================== ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS rovn ,ROUND(AVG(str_pred * rosn_pred), 6) AS rovn_pred ,ROUND( (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0)) / NULLIF(AVG(str_pred * rosn_pred), 0) , 4) AS rovn_copc ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6) AS rovn_mae ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6) AS rovn_var ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6) AS sortscore_avg -- ==================== B链 ==================== ,SUM(CAST(bn_exp AS BIGINT)) AS bn_exp ,SUM(CAST(bn_pv AS BIGINT)) AS bn_pv ,SUM(CAST(bn_uv AS BIGINT)) AS bn_uv ,SUM(CAST(b1_exp AS BIGINT)) AS b1_exp ,SUM(CAST(b1_pv AS BIGINT)) AS b1_pv ,SUM(CAST(b1_uv AS BIGINT)) AS b1_uv ,SUM(CAST(b2_exp AS BIGINT)) AS b2_exp ,SUM(CAST(b2_pv AS BIGINT)) AS b2_pv ,SUM(CAST(b2_uv AS BIGINT)) AS b2_uv ,SUM(CAST(b3_exp AS BIGINT)) AS b3_exp ,SUM(CAST(b3_pv AS BIGINT)) AS b3_pv ,SUM(CAST(b3_uv AS BIGINT)) AS b3_uv -- ==================== C链 (全量depth, 按hop) ==================== ,SUM(CAST(cn_1_exp AS BIGINT)) AS cn_1_exp ,SUM(CAST(cn_1_pv AS BIGINT)) AS cn_1_pv ,SUM(CAST(cn_1_uv AS BIGINT)) AS cn_1_uv ,SUM(CAST(cn_2_exp AS BIGINT)) AS cn_2_exp ,SUM(CAST(cn_2_pv AS BIGINT)) AS cn_2_pv ,SUM(CAST(cn_2_uv AS BIGINT)) AS cn_2_uv ,SUM(CAST(cn_3_exp AS BIGINT)) AS cn_3_exp ,SUM(CAST(cn_3_pv AS BIGINT)) AS cn_3_pv ,SUM(CAST(cn_3_uv AS BIGINT)) AS cn_3_uv ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)) AS cn_total_uv -- ==================== D链 (全量depth, 按hop) ==================== ,SUM(CAST(d0 AS BIGINT)) AS d0 ,SUM(CAST(dn_1_exp AS BIGINT)) AS dn_1_exp ,SUM(CAST(dn_1_pv AS BIGINT)) AS dn_1_pv ,SUM(CAST(dn_1_uv AS BIGINT)) AS dn_1_uv ,SUM(CAST(dn_2_exp AS BIGINT)) AS dn_2_exp ,SUM(CAST(dn_2_pv AS BIGINT)) AS dn_2_pv ,SUM(CAST(dn_2_uv AS BIGINT)) AS dn_2_uv ,SUM(CAST(dn_3_exp AS BIGINT)) AS dn_3_exp ,SUM(CAST(dn_3_pv AS BIGINT)) AS dn_3_pv ,SUM(CAST(dn_3_uv AS BIGINT)) AS dn_3_uv ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)) AS dn_total_uv -- ==================== 全链路 ==================== ,SUM(CAST(return_n_uv_noself AS BIGINT)) + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)) + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)) AS all_return_n_uv ,ROUND(COALESCE( ( SUM(CAST(return_n_uv_noself AS BIGINT)) + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)) + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)) ) / NULLIF(COUNT(1), 0) , 0), 6) AS all_rovn -- ===================================================================== -- FROM + GROUP BY CUBE -- ===================================================================== FROM t_wide GROUP BY CUBE( user_type ,hh_bucket ,head_merge_leve2 ,vid_merge_leve2 ) ORDER BY exposure_cnt DESC limit 1000 ;