| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- WITH
- -- 用户拉活量分层
- t_user_type AS (
- SELECT DISTINCT type, openid
- FROM loghubods.mid_share_return_people_1year
- WHERE dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
- AND type IS NOT NULL
- AND type != 'S_ALL'
- AND type NOT REGEXP 'R50'
- )
- -- 模型预估分数
- ,t_score AS (
- SELECT apptype
- ,videoid
- ,recommendtraceid
- ,scoresmap
- ,sortscore
- FROM (
- SELECT apptype
- ,videoid
- ,recommendtraceid
- ,scoresmap
- ,sortscore
- ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid) AS rn
- FROM loghubods.statistics_log_hour
- WHERE dt = '${dt}'
- AND scoresmap IS NOT NULL
- )
- WHERE rn = 1
- )
- -- 宽表
- ,t_wide AS (
- SELECT base.*
- ,CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户'
- WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50'
- WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330'
- ELSE e.type
- END AS user_type
- ,vt_head.merge_leve2 AS head_merge_leve2
- ,vt_vid.merge_leve2 AS vid_merge_leve2
- ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
- ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
- ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
- ,e1.sortscore
- ,CASE
- WHEN CAST(hh AS INT) BETWEEN 0 AND 3 THEN '00-03'
- WHEN CAST(hh AS INT) BETWEEN 4 AND 7 THEN '04-07'
- WHEN CAST(hh AS INT) BETWEEN 8 AND 11 THEN '08-11'
- WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
- WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
- WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
- ELSE '-'
- END AS hh_bucket
- FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
- LEFT JOIN t_user_type e
- ON SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
- LEFT JOIN loghubods.video_merge_tag vt_head
- ON base.headvideoid = vt_head.videoid
- LEFT JOIN loghubods.video_merge_tag vt_vid
- ON base.vid = vt_vid.videoid
- LEFT JOIN t_score e1
- ON base.apptype = e1.apptype
- AND base.vid = e1.videoid
- AND base.recomtraceid = e1.recommendtraceid
- WHERE base.dt = '${dt}'
- )
- SELECT
- -- ==================== 维度列 ====================
- CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END AS user_type
- ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END AS hh_bucket
- ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
- ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END AS vid_merge_leve2
- -- ==================== 基础流量 ====================
- ,COUNT(1) AS exposure_cnt
- ,COUNT(DISTINCT mid) AS exposure_uv
- ,COUNT(DISTINCT vid) AS vid_cnt
- ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4) AS exposure_per_user
- -- ==================== 分享 ====================
- ,SUM(CAST(is_share AS BIGINT)) AS share_exposure_cnt
- ,SUM(CAST(share_cnt AS BIGINT)) AS share_cnt
- -- ==================== STR 指标 ====================
- ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS str_real
- ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6) AS str_pred
- ,ROUND(
- (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
- / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
- , 4) AS str_copc
- ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6) AS str_mae
- ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6) AS str_var
- -- ==================== ROSN 指标 ====================
- ,ROUND(COALESCE(
- SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
- , 0), 6) AS rosn_real
- ,ROUND(COALESCE(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6) AS rosn_pred
- ,ROUND(
- (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
- / NULLIF(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
- , 4) AS rosn_copc
- ,ROUND(AVG(rosn_pred_origin), 6) AS rosn_pred_origin
- ,ROUND(AVG(
- CASE WHEN CAST(is_return_noself AS BIGINT) = 1
- THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
- END
- ), 6) AS rosn_mae
- ,ROUND(VARIANCE(
- CASE WHEN CAST(is_return_noself AS BIGINT) = 1
- THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
- END
- ), 6) AS rosn_var
- -- ==================== ROVN 指标 ====================
- ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS rovn
- ,ROUND(AVG(str_pred * rosn_pred), 6) AS rovn_pred
- ,ROUND(
- (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
- / NULLIF(AVG(str_pred * rosn_pred), 0)
- , 4) AS rovn_copc
- ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6) AS rovn_mae
- ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6) AS rovn_var
- ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6) AS sortscore_avg
- -- ==================== B链 ====================
- ,SUM(CAST(bn_exp AS BIGINT)) AS bn_exp
- ,SUM(CAST(bn_pv AS BIGINT)) AS bn_pv
- ,SUM(CAST(bn_uv AS BIGINT)) AS bn_uv
- ,SUM(CAST(b1_exp AS BIGINT)) AS b1_exp
- ,SUM(CAST(b1_pv AS BIGINT)) AS b1_pv
- ,SUM(CAST(b1_uv AS BIGINT)) AS b1_uv
- ,SUM(CAST(b2_exp AS BIGINT)) AS b2_exp
- ,SUM(CAST(b2_pv AS BIGINT)) AS b2_pv
- ,SUM(CAST(b2_uv AS BIGINT)) AS b2_uv
- ,SUM(CAST(b3_exp AS BIGINT)) AS b3_exp
- ,SUM(CAST(b3_pv AS BIGINT)) AS b3_pv
- ,SUM(CAST(b3_uv AS BIGINT)) AS b3_uv
- -- ==================== C链 (全量depth, 按hop) ====================
- ,SUM(CAST(cn_1_exp AS BIGINT)) AS cn_1_exp
- ,SUM(CAST(cn_1_pv AS BIGINT)) AS cn_1_pv
- ,SUM(CAST(cn_1_uv AS BIGINT)) AS cn_1_uv
- ,SUM(CAST(cn_2_exp AS BIGINT)) AS cn_2_exp
- ,SUM(CAST(cn_2_pv AS BIGINT)) AS cn_2_pv
- ,SUM(CAST(cn_2_uv AS BIGINT)) AS cn_2_uv
- ,SUM(CAST(cn_3_exp AS BIGINT)) AS cn_3_exp
- ,SUM(CAST(cn_3_pv AS BIGINT)) AS cn_3_pv
- ,SUM(CAST(cn_3_uv AS BIGINT)) AS cn_3_uv
- ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)) AS cn_total_uv
- -- ==================== D链 (全量depth, 按hop) ====================
- ,SUM(CAST(d0 AS BIGINT)) AS d0
- ,SUM(CAST(dn_1_exp AS BIGINT)) AS dn_1_exp
- ,SUM(CAST(dn_1_pv AS BIGINT)) AS dn_1_pv
- ,SUM(CAST(dn_1_uv AS BIGINT)) AS dn_1_uv
- ,SUM(CAST(dn_2_exp AS BIGINT)) AS dn_2_exp
- ,SUM(CAST(dn_2_pv AS BIGINT)) AS dn_2_pv
- ,SUM(CAST(dn_2_uv AS BIGINT)) AS dn_2_uv
- ,SUM(CAST(dn_3_exp AS BIGINT)) AS dn_3_exp
- ,SUM(CAST(dn_3_pv AS BIGINT)) AS dn_3_pv
- ,SUM(CAST(dn_3_uv AS BIGINT)) AS dn_3_uv
- ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)) AS dn_total_uv
- -- ==================== 全链路 ====================
- ,SUM(CAST(return_n_uv_noself AS BIGINT))
- + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
- + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
- AS all_return_n_uv
- ,ROUND(COALESCE(
- ( SUM(CAST(return_n_uv_noself AS BIGINT))
- + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
- + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
- ) / NULLIF(COUNT(1), 0)
- , 0), 6) AS all_rovn
- -- =====================================================================
- -- FROM + GROUP BY CUBE
- -- =====================================================================
- FROM t_wide
- GROUP BY CUBE(
- user_type
- ,hh_bucket
- ,head_merge_leve2
- ,vid_merge_leve2
- )
- ORDER BY exposure_cnt DESC
- limit 1000
- ;
|