| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300 |
- WITH
- -- 用户拉活量分层
- t_user_type AS (
- SELECT DISTINCT type, openid
- FROM loghubods.mid_share_return_people_1year
- WHERE dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
- AND type IS NOT NULL
- AND type != 'S_ALL'
- AND type NOT REGEXP 'R50'
- )
- -- 模型预估分数
- ,t_score AS (
- SELECT apptype
- ,videoid
- ,recommendtraceid
- ,scoresmap
- ,sortscore
- FROM (
- SELECT apptype
- ,videoid
- ,recommendtraceid
- ,scoresmap
- ,sortscore
- ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid) AS rn
- FROM loghubods.statistics_log_hour
- WHERE dt LIKE '${dt}%'
- AND scoresmap IS NOT NULL
- )
- WHERE rn = 1
- )
- -- TOP1 进入内容品类(场): 按回流人数取 TOP1
- ,t_top_head_cate AS (
- SELECT vt.merge_leve2
- FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
- JOIN loghubods.video_merge_tag vt ON base.headvideoid = vt.videoid
- WHERE base.dt = '${dt}'
- AND vt.merge_leve2 IS NOT NULL
- GROUP BY vt.merge_leve2
- ORDER BY SUM(CAST(is_return_noself AS BIGINT)) DESC
- LIMIT 1
- )
- -- TOP10 推荐内容品类(货): 按曝光次数取 TOP10
- ,t_top_vid_cate AS (
- SELECT vt.merge_leve2
- FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
- JOIN loghubods.video_merge_tag vt ON base.vid = vt.videoid
- WHERE base.dt = '${dt}'
- AND vt.merge_leve2 IS NOT NULL
- GROUP BY vt.merge_leve2
- ORDER BY COUNT(1) DESC
- LIMIT 10
- )
- -- TOP1 内容id(货): 每品类按曝光次数取 TOP1 (曝光>10w)
- ,t_top_vid AS (
- SELECT merge_leve2, vid
- FROM (
- SELECT vt.merge_leve2
- ,base.vid
- ,COUNT(1) AS exp_cnt
- ,ROW_NUMBER() OVER (PARTITION BY vt.merge_leve2 ORDER BY COUNT(1) DESC) AS rk
- FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
- JOIN loghubods.video_merge_tag vt ON base.vid = vt.videoid
- WHERE base.dt = '${dt}'
- AND vt.merge_leve2 IS NOT NULL
- GROUP BY vt.merge_leve2, base.vid
- HAVING exp_cnt > 100000
- )
- WHERE rk <= 1
- )
- -- 宽表
- ,t_wide AS (
- SELECT base.*
- ,CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户'
- WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50'
- WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330'
- ELSE e.type
- END AS user_type
- ,CASE WHEN th.merge_leve2 IS NOT NULL THEN vt_head.merge_leve2 ELSE '其他' END AS head_merge_leve2
- ,CASE WHEN tv.merge_leve2 IS NOT NULL THEN vt_vid.merge_leve2 ELSE '其他' END AS vid_merge_leve2
- ,CASE WHEN ti.vid IS NOT NULL THEN base.vid ELSE '其他' END AS vid_id
- ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
- ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
- ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
- ,e1.sortscore
- ,CASE
- WHEN CAST(hh AS INT) BETWEEN 0 AND 3 THEN '00-03'
- WHEN CAST(hh AS INT) BETWEEN 4 AND 7 THEN '04-07'
- WHEN CAST(hh AS INT) BETWEEN 8 AND 11 THEN '08-11'
- WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
- WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
- WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
- ELSE '-'
- END AS hh_bucket
- FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
- LEFT JOIN t_user_type e
- ON SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
- LEFT JOIN loghubods.video_merge_tag vt_head
- ON base.headvideoid = vt_head.videoid
- LEFT JOIN loghubods.video_merge_tag vt_vid
- ON base.vid = vt_vid.videoid
- LEFT JOIN t_score e1
- ON base.apptype = e1.apptype
- AND base.vid = e1.videoid
- AND base.recomtraceid = e1.recommendtraceid
- LEFT JOIN t_top_head_cate th
- ON vt_head.merge_leve2 = th.merge_leve2
- LEFT JOIN t_top_vid_cate tv
- ON vt_vid.merge_leve2 = tv.merge_leve2
- LEFT JOIN t_top_vid ti
- ON base.vid = ti.vid
- WHERE base.dt = '${dt}'
- )
- SELECT
- -- ==================== 维度列 ====================
- '${dt}' AS dt
- ,CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END AS user_type
- ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END AS hh_bucket
- ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
- ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END AS vid_merge_leve2
- ,CASE WHEN GROUPING(vid_id) = 1 THEN 'SUM' ELSE NVL(vid_id, 'SUM') END AS vid_id
- -- ==================== 基础流量 ====================
- ,COUNT(1) AS exposure_cnt
- ,COUNT(DISTINCT mid) AS exposure_uv
- ,COUNT(DISTINCT vid) AS vid_cnt
- ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4) AS exposure_per_user
- -- ==================== 分享 ====================
- ,SUM(CAST(is_share AS BIGINT)) AS share_exposure_cnt
- ,SUM(CAST(share_cnt AS BIGINT)) AS share_cnt
- -- ==================== STR 指标 ====================
- ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS str_real
- ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6) AS str_pred
- ,ROUND(
- (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
- / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
- , 4) AS str_copc
- ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6) AS str_mae
- ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6) AS str_var
- -- ==================== ROSN 指标 ====================
- ,ROUND(COALESCE(
- SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
- , 0), 6) AS rosn_real
- ,ROUND(COALESCE(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6) AS rosn_pred
- ,ROUND(
- (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
- / NULLIF(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
- , 4) AS rosn_copc
- ,ROUND(AVG(rosn_pred_origin), 6) AS rosn_pred_origin
- ,ROUND(AVG(
- CASE WHEN CAST(is_return_noself AS BIGINT) = 1
- THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
- END
- ), 6) AS rosn_mae
- ,ROUND(VARIANCE(
- CASE WHEN CAST(is_return_noself AS BIGINT) = 1
- THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
- END
- ), 6) AS rosn_var
- -- ==================== ROVN 指标 ====================
- ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS rovn
- ,ROUND(AVG(str_pred * rosn_pred), 6) AS rovn_pred
- ,ROUND(
- (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
- / NULLIF(AVG(str_pred * rosn_pred), 0)
- , 4) AS rovn_copc
- ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6) AS rovn_mae
- ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6) AS rovn_var
- ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6) AS sortscore_avg
- -- ==================== B链 ====================
- ,SUM(CAST(bn_exp AS BIGINT)) AS bn_exp
- ,SUM(CAST(bn_pv AS BIGINT)) AS bn_pv
- ,SUM(CAST(bn_uv AS BIGINT)) AS bn_uv
- ,SUM(CAST(b1_exp AS BIGINT)) AS b1_exp
- ,SUM(CAST(b1_pv AS BIGINT)) AS b1_pv
- ,SUM(CAST(b1_uv AS BIGINT)) AS b1_uv
- ,SUM(CAST(b2_exp AS BIGINT)) AS b2_exp
- ,SUM(CAST(b2_pv AS BIGINT)) AS b2_pv
- ,SUM(CAST(b2_uv AS BIGINT)) AS b2_uv
- ,SUM(CAST(b3_exp AS BIGINT)) AS b3_exp
- ,SUM(CAST(b3_pv AS BIGINT)) AS b3_pv
- ,SUM(CAST(b3_uv AS BIGINT)) AS b3_uv
- -- ==================== C链 (全量depth, 按hop) ====================
- ,SUM(CAST(cn_1_exp AS BIGINT)) AS cn_1_exp
- ,SUM(CAST(cn_1_pv AS BIGINT)) AS cn_1_pv
- ,SUM(CAST(cn_1_uv AS BIGINT)) AS cn_1_uv
- ,SUM(CAST(cn_2_exp AS BIGINT)) AS cn_2_exp
- ,SUM(CAST(cn_2_pv AS BIGINT)) AS cn_2_pv
- ,SUM(CAST(cn_2_uv AS BIGINT)) AS cn_2_uv
- ,SUM(CAST(cn_3_exp AS BIGINT)) AS cn_3_exp
- ,SUM(CAST(cn_3_pv AS BIGINT)) AS cn_3_pv
- ,SUM(CAST(cn_3_uv AS BIGINT)) AS cn_3_uv
- ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)) AS cn_total_uv
- -- ==================== C链 (按depth拆分) ====================
- ,SUM(CAST(c1_1_exp AS BIGINT)) AS c1_1_exp
- ,SUM(CAST(c1_1_pv AS BIGINT)) AS c1_1_pv
- ,SUM(CAST(c1_1_uv AS BIGINT)) AS c1_1_uv
- ,SUM(CAST(c1_2_exp AS BIGINT)) AS c1_2_exp
- ,SUM(CAST(c1_2_pv AS BIGINT)) AS c1_2_pv
- ,SUM(CAST(c1_2_uv AS BIGINT)) AS c1_2_uv
- ,SUM(CAST(c1_3_exp AS BIGINT)) AS c1_3_exp
- ,SUM(CAST(c1_3_pv AS BIGINT)) AS c1_3_pv
- ,SUM(CAST(c1_3_uv AS BIGINT)) AS c1_3_uv
- ,SUM(CAST(c2_1_exp AS BIGINT)) AS c2_1_exp
- ,SUM(CAST(c2_1_pv AS BIGINT)) AS c2_1_pv
- ,SUM(CAST(c2_1_uv AS BIGINT)) AS c2_1_uv
- ,SUM(CAST(c2_2_exp AS BIGINT)) AS c2_2_exp
- ,SUM(CAST(c2_2_pv AS BIGINT)) AS c2_2_pv
- ,SUM(CAST(c2_2_uv AS BIGINT)) AS c2_2_uv
- ,SUM(CAST(c2_3_exp AS BIGINT)) AS c2_3_exp
- ,SUM(CAST(c2_3_pv AS BIGINT)) AS c2_3_pv
- ,SUM(CAST(c2_3_uv AS BIGINT)) AS c2_3_uv
- ,SUM(CAST(c3_1_exp AS BIGINT)) AS c3_1_exp
- ,SUM(CAST(c3_1_pv AS BIGINT)) AS c3_1_pv
- ,SUM(CAST(c3_1_uv AS BIGINT)) AS c3_1_uv
- ,SUM(CAST(c3_2_exp AS BIGINT)) AS c3_2_exp
- ,SUM(CAST(c3_2_pv AS BIGINT)) AS c3_2_pv
- ,SUM(CAST(c3_2_uv AS BIGINT)) AS c3_2_uv
- ,SUM(CAST(c3_3_exp AS BIGINT)) AS c3_3_exp
- ,SUM(CAST(c3_3_pv AS BIGINT)) AS c3_3_pv
- ,SUM(CAST(c3_3_uv AS BIGINT)) AS c3_3_uv
- -- ==================== D链 (全量depth, 按hop) ====================
- ,SUM(CAST(d0 AS BIGINT)) AS d0
- ,SUM(CAST(dn_1_exp AS BIGINT)) AS dn_1_exp
- ,SUM(CAST(dn_1_pv AS BIGINT)) AS dn_1_pv
- ,SUM(CAST(dn_1_uv AS BIGINT)) AS dn_1_uv
- ,SUM(CAST(dn_2_exp AS BIGINT)) AS dn_2_exp
- ,SUM(CAST(dn_2_pv AS BIGINT)) AS dn_2_pv
- ,SUM(CAST(dn_2_uv AS BIGINT)) AS dn_2_uv
- ,SUM(CAST(dn_3_exp AS BIGINT)) AS dn_3_exp
- ,SUM(CAST(dn_3_pv AS BIGINT)) AS dn_3_pv
- ,SUM(CAST(dn_3_uv AS BIGINT)) AS dn_3_uv
- ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)) AS dn_total_uv
- -- ==================== D链 (按depth拆分) ====================
- ,SUM(CAST(d1_1_exp AS BIGINT)) AS d1_1_exp
- ,SUM(CAST(d1_1_pv AS BIGINT)) AS d1_1_pv
- ,SUM(CAST(d1_1_uv AS BIGINT)) AS d1_1_uv
- ,SUM(CAST(d1_2_exp AS BIGINT)) AS d1_2_exp
- ,SUM(CAST(d1_2_pv AS BIGINT)) AS d1_2_pv
- ,SUM(CAST(d1_2_uv AS BIGINT)) AS d1_2_uv
- ,SUM(CAST(d1_3_exp AS BIGINT)) AS d1_3_exp
- ,SUM(CAST(d1_3_pv AS BIGINT)) AS d1_3_pv
- ,SUM(CAST(d1_3_uv AS BIGINT)) AS d1_3_uv
- ,SUM(CAST(d2_1_exp AS BIGINT)) AS d2_1_exp
- ,SUM(CAST(d2_1_pv AS BIGINT)) AS d2_1_pv
- ,SUM(CAST(d2_1_uv AS BIGINT)) AS d2_1_uv
- ,SUM(CAST(d2_2_exp AS BIGINT)) AS d2_2_exp
- ,SUM(CAST(d2_2_pv AS BIGINT)) AS d2_2_pv
- ,SUM(CAST(d2_2_uv AS BIGINT)) AS d2_2_uv
- ,SUM(CAST(d2_3_exp AS BIGINT)) AS d2_3_exp
- ,SUM(CAST(d2_3_pv AS BIGINT)) AS d2_3_pv
- ,SUM(CAST(d2_3_uv AS BIGINT)) AS d2_3_uv
- ,SUM(CAST(d3_1_exp AS BIGINT)) AS d3_1_exp
- ,SUM(CAST(d3_1_pv AS BIGINT)) AS d3_1_pv
- ,SUM(CAST(d3_1_uv AS BIGINT)) AS d3_1_uv
- ,SUM(CAST(d3_2_exp AS BIGINT)) AS d3_2_exp
- ,SUM(CAST(d3_2_pv AS BIGINT)) AS d3_2_pv
- ,SUM(CAST(d3_2_uv AS BIGINT)) AS d3_2_uv
- ,SUM(CAST(d3_3_exp AS BIGINT)) AS d3_3_exp
- ,SUM(CAST(d3_3_pv AS BIGINT)) AS d3_3_pv
- ,SUM(CAST(d3_3_uv AS BIGINT)) AS d3_3_uv
- -- ==================== 全链路 ====================
- ,SUM(CAST(return_n_uv_noself AS BIGINT))
- + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
- + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
- AS all_return_n_uv
- ,ROUND(COALESCE(
- ( SUM(CAST(return_n_uv_noself AS BIGINT))
- + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
- + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
- ) / NULLIF(COUNT(1), 0)
- , 0), 6) AS all_rovn
- -- =====================================================================
- -- FROM + GROUP BY CUBE
- -- =====================================================================
- FROM t_wide
- GROUP BY CUBE(
- user_type
- ,hh_bucket
- ,head_merge_leve2
- ,vid_merge_leve2
- ,vid_id
- )
- ORDER BY exposure_cnt DESC
- ;
|