-- ===================================================================== -- 用户拉活-曝光-拉回分析表 (recommend_distribution_data_total) -- 维度: 用户拉活量分层 × 进入小时 × 进入场景 × 进入内容品类 × 推荐内容品类 × 内容id -- 粒度: GROUP BY CUBE 全维度组合 + SUM 汇总 -- ===================================================================== -- DROP TABLE IF EXISTS loghubods.recommend_distribution_data_total; CREATE TABLE IF NOT EXISTS loghubods.recommend_distribution_data_total ( `用户拉活量分层` STRING COMMENT '用户拉活量分层(R0&新用户/R1-50等,汇总为SUM)', `进入小时` STRING COMMENT '进入小时(1-4/5-8等,汇总为SUM)', `进入场景` STRING COMMENT '进入场景(业务场景描述,汇总为SUM)', `进入内容品类` STRING COMMENT '进入内容品类(merge_leve2维度,汇总为SUM)', `推荐内容品类` STRING COMMENT '推荐内容品类(merge_leve2维度,汇总为SUM)', `内容id` STRING COMMENT '内容id(videoid,其他为兜底,汇总为SUM)', -- ===== 基础流量指标 ===== `访问人数` BIGINT COMMENT '去重访问人数(machinecode维度)', `曝光人数` BIGINT COMMENT '去重曝光人数(mid维度)', `曝光人数比访问人数` DOUBLE COMMENT '曝光人数/访问人数 比值', `曝光次数` BIGINT COMMENT '曝光总次数(mid计数)', `曝光次数人均` DOUBLE COMMENT '曝光次数/去重曝光人数 人均曝光次数', `视频个数` BIGINT COMMENT '去重曝光视频个数(vid维度)', `单视频曝光次数` DOUBLE COMMENT '曝光次数/去重视频个数 单视频平均曝光次数', -- ===== 全链路拉回 ===== `ALL拉回n层` BIGINT COMMENT '全量拉回n层人数(多来源拉回人数求和)', `ALL拉回n层比曝光次数` DOUBLE COMMENT '全量拉回n层人数/曝光次数 比值,保留6位小数', -- ===== 原生拉回 & rovn ===== `return_n_uv_noself` BIGINT COMMENT '原生n层拉回人数(无自返)', `rovn` DOUBLE COMMENT '原生n层拉回人数/曝光次数 比值,保留6位小数', `rovn_pred` DOUBLE COMMENT 'rovn预估值(str_pred*rosn_pred平均值),保留6位小数', `rovn_copc` DOUBLE COMMENT 'rovn实际/rovn预估 比值,保留4位小数', -- ===== STR 指标 ===== `str_real` DOUBLE COMMENT 'str实际值(is_return_noself求和/曝光次数),保留6位小数', `str_pred` DOUBLE COMMENT 'str预估值(str_pred求和/曝光次数),保留6位小数', `str_copc` DOUBLE COMMENT 'str实际/str预估 比值,保留4位小数', `str_MAE` DOUBLE COMMENT 'str预估与实际的平均绝对误差,保留6位小数', `STR_VAR` DOUBLE COMMENT 'str预估与实际差值的方差,保留6位小数', `strauc` DOUBLE COMMENT 'str AUC,保留6位小数', `str多维加权copc` DOUBLE COMMENT 'str多维加权copc,保留6位小数', -- ===== ROSN 指标 ===== `rosn_real` DOUBLE COMMENT 'rosn实际值(原生n层拉回人数/is_return_noself求和),保留6位小数', `rosn_pred` DOUBLE COMMENT 'rosn预估值(rosn_pred求和/曝光次数),保留6位小数', `rosn_copc` DOUBLE COMMENT 'rosn实际/rosn预估 比值,保留4位小数', `rosn_实际预估diff绝对距离` DOUBLE COMMENT '拉回时rosn预估与实际的平均绝对距离,保留6位小数', `rosn_实际预估高低估分布比例` DOUBLE COMMENT '拉回时rosn预估与实际差值的方差,保留6位小数', -- ===== B链 (分享→点击) ===== `Buv1层拉回人数` BIGINT COMMENT 'B端uv1层拉回人数(is_return_noself求和)', `rov0` DOUBLE COMMENT 'B端uv1层拉回人数/曝光次数 比值,保留6位小数', `rov0预估` BIGINT COMMENT 'rov0预估值(固定为0)', `rov0copc` BIGINT COMMENT 'rov0copc值(固定为0)', `ros0` DOUBLE COMMENT 'ros0实际值(B端uv1层拉回人数/分享次数),保留6位小数', `ros0预估` BIGINT COMMENT 'ros0预估值(固定为0)', `ros0copc` BIGINT COMMENT 'ros0copc值(固定为0)', `Bnuvn层拉回人数` BIGINT COMMENT 'B端nuvn层拉回人数(原生n层拉回人数一致)', `rorn_b` DOUBLE COMMENT 'B端n层拉回率(Bnuvn/uv1层拉回人数),保留6位小数', `ror1单层_b` DOUBLE COMMENT 'B端1层拉回率(b1/uv1层拉回人数),保留6位小数', `ror2单层_b` DOUBLE COMMENT 'B端2层拉回率(b2/b1),保留6位小数', -- ===== B链 T+1 ===== `Bnuvn层拉回人数_t1` DOUBLE COMMENT 'T1维度B端nuvn层拉回人数,保留6位小数', `rorn_b_t1` DOUBLE COMMENT 'T1维度B端n层拉回率,保留6位小数', `ror1单层_b_t1` DOUBLE COMMENT 'T1维度B端1层拉回率,保留6位小数', `ror2单层_b_t1` DOUBLE COMMENT 'T1维度B端2层拉回率,保留6位小数', -- ===== C链 (二次分享→点击) ===== `cn` BIGINT COMMENT 'C端拉回n层人数', `rorn_c` DOUBLE COMMENT 'C端n层拉回率(cn/原生n层拉回人数)', `ror1_c` DOUBLE COMMENT 'C端1层拉回率(c/原生n层拉回人数)', `vor单层_c` DOUBLE COMMENT 'C端单层视频访问率(cviews/原生n层拉回人数)', `vor单层预估` BIGINT COMMENT 'C端单层视频访问率预估(固定为0)', `vor单层copc` BIGINT COMMENT 'C端单层视频访问率copc(固定为0)', -- ===== D链 (session内后续曝光传播) ===== `a` BIGINT COMMENT 'D端去重访问人数(machinecode维度)', `dn` BIGINT COMMENT 'D端拉回n层人数', `rorn_d` DOUBLE COMMENT 'D端n层拉回率(dn/D端去重访问人数),保留6位小数', `ror1_d` DOUBLE COMMENT 'D端1层拉回率(d1/D端去重访问人数),保留6位小数', `vor_d` DOUBLE COMMENT 'D端视频访问率(g.mid计数/D端去重访问人数)' ) COMMENT '用户拉活-曝光-拉回分析表' PARTITIONED BY ( dt STRING COMMENT '统计日期,格式yyyyMMdd' ); -- ===================================================================== -- CTE: 基础样本提取 + 特征工程 -- ===================================================================== WITH -- [1] 原始样本: 从推荐算法样本表取数,解析 scoresMap t_raw AS ( SELECT * ,REPLACE(GET_JSON_OBJECT(extend_alg, '$.scoresMap'), "\\", "") AS scoresmap ,CASE WHEN page IN ("回流后沉浸页&内页feed", "详情后沉浸页", "首页feed", "详情页") THEN "推荐" WHEN page IN ("回流页", "其他") THEN "非推荐" ELSE "其他" END AS page_type FROM loghubods.dwd_recsys_alg_sample_all_20250212 WHERE dt = '${bizdate}' -- AND apptype IN ("0","4") AND abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9") AND abcode NOT IN ("ab100") AND extend_alg IS NOT NULL AND GET_JSON_OBJECT(extend_alg, '$.scoresMap') IS NOT NULL ) -- [2] 过滤: 只保留推荐页面 ,t_filtered AS ( SELECT * FROM t_raw WHERE page_type = "推荐" ) -- [3] 特征提取: 维度映射 + 模型分数解析 ,t_base AS ( SELECT dt ,apptype ,CASE WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权" WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量" WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准" WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组" WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "ab4-5" ELSE "其他" END AS abcode ,page_type AS page ,mid ,vid ,is_share ,share_cnt ,is_return_1 ,is_return_n ,is_return_noself ,return_1_uv ,return_n_uv ,return_n_uv_noself ,new_exposure_cnt ,flowpool ,scoresmap ,subsessionid ,CAST(GET_JSON_OBJECT(scoresmap, '$.fmRov') AS DOUBLE) AS str_pred ,1.22 * POW(CAST(GET_JSON_OBJECT(scoresmap, '$.NorXGBScore') AS DOUBLE), 1.15) AS rosn_pred ,CAST(GET_JSON_OBJECT(scoresmap, '$.hasReturnRovScore') AS DOUBLE) AS rosn_stat ,GET_JSON_OBJECT(v1_feature, '$.title') AS vid_title FROM t_filtered ) -- ===================================================================== -- 主查询: INSERT OVERWRITE → 多维度 CUBE 聚合 -- ===================================================================== -- SELECT * FROM loghubods.recommend_distribution_data_total WHERE dt = 20260204 ORDER BY 访问人数 DESC INSERT OVERWRITE TABLE loghubods.recommend_distribution_data_total PARTITION (dt = '${bizdate}') SELECT -- ==================== 维度列 ==================== -- 用户拉活量分层 CASE WHEN grouping( COALESCE( CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户' WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50' WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330' ELSE e.type END , '-') ) = 1 THEN 'SUM' ELSE NVL( COALESCE( CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户' WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50' WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330' ELSE e.type END , '-') , 'SUM') END AS 用户拉活量分层 -- 进入小时 ,CASE WHEN grouping( COALESCE( CASE WHEN in_hour >= 1 AND in_hour <= 4 THEN '1-4' WHEN in_hour >= 5 AND in_hour <= 8 THEN '5-8' WHEN in_hour >= 9 AND in_hour <= 12 THEN '9-12' WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16' WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20' WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24' ELSE '-' END , '-') ) = 1 THEN 'SUM' ELSE NVL( COALESCE( CASE WHEN in_hour >= 1 AND in_hour <= 4 THEN '1-4' WHEN in_hour >= 5 AND in_hour <= 8 THEN '5-8' WHEN in_hour >= 9 AND in_hour <= 12 THEN '9-12' WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16' WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20' WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24' ELSE '-' END , '-') , 'SUM') END AS 进入小时 -- 进入场景 ,CASE WHEN grouping(CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END) = 1 THEN 'SUM' ELSE NVL(CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END, 'SUM') END AS 进入场景 -- 进入内容品类 ,CASE WHEN grouping(CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END) = 1 THEN 'SUM' ELSE NVL(CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END, 'SUM') END AS 进入内容品类 -- 推荐内容品类 ,CASE WHEN grouping(CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END) = 1 THEN 'SUM' ELSE NVL(CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END, 'SUM') END AS 推荐内容品类 -- 内容id ,CASE WHEN grouping(COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-')) = 1 THEN 'SUM' ELSE NVL(COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-'), 'SUM') END AS 内容id -- ==================== 基础流量指标 ==================== ,COUNT(DISTINCT a.machinecode) AS 访问人数 ,COUNT(DISTINCT j.mid) AS 曝光人数 ,COUNT(DISTINCT j.mid) / COUNT(DISTINCT a.machinecode) AS 曝光人数比访问人数 ,COUNT(j.mid) AS 曝光次数 ,COUNT(j.mid) / COUNT(DISTINCT j.mid) AS 曝光次数人均 ,COUNT(DISTINCT j.vid) AS 视频个数 ,COUNT(j.mid) / COUNT(DISTINCT j.vid) AS 单视频曝光次数 -- ==================== 全链路拉回 ==================== ,SUM(return_n_uv_noself) + SUM(cc.cn) + SUM(dd.dn) AS ALL拉回n层 ,ROUND(COALESCE( (SUM(return_n_uv_noself) + SUM(cc.cn) + SUM(dd.dn)) / COUNT(j.mid) , 0), 6) AS ALL拉回n层比曝光次数 -- ==================== 原生拉回 & rovn ==================== ,SUM(return_n_uv_noself) AS return_n_uv_noself ,ROUND(SUM(return_n_uv_noself) / COUNT(j.mid), 6) AS rovn ,ROUND(AVG(str_pred * rosn_pred), 6) AS rovn_pred ,ROUND( (SUM(return_n_uv_noself) / COUNT(j.mid)) / NULLIF(AVG(str_pred * rosn_pred), 0) , 4) AS rovn_copc -- ==================== STR 指标 ==================== ,ROUND(COALESCE(SUM(is_return_noself) / COUNT(j.mid), 0), 6) AS str_real ,ROUND(COALESCE(SUM(str_pred) / COUNT(j.mid), 0), 6) AS str_pred ,ROUND( (SUM(is_return_noself) / COUNT(j.mid)) / NULLIF(SUM(str_pred) / COUNT(j.mid), 0) , 4) AS str_copc ,ROUND(AVG(ABS(str_pred - is_return_noself)), 6) AS str_MAE ,ROUND(VARIANCE(str_pred - is_return_noself), 6) AS STR_VAR ,0 AS strauc ,0 AS str多维加权copc -- ==================== ROSN 指标 ==================== ,ROUND(COALESCE( SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0) , 0), 6) AS rosn_real ,ROUND(COALESCE(SUM(rosn_pred) / COUNT(j.mid), 0), 6) AS rosn_pred ,ROUND( (SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0)) / NULLIF(SUM(rosn_pred) / COUNT(1), 0) , 4) AS rosn_copc ,ROUND(AVG( CASE WHEN is_return_noself = 1 THEN ABS(rosn_pred - return_n_uv_noself) END ), 6) AS rosn_实际预估diff绝对距离 ,ROUND(VARIANCE( CASE WHEN is_return_noself = 1 THEN rosn_pred - return_n_uv_noself END ), 6) AS rosn_实际预估高低估分布比例 -- ==================== B链: 分享→点击 ==================== ,SUM(is_return_noself) AS Buv1层拉回人数 ,ROUND(SUM(is_return_noself) / COUNT(j.mid), 6) AS rov0 ,0 AS rov0预估 ,0 AS rov0copc ,ROUND(COALESCE( SUM(is_return_noself) / NULLIF(SUM(share_cnt), 0) , 0), 6) AS ros0 ,0 AS ros0预估 ,0 AS ros0copc ,SUM(return_n_uv_noself) AS Bnuvn层拉回人数 ,ROUND(COALESCE(SUM(return_n_uv_noself) / SUM(is_return_noself), 0), 6) AS rorn_b ,ROUND(COALESCE(SUM(k.b1) / SUM(is_return_noself), 0), 6) AS ror1单层_b ,ROUND(COALESCE(SUM(k.b2) / SUM(k.b1), 0), 6) AS ror2单层_b -- ==================== B链 T+1 ==================== ,ROUND(COALESCE(SUM(k1.bn), 0), 6) AS Bnuvn层拉回人数_t1 ,ROUND(COALESCE(SUM(k1.bn) / SUM(k1.b), 0), 6) AS rorn_b_t1 ,ROUND(COALESCE(SUM(k1.b1) / SUM(k1.b), 0), 6) AS ror1单层_b_t1 ,ROUND(COALESCE(SUM(k1.b2) / SUM(k1.b1), 0), 6) AS ror2单层_b_t1 -- ==================== C链: 二次分享 ==================== ,SUM(cc.cn) AS cn ,SUM(cc.cn) / SUM(return_n_uv_noself) AS rorn_c ,SUM(cc.c) / SUM(return_n_uv_noself) AS ror1_c ,SUM(cc.cviews) / SUM(return_n_uv_noself) AS vor单层_c ,0 AS vor单层预估 ,0 AS vor单层copc -- ==================== D链: session内后续曝光传播 ==================== ,COUNT(DISTINCT b.machinecode) AS a ,SUM(dd.dn) AS dn ,ROUND(COALESCE(SUM(dd.dn) / COUNT(DISTINCT b.machinecode), 0), 6) AS rorn_d ,ROUND(COALESCE(SUM(dd.d1) / COUNT(DISTINCT b.machinecode), 0), 6) AS ror1_d ,COUNT(g.mid) / COUNT(DISTINCT b.machinecode) AS vor_d -- ===================================================================== -- FROM + JOINs -- ===================================================================== FROM ( -- [a] 用户活跃日志 + 进入小时 SELECT * ,TO_CHAR(FROM_UNIXTIME(a.clienttimestamp / 1000), 'HH') + 1 AS in_hour FROM loghubods.useractive_log a WHERE a.dt = '${bizdate}' ) a -- [b] 分享点击日志 (topic=click) LEFT JOIN ( SELECT DISTINCT machinecode ,sessionid ,subsessionid ,pagesource ,topic ,shareid ,clickobjectid ,shareobjectid ,clientip ,CASE WHEN topic = 'click' THEN clickobjectid WHEN topic = 'share' THEN shareobjectid END AS videoid ,sharetitleid ,sharetitle ,shareimageurl ,rootsharemid ,clienttimestamp ,FROM_UNIXTIME(clienttimestamp / 1000) AS click_time ,GET_JSON_OBJECT(videocdm.Ip2RegionAnalyse(clientip), "$.province") AS click_province ,GET_JSON_OBJECT(videocdm.Ip2RegionAnalyse(clientip), "$.city") AS click_city ,TO_CHAR(FROM_UNIXTIME(clienttimestamp / 1000), 'HH') + 1 AS click_hour FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'click' ) b ON a.machinecode = b.machinecode AND a.subsessionid = b.subsessionid -- [c] 视频元信息 LEFT JOIN videoods.wx_video c ON b.videoid = c.id -- [d] 点击视频品类标签 LEFT JOIN loghubods.video_merge_tag d ON b.videoid = d.videoid -- [e] 用户拉活量分层 (R0/R1-50/R_180_330/...) LEFT JOIN ( SELECT DISTINCT type, openid FROM loghubods.mid_share_return_people_1year WHERE dt = '${bizdate-1}' AND type IS NOT NULL AND type != 'S_ALL' AND type NOT REGEXP 'R50' ) e ON SUBSTRING_INDEX(a.machinecode, 'weixin_openid_', -1) = e.openid -- [f] TOP5 进入场景 LEFT JOIN ( SELECT a.scene_id ,a.scene_desc ,COUNT(DISTINCT b.machinecode) AS cnt FROM loghubods.wechat_miniprogram_scene_simple a LEFT JOIN loghubods.useractive_log b ON a.scene_id = b.sencetype AND b.dt = '${bizdate}' GROUP BY a.scene_id, a.scene_desc ORDER BY cnt DESC LIMIT 5 ) f ON a.sencetype = f.scene_id -- [g] 推荐页视频播放 LEFT JOIN ( SELECT DISTINCT mid, subsessionid, videoid FROM loghubods.video_action_log_rp WHERE dt = '${bizdate}' AND businesstype = 'videoView' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' ) g ON b.machinecode = g.mid AND b.subsessionid = g.subsessionid -- [h] 推荐视频品类标签 LEFT JOIN loghubods.video_merge_tag h ON g.videoid = h.videoid -- [i] 热门视频 (按品类 TOP1, 曝光>10w) LEFT JOIN ( SELECT merge_leve2, videoid FROM ( SELECT b.merge_leve2 ,a.videoid ,COUNT(1) AS view ,ROW_NUMBER() OVER (PARTITION BY b.merge_leve2 ORDER BY COUNT(1) DESC) AS rank FROM loghubods.video_action_log_rp a LEFT JOIN loghubods.video_merge_tag b ON a.videoid = b.videoid WHERE dt = '${bizdate}' AND businesstype = 'videoView' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' GROUP BY b.merge_leve2, a.videoid HAVING view > 100000 ) WHERE rank <= 1 ) i ON g.videoid = i.videoid -- [j] 曝光样本 (t_base CTE) LEFT JOIN t_base j ON g.mid = j.mid AND g.subsessionid = j.subsessionid AND g.videoid = j.vid -- [k] B链: 当天分享→点击裂变 (sharedepth 1~4) LEFT JOIN ( SELECT a.machinecode AS mid ,a.subsessionid ,a.videoid AS vid ,COUNT(DISTINCT CASE WHEN a.machinecode <> b.machinecode THEN b.machinecode END) AS bn ,COUNT(DISTINCT CASE WHEN b.sharedepth = 1 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b ,COUNT(DISTINCT CASE WHEN b.sharedepth = 2 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b1 ,COUNT(DISTINCT CASE WHEN b.sharedepth = 3 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b2 ,COUNT(DISTINCT CASE WHEN b.sharedepth = 4 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b3 FROM ( SELECT DISTINCT machinecode, shareobjectid AS videoid, recomTraceId, subsessionid, sharedepth, shareid FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'share' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' ) a LEFT JOIN ( SELECT DISTINCT machinecode, clickobjectid, recomTraceId, subsessionid, sharedepth, rootshareid FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'click' ) b ON a.shareid = b.rootshareid GROUP BY a.machinecode, a.subsessionid, a.videoid ) k ON g.mid = k.mid AND g.subsessionid = k.subsessionid AND g.videoid = k.vid -- [k1] B链 T+1: 跨天分享→点击裂变 LEFT JOIN ( SELECT a.machinecode AS mid ,a.subsessionid ,a.videoid AS vid ,COUNT(DISTINCT CASE WHEN a.machinecode <> b.machinecode THEN b.machinecode END) AS bn ,COUNT(DISTINCT CASE WHEN b.sharedepth = 1 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b ,COUNT(DISTINCT CASE WHEN b.sharedepth = 2 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b1 ,COUNT(DISTINCT CASE WHEN b.sharedepth = 3 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b2 ,COUNT(DISTINCT CASE WHEN b.sharedepth = 4 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b3 FROM ( SELECT DISTINCT machinecode, shareobjectid AS videoid, recomTraceId, subsessionid, sharedepth, shareid FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'share' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' ) a LEFT JOIN ( SELECT DISTINCT machinecode, clickobjectid, recomTraceId, subsessionid, sharedepth, rootshareid FROM loghubods.user_share_log WHERE dt >= '${bizdate}' AND dt <= '${bizdate+1}' -- T+1 跨天 AND topic = 'click' ) b ON a.shareid = b.rootshareid GROUP BY a.machinecode, a.subsessionid, a.videoid ) k1 ON g.mid = k1.mid AND g.subsessionid = k1.subsessionid AND g.videoid = k1.vid -- [l] TOP10 推荐品类 (按播放量) LEFT JOIN ( SELECT b.merge_leve2 ,COUNT(1) AS view FROM loghubods.video_action_log_rp a LEFT JOIN loghubods.video_merge_tag b ON a.videoid = b.videoid WHERE dt = '${bizdate}' AND businesstype = 'videoView' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' AND b.merge_leve2 IS NOT NULL GROUP BY b.merge_leve2 ORDER BY view DESC LIMIT 10 ) l ON h.merge_leve2 = l.merge_leve2 -- [m] TOP1 点击品类 (按点击人数) LEFT JOIN ( SELECT b.merge_leve2 ,COUNT(DISTINCT machinecode) AS click FROM loghubods.user_share_log a LEFT JOIN loghubods.video_merge_tag b ON a.clickobjectid = b.videoid WHERE dt = '${bizdate}' AND topic = 'click' AND b.merge_leve2 IS NOT NULL GROUP BY b.merge_leve2 ORDER BY click DESC LIMIT 1 ) m ON d.merge_leve2 = m.merge_leve2 -- [cc] C链: 二次分享 (分享→点击→再分享→再点击 + 视频播放) LEFT JOIN ( SELECT a.machinecode AS mid ,a.subsessionid ,a.videoid AS vid ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END ) AS c ,COUNT(c.mid) AS cviews FROM ( -- 一次分享 SELECT DISTINCT machinecode, shareobjectid AS videoid, recomTraceId, subsessionid, sharedepth, shareid FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'share' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' ) a LEFT JOIN ( -- 一次点击 SELECT DISTINCT machinecode, clickobjectid, recomTraceId, subsessionid, sharedepth, rootshareid FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'click' ) b ON a.shareid = b.rootshareid LEFT JOIN ( -- 二次分享 (点击者再分享) SELECT DISTINCT machinecode, shareobjectid, recomTraceId, subsessionid, sharedepth, shareid FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'share' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' ) b1 ON b.machinecode = b1.machinecode AND b.subsessionid = b1.subsessionid LEFT JOIN ( -- 二次点击 SELECT DISTINCT machinecode, clickobjectid, recomTraceId, subsessionid, sharedepth, shareid, rootshareid FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'click' ) b2 ON b1.shareid = b2.rootshareid LEFT JOIN ( -- 二次点击者的视频播放 SELECT DISTINCT mid, subsessionid, videoid FROM loghubods.video_action_log_rp WHERE dt = '${bizdate}' AND businesstype = 'videoView' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' ) c ON b2.machinecode = c.mid AND b2.subsessionid = c.subsessionid GROUP BY a.machinecode, a.subsessionid, a.videoid ) cc ON g.mid = cc.mid AND g.subsessionid = cc.subsessionid AND g.videoid = cc.vid -- [dd] D链: session内后续曝光传播 (窗口函数累计后续拉回) LEFT JOIN ( SELECT * ,COALESCE( SUM(回流) OVER (PARTITION BY mid, subsessionid ORDER BY rn ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) , 0) AS dn ,COALESCE( SUM(回流1) OVER (PARTITION BY mid, subsessionid ORDER BY rn ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) , 0) AS d1 FROM ( SELECT a.mid AS mid ,a.subsessionid ,a.videoid AS vid ,COUNT(DISTINCT b.shareid) AS 分享次数 ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END ) AS 回流 ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END ) AS 回流1 ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp DESC) AS rn FROM ( SELECT DISTINCT mid, subsessionid, videoid, logtimestamp FROM loghubods.video_action_log_rp WHERE dt = '${bizdate}' AND businesstype = 'videoView' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' ) a LEFT JOIN ( SELECT DISTINCT machinecode, shareobjectid AS videoid, recomTraceId, subsessionid, sharedepth, shareid, clienttimestamp FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'share' AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$' ) b ON a.mid = b.machinecode AND a.subsessionid = b.subsessionid AND a.videoid = b.videoid LEFT JOIN ( SELECT DISTINCT machinecode, clickobjectid, recomTraceId, subsessionid, sharedepth, rootshareid FROM loghubods.user_share_log WHERE dt = '${bizdate}' AND topic = 'click' ) c ON b.shareid = c.rootshareid GROUP BY a.mid, a.subsessionid, a.videoid, a.logtimestamp ) ) dd ON g.mid = dd.mid AND g.subsessionid = dd.subsessionid AND g.videoid = dd.vid -- ===================================================================== -- GROUP BY CUBE: 全维度组合聚合 -- ===================================================================== GROUP BY CUBE( -- 用户拉活量分层 COALESCE( CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户' WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50' WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330' ELSE e.type END , '-') -- 进入小时 ,COALESCE( CASE WHEN in_hour >= 1 AND in_hour <= 4 THEN '1-4' WHEN in_hour >= 5 AND in_hour <= 8 THEN '5-8' WHEN in_hour >= 9 AND in_hour <= 12 THEN '9-12' WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16' WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20' WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24' ELSE '-' END , '-') -- 进入场景 ,CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END -- 进入内容品类 ,CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END -- 推荐内容品类 ,CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END -- 内容id ,COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-') ) ORDER BY 访问人数 DESC ;