| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722 |
- -- =====================================================================
- -- 用户拉活-曝光-拉回分析表 (recommend_distribution_data_total)
- -- 维度: 用户拉活量分层 × 进入小时 × 进入场景 × 进入内容品类 × 推荐内容品类 × 内容id
- -- 粒度: GROUP BY CUBE 全维度组合 + SUM 汇总
- -- =====================================================================
- -- DROP TABLE IF EXISTS loghubods.recommend_distribution_data_total;
- CREATE TABLE IF NOT EXISTS loghubods.recommend_distribution_data_total (
- `用户拉活量分层` STRING COMMENT '用户拉活量分层(R0&新用户/R1-50等,汇总为SUM)',
- `进入小时` STRING COMMENT '进入小时(1-4/5-8等,汇总为SUM)',
- `进入场景` STRING COMMENT '进入场景(业务场景描述,汇总为SUM)',
- `进入内容品类` STRING COMMENT '进入内容品类(merge_leve2维度,汇总为SUM)',
- `推荐内容品类` STRING COMMENT '推荐内容品类(merge_leve2维度,汇总为SUM)',
- `内容id` STRING COMMENT '内容id(videoid,其他为兜底,汇总为SUM)',
- -- ===== 基础流量指标 =====
- `访问人数` BIGINT COMMENT '去重访问人数(machinecode维度)',
- `曝光人数` BIGINT COMMENT '去重曝光人数(mid维度)',
- `曝光人数比访问人数` DOUBLE COMMENT '曝光人数/访问人数 比值',
- `曝光次数` BIGINT COMMENT '曝光总次数(mid计数)',
- `曝光次数人均` DOUBLE COMMENT '曝光次数/去重曝光人数 人均曝光次数',
- `视频个数` BIGINT COMMENT '去重曝光视频个数(vid维度)',
- `单视频曝光次数` DOUBLE COMMENT '曝光次数/去重视频个数 单视频平均曝光次数',
- -- ===== 全链路拉回 =====
- `ALL拉回n层` BIGINT COMMENT '全量拉回n层人数(多来源拉回人数求和)',
- `ALL拉回n层比曝光次数` DOUBLE COMMENT '全量拉回n层人数/曝光次数 比值,保留6位小数',
- -- ===== 原生拉回 & rovn =====
- `return_n_uv_noself` BIGINT COMMENT '原生n层拉回人数(无自返)',
- `rovn` DOUBLE COMMENT '原生n层拉回人数/曝光次数 比值,保留6位小数',
- `rovn_pred` DOUBLE COMMENT 'rovn预估值(str_pred*rosn_pred平均值),保留6位小数',
- `rovn_copc` DOUBLE COMMENT 'rovn实际/rovn预估 比值,保留4位小数',
- -- ===== STR 指标 =====
- `str_real` DOUBLE COMMENT 'str实际值(is_return_noself求和/曝光次数),保留6位小数',
- `str_pred` DOUBLE COMMENT 'str预估值(str_pred求和/曝光次数),保留6位小数',
- `str_copc` DOUBLE COMMENT 'str实际/str预估 比值,保留4位小数',
- `str_MAE` DOUBLE COMMENT 'str预估与实际的平均绝对误差,保留6位小数',
- `STR_VAR` DOUBLE COMMENT 'str预估与实际差值的方差,保留6位小数',
- `strauc` DOUBLE COMMENT 'str AUC,保留6位小数',
- `str多维加权copc` DOUBLE COMMENT 'str多维加权copc,保留6位小数',
- -- ===== ROSN 指标 =====
- `rosn_real` DOUBLE COMMENT 'rosn实际值(原生n层拉回人数/is_return_noself求和),保留6位小数',
- `rosn_pred` DOUBLE COMMENT 'rosn预估值(rosn_pred求和/曝光次数),保留6位小数',
- `rosn_copc` DOUBLE COMMENT 'rosn实际/rosn预估 比值,保留4位小数',
- `rosn_实际预估diff绝对距离` DOUBLE COMMENT '拉回时rosn预估与实际的平均绝对距离,保留6位小数',
- `rosn_实际预估高低估分布比例` DOUBLE COMMENT '拉回时rosn预估与实际差值的方差,保留6位小数',
- -- ===== B链 (分享→点击) =====
- `Buv1层拉回人数` BIGINT COMMENT 'B端uv1层拉回人数(is_return_noself求和)',
- `rov0` DOUBLE COMMENT 'B端uv1层拉回人数/曝光次数 比值,保留6位小数',
- `rov0预估` BIGINT COMMENT 'rov0预估值(固定为0)',
- `rov0copc` BIGINT COMMENT 'rov0copc值(固定为0)',
- `ros0` DOUBLE COMMENT 'ros0实际值(B端uv1层拉回人数/分享次数),保留6位小数',
- `ros0预估` BIGINT COMMENT 'ros0预估值(固定为0)',
- `ros0copc` BIGINT COMMENT 'ros0copc值(固定为0)',
- `Bnuvn层拉回人数` BIGINT COMMENT 'B端nuvn层拉回人数(原生n层拉回人数一致)',
- `rorn_b` DOUBLE COMMENT 'B端n层拉回率(Bnuvn/uv1层拉回人数),保留6位小数',
- `ror1单层_b` DOUBLE COMMENT 'B端1层拉回率(b1/uv1层拉回人数),保留6位小数',
- `ror2单层_b` DOUBLE COMMENT 'B端2层拉回率(b2/b1),保留6位小数',
- -- ===== B链 T+1 =====
- `Bnuvn层拉回人数_t1` DOUBLE COMMENT 'T1维度B端nuvn层拉回人数,保留6位小数',
- `rorn_b_t1` DOUBLE COMMENT 'T1维度B端n层拉回率,保留6位小数',
- `ror1单层_b_t1` DOUBLE COMMENT 'T1维度B端1层拉回率,保留6位小数',
- `ror2单层_b_t1` DOUBLE COMMENT 'T1维度B端2层拉回率,保留6位小数',
- -- ===== C链 (二次分享→点击) =====
- `cn` BIGINT COMMENT 'C端拉回n层人数',
- `rorn_c` DOUBLE COMMENT 'C端n层拉回率(cn/原生n层拉回人数)',
- `ror1_c` DOUBLE COMMENT 'C端1层拉回率(c/原生n层拉回人数)',
- `vor单层_c` DOUBLE COMMENT 'C端单层视频访问率(cviews/原生n层拉回人数)',
- `vor单层预估` BIGINT COMMENT 'C端单层视频访问率预估(固定为0)',
- `vor单层copc` BIGINT COMMENT 'C端单层视频访问率copc(固定为0)',
- -- ===== D链 (session内后续曝光传播) =====
- `a` BIGINT COMMENT 'D端去重访问人数(machinecode维度)',
- `dn` BIGINT COMMENT 'D端拉回n层人数',
- `rorn_d` DOUBLE COMMENT 'D端n层拉回率(dn/D端去重访问人数),保留6位小数',
- `ror1_d` DOUBLE COMMENT 'D端1层拉回率(d1/D端去重访问人数),保留6位小数',
- `vor_d` DOUBLE COMMENT 'D端视频访问率(g.mid计数/D端去重访问人数)'
- )
- COMMENT '用户拉活-曝光-拉回分析表'
- PARTITIONED BY (
- dt STRING COMMENT '统计日期,格式yyyyMMdd'
- );
- -- =====================================================================
- -- CTE: 基础样本提取 + 特征工程
- -- =====================================================================
- WITH
- -- [1] 原始样本: 从推荐算法样本表取数,解析 scoresMap
- t_raw AS (
- SELECT *
- ,REPLACE(GET_JSON_OBJECT(extend_alg, '$.scoresMap'), "\\", "") AS scoresmap
- ,CASE
- WHEN page IN ("回流后沉浸页&内页feed", "详情后沉浸页", "首页feed", "详情页") THEN "推荐"
- WHEN page IN ("回流页", "其他") THEN "非推荐"
- ELSE "其他"
- END AS page_type
- FROM loghubods.dwd_recsys_alg_sample_all_20250212
- WHERE dt = '${bizdate}'
- -- AND apptype IN ("0","4")
- AND abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
- AND abcode NOT IN ("ab100")
- AND extend_alg IS NOT NULL
- AND GET_JSON_OBJECT(extend_alg, '$.scoresMap') IS NOT NULL
- )
- -- [2] 过滤: 只保留推荐页面
- ,t_filtered AS (
- SELECT *
- FROM t_raw
- WHERE page_type = "推荐"
- )
- -- [3] 特征提取: 维度映射 + 模型分数解析
- ,t_base AS (
- SELECT dt
- ,apptype
- ,CASE
- WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
- WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
- WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准"
- WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
- WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "ab4-5"
- ELSE "其他"
- END AS abcode
- ,page_type AS page
- ,mid
- ,vid
- ,is_share
- ,share_cnt
- ,is_return_1
- ,is_return_n
- ,is_return_noself
- ,return_1_uv
- ,return_n_uv
- ,return_n_uv_noself
- ,new_exposure_cnt
- ,flowpool
- ,scoresmap
- ,subsessionid
- ,CAST(GET_JSON_OBJECT(scoresmap, '$.fmRov') AS DOUBLE) AS str_pred
- ,1.22 * POW(CAST(GET_JSON_OBJECT(scoresmap, '$.NorXGBScore') AS DOUBLE), 1.15) AS rosn_pred
- ,CAST(GET_JSON_OBJECT(scoresmap, '$.hasReturnRovScore') AS DOUBLE) AS rosn_stat
- ,GET_JSON_OBJECT(v1_feature, '$.title') AS vid_title
- FROM t_filtered
- )
- -- =====================================================================
- -- 主查询: INSERT OVERWRITE → 多维度 CUBE 聚合
- -- =====================================================================
- -- SELECT * FROM loghubods.recommend_distribution_data_total WHERE dt = 20260204 ORDER BY 访问人数 DESC
- INSERT OVERWRITE TABLE loghubods.recommend_distribution_data_total PARTITION (dt = '${bizdate}')
- SELECT
- -- ==================== 维度列 ====================
- -- 用户拉活量分层
- CASE WHEN grouping(
- COALESCE(
- CASE
- WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户'
- WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50'
- WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330'
- ELSE e.type
- END
- , '-')
- ) = 1
- THEN 'SUM'
- ELSE NVL(
- COALESCE(
- CASE
- WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户'
- WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50'
- WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330'
- ELSE e.type
- END
- , '-')
- , 'SUM')
- END AS 用户拉活量分层
- -- 进入小时
- ,CASE WHEN grouping(
- COALESCE(
- CASE
- WHEN in_hour >= 1 AND in_hour <= 4 THEN '1-4'
- WHEN in_hour >= 5 AND in_hour <= 8 THEN '5-8'
- WHEN in_hour >= 9 AND in_hour <= 12 THEN '9-12'
- WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16'
- WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20'
- WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24'
- ELSE '-'
- END
- , '-')
- ) = 1
- THEN 'SUM'
- ELSE NVL(
- COALESCE(
- CASE
- WHEN in_hour >= 1 AND in_hour <= 4 THEN '1-4'
- WHEN in_hour >= 5 AND in_hour <= 8 THEN '5-8'
- WHEN in_hour >= 9 AND in_hour <= 12 THEN '9-12'
- WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16'
- WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20'
- WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24'
- ELSE '-'
- END
- , '-')
- , 'SUM')
- END AS 进入小时
- -- 进入场景
- ,CASE WHEN grouping(CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END) = 1
- THEN 'SUM'
- ELSE NVL(CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END, 'SUM')
- END AS 进入场景
- -- 进入内容品类
- ,CASE WHEN grouping(CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END) = 1
- THEN 'SUM'
- ELSE NVL(CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END, 'SUM')
- END AS 进入内容品类
- -- 推荐内容品类
- ,CASE WHEN grouping(CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END) = 1
- THEN 'SUM'
- ELSE NVL(CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END, 'SUM')
- END AS 推荐内容品类
- -- 内容id
- ,CASE WHEN grouping(COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-')) = 1
- THEN 'SUM'
- ELSE NVL(COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-'), 'SUM')
- END AS 内容id
- -- ==================== 基础流量指标 ====================
- ,COUNT(DISTINCT a.machinecode) AS 访问人数
- ,COUNT(DISTINCT j.mid) AS 曝光人数
- ,COUNT(DISTINCT j.mid) / COUNT(DISTINCT a.machinecode) AS 曝光人数比访问人数
- ,COUNT(j.mid) AS 曝光次数
- ,COUNT(j.mid) / COUNT(DISTINCT j.mid) AS 曝光次数人均
- ,COUNT(DISTINCT j.vid) AS 视频个数
- ,COUNT(j.mid) / COUNT(DISTINCT j.vid) AS 单视频曝光次数
- -- ==================== 全链路拉回 ====================
- ,SUM(return_n_uv_noself) + SUM(cc.cn) + SUM(dd.dn) AS ALL拉回n层
- ,ROUND(COALESCE(
- (SUM(return_n_uv_noself) + SUM(cc.cn) + SUM(dd.dn)) / COUNT(j.mid)
- , 0), 6) AS ALL拉回n层比曝光次数
- -- ==================== 原生拉回 & rovn ====================
- ,SUM(return_n_uv_noself) AS return_n_uv_noself
- ,ROUND(SUM(return_n_uv_noself) / COUNT(j.mid), 6) AS rovn
- ,ROUND(AVG(str_pred * rosn_pred), 6) AS rovn_pred
- ,ROUND(
- (SUM(return_n_uv_noself) / COUNT(j.mid))
- / NULLIF(AVG(str_pred * rosn_pred), 0)
- , 4) AS rovn_copc
- -- ==================== STR 指标 ====================
- ,ROUND(COALESCE(SUM(is_return_noself) / COUNT(j.mid), 0), 6) AS str_real
- ,ROUND(COALESCE(SUM(str_pred) / COUNT(j.mid), 0), 6) AS str_pred
- ,ROUND(
- (SUM(is_return_noself) / COUNT(j.mid))
- / NULLIF(SUM(str_pred) / COUNT(j.mid), 0)
- , 4) AS str_copc
- ,ROUND(AVG(ABS(str_pred - is_return_noself)), 6) AS str_MAE
- ,ROUND(VARIANCE(str_pred - is_return_noself), 6) AS STR_VAR
- ,0 AS strauc
- ,0 AS str多维加权copc
- -- ==================== ROSN 指标 ====================
- ,ROUND(COALESCE(
- SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0)
- , 0), 6) AS rosn_real
- ,ROUND(COALESCE(SUM(rosn_pred) / COUNT(j.mid), 0), 6) AS rosn_pred
- ,ROUND(
- (SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0))
- / NULLIF(SUM(rosn_pred) / COUNT(1), 0)
- , 4) AS rosn_copc
- ,ROUND(AVG(
- CASE WHEN is_return_noself = 1
- THEN ABS(rosn_pred - return_n_uv_noself)
- END
- ), 6) AS rosn_实际预估diff绝对距离
- ,ROUND(VARIANCE(
- CASE WHEN is_return_noself = 1
- THEN rosn_pred - return_n_uv_noself
- END
- ), 6) AS rosn_实际预估高低估分布比例
- -- ==================== B链: 分享→点击 ====================
- ,SUM(is_return_noself) AS Buv1层拉回人数
- ,ROUND(SUM(is_return_noself) / COUNT(j.mid), 6) AS rov0
- ,0 AS rov0预估
- ,0 AS rov0copc
- ,ROUND(COALESCE(
- SUM(is_return_noself) / NULLIF(SUM(share_cnt), 0)
- , 0), 6) AS ros0
- ,0 AS ros0预估
- ,0 AS ros0copc
- ,SUM(return_n_uv_noself) AS Bnuvn层拉回人数
- ,ROUND(COALESCE(SUM(return_n_uv_noself) / SUM(is_return_noself), 0), 6) AS rorn_b
- ,ROUND(COALESCE(SUM(k.b1) / SUM(is_return_noself), 0), 6) AS ror1单层_b
- ,ROUND(COALESCE(SUM(k.b2) / SUM(k.b1), 0), 6) AS ror2单层_b
- -- ==================== B链 T+1 ====================
- ,ROUND(COALESCE(SUM(k1.bn), 0), 6) AS Bnuvn层拉回人数_t1
- ,ROUND(COALESCE(SUM(k1.bn) / SUM(k1.b), 0), 6) AS rorn_b_t1
- ,ROUND(COALESCE(SUM(k1.b1) / SUM(k1.b), 0), 6) AS ror1单层_b_t1
- ,ROUND(COALESCE(SUM(k1.b2) / SUM(k1.b1), 0), 6) AS ror2单层_b_t1
- -- ==================== C链: 二次分享 ====================
- ,SUM(cc.cn) AS cn
- ,SUM(cc.cn) / SUM(return_n_uv_noself) AS rorn_c
- ,SUM(cc.c) / SUM(return_n_uv_noself) AS ror1_c
- ,SUM(cc.cviews) / SUM(return_n_uv_noself) AS vor单层_c
- ,0 AS vor单层预估
- ,0 AS vor单层copc
- -- ==================== D链: session内后续曝光传播 ====================
- ,COUNT(DISTINCT b.machinecode) AS a
- ,SUM(dd.dn) AS dn
- ,ROUND(COALESCE(SUM(dd.dn) / COUNT(DISTINCT b.machinecode), 0), 6) AS rorn_d
- ,ROUND(COALESCE(SUM(dd.d1) / COUNT(DISTINCT b.machinecode), 0), 6) AS ror1_d
- ,COUNT(g.mid) / COUNT(DISTINCT b.machinecode) AS vor_d
- -- =====================================================================
- -- FROM + JOINs
- -- =====================================================================
- FROM (
- -- [a] 用户活跃日志 + 进入小时
- SELECT *
- ,TO_CHAR(FROM_UNIXTIME(a.clienttimestamp / 1000), 'HH') + 1 AS in_hour
- FROM loghubods.useractive_log a
- WHERE a.dt = '${bizdate}'
- ) a
- -- [b] 分享点击日志 (topic=click)
- LEFT JOIN (
- SELECT DISTINCT
- machinecode
- ,sessionid
- ,subsessionid
- ,pagesource
- ,topic
- ,shareid
- ,clickobjectid
- ,shareobjectid
- ,clientip
- ,CASE
- WHEN topic = 'click' THEN clickobjectid
- WHEN topic = 'share' THEN shareobjectid
- END AS videoid
- ,sharetitleid
- ,sharetitle
- ,shareimageurl
- ,rootsharemid
- ,clienttimestamp
- ,FROM_UNIXTIME(clienttimestamp / 1000) AS click_time
- ,GET_JSON_OBJECT(videocdm.Ip2RegionAnalyse(clientip), "$.province") AS click_province
- ,GET_JSON_OBJECT(videocdm.Ip2RegionAnalyse(clientip), "$.city") AS click_city
- ,TO_CHAR(FROM_UNIXTIME(clienttimestamp / 1000), 'HH') + 1 AS click_hour
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'click'
- ) b
- ON a.machinecode = b.machinecode
- AND a.subsessionid = b.subsessionid
- -- [c] 视频元信息
- LEFT JOIN videoods.wx_video c
- ON b.videoid = c.id
- -- [d] 点击视频品类标签
- LEFT JOIN loghubods.video_merge_tag d
- ON b.videoid = d.videoid
- -- [e] 用户拉活量分层 (R0/R1-50/R_180_330/...)
- LEFT JOIN (
- SELECT DISTINCT type, openid
- FROM loghubods.mid_share_return_people_1year
- WHERE dt = '${bizdate-1}'
- AND type IS NOT NULL
- AND type != 'S_ALL'
- AND type NOT REGEXP 'R50'
- ) e
- ON SUBSTRING_INDEX(a.machinecode, 'weixin_openid_', -1) = e.openid
- -- [f] TOP5 进入场景
- LEFT JOIN (
- SELECT a.scene_id
- ,a.scene_desc
- ,COUNT(DISTINCT b.machinecode) AS cnt
- FROM loghubods.wechat_miniprogram_scene_simple a
- LEFT JOIN loghubods.useractive_log b
- ON a.scene_id = b.sencetype
- AND b.dt = '${bizdate}'
- GROUP BY a.scene_id, a.scene_desc
- ORDER BY cnt DESC
- LIMIT 5
- ) f
- ON a.sencetype = f.scene_id
- -- [g] 推荐页视频播放
- LEFT JOIN (
- SELECT DISTINCT mid, subsessionid, videoid
- FROM loghubods.video_action_log_rp
- WHERE dt = '${bizdate}'
- AND businesstype = 'videoView'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- ) g
- ON b.machinecode = g.mid
- AND b.subsessionid = g.subsessionid
- -- [h] 推荐视频品类标签
- LEFT JOIN loghubods.video_merge_tag h
- ON g.videoid = h.videoid
- -- [i] 热门视频 (按品类 TOP1, 曝光>10w)
- LEFT JOIN (
- SELECT merge_leve2, videoid
- FROM (
- SELECT b.merge_leve2
- ,a.videoid
- ,COUNT(1) AS view
- ,ROW_NUMBER() OVER (PARTITION BY b.merge_leve2 ORDER BY COUNT(1) DESC) AS rank
- FROM loghubods.video_action_log_rp a
- LEFT JOIN loghubods.video_merge_tag b
- ON a.videoid = b.videoid
- WHERE dt = '${bizdate}'
- AND businesstype = 'videoView'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- GROUP BY b.merge_leve2, a.videoid
- HAVING view > 100000
- )
- WHERE rank <= 1
- ) i
- ON g.videoid = i.videoid
- -- [j] 曝光样本 (t_base CTE)
- LEFT JOIN t_base j
- ON g.mid = j.mid
- AND g.subsessionid = j.subsessionid
- AND g.videoid = j.vid
- -- [k] B链: 当天分享→点击裂变 (sharedepth 1~4)
- LEFT JOIN (
- SELECT a.machinecode AS mid
- ,a.subsessionid
- ,a.videoid AS vid
- ,COUNT(DISTINCT CASE WHEN a.machinecode <> b.machinecode THEN b.machinecode END) AS bn
- ,COUNT(DISTINCT CASE WHEN b.sharedepth = 1 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b
- ,COUNT(DISTINCT CASE WHEN b.sharedepth = 2 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b1
- ,COUNT(DISTINCT CASE WHEN b.sharedepth = 3 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b2
- ,COUNT(DISTINCT CASE WHEN b.sharedepth = 4 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b3
- FROM (
- SELECT DISTINCT machinecode, shareobjectid AS videoid, recomTraceId,
- subsessionid, sharedepth, shareid
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'share'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- ) a
- LEFT JOIN (
- SELECT DISTINCT machinecode, clickobjectid, recomTraceId,
- subsessionid, sharedepth, rootshareid
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'click'
- ) b
- ON a.shareid = b.rootshareid
- GROUP BY a.machinecode, a.subsessionid, a.videoid
- ) k
- ON g.mid = k.mid
- AND g.subsessionid = k.subsessionid
- AND g.videoid = k.vid
- -- [k1] B链 T+1: 跨天分享→点击裂变
- LEFT JOIN (
- SELECT a.machinecode AS mid
- ,a.subsessionid
- ,a.videoid AS vid
- ,COUNT(DISTINCT CASE WHEN a.machinecode <> b.machinecode THEN b.machinecode END) AS bn
- ,COUNT(DISTINCT CASE WHEN b.sharedepth = 1 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b
- ,COUNT(DISTINCT CASE WHEN b.sharedepth = 2 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b1
- ,COUNT(DISTINCT CASE WHEN b.sharedepth = 3 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b2
- ,COUNT(DISTINCT CASE WHEN b.sharedepth = 4 AND a.machinecode <> b.machinecode THEN b.machinecode END) AS b3
- FROM (
- SELECT DISTINCT machinecode, shareobjectid AS videoid, recomTraceId,
- subsessionid, sharedepth, shareid
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'share'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- ) a
- LEFT JOIN (
- SELECT DISTINCT machinecode, clickobjectid, recomTraceId,
- subsessionid, sharedepth, rootshareid
- FROM loghubods.user_share_log
- WHERE dt >= '${bizdate}'
- AND dt <= '${bizdate+1}' -- T+1 跨天
- AND topic = 'click'
- ) b
- ON a.shareid = b.rootshareid
- GROUP BY a.machinecode, a.subsessionid, a.videoid
- ) k1
- ON g.mid = k1.mid
- AND g.subsessionid = k1.subsessionid
- AND g.videoid = k1.vid
- -- [l] TOP10 推荐品类 (按播放量)
- LEFT JOIN (
- SELECT b.merge_leve2
- ,COUNT(1) AS view
- FROM loghubods.video_action_log_rp a
- LEFT JOIN loghubods.video_merge_tag b
- ON a.videoid = b.videoid
- WHERE dt = '${bizdate}'
- AND businesstype = 'videoView'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- AND b.merge_leve2 IS NOT NULL
- GROUP BY b.merge_leve2
- ORDER BY view DESC
- LIMIT 10
- ) l
- ON h.merge_leve2 = l.merge_leve2
- -- [m] TOP1 点击品类 (按点击人数)
- LEFT JOIN (
- SELECT b.merge_leve2
- ,COUNT(DISTINCT machinecode) AS click
- FROM loghubods.user_share_log a
- LEFT JOIN loghubods.video_merge_tag b
- ON a.clickobjectid = b.videoid
- WHERE dt = '${bizdate}'
- AND topic = 'click'
- AND b.merge_leve2 IS NOT NULL
- GROUP BY b.merge_leve2
- ORDER BY click DESC
- LIMIT 1
- ) m
- ON d.merge_leve2 = m.merge_leve2
- -- [cc] C链: 二次分享 (分享→点击→再分享→再点击 + 视频播放)
- LEFT JOIN (
- SELECT a.machinecode AS mid
- ,a.subsessionid
- ,a.videoid AS vid
- ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn
- ,COUNT(DISTINCT
- CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode
- THEN b2.machinecode
- END
- ) AS c
- ,COUNT(c.mid) AS cviews
- FROM (
- -- 一次分享
- SELECT DISTINCT machinecode, shareobjectid AS videoid, recomTraceId,
- subsessionid, sharedepth, shareid
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'share'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- ) a
- LEFT JOIN (
- -- 一次点击
- SELECT DISTINCT machinecode, clickobjectid, recomTraceId,
- subsessionid, sharedepth, rootshareid
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'click'
- ) b
- ON a.shareid = b.rootshareid
- LEFT JOIN (
- -- 二次分享 (点击者再分享)
- SELECT DISTINCT machinecode, shareobjectid, recomTraceId,
- subsessionid, sharedepth, shareid
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'share'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- ) b1
- ON b.machinecode = b1.machinecode
- AND b.subsessionid = b1.subsessionid
- LEFT JOIN (
- -- 二次点击
- SELECT DISTINCT machinecode, clickobjectid, recomTraceId,
- subsessionid, sharedepth, shareid, rootshareid
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'click'
- ) b2
- ON b1.shareid = b2.rootshareid
- LEFT JOIN (
- -- 二次点击者的视频播放
- SELECT DISTINCT mid, subsessionid, videoid
- FROM loghubods.video_action_log_rp
- WHERE dt = '${bizdate}'
- AND businesstype = 'videoView'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- ) c
- ON b2.machinecode = c.mid
- AND b2.subsessionid = c.subsessionid
- GROUP BY a.machinecode, a.subsessionid, a.videoid
- ) cc
- ON g.mid = cc.mid
- AND g.subsessionid = cc.subsessionid
- AND g.videoid = cc.vid
- -- [dd] D链: session内后续曝光传播 (窗口函数累计后续拉回)
- LEFT JOIN (
- SELECT *
- ,COALESCE(
- SUM(回流) OVER (PARTITION BY mid, subsessionid ORDER BY rn ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
- , 0) AS dn
- ,COALESCE(
- SUM(回流1) OVER (PARTITION BY mid, subsessionid ORDER BY rn ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
- , 0) AS d1
- FROM (
- SELECT a.mid AS mid
- ,a.subsessionid
- ,a.videoid AS vid
- ,COUNT(DISTINCT b.shareid) AS 分享次数
- ,COUNT(DISTINCT
- CASE WHEN c.machinecode <> b.machinecode
- THEN c.machinecode
- END
- ) AS 回流
- ,COUNT(DISTINCT
- CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1
- THEN c.machinecode
- END
- ) AS 回流1
- ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp DESC) AS rn
- FROM (
- SELECT DISTINCT mid, subsessionid, videoid, logtimestamp
- FROM loghubods.video_action_log_rp
- WHERE dt = '${bizdate}'
- AND businesstype = 'videoView'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- ) a
- LEFT JOIN (
- SELECT DISTINCT machinecode, shareobjectid AS videoid, recomTraceId,
- subsessionid, sharedepth, shareid, clienttimestamp
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'share'
- AND pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
- ) b
- ON a.mid = b.machinecode
- AND a.subsessionid = b.subsessionid
- AND a.videoid = b.videoid
- LEFT JOIN (
- SELECT DISTINCT machinecode, clickobjectid, recomTraceId,
- subsessionid, sharedepth, rootshareid
- FROM loghubods.user_share_log
- WHERE dt = '${bizdate}'
- AND topic = 'click'
- ) c
- ON b.shareid = c.rootshareid
- GROUP BY a.mid, a.subsessionid, a.videoid, a.logtimestamp
- )
- ) dd
- ON g.mid = dd.mid
- AND g.subsessionid = dd.subsessionid
- AND g.videoid = dd.vid
- -- =====================================================================
- -- GROUP BY CUBE: 全维度组合聚合
- -- =====================================================================
- GROUP BY CUBE(
- -- 用户拉活量分层
- COALESCE(
- CASE
- WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户'
- WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50'
- WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330'
- ELSE e.type
- END
- , '-')
- -- 进入小时
- ,COALESCE(
- CASE
- WHEN in_hour >= 1 AND in_hour <= 4 THEN '1-4'
- WHEN in_hour >= 5 AND in_hour <= 8 THEN '5-8'
- WHEN in_hour >= 9 AND in_hour <= 12 THEN '9-12'
- WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16'
- WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20'
- WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24'
- ELSE '-'
- END
- , '-')
- -- 进入场景
- ,CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END
- -- 进入内容品类
- ,CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END
- -- 推荐内容品类
- ,CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END
- -- 内容id
- ,COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-')
- )
- ORDER BY 访问人数 DESC
- ;
|