|
@@ -0,0 +1,542 @@
|
|
|
|
|
+-- ════════════════════════════════════════════════════════════════════════════
|
|
|
|
|
+-- 两层尾号映射 (SCD Type 2 模式)
|
|
|
|
|
+--
|
|
|
|
|
+-- 第一层 t_suffix_group:物理尾号 → 分流桶 ID(16 个 hex 尾号分成 8 个 2-元桶)
|
|
|
|
|
+-- - 分流规则不变时,此层永不改
|
|
|
|
|
+--
|
|
|
|
|
+-- 第二层 t_experiment_map:分流桶 → 实验名 + 生效日期
|
|
|
|
|
+-- - 只列出"分配了具体实验"的桶,未列出的桶自动默认为"对照组"
|
|
|
|
|
+-- - 支持 1 对多:同一个实验占多个桶时,用同一 abcode 字符串多加几行
|
|
|
|
|
+-- - 实验切换:不删旧行,关闭 end_dt + 追加新行(保留历史可回溯)
|
|
|
|
|
+-- ════════════════════════════════════════════════════════════════════════════
|
|
|
|
|
+WITH t_suffix_group AS  -- layer 1: one hex suffix character -> 2-character bucket id (16 suffixes, 8 buckets)
|
|
|
|
|
+(
|
|
|
|
|
+              SELECT "0" AS suffix, "01" AS suffix_group  -- bucket 01
|
|
|
|
|
+    UNION ALL SELECT "1", "01"
|
|
|
|
|
+    UNION ALL SELECT "2", "2c"                             -- bucket 2c
|
|
|
|
|
+    UNION ALL SELECT "c", "2c"
|
|
|
|
|
+    UNION ALL SELECT "3", "34"                             -- bucket 34
|
|
|
|
|
+    UNION ALL SELECT "4", "34"
|
|
|
|
|
+    UNION ALL SELECT "5", "5d"                             -- bucket 5d
|
|
|
|
|
+    UNION ALL SELECT "d", "5d"
|
|
|
|
|
+    UNION ALL SELECT "6", "67"                             -- bucket 67
|
|
|
|
|
+    UNION ALL SELECT "7", "67"
|
|
|
|
|
+    UNION ALL SELECT "8", "89"                             -- bucket 89
|
|
|
|
|
+    UNION ALL SELECT "9", "89"
|
|
|
|
|
+    UNION ALL SELECT "a", "ab"                             -- bucket ab
|
|
|
|
|
+    UNION ALL SELECT "b", "ab"
|
|
|
|
|
+    UNION ALL SELECT "e", "ef"                             -- bucket ef
|
|
|
|
|
+    UNION ALL SELECT "f", "ef"
|
|
|
|
|
+)
|
|
|
|
|
+-- 当前实验映射
|
|
|
|
|
+-- 查询日期未被任何有效行覆盖的桶(如 20260413 之前的 2c)→ 自动默认为"对照组";89 / 2c 现已在下方列出
|
|
|
|
|
+-- 同一个 suffix_group 可以有多行(SCD Type 2),但同一时间只能命中一行
|
|
|
|
|
+,t_experiment_map AS  -- layer 2: bucket id -> experiment label with an inclusive [start_dt, end_dt] validity range (yyyymmdd strings)
|
|
|
|
|
+(
|
|
|
|
|
+    -- Bucket ab: a single open-ended row starting 20260413; earlier dates match no row here and fall back to the default label.
|
|
|
|
|
+    SELECT "ab" AS suffix_group, "实验组:变更str*ros建模目标实验" AS abcode, "20260413" AS start_dt, "29991231" AS end_dt
|
|
|
|
|
+
|
|
|
|
|
+    -- "变更str*ros建模目标实验" staged rollout (one abcode on several buckets; from 20260413 the rows for ab, 01, 67, 5d, 34 and 89 all carry it)
|
|
|
|
|
+    --   20260320: first rollout on bucket 01
|
|
|
|
|
+    --   20260330: extended to bucket 67 (its bn_ros row ended 20260319, i.e. 10 days earlier)
|
|
|
|
|
+    --   20260407: extended to bucket 5d (its previous row ended 20260406) and bucket 34 (no earlier row, so default label before that)
|
|
|
|
|
+    UNION ALL SELECT "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231"
|
|
|
|
|
+    UNION ALL SELECT "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
|
|
|
|
|
+    UNION ALL SELECT "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
|
|
|
|
|
+    UNION ALL SELECT "34", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
|
|
|
|
|
+
|
|
|
|
|
+    -- Bucket 67, previous experiment: bn_ros new loss function
|
|
|
|
|
+    --   20260320~20260329 is a 10-day gap with no row, so bucket 67 resolves to the default label for those dates
|
|
|
|
|
+    UNION ALL SELECT "67", "实验组:bn_ros新损失函数", "20260311", "20260319"
|
|
|
|
|
+
|
|
|
|
|
+    -- Bucket 5d, previous experiment: the str ranking model row below
|
|
|
|
|
+    --   It ends 20260406 and the rollout row above starts 20260407, so there is no gap
|
|
|
|
|
+    UNION ALL SELECT "5d", "实验组:解构特征排序str模型", "20260314", "20260406"
|
|
|
|
|
+
|
|
|
|
|
+    -- Bucket ef history: str model & recall row (ended 20260320) -> 17-day gap -> DNN model row (open-ended)
|
|
|
|
|
+    --   20260321~20260406 has no row, so bucket ef resolves to the default label for those dates
|
|
|
|
|
+    UNION ALL SELECT "ef", "实验组:解构特征排序str模型&召回", "20260314", "20260320"
|
|
|
|
|
+    UNION ALL SELECT "ef", "实验组:DNN模型", "20260407", "29991231"
|
|
|
|
|
+    UNION ALL SELECT "2c", "实验组:DNN模型-调参", "20260413", "29991231"
|
|
|
|
|
+
|
|
|
|
|
+    UNION ALL SELECT "89", "对照组", "20260301", "20260412"  -- explicit control row; same label as the COALESCE default used by the joins
|
|
|
|
|
+    UNION ALL SELECT "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+    -- ────────────────────────────────────────────────────────────────────
|
|
|
|
|
+    -- 📖 Editing samples (copy the rows below into the UNION ALL list above)
|
|
|
|
|
+    -- NOTE: ranges of one suffix_group must not overlap; the joins use BETWEEN start_dt AND end_dt, so two matching rows would duplicate data.
|
|
|
|
|
+    -- Sample A: add an experiment occupying a single bucket (close any open-ended row of that bucket first)
|
|
|
|
|
+    --   UNION ALL SELECT "2c", "实验组:新策略 X", "20260501", "29991231"
|
|
|
|
|
+    --
|
|
|
|
|
+    -- Sample B: add a one-to-many experiment (the same experiment on buckets 01 + 34)
|
|
|
|
|
+    --   Add two rows with the same abcode string. NOTE(review): t_metrics groups by abcode AND suffix_group, so the buckets remain separate output rows.
|
|
|
|
|
+    --   UNION ALL SELECT "01", "实验组:大流量 Y", "20260601", "29991231"
|
|
|
|
|
+    --   UNION ALL SELECT "34", "实验组:大流量 Y", "20260601", "29991231"
|
|
|
|
|
+    --
|
|
|
|
|
+    -- Sample C: switch experiments (SCD Type 2 - history is kept)
|
|
|
|
|
+    --   Suppose bucket 01 switches from experiment A to experiment B on 20260701:
|
|
|
|
|
+    --   Step 1: set end_dt of the existing row to the day before the switch:
|
|
|
|
|
+    --     SELECT "01", "实验组:A", "20260320", "20260630"
|
|
|
|
|
+    --   Step 2: append the row for the new experiment:
|
|
|
|
|
+    --     UNION ALL SELECT "01", "实验组:B", "20260701", "29991231"
|
|
|
|
|
+    --
|
|
|
|
|
+    -- Sample D: take an experiment offline and return to the default label (creates a gap)
|
|
|
|
|
+    --   Just set end_dt of the row to the day before the shutdown (no extra row needed):
|
|
|
|
|
+    --     SELECT "5d", "实验组:A", "20250101", "20260630"
|
|
|
|
|
+    --   From 20260701 on no row covers bucket 5d, so it resolves to the default label
|
|
|
|
|
+    --   ⚠️ Fine if the gap is intentional; if a follow-up experiment was simply forgotten, add its row later
|
|
|
|
|
+    -- ────────────────────────────────────────────────────────────────────
|
|
|
|
|
+)
|
|
|
|
|
+,t_base AS  -- exposure rows for '${dt}' enriched with cn/c1, dn/d1, the traffic bucket and the experiment label
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT  sub.*
|
|
|
|
|
+            ,sg.suffix_group
|
|
|
|
|
+            ,COALESCE(m.abcode,"对照组") AS abcode  -- no t_experiment_map row covers the date -> default label
|
|
|
|
|
+    FROM    (
|
|
|
|
|
+        SELECT  dt
|
|
|
|
|
+                ,apptype
|
|
|
|
|
+                ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix  -- last character of rootsessionid
|
|
|
|
|
+                ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
|
|
|
|
|
+                        WHEN page IN ("回流页","其他") THEN "非推荐"
|
|
|
|
|
+                        ELSE "其他"  -- NOTE(review): looks unreachable, the WHERE page IN (...) list below holds only the six values handled above
|
|
|
|
|
+                END AS page
|
|
|
|
|
+                ,a.mid
|
|
|
|
|
+                ,a.vid
|
|
|
|
|
+                ,is_share
|
|
|
|
|
+                ,share_cnt
|
|
|
|
|
+                ,is_return_1
|
|
|
|
|
+                ,is_return_n
|
|
|
|
|
+                ,is_return_noself
|
|
|
|
|
+                ,return_1_uv
|
|
|
|
|
+                ,return_n_uv
|
|
|
|
|
+                ,return_n_uv_noself
|
|
|
|
|
+                ,new_exposure_cnt
|
|
|
|
|
+                ,flowpool
|
|
|
|
|
+                ,cc.cn  -- NULL when the exposure has no matching cc row (LEFT JOIN)
|
|
|
|
|
+                ,cc.c1
|
|
|
|
|
+                ,dd.dn  -- NULL when the exposure has no matching dd row (LEFT JOIN)
|
|
|
|
|
+                ,dd.d1
|
|
|
|
|
+        FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
|
|
|
|
|
+        LEFT JOIN (
|
|
|
|
|
+            -- c1/cn: second-hop return UV per (sharer, subsession, video): share a -> click b -> re-share b1 -> click b2
|
|
|
|
|
+            SELECT  a.machinecode AS mid
|
|
|
|
|
+                    ,a.subsessionid
|
|
|
|
|
+                    ,a.videoid AS vid
|
|
|
|
|
+                    ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn  -- distinct b2 clickers other than the b1 sharer
|
|
|
|
|
+                    ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1  -- same, restricted to sharedepth = 1
|
|
|
|
|
+            FROM    (
|
|
|
|
|
+                SELECT  DISTINCT machinecode
|
|
|
|
|
+                        ,shareobjectid AS videoid
|
|
|
|
|
+                        ,recomTraceId
|
|
|
|
|
+                        ,subsessionid
|
|
|
|
|
+                        ,sharedepth
|
|
|
|
|
+                        ,shareid
|
|
|
|
|
+                FROM    loghubods.user_share_log
|
|
|
|
|
+                WHERE   dt = '${dt}'
|
|
|
|
|
+                AND     topic = 'share'
|
|
|
|
|
+                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
|
|
|
|
|
+            ) a  -- share events of the day on the listed page sources
|
|
|
|
|
+            LEFT JOIN (
|
|
|
|
|
+                SELECT  DISTINCT machinecode
|
|
|
|
|
+                        ,clickobjectid
|
|
|
|
|
+                        ,recomTraceId
|
|
|
|
|
+                        ,subsessionid
|
|
|
|
|
+                        ,sharedepth
|
|
|
|
|
+                        ,rootshareid
|
|
|
|
|
+                FROM    loghubods.user_share_log
|
|
|
|
|
+                WHERE   dt = '${dt}'
|
|
|
|
|
+                AND     topic = 'click'
|
|
|
|
|
+            ) b  -- click events whose rootshareid equals a share id from a
|
|
|
|
|
+            ON      a.shareid = b.rootshareid
|
|
|
|
|
+            LEFT JOIN (
|
|
|
|
|
+                SELECT  DISTINCT machinecode
|
|
|
|
|
+                        ,shareobjectid
|
|
|
|
|
+                        ,recomTraceId
|
|
|
|
|
+                        ,subsessionid
|
|
|
|
|
+                        ,sharedepth
|
|
|
|
|
+                        ,shareid
|
|
|
|
|
+                FROM    loghubods.user_share_log
|
|
|
|
|
+                WHERE   dt = '${dt}'
|
|
|
|
|
+                AND     topic = 'share'
|
|
|
|
|
+                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
|
|
|
|
|
+            ) b1  -- share events made by the b clicker within the same subsession
|
|
|
|
|
+            ON      b.machinecode = b1.machinecode
|
|
|
|
|
+            AND     b.subsessionid = b1.subsessionid
|
|
|
|
|
+            LEFT JOIN (
|
|
|
|
|
+                SELECT  DISTINCT machinecode
|
|
|
|
|
+                        ,clickobjectid
|
|
|
|
|
+                        ,recomTraceId
|
|
|
|
|
+                        ,subsessionid
|
|
|
|
|
+                        ,sharedepth
|
|
|
|
|
+                        ,shareid
|
|
|
|
|
+                        ,rootshareid
|
|
|
|
|
+                FROM    loghubods.user_share_log
|
|
|
|
|
+                WHERE   dt = '${dt}'
|
|
|
|
|
+                AND     topic = 'click'
|
|
|
|
|
+            ) b2  -- click events on the b1 shares
|
|
|
|
|
+            ON      b1.shareid = b2.rootshareid
|
|
|
|
|
+            GROUP BY a.machinecode
|
|
|
|
|
+                     ,a.subsessionid
|
|
|
|
|
+                     ,a.videoid
|
|
|
|
|
+        ) cc
|
|
|
|
|
+        ON      a.mid = cc.mid
|
|
|
|
|
+        AND     a.subsessionid = cc.subsessionid
|
|
|
|
|
+        AND     a.vid = cc.vid
|
|
|
|
|
+        LEFT JOIN (
|
|
|
|
|
+            -- d1/dn: return UV of the NEXT video viewed in the same (mid, subsessionid); 0 when there is no next video
|
|
|
|
|
+            SELECT  *
|
|
|
|
|
+                    ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn  -- rn DESC: the lagged row is the one with the next-higher rn
|
|
|
|
|
+                    ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
|
|
|
|
|
+            FROM    (
|
|
|
|
|
+                SELECT  a.mid AS mid
|
|
|
|
|
+                        ,a.subsessionid
|
|
|
|
|
+                        ,a.videoid AS vid
|
|
|
|
|
+                        ,COUNT(DISTINCT b.shareid) AS 分享次数
|
|
|
|
|
+                        ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
|
|
|
|
|
+                        ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
|
|
|
|
|
+                        ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn  -- NOTE(review): partitions by subsessionid only, the LAG windows use (mid, subsessionid)
|
|
|
|
|
+                FROM    (
|
|
|
|
|
+                    SELECT  *
|
|
|
|
|
+                    FROM    (
|
|
|
|
|
+                        SELECT  DISTINCT mid
|
|
|
|
|
+                                ,subsessionid
|
|
|
|
|
+                                ,videoid
|
|
|
|
|
+                                ,logtimestamp
|
|
|
|
|
+                                ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
|
|
|
|
|
+                        FROM    loghubods.video_action_log_rp
|
|
|
|
|
+                        WHERE   dt = '${dt}'
|
|
|
|
|
+                        AND     businesstype = 'videoView'
|
|
|
|
|
+                        AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
|
|
|
|
|
+                    )
|
|
|
|
|
+                    WHERE   rn = 1  -- keep the earliest view per (mid, subsessionid, videoid)
|
|
|
|
|
+                ) a
|
|
|
|
|
+                LEFT JOIN (
|
|
|
|
|
+                    SELECT  DISTINCT machinecode
|
|
|
|
|
+                            ,shareobjectid AS videoid
|
|
|
|
|
+                            ,recomTraceId
|
|
|
|
|
+                            ,subsessionid
|
|
|
|
|
+                            ,sharedepth
|
|
|
|
|
+                            ,shareid
|
|
|
|
|
+                            ,clienttimestamp
|
|
|
|
|
+                    FROM    loghubods.user_share_log
|
|
|
|
|
+                    WHERE   dt = '${dt}'
|
|
|
|
|
+                    AND     topic = 'share'
|
|
|
|
|
+                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
|
|
|
|
|
+                ) b  -- shares of that video by the same viewer in the same subsession
|
|
|
|
|
+                ON      a.mid = b.machinecode
|
|
|
|
|
+                AND     a.subsessionid = b.subsessionid
|
|
|
|
|
+                AND     a.videoid = b.videoid
|
|
|
|
|
+                LEFT JOIN (
|
|
|
|
|
+                    SELECT  DISTINCT machinecode
|
|
|
|
|
+                            ,clickobjectid
|
|
|
|
|
+                            ,recomTraceId
|
|
|
|
|
+                            ,subsessionid
|
|
|
|
|
+                            ,sharedepth
|
|
|
|
|
+                            ,rootshareid
|
|
|
|
|
+                    FROM    loghubods.user_share_log
|
|
|
|
|
+                    WHERE   dt = '${dt}'
|
|
|
|
|
+                    AND     topic = 'click'
|
|
|
|
|
+                ) c  -- clicks on those shares
|
|
|
|
|
+                ON      b.shareid = c.rootshareid
|
|
|
|
|
+                GROUP BY a.mid
|
|
|
|
|
+                         ,a.subsessionid
|
|
|
|
|
+                         ,a.videoid
|
|
|
|
|
+                         ,a.logtimestamp
|
|
|
|
|
+            )
|
|
|
|
|
+        ) dd
|
|
|
|
|
+        ON      a.mid = dd.mid
|
|
|
|
|
+        AND     a.subsessionid = dd.subsessionid
|
|
|
|
|
+        AND     a.vid = dd.vid
|
|
|
|
|
+        WHERE   dt="${dt}"
|
|
|
|
|
+        AND     apptype IN ("4")
|
|
|
|
|
+        AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
|
|
|
|
|
+        AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")  -- raw abcode column of the exposure table, not the label derived above
|
|
|
|
|
+        AND     abcode NOT IN ("ab100")  -- NOTE(review): redundant, the IN list above already excludes "ab100"
|
|
|
|
|
+    ) sub
|
|
|
|
|
+    -- INNER JOIN: only rows whose suffix is one of the 16 hex characters in t_suffix_group are kept (guards against abnormal data)
|
|
|
|
|
+    INNER JOIN t_suffix_group sg
|
|
|
|
|
+    ON      sub.suffix = sg.suffix
|
|
|
|
|
+    -- LEFT JOIN: a bucket may match no experiment row; m.abcode is then NULL and the COALESCE above yields "对照组"
|
|
|
|
|
+    LEFT JOIN t_experiment_map m
|
|
|
|
|
+    ON      sg.suffix_group = m.suffix_group
|
|
|
|
|
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt  -- inclusive string comparison of yyyymmdd values
|
|
|
|
|
+)
|
|
|
|
|
+-- 桶内每个 vid 的曝光数(ECS / ARP 的共同中间件,避免重复扫 t_base)
|
|
|
|
|
+,t_vid_exp AS  -- row count of "推荐" rows in t_base per (dt, apptype, abcode, suffix, vid)
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT   tb.dt,
|
|
|
|
|
+             tb.apptype,
|
|
|
|
|
+             tb.abcode,
|
|
|
|
|
+             tb.suffix,
|
|
|
|
|
+             tb.vid,
|
|
|
|
|
+             COUNT(*) AS vid_exp_cnt
|
|
|
|
|
+    FROM     t_base tb
|
|
|
|
|
+    WHERE    tb.page = "推荐"
|
|
|
|
|
+    GROUP BY tb.dt,
|
|
|
|
|
+             tb.apptype,
|
|
|
|
|
+             tb.abcode,
|
|
|
|
|
+             tb.suffix,
|
|
|
|
|
+             tb.vid
|
|
|
|
|
+)
|
|
|
|
|
+-- 桶内 ECS (Effective Catalog Size):曝光实际"相当于推了多少条视频"
|
|
|
|
|
+-- ECS = 2 * Σ(p_i * rank_i) - 1
|
|
|
|
|
+-- p_i = vid 在桶内曝光占比
|
|
|
|
|
+-- rank_i = 按曝光降序的排名(1 起)
|
|
|
|
|
+-- 值域 [1, distinct_vid_cnt],越大越分散,越小越头部集中
|
|
|
|
|
+,t_bucket_ecs AS  -- ECS per (dt, apptype, abcode, suffix) = 2 * sum(share * rank) - 1
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT   ranked.dt,
|
|
|
|
|
+             ranked.apptype,
|
|
|
|
|
+             ranked.abcode,
|
|
|
|
|
+             ranked.suffix,
|
|
|
|
|
+             2 * SUM(ranked.exp_share * ranked.exp_rank) - 1 AS ecs
|
|
|
|
|
+    FROM     (
|
|
|
|
|
+        SELECT   ve.dt,
|
|
|
|
|
+                 ve.apptype,
|
|
|
|
|
+                 ve.abcode,
|
|
|
|
|
+                 ve.suffix,
|
|
|
|
|
+                 ve.vid_exp_cnt / SUM(ve.vid_exp_cnt) OVER (
|
|
|
|
|
+                     PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix
|
|
|
|
|
+                 ) AS exp_share,  -- the vid's fraction of the partition's total vid_exp_cnt
|
|
|
|
|
+                 ROW_NUMBER() OVER (
|
|
|
|
|
+                     PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix
|
|
|
|
|
+                     ORDER BY ve.vid_exp_cnt DESC
|
|
|
|
|
+                 ) AS exp_rank    -- 1 = largest vid_exp_cnt in the partition
|
|
|
|
|
+        FROM     t_vid_exp ve
|
|
|
|
|
+    ) ranked
|
|
|
|
|
+    GROUP BY ranked.dt,
|
|
|
|
|
+             ranked.apptype,
|
|
|
|
|
+             ranked.abcode,
|
|
|
|
|
+             ranked.suffix
|
|
|
|
|
+)
|
|
|
|
|
+-- 全平台每个 vid 的曝光度(作为 ARP 的 popularity reference)
|
|
|
|
|
+-- 注意:此处不按 abcode / suffix 分组,reference 覆盖 t_base 中全部合法尾号的"推荐"曝光(t_base 内对原始 abcode 的过滤仍然生效)
|
|
|
|
|
+,t_vid_global_pop AS  -- row count of "推荐" rows in t_base per (dt, apptype, vid), across all abcode / suffix values
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT   tb.dt,
|
|
|
|
|
+             tb.apptype,
|
|
|
|
|
+             tb.vid,
|
|
|
|
|
+             COUNT(*) AS vid_global_pop
|
|
|
|
|
+    FROM     t_base tb
|
|
|
|
|
+    WHERE    tb.page = "推荐"
|
|
|
|
|
+    GROUP BY tb.dt,
|
|
|
|
|
+             tb.apptype,
|
|
|
|
|
+             tb.vid
|
|
|
|
|
+)
|
|
|
|
|
+-- 桶内 ARP (Average Recommendation Popularity):推荐视频的平均热门度
|
|
|
|
|
+-- 按桶内曝光量加权:曝光越多的 vid 对 ARP 影响越大
|
|
|
|
|
+-- 组合 ECS + ARP 可识别四象限:
|
|
|
|
|
+-- 高ECS + 低ARP = 分散 + 偏长尾 ✅ 理想
|
|
|
|
|
+-- 高ECS + 高ARP = 分散 + 头部内部多样化 ⚠️ 需警惕
|
|
|
|
|
+-- 低ECS + 低ARP = 集中 + 冷门(小众爆发) ❓ 特殊
|
|
|
|
|
+-- 低ECS + 高ARP = 集中 + 头部 ❌ 模型坍缩
|
|
|
|
|
+,t_bucket_arp AS  -- ARP per (dt, apptype, abcode, suffix): vid_global_pop averaged with vid_exp_cnt as the weight
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT   ve.dt,
|
|
|
|
|
+             ve.apptype,
|
|
|
|
|
+             ve.abcode,
|
|
|
|
|
+             ve.suffix,
|
|
|
|
|
+             SUM(ve.vid_exp_cnt * gp.vid_global_pop) / SUM(ve.vid_exp_cnt) AS arp
|
|
|
|
|
+    FROM     t_vid_exp ve
|
|
|
|
|
+    LEFT JOIN t_vid_global_pop gp
|
|
|
|
|
+    ON       gp.dt = ve.dt
|
|
|
|
|
+    AND      gp.apptype = ve.apptype
|
|
|
|
|
+    AND      gp.vid = ve.vid
|
|
|
|
|
+    GROUP BY ve.dt,
|
|
|
|
|
+             ve.apptype,
|
|
|
|
|
+             ve.abcode,
|
|
|
|
|
+             ve.suffix
|
|
|
|
|
+)
|
|
|
|
|
+-- dau2:按单尾号聚合
|
|
|
|
|
+,t_dau2_bucket AS  -- distinct machinecode count from useractive_log per (dt, apptype, abcode, suffix_group, suffix)
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT   SUBSTR(act.dt,1,8) AS dt,
|
|
|
|
|
+             act.apptype,
|
|
|
|
|
+             COALESCE(em.abcode,"对照组") AS abcode,
|
|
|
|
|
+             grp.suffix_group,
|
|
|
|
|
+             act.suffix,
|
|
|
|
|
+             COUNT(DISTINCT act.machinecode) AS dau2
|
|
|
|
|
+    FROM     (
|
|
|
|
|
+        SELECT   dt,
|
|
|
|
|
+                 apptype,
|
|
|
|
|
+                 machinecode,
|
|
|
|
|
+                 SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
|
|
|
|
|
+        FROM     loghubods.useractive_log
|
|
|
|
|
+        WHERE    dt="${dt}"
|
|
|
|
|
+        -- alternative source, kept disabled: FROM loghubods.useractive_log_per5min
|
|
|
|
|
+        --                                    WHERE dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
|
|
|
|
|
+        AND      apptype IN ("4")
|
|
|
|
|
+    ) act
|
|
|
|
|
+    INNER JOIN t_suffix_group grp
|
|
|
|
|
+    ON       grp.suffix = act.suffix
|
|
|
|
|
+    LEFT JOIN t_experiment_map em
|
|
|
|
|
+    ON       em.suffix_group = grp.suffix_group
|
|
|
|
|
+    AND      '${dt}' BETWEEN em.start_dt AND em.end_dt
|
|
|
|
|
+    GROUP BY SUBSTR(act.dt,1,8),
|
|
|
|
|
+             act.apptype,
|
|
|
|
|
+             COALESCE(em.abcode,"对照组"),
|
|
|
|
|
+             grp.suffix_group,
|
|
|
|
|
+             act.suffix
|
|
|
|
|
+)
|
|
|
|
|
+-- dau2:按 suffix_group 求尾号均值
|
|
|
|
|
+,t_dau2 AS  -- mean of the per-suffix dau2 values per (dt, apptype, abcode, suffix_group)
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT   db.dt,
|
|
|
|
|
+             db.apptype,
|
|
|
|
|
+             db.abcode,
|
|
|
|
|
+             db.suffix_group,
|
|
|
|
|
+             AVG(db.dau2) AS dau2
|
|
|
|
|
+    FROM     t_dau2_bucket db
|
|
|
|
|
+    GROUP BY db.dt,
|
|
|
|
|
+             db.apptype,
|
|
|
|
|
+             db.abcode,
|
|
|
|
|
+             db.suffix_group
|
|
|
|
|
+)
|
|
|
|
|
+-- 按单尾号聚合(尾号内 UV 去重)
|
|
|
|
|
+,t_bucket AS  -- per-suffix metrics over the "推荐" rows of t_base, grouped by (dt, apptype, abcode, suffix_group, suffix)
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT  dt
|
|
|
|
|
+            ,apptype
|
|
|
|
|
+            ,abcode
|
|
|
|
|
+            ,suffix_group
|
|
|
|
|
+            ,suffix
|
|
|
|
|
+            ,COALESCE(COUNT(1) / NULLIF(COUNT(DISTINCT mid),0),0) AS exp_per_dau  -- NULLIF: explicit zero-denominator guard (all mid NULL)
|
|
|
|
|
+            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one
|
|
|
|
|
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(is_share),0),0) AS ros_one  -- 0 when the group has no shares
|
|
|
|
|
+            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str
|
|
|
|
|
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(share_cnt),0),0) AS ros  -- 0 when share_cnt sums to 0
|
|
|
|
|
+            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus
|
|
|
|
|
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(is_return_1),0),0) AS ros_minus  -- 0 when is_return_1 sums to 0
|
|
|
|
|
+            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS bn_rov
|
|
|
|
|
+            ,COALESCE(SUM(c1) / COUNT(1),0) AS c1_rov
|
|
|
|
|
+            ,COALESCE(SUM(cn) / COUNT(1),0) AS cn_rov
|
|
|
|
|
+            ,COALESCE(SUM(d1) / COUNT(1),0) AS d1_rov
|
|
|
|
|
+            ,COALESCE(SUM(dn) / COUNT(1),0) AS dn_rov
|
|
|
|
|
+            -- Combined ROV = bn_rov + cn_rov + dn_rov (shared denominator COUNT(1)); each SUM is defaulted to 0 so one all-NULL component (cn / dn come from LEFT JOINs) cannot null out the total
|
|
|
|
|
+            ,(COALESCE(SUM(return_n_uv),0) + COALESCE(SUM(cn),0) + COALESCE(SUM(dn),0)) / COUNT(1) AS total_rov
|
|
|
|
|
+            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24
|
|
|
|
|
+            ,COUNT(DISTINCT mid) AS dau
|
|
|
|
|
+            ,COUNT(1) AS exp
|
|
|
|
|
+            -- Distinct vid count in the group (upper bound of ECS, used for ecs_ratio / gini downstream)
|
|
|
|
|
+            ,COUNT(DISTINCT vid) AS distinct_vid_cnt
|
|
|
|
|
+            ,COALESCE(SUM(is_share),0) AS is_share
|
|
|
|
|
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
|
|
|
|
|
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
|
|
|
|
|
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
|
|
|
|
|
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
|
|
|
|
|
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
|
|
|
|
|
+            ,COALESCE(SUM(cn),0) AS cn
|
|
|
|
|
+            ,COALESCE(SUM(c1),0) AS c1
|
|
|
|
|
+            ,COALESCE(SUM(dn),0) AS dn
|
|
|
|
|
+            ,COALESCE(SUM(d1),0) AS d1
|
|
|
|
|
+    FROM    t_base
|
|
|
|
|
+    WHERE   page = "推荐"
|
|
|
|
|
+    GROUP BY dt
|
|
|
|
|
+             ,apptype
|
|
|
|
|
+             ,abcode
|
|
|
|
|
+             ,suffix_group
|
|
|
|
|
+             ,suffix
|
|
|
|
|
+)
|
|
|
|
|
+-- 按实验组求尾号均值(新增:合并 ROV + 分发多样性三件套)
|
|
|
|
|
+,t_metrics AS  -- mean of the per-suffix metrics per (dt, apptype, abcode, suffix_group), joined with per-suffix ECS and ARP
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT   bk.dt,
|
|
|
|
|
+             bk.apptype,
|
|
|
|
|
+             bk.abcode,
|
|
|
|
|
+             bk.suffix_group,
|
|
|
|
|
+             ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau,
|
|
|
|
|
+             ROUND(AVG(bk.str_one),6) AS str_one,
|
|
|
|
|
+             ROUND(AVG(bk.ros_one),6) AS ros_one,
|
|
|
|
|
+             ROUND(AVG(bk.str),6) AS str,
|
|
|
|
|
+             ROUND(AVG(bk.ros),6) AS ros,
|
|
|
|
|
+             ROUND(AVG(bk.str_plus),6) AS str_plus,
|
|
|
|
|
+             ROUND(AVG(bk.ros_minus),6) AS ros_minus,
|
|
|
|
|
+             ROUND(AVG(bk.bn_rov),6) AS bn_rov,
|
|
|
|
|
+             ROUND(AVG(bk.c1_rov),6) AS c1_rov,
|
|
|
|
|
+             ROUND(AVG(bk.cn_rov),6) AS cn_rov,
|
|
|
|
|
+             ROUND(AVG(bk.d1_rov),6) AS d1_rov,
|
|
|
|
|
+             ROUND(AVG(bk.dn_rov),6) AS dn_rov,
|
|
|
|
|
+             ROUND(AVG(bk.total_rov),6) AS total_rov,
|
|
|
|
|
+             ROUND(AVG(bk.vovh24),6) AS vovh24,
|
|
|
|
|
+             AVG(bk.dau) AS dau,
|
|
|
|
|
+             AVG(bk.exp) AS exp,
|
|
|
|
|
+             ROUND(AVG(bk.distinct_vid_cnt),0) AS distinct_vid_cnt,
|
|
|
|
|
+             ROUND(AVG(be.ecs),1) AS ecs,
|
|
|
|
|
+             -- ECS normalised by the distinct vid count (ratio of the two averages); NULL when the average count is 0
|
|
|
|
|
+             ROUND(AVG(be.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS ecs_ratio,
|
|
|
|
|
+             -- Gini coefficient, computed here as 1 - ecs_ratio
|
|
|
|
|
+             ROUND(1 - AVG(be.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS gini,
|
|
|
|
|
+             ROUND(AVG(ba.arp),0) AS arp,
|
|
|
|
|
+             AVG(bk.is_share) AS is_share,
|
|
|
|
|
+             AVG(bk.share_cnt) AS share_cnt,
|
|
|
|
|
+             AVG(bk.is_return_1) AS is_return_1,
|
|
|
|
|
+             AVG(bk.return_n_uv) AS return_n_uv,
|
|
|
|
|
+             AVG(bk.viewh24) AS viewh24,
|
|
|
|
|
+             AVG(bk.return_n_uv_noself) AS return_n_uv_noself,
|
|
|
|
|
+             AVG(bk.cn) AS cn,
|
|
|
|
|
+             AVG(bk.c1) AS c1,
|
|
|
|
|
+             AVG(bk.dn) AS dn,
|
|
|
|
|
+             AVG(bk.d1) AS d1,
|
|
|
|
|
+             WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix
|
|
|
|
|
+    FROM     t_bucket bk
|
|
|
|
|
+    LEFT JOIN t_bucket_ecs be
|
|
|
|
|
+    ON       be.dt = bk.dt
|
|
|
|
|
+    AND      be.apptype = bk.apptype
|
|
|
|
|
+    AND      be.abcode = bk.abcode
|
|
|
|
|
+    AND      be.suffix = bk.suffix
|
|
|
|
|
+    LEFT JOIN t_bucket_arp ba
|
|
|
|
|
+    ON       ba.dt = bk.dt
|
|
|
|
|
+    AND      ba.apptype = bk.apptype
|
|
|
|
|
+    AND      ba.abcode = bk.abcode
|
|
|
|
|
+    AND      ba.suffix = bk.suffix
|
|
|
|
|
+    GROUP BY bk.dt,
|
|
|
|
|
+             bk.apptype,
|
|
|
|
|
+             bk.abcode,
|
|
|
|
|
+             bk.suffix_group
|
|
|
|
|
+)
|
|
|
|
|
+SELECT  mt.dt  -- final report: one row per (dt, apptype, abcode, suffix_group), t_metrics columns plus dau2
|
|
|
|
|
+        ,mt.apptype
|
|
|
|
|
+        ,mt.abcode
|
|
|
|
|
+        ,mt.suffix_group
|
|
|
|
|
+        ,mt.suffix
|
|
|
|
|
+        ,mt.exp_per_dau
|
|
|
|
|
+        ,mt.str_one
|
|
|
|
|
+        ,mt.ros_one
|
|
|
|
|
+        ,mt.str
|
|
|
|
|
+        ,mt.ros
|
|
|
|
|
+        ,mt.str_plus
|
|
|
|
|
+        ,mt.ros_minus
|
|
|
|
|
+        ,mt.bn_rov
|
|
|
|
|
+        ,mt.c1_rov
|
|
|
|
|
+        ,mt.cn_rov
|
|
|
|
|
+        ,mt.d1_rov
|
|
|
|
|
+        ,mt.dn_rov
|
|
|
|
|
+        ,mt.total_rov
|
|
|
|
|
+        ,mt.vovh24
|
|
|
|
|
+        ,mt.dau
|
|
|
|
|
+        ,mt.exp
|
|
|
|
|
+        ,mt.distinct_vid_cnt
|
|
|
|
|
+        ,mt.ecs
|
|
|
|
|
+        ,mt.ecs_ratio
|
|
|
|
|
+        ,mt.gini
|
|
|
|
|
+        ,mt.arp
|
|
|
|
|
+        ,mt.is_share
|
|
|
|
|
+        ,mt.share_cnt
|
|
|
|
|
+        ,mt.is_return_1
|
|
|
|
|
+        ,mt.return_n_uv
|
|
|
|
|
+        ,mt.viewh24
|
|
|
|
|
+        ,mt.return_n_uv_noself
|
|
|
|
|
+        ,mt.cn
|
|
|
|
|
+        ,mt.c1
|
|
|
|
|
+        ,mt.dn
|
|
|
|
|
+        ,mt.d1
|
|
|
|
|
+        ,d2.dau2
|
|
|
|
|
+FROM    t_metrics mt
|
|
|
|
|
+LEFT JOIN t_dau2 d2
|
|
|
|
|
+ON      d2.dt = mt.dt
|
|
|
|
|
+AND     d2.apptype = mt.apptype
|
|
|
|
|
+AND     d2.abcode = mt.abcode
|
|
|
|
|
+AND     d2.suffix_group = mt.suffix_group
|
|
|
|
|
+ORDER BY mt.dt DESC,mt.apptype,mt.abcode,mt.suffix_group
|
|
|
|
|
+;
|