Просмотр исходного кода

feat: 新增 apptype 0+4 合并分层版本,source_type×layer_type 维度 + 同口径 dau_vs_5d

- base_all_new_v3_分层: 在 base_all_new_v3 基础上增加 source_type(内部/外部) × layer_type(首层/裂变层)
- GROUPING SETS 同时产出分层行和总体行(每天 80 行 = 2 apptype × 8 桶 × 5 分层)
- t_dau2_base5 扩展为 80 行分层基线值,dau_vs_5d 同口径对比各分层的历史基线
- ECS/ARP 也按分层维度计算

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
yangxiaohui 4 недель назад
Родитель
Сommit
dbb330a5a0

+ 10 - 0
tasks/00_尾号实验/base_all_new_v3_分层.json

@@ -0,0 +1,10 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "ePvpL8",
+  "sort": "dt:desc,apptype:asc,suffix_group:asc,source_type:asc,layer_type:asc",
+  "order": {
+      "suffix_group": ["ab", "34", "2c", "67", "01", "5d", "ef", "89"]
+  },
+  "cols": null,
+  "append_cols": false
+}

+ 715 - 0
tasks/00_尾号实验/base_all_new_v3_分层.sql

@@ -0,0 +1,715 @@
+-- ════════════════════════════════════════════════════════════════════════════
+-- 两层尾号映射 (SCD Type 2 模式) — apptype = 0 + 4 合并版 + 基线对比 + 分层
+-- [硬编码版:基线 DAU2 预计算,无额外扫描]
+-- 分层维度:source_type(内部/外部) × layer_type(首层/裂变层),GROUPING SETS 同时产出总体
+--
+-- 第一层 t_suffix_group:物理尾号 → 分流桶 ID(16 个 hex 尾号分成 8 个 2-元桶)
+--   - 分流规则不变时,此层永不改
+--
+-- 第二层 t_experiment_map:分流桶 → 实验名 + 生效日期
+--   - 只列出"分配了具体实验"的桶,未列出的桶自动默认为"对照组"
+--   - 支持 1 对多:同一个实验占多个桶时,用同一 abcode 字符串多加几行
+--   - 实验切换:不删旧行,关闭 end_dt + 追加新行(保留历史可回溯)
+--
+-- ┌─ 基线配置 ──────────────────────────────────────────────────────────┐
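+-- │  基线桶:89              (对照组物理桶,横向对比基准)              │
+-- │  NOTE(review): t_experiment_map maps bucket 89 to an experiment     │
+-- │  group from 20260413 on, so 89 is a control bucket only before      │
+-- │  that date - confirm the horizontal baseline is still intended.     │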
+-- │  基线天数:20260307~20260311(5 天均值,DAU2 纵向对比基准)         │
+-- │  apptype:0 + 4 合并输出,按 apptype 分别对比各自基线              │
+-- └─────────────────────────────────────────────────────────────────────┘
+-- ════════════════════════════════════════════════════════════════════════════
+-- Layer 1 lookup: each of the 16 hex suffix characters belongs to exactly one
+-- of eight two-character buckets (suffix_group).
+WITH t_suffix_group AS
+(
+    SELECT
+        suffix
+        ,suffix_group
+    FROM VALUES
+        ("0", "01"), ("1", "01")
+        ,("2", "2c"), ("c", "2c")
+        ,("3", "34"), ("4", "34")
+        ,("5", "5d"), ("d", "5d")
+        ,("6", "67"), ("7", "67")
+        ,("8", "89"), ("9", "89")
+        ,("a", "ab"), ("b", "ab")
+        ,("e", "ef"), ("f", "ef")
+        t (suffix, suffix_group)
+)
+-- Config area 1/2: experiment mapping, one row per
+-- (apptype, suffix_group, validity window).
+--   * Supporting a new apptype only requires more rows here.
+--   * A bucket with no row covering the run date falls back to "对照组"
+--     through the COALESCE in the consuming CTEs.
+--   * start_dt / end_dt are yyyymmdd strings compared with BETWEEN (inclusive).
+,t_experiment_map AS
+(
+    SELECT
+        apptype
+        ,suffix_group
+        ,abcode
+        ,start_dt
+        ,end_dt
+    FROM VALUES
+        -- apptype = 4
+        ("4", "ab", "实验组:变更str*ros建模目标实验", "20260413", "29991231")
+        ,("4", "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231")
+        ,("4", "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231")
+        ,("4", "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231")
+        ,("4", "34", "实验组:变更str*ros建模目标实验", "20260407", "29991231")
+        ,("4", "67", "实验组:bn_ros新损失函数", "20260311", "20260319")
+        ,("4", "5d", "实验组:解构特征排序str模型", "20260314", "20260406")
+        ,("4", "ef", "实验组:解构特征排序str模型&召回", "20260314", "20260320")
+        ,("4", "ef", "实验组:DNN模型", "20260407", "29991231")
+        ,("4", "2c", "实验组:DNN模型-调参", "20260413", "29991231")
+        ,("4", "89", "对照组", "20260301", "20260412")
+        ,("4", "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231")
+        -- apptype = 0
+        ,("0", "ab", "实验组:变更str*ros建模目标实验", "20260413", "29991231")
+        ,("0", "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231")
+        ,("0", "34", "实验组:变更str*ros建模目标实验", "20260330", "29991231")
+        ,("0", "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231")
+        ,("0", "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231")
+        ,("0", "ef", "实验组:DNN模型-调参", "20260410", "29991231")
+        ,("0", "2c", "实验组:DNN模型", "20260413", "29991231")
+        ,("0", "89", "对照组", "20260301", "20260412")
+        ,("0", "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231")
+        t (apptype, suffix_group, abcode, start_dt, end_dt)
+)
+-- t_base: rows of the exposure table for '${dt}' (apptype "0" or "4"), each
+-- tagged with its suffix bucket, experiment label (abcode), the two strata
+-- (source_type, layer_type) and the cc / dd return counts.
+-- Presumably one output row per exposure record: cc and dd are each unique
+-- per join key, and the experiment map is expected to have at most one row
+-- per (apptype, suffix_group) valid on a given date - TODO confirm.
+,t_base AS
+(
+    SELECT  sub.*
+            ,sg.suffix_group
+            ,COALESCE(m.abcode,"对照组") AS abcode
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        -- suffix = last character of extend.rootsessionid
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        -- Collapse the raw page value into "推荐" / "非推荐"; the WHERE below
+                        -- only admits the six values listed in the two WHEN branches.
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        -- source_type: empty or NULL rootsourceid -> '内部', anything else -> '外部'
+                        ,CASE WHEN a.rootsourceid = '' OR a.rootsourceid IS NULL THEN '内部' ELSE '外部' END AS source_type
+                        -- layer_type: rootsessionid equal to subsessionid -> '首层', otherwise '裂变层'
+                        ,CASE WHEN GET_JSON_OBJECT(a.extend,'$.rootsessionid') = a.subsessionid THEN '首层' ELSE '裂变层' END AS layer_type
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- c1/cn: return UV reached through re-shares.
+                                -- Chain: share (a) -> click on that share (b) -> share made by the
+                                -- clicker in the same subsession (b1) -> click on that share (b2).
+                                -- cn = distinct b2 clickers other than the b1 sharer;
+                                -- c1 = the same, restricted to b2.sharedepth = 1.
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- d1/dn: return attributed from the next video in the subsession.
+                                -- The inner query counts, per first view of a video, the distinct
+                                -- clickers of that viewer's shares of it (回流; 回流1 keeps only
+                                -- sharedepth = 1). LAG over rn DESC then reads the value of the row
+                                -- with the next-higher rn, i.e. the following video; 0 when none.
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    -- View order inside a subsessionid by first-view timestamp.
+                                                    -- NOTE(review): partitioned by subsessionid only, while the LAG
+                                                    -- above partitions by (mid, subsessionid) - confirm intended.
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn
+                                            FROM    (
+                                                        -- Earliest videoView row per (mid, subsessionid, videoid).
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0","4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                -- Allowed abcode per apptype: apptype=4 -> ab0-ab9; apptype=0 -> ab0-ab4, ab8, ab9
+                -- (no ab5/ab6/ab7). Each "(apptype = X OR abcode IN (...))" below is an
+                -- implication: it constrains only the rows of the OTHER apptype.
+                -- NOTE(review): the ab100 exclusion looks already implied by those two lists.
+                AND     abcode NOT IN ("ab100")
+                AND     (apptype = "4"
+                         OR abcode IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9"))
+                AND     (apptype = "0"
+                         OR abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9"))
+            ) sub
+    -- INNER JOIN: only rows whose suffix is one of the 16 hex characters are analysed;
+    -- this guards against malformed data.
+    INNER JOIN t_suffix_group sg
+    ON      sub.suffix = sg.suffix
+    -- LEFT JOIN: a bucket may have no experiment row for the date; m.abcode is then
+    -- NULL and the COALESCE above labels the row "对照组".
+    LEFT JOIN t_experiment_map m
+    ON      sub.apptype = m.apptype
+    AND     sg.suffix_group = m.suffix_group
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt
+)
+-- Exposure count per vid inside each (dt, apptype, abcode, suffix) bucket,
+-- restricted to page = "推荐". Shared input of the ECS and ARP CTEs so that
+-- t_base is not aggregated twice.
+-- GROUPING SETS emits one row per stratum plus one roll-up row; the roll-up's
+-- NULL strata are labelled '总体'.
+,t_vid_exp AS
+(
+    SELECT
+        tb.dt
+        ,tb.apptype
+        ,tb.abcode
+        ,tb.suffix
+        ,COALESCE(tb.source_type,'总体') AS source_type
+        ,COALESCE(tb.layer_type,'总体') AS layer_type
+        ,tb.vid
+        ,COUNT(*) AS vid_exp_cnt
+    FROM t_base tb
+    WHERE tb.page = "推荐"
+    GROUP BY
+        tb.dt, tb.apptype, tb.abcode, tb.suffix, tb.vid, tb.source_type, tb.layer_type
+    GROUPING SETS (
+        (tb.dt, tb.apptype, tb.abcode, tb.suffix, tb.vid, tb.source_type, tb.layer_type),
+        (tb.dt, tb.apptype, tb.abcode, tb.suffix, tb.vid)
+    )
+)
+-- Per-bucket ECS (Effective Catalog Size): ecs = 2 * SUM(share_i * rank_i) - 1
+--   share_i = the vid's fraction of the bucket's exposures
+--   rank_i  = 1-based position of the vid when ordered by exposures descending
+-- A larger value means exposure is spread over more vids; a smaller value
+-- means it is concentrated on the head.
+,t_bucket_ecs AS
+(
+    SELECT
+        ranked.dt
+        ,ranked.apptype
+        ,ranked.abcode
+        ,ranked.suffix
+        ,ranked.source_type
+        ,ranked.layer_type
+        ,2 * SUM(ranked.exp_share * ranked.pop_rank) - 1 AS ecs
+    FROM (
+        SELECT
+            ve.dt
+            ,ve.apptype
+            ,ve.abcode
+            ,ve.suffix
+            ,ve.source_type
+            ,ve.layer_type
+            -- share of the bucket's total exposures held by this vid
+            ,ve.vid_exp_cnt / SUM(ve.vid_exp_cnt) OVER (
+                PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix, ve.source_type, ve.layer_type
+            ) AS exp_share
+            -- popularity rank of this vid inside the bucket (1 = most exposed)
+            ,ROW_NUMBER() OVER (
+                PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix, ve.source_type, ve.layer_type
+                ORDER BY ve.vid_exp_cnt DESC
+            ) AS pop_rank
+        FROM t_vid_exp ve
+    ) ranked
+    GROUP BY
+        ranked.dt, ranked.apptype, ranked.abcode, ranked.suffix, ranked.source_type, ranked.layer_type
+)
+-- Exposure count per vid across all buckets of an apptype; used as the
+-- popularity reference for ARP. It deliberately does not group by abcode or
+-- suffix, so the reference covers every valid suffix.
+-- Strata handling is the same as in t_vid_exp: per-stratum rows plus a '总体' roll-up.
+,t_vid_global_pop AS
+(
+    SELECT
+        tb.dt
+        ,tb.apptype
+        ,COALESCE(tb.source_type,'总体') AS source_type
+        ,COALESCE(tb.layer_type,'总体') AS layer_type
+        ,tb.vid
+        ,COUNT(*) AS vid_global_pop
+    FROM t_base tb
+    WHERE tb.page = "推荐"
+    GROUP BY
+        tb.dt, tb.apptype, tb.vid, tb.source_type, tb.layer_type
+    GROUPING SETS (
+        (tb.dt, tb.apptype, tb.vid, tb.source_type, tb.layer_type),
+        (tb.dt, tb.apptype, tb.vid)
+    )
+)
+-- Per-bucket ARP (Average Recommendation Popularity): the mean global
+-- popularity of the vids shown in the bucket, weighted by the bucket's own
+-- exposure count per vid (more exposed vids weigh more).
+-- Intended reading together with ECS:
+--   high ECS + low ARP  = spread out, long-tail leaning   (ideal)
+--   high ECS + high ARP = spread out, but within the head (watch)
+--   low ECS  + low ARP  = concentrated on niche vids      (special case)
+--   low ECS  + high ARP = concentrated on the head        (model collapse)
+,t_bucket_arp AS
+(
+    SELECT
+        ve.dt
+        ,ve.apptype
+        ,ve.abcode
+        ,ve.suffix
+        ,ve.source_type
+        ,ve.layer_type
+        ,SUM(ve.vid_exp_cnt * gp.vid_global_pop) / SUM(ve.vid_exp_cnt) AS arp
+    FROM t_vid_exp ve
+    LEFT JOIN t_vid_global_pop gp
+        ON  ve.dt = gp.dt
+        AND ve.apptype = gp.apptype
+        AND ve.source_type = gp.source_type
+        AND ve.layer_type = gp.layer_type
+        AND ve.vid = gp.vid
+    GROUP BY
+        ve.dt, ve.apptype, ve.abcode, ve.suffix, ve.source_type, ve.layer_type
+)
+-- dau2 per single suffix character: distinct machinecode count from
+-- useractive_log for '${dt}', emitted per (source_type, layer_type) stratum
+-- and as a '总体' roll-up row via GROUPING SETS.
+,t_dau2_bucket AS
+(
+    SELECT
+        SUBSTR(act.dt,1,8) AS dt
+        ,act.apptype
+        ,COALESCE(em.abcode,"对照组") AS abcode
+        ,grp.suffix_group
+        ,act.suffix
+        ,COALESCE(act.source_type,'总体') AS source_type
+        ,COALESCE(act.layer_type,'总体') AS layer_type
+        ,COUNT(DISTINCT act.machinecode) AS dau2
+    FROM (
+        SELECT
+            dt
+            ,apptype
+            ,machinecode
+            -- last character of extparams.rootSessionId
+            ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+            -- NULL or empty rootSourceId -> '内部', anything else -> '外部'
+            ,CASE
+                WHEN GET_JSON_OBJECT(extparams,'$.rootSourceId') IS NULL
+                  OR GET_JSON_OBJECT(extparams,'$.rootSourceId') = '' THEN '内部'
+                ELSE '外部'
+             END AS source_type
+            -- rootSessionId equal to subsessionid or sessionid -> '首层', otherwise '裂变层'
+            ,CASE
+                WHEN GET_JSON_OBJECT(extparams,'$.rootSessionId') = subsessionid
+                  OR GET_JSON_OBJECT(extparams,'$.rootSessionId') = sessionid THEN '首层'
+                ELSE '裂变层'
+             END AS layer_type
+        FROM loghubods.useractive_log
+        WHERE dt = "${dt}"
+        AND   apptype IN ("0","4")
+        -- Both predicates constrain apptype "0" rows only; apptype "4" passes unfiltered.
+        -- NOTE(review): the ab100 exclusion looks already implied by the IN list, and
+        -- t_base limits apptype "0" to ab0-ab4/ab8/ab9 while ab5-ab7 are admitted
+        -- here - confirm both are intended.
+        AND   (apptype = "4"
+               OR GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9"))
+        AND   (apptype = "4"
+               OR GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') NOT IN ("ab100"))
+    ) act
+    -- keep only rows whose suffix is one of the 16 mapped hex characters
+    INNER JOIN t_suffix_group grp
+        ON  act.suffix = grp.suffix
+    -- no experiment row valid on '${dt}' -> abcode falls back to "对照组"
+    LEFT JOIN t_experiment_map em
+        ON  act.apptype = em.apptype
+        AND grp.suffix_group = em.suffix_group
+        AND '${dt}' BETWEEN em.start_dt AND em.end_dt
+    GROUP BY
+        SUBSTR(act.dt,1,8), act.apptype, COALESCE(em.abcode,"对照组"), grp.suffix_group, act.suffix
+        ,act.source_type, act.layer_type
+    GROUPING SETS (
+        (SUBSTR(act.dt,1,8), act.apptype, COALESCE(em.abcode,"对照组"), grp.suffix_group, act.suffix, act.source_type, act.layer_type),
+        (SUBSTR(act.dt,1,8), act.apptype, COALESCE(em.abcode,"对照组"), grp.suffix_group, act.suffix)
+    )
+)
+-- dau2 per suffix_group: the mean of the per-suffix dau2 values of the
+-- suffixes in the group, kept separate per stratum.
+,t_dau2 AS
+(
+    SELECT
+        db.dt
+        ,db.apptype
+        ,db.abcode
+        ,db.suffix_group
+        ,db.source_type
+        ,db.layer_type
+        ,AVG(db.dau2) AS dau2
+    FROM t_dau2_bucket db
+    GROUP BY
+        db.dt, db.apptype, db.abcode, db.suffix_group, db.source_type, db.layer_type
+)
+-- Per-suffix metrics on page = "推荐" (UV is de-duplicated inside a single
+-- suffix). GROUPING SETS emits one row per (source_type, layer_type) stratum
+-- plus a roll-up row whose NULL strata are labelled '总体'.
+-- Every ratio is wrapped in COALESCE(..., 0) so a NULL result reads as 0.
+,t_bucket AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,suffix_group
+            ,suffix
+            ,COALESCE(source_type,'总体') AS source_type
+            ,COALESCE(layer_type,'总体') AS layer_type
+            ,COALESCE(COUNT(1) / COUNT(DISTINCT mid),0) AS exp_per_dau
+            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one
+            ,COALESCE(SUM(return_n_uv) / SUM(is_share),0) AS ros_one
+            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str
+            ,COALESCE(SUM(return_n_uv) / SUM(share_cnt),0) AS ros
+            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus
+            ,COALESCE(SUM(return_n_uv) / SUM(is_return_1),0) AS ros_minus
+            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS bn_rov
+            ,COALESCE(SUM(c1) / COUNT(1),0) AS c1_rov
+            ,COALESCE(SUM(cn) / COUNT(1),0) AS cn_rov
+            ,COALESCE(SUM(d1) / COUNT(1),0) AS d1_rov
+            ,COALESCE(SUM(dn) / COUNT(1),0) AS dn_rov
+            -- Combined ROV = bn_rov + cn_rov + dn_rov (all three share the COUNT(1)
+            -- denominator). cn and dn come from LEFT JOINs in t_base, so SUM(cn) or
+            -- SUM(dn) is NULL for a group with no match; each term is guarded
+            -- separately so one NULL term cannot zero the whole sum.
+            ,COALESCE((COALESCE(SUM(return_n_uv),0) + COALESCE(SUM(cn),0) + COALESCE(SUM(dn),0)) / COUNT(1),0) AS total_rov
+            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            -- Distinct vids shown in the bucket; the denominator of ecs_ratio downstream.
+            ,COUNT(DISTINCT vid) AS distinct_vid_cnt
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(cn),0) AS cn
+            ,COALESCE(SUM(c1),0) AS c1
+            ,COALESCE(SUM(dn),0) AS dn
+            ,COALESCE(SUM(d1),0) AS d1
+    FROM    t_base
+    WHERE   page = "推荐"
+    GROUP BY dt, apptype, abcode, suffix_group, suffix, source_type, layer_type
+    GROUPING SETS (
+        (dt, apptype, abcode, suffix_group, suffix, source_type, layer_type),
+        (dt, apptype, abcode, suffix_group, suffix)
+    )
+)
+-- Per (experiment, suffix_group, stratum) metrics: the mean over the group's
+-- suffixes of every t_bucket metric, plus the combined ROV and the three
+-- distribution-diversity figures (ecs, ecs_ratio / gini, arp).
+,t_metrics AS
+(
+    SELECT
+        bk.dt
+        ,bk.apptype
+        ,bk.abcode
+        ,bk.suffix_group
+        ,bk.source_type
+        ,bk.layer_type
+        ,ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau
+        ,ROUND(AVG(bk.str_one),6) AS str_one
+        ,ROUND(AVG(bk.ros_one),6) AS ros_one
+        ,ROUND(AVG(bk.str),6) AS str
+        ,ROUND(AVG(bk.ros),6) AS ros
+        ,ROUND(AVG(bk.str_plus),6) AS str_plus
+        ,ROUND(AVG(bk.ros_minus),6) AS ros_minus
+        ,ROUND(AVG(bk.bn_rov),6) AS bn_rov
+        ,ROUND(AVG(bk.c1_rov),6) AS c1_rov
+        ,ROUND(AVG(bk.cn_rov),6) AS cn_rov
+        ,ROUND(AVG(bk.d1_rov),6) AS d1_rov
+        ,ROUND(AVG(bk.dn_rov),6) AS dn_rov
+        ,ROUND(AVG(bk.total_rov),6) AS total_rov
+        ,ROUND(AVG(bk.vovh24),6) AS vovh24
+        ,AVG(bk.dau) AS dau
+        ,AVG(bk.exp) AS exp
+        ,ROUND(AVG(bk.distinct_vid_cnt),0) AS distinct_vid_cnt
+        ,ROUND(AVG(ecs_t.ecs),1) AS ecs
+        -- ecs normalised by the distinct-vid count, removing the pool-size effect
+        ,ROUND(AVG(ecs_t.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS ecs_ratio
+        -- gini is reported as 1 - ecs_ratio
+        ,ROUND(1 - AVG(ecs_t.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS gini
+        ,ROUND(AVG(arp_t.arp),0) AS arp
+        ,AVG(bk.is_share) AS is_share
+        ,AVG(bk.share_cnt) AS share_cnt
+        ,AVG(bk.is_return_1) AS is_return_1
+        ,AVG(bk.return_n_uv) AS return_n_uv
+        ,AVG(bk.viewh24) AS viewh24
+        ,AVG(bk.return_n_uv_noself) AS return_n_uv_noself
+        ,AVG(bk.cn) AS cn
+        ,AVG(bk.c1) AS c1
+        ,AVG(bk.dn) AS dn
+        ,AVG(bk.d1) AS d1
+        -- comma-separated list of the suffix characters that fed this row
+        ,WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix
+    FROM t_bucket bk
+    LEFT JOIN t_bucket_ecs ecs_t
+        ON  bk.dt = ecs_t.dt
+        AND bk.apptype = ecs_t.apptype
+        AND bk.abcode = ecs_t.abcode
+        AND bk.suffix = ecs_t.suffix
+        AND bk.source_type = ecs_t.source_type
+        AND bk.layer_type = ecs_t.layer_type
+    LEFT JOIN t_bucket_arp arp_t
+        ON  bk.dt = arp_t.dt
+        AND bk.apptype = arp_t.apptype
+        AND bk.abcode = arp_t.abcode
+        AND bk.suffix = arp_t.suffix
+        AND bk.source_type = arp_t.source_type
+        AND bk.layer_type = arp_t.layer_type
+    GROUP BY
+        bk.dt, bk.apptype, bk.abcode, bk.suffix_group, bk.source_type, bk.layer_type
+)
+-- ════════════════════════════════════════════════════════════════════════════
+-- Baseline comparison layer: historical 5-day mean DAU2 plus horizontal
+-- comparison against the baseline bucket (89)
+-- ════════════════════════════════════════════════════════════════════════════
+-- ┌─ Config area 2/2: baseline DAU2, precomputed per apptype × suffix_group × stratum ─┐
+-- │  Baseline period 20260307~20260311; rows cover the four strata              │
+-- │  (内部/外部 × 首层/裂变层) plus one 总体 (overall) row per bucket.          │
+-- │  The values are hard-coded constants: 2 apptypes × 8 buckets × 5 rows = 80. │
+-- └──────────────────────────────────────────────────────────────────────────────┘
+,t_dau2_base5 AS
+(
+    -- ── apptype = 0 ──
+    SELECT "0" AS apptype, "01" AS suffix_group, "内部"  AS source_type, "裂变层" AS layer_type, 15293.4  AS dau2_base5
+    UNION ALL SELECT "0", "01", "内部",  "首层",   5649.5
+    UNION ALL SELECT "0", "01", "外部",  "裂变层", 187201.8
+    UNION ALL SELECT "0", "01", "外部",  "首层",   44738.8
+    UNION ALL SELECT "0", "01", "总体",  "总体",   250063.2
+    UNION ALL SELECT "0", "2c", "内部",  "裂变层", 16274.4
+    UNION ALL SELECT "0", "2c", "内部",  "首层",   5638.2
+    UNION ALL SELECT "0", "2c", "外部",  "裂变层", 197492.2
+    UNION ALL SELECT "0", "2c", "外部",  "首层",   44625.3
+    UNION ALL SELECT "0", "2c", "总体",  "总体",   261125.5
+    UNION ALL SELECT "0", "34", "内部",  "裂变层", 15779.9
+    UNION ALL SELECT "0", "34", "内部",  "首层",   5646.3
+    UNION ALL SELECT "0", "34", "外部",  "裂变层", 194802.8
+    UNION ALL SELECT "0", "34", "外部",  "首层",   44646.0
+    UNION ALL SELECT "0", "34", "总体",  "总体",   258002.4
+    UNION ALL SELECT "0", "5d", "内部",  "裂变层", 15828.0
+    UNION ALL SELECT "0", "5d", "内部",  "首层",   5639.7
+    UNION ALL SELECT "0", "5d", "外部",  "裂变层", 190163.7
+    UNION ALL SELECT "0", "5d", "外部",  "首层",   44828.3
+    UNION ALL SELECT "0", "5d", "总体",  "总体",   253589.2
+    UNION ALL SELECT "0", "67", "内部",  "裂变层", 15757.9
+    UNION ALL SELECT "0", "67", "内部",  "首层",   5627.2
+    UNION ALL SELECT "0", "67", "外部",  "裂变层", 195371.1
+    UNION ALL SELECT "0", "67", "外部",  "首层",   44620.4
+    UNION ALL SELECT "0", "67", "总体",  "总体",   258466.2
+    UNION ALL SELECT "0", "89", "内部",  "裂变层", 15889.8
+    UNION ALL SELECT "0", "89", "内部",  "首层",   5628.7
+    UNION ALL SELECT "0", "89", "外部",  "裂变层", 187814.3
+    UNION ALL SELECT "0", "89", "外部",  "首层",   44561.2
+    UNION ALL SELECT "0", "89", "总体",  "总体",   251052.3
+    UNION ALL SELECT "0", "ab", "内部",  "裂变层", 16179.7
+    UNION ALL SELECT "0", "ab", "内部",  "首层",   5661.8
+    UNION ALL SELECT "0", "ab", "外部",  "裂变层", 184483.2
+    UNION ALL SELECT "0", "ab", "外部",  "首层",   44617.9
+    UNION ALL SELECT "0", "ab", "总体",  "总体",   248110.7
+    UNION ALL SELECT "0", "ef", "内部",  "裂变层", 15500.8
+    UNION ALL SELECT "0", "ef", "内部",  "首层",   5664.6
+    UNION ALL SELECT "0", "ef", "外部",  "裂变层", 184847.8
+    UNION ALL SELECT "0", "ef", "外部",  "首层",   44637.9
+    UNION ALL SELECT "0", "ef", "总体",  "总体",   247799.1
+    -- ── apptype = 4 ──
+    UNION ALL SELECT "4", "01", "内部",  "裂变层", 9358.8
+    UNION ALL SELECT "4", "01", "内部",  "首层",   3953.4
+    UNION ALL SELECT "4", "01", "外部",  "裂变层", 129526.8
+    UNION ALL SELECT "4", "01", "外部",  "首层",   82839.8
+    UNION ALL SELECT "4", "01", "总体",  "总体",   221923.1
+    UNION ALL SELECT "4", "2c", "内部",  "裂变层", 9556.8
+    UNION ALL SELECT "4", "2c", "内部",  "首层",   3935.1
+    UNION ALL SELECT "4", "2c", "外部",  "裂变层", 131426.8
+    UNION ALL SELECT "4", "2c", "外部",  "首层",   82767.1
+    UNION ALL SELECT "4", "2c", "总体",  "总体",   223926.3
+    UNION ALL SELECT "4", "34", "内部",  "裂变层", 11739.7
+    UNION ALL SELECT "4", "34", "内部",  "首层",   3892.0
+    UNION ALL SELECT "4", "34", "外部",  "裂变层", 126370.7
+    UNION ALL SELECT "4", "34", "外部",  "首层",   82681.9
+    UNION ALL SELECT "4", "34", "总体",  "总体",   220940.6
+    UNION ALL SELECT "4", "5d", "内部",  "裂变层", 9999.3
+    UNION ALL SELECT "4", "5d", "内部",  "首层",   3950.1
+    UNION ALL SELECT "4", "5d", "外部",  "裂变层", 128746.6
+    UNION ALL SELECT "4", "5d", "外部",  "首层",   82744.1
+    UNION ALL SELECT "4", "5d", "总体",  "总体",   221669.4
+    UNION ALL SELECT "4", "67", "内部",  "裂变层", 9685.2
+    UNION ALL SELECT "4", "67", "内部",  "首层",   3942.3
+    UNION ALL SELECT "4", "67", "外部",  "裂变层", 125356.4
+    UNION ALL SELECT "4", "67", "外部",  "首层",   82720.8
+    UNION ALL SELECT "4", "67", "总体",  "总体",   217974.6
+    UNION ALL SELECT "4", "89", "内部",  "裂变层", 11370.5
+    UNION ALL SELECT "4", "89", "内部",  "首层",   3964.6
+    UNION ALL SELECT "4", "89", "外部",  "裂变层", 130128.5
+    UNION ALL SELECT "4", "89", "外部",  "首层",   82614.5
+    UNION ALL SELECT "4", "89", "总体",  "总体",   224279.4
+    UNION ALL SELECT "4", "ab", "内部",  "裂变层", 9481.3
+    UNION ALL SELECT "4", "ab", "内部",  "首层",   3933.9
+    UNION ALL SELECT "4", "ab", "外部",  "裂变层", 129952.9
+    UNION ALL SELECT "4", "ab", "外部",  "首层",   82784.3
+    UNION ALL SELECT "4", "ab", "总体",  "总体",   222393.9
+    UNION ALL SELECT "4", "ef", "内部",  "裂变层", 9533.7
+    UNION ALL SELECT "4", "ef", "内部",  "首层",   3947.6
+    UNION ALL SELECT "4", "ef", "外部",  "裂变层", 127244.0
+    UNION ALL SELECT "4", "ef", "外部",  "首层",   82754.4
+    UNION ALL SELECT "4", "ef", "总体",  "总体",   219735.4
+)
+-- Main metrics joined with the same-day dau2 and with the hard-coded baseline:
+-- dau_vs_5d = dau2 / dau2_base5 for the matching apptype, bucket and stratum
+-- (NULL when the baseline is missing or 0).
+,t_combined AS
+(
+    SELECT
+        mt.*
+        ,du.dau2
+        ,ROUND(du.dau2 / NULLIF(bl.dau2_base5, 0), 6) AS dau_vs_5d
+    FROM t_metrics mt
+    LEFT JOIN t_dau2 du
+        ON  mt.dt = du.dt
+        AND mt.apptype = du.apptype
+        AND mt.abcode = du.abcode
+        AND mt.suffix_group = du.suffix_group
+        AND mt.source_type = du.source_type
+        AND mt.layer_type = du.layer_type
+    LEFT JOIN t_dau2_base5 bl
+        ON  mt.apptype = bl.apptype
+        AND mt.suffix_group = bl.suffix_group
+        AND mt.source_type = bl.source_type
+        AND mt.layer_type = bl.layer_type
+)
+-- t_ctrl: daily metrics of the baseline bucket ('89'), exposed with a ctrl_ prefix
+-- so they can serve as the reference side of the cross-bucket comparison.
+,t_ctrl AS
+(
+    SELECT
+        -- join keys used by the final SELECT
+        dt,
+        apptype,
+        source_type,
+        layer_type,
+        -- reference values, one per compared metric
+        dau_vs_5d   AS ctrl_dau_vs_5d,
+        exp         AS ctrl_exp,
+        exp_per_dau AS ctrl_exp_per_dau,
+        str_one     AS ctrl_str_one,
+        ros_one     AS ctrl_ros_one,
+        str         AS ctrl_str,
+        ros         AS ctrl_ros,
+        vovh24      AS ctrl_vovh24,
+        str_plus    AS ctrl_str_plus,
+        ros_minus   AS ctrl_ros_minus,
+        bn_rov      AS ctrl_bn_rov,
+        c1_rov      AS ctrl_c1_rov,
+        cn_rov      AS ctrl_cn_rov,
+        d1_rov      AS ctrl_d1_rov,
+        dn_rov      AS ctrl_dn_rov,
+        total_rov   AS ctrl_total_rov,
+        ecs         AS ctrl_ecs,
+        ecs_ratio   AS ctrl_ecs_ratio,
+        arp         AS ctrl_arp
+    FROM t_combined
+    WHERE suffix_group = '89'
+)
+-- Final output: raw metrics plus baseline-comparison columns.
+-- Every *_diff column = current value / baseline-bucket value - 1, rounded to 6 decimals.
+SELECT
+    cur.dt,
+    cur.apptype,
+    cur.abcode,
+    cur.suffix_group,
+    cur.source_type,
+    cur.layer_type,
+    cur.suffix,
+    -- ── raw metrics ──
+    cur.exp_per_dau,
+    cur.str_one,
+    cur.ros_one,
+    cur.str,
+    cur.ros,
+    cur.str_plus,
+    cur.ros_minus,
+    cur.bn_rov,
+    cur.c1_rov,
+    cur.cn_rov,
+    cur.d1_rov,
+    cur.dn_rov,
+    cur.total_rov,
+    cur.vovh24,
+    cur.dau,
+    cur.exp,
+    cur.distinct_vid_cnt,
+    cur.ecs,
+    cur.ecs_ratio,
+    cur.gini,
+    cur.arp,
+    cur.is_share,
+    cur.share_cnt,
+    cur.is_return_1,
+    cur.return_n_uv,
+    cur.viewh24,
+    cur.return_n_uv_noself,
+    cur.cn,
+    cur.c1,
+    cur.dn,
+    cur.d1,
+    cur.dau2,
+    -- ── DAU2 over time: dau2 relative to the hard-coded baseline value ──
+    cur.dau_vs_5d,
+    -- ── across buckets: same-day value vs baseline bucket ('89'); NULLIF yields NULL on a zero baseline ──
+    ROUND(cur.dau_vs_5d   / NULLIF(ctl.ctrl_dau_vs_5d, 0)   - 1, 6) AS dau_vs_5d_diff,
+    ROUND(cur.exp         / NULLIF(ctl.ctrl_exp, 0)         - 1, 6) AS exp_diff,
+    ROUND(cur.exp_per_dau / NULLIF(ctl.ctrl_exp_per_dau, 0) - 1, 6) AS exp_per_dau_diff,
+    ROUND(cur.str_one     / NULLIF(ctl.ctrl_str_one, 0)     - 1, 6) AS str_one_diff,
+    ROUND(cur.ros_one     / NULLIF(ctl.ctrl_ros_one, 0)     - 1, 6) AS ros_one_diff,
+    ROUND(cur.str         / NULLIF(ctl.ctrl_str, 0)         - 1, 6) AS str_diff,
+    ROUND(cur.ros         / NULLIF(ctl.ctrl_ros, 0)         - 1, 6) AS ros_diff,
+    ROUND(cur.vovh24      / NULLIF(ctl.ctrl_vovh24, 0)      - 1, 6) AS vovh24_diff,
+    ROUND(cur.str_plus    / NULLIF(ctl.ctrl_str_plus, 0)    - 1, 6) AS str_plus_diff,
+    ROUND(cur.ros_minus   / NULLIF(ctl.ctrl_ros_minus, 0)   - 1, 6) AS ros_minus_diff,
+    ROUND(cur.bn_rov      / NULLIF(ctl.ctrl_bn_rov, 0)      - 1, 6) AS bn_rov_diff,
+    ROUND(cur.c1_rov      / NULLIF(ctl.ctrl_c1_rov, 0)      - 1, 6) AS c1_rov_diff,
+    ROUND(cur.cn_rov      / NULLIF(ctl.ctrl_cn_rov, 0)      - 1, 6) AS cn_rov_diff,
+    ROUND(cur.d1_rov      / NULLIF(ctl.ctrl_d1_rov, 0)      - 1, 6) AS d1_rov_diff,
+    ROUND(cur.dn_rov      / NULLIF(ctl.ctrl_dn_rov, 0)      - 1, 6) AS dn_rov_diff,
+    ROUND(cur.total_rov   / NULLIF(ctl.ctrl_total_rov, 0)   - 1, 6) AS total_rov_diff,
+    ROUND(cur.ecs         / NULLIF(ctl.ctrl_ecs, 0)         - 1, 6) AS ecs_diff,
+    ROUND(cur.ecs_ratio   / NULLIF(ctl.ctrl_ecs_ratio, 0)   - 1, 6) AS ecs_ratio_diff,
+    ROUND(cur.arp         / NULLIF(ctl.ctrl_arp, 0)         - 1, 6) AS arp_diff
+FROM t_combined cur
+-- Each row is paired with the baseline-bucket row of the same day, apptype, source and layer.
+LEFT JOIN t_ctrl ctl
+    ON  cur.dt          = ctl.dt
+    AND cur.apptype     = ctl.apptype
+    AND cur.source_type = ctl.source_type
+    AND cur.layer_type  = ctl.layer_type
+ORDER BY
+    cur.dt DESC,
+    cur.apptype,
+    cur.abcode,
+    cur.suffix_group,
+    cur.source_type,
+    cur.layer_type
+;