Explorar el Código

feat: 新增 v3 基线对比版本(dau_vs_5d + _diff 系列),支持 apptype 0/4 合并

- base_v3_new_v3: apptype=4 硬编码基线版(基线期 20260307~20260311)
- base_v3_new_v3_scan: apptype=4 独立扫描版
- base_v4_v1_new_v3: apptype=0 硬编码基线版
- base_v4_v1_new_v3_scan: apptype=0 独立扫描版
- base_all_new_v3: apptype 0+4 合并版,t_experiment_map 和 t_dau2_base5 按 apptype 分组配置
- 新增 20 列:dau_vs_5d(纵向 vs 历史 5 天)+ *_diff(横向 vs 89 基线桶)
- 修复合并版 abcode 过滤条件差异(apptype=0 无 ab5/ab6/ab7)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
yangxiaohui hace 4 semanas
padre
commit
5bc7b9d8d7

+ 10 - 0
tasks/00_尾号实验/base_all_new_v3.json

@@ -0,0 +1,10 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "quaDtn",
+  "sort": "dt:desc,apptype:asc,suffix_group:asc",
+  "order": {
+      "suffix_group": ["ab", "34", "2c", "67", "01", "5d", "ef", "89"]
+  },
+  "cols": null,
+  "append_cols": true
+}

+ 621 - 0
tasks/00_尾号实验/base_all_new_v3.sql

@@ -0,0 +1,621 @@
+-- ════════════════════════════════════════════════════════════════════════════
+-- 两层尾号映射 (SCD Type 2 模式) — apptype = 0 + 4 合并版 + 基线对比
+-- [硬编码版:基线 DAU2 预计算,无额外扫描]
+--
+-- 第一层 t_suffix_group:物理尾号 → 分流桶 ID(16 个 hex 尾号分成 8 个 2-元桶)
+--   - 分流规则不变时,此层永不改
+--
+-- 第二层 t_experiment_map:分流桶 → 实验名 + 生效日期
+--   - 只列出"分配了具体实验"的桶,未列出的桶自动默认为"对照组"
+--   - 支持 1 对多:同一个实验占多个桶时,用同一 abcode 字符串多加几行
+--   - 实验切换:不删旧行,关闭 end_dt + 追加新行(保留历史可回溯)
+--
+-- ┌─ 基线配置 ──────────────────────────────────────────────────────────┐
+-- │  基线桶:89              (对照组物理桶,横向对比基准)              │
+-- │  基线天数:20260307~20260311(5 天均值,DAU2 纵向对比基准)         │
+-- │  apptype:0 + 4 合并输出,按 apptype 分别对比各自基线              │
+-- └─────────────────────────────────────────────────────────────────────┘
+-- ════════════════════════════════════════════════════════════════════════════
+WITH t_suffix_group AS
+(
+    -- Layer 1: physical hex suffix -> traffic-split bucket (16 suffixes, 2 per bucket).
+    -- Rows are listed bucket by bucket; the CTE is consumed via equality joins on suffix.
+    SELECT  '0' AS suffix
+            ,'01' AS suffix_group
+    UNION ALL SELECT '1', '01'
+    UNION ALL SELECT '2', '2c'
+    UNION ALL SELECT 'c', '2c'
+    UNION ALL SELECT '3', '34'
+    UNION ALL SELECT '4', '34'
+    UNION ALL SELECT '5', '5d'
+    UNION ALL SELECT 'd', '5d'
+    UNION ALL SELECT '6', '67'
+    UNION ALL SELECT '7', '67'
+    UNION ALL SELECT '8', '89'
+    UNION ALL SELECT '9', '89'
+    UNION ALL SELECT 'a', 'ab'
+    UNION ALL SELECT 'b', 'ab'
+    UNION ALL SELECT 'e', 'ef'
+    UNION ALL SELECT 'f', 'ef'
+)
+-- ┌─ Config area 1/2: experiment mapping (grouped by apptype) ───────────┐
+-- │  Columns: apptype, suffix_group, abcode (experiment name),           │
+-- │           start_dt, end_dt (yyyymmdd strings, both ends inclusive).  │
+-- │  A bucket with no row covering ${dt} falls back to "对照组" in the   │
+-- │  consumers (COALESCE after a LEFT JOIN).                             │
+-- └──────────────────────────────────────────────────────────────────────┘
+-- Consumers match rows with '${dt}' BETWEEN start_dt AND end_dt, so the date
+-- ranges of one (apptype, suffix_group) must not overlap; overlapping rows
+-- would duplicate every joined record.
+-- NOTE(review): adding a new apptype is not only a matter of appending rows here:
+-- t_base and t_dau2_bucket hard-code apptype IN ("0","4") with per-apptype abcode
+-- filters, and t_dau2_base5 needs matching baseline rows.
+,t_experiment_map AS
+(
+    -- ── apptype = 4 ──────────────────────────────────────────────────
+    SELECT "4" AS apptype, "ab" AS suffix_group, "实验组:变更str*ros建模目标实验" AS abcode, "20260413" AS start_dt, "29991231" AS end_dt
+    UNION ALL SELECT "4", "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231"
+    UNION ALL SELECT "4", "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+    UNION ALL SELECT "4", "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+    UNION ALL SELECT "4", "34", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+    UNION ALL SELECT "4", "67", "实验组:bn_ros新损失函数",        "20260311", "20260319"
+    UNION ALL SELECT "4", "5d", "实验组:解构特征排序str模型",     "20260314", "20260406"
+    UNION ALL SELECT "4", "ef", "实验组:解构特征排序str模型&召回", "20260314", "20260320"
+    UNION ALL SELECT "4", "ef", "实验组:DNN模型",                 "20260407", "29991231"
+    UNION ALL SELECT "4", "2c", "实验组:DNN模型-调参",            "20260413", "29991231"
+    UNION ALL SELECT "4", "89", "对照组",                          "20260301", "20260412"
+    UNION ALL SELECT "4", "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+
+    -- ── apptype = 0 ──────────────────────────────────────────────────
+    UNION ALL SELECT "0", "ab", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+    UNION ALL SELECT "0", "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231"
+    UNION ALL SELECT "0", "34", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+    UNION ALL SELECT "0", "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+    UNION ALL SELECT "0", "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+    UNION ALL SELECT "0", "ef", "实验组:DNN模型-调参",            "20260410", "29991231"
+    UNION ALL SELECT "0", "2c", "实验组:DNN模型",                 "20260413", "29991231"
+    UNION ALL SELECT "0", "89", "对照组",                          "20260301", "20260412"
+    UNION ALL SELECT "0", "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+)
+-- t_base: rows of the exposure table for partition ${dt} (apptype 0 and 4), enriched with
+--   * suffix       - last character of extend.rootsessionid
+--   * page         - source page collapsed into "推荐" / "非推荐" (see CASE below)
+--   * cn / c1      - return-UV counts from subquery cc (share -> click chain)
+--   * dn / d1      - return-UV counts of the following viewed video from subquery dd
+--   * suffix_group - bucket looked up in t_suffix_group
+--   * abcode       - experiment name active on ${dt} in t_experiment_map, else "对照组"
+-- The source table's own abcode column is used only in the WHERE filter; it is not
+-- selected, so the abcode visible downstream is always the experiment name.
+,t_base AS
+(
+    SELECT  sub.*
+            ,sg.suffix_group
+            ,COALESCE(m.abcode,"对照组") AS abcode
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        -- The WHERE clause below restricts page to the six values handled by the
+                        -- two WHEN branches, so the ELSE branch is not expected to be reached.
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- cc: per (sharer machinecode, subsessionid, shared video) return UV.
+                                --   a  = share events of ${dt}
+                                --   b  = click events whose rootshareid is one of a's shareid
+                                --   b1 = share events made by the clicking machine in that same subsession
+                                --   b2 = click events whose rootshareid is one of b1's shareid
+                                --   cn = distinct b2 machines that differ from the b1 sharer
+                                --   c1 = same as cn, restricted to b2.sharedepth = 1
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- dd: return UV brought by the NEXT viewed video in the same subsession.
+                                -- The inner query computes, per first view of a video, the return UV of
+                                -- shares of that video (回流 = all depths, 回流1 = sharedepth 1) and numbers
+                                -- the views by logtimestamp (rn). LAG over "ORDER BY rn DESC" then reads
+                                -- the row with the next-higher rn, i.e. the following view; the default 0
+                                -- applies to the last view of the subsession.
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    -- NOTE(review): rn is partitioned by subsessionid only, while the LAG
+                                                    -- above partitions by (mid, subsessionid); the relative order inside a
+                                                    -- (mid, subsessionid) partition is the same either way.
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn
+                                            FROM    (
+                                                        -- First videoView event per (mid, subsessionid, videoid).
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0","4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                -- Source-table abcode filter, per apptype:
+                --   apptype=4 keeps ab0-ab9; apptype=0 keeps ab0-ab4, ab8, ab9 (no ab5/ab6/ab7).
+                -- Each "apptype = X OR abcode IN (...)" clause constrains only the OTHER apptype.
+                AND     abcode NOT IN ("ab100")
+                AND     (apptype = "4"
+                         OR abcode IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9"))
+                AND     (apptype = "0"
+                         OR abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9"))
+            ) sub
+    -- INNER JOIN: only rows whose suffix is one of the 16 listed hex characters are analysed;
+    -- this drops records with a missing or unexpected suffix.
+    INNER JOIN t_suffix_group sg
+    ON      sub.suffix = sg.suffix
+    -- LEFT JOIN: a bucket may have no experiment row covering ${dt}; m.abcode is then NULL
+    -- and the COALESCE above labels the row "对照组".
+    LEFT JOIN t_experiment_map m
+    ON      sub.apptype = m.apptype
+    AND     sg.suffix_group = m.suffix_group
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt
+)
+-- Exposure count of each vid inside one (dt, apptype, abcode, suffix) cell, "推荐" pages only.
+-- Shared intermediate result for the ECS and ARP CTEs.
+,t_vid_exp AS
+(
+    SELECT  tb.dt
+            ,tb.apptype
+            ,tb.abcode
+            ,tb.suffix
+            ,tb.vid
+            ,COUNT(*) AS vid_exp_cnt
+    FROM    t_base tb
+    WHERE   tb.page = "推荐"
+    GROUP BY tb.dt, tb.apptype, tb.abcode, tb.suffix, tb.vid
+)
+-- ECS (Effective Catalog Size) per (dt, apptype, abcode, suffix):
+--   ECS = 2 * SUM(exp_share_i * exp_rank_i) - 1
+--   exp_share_i = share of the cell's exposures that went to vid i
+--   exp_rank_i  = 1-based rank of vid i by exposure count, descending
+-- Larger values mean exposure is spread over more videos; smaller values mean it is
+-- concentrated on the top-ranked ones.
+,t_bucket_ecs AS
+(
+    SELECT  ranked.dt
+            ,ranked.apptype
+            ,ranked.abcode
+            ,ranked.suffix
+            ,2 * SUM(ranked.exp_share * ranked.exp_rank) - 1 AS ecs
+    FROM    (
+                SELECT  ve.dt
+                        ,ve.apptype
+                        ,ve.abcode
+                        ,ve.suffix
+                        ,ve.vid_exp_cnt / SUM(ve.vid_exp_cnt) OVER (PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix) AS exp_share
+                        ,ROW_NUMBER() OVER (PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix ORDER BY ve.vid_exp_cnt DESC) AS exp_rank
+                FROM    t_vid_exp ve
+            ) ranked
+    GROUP BY ranked.dt, ranked.apptype, ranked.abcode, ranked.suffix
+)
+-- Popularity reference for ARP: exposure count per (dt, apptype, vid) over every "推荐"
+-- row of t_base. The grouping deliberately omits abcode and suffix, so the count spans
+-- all experiment groups and suffixes that passed t_base's filters.
+,t_vid_global_pop AS
+(
+    SELECT  tb.dt
+            ,tb.apptype
+            ,tb.vid
+            ,COUNT(*) AS vid_global_pop
+    FROM    t_base tb
+    WHERE   tb.page = "推荐"
+    GROUP BY tb.dt, tb.apptype, tb.vid
+)
+-- ARP (Average Recommendation Popularity) per (dt, apptype, abcode, suffix):
+-- the exposure-weighted mean of vid_global_pop, so heavily exposed vids weigh more.
+-- Reading ECS and ARP together:
+--   high ECS + low ARP  = spread out, leaning long-tail
+--   high ECS + high ARP = spread out, but within popular videos
+--   low ECS  + low ARP  = concentrated on unpopular videos
+--   low ECS  + high ARP = concentrated on popular videos
+,t_bucket_arp AS
+(
+    SELECT  ve.dt
+            ,ve.apptype
+            ,ve.abcode
+            ,ve.suffix
+            ,SUM(ve.vid_exp_cnt * gp.vid_global_pop) / SUM(ve.vid_exp_cnt) AS arp
+    FROM    t_vid_exp ve
+    LEFT JOIN t_vid_global_pop gp
+    ON      ve.dt = gp.dt
+    AND     ve.apptype = gp.apptype
+    AND     ve.vid = gp.vid
+    GROUP BY ve.dt, ve.apptype, ve.abcode, ve.suffix
+)
+-- dau2 per single suffix: distinct machinecode in the user-active log for ${dt},
+-- labelled with bucket and experiment name the same way t_base is.
+-- Alternative source kept for reference (disabled): loghubods.useractive_log_per5min
+-- with dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500").
+-- NOTE(review): for apptype=0 this keeps ab_test003 in ab0-ab9, whereas t_base keeps
+-- only ab0-ab4/ab8/ab9 for apptype=0 - confirm the difference is intended.
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(act.dt,1,8) AS dt
+            ,act.apptype
+            ,COALESCE(em.abcode,"对照组") AS abcode
+            ,grp.suffix_group
+            ,act.suffix
+            ,COUNT(DISTINCT act.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+                FROM    loghubods.useractive_log
+                WHERE   dt = "${dt}"
+                AND     apptype IN ("0","4")
+                -- apptype=4 is taken as is; any other apptype must carry an ab_test003
+                -- value in ab0-ab9 that is not ab100.
+                AND     (
+                            apptype = "4"
+                            OR (
+                                    GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                                AND GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') NOT IN ("ab100")
+                               )
+                        )
+            ) act
+    INNER JOIN t_suffix_group grp
+    ON      act.suffix = grp.suffix
+    LEFT JOIN t_experiment_map em
+    ON      act.apptype = em.apptype
+    AND     grp.suffix_group = em.suffix_group
+    AND     '${dt}' BETWEEN em.start_dt AND em.end_dt
+    GROUP BY SUBSTR(act.dt,1,8)
+             ,act.apptype
+             ,COALESCE(em.abcode,"对照组")
+             ,grp.suffix_group
+             ,act.suffix
+)
+-- dau2 per bucket: mean of the per-suffix dau2 values inside each suffix_group.
+,t_dau2 AS
+(
+    SELECT  db.dt
+            ,db.apptype
+            ,db.abcode
+            ,db.suffix_group
+            ,AVG(db.dau2) AS dau2
+    FROM    t_dau2_bucket db
+    GROUP BY db.dt, db.apptype, db.abcode, db.suffix_group
+)
+-- Per-suffix metrics over "推荐" exposures (UV is de-duplicated within one suffix).
+-- One output row per (dt, apptype, abcode, suffix_group, suffix).
+--   * Ratio columns return 0 when their denominator is 0 or NULL: every non-constant
+--     denominator is wrapped in NULLIF(...,0), matching the safe-division style used by
+--     the later CTEs, and the NULL result is then mapped to 0 by COALESCE.
+--   * COUNT(1) is the exposure count of the group and is never 0 inside a group.
+,t_bucket AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,suffix_group
+            ,suffix
+            ,COALESCE(COUNT(1) / NULLIF(COUNT(DISTINCT mid),0),0) AS exp_per_dau
+            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(is_share),0),0) AS ros_one
+            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(share_cnt),0),0) AS ros
+            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(is_return_1),0),0) AS ros_minus
+            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS bn_rov
+            ,COALESCE(SUM(c1) / COUNT(1),0) AS c1_rov
+            ,COALESCE(SUM(cn) / COUNT(1),0) AS cn_rov
+            ,COALESCE(SUM(d1) / COUNT(1),0) AS d1_rov
+            ,COALESCE(SUM(dn) / COUNT(1),0) AS dn_rov
+            -- Combined ROV = bn_rov + cn_rov + dn_rov (all three share the COUNT(1) denominator).
+            -- Each SUM is defaulted to 0 on its own: cn and dn come from LEFT JOINs and can be
+            -- NULL for a whole group, and NULL + x would otherwise wipe out the other terms.
+            ,(COALESCE(SUM(return_n_uv),0) + COALESCE(SUM(cn),0) + COALESCE(SUM(dn),0)) / COUNT(1) AS total_rov
+            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            -- Distinct vids exposed in the suffix; denominator of ecs_ratio downstream.
+            ,COUNT(DISTINCT vid) AS distinct_vid_cnt
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(cn),0) AS cn
+            ,COALESCE(SUM(c1),0) AS c1
+            ,COALESCE(SUM(dn),0) AS dn
+            ,COALESCE(SUM(d1),0) AS d1
+    FROM    t_base
+    WHERE   page = "推荐"
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,suffix_group
+             ,suffix
+)
+-- Bucket-level metrics: mean of the per-suffix values inside each
+-- (dt, apptype, abcode, suffix_group), plus the diversity columns (ECS, ARP).
+,t_metrics AS
+(
+    SELECT  bk.dt
+            ,bk.apptype
+            ,bk.abcode
+            ,bk.suffix_group
+            ,ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau
+            ,ROUND(AVG(bk.str_one),6) AS str_one
+            ,ROUND(AVG(bk.ros_one),6) AS ros_one
+            ,ROUND(AVG(bk.str),6) AS str
+            ,ROUND(AVG(bk.ros),6) AS ros
+            ,ROUND(AVG(bk.str_plus),6) AS str_plus
+            ,ROUND(AVG(bk.ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bk.bn_rov),6) AS bn_rov
+            ,ROUND(AVG(bk.c1_rov),6) AS c1_rov
+            ,ROUND(AVG(bk.cn_rov),6) AS cn_rov
+            ,ROUND(AVG(bk.d1_rov),6) AS d1_rov
+            ,ROUND(AVG(bk.dn_rov),6) AS dn_rov
+            ,ROUND(AVG(bk.total_rov),6) AS total_rov
+            ,ROUND(AVG(bk.vovh24),6) AS vovh24
+            ,AVG(bk.dau) AS dau
+            ,AVG(bk.exp) AS exp
+            ,ROUND(AVG(bk.distinct_vid_cnt),0) AS distinct_vid_cnt
+            ,ROUND(AVG(ec.ecs),1) AS ecs
+            -- ecs_ratio = mean ECS / mean distinct vid count (a ratio of averages);
+            -- it normalises ECS by the size of the exposed catalogue.
+            ,ROUND(AVG(ec.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS ecs_ratio
+            -- gini is computed as 1 - ecs_ratio.
+            ,ROUND(1 - AVG(ec.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS gini
+            ,ROUND(AVG(ar.arp),0) AS arp
+            ,AVG(bk.is_share) AS is_share
+            ,AVG(bk.share_cnt) AS share_cnt
+            ,AVG(bk.is_return_1) AS is_return_1
+            ,AVG(bk.return_n_uv) AS return_n_uv
+            ,AVG(bk.viewh24) AS viewh24
+            ,AVG(bk.return_n_uv_noself) AS return_n_uv_noself
+            ,AVG(bk.cn) AS cn
+            ,AVG(bk.c1) AS c1
+            ,AVG(bk.dn) AS dn
+            ,AVG(bk.d1) AS d1
+            -- Comma-separated list of the suffixes that contributed to this row.
+            ,WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix
+    FROM    t_bucket bk
+    LEFT JOIN t_bucket_ecs ec
+    ON      bk.dt = ec.dt
+    AND     bk.apptype = ec.apptype
+    AND     bk.abcode = ec.abcode
+    AND     bk.suffix = ec.suffix
+    LEFT JOIN t_bucket_arp ar
+    ON      bk.dt = ar.dt
+    AND     bk.apptype = ar.apptype
+    AND     bk.abcode = ar.abcode
+    AND     bk.suffix = ar.suffix
+    GROUP BY bk.dt, bk.apptype, bk.abcode, bk.suffix_group
+)
+-- ════════════════════════════════════════════════════════════════════════════
+-- Baseline comparison layer: DAU2 5-day historical mean + bucket 89 as cross-bucket reference
+-- ════════════════════════════════════════════════════════════════════════════
+-- Config area 2/2: hard-coded 5-day mean DAU2 per (apptype, suffix_group).
+-- Per the file header the baseline window is 20260307~20260311; the values are
+-- precomputed and are not derived anywhere in this query.
+,t_dau2_base5 AS
+(
+    -- ── apptype = 4 ──
+    SELECT  '4' AS apptype
+            ,'01' AS suffix_group
+            ,221923.1 AS dau2_base5
+    UNION ALL SELECT '4', '2c', 223926.3
+    UNION ALL SELECT '4', '34', 220940.6
+    UNION ALL SELECT '4', '5d', 221669.4
+    UNION ALL SELECT '4', '67', 217974.6
+    UNION ALL SELECT '4', '89', 224279.4
+    UNION ALL SELECT '4', 'ab', 222393.9
+    UNION ALL SELECT '4', 'ef', 219735.4
+    -- ── apptype = 0 ──
+    UNION ALL SELECT '0', '01', 250063.2
+    UNION ALL SELECT '0', '2c', 261125.5
+    UNION ALL SELECT '0', '34', 258002.4
+    UNION ALL SELECT '0', '5d', 253589.2
+    UNION ALL SELECT '0', '67', 258466.2
+    UNION ALL SELECT '0', '89', 251052.3
+    UNION ALL SELECT '0', 'ab', 248110.7
+    UNION ALL SELECT '0', 'ef', 247799.1
+)
+-- Bucket metrics joined with the day's dau2 and with dau_vs_5d,
+-- where dau_vs_5d = dau2 / hard-coded 5-day baseline (NULL when no baseline row or baseline is 0).
+,t_combined AS
+(
+    SELECT  mt.*
+            ,d2.dau2
+            ,ROUND(d2.dau2 / NULLIF(b5.dau2_base5, 0), 6) AS dau_vs_5d
+    FROM    t_metrics mt
+    LEFT JOIN t_dau2 d2
+    ON      mt.dt = d2.dt
+    AND     mt.apptype = d2.apptype
+    AND     mt.abcode = d2.abcode
+    AND     mt.suffix_group = d2.suffix_group
+    LEFT JOIN t_dau2_base5 b5
+    ON      mt.apptype = b5.apptype
+    AND     mt.suffix_group = b5.suffix_group
+)
+-- Reference rows for the cross-bucket comparison: the metrics of bucket 89,
+-- re-exposed under ctrl_* names and keyed by (dt, apptype).
+,t_ctrl AS
+(
+    SELECT  cb.dt
+            ,cb.apptype
+            ,cb.dau_vs_5d   AS ctrl_dau_vs_5d
+            ,cb.exp         AS ctrl_exp
+            ,cb.exp_per_dau AS ctrl_exp_per_dau
+            ,cb.str_one     AS ctrl_str_one
+            ,cb.ros_one     AS ctrl_ros_one
+            ,cb.str         AS ctrl_str
+            ,cb.ros         AS ctrl_ros
+            ,cb.vovh24      AS ctrl_vovh24
+            ,cb.str_plus    AS ctrl_str_plus
+            ,cb.ros_minus   AS ctrl_ros_minus
+            ,cb.bn_rov      AS ctrl_bn_rov
+            ,cb.c1_rov      AS ctrl_c1_rov
+            ,cb.cn_rov      AS ctrl_cn_rov
+            ,cb.d1_rov      AS ctrl_d1_rov
+            ,cb.dn_rov      AS ctrl_dn_rov
+            ,cb.total_rov   AS ctrl_total_rov
+            ,cb.ecs         AS ctrl_ecs
+            ,cb.ecs_ratio   AS ctrl_ecs_ratio
+            ,cb.arp         AS ctrl_arp
+    FROM    t_combined cb
+    WHERE   cb.suffix_group = '89'
+)
+-- Final output: raw bucket metrics followed by the comparison columns.
+-- Every *_diff column = current bucket value / bucket-89 value of the same (dt, apptype) - 1,
+-- and is NULL when the bucket-89 value is 0 or missing.
+SELECT  cur.dt
+        ,cur.apptype
+        ,cur.abcode
+        ,cur.suffix_group
+        ,cur.suffix
+        -- ── raw metrics ──
+        ,cur.exp_per_dau
+        ,cur.str_one
+        ,cur.ros_one
+        ,cur.str
+        ,cur.ros
+        ,cur.str_plus
+        ,cur.ros_minus
+        ,cur.bn_rov
+        ,cur.c1_rov
+        ,cur.cn_rov
+        ,cur.d1_rov
+        ,cur.dn_rov
+        ,cur.total_rov
+        ,cur.vovh24
+        ,cur.dau
+        ,cur.exp
+        ,cur.distinct_vid_cnt
+        ,cur.ecs
+        ,cur.ecs_ratio
+        ,cur.gini
+        ,cur.arp
+        ,cur.is_share
+        ,cur.share_cnt
+        ,cur.is_return_1
+        ,cur.return_n_uv
+        ,cur.viewh24
+        ,cur.return_n_uv_noself
+        ,cur.cn
+        ,cur.c1
+        ,cur.dn
+        ,cur.d1
+        ,cur.dau2
+        -- ── DAU2 vs. the hard-coded 5-day historical mean ──
+        ,cur.dau_vs_5d
+        -- ── same-day comparison of each bucket against bucket 89 ──
+        ,ROUND(cur.dau_vs_5d   / NULLIF(ref.ctrl_dau_vs_5d, 0)   - 1, 6) AS dau_vs_5d_diff
+        ,ROUND(cur.exp         / NULLIF(ref.ctrl_exp, 0)         - 1, 6) AS exp_diff
+        ,ROUND(cur.exp_per_dau / NULLIF(ref.ctrl_exp_per_dau, 0) - 1, 6) AS exp_per_dau_diff
+        ,ROUND(cur.str_one     / NULLIF(ref.ctrl_str_one, 0)     - 1, 6) AS str_one_diff
+        ,ROUND(cur.ros_one     / NULLIF(ref.ctrl_ros_one, 0)     - 1, 6) AS ros_one_diff
+        ,ROUND(cur.str         / NULLIF(ref.ctrl_str, 0)         - 1, 6) AS str_diff
+        ,ROUND(cur.ros         / NULLIF(ref.ctrl_ros, 0)         - 1, 6) AS ros_diff
+        ,ROUND(cur.vovh24      / NULLIF(ref.ctrl_vovh24, 0)      - 1, 6) AS vovh24_diff
+        ,ROUND(cur.str_plus    / NULLIF(ref.ctrl_str_plus, 0)    - 1, 6) AS str_plus_diff
+        ,ROUND(cur.ros_minus   / NULLIF(ref.ctrl_ros_minus, 0)   - 1, 6) AS ros_minus_diff
+        ,ROUND(cur.bn_rov      / NULLIF(ref.ctrl_bn_rov, 0)      - 1, 6) AS bn_rov_diff
+        ,ROUND(cur.c1_rov      / NULLIF(ref.ctrl_c1_rov, 0)      - 1, 6) AS c1_rov_diff
+        ,ROUND(cur.cn_rov      / NULLIF(ref.ctrl_cn_rov, 0)      - 1, 6) AS cn_rov_diff
+        ,ROUND(cur.d1_rov      / NULLIF(ref.ctrl_d1_rov, 0)      - 1, 6) AS d1_rov_diff
+        ,ROUND(cur.dn_rov      / NULLIF(ref.ctrl_dn_rov, 0)      - 1, 6) AS dn_rov_diff
+        ,ROUND(cur.total_rov   / NULLIF(ref.ctrl_total_rov, 0)   - 1, 6) AS total_rov_diff
+        ,ROUND(cur.ecs         / NULLIF(ref.ctrl_ecs, 0)         - 1, 6) AS ecs_diff
+        ,ROUND(cur.ecs_ratio   / NULLIF(ref.ctrl_ecs_ratio, 0)   - 1, 6) AS ecs_ratio_diff
+        ,ROUND(cur.arp         / NULLIF(ref.ctrl_arp, 0)         - 1, 6) AS arp_diff
+FROM    t_combined cur
+LEFT JOIN t_ctrl ref
+ON      cur.dt = ref.dt
+AND     cur.apptype = ref.apptype
+ORDER BY cur.dt DESC
+         ,cur.apptype
+         ,cur.abcode
+         ,cur.suffix_group
+;

+ 10 - 0
tasks/00_尾号实验/base_v3_new_v3.json

@@ -0,0 +1,10 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "Jeh90P",
+  "sort": "dt:desc,suffix_group:asc",
+  "order": {
+      "suffix_group": ["ab", "34", "2c", "67", "01", "5d", "ef", "89"]
+  },
+  "cols": null,
+  "append_cols": true
+}

+ 629 - 0
tasks/00_尾号实验/base_v3_new_v3.sql

@@ -0,0 +1,629 @@
+-- ════════════════════════════════════════════════════════════════════════════
+-- 两层尾号映射 (SCD Type 2 模式) + 基线对比(vs 89 桶 + vs 历史 5 天)
+--
+-- 第一层 t_suffix_group:物理尾号 → 分流桶 ID(16 个 hex 尾号分成 8 个 2-元桶)
+--   - 分流规则不变时,此层永不改
+--
+-- 第二层 t_experiment_map:分流桶 → 实验名 + 生效日期
+--   - 只列出"分配了具体实验"的桶,未列出的桶自动默认为"对照组"
+--   - 支持 1 对多:同一个实验占多个桶时,用同一 abcode 字符串多加几行
+--   - 实验切换:不删旧行,关闭 end_dt + 追加新行(保留历史可回溯)
+--
+-- ┌─ 基线配置 ──────────────────────────────────────────────────────────┐
+-- │  基线桶:89              (对照组物理桶,横向对比基准)              │
+-- │  基线天数:20260307~20260311(5 天均值,DAU2 纵向对比基准)         │
+-- │  新增列:dau_vs_5d / dau_vs_5d_diff / *_diff 系列差值列           │
+-- └─────────────────────────────────────────────────────────────────────┘
+-- ════════════════════════════════════════════════════════════════════════════
+-- Layer 1: physical suffix -> bucket id (16 hex suffixes, two per bucket, 8 buckets).
+WITH t_suffix_group AS
+(
+    SELECT           "0" AS suffix, "01" AS suffix_group
+    UNION ALL SELECT "1",           "01"
+    UNION ALL SELECT "2",           "2c"
+    UNION ALL SELECT "c",           "2c"
+    UNION ALL SELECT "3",           "34"
+    UNION ALL SELECT "4",           "34"
+    UNION ALL SELECT "5",           "5d"
+    UNION ALL SELECT "d",           "5d"
+    UNION ALL SELECT "6",           "67"
+    UNION ALL SELECT "7",           "67"
+    UNION ALL SELECT "8",           "89"
+    UNION ALL SELECT "9",           "89"
+    UNION ALL SELECT "a",           "ab"
+    UNION ALL SELECT "b",           "ab"
+    UNION ALL SELECT "e",           "ef"
+    UNION ALL SELECT "f",           "ef"
+)
+-- Layer 2: bucket -> experiment name + effective date range (start_dt/end_dt are inclusive yyyymmdd strings).
+--   A bucket with no row covering the run date falls back to "对照组" via COALESCE in the CTEs that join this map.
+--   A suffix_group may have several rows (SCD Type 2), but date ranges must not overlap: at most one row may match a given date.
+,t_experiment_map AS
+(
+    -- ab bucket (described by the author as the former baseline): no row before 20260413, so it defaults to "对照组" until then
+    SELECT "ab" AS suffix_group, "实验组:变更str*ros建模目标实验" AS abcode, "20260413" AS start_dt, "29991231" AS end_dt
+
+    -- "变更str*ros建模目标实验": staged ramp-up, one row per bucket with the same abcode (1-to-many); ab and 89 also carry it from 20260413
+    --   20260320: first rollout on bucket 01
+    --   20260330: extended to bucket 67 (its bn_ros experiment had ended on 20260319, leaving a 10-day gap)
+    --   20260407: extended to bucket 5d (taking over from its str-model experiment) and bucket 34 (no earlier row, so default control before)
+    UNION ALL SELECT "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231"
+    UNION ALL SELECT "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+    UNION ALL SELECT "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+    UNION ALL SELECT "34", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+
+    -- Bucket 67, earlier experiment: bn_ros new loss function
+    --   20260320~20260329 has no row (10 days), so 67 resolves to the default "对照组" in that window
+    UNION ALL SELECT "67", "实验组:bn_ros新损失函数",    "20260311", "20260319"
+
+    -- Bucket 5d, earlier experiment: feature-ranking str model
+    --   Taken over by the modelling-target experiment on 20260407, with no gap
+    UNION ALL SELECT "5d", "实验组:解构特征排序str模型", "20260314", "20260406"
+
+    -- Bucket ef history: str model & recall (ended 20260320) -> 17-day gap -> DNN model (open-ended end_dt)
+    --   20260321~20260406 has no row, so ef resolves to the default "对照组" in that window
+    UNION ALL SELECT "ef", "实验组:解构特征排序str模型&召回", "20260314", "20260320"
+    UNION ALL SELECT "ef", "实验组:DNN模型",                   "20260407", "29991231"
+    UNION ALL SELECT "2c", "实验组:DNN模型-调参", "20260413", "29991231"
+
+    UNION ALL SELECT "89", "对照组", "20260301", "20260412"
+    UNION ALL SELECT "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+    -- NOTE(review): from 20260413 bucket 89 is labelled as an experiment group, yet t_ctrl still selects suffix_group = '89'
+    -- as the horizontal baseline for every *_diff column — confirm 89 is still a valid control after that date.
+    -- ────────────────────────────────────────────────────────────────────
+    -- How to edit (copy the lines below into the UNION ALL list above)
+    --
+    -- Example A: add an experiment that occupies a single bucket
+    --   UNION ALL SELECT "2c", "实验组:新策略 X", "20260501", "29991231"
+    --
+    -- Example B: add a 1-to-many experiment (same experiment on buckets 01 + 34)
+    --   Add one row per bucket with the same abcode string:
+    --   UNION ALL SELECT "01", "实验组:大流量 Y", "20260601", "29991231"
+    --   UNION ALL SELECT "34", "实验组:大流量 Y", "20260601", "29991231"
+    --
+    -- Example C: switch experiments (SCD Type 2 — history is kept)
+    --   Suppose bucket 01 switches from experiment A to experiment B on 20260701:
+    --   Step 1: set end_dt of the existing row to the day before the switch:
+    --     SELECT "01", "实验组:A", "20260320", "20260630"
+    --   Step 2: append the row for the new experiment:
+    --     UNION ALL SELECT "01", "实验组:B", "20260701", "29991231"
+    --
+    -- Example D: take an experiment offline and return to control (creates a gap)
+    --   Just set end_dt of that row to the last day it ran (no new row needed):
+    --     SELECT "5d", "实验组:A", "20250101", "20260630"
+    --   From 20260701 no row covers bucket 5d, so it falls back to "对照组"
+    --   If the gap is intentional this is fine; if a follow-up experiment was simply forgotten, add its row later
+    -- ────────────────────────────────────────────────────────────────────
+)
+,t_base AS  -- exposure rows for the run date (apptype "4"), tagged with suffix, suffix_group and abcode
+(
+    SELECT  sub.*
+            ,sg.suffix_group
+            ,COALESCE(m.abcode,"对照组") AS abcode  -- experiment label from the map; this overrides the raw abcode column of sub
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix  -- last character of rootsessionid
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- c1/cn: distinct machines that clicked a share made by someone who had clicked the original share (c1 = sharedepth 1 only)
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- d1/dn: return counts credited from the next video viewed in the same subsession
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn  -- rn DESC makes LAG read the next-viewed row; 0 when there is none
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn  -- view order; NOTE(review): partitioned by subsessionid only, while LAG above partitions by mid,subsessionid
+                                            FROM    (
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1  -- keep the earliest view per (mid, subsessionid, videoid)
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                AND     abcode NOT IN ("ab100")  -- NOTE(review): redundant, "ab100" is already excluded by the IN list above
+            ) sub
+    -- INNER JOIN: only rows whose suffix is one of the 16 mapped hex characters are kept; guards against malformed data
+    INNER JOIN t_suffix_group sg
+    ON      sub.suffix = sg.suffix
+    -- LEFT JOIN: a bucket may have no matching experiment row; m.abcode is then NULL and COALESCE yields "对照组"
+    LEFT JOIN t_experiment_map m
+    ON      sg.suffix_group = m.suffix_group
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt
+)
+-- Exposure count per (dt, apptype, abcode, suffix, vid) on recommendation pages.
+-- Shared intermediate for the ECS and ARP CTEs below.
+,t_vid_exp AS
+(
+    SELECT  tb.dt
+            ,tb.apptype
+            ,tb.abcode
+            ,tb.suffix
+            ,tb.vid
+            ,COUNT(*) AS vid_exp_cnt
+    FROM    t_base tb
+    WHERE   tb.page = "推荐"
+    GROUP BY tb.dt
+             ,tb.apptype
+             ,tb.abcode
+             ,tb.suffix
+             ,tb.vid
+)
+-- Per-suffix ECS (Effective Catalog Size): how many videos the exposure is effectively spread over.
+--   ECS = 2 * SUM(exp_share_i * exp_rank_i) - 1
+--   exp_share_i = share of the suffix's exposures that went to vid i
+--   exp_rank_i  = 1-based rank of vid i by exposure count, descending
+-- Ranges from 1 (all exposure on one vid) up to the distinct vid count (perfectly even spread).
+,t_bucket_ecs AS
+(
+    SELECT  ranked.dt
+            ,ranked.apptype
+            ,ranked.abcode
+            ,ranked.suffix
+            ,2 * SUM(ranked.exp_share * ranked.exp_rank) - 1 AS ecs
+    FROM    (
+                SELECT  ve.dt
+                        ,ve.apptype
+                        ,ve.abcode
+                        ,ve.suffix
+                        ,ve.vid_exp_cnt / SUM(ve.vid_exp_cnt) OVER (PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix) AS exp_share
+                        ,ROW_NUMBER() OVER (PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix ORDER BY ve.vid_exp_cnt DESC) AS exp_rank
+                FROM    t_vid_exp ve
+            ) ranked
+    GROUP BY ranked.dt
+             ,ranked.apptype
+             ,ranked.abcode
+             ,ranked.suffix
+)
+-- Popularity reference for ARP: exposures per (dt, apptype, vid) on recommendation pages,
+-- summed over every experiment group and suffix present in t_base (no split by abcode).
+,t_vid_global_pop AS
+(
+    SELECT  tb.dt
+            ,tb.apptype
+            ,tb.vid
+            ,COUNT(*) AS vid_global_pop
+    FROM    t_base tb
+    WHERE   tb.page = "推荐"
+    GROUP BY tb.dt
+             ,tb.apptype
+             ,tb.vid
+)
+-- Per-suffix ARP (Average Recommendation Popularity): mean global popularity of the shown videos,
+-- weighted by each vid's exposure count inside the suffix.
+-- Read together with ECS:
+--   high ECS + low ARP  = spread out, leaning long-tail
+--   high ECS + high ARP = spread out, but within head content
+--   low ECS  + low ARP  = concentrated on niche content
+--   low ECS  + high ARP = concentrated on head content
+,t_bucket_arp AS
+(
+    SELECT  ve.dt
+            ,ve.apptype
+            ,ve.abcode
+            ,ve.suffix
+            ,SUM(ve.vid_exp_cnt * gp.vid_global_pop) / SUM(ve.vid_exp_cnt) AS arp
+    FROM    t_vid_exp ve
+    LEFT JOIN t_vid_global_pop gp
+    ON      ve.dt = gp.dt
+    AND     ve.apptype = gp.apptype
+    AND     ve.vid = gp.vid
+    GROUP BY ve.dt
+             ,ve.apptype
+             ,ve.abcode
+             ,ve.suffix
+)
+-- dau2 per single suffix: distinct machinecode count in useractive_log, tagged with bucket and experiment label.
+-- (The author left an alternative source commented out: loghubods.useractive_log_per5min with a timestamp range on dt.)
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(act.dt,1,8) AS dt
+            ,act.apptype
+            ,COALESCE(em.abcode,"对照组") AS abcode
+            ,grp.suffix_group
+            ,act.suffix
+            ,COUNT(DISTINCT act.machinecode) AS dau2
+    FROM    (
+                SELECT  ual.dt
+                        ,ual.apptype
+                        ,ual.machinecode
+                        -- last character of rootSessionId
+                        ,SUBSTR(GET_JSON_OBJECT(ual.extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(ual.extparams,'$.rootSessionId')),1) AS suffix
+                FROM    loghubods.useractive_log ual
+                WHERE   ual.dt="${dt}"
+                AND     ual.apptype IN ("4")
+            ) act
+    INNER JOIN t_suffix_group grp
+    ON      act.suffix = grp.suffix
+    LEFT JOIN t_experiment_map em
+    ON      grp.suffix_group = em.suffix_group
+    AND     '${dt}' BETWEEN em.start_dt AND em.end_dt
+    GROUP BY SUBSTR(act.dt,1,8)
+             ,act.apptype
+             ,COALESCE(em.abcode,"对照组")
+             ,grp.suffix_group
+             ,act.suffix
+)
+-- dau2 per suffix_group: mean of the per-suffix dau2 values inside each bucket.
+,t_dau2 AS
+(
+    SELECT  db.dt
+            ,db.apptype
+            ,db.abcode
+            ,db.suffix_group
+            ,AVG(db.dau2) AS dau2
+    FROM    t_dau2_bucket db
+    GROUP BY db.dt
+             ,db.apptype
+             ,db.abcode
+             ,db.suffix_group
+)
+-- Per-suffix metrics on recommendation pages (UV is de-duplicated within each suffix).
+-- Ratios are wrapped in COALESCE(..., 0) so a NULL result is reported as 0.
+,t_bucket AS
+(
+    SELECT  tb.dt
+            ,tb.apptype
+            ,tb.abcode
+            ,tb.suffix_group
+            ,tb.suffix
+            -- rate metrics
+            ,COALESCE(COUNT(*) / COUNT(DISTINCT tb.mid),0) AS exp_per_dau
+            ,COALESCE(SUM(tb.is_share) / COUNT(*),0) AS str_one
+            ,COALESCE(SUM(tb.return_n_uv) / SUM(tb.is_share),0) AS ros_one
+            ,COALESCE(SUM(tb.share_cnt) / COUNT(*),0) AS str
+            ,COALESCE(SUM(tb.return_n_uv) / SUM(tb.share_cnt),0) AS ros
+            ,COALESCE(SUM(tb.is_return_1) / COUNT(*),0) AS str_plus
+            ,COALESCE(SUM(tb.return_n_uv) / SUM(tb.is_return_1),0) AS ros_minus
+            ,COALESCE(SUM(tb.return_n_uv) / COUNT(*),0) AS bn_rov
+            ,COALESCE(SUM(tb.c1) / COUNT(*),0) AS c1_rov
+            ,COALESCE(SUM(tb.cn) / COUNT(*),0) AS cn_rov
+            ,COALESCE(SUM(tb.d1) / COUNT(*),0) AS d1_rov
+            ,COALESCE(SUM(tb.dn) / COUNT(*),0) AS dn_rov
+            -- combined ROV: (return_n_uv + cn + dn) per exposure; the three parts share the exposure-count denominator
+            ,COALESCE((SUM(tb.return_n_uv) + SUM(tb.cn) + SUM(tb.dn)) / COUNT(*),0) AS total_rov
+            ,COALESCE(SUM(tb.new_exposure_cnt) / COUNT(*),0) AS vovh24
+            -- volume metrics
+            ,COUNT(DISTINCT tb.mid) AS dau
+            ,COUNT(*) AS exp
+            -- distinct vid count, the natural companion of ECS
+            ,COUNT(DISTINCT tb.vid) AS distinct_vid_cnt
+            ,COALESCE(SUM(tb.is_share),0) AS is_share
+            ,COALESCE(SUM(tb.share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(tb.is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(tb.return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(tb.new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(tb.return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(tb.cn),0) AS cn
+            ,COALESCE(SUM(tb.c1),0) AS c1
+            ,COALESCE(SUM(tb.dn),0) AS dn
+            ,COALESCE(SUM(tb.d1),0) AS d1
+    FROM    t_base tb
+    WHERE   tb.page = "推荐"
+    GROUP BY tb.dt
+             ,tb.apptype
+             ,tb.abcode
+             ,tb.suffix_group
+             ,tb.suffix
+)
+-- Per (abcode, suffix_group) metrics: mean of the per-suffix values, joined with ECS and ARP.
+-- NOTE(review): values are rounded here and the *_diff columns are later computed from these rounded values.
+,t_metrics AS
+(
+    SELECT  bk.dt
+            ,bk.apptype
+            ,bk.abcode
+            ,bk.suffix_group
+            ,ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau
+            ,ROUND(AVG(bk.str_one),6) AS str_one
+            ,ROUND(AVG(bk.ros_one),6) AS ros_one
+            ,ROUND(AVG(bk.str),6) AS str
+            ,ROUND(AVG(bk.ros),6) AS ros
+            ,ROUND(AVG(bk.str_plus),6) AS str_plus
+            ,ROUND(AVG(bk.ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bk.bn_rov),6) AS bn_rov
+            ,ROUND(AVG(bk.c1_rov),6) AS c1_rov
+            ,ROUND(AVG(bk.cn_rov),6) AS cn_rov
+            ,ROUND(AVG(bk.d1_rov),6) AS d1_rov
+            ,ROUND(AVG(bk.dn_rov),6) AS dn_rov
+            ,ROUND(AVG(bk.total_rov),6) AS total_rov
+            ,ROUND(AVG(bk.vovh24),6) AS vovh24
+            ,AVG(bk.dau) AS dau
+            ,AVG(bk.exp) AS exp
+            ,ROUND(AVG(bk.distinct_vid_cnt),0) AS distinct_vid_cnt
+            ,ROUND(AVG(ec.ecs),1) AS ecs
+            -- ecs_ratio: mean ECS divided by mean distinct vid count, which removes the effect of pool size
+            ,ROUND(AVG(ec.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS ecs_ratio
+            -- gini is defined here as 1 - ecs_ratio
+            ,ROUND(1 - AVG(ec.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS gini
+            ,ROUND(AVG(ar.arp),0) AS arp
+            ,AVG(bk.is_share) AS is_share
+            ,AVG(bk.share_cnt) AS share_cnt
+            ,AVG(bk.is_return_1) AS is_return_1
+            ,AVG(bk.return_n_uv) AS return_n_uv
+            ,AVG(bk.viewh24) AS viewh24
+            ,AVG(bk.return_n_uv_noself) AS return_n_uv_noself
+            ,AVG(bk.cn) AS cn
+            ,AVG(bk.c1) AS c1
+            ,AVG(bk.dn) AS dn
+            ,AVG(bk.d1) AS d1
+            -- comma-separated list of the suffixes that fed this row
+            ,WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix
+    FROM    t_bucket bk
+    LEFT JOIN t_bucket_ecs ec
+    ON      bk.dt = ec.dt
+    AND     bk.apptype = ec.apptype
+    AND     bk.abcode = ec.abcode
+    AND     bk.suffix = ec.suffix
+    LEFT JOIN t_bucket_arp ar
+    ON      bk.dt = ar.dt
+    AND     bk.apptype = ar.apptype
+    AND     bk.abcode = ar.abcode
+    AND     bk.suffix = ar.suffix
+    GROUP BY bk.dt
+             ,bk.apptype
+             ,bk.abcode
+             ,bk.suffix_group
+)
+-- ════════════════════════════════════════════════════════════════════════════
+-- Baseline comparison layer: DAU2 vs. 5-day historical mean + horizontal comparison vs. bucket 89
+-- ════════════════════════════════════════════════════════════════════════════
+-- Hard-coded per-bucket DAU2 baseline (the author states: baseline period 20260307~20260311, apptype=4).
+-- The literals are precomputed so that useractive_log is not scanned again for the baseline days.
+-- To change the baseline period, recompute the values and replace the literals below.
+,t_dau2_base5 AS
+(
+    SELECT           "01" AS suffix_group, 221923.1 AS dau2_base5
+    UNION ALL SELECT "2c",                 223926.3
+    UNION ALL SELECT "34",                 220940.6
+    UNION ALL SELECT "5d",                 221669.4
+    UNION ALL SELECT "67",                 217974.6
+    UNION ALL SELECT "89",                 224279.4
+    UNION ALL SELECT "ab",                 222393.9
+    UNION ALL SELECT "ef",                 219735.4
+)
+-- Main metrics joined with dau2, plus dau_vs_5d = dau2 / hard-coded 5-day baseline of the same bucket.
+,t_combined AS
+(
+    SELECT  mt.*
+            ,d2.dau2
+            ,ROUND(d2.dau2 / NULLIF(b5.dau2_base5, 0), 6) AS dau_vs_5d
+    FROM    t_metrics mt
+    LEFT JOIN t_dau2 d2
+    ON      mt.dt = d2.dt
+    AND     mt.apptype = d2.apptype
+    AND     mt.abcode = d2.abcode
+    AND     mt.suffix_group = d2.suffix_group
+    LEFT JOIN t_dau2_base5 b5
+    ON      mt.suffix_group = b5.suffix_group
+)
+-- Daily metrics of the baseline bucket (suffix_group '89'), renamed with a ctrl_ prefix
+-- so they can be joined back as the denominator of every *_diff column.
+,t_ctrl AS
+(
+    SELECT  tc.dt
+            ,tc.apptype
+            ,tc.dau_vs_5d   AS ctrl_dau_vs_5d
+            ,tc.exp         AS ctrl_exp
+            ,tc.exp_per_dau AS ctrl_exp_per_dau
+            ,tc.str_one     AS ctrl_str_one
+            ,tc.ros_one     AS ctrl_ros_one
+            ,tc.str         AS ctrl_str
+            ,tc.ros         AS ctrl_ros
+            ,tc.vovh24      AS ctrl_vovh24
+            ,tc.str_plus    AS ctrl_str_plus
+            ,tc.ros_minus   AS ctrl_ros_minus
+            ,tc.bn_rov      AS ctrl_bn_rov
+            ,tc.c1_rov      AS ctrl_c1_rov
+            ,tc.cn_rov      AS ctrl_cn_rov
+            ,tc.d1_rov      AS ctrl_d1_rov
+            ,tc.dn_rov      AS ctrl_dn_rov
+            ,tc.total_rov   AS ctrl_total_rov
+            ,tc.ecs         AS ctrl_ecs
+            ,tc.ecs_ratio   AS ctrl_ecs_ratio
+            ,tc.arp         AS ctrl_arp
+    FROM    t_combined tc
+    WHERE   tc.suffix_group = '89'
+)
+-- Final output: raw metrics plus baseline deltas (every *_diff = current value / bucket-89 value - 1).
+SELECT  cur.dt
+        ,cur.apptype
+        ,cur.abcode
+        ,cur.suffix_group
+        ,cur.suffix
+        -- raw metrics
+        ,cur.exp_per_dau
+        ,cur.str_one
+        ,cur.ros_one
+        ,cur.str
+        ,cur.ros
+        ,cur.str_plus
+        ,cur.ros_minus
+        ,cur.bn_rov
+        ,cur.c1_rov
+        ,cur.cn_rov
+        ,cur.d1_rov
+        ,cur.dn_rov
+        ,cur.total_rov
+        ,cur.vovh24
+        ,cur.dau
+        ,cur.exp
+        ,cur.distinct_vid_cnt
+        ,cur.ecs
+        ,cur.ecs_ratio
+        ,cur.gini
+        ,cur.arp
+        ,cur.is_share
+        ,cur.share_cnt
+        ,cur.is_return_1
+        ,cur.return_n_uv
+        ,cur.viewh24
+        ,cur.return_n_uv_noself
+        ,cur.cn
+        ,cur.c1
+        ,cur.dn
+        ,cur.d1
+        ,cur.dau2
+        -- vertical comparison: dau2 vs. the 5-day historical mean
+        ,cur.dau_vs_5d
+        -- horizontal comparison: each bucket vs. the baseline bucket (89) on the same day
+        ,ROUND(cur.dau_vs_5d / NULLIF(base.ctrl_dau_vs_5d, 0) - 1, 6) AS dau_vs_5d_diff
+        ,ROUND(cur.exp / NULLIF(base.ctrl_exp, 0) - 1, 6) AS exp_diff
+        ,ROUND(cur.exp_per_dau / NULLIF(base.ctrl_exp_per_dau, 0) - 1, 6) AS exp_per_dau_diff
+        ,ROUND(cur.str_one / NULLIF(base.ctrl_str_one, 0) - 1, 6) AS str_one_diff
+        ,ROUND(cur.ros_one / NULLIF(base.ctrl_ros_one, 0) - 1, 6) AS ros_one_diff
+        ,ROUND(cur.str / NULLIF(base.ctrl_str, 0) - 1, 6) AS str_diff
+        ,ROUND(cur.ros / NULLIF(base.ctrl_ros, 0) - 1, 6) AS ros_diff
+        ,ROUND(cur.vovh24 / NULLIF(base.ctrl_vovh24, 0) - 1, 6) AS vovh24_diff
+        ,ROUND(cur.str_plus / NULLIF(base.ctrl_str_plus, 0) - 1, 6) AS str_plus_diff
+        ,ROUND(cur.ros_minus / NULLIF(base.ctrl_ros_minus, 0) - 1, 6) AS ros_minus_diff
+        ,ROUND(cur.bn_rov / NULLIF(base.ctrl_bn_rov, 0) - 1, 6) AS bn_rov_diff
+        ,ROUND(cur.c1_rov / NULLIF(base.ctrl_c1_rov, 0) - 1, 6) AS c1_rov_diff
+        ,ROUND(cur.cn_rov / NULLIF(base.ctrl_cn_rov, 0) - 1, 6) AS cn_rov_diff
+        ,ROUND(cur.d1_rov / NULLIF(base.ctrl_d1_rov, 0) - 1, 6) AS d1_rov_diff
+        ,ROUND(cur.dn_rov / NULLIF(base.ctrl_dn_rov, 0) - 1, 6) AS dn_rov_diff
+        ,ROUND(cur.total_rov / NULLIF(base.ctrl_total_rov, 0) - 1, 6) AS total_rov_diff
+        ,ROUND(cur.ecs / NULLIF(base.ctrl_ecs, 0) - 1, 6) AS ecs_diff
+        ,ROUND(cur.ecs_ratio / NULLIF(base.ctrl_ecs_ratio, 0) - 1, 6) AS ecs_ratio_diff
+        ,ROUND(cur.arp / NULLIF(base.ctrl_arp, 0) - 1, 6) AS arp_diff
+FROM    t_combined cur
+LEFT JOIN t_ctrl base
+ON      cur.dt = base.dt
+AND     cur.apptype = base.apptype
+ORDER BY cur.dt DESC, cur.apptype, cur.abcode, cur.suffix_group
+;

+ 629 - 0
tasks/00_尾号实验/base_v3_new_v3_hardcode.sql

@@ -0,0 +1,629 @@
+-- ════════════════════════════════════════════════════════════════════════════
+-- 两层尾号映射 (SCD Type 2 模式) + 基线对比(vs 89 桶 + vs 历史 5 天)
+--
+-- 第一层 t_suffix_group:物理尾号 → 分流桶 ID(16 个 hex 尾号分成 8 个 2-元桶)
+--   - 分流规则不变时,此层永不改
+--
+-- 第二层 t_experiment_map:分流桶 → 实验名 + 生效日期
+--   - 只列出"分配了具体实验"的桶,未列出的桶自动默认为"对照组"
+--   - 支持 1 对多:同一个实验占多个桶时,用同一 abcode 字符串多加几行
+--   - 实验切换:不删旧行,关闭 end_dt + 追加新行(保留历史可回溯)
+--
+-- ┌─ 基线配置 ──────────────────────────────────────────────────────────┐
+-- │  基线桶:89              (对照组物理桶,横向对比基准)              │
+-- │  基线天数:20260307~20260311(5 天均值,DAU2 纵向对比基准)         │
+-- │  新增列:dau_vs_5d / dau_vs_5d_diff / *_diff 系列差值列           │
+-- └─────────────────────────────────────────────────────────────────────┘
+-- ════════════════════════════════════════════════════════════════════════════
+-- Layer 1: physical suffix -> traffic bucket.
+-- Each of the 16 hex suffixes belongs to exactly one of 8 two-suffix buckets.
+-- Rows are listed one bucket per line; row order has no effect on the joins that consume this CTE.
+WITH t_suffix_group AS
+(
+    SELECT "0" AS suffix, "01" AS suffix_group UNION ALL SELECT "1", "01"
+    UNION ALL SELECT "2", "2c"                 UNION ALL SELECT "c", "2c"
+    UNION ALL SELECT "3", "34"                 UNION ALL SELECT "4", "34"
+    UNION ALL SELECT "5", "5d"                 UNION ALL SELECT "d", "5d"
+    UNION ALL SELECT "6", "67"                 UNION ALL SELECT "7", "67"
+    UNION ALL SELECT "8", "89"                 UNION ALL SELECT "9", "89"
+    UNION ALL SELECT "a", "ab"                 UNION ALL SELECT "b", "ab"
+    UNION ALL SELECT "e", "ef"                 UNION ALL SELECT "f", "ef"
+)
+-- Layer 2: traffic bucket -> experiment label with a validity window (SCD Type 2).
+--   * start_dt / end_dt are inclusive yyyymmdd strings; "29991231" marks an open-ended row.
+--   * A bucket may have several rows over time, but their windows must not overlap:
+--     consumers join on '${dt}' BETWEEN start_dt AND end_dt, so an overlap would duplicate rows.
+--   * A bucket with no row covering ${dt} is labelled "对照组" by the COALESCE in the consumers.
+--   * Several buckets may share one abcode string (one experiment spanning multiple buckets).
+-- Rows are grouped by bucket below; row order has no effect on the join.
+-- NOTE(review): from 20260413 bucket 89 carries an experiment label, yet t_ctrl still selects
+--   suffix_group = '89' as the horizontal baseline. Confirm this is intended.
+,t_experiment_map AS
+(
+    -- 01: modelling-target experiment since 20260320
+    SELECT "01" AS suffix_group, "实验组:变更str*ros建模目标实验" AS abcode, "20260320" AS start_dt, "29991231" AS end_dt
+
+    -- 2c: no row before 20260413 (falls back to control), then the DNN tuning variant
+    UNION ALL SELECT "2c", "实验组:DNN模型-调参", "20260413", "29991231"
+
+    -- 34: no row before 20260407 (falls back to control), then the modelling-target experiment
+    UNION ALL SELECT "34", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+
+    -- 5d: feature-deconstruction str model until 20260406, modelling-target experiment from 20260407 (no gap)
+    UNION ALL SELECT "5d", "实验组:解构特征排序str模型", "20260314", "20260406"
+    UNION ALL SELECT "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+
+    -- 67: bn_ros loss-function experiment until 20260319, then an uncovered gap
+    --     20260320~20260329 (10 days, falls back to control), then the modelling-target experiment
+    UNION ALL SELECT "67", "实验组:bn_ros新损失函数", "20260311", "20260319"
+    UNION ALL SELECT "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+
+    -- 89: explicit control row until 20260412, modelling-target experiment from 20260413
+    UNION ALL SELECT "89", "对照组", "20260301", "20260412"
+    UNION ALL SELECT "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+
+    -- ab: no row before 20260413 (falls back to control), then the modelling-target experiment
+    UNION ALL SELECT "ab", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+
+    -- ef: feature-deconstruction str model & recall until 20260320, an uncovered gap
+    --     20260321~20260406 (17 days, falls back to control), then the DNN model
+    UNION ALL SELECT "ef", "实验组:解构特征排序str模型&召回", "20260314", "20260320"
+    UNION ALL SELECT "ef", "实验组:DNN模型", "20260407", "29991231"
+
+    -- ────────────────────────────────────────────────────────────────────
+    -- How to edit (copy a line into the UNION ALL list above)
+    --
+    -- Example A: add an experiment that occupies a single bucket
+    --   UNION ALL SELECT "2c", "实验组:新策略 X", "20260501", "29991231"
+    --
+    -- Example B: add one experiment spanning two buckets (01 + 34)
+    --   Add two rows with the same abcode string; downstream GROUP BY keys include abcode:
+    --   UNION ALL SELECT "01", "实验组:大流量 Y", "20260601", "29991231"
+    --   UNION ALL SELECT "34", "实验组:大流量 Y", "20260601", "29991231"
+    --
+    -- Example C: switch a bucket from experiment A to experiment B on 20260701 (history kept)
+    --   Step 1: close the existing row the day before the switch:
+    --     SELECT "01", "实验组:A", "20260320", "20260630"
+    --   Step 2: append the new row:
+    --     UNION ALL SELECT "01", "实验组:B", "20260701", "29991231"
+    --
+    -- Example D: take an experiment offline (the bucket returns to control)
+    --   Only set end_dt to the last active day; no new row is needed:
+    --     SELECT "5d", "实验组:A", "20250101", "20260630"
+    --   From 20260701 no row covers bucket 5d, so it falls back to control.
+    --   If the gap is unintentional, remember to add the follow-up experiment row.
+    -- ────────────────────────────────────────────────────────────────────
+)
+-- Base rows for ${dt}: records of the exposure table (apptype "4", whitelisted pages and abcodes),
+-- enriched with click-return counts (cn / c1) and next-video return counts (dn / d1), then tagged
+-- with the traffic bucket (suffix_group) and the experiment label valid on ${dt}.
+-- The label column is named abcode and is listed after sub.*; NOTE(review): sub.* does not project
+-- the source table's abcode column, so downstream abcode is presumably this label — confirm.
+,t_base AS
+(
+    SELECT  sub.*
+            ,sg.suffix_group
+            ,COALESCE(m.abcode,"对照组") AS abcode
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        -- last character of extend.$.rootsessionid is used as the physical suffix
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        -- collapse raw page names into two classes; given the WHERE page filter below,
+                        -- every surviving row matches one of the two WHEN branches, so ELSE is not reached
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- c1/cn: return UV produced by clicks that follow a share.
+                                -- Chain: share (a) -> click on that share (b) -> shares made by the clicker in the
+                                -- same subsession (b1) -> clicks on those shares (b2). cn counts distinct b2 machines
+                                -- that differ from the b1 sharer; c1 additionally requires b2.sharedepth = 1.
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- d1/dn: returns attributed to the NEXT video viewed in the same subsession.
+                                -- rn orders first views by logtimestamp; LAG over rn DESC therefore reads the row
+                                -- with the next-higher rn (0 when there is no later video).
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    -- NOTE(review): partitioned by subsessionid only, while the LAG above partitions
+                                                    -- by (mid, subsessionid); relative order inside a (mid, subsessionid) pair is the same
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn
+                                            FROM    (
+                                                        -- first videoView of each (mid, subsessionid, videoid)
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                -- NOTE(review): redundant — "ab100" is already excluded by the IN list above
+                AND     abcode NOT IN ("ab100")
+            ) sub
+    -- INNER JOIN: only rows whose suffix is one of the 16 hex characters are analysed (guards against malformed ids)
+    INNER JOIN t_suffix_group sg
+    ON      sub.suffix = sg.suffix
+    -- LEFT JOIN: a bucket may have no experiment row covering ${dt}; m.abcode is then NULL and COALESCE yields "对照组"
+    LEFT JOIN t_experiment_map m
+    ON      sg.suffix_group = m.suffix_group
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt
+)
+-- Exposure count of every vid inside each (dt, apptype, abcode, suffix) cell,
+-- restricted to recommendation pages. Shared input for the ECS and ARP CTEs.
+,t_vid_exp AS
+(
+    SELECT
+        tb.dt,
+        tb.apptype,
+        tb.abcode,
+        tb.suffix,
+        tb.vid,
+        COUNT(*) AS vid_exp_cnt
+    FROM t_base tb
+    WHERE tb.page = "推荐"
+    GROUP BY
+        tb.dt,
+        tb.apptype,
+        tb.abcode,
+        tb.suffix,
+        tb.vid
+)
+-- ECS (Effective Catalog Size) per suffix: how many videos the exposure is "effectively" spread over.
+--   ECS = 2 * SUM(p_i * rank_i) - 1
+--     p_i    = share of the cell's exposures that went to vid i
+--     rank_i = 1-based rank of vid i by exposure count, descending
+-- Range is [1, number of distinct vids]; larger means more dispersed, smaller means head-heavy.
+,t_bucket_ecs AS
+(
+    SELECT
+        ranked.dt,
+        ranked.apptype,
+        ranked.abcode,
+        ranked.suffix,
+        2 * SUM(ranked.p * ranked.rn) - 1 AS ecs
+    FROM (
+        SELECT
+            ve.dt,
+            ve.apptype,
+            ve.abcode,
+            ve.suffix,
+            ve.vid_exp_cnt / SUM(ve.vid_exp_cnt) OVER (PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix) AS p,
+            ROW_NUMBER() OVER (PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix ORDER BY ve.vid_exp_cnt DESC) AS rn
+        FROM t_vid_exp ve
+    ) ranked
+    GROUP BY
+        ranked.dt,
+        ranked.apptype,
+        ranked.abcode,
+        ranked.suffix
+)
+-- Popularity reference for ARP: exposure count of each vid per (dt, apptype) on recommendation pages.
+-- The grouping has no abcode / suffix key, so the count spans every bucket present in t_base.
+,t_vid_global_pop AS
+(
+    SELECT
+        tb.dt,
+        tb.apptype,
+        tb.vid,
+        COUNT(*) AS vid_global_pop
+    FROM t_base tb
+    WHERE tb.page = "推荐"
+    GROUP BY
+        tb.dt,
+        tb.apptype,
+        tb.vid
+)
+-- ARP (Average Recommendation Popularity) per suffix: the exposure-weighted mean of vid_global_pop,
+-- so vids shown more often inside the cell weigh more.
+-- Reading guide from the original author, combining ECS and ARP:
+--   high ECS + low ARP  = dispersed, long-tail leaning         (ideal)
+--   high ECS + high ARP = dispersed, but within the head       (watch)
+--   low ECS  + low ARP  = concentrated on niche items          (special case)
+--   low ECS  + high ARP = concentrated on the head             (model collapse)
+,t_bucket_arp AS
+(
+    SELECT
+        ve.dt,
+        ve.apptype,
+        ve.abcode,
+        ve.suffix,
+        SUM(ve.vid_exp_cnt * gp.vid_global_pop) / SUM(ve.vid_exp_cnt) AS arp
+    FROM t_vid_exp ve
+    LEFT JOIN t_vid_global_pop gp
+        ON  ve.dt = gp.dt
+        AND ve.apptype = gp.apptype
+        AND ve.vid = gp.vid
+    GROUP BY
+        ve.dt,
+        ve.apptype,
+        ve.abcode,
+        ve.suffix
+)
+-- dau2 per physical suffix: distinct machinecode in loghubods.useractive_log for ${dt}, apptype "4".
+-- The suffix is the last character of extparams.$.rootSessionId; bucket and experiment label are
+-- attached the same way as in t_base.
+-- Alternative source once sketched by the author (not used): loghubods.useractive_log_per5min with
+--   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500").
+,t_dau2_bucket AS
+(
+    SELECT
+        SUBSTR(act.dt,1,8) AS dt,
+        act.apptype,
+        COALESCE(em.abcode,"对照组") AS abcode,
+        grp.suffix_group,
+        act.suffix,
+        COUNT(DISTINCT act.machinecode) AS dau2
+    FROM (
+        SELECT
+            dt,
+            apptype,
+            machinecode,
+            SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+        FROM loghubods.useractive_log
+        WHERE dt = "${dt}"
+        AND   apptype IN ("4")
+    ) act
+    INNER JOIN t_suffix_group grp
+        ON  act.suffix = grp.suffix
+    LEFT JOIN t_experiment_map em
+        ON  grp.suffix_group = em.suffix_group
+        AND '${dt}' BETWEEN em.start_dt AND em.end_dt
+    GROUP BY
+        SUBSTR(act.dt,1,8),
+        act.apptype,
+        COALESCE(em.abcode,"对照组"),
+        grp.suffix_group,
+        act.suffix
+)
+-- dau2 per bucket: the mean of the per-suffix dau2 values inside each suffix_group.
+,t_dau2 AS
+(
+    SELECT
+        db.dt,
+        db.apptype,
+        db.abcode,
+        db.suffix_group,
+        AVG(db.dau2) AS dau2
+    FROM t_dau2_bucket db
+    GROUP BY
+        db.dt,
+        db.apptype,
+        db.abcode,
+        db.suffix_group
+)
+-- Per-suffix metrics over recommendation-page rows of t_base (UV is de-duplicated inside each suffix).
+-- A ratio whose division yields NULL is reported as 0 through COALESCE.
+,t_bucket AS
+(
+    SELECT
+        tb.dt,
+        tb.apptype,
+        tb.abcode,
+        tb.suffix_group,
+        tb.suffix,
+        -- rate metrics
+        COALESCE(COUNT(*) / COUNT(DISTINCT tb.mid),0) AS exp_per_dau,
+        COALESCE(SUM(tb.is_share) / COUNT(*),0) AS str_one,
+        COALESCE(SUM(tb.return_n_uv) / SUM(tb.is_share),0) AS ros_one,
+        COALESCE(SUM(tb.share_cnt) / COUNT(*),0) AS str,
+        COALESCE(SUM(tb.return_n_uv) / SUM(tb.share_cnt),0) AS ros,
+        COALESCE(SUM(tb.is_return_1) / COUNT(*),0) AS str_plus,
+        COALESCE(SUM(tb.return_n_uv) / SUM(tb.is_return_1),0) AS ros_minus,
+        COALESCE(SUM(tb.return_n_uv) / COUNT(*),0) AS bn_rov,
+        COALESCE(SUM(tb.c1) / COUNT(*),0) AS c1_rov,
+        COALESCE(SUM(tb.cn) / COUNT(*),0) AS cn_rov,
+        COALESCE(SUM(tb.d1) / COUNT(*),0) AS d1_rov,
+        COALESCE(SUM(tb.dn) / COUNT(*),0) AS dn_rov,
+        -- combined ROV = bn_rov + cn_rov + dn_rov; the three share the row count as denominator
+        COALESCE((SUM(tb.return_n_uv) + SUM(tb.cn) + SUM(tb.dn)) / COUNT(*),0) AS total_rov,
+        COALESCE(SUM(tb.new_exposure_cnt) / COUNT(*),0) AS vovh24,
+        -- volume metrics
+        COUNT(DISTINCT tb.mid) AS dau,
+        COUNT(*) AS exp,
+        -- distinct vids in the cell (the natural companion of ECS)
+        COUNT(DISTINCT tb.vid) AS distinct_vid_cnt,
+        COALESCE(SUM(tb.is_share),0) AS is_share,
+        COALESCE(SUM(tb.share_cnt),0) AS share_cnt,
+        COALESCE(SUM(tb.is_return_1),0) AS is_return_1,
+        COALESCE(SUM(tb.return_n_uv),0) AS return_n_uv,
+        COALESCE(SUM(tb.new_exposure_cnt),0) AS viewh24,
+        COALESCE(SUM(tb.return_n_uv_noself),0) AS return_n_uv_noself,
+        COALESCE(SUM(tb.cn),0) AS cn,
+        COALESCE(SUM(tb.c1),0) AS c1,
+        COALESCE(SUM(tb.dn),0) AS dn,
+        COALESCE(SUM(tb.d1),0) AS d1
+    FROM t_base tb
+    WHERE tb.page = "推荐"
+    GROUP BY
+        tb.dt,
+        tb.apptype,
+        tb.abcode,
+        tb.suffix_group,
+        tb.suffix
+)
+-- Bucket-level metrics: the mean over the suffixes of each (dt, apptype, abcode, suffix_group),
+-- joined with the per-suffix ECS and ARP values. The suffix column lists the contributing suffixes.
+,t_metrics AS
+(
+    SELECT
+        bk.dt,
+        bk.apptype,
+        bk.abcode,
+        bk.suffix_group,
+        ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau,
+        ROUND(AVG(bk.str_one),6) AS str_one,
+        ROUND(AVG(bk.ros_one),6) AS ros_one,
+        ROUND(AVG(bk.str),6) AS str,
+        ROUND(AVG(bk.ros),6) AS ros,
+        ROUND(AVG(bk.str_plus),6) AS str_plus,
+        ROUND(AVG(bk.ros_minus),6) AS ros_minus,
+        ROUND(AVG(bk.bn_rov),6) AS bn_rov,
+        ROUND(AVG(bk.c1_rov),6) AS c1_rov,
+        ROUND(AVG(bk.cn_rov),6) AS cn_rov,
+        ROUND(AVG(bk.d1_rov),6) AS d1_rov,
+        ROUND(AVG(bk.dn_rov),6) AS dn_rov,
+        ROUND(AVG(bk.total_rov),6) AS total_rov,
+        ROUND(AVG(bk.vovh24),6) AS vovh24,
+        AVG(bk.dau) AS dau,
+        AVG(bk.exp) AS exp,
+        ROUND(AVG(bk.distinct_vid_cnt),0) AS distinct_vid_cnt,
+        ROUND(AVG(ecs_t.ecs),1) AS ecs,
+        -- ECS divided by the distinct-vid count: removes the pool-size effect
+        ROUND(AVG(ecs_t.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS ecs_ratio,
+        -- reported as gini = 1 - ecs_ratio
+        ROUND(1 - AVG(ecs_t.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS gini,
+        ROUND(AVG(arp_t.arp),0) AS arp,
+        AVG(bk.is_share) AS is_share,
+        AVG(bk.share_cnt) AS share_cnt,
+        AVG(bk.is_return_1) AS is_return_1,
+        AVG(bk.return_n_uv) AS return_n_uv,
+        AVG(bk.viewh24) AS viewh24,
+        AVG(bk.return_n_uv_noself) AS return_n_uv_noself,
+        AVG(bk.cn) AS cn,
+        AVG(bk.c1) AS c1,
+        AVG(bk.dn) AS dn,
+        AVG(bk.d1) AS d1,
+        WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix
+    FROM t_bucket bk
+    LEFT JOIN t_bucket_ecs ecs_t
+        ON  bk.dt = ecs_t.dt
+        AND bk.apptype = ecs_t.apptype
+        AND bk.abcode = ecs_t.abcode
+        AND bk.suffix = ecs_t.suffix
+    LEFT JOIN t_bucket_arp arp_t
+        ON  bk.dt = arp_t.dt
+        AND bk.apptype = arp_t.apptype
+        AND bk.abcode = arp_t.abcode
+        AND bk.suffix = arp_t.suffix
+    GROUP BY
+        bk.dt,
+        bk.apptype,
+        bk.abcode,
+        bk.suffix_group
+)
+-- ════════════════════════════════════════════════════════════════════════════
+-- Baseline comparison layer: DAU2 vs. a historical 5-day mean, and each bucket vs. bucket 89
+-- ════════════════════════════════════════════════════════════════════════════
+-- Hard-coded 5-day mean DAU2 per bucket. According to the author's note the values cover the
+-- baseline period 20260307~20260311 for apptype=4 and are precomputed so that useractive_log
+-- is not re-scanned on every run. To change the baseline period, rerun the baseline query
+-- and replace the numbers below.
+,t_dau2_base5 AS
+(
+    SELECT "01" AS suffix_group, 221923.1 AS dau2_base5 UNION ALL
+    SELECT "2c", 223926.3 UNION ALL
+    SELECT "34", 220940.6 UNION ALL
+    SELECT "5d", 221669.4 UNION ALL
+    SELECT "67", 217974.6 UNION ALL
+    SELECT "89", 224279.4 UNION ALL
+    SELECT "ab", 222393.9 UNION ALL
+    SELECT "ef", 219735.4
+)
+-- Bucket metrics plus dau2 and dau_vs_5d (today's dau2 divided by the bucket's hard-coded 5-day mean).
+,t_combined AS
+(
+    SELECT
+        mt.*,
+        d2.dau2,
+        ROUND(d2.dau2 / NULLIF(b5.dau2_base5, 0), 6) AS dau_vs_5d
+    FROM t_metrics mt
+    LEFT JOIN t_dau2 d2
+        ON  mt.dt = d2.dt
+        AND mt.apptype = d2.apptype
+        AND mt.abcode = d2.abcode
+        AND mt.suffix_group = d2.suffix_group
+    LEFT JOIN t_dau2_base5 b5
+        ON  mt.suffix_group = b5.suffix_group
+)
+-- Daily metrics of the baseline bucket (suffix_group '89'), renamed with a ctrl_ prefix so they can
+-- be joined next to every bucket's own metrics as the horizontal comparison base.
+,t_ctrl AS
+(
+    SELECT
+        cb.dt,
+        cb.apptype,
+        cb.dau_vs_5d   AS ctrl_dau_vs_5d,
+        cb.exp         AS ctrl_exp,
+        cb.exp_per_dau AS ctrl_exp_per_dau,
+        cb.str_one     AS ctrl_str_one,
+        cb.ros_one     AS ctrl_ros_one,
+        cb.str         AS ctrl_str,
+        cb.ros         AS ctrl_ros,
+        cb.vovh24      AS ctrl_vovh24,
+        cb.str_plus    AS ctrl_str_plus,
+        cb.ros_minus   AS ctrl_ros_minus,
+        cb.bn_rov      AS ctrl_bn_rov,
+        cb.c1_rov      AS ctrl_c1_rov,
+        cb.cn_rov      AS ctrl_cn_rov,
+        cb.d1_rov      AS ctrl_d1_rov,
+        cb.dn_rov      AS ctrl_dn_rov,
+        cb.total_rov   AS ctrl_total_rov,
+        cb.ecs         AS ctrl_ecs,
+        cb.ecs_ratio   AS ctrl_ecs_ratio,
+        cb.arp         AS ctrl_arp
+    FROM t_combined cb
+    WHERE cb.suffix_group = '89'
+)
+-- Final output: every bucket's raw metrics followed by its relative difference to the baseline bucket.
+-- Each *_diff column = bucket value / baseline-bucket value - 1 (NULL when the baseline value is 0 or missing).
+SELECT
+    cur.dt,
+    cur.apptype,
+    cur.abcode,
+    cur.suffix_group,
+    cur.suffix,
+    -- raw metrics
+    cur.exp_per_dau,
+    cur.str_one,
+    cur.ros_one,
+    cur.str,
+    cur.ros,
+    cur.str_plus,
+    cur.ros_minus,
+    cur.bn_rov,
+    cur.c1_rov,
+    cur.cn_rov,
+    cur.d1_rov,
+    cur.dn_rov,
+    cur.total_rov,
+    cur.vovh24,
+    cur.dau,
+    cur.exp,
+    cur.distinct_vid_cnt,
+    cur.ecs,
+    cur.ecs_ratio,
+    cur.gini,
+    cur.arp,
+    cur.is_share,
+    cur.share_cnt,
+    cur.is_return_1,
+    cur.return_n_uv,
+    cur.viewh24,
+    cur.return_n_uv_noself,
+    cur.cn,
+    cur.c1,
+    cur.dn,
+    cur.d1,
+    cur.dau2,
+    -- vertical comparison: dau2 vs. the historical 5-day mean
+    cur.dau_vs_5d,
+    -- horizontal comparison: same-day bucket vs. baseline bucket 89
+    ROUND(cur.dau_vs_5d / NULLIF(base.ctrl_dau_vs_5d, 0) - 1, 6) AS dau_vs_5d_diff,
+    ROUND(cur.exp / NULLIF(base.ctrl_exp, 0) - 1, 6) AS exp_diff,
+    ROUND(cur.exp_per_dau / NULLIF(base.ctrl_exp_per_dau, 0) - 1, 6) AS exp_per_dau_diff,
+    ROUND(cur.str_one / NULLIF(base.ctrl_str_one, 0) - 1, 6) AS str_one_diff,
+    ROUND(cur.ros_one / NULLIF(base.ctrl_ros_one, 0) - 1, 6) AS ros_one_diff,
+    ROUND(cur.str / NULLIF(base.ctrl_str, 0) - 1, 6) AS str_diff,
+    ROUND(cur.ros / NULLIF(base.ctrl_ros, 0) - 1, 6) AS ros_diff,
+    ROUND(cur.vovh24 / NULLIF(base.ctrl_vovh24, 0) - 1, 6) AS vovh24_diff,
+    ROUND(cur.str_plus / NULLIF(base.ctrl_str_plus, 0) - 1, 6) AS str_plus_diff,
+    ROUND(cur.ros_minus / NULLIF(base.ctrl_ros_minus, 0) - 1, 6) AS ros_minus_diff,
+    ROUND(cur.bn_rov / NULLIF(base.ctrl_bn_rov, 0) - 1, 6) AS bn_rov_diff,
+    ROUND(cur.c1_rov / NULLIF(base.ctrl_c1_rov, 0) - 1, 6) AS c1_rov_diff,
+    ROUND(cur.cn_rov / NULLIF(base.ctrl_cn_rov, 0) - 1, 6) AS cn_rov_diff,
+    ROUND(cur.d1_rov / NULLIF(base.ctrl_d1_rov, 0) - 1, 6) AS d1_rov_diff,
+    ROUND(cur.dn_rov / NULLIF(base.ctrl_dn_rov, 0) - 1, 6) AS dn_rov_diff,
+    ROUND(cur.total_rov / NULLIF(base.ctrl_total_rov, 0) - 1, 6) AS total_rov_diff,
+    ROUND(cur.ecs / NULLIF(base.ctrl_ecs, 0) - 1, 6) AS ecs_diff,
+    ROUND(cur.ecs_ratio / NULLIF(base.ctrl_ecs_ratio, 0) - 1, 6) AS ecs_ratio_diff,
+    ROUND(cur.arp / NULLIF(base.ctrl_arp, 0) - 1, 6) AS arp_diff
+FROM t_combined cur
+LEFT JOIN t_ctrl base
+    ON  cur.dt = base.dt
+    AND cur.apptype = base.apptype
+ORDER BY
+    cur.dt DESC,
+    cur.apptype,
+    cur.abcode,
+    cur.suffix_group
+;

+ 647 - 0
tasks/00_尾号实验/base_v3_new_v3_scan.sql

@@ -0,0 +1,647 @@
+-- ════════════════════════════════════════════════════════════════════════════
+-- 两层尾号映射 (SCD Type 2 模式) + 基线对比(vs 89 桶 + vs 历史 5 天)
+--
+-- 第一层 t_suffix_group:物理尾号 → 分流桶 ID(16 个 hex 尾号分成 8 个 2-元桶)
+--   - 分流规则不变时,此层永不改
+--
+-- 第二层 t_experiment_map:分流桶 → 实验名 + 生效日期
+--   - 只列出"分配了具体实验"的桶,未列出的桶自动默认为"对照组"
+--   - 支持 1 对多:同一个实验占多个桶时,用同一 abcode 字符串多加几行
+--   - 实验切换:不删旧行,关闭 end_dt + 追加新行(保留历史可回溯)
+--
+-- ┌─ 基线配置 ──────────────────────────────────────────────────────────┐
+-- │  基线桶:89              (对照组物理桶,横向对比基准)              │
+-- │  基线天数:20260307~20260311(5 天均值,DAU2 纵向对比基准)         │
+-- │  新增列:dau_vs_5d / dau_vs_5d_diff / *_diff 系列差值列            │
+-- └─────────────────────────────────────────────────────────────────────┘
+-- ════════════════════════════════════════════════════════════════════════════
+-- Layer 1: physical suffix (a single hex character) -> traffic bucket id.
+-- The 16 suffixes are paired into 8 two-suffix buckets; rows are listed bucket by bucket.
+WITH t_suffix_group AS (
+    -- bucket 01
+    SELECT "0" AS suffix, "01" AS suffix_group
+    UNION ALL SELECT "1", "01"
+    -- bucket 2c
+    UNION ALL SELECT "2", "2c"
+    UNION ALL SELECT "c", "2c"
+    -- bucket 34
+    UNION ALL SELECT "3", "34"
+    UNION ALL SELECT "4", "34"
+    -- bucket 5d
+    UNION ALL SELECT "5", "5d"
+    UNION ALL SELECT "d", "5d"
+    -- bucket 67
+    UNION ALL SELECT "6", "67"
+    UNION ALL SELECT "7", "67"
+    -- bucket 89
+    UNION ALL SELECT "8", "89"
+    UNION ALL SELECT "9", "89"
+    -- bucket ab
+    UNION ALL SELECT "a", "ab"
+    UNION ALL SELECT "b", "ab"
+    -- bucket ef
+    UNION ALL SELECT "e", "ef"
+    UNION ALL SELECT "f", "ef"
+)
+-- Layer 2: traffic bucket -> experiment label with a validity window (SCD Type 2 style).
+--   A bucket with no row covering the run date gets no match in the LEFT JOINs of
+--   t_base / t_dau2_bucket, where the missing abcode is COALESCEd to "对照组" (control).
+--   NOTE: every bucket now has at least one row here (89 and 2c included), so the default
+--   only applies on dates that fall outside all of a bucket's windows.
+--   A suffix_group may have several rows, but their [start_dt, end_dt] windows must not
+--   overlap, so that at most one row matches any given date.
+,t_experiment_map AS
+(
+    -- ab bucket: assigned to the str*ros modelling-target experiment from 20260413
+    -- (no row before that date, so it falls back to the default label earlier)
+    SELECT "ab" AS suffix_group, "实验组:变更str*ros建模目标实验" AS abcode, "20260413" AS start_dt, "29991231" AS end_dt
+
+    -- str*ros modelling-target experiment, rolled out in stages (one experiment, many buckets)
+    --   20260320: first bucket, 01
+    --   20260330: extended to bucket 67 (its bn_ros row ended on 20260319, 10 days earlier)
+    --   20260407: extended to bucket 5d (its previous experiment ends 20260406) and to
+    --             bucket 34 (no earlier row, i.e. default label before that date)
+    --   20260413: buckets ab (row above) and 89 (row below) carry the same label
+    UNION ALL SELECT "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231"
+    UNION ALL SELECT "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+    UNION ALL SELECT "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+    UNION ALL SELECT "34", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+
+    -- Bucket 67, previous experiment: bn_ros new loss function
+    --   20260320~20260329 is a 10-day gap with no row, so 67 gets the default label then
+    UNION ALL SELECT "67", "实验组:bn_ros新损失函数",    "20260311", "20260319"
+
+    -- Bucket 5d, previous experiment: deconstructed-feature ranking str model
+    --   Taken over by the modelling-target experiment on 20260407, no gap
+    UNION ALL SELECT "5d", "实验组:解构特征排序str模型", "20260314", "20260406"
+
+    -- Bucket ef history: deconstructed str & recall (ended) -> 17-day gap -> DNN model (open-ended)
+    --   20260321~20260406 has no row, so ef gets the default label during that period
+    UNION ALL SELECT "ef", "实验组:解构特征排序str模型&召回", "20260314", "20260320"
+    UNION ALL SELECT "ef", "实验组:DNN模型",                   "20260407", "29991231"
+    -- Bucket 2c: DNN model tuning from 20260413 (default label before that date)
+    UNION ALL SELECT "2c", "实验组:DNN模型-调参", "20260413", "29991231"
+
+    -- Bucket 89: explicitly labelled control until 20260412, then the modelling-target experiment.
+    -- NOTE(review): t_ctrl still uses physical bucket 89 as the horizontal baseline after 20260413
+    -- - confirm this is intended.
+    UNION ALL SELECT "89", "对照组", "20260301", "20260412"
+    UNION ALL SELECT "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+
+
+    -- ────────────────────────────────────────────────────────────────────
+    -- How to edit (copy the sample rows into the UNION ALL list above)
+    --
+    -- Sample A: add an experiment that occupies a single bucket
+    --   UNION ALL SELECT "2c", "实验组:新策略 X", "20260501", "29991231"
+    --
+    -- Sample B: add a one-to-many experiment (same experiment on buckets 01 + 34)
+    --   Add two rows with the identical abcode string:
+    --   UNION ALL SELECT "01", "实验组:大流量 Y", "20260601", "29991231"
+    --   UNION ALL SELECT "34", "实验组:大流量 Y", "20260601", "29991231"
+    --
+    -- Sample C: switch experiments (SCD Type 2 - history is kept)
+    --   Suppose bucket 01 switches from experiment A to experiment B on 20260701:
+    --   Step 1: set end_dt of the existing row to the day before the switch:
+    --     SELECT "01", "实验组:A", "20260320", "20260630"
+    --   Step 2: append the row for the new experiment:
+    --     UNION ALL SELECT "01", "实验组:B", "20260701", "29991231"
+    --
+    -- Sample D: take an experiment offline and fall back to control (creates a gap)
+    --   Only set end_dt of the row to the last active day (no new row needed):
+    --     SELECT "5d", "实验组:A", "20250101", "20260630"
+    --   From 20260701 on no row covers bucket 5d, so it gets the default "对照组" label
+    --   If the gap is intentional that is fine; if a follow-up experiment was simply
+    --   forgotten, remember to add its row later
+    -- ────────────────────────────────────────────────────────────────────
+)
+-- Exposure-level base rows for run date ${dt} (apptype "4"), enriched with share-return
+-- counters (cn/c1, dn/d1) and tagged with the traffic bucket (suffix_group) and the
+-- experiment label (abcode) that is valid on ${dt}.
+,t_base AS
+(
+    SELECT  sub.*
+            ,sg.suffix_group
+            -- no experiment row valid on ${dt} for this bucket -> default label
+            ,COALESCE(m.abcode,"对照组") AS abcode
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        -- suffix = last character of extend.rootsessionid
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        -- collapse the raw page into "推荐" / "非推荐"; the WHERE clause below only
+                        -- admits the six pages named in the two WHEN branches, so ELSE is a fallback
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- cn / c1 per (machinecode, subsessionid, videoid) of a share event:
+                                --   a  = share events of the day
+                                --   b  = click events whose rootshareid is a's shareid
+                                --   b1 = share events made by the clicking device in the same subsession
+                                --   b2 = click events whose rootshareid is b1's shareid
+                                --   cn = distinct b2 devices other than the b1 sharer
+                                --   c1 = same as cn, restricted to b2.sharedepth = 1
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- dn / d1: return counts of the NEXT video viewed in the same subsession.
+                                -- Ordering by rn DESC makes LAG(...,1,0) read the row with the next higher
+                                -- rn (the following view); the last view of a subsession gets the default 0.
+                                -- NOTE(review): rn below is numbered per subsessionid only, while LAG
+                                -- partitions by (mid, subsessionid) - confirm subsessionid is unique per mid.
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            -- per viewed video: share count, distinct returning devices
+                                            -- (回流) and those with sharedepth = 1 (回流1); rn = view order
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn
+                                            FROM    (
+                                                        -- first videoView record per (mid, subsessionid, videoid)
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            -- share events of the same device / subsession / video
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            -- click events rooted at those shares
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                -- redundant with the IN list above ("ab100" is not in it); kept as an explicit guard
+                AND     abcode NOT IN ("ab100")
+            ) sub
+    -- INNER JOIN: only rows whose suffix is one of the 16 hex characters are analysed;
+    -- anything else (NULL or unexpected character) is dropped as a defence against bad data
+    INNER JOIN t_suffix_group sg
+    ON      sub.suffix = sg.suffix
+    -- LEFT JOIN: a bucket may have no experiment row valid on ${dt}; m.abcode is then NULL
+    -- and the COALESCE in the select list labels the row "对照组"
+    LEFT JOIN t_experiment_map m
+    ON      sg.suffix_group = m.suffix_group
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt
+)
+-- Exposure count of every vid within each (dt, apptype, abcode, suffix) on "推荐" pages.
+-- Shared intermediate result consumed by both the ECS and the ARP CTEs.
+,t_vid_exp AS (
+    SELECT
+        expo.dt
+        ,expo.apptype
+        ,expo.abcode
+        ,expo.suffix
+        ,expo.vid
+        ,COUNT(*) AS vid_exp_cnt
+    FROM t_base expo
+    WHERE expo.page = "推荐"
+    GROUP BY
+        expo.dt
+        ,expo.apptype
+        ,expo.abcode
+        ,expo.suffix
+        ,expo.vid
+)
+-- ECS (Effective Catalog Size) per (dt, apptype, abcode, suffix).
+--   ECS = 2 * SUM(share_i * rank_i) - 1
+--     share_i = exposure share of vid i inside the group
+--     rank_i  = 1-based rank of vid i by exposure count, descending
+--   A perfectly even distribution over N vids gives N; a single dominating vid gives
+--   a value close to 1. Larger = more spread out, smaller = more head-heavy.
+,t_bucket_ecs AS (
+    SELECT
+        ranked.dt
+        ,ranked.apptype
+        ,ranked.abcode
+        ,ranked.suffix
+        ,2 * SUM(ranked.exp_share * ranked.pop_rank) - 1 AS ecs
+    FROM (
+        SELECT
+            ve.dt
+            ,ve.apptype
+            ,ve.abcode
+            ,ve.suffix
+            ,ve.vid_exp_cnt
+                / SUM(ve.vid_exp_cnt) OVER (PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix)
+                AS exp_share
+            ,ROW_NUMBER() OVER (PARTITION BY ve.dt, ve.apptype, ve.abcode, ve.suffix ORDER BY ve.vid_exp_cnt DESC)
+                AS pop_rank
+        FROM t_vid_exp ve
+    ) ranked
+    GROUP BY
+        ranked.dt
+        ,ranked.apptype
+        ,ranked.abcode
+        ,ranked.suffix
+)
+-- Popularity reference for ARP: exposure count of every vid per (dt, apptype) on "推荐" pages.
+-- abcode and suffix are deliberately absent from the GROUP BY, so the count spans every
+-- experiment group and every valid suffix present in t_base.
+,t_vid_global_pop AS (
+    SELECT
+        expo.dt
+        ,expo.apptype
+        ,expo.vid
+        ,COUNT(*) AS vid_global_pop
+    FROM t_base expo
+    WHERE expo.page = "推荐"
+    GROUP BY
+        expo.dt
+        ,expo.apptype
+        ,expo.vid
+)
+-- ARP (Average Recommendation Popularity) per (dt, apptype, abcode, suffix):
+-- the exposure-weighted mean of vid_global_pop, so heavily exposed vids weigh more.
+-- Reading ECS and ARP together:
+--   high ECS + low ARP  = spread out, long-tail leaning
+--   high ECS + high ARP = spread out, but within popular content
+--   low ECS  + low ARP  = concentrated on niche content
+--   low ECS  + high ARP = concentrated on head content
+,t_bucket_arp AS (
+    SELECT
+        bucket_vid.dt
+        ,bucket_vid.apptype
+        ,bucket_vid.abcode
+        ,bucket_vid.suffix
+        ,SUM(bucket_vid.vid_exp_cnt * pop.vid_global_pop) / SUM(bucket_vid.vid_exp_cnt) AS arp
+    FROM t_vid_exp bucket_vid
+    LEFT JOIN t_vid_global_pop pop
+        ON  bucket_vid.dt = pop.dt
+        AND bucket_vid.apptype = pop.apptype
+        AND bucket_vid.vid = pop.vid
+    GROUP BY
+        bucket_vid.dt
+        ,bucket_vid.apptype
+        ,bucket_vid.abcode
+        ,bucket_vid.suffix
+)
+-- dau2 per single suffix: distinct machinecode in useractive_log on ${dt} (apptype "4"),
+-- tagged with the bucket and the experiment label valid on ${dt}.
+-- Alternative source kept for reference (not used): loghubods.useractive_log_per5min with
+-- dt between CONCAT("${dt}","000000") and CONCAT("${dt}","235500").
+,t_dau2_bucket AS (
+    SELECT
+        tagged.dt
+        ,tagged.apptype
+        ,tagged.abcode
+        ,tagged.suffix_group
+        ,tagged.suffix
+        ,COUNT(DISTINCT tagged.machinecode) AS dau2
+    FROM (
+        SELECT
+            SUBSTR(act.dt,1,8) AS dt
+            ,act.apptype
+            -- no experiment row valid on ${dt} -> default label
+            ,COALESCE(em.abcode,"对照组") AS abcode
+            ,grp.suffix_group
+            ,act.suffix
+            ,act.machinecode
+        FROM (
+            SELECT
+                dt
+                ,apptype
+                ,machinecode
+                -- suffix = last character of extparams.rootSessionId
+                ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+            FROM loghubods.useractive_log
+            WHERE dt="${dt}"
+            AND   apptype IN ("4")
+        ) act
+        INNER JOIN t_suffix_group grp
+            ON  act.suffix = grp.suffix
+        LEFT JOIN t_experiment_map em
+            ON  grp.suffix_group = em.suffix_group
+            AND '${dt}' BETWEEN em.start_dt AND em.end_dt
+    ) tagged
+    GROUP BY
+        tagged.dt
+        ,tagged.apptype
+        ,tagged.abcode
+        ,tagged.suffix_group
+        ,tagged.suffix
+)
+-- dau2 per bucket: mean of the per-suffix dau2 values inside each suffix_group.
+,t_dau2 AS (
+    SELECT
+        per_suffix.dt
+        ,per_suffix.apptype
+        ,per_suffix.abcode
+        ,per_suffix.suffix_group
+        ,AVG(per_suffix.dau2) AS dau2
+    FROM t_dau2_bucket per_suffix
+    GROUP BY
+        per_suffix.dt
+        ,per_suffix.apptype
+        ,per_suffix.abcode
+        ,per_suffix.suffix_group
+)
+-- Per-suffix metrics on "推荐" pages (UV is de-duplicated inside each suffix).
+-- Written in two steps: the inner query produces the raw aggregates once, the outer
+-- query derives the ratios from them. Ratios with a NULL result fall back to 0.
+,t_bucket AS (
+    SELECT
+        agg.dt
+        ,agg.apptype
+        ,agg.abcode
+        ,agg.suffix_group
+        ,agg.suffix
+        ,COALESCE(agg.exp_cnt / agg.mid_cnt,0) AS exp_per_dau
+        ,COALESCE(agg.sum_is_share / agg.exp_cnt,0) AS str_one
+        ,COALESCE(agg.sum_return_n_uv / agg.sum_is_share,0) AS ros_one
+        ,COALESCE(agg.sum_share_cnt / agg.exp_cnt,0) AS str
+        ,COALESCE(agg.sum_return_n_uv / agg.sum_share_cnt,0) AS ros
+        ,COALESCE(agg.sum_is_return_1 / agg.exp_cnt,0) AS str_plus
+        ,COALESCE(agg.sum_return_n_uv / agg.sum_is_return_1,0) AS ros_minus
+        ,COALESCE(agg.sum_return_n_uv / agg.exp_cnt,0) AS bn_rov
+        ,COALESCE(agg.sum_c1 / agg.exp_cnt,0) AS c1_rov
+        ,COALESCE(agg.sum_cn / agg.exp_cnt,0) AS cn_rov
+        ,COALESCE(agg.sum_d1 / agg.exp_cnt,0) AS d1_rov
+        ,COALESCE(agg.sum_dn / agg.exp_cnt,0) AS dn_rov
+        -- combined ROV = bn_rov + cn_rov + dn_rov (all three share the exposure-count denominator)
+        ,COALESCE((agg.sum_return_n_uv + agg.sum_cn + agg.sum_dn) / agg.exp_cnt,0) AS total_rov
+        ,COALESCE(agg.sum_new_exposure_cnt / agg.exp_cnt,0) AS vovh24
+        ,agg.mid_cnt AS dau
+        ,agg.exp_cnt AS exp
+        -- number of distinct vids in the suffix (denominator of ecs_ratio downstream)
+        ,agg.vid_cnt AS distinct_vid_cnt
+        ,COALESCE(agg.sum_is_share,0) AS is_share
+        ,COALESCE(agg.sum_share_cnt,0) AS share_cnt
+        ,COALESCE(agg.sum_is_return_1,0) AS is_return_1
+        ,COALESCE(agg.sum_return_n_uv,0) AS return_n_uv
+        ,COALESCE(agg.sum_new_exposure_cnt,0) AS viewh24
+        ,COALESCE(agg.sum_return_n_uv_noself,0) AS return_n_uv_noself
+        ,COALESCE(agg.sum_cn,0) AS cn
+        ,COALESCE(agg.sum_c1,0) AS c1
+        ,COALESCE(agg.sum_dn,0) AS dn
+        ,COALESCE(agg.sum_d1,0) AS d1
+    FROM (
+        SELECT
+            expo.dt
+            ,expo.apptype
+            ,expo.abcode
+            ,expo.suffix_group
+            ,expo.suffix
+            ,COUNT(*) AS exp_cnt
+            ,COUNT(DISTINCT expo.mid) AS mid_cnt
+            ,COUNT(DISTINCT expo.vid) AS vid_cnt
+            ,SUM(expo.is_share) AS sum_is_share
+            ,SUM(expo.share_cnt) AS sum_share_cnt
+            ,SUM(expo.is_return_1) AS sum_is_return_1
+            ,SUM(expo.return_n_uv) AS sum_return_n_uv
+            ,SUM(expo.return_n_uv_noself) AS sum_return_n_uv_noself
+            ,SUM(expo.new_exposure_cnt) AS sum_new_exposure_cnt
+            ,SUM(expo.cn) AS sum_cn
+            ,SUM(expo.c1) AS sum_c1
+            ,SUM(expo.dn) AS sum_dn
+            ,SUM(expo.d1) AS sum_d1
+        FROM t_base expo
+        WHERE expo.page = "推荐"
+        GROUP BY
+            expo.dt
+            ,expo.apptype
+            ,expo.abcode
+            ,expo.suffix_group
+            ,expo.suffix
+    ) agg
+)
+-- Bucket-level metrics: mean over the suffixes of each (dt, apptype, abcode, suffix_group),
+-- with the per-suffix ECS and ARP values joined in.
+,t_metrics AS (
+    SELECT
+        bk.dt
+        ,bk.apptype
+        ,bk.abcode
+        ,bk.suffix_group
+        -- rate metrics
+        ,ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau
+        ,ROUND(AVG(bk.str_one),6) AS str_one
+        ,ROUND(AVG(bk.ros_one),6) AS ros_one
+        ,ROUND(AVG(bk.str),6) AS str
+        ,ROUND(AVG(bk.ros),6) AS ros
+        ,ROUND(AVG(bk.str_plus),6) AS str_plus
+        ,ROUND(AVG(bk.ros_minus),6) AS ros_minus
+        ,ROUND(AVG(bk.bn_rov),6) AS bn_rov
+        ,ROUND(AVG(bk.c1_rov),6) AS c1_rov
+        ,ROUND(AVG(bk.cn_rov),6) AS cn_rov
+        ,ROUND(AVG(bk.d1_rov),6) AS d1_rov
+        ,ROUND(AVG(bk.dn_rov),6) AS dn_rov
+        ,ROUND(AVG(bk.total_rov),6) AS total_rov
+        ,ROUND(AVG(bk.vovh24),6) AS vovh24
+        -- volume metrics
+        ,AVG(bk.dau) AS dau
+        ,AVG(bk.exp) AS exp
+        -- distribution-diversity metrics
+        ,ROUND(AVG(bk.distinct_vid_cnt),0) AS distinct_vid_cnt
+        ,ROUND(AVG(ecs_t.ecs),1) AS ecs
+        -- ecs_ratio normalises ECS by the distinct-vid count (NULL when that count is 0)
+        ,ROUND(AVG(ecs_t.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS ecs_ratio
+        -- gini is emitted as 1 - ecs_ratio
+        ,ROUND(1 - AVG(ecs_t.ecs) / NULLIF(AVG(bk.distinct_vid_cnt),0),6) AS gini
+        ,ROUND(AVG(arp_t.arp),0) AS arp
+        -- raw counters
+        ,AVG(bk.is_share) AS is_share
+        ,AVG(bk.share_cnt) AS share_cnt
+        ,AVG(bk.is_return_1) AS is_return_1
+        ,AVG(bk.return_n_uv) AS return_n_uv
+        ,AVG(bk.viewh24) AS viewh24
+        ,AVG(bk.return_n_uv_noself) AS return_n_uv_noself
+        ,AVG(bk.cn) AS cn
+        ,AVG(bk.c1) AS c1
+        ,AVG(bk.dn) AS dn
+        ,AVG(bk.d1) AS d1
+        -- comma-separated list of the suffixes that contributed to this row
+        ,WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix
+    FROM t_bucket bk
+    LEFT JOIN t_bucket_ecs ecs_t
+        ON  bk.dt = ecs_t.dt
+        AND bk.apptype = ecs_t.apptype
+        AND bk.abcode = ecs_t.abcode
+        AND bk.suffix = ecs_t.suffix
+    LEFT JOIN t_bucket_arp arp_t
+        ON  bk.dt = arp_t.dt
+        AND bk.apptype = arp_t.apptype
+        AND bk.abcode = arp_t.abcode
+        AND bk.suffix = arp_t.suffix
+    GROUP BY
+        bk.dt
+        ,bk.apptype
+        ,bk.abcode
+        ,bk.suffix_group
+)
+-- ════════════════════════════════════════════════════════════════════════════
+-- Baseline layer: 5-day historical DAU2 mean + horizontal comparison vs bucket 89
+-- ════════════════════════════════════════════════════════════════════════════
+-- Baseline DAU2 per suffix_group, computed by scanning useractive_log for the five
+-- baseline days (20260307..20260311, apptype "4"):
+--   1. distinct machinecode per (day, suffix)
+--   2. mean over the suffixes of each bucket -> one value per (day, suffix_group)
+--   3. mean over the days                    -> dau2_base5 per suffix_group
+,t_dau2_base5 AS (
+    SELECT
+        per_day.suffix_group
+        ,AVG(per_day.dau2_daily) AS dau2_base5
+    FROM (
+        SELECT
+            per_suffix.dt
+            ,per_suffix.suffix_group
+            ,AVG(per_suffix.dau2) AS dau2_daily
+        FROM (
+            SELECT
+                act.active_day AS dt
+                ,grp.suffix_group
+                ,act.suffix
+                ,COUNT(DISTINCT act.machinecode) AS dau2
+            FROM (
+                SELECT
+                    SUBSTR(dt,1,8) AS active_day
+                    ,machinecode
+                    -- suffix = last character of extparams.rootSessionId
+                    ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+                FROM loghubods.useractive_log
+                WHERE dt IN ('20260307','20260308','20260309','20260310','20260311')
+                AND   apptype IN ("4")
+            ) act
+            INNER JOIN t_suffix_group grp
+                ON act.suffix = grp.suffix
+            GROUP BY
+                act.active_day
+                ,grp.suffix_group
+                ,act.suffix
+        ) per_suffix
+        GROUP BY
+            per_suffix.dt
+            ,per_suffix.suffix_group
+    ) per_day
+    GROUP BY per_day.suffix_group
+)
+-- Bucket metrics + same-day dau2 + dau_vs_5d (dau2 divided by the 5-day baseline mean).
+-- The baseline is joined on suffix_group only; dau_vs_5d is NULL when the baseline is
+-- missing or 0.
+,t_combined AS (
+    SELECT
+        met.*
+        ,today.dau2
+        ,ROUND(today.dau2 / NULLIF(hist.dau2_base5, 0), 6) AS dau_vs_5d
+    FROM t_metrics met
+    LEFT JOIN t_dau2 today
+        ON  met.dt = today.dt
+        AND met.apptype = today.apptype
+        AND met.abcode = today.abcode
+        AND met.suffix_group = today.suffix_group
+    LEFT JOIN t_dau2_base5 hist
+        ON  met.suffix_group = hist.suffix_group
+)
+-- Daily metrics of the baseline bucket (physical bucket 89), renamed with a ctrl_ prefix
+-- so they can sit next to every bucket's own metrics in the final comparison.
+-- The filter is on suffix_group only, independent of the bucket's abcode label.
+-- NOTE(review): t_experiment_map labels bucket 89 as an experiment group from 20260413 on;
+-- confirm it is still the intended baseline after that date.
+,t_ctrl AS (
+    SELECT
+        base.dt
+        ,base.apptype
+        ,base.dau_vs_5d AS ctrl_dau_vs_5d
+        ,base.exp AS ctrl_exp
+        ,base.exp_per_dau AS ctrl_exp_per_dau
+        ,base.str_one AS ctrl_str_one
+        ,base.ros_one AS ctrl_ros_one
+        ,base.str AS ctrl_str
+        ,base.ros AS ctrl_ros
+        ,base.vovh24 AS ctrl_vovh24
+        ,base.str_plus AS ctrl_str_plus
+        ,base.ros_minus AS ctrl_ros_minus
+        ,base.bn_rov AS ctrl_bn_rov
+        ,base.c1_rov AS ctrl_c1_rov
+        ,base.cn_rov AS ctrl_cn_rov
+        ,base.d1_rov AS ctrl_d1_rov
+        ,base.dn_rov AS ctrl_dn_rov
+        ,base.total_rov AS ctrl_total_rov
+        ,base.ecs AS ctrl_ecs
+        ,base.ecs_ratio AS ctrl_ecs_ratio
+        ,base.arp AS ctrl_arp
+    FROM t_combined base
+    WHERE base.suffix_group = '89'
+)
+-- Final output: every bucket's metrics plus the comparison columns.
+--   <metric>_diff = bucket value / baseline-bucket (89) value - 1, for the same dt and apptype.
+--   A _diff column is NULL when the baseline row is missing or the baseline value is 0.
+SELECT
+    cur.dt
+    ,cur.apptype
+    ,cur.abcode
+    ,cur.suffix_group
+    ,cur.suffix
+    -- raw metrics
+    ,cur.exp_per_dau
+    ,cur.str_one
+    ,cur.ros_one
+    ,cur.str
+    ,cur.ros
+    ,cur.str_plus
+    ,cur.ros_minus
+    ,cur.bn_rov
+    ,cur.c1_rov
+    ,cur.cn_rov
+    ,cur.d1_rov
+    ,cur.dn_rov
+    ,cur.total_rov
+    ,cur.vovh24
+    ,cur.dau
+    ,cur.exp
+    ,cur.distinct_vid_cnt
+    ,cur.ecs
+    ,cur.ecs_ratio
+    ,cur.gini
+    ,cur.arp
+    ,cur.is_share
+    ,cur.share_cnt
+    ,cur.is_return_1
+    ,cur.return_n_uv
+    ,cur.viewh24
+    ,cur.return_n_uv_noself
+    ,cur.cn
+    ,cur.c1
+    ,cur.dn
+    ,cur.d1
+    ,cur.dau2
+    -- vertical comparison: dau2 vs the 5-day historical mean
+    ,cur.dau_vs_5d
+    -- horizontal comparison: each bucket vs the baseline bucket on the same day
+    ,ROUND(cur.dau_vs_5d / NULLIF(baseline.ctrl_dau_vs_5d, 0) - 1, 6) AS dau_vs_5d_diff
+    ,ROUND(cur.exp / NULLIF(baseline.ctrl_exp, 0) - 1, 6) AS exp_diff
+    ,ROUND(cur.exp_per_dau / NULLIF(baseline.ctrl_exp_per_dau, 0) - 1, 6) AS exp_per_dau_diff
+    ,ROUND(cur.str_one / NULLIF(baseline.ctrl_str_one, 0) - 1, 6) AS str_one_diff
+    ,ROUND(cur.ros_one / NULLIF(baseline.ctrl_ros_one, 0) - 1, 6) AS ros_one_diff
+    ,ROUND(cur.str / NULLIF(baseline.ctrl_str, 0) - 1, 6) AS str_diff
+    ,ROUND(cur.ros / NULLIF(baseline.ctrl_ros, 0) - 1, 6) AS ros_diff
+    ,ROUND(cur.vovh24 / NULLIF(baseline.ctrl_vovh24, 0) - 1, 6) AS vovh24_diff
+    ,ROUND(cur.str_plus / NULLIF(baseline.ctrl_str_plus, 0) - 1, 6) AS str_plus_diff
+    ,ROUND(cur.ros_minus / NULLIF(baseline.ctrl_ros_minus, 0) - 1, 6) AS ros_minus_diff
+    ,ROUND(cur.bn_rov / NULLIF(baseline.ctrl_bn_rov, 0) - 1, 6) AS bn_rov_diff
+    ,ROUND(cur.c1_rov / NULLIF(baseline.ctrl_c1_rov, 0) - 1, 6) AS c1_rov_diff
+    ,ROUND(cur.cn_rov / NULLIF(baseline.ctrl_cn_rov, 0) - 1, 6) AS cn_rov_diff
+    ,ROUND(cur.d1_rov / NULLIF(baseline.ctrl_d1_rov, 0) - 1, 6) AS d1_rov_diff
+    ,ROUND(cur.dn_rov / NULLIF(baseline.ctrl_dn_rov, 0) - 1, 6) AS dn_rov_diff
+    ,ROUND(cur.total_rov / NULLIF(baseline.ctrl_total_rov, 0) - 1, 6) AS total_rov_diff
+    ,ROUND(cur.ecs / NULLIF(baseline.ctrl_ecs, 0) - 1, 6) AS ecs_diff
+    ,ROUND(cur.ecs_ratio / NULLIF(baseline.ctrl_ecs_ratio, 0) - 1, 6) AS ecs_ratio_diff
+    ,ROUND(cur.arp / NULLIF(baseline.ctrl_arp, 0) - 1, 6) AS arp_diff
+FROM t_combined cur
+LEFT JOIN t_ctrl baseline
+    ON  cur.dt = baseline.dt
+    AND cur.apptype = baseline.apptype
+ORDER BY
+    cur.dt DESC
+    ,cur.apptype
+    ,cur.abcode
+    ,cur.suffix_group
+;

+ 10 - 0
tasks/00_尾号实验/base_v4_v1_new_v3.json

@@ -0,0 +1,10 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "vJQSTF",
+  "sort": "dt:desc,suffix_group:asc",
+  "order": {
+      "suffix_group": ["ab", "34", "2c", "67", "01", "5d", "ef", "89"]
+  },
+  "cols": null,
+  "append_cols": true
+}

+ 619 - 0
tasks/00_尾号实验/base_v4_v1_new_v3.sql

@@ -0,0 +1,619 @@
+-- ════════════════════════════════════════════════════════════════════════════
+-- 两层尾号映射 (SCD Type 2 模式) — apptype = 0 + 基线对比(vs 89 桶 + vs 历史 5 天)
+-- [硬编码版:基线 DAU2 预计算,无额外扫描]
+--
+-- 第一层 t_suffix_group:物理尾号 → 分流桶 ID(16 个 hex 尾号分成 8 个 2-元桶)
+--   - 分流规则不变时,此层永不改
+--
+-- 第二层 t_experiment_map:分流桶 → 实验名 + 生效日期
+--   - 只列出"分配了具体实验"的桶,未列出的桶自动默认为"对照组"
+--   - 支持 1 对多:同一个实验占多个桶时,用同一 abcode 字符串多加几行
+--   - 实验切换:不删旧行,关闭 end_dt + 追加新行(保留历史可回溯)
+--
+-- ┌─ 基线配置 ──────────────────────────────────────────────────────────┐
+-- │  基线桶:89              (对照组物理桶,横向对比基准)              │
+-- │  基线天数:20260307~20260311(5 天均值,DAU2 纵向对比基准)         │
+-- │  新增列:dau_vs_5d / dau_vs_5d_diff / *_diff 系列差值列            │
+-- └─────────────────────────────────────────────────────────────────────┘
+-- ════════════════════════════════════════════════════════════════════════════
+-- Layer 1 lookup: each of the 16 hex tail characters belongs to exactly one 2-suffix bucket.
+-- Rows are listed bucket by bucket; row order has no effect on the joins that consume this CTE.
+WITH t_suffix_group AS
+(
+    -- bucket 01
+    SELECT "0" AS suffix, "01" AS suffix_group
+    UNION ALL SELECT "1", "01"
+    -- bucket 2c
+    UNION ALL SELECT "2", "2c"
+    UNION ALL SELECT "c", "2c"
+    -- bucket 34
+    UNION ALL SELECT "3", "34"
+    UNION ALL SELECT "4", "34"
+    -- bucket 5d
+    UNION ALL SELECT "5", "5d"
+    UNION ALL SELECT "d", "5d"
+    -- bucket 67
+    UNION ALL SELECT "6", "67"
+    UNION ALL SELECT "7", "67"
+    -- bucket 89
+    UNION ALL SELECT "8", "89"
+    UNION ALL SELECT "9", "89"
+    -- bucket ab
+    UNION ALL SELECT "a", "ab"
+    UNION ALL SELECT "b", "ab"
+    -- bucket ef
+    UNION ALL SELECT "e", "ef"
+    UNION ALL SELECT "f", "ef"
+)
+-- Current experiment mapping (apptype = 0): suffix bucket -> experiment name + validity window.
+--   A bucket with no row whose [start_dt, end_dt] covers '${dt}' falls back to "对照组"
+--   through the COALESCE applied where this CTE is LEFT JOINed (t_base, t_dau2_bucket).
+--   A suffix_group may carry several rows (SCD Type 2), but their date ranges must not overlap:
+--   two rows matching the same '${dt}' would duplicate that bucket's rows in the LEFT JOIN.
+--   NOTE(review): the earlier note here named 89 / 2c as "unlisted" buckets and carried a TODO about
+--   a placeholder start_dt of '20250101'; both buckets are listed below and no active row uses
+--   '20250101', so those remarks looked stale — confirm.
+,t_experiment_map AS
+(
+    -- ab bucket (original label: "former baseline")
+    SELECT "ab" AS suffix_group, "实验组:变更str*ros建模目标实验" AS abcode, "20260413" AS start_dt, "29991231" AS end_dt
+   
+    -- modeling-target experiment
+    UNION ALL SELECT "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231"
+
+    -- original label: "bn_ros new loss function"
+    -- NOTE(review): the abcode on this row is the str*ros modeling-target experiment — confirm which is current
+    UNION ALL SELECT "34", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+
+    -- original label: "cn_rov experiment"
+    -- NOTE(review): the abcode on this row is the str*ros modeling-target experiment — confirm which is current
+    UNION ALL SELECT "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+
+    -- original label: "deconstructed-feature ranking str model"
+    -- NOTE(review): the abcode on this row is the str*ros modeling-target experiment — confirm which is current
+    UNION ALL SELECT "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+
+    UNION ALL SELECT "ef", "实验组:DNN模型-调参", "20260410", "29991231"
+
+    UNION ALL SELECT "2c", "实验组:DNN模型", "20260413", "29991231"
+
+    -- NOTE(review): t_ctrl uses bucket 89 as the horizontal comparison baseline, yet from 20260413
+    -- this row labels it as an experiment group — confirm the baseline is still meant to be 89 after that date.
+    UNION ALL SELECT "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+
+    -- bucket 89 is the control group for 20260301 ~ 20260412
+    UNION ALL SELECT "89", "对照组", "20260301", "20260412"
+
+    -- ────────────────────────────────────────────────────────────────────
+    -- How-to samples (copy a line into the UNION ALL list above)
+    --
+    -- Sample A: add an experiment that occupies a single bucket
+    --   UNION ALL SELECT "2c", "实验组:新策略 X", "20260501", "29991231"
+    --
+    -- Sample B: add a one-to-many experiment (same experiment on buckets 01 + 34)
+    --   add two rows with the same abcode string; each bucket still gets its own output row
+    --   because the downstream GROUP BY keys include suffix_group as well as abcode:
+    --   UNION ALL SELECT "01", "实验组:大流量 Y", "20260601", "29991231"
+    --   UNION ALL SELECT "34", "实验组:大流量 Y", "20260601", "29991231"
+    --
+    -- Sample C: switch experiments while keeping history (SCD Type 2)
+    --   suppose bucket 01 switches from experiment A to experiment B on 20260701:
+    --   Step 1: close the existing row by setting end_dt to the day before the switch:
+    --     SELECT "01", "实验组:A", "20250101", "20260630"
+    --   Step 2: append the row for the new experiment:
+    --     UNION ALL SELECT "01", "实验组:B", "20260701", "29991231"
+    --
+    -- Sample D: retire an experiment back to the control group (leaves a gap)
+    --   just set that row's end_dt to the day before retirement (no new row needed):
+    --     SELECT "5d", "实验组:A", "20250101", "20260630"
+    --   from 20260701 on no row covers bucket 5d, so it falls back to "对照组"
+    -- ────────────────────────────────────────────────────────────────────
+)
+-- t_base: rows of the exposure base table for '${dt}' (apptype "0"), enriched with share-driven
+-- return counters (cn/c1 from sub-query cc, dn/d1 from sub-query dd), the suffix bucket and the
+-- experiment name that t_experiment_map assigns to that bucket on '${dt}'.
+,t_base AS
+(
+    SELECT  sub.*
+            ,sg.suffix_group
+            ,COALESCE(m.abcode,"对照组") AS abcode
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        -- last character of extend.rootsessionid; matched against t_suffix_group below
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        -- collapse raw page names into two classes; every page value admitted by the
+                        -- WHERE filter at the end of this sub-query is covered by a WHEN branch
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- c1/cn: return UV produced when a share is clicked and the clicker shares again.
+                                --   a  = share events of '${dt}'
+                                --   b  = click events whose rootshareid is a's shareid
+                                --   b1 = share events made by the clicking device in the same sub-session
+                                --   b2 = click events whose rootshareid is b1's shareid
+                                -- cn counts distinct b2 devices other than the b1 sharer; c1 additionally requires b2.sharedepth = 1.
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- d1/dn: returns brought in by the NEXT video of the same sub-session.
+                                -- rn numbers views by ascending logtimestamp, so LAG(..) ordered by rn DESC reads the row
+                                -- with the next-higher rn; the last video of a sub-session gets the default 0.
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            -- per (mid, sub-session, video): distinct shares, distinct returning devices
+                                            -- other than the sharer, and the same restricted to sharedepth = 1
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    -- NOTE(review): partitioned by subsessionid only, while the LAG above partitions by
+                                                    -- (mid, subsessionid); equivalent only if a subsessionid never spans two mids — confirm
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn
+                                            FROM    (
+                                                        -- earliest videoView row per (mid, sub-session, video)
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                -- raw abcode whitelist of the exposure table (ab5/ab6/ab7 are not included here)
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9")
+                -- NOTE(review): redundant — "ab100" can never pass the IN list above
+                AND     abcode NOT IN ("ab100")
+            ) sub
+    -- INNER JOIN: only suffixes present in t_suffix_group (the 16 hex characters) are analysed; guards against bad data
+    INNER JOIN t_suffix_group sg
+    ON      sub.suffix = sg.suffix
+    -- LEFT JOIN: a bucket may have no experiment row for '${dt}'; m.abcode is then NULL and COALESCE yields "对照组"
+    LEFT JOIN t_experiment_map m
+    ON      sg.suffix_group = m.suffix_group
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt
+)
+-- Exposure count of every vid per (dt, apptype, abcode, suffix) on recommendation pages.
+-- Shared intermediate for the ECS and ARP CTEs below.
+,t_vid_exp AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        suffix,
+        vid,
+        COUNT(*) AS vid_exp_cnt
+    FROM t_base
+    WHERE page = "推荐"
+    GROUP BY
+        dt,
+        apptype,
+        abcode,
+        suffix,
+        vid
+)
+-- Per-suffix ECS (Effective Catalog Size): how many videos the exposure is "effectively" spread over.
+--   ECS = 2 * SUM(exp_share_i * exp_rank_i) - 1
+--   exp_share_i = share of the suffix's exposures that went to vid i
+--   exp_rank_i  = 1-based rank of vid i by exposure count, descending
+-- Ranges over [1, distinct_vid_cnt]: larger means more spread out, smaller means more head-concentrated.
+,t_bucket_ecs AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        suffix,
+        2 * SUM(exp_share * exp_rank) - 1 AS ecs
+    FROM (
+        SELECT
+            dt,
+            apptype,
+            abcode,
+            suffix,
+            vid_exp_cnt / SUM(vid_exp_cnt) OVER (PARTITION BY dt, apptype, abcode, suffix) AS exp_share,
+            ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode, suffix ORDER BY vid_exp_cnt DESC) AS exp_rank
+        FROM t_vid_exp
+    ) ranked
+    GROUP BY
+        dt,
+        apptype,
+        abcode,
+        suffix
+)
+-- Exposure count of each vid over every row of t_base on recommendation pages
+-- (not split by experiment abcode or suffix); used as the popularity reference for ARP.
+,t_vid_global_pop AS
+(
+    SELECT
+        dt,
+        apptype,
+        vid,
+        COUNT(*) AS vid_global_pop
+    FROM t_base
+    WHERE page = "推荐"
+    GROUP BY
+        dt,
+        apptype,
+        vid
+)
+-- Per-suffix ARP (Average Recommendation Popularity): exposure-weighted mean of vid_global_pop,
+-- so a vid with more exposures in the suffix weighs more.
+-- Reading ECS and ARP together (author's guidance):
+--   high ECS + low ARP  = spread out, leaning long-tail             -> ideal
+--   high ECS + high ARP = spread out, but variety within the head   -> watch out
+--   low ECS  + low ARP  = concentrated on niche videos              -> special case
+--   low ECS  + high ARP = concentrated on the head                  -> model collapse
+,t_bucket_arp AS
+(
+    SELECT
+        ve.dt,
+        ve.apptype,
+        ve.abcode,
+        ve.suffix,
+        SUM(ve.vid_exp_cnt * gp.vid_global_pop) / SUM(ve.vid_exp_cnt) AS arp
+    FROM t_vid_exp ve
+    LEFT JOIN t_vid_global_pop gp
+        ON  ve.dt = gp.dt
+        AND ve.apptype = gp.apptype
+        AND ve.vid = gp.vid
+    GROUP BY
+        ve.dt,
+        ve.apptype,
+        ve.abcode,
+        ve.suffix
+)
+-- dau2 per single suffix: distinct active devices (machinecode) in useractive_log for '${dt}',
+-- tagged with the suffix bucket and the experiment name valid on '${dt}'.
+-- Alternative source (not used): loghubods.useractive_log_per5min filtered with
+--   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500");
+--   presumably the reason dt is truncated with SUBSTR(dt,1,8) below — confirm.
+-- NOTE(review): this whitelist includes ab5/ab6/ab7 whereas the exposure filter in t_base does not;
+--   the list is left unchanged because the hard-coded baseline in t_dau2_base5 may have been
+--   computed with it — confirm whether the two filters should match.
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(sub.dt,1,8) AS dt
+            ,sub.apptype
+            ,COALESCE(m.abcode,"对照组") AS abcode
+            ,sg.suffix_group
+            ,sub.suffix
+            ,COUNT(DISTINCT sub.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        -- last character of extparams.rootSessionId
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+                FROM    loghubods.useractive_log
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0")
+                -- the whitelist alone already excludes "ab100", so no separate NOT IN predicate is needed
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+            ) sub
+    -- only suffixes present in t_suffix_group are counted
+    INNER JOIN t_suffix_group sg
+    ON      sub.suffix = sg.suffix
+    -- no matching experiment row -> m.abcode is NULL -> "对照组"
+    LEFT JOIN t_experiment_map m
+    ON      sg.suffix_group = m.suffix_group
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"对照组")
+             ,sg.suffix_group
+             ,sub.suffix
+)
+-- dau2 per suffix_group: mean of the per-suffix dau2 values inside each bucket.
+,t_dau2 AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        suffix_group,
+        AVG(dau2) AS dau2
+    FROM t_dau2_bucket
+    GROUP BY
+        dt,
+        apptype,
+        abcode,
+        suffix_group
+)
+-- Per-suffix metrics on recommendation pages (distinct counts are de-duplicated within the suffix).
+-- Ratios whose denominator can be zero (no shares, no returns, no non-NULL mid) divide by
+-- NULLIF(denominator, 0) so the result is NULL and COALESCE turns it into 0, instead of relying on
+-- the engine's divide-by-zero behaviour. COUNT(1) is at least 1 for any group and needs no guard.
+,t_bucket AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,suffix_group
+            ,suffix
+            ,COALESCE(COUNT(1) / NULLIF(COUNT(DISTINCT mid),0),0) AS exp_per_dau
+            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(is_share),0),0) AS ros_one
+            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(share_cnt),0),0) AS ros
+            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus
+            ,COALESCE(SUM(return_n_uv) / NULLIF(SUM(is_return_1),0),0) AS ros_minus
+            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS bn_rov
+            ,COALESCE(SUM(c1) / COUNT(1),0) AS c1_rov
+            ,COALESCE(SUM(cn) / COUNT(1),0) AS cn_rov
+            ,COALESCE(SUM(d1) / COUNT(1),0) AS d1_rov
+            ,COALESCE(SUM(dn) / COUNT(1),0) AS dn_rov
+            -- combined ROV = bn_rov + cn_rov + dn_rov (all three share the COUNT(1) denominator, so the numerators can be summed)
+            ,COALESCE((SUM(return_n_uv) + SUM(cn) + SUM(dn)) / COUNT(1),0) AS total_rov
+            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            -- distinct vids exposed in the suffix (the denominator of ecs_ratio in t_metrics)
+            ,COUNT(DISTINCT vid) AS distinct_vid_cnt
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(cn),0) AS cn
+            ,COALESCE(SUM(c1),0) AS c1
+            ,COALESCE(SUM(dn),0) AS dn
+            ,COALESCE(SUM(d1),0) AS d1
+    FROM    t_base
+    WHERE   page = "推荐"
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,suffix_group
+             ,suffix
+)
+-- Roll the per-suffix metrics up to suffix_group level by averaging over the bucket's suffixes
+-- (covers the combined ROV and the distribution-diversity metrics ecs / ecs_ratio / gini / arp).
+,t_metrics AS
+(
+    SELECT
+        bk.dt,
+        bk.apptype,
+        bk.abcode,
+        bk.suffix_group,
+        ROUND(AVG(bk.exp_per_dau), 2) AS exp_per_dau,
+        ROUND(AVG(bk.str_one), 6) AS str_one,
+        ROUND(AVG(bk.ros_one), 6) AS ros_one,
+        ROUND(AVG(bk.str), 6) AS str,
+        ROUND(AVG(bk.ros), 6) AS ros,
+        ROUND(AVG(bk.str_plus), 6) AS str_plus,
+        ROUND(AVG(bk.ros_minus), 6) AS ros_minus,
+        ROUND(AVG(bk.bn_rov), 6) AS bn_rov,
+        ROUND(AVG(bk.c1_rov), 6) AS c1_rov,
+        ROUND(AVG(bk.cn_rov), 6) AS cn_rov,
+        ROUND(AVG(bk.d1_rov), 6) AS d1_rov,
+        ROUND(AVG(bk.dn_rov), 6) AS dn_rov,
+        ROUND(AVG(bk.total_rov), 6) AS total_rov,
+        ROUND(AVG(bk.vovh24), 6) AS vovh24,
+        AVG(bk.dau) AS dau,
+        AVG(bk.exp) AS exp,
+        ROUND(AVG(bk.distinct_vid_cnt), 0) AS distinct_vid_cnt,
+        ROUND(AVG(ec.ecs), 1) AS ecs,
+        -- ECS normalised by the number of distinct vids, removing the effect of pool size
+        ROUND(AVG(ec.ecs) / NULLIF(AVG(bk.distinct_vid_cnt), 0), 6) AS ecs_ratio,
+        -- gini is defined here as 1 - ecs_ratio
+        ROUND(1 - AVG(ec.ecs) / NULLIF(AVG(bk.distinct_vid_cnt), 0), 6) AS gini,
+        ROUND(AVG(ar.arp), 0) AS arp,
+        AVG(bk.is_share) AS is_share,
+        AVG(bk.share_cnt) AS share_cnt,
+        AVG(bk.is_return_1) AS is_return_1,
+        AVG(bk.return_n_uv) AS return_n_uv,
+        AVG(bk.viewh24) AS viewh24,
+        AVG(bk.return_n_uv_noself) AS return_n_uv_noself,
+        AVG(bk.cn) AS cn,
+        AVG(bk.c1) AS c1,
+        AVG(bk.dn) AS dn,
+        AVG(bk.d1) AS d1,
+        -- comma-separated list of the suffixes that contributed to this bucket
+        WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix
+    FROM t_bucket bk
+    LEFT JOIN t_bucket_ecs ec
+        ON  bk.dt = ec.dt
+        AND bk.apptype = ec.apptype
+        AND bk.abcode = ec.abcode
+        AND bk.suffix = ec.suffix
+    LEFT JOIN t_bucket_arp ar
+        ON  bk.dt = ar.dt
+        AND bk.apptype = ar.apptype
+        AND bk.abcode = ar.abcode
+        AND bk.suffix = ar.suffix
+    GROUP BY
+        bk.dt,
+        bk.apptype,
+        bk.abcode,
+        bk.suffix_group
+)
+-- ════════════════════════════════════════════════════════════════════════════
+-- Baseline comparison layer: DAU2 vs. its 5-day historical mean + horizontal comparison vs. baseline bucket 89
+-- ════════════════════════════════════════════════════════════════════════════
+-- Hard-coded 5-day mean DAU2 per suffix_group (per the file header: baseline period 20260307~20260311, apptype = 0).
+-- Used only as the denominator of dau_vs_5d in t_combined.
+-- NOTE(review): the values are pasted constants; they must be regenerated by hand if the baseline
+-- period or the dau2 filters in t_dau2_bucket change — confirm how they were produced.
+,t_dau2_base5 AS
+(
+    SELECT "01" AS suffix_group, 250063.2 AS dau2_base5
+    UNION ALL SELECT "2c", 261125.5
+    UNION ALL SELECT "34", 258002.4
+    UNION ALL SELECT "5d", 253589.2
+    UNION ALL SELECT "67", 258466.2
+    UNION ALL SELECT "89", 251052.3
+    UNION ALL SELECT "ab", 248110.7
+    UNION ALL SELECT "ef", 247799.1
+)
+-- Bucket metrics joined with the day's dau2 and with dau_vs_5d
+-- (today's dau2 divided by the bucket's hard-coded 5-day baseline mean).
+,t_combined AS
+(
+    SELECT
+        mt.*,
+        du.dau2,
+        ROUND(du.dau2 / NULLIF(b5.dau2_base5, 0), 6) AS dau_vs_5d
+    FROM t_metrics mt
+    LEFT JOIN t_dau2 du
+        ON  mt.dt = du.dt
+        AND mt.apptype = du.apptype
+        AND mt.abcode = du.abcode
+        AND mt.suffix_group = du.suffix_group
+    LEFT JOIN t_dau2_base5 b5
+        ON  mt.suffix_group = b5.suffix_group
+)
+-- Daily metrics of baseline bucket 89, renamed with a ctrl_ prefix;
+-- the final SELECT divides every bucket's metrics by these values.
+,t_ctrl AS
+(
+    SELECT
+        dt,
+        apptype,
+        dau_vs_5d AS ctrl_dau_vs_5d,
+        exp AS ctrl_exp,
+        exp_per_dau AS ctrl_exp_per_dau,
+        str_one AS ctrl_str_one,
+        ros_one AS ctrl_ros_one,
+        str AS ctrl_str,
+        ros AS ctrl_ros,
+        vovh24 AS ctrl_vovh24,
+        str_plus AS ctrl_str_plus,
+        ros_minus AS ctrl_ros_minus,
+        bn_rov AS ctrl_bn_rov,
+        c1_rov AS ctrl_c1_rov,
+        cn_rov AS ctrl_cn_rov,
+        d1_rov AS ctrl_d1_rov,
+        dn_rov AS ctrl_dn_rov,
+        total_rov AS ctrl_total_rov,
+        ecs AS ctrl_ecs,
+        ecs_ratio AS ctrl_ecs_ratio,
+        arp AS ctrl_arp
+    FROM t_combined
+    WHERE suffix_group = '89'
+)
+-- Final output: every bucket's metrics plus the *_diff columns, each computed as
+-- (bucket value / baseline-bucket value) - 1 and left NULL when the baseline value is 0 or missing.
+SELECT
+    cur.dt,
+    cur.apptype,
+    cur.abcode,
+    cur.suffix_group,
+    cur.suffix,
+    cur.exp_per_dau,
+    cur.str_one,
+    cur.ros_one,
+    cur.str,
+    cur.ros,
+    cur.str_plus,
+    cur.ros_minus,
+    cur.bn_rov,
+    cur.c1_rov,
+    cur.cn_rov,
+    cur.d1_rov,
+    cur.dn_rov,
+    cur.total_rov,
+    cur.vovh24,
+    cur.dau,
+    cur.exp,
+    cur.distinct_vid_cnt,
+    cur.ecs,
+    cur.ecs_ratio,
+    cur.gini,
+    cur.arp,
+    cur.is_share,
+    cur.share_cnt,
+    cur.is_return_1,
+    cur.return_n_uv,
+    cur.viewh24,
+    cur.return_n_uv_noself,
+    cur.cn,
+    cur.c1,
+    cur.dn,
+    cur.d1,
+    cur.dau2,
+    cur.dau_vs_5d,
+    ROUND(cur.dau_vs_5d / NULLIF(base.ctrl_dau_vs_5d, 0) - 1, 6) AS dau_vs_5d_diff,
+    ROUND(cur.exp / NULLIF(base.ctrl_exp, 0) - 1, 6) AS exp_diff,
+    ROUND(cur.exp_per_dau / NULLIF(base.ctrl_exp_per_dau, 0) - 1, 6) AS exp_per_dau_diff,
+    ROUND(cur.str_one / NULLIF(base.ctrl_str_one, 0) - 1, 6) AS str_one_diff,
+    ROUND(cur.ros_one / NULLIF(base.ctrl_ros_one, 0) - 1, 6) AS ros_one_diff,
+    ROUND(cur.str / NULLIF(base.ctrl_str, 0) - 1, 6) AS str_diff,
+    ROUND(cur.ros / NULLIF(base.ctrl_ros, 0) - 1, 6) AS ros_diff,
+    ROUND(cur.vovh24 / NULLIF(base.ctrl_vovh24, 0) - 1, 6) AS vovh24_diff,
+    ROUND(cur.str_plus / NULLIF(base.ctrl_str_plus, 0) - 1, 6) AS str_plus_diff,
+    ROUND(cur.ros_minus / NULLIF(base.ctrl_ros_minus, 0) - 1, 6) AS ros_minus_diff,
+    ROUND(cur.bn_rov / NULLIF(base.ctrl_bn_rov, 0) - 1, 6) AS bn_rov_diff,
+    ROUND(cur.c1_rov / NULLIF(base.ctrl_c1_rov, 0) - 1, 6) AS c1_rov_diff,
+    ROUND(cur.cn_rov / NULLIF(base.ctrl_cn_rov, 0) - 1, 6) AS cn_rov_diff,
+    ROUND(cur.d1_rov / NULLIF(base.ctrl_d1_rov, 0) - 1, 6) AS d1_rov_diff,
+    ROUND(cur.dn_rov / NULLIF(base.ctrl_dn_rov, 0) - 1, 6) AS dn_rov_diff,
+    ROUND(cur.total_rov / NULLIF(base.ctrl_total_rov, 0) - 1, 6) AS total_rov_diff,
+    ROUND(cur.ecs / NULLIF(base.ctrl_ecs, 0) - 1, 6) AS ecs_diff,
+    ROUND(cur.ecs_ratio / NULLIF(base.ctrl_ecs_ratio, 0) - 1, 6) AS ecs_ratio_diff,
+    ROUND(cur.arp / NULLIF(base.ctrl_arp, 0) - 1, 6) AS arp_diff
+FROM t_combined cur
+LEFT JOIN t_ctrl base
+    ON  cur.dt = base.dt
+    AND cur.apptype = base.apptype
+ORDER BY cur.dt DESC, cur.apptype, cur.abcode, cur.suffix_group
+;

+ 643 - 0
tasks/00_尾号实验/base_v4_v1_new_v3_scan.sql

@@ -0,0 +1,643 @@
+-- ════════════════════════════════════════════════════════════════════════════
+-- 两层尾号映射 (SCD Type 2 模式) — apptype = 0 + 基线对比(vs 89 桶 + vs 历史 5 天)
+-- [scan 版:独立扫描 useractive_log 获取基线天数据]
+--
+-- 第一层 t_suffix_group:物理尾号 → 分流桶 ID(16 个 hex 尾号分成 8 个 2-元桶)
+--   - 分流规则不变时,此层永不改
+--
+-- 第二层 t_experiment_map:分流桶 → 实验名 + 生效日期
+--   - 只列出"分配了具体实验"的桶,未列出的桶自动默认为"对照组"
+--   - 支持 1 对多:同一个实验占多个桶时,用同一 abcode 字符串多加几行
+--   - 实验切换:不删旧行,关闭 end_dt + 追加新行(保留历史可回溯)
+--
+-- ┌─ 基线配置 ──────────────────────────────────────────────────────────┐
+-- │  基线桶:89              (对照组物理桶,横向对比基准)              │
+-- │  基线天数:20260307~20260311(5 天均值,DAU2 纵向对比基准)         │
+-- │  新增列:dau_vs_5d / dau_vs_5d_diff / *_diff 系列差值列            │
+-- └─────────────────────────────────────────────────────────────────────┘
+-- ════════════════════════════════════════════════════════════════════════════
+WITH t_suffix_group AS
+(
+    -- Layer 1: each of the 16 hex tail characters -> its 2-character traffic bucket.
+    -- One bucket per line; UNION ALL order does not matter to the joins that read this CTE.
+    SELECT "0" AS suffix, "01" AS suffix_group UNION ALL SELECT "1", "01"   -- bucket 01
+    UNION ALL SELECT "2", "2c" UNION ALL SELECT "c", "2c"                   -- bucket 2c
+    UNION ALL SELECT "3", "34" UNION ALL SELECT "4", "34"                   -- bucket 34
+    UNION ALL SELECT "5", "5d" UNION ALL SELECT "d", "5d"                   -- bucket 5d
+    UNION ALL SELECT "6", "67" UNION ALL SELECT "7", "67"                   -- bucket 67
+    UNION ALL SELECT "8", "89" UNION ALL SELECT "9", "89"                   -- bucket 89
+    UNION ALL SELECT "a", "ab" UNION ALL SELECT "b", "ab"                   -- bucket ab
+    UNION ALL SELECT "e", "ef" UNION ALL SELECT "f", "ef"                   -- bucket ef
+)
+-- Layer 2: bucket -> experiment mapping currently in effect (apptype = 0)
+--   A bucket with no row covering '${dt}' falls back to "对照组" through the COALESCE in the consuming CTEs; all 8 buckets (including 89 and 2c) have at least one row below
+--   One suffix_group may have several rows (SCD Type 2), but their date ranges must not overlap: consumers join on '${dt}' BETWEEN start_dt AND end_dt
+--   NOTE(review): an older TODO here called start_dt = '20250101' a placeholder; no live row uses that value any more (only samples C/D do) - confirm the dates below are real launch dates
+,t_experiment_map AS
+(
+    -- ab bucket (label translated: "former baseline"). NOTE(review): this row and several below share one abcode string that does not match their per-row labels - confirm
+    SELECT "ab" AS suffix_group, "实验组:变更str*ros建模目标实验" AS abcode, "20260413" AS start_dt, "29991231" AS end_dt
+   
+    -- Modelling-target experiment
+    UNION ALL SELECT "01", "实验组:变更str*ros建模目标实验", "20260320", "29991231"
+
+    -- bn_ros new loss function
+    UNION ALL SELECT "34", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+
+    -- cn_rov experiment
+    UNION ALL SELECT "67", "实验组:变更str*ros建模目标实验", "20260330", "29991231"
+
+    -- Deconstructed-feature ranking str model
+    UNION ALL SELECT "5d", "实验组:变更str*ros建模目标实验", "20260407", "29991231"
+
+    UNION ALL SELECT "ef", "实验组:DNN模型-调参", "20260410", "29991231"
+
+    UNION ALL SELECT "2c", "实验组:DNN模型", "20260413", "29991231"
+
+    UNION ALL SELECT "89", "实验组:变更str*ros建模目标实验", "20260413", "29991231"
+
+    UNION ALL SELECT "89", "对照组", "20260301", "20260412"
+
+    -- ────────────────────────────────────────────────────────────────────
+    -- How to edit (copy the sample lines below into the UNION ALL list above)
+    --
+    -- Sample A: add an experiment that occupies a single bucket
+    --   UNION ALL SELECT "2c", "实验组:新策略 X", "20260501", "29991231"
+    --
+    -- Sample B: one experiment spanning several buckets (same experiment on 01 + 34)
+    --   Add one row per bucket with the same abcode string; downstream CTEs still group by suffix_group, so each bucket keeps its own output row carrying that shared abcode:
+    --   UNION ALL SELECT "01", "实验组:大流量 Y", "20260601", "29991231"
+    --   UNION ALL SELECT "34", "实验组:大流量 Y", "20260601", "29991231"
+    --
+    -- Sample C: switching experiments (SCD Type 2 - history is kept)
+    --   Suppose bucket 01 switches from experiment A to experiment B on 20260701:
+    --   Step 1: close the existing row by setting end_dt to the day before the switch:
+    --     SELECT "01", "实验组:A", "20250101", "20260630"
+    --   Step 2: append the row for the new experiment:
+    --     UNION ALL SELECT "01", "实验组:B", "20260701", "29991231"
+    --
+    -- Sample D: take an experiment offline so the bucket returns to control (leaves a gap)
+    --   Just set that row's end_dt to the last live day (no new row needed):
+    --     SELECT "5d", "实验组:A", "20250101", "20260630"
+    --   From 20260701 on no row covers bucket 5d, so it falls back to "对照组"
+    -- ────────────────────────────────────────────────────────────────────
+)
+,t_base AS  -- Exposure rows for '${dt}' / apptype "0", tagged with tail suffix, bucket (suffix_group) and experiment name (abcode)
+(
+    SELECT  sub.*
+            ,sg.suffix_group
+            ,COALESCE(m.abcode,"对照组") AS abcode
+    FROM    (  -- sub: exposure-table rows LEFT JOINed with per-(mid, subsessionid, vid) return counts cc (c1/cn) and dd (d1/dn)
+                SELECT  dt
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- c1/cn: for each share (a), follow its clickers (b) to their own shares (b1) and count the distinct users (b2) who clicked those, excluding the b1 sharer; c1 keeps only b2.sharedepth = 1
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- d1/dn: return counts of the NEXT video viewed in the same (mid, subsessionid), pulled onto the current row by LAG over rn DESC (0 when there is no next video)
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn  -- NOTE(review): partitioned by subsessionid only, while the LAG above partitions by (mid, subsessionid) - confirm this is intended
+                                            FROM    (
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1  -- keep the earliest view per (mid, subsessionid, videoid)
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9")
+                AND     abcode NOT IN ("ab100")  -- NOTE(review): redundant, "ab100" is already excluded by the IN list on the previous line
+            ) sub
+    -- INNER JOIN: only rows whose suffix is one of the 16 hex characters are analysed; guards against malformed data
+    INNER JOIN t_suffix_group sg
+    ON      sub.suffix = sg.suffix
+    -- LEFT JOIN: a bucket may have no mapping row for '${dt}'; m.abcode is then NULL and the COALESCE above yields "对照组"
+    LEFT JOIN t_experiment_map m
+    ON      sg.suffix_group = m.suffix_group
+    AND     '${dt}' BETWEEN m.start_dt AND m.end_dt
+)
+-- Exposure count of every vid on recommendation pages, per (dt, apptype, abcode, suffix).
+-- Common input of the ECS and ARP CTEs.
+,t_vid_exp AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        suffix,
+        vid,
+        COUNT(*) AS vid_exp_cnt
+    FROM t_base
+    WHERE page = "推荐"
+    GROUP BY
+        dt,
+        apptype,
+        abcode,
+        suffix,
+        vid
+)
+-- Per-suffix ECS (Effective Catalog Size) of recommendation exposures.
+--   ECS       = 2 * SUM(exp_share * exp_rank) - 1
+--   exp_share = the vid's fraction of the suffix's exposures
+--   exp_rank  = 1-based position when vids are ordered by exposure count, highest first
+-- Ranges from 1 to the number of distinct vids: larger means exposure is spread out,
+-- smaller means it is concentrated on head vids.
+,t_bucket_ecs AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        suffix,
+        2 * SUM(exp_share * exp_rank) - 1 AS ecs
+    FROM (
+        SELECT
+            dt,
+            apptype,
+            abcode,
+            suffix,
+            vid_exp_cnt / SUM(vid_exp_cnt) OVER (PARTITION BY dt, apptype, abcode, suffix) AS exp_share,
+            ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode, suffix ORDER BY vid_exp_cnt DESC) AS exp_rank
+        FROM t_vid_exp
+    ) ranked
+    GROUP BY
+        dt,
+        apptype,
+        abcode,
+        suffix
+)
+-- Popularity reference for ARP: exposure count of each vid on recommendation pages,
+-- summed over every experiment arm and suffix present in t_base (grouping omits abcode and suffix).
+,t_vid_global_pop AS
+(
+    SELECT
+        dt,
+        apptype,
+        vid,
+        COUNT(*) AS vid_global_pop
+    FROM t_base
+    WHERE page = "推荐"
+    GROUP BY
+        dt,
+        apptype,
+        vid
+)
+-- Per-suffix ARP (Average Recommendation Popularity): the exposure-weighted mean of
+-- vid_global_pop, so vids shown more often inside the suffix weigh more.
+-- Reading ECS and ARP together:
+--   high ECS + low ARP  = spread out, leaning long-tail          (desired)
+--   high ECS + high ARP = spread out, but within head content    (watch)
+--   low ECS  + low ARP  = concentrated on niche content          (unusual)
+--   low ECS  + high ARP = concentrated on head content           (model collapse)
+,t_bucket_arp AS
+(
+    SELECT
+        bucket_vid.dt,
+        bucket_vid.apptype,
+        bucket_vid.abcode,
+        bucket_vid.suffix,
+        SUM(bucket_vid.vid_exp_cnt * pop.vid_global_pop) / SUM(bucket_vid.vid_exp_cnt) AS arp
+    FROM t_vid_exp bucket_vid
+    LEFT JOIN t_vid_global_pop pop
+        ON  bucket_vid.dt = pop.dt
+        AND bucket_vid.apptype = pop.apptype
+        AND bucket_vid.vid = pop.vid
+    GROUP BY
+        bucket_vid.dt,
+        bucket_vid.apptype,
+        bucket_vid.abcode,
+        bucket_vid.suffix
+)
+-- dau2 per single suffix: distinct machinecode in useractive_log for '${dt}', apptype "0".
+-- NOTE(review): the ab_test003 list here includes ab5/ab6/ab7, which the abcode filter in t_base omits - confirm which is intended.
+,t_dau2_bucket AS
+(
+    SELECT
+        SUBSTR(act.dt,1,8) AS dt,
+        act.apptype,
+        COALESCE(exp_map.abcode,"对照组") AS abcode,
+        grp.suffix_group,
+        act.suffix,
+        COUNT(DISTINCT act.machinecode) AS dau2
+    FROM (
+        SELECT
+            dt,
+            apptype,
+            machinecode,
+            -- suffix = last character of rootSessionId
+            SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+        FROM loghubods.useractive_log
+        WHERE dt="${dt}"
+        -- Alternative source kept for reference (5-minute partitions):
+        --   FROM    loghubods.useractive_log_per5min
+        --   WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+          AND apptype IN ("0")
+          AND GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+          AND GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') NOT IN ("ab100")
+    ) act
+    -- only the 16 hex suffixes are kept
+    INNER JOIN t_suffix_group grp
+        ON  act.suffix = grp.suffix
+    -- no mapping row for '${dt}' -> abcode falls back to "对照组"
+    LEFT JOIN t_experiment_map exp_map
+        ON  grp.suffix_group = exp_map.suffix_group
+        AND '${dt}' BETWEEN exp_map.start_dt AND exp_map.end_dt
+    GROUP BY
+        SUBSTR(act.dt,1,8),
+        act.apptype,
+        COALESCE(exp_map.abcode,"对照组"),
+        grp.suffix_group,
+        act.suffix
+)
+-- dau2 per suffix_group: the mean of the per-suffix dau2 values inside the bucket.
+,t_dau2 AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        suffix_group,
+        AVG(dau2) AS dau2
+    FROM t_dau2_bucket
+    GROUP BY
+        dt,
+        apptype,
+        abcode,
+        suffix_group
+)
+-- Recommendation-page metrics per single suffix (UV is de-duplicated inside the suffix).
+-- Ratio metrics are wrapped in COALESCE(..., 0) so a NULL ratio is reported as 0.
+,t_bucket AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        suffix_group,
+        suffix,
+        COALESCE(COUNT(*) / COUNT(DISTINCT mid), 0) AS exp_per_dau,
+        COALESCE(SUM(is_share) / COUNT(*), 0) AS str_one,
+        COALESCE(SUM(return_n_uv) / SUM(is_share), 0) AS ros_one,
+        COALESCE(SUM(share_cnt) / COUNT(*), 0) AS str,
+        COALESCE(SUM(return_n_uv) / SUM(share_cnt), 0) AS ros,
+        COALESCE(SUM(is_return_1) / COUNT(*), 0) AS str_plus,
+        COALESCE(SUM(return_n_uv) / SUM(is_return_1), 0) AS ros_minus,
+        COALESCE(SUM(return_n_uv) / COUNT(*), 0) AS bn_rov,
+        COALESCE(SUM(c1) / COUNT(*), 0) AS c1_rov,
+        COALESCE(SUM(cn) / COUNT(*), 0) AS cn_rov,
+        COALESCE(SUM(d1) / COUNT(*), 0) AS d1_rov,
+        COALESCE(SUM(dn) / COUNT(*), 0) AS dn_rov,
+        -- combined ROV = bn_rov + cn_rov + dn_rov; all three share the exposure-count denominator
+        COALESCE((SUM(return_n_uv) + SUM(cn) + SUM(dn)) / COUNT(*), 0) AS total_rov,
+        COALESCE(SUM(new_exposure_cnt) / COUNT(*), 0) AS vovh24,
+        COUNT(DISTINCT mid) AS dau,
+        COUNT(*) AS exp,
+        -- distinct vids exposed in the suffix (denominator of ecs_ratio downstream)
+        COUNT(DISTINCT vid) AS distinct_vid_cnt,
+        COALESCE(SUM(is_share), 0) AS is_share,
+        COALESCE(SUM(share_cnt), 0) AS share_cnt,
+        COALESCE(SUM(is_return_1), 0) AS is_return_1,
+        COALESCE(SUM(return_n_uv), 0) AS return_n_uv,
+        COALESCE(SUM(new_exposure_cnt), 0) AS viewh24,
+        COALESCE(SUM(return_n_uv_noself), 0) AS return_n_uv_noself,
+        COALESCE(SUM(cn), 0) AS cn,
+        COALESCE(SUM(c1), 0) AS c1,
+        COALESCE(SUM(dn), 0) AS dn,
+        COALESCE(SUM(d1), 0) AS d1
+    FROM t_base
+    WHERE page = "推荐"
+    GROUP BY
+        dt,
+        apptype,
+        abcode,
+        suffix_group,
+        suffix
+)
+-- Metrics per suffix_group: the mean over the bucket's suffixes of every t_bucket metric,
+-- plus the distribution-diversity trio (ecs / ecs_ratio / gini) and arp.
+,t_metrics AS
+(
+    SELECT
+        bk.dt,
+        bk.apptype,
+        bk.abcode,
+        bk.suffix_group,
+        ROUND(AVG(bk.exp_per_dau), 2) AS exp_per_dau,
+        ROUND(AVG(bk.str_one), 6) AS str_one,
+        ROUND(AVG(bk.ros_one), 6) AS ros_one,
+        ROUND(AVG(bk.str), 6) AS str,
+        ROUND(AVG(bk.ros), 6) AS ros,
+        ROUND(AVG(bk.str_plus), 6) AS str_plus,
+        ROUND(AVG(bk.ros_minus), 6) AS ros_minus,
+        ROUND(AVG(bk.bn_rov), 6) AS bn_rov,
+        ROUND(AVG(bk.c1_rov), 6) AS c1_rov,
+        ROUND(AVG(bk.cn_rov), 6) AS cn_rov,
+        ROUND(AVG(bk.d1_rov), 6) AS d1_rov,
+        ROUND(AVG(bk.dn_rov), 6) AS dn_rov,
+        ROUND(AVG(bk.total_rov), 6) AS total_rov,
+        ROUND(AVG(bk.vovh24), 6) AS vovh24,
+        AVG(bk.dau) AS dau,
+        AVG(bk.exp) AS exp,
+        ROUND(AVG(bk.distinct_vid_cnt), 0) AS distinct_vid_cnt,
+        ROUND(AVG(ecs_t.ecs), 1) AS ecs,
+        -- ecs_ratio: ECS divided by the distinct-vid count, removing the effect of pool size
+        ROUND(AVG(ecs_t.ecs) / NULLIF(AVG(bk.distinct_vid_cnt), 0), 6) AS ecs_ratio,
+        -- gini is reported as 1 - ecs_ratio
+        ROUND(1 - AVG(ecs_t.ecs) / NULLIF(AVG(bk.distinct_vid_cnt), 0), 6) AS gini,
+        ROUND(AVG(arp_t.arp), 0) AS arp,
+        AVG(bk.is_share) AS is_share,
+        AVG(bk.share_cnt) AS share_cnt,
+        AVG(bk.is_return_1) AS is_return_1,
+        AVG(bk.return_n_uv) AS return_n_uv,
+        AVG(bk.viewh24) AS viewh24,
+        AVG(bk.return_n_uv_noself) AS return_n_uv_noself,
+        AVG(bk.cn) AS cn,
+        AVG(bk.c1) AS c1,
+        AVG(bk.dn) AS dn,
+        AVG(bk.d1) AS d1,
+        -- comma-separated list of the suffixes that contributed to this bucket row
+        WM_CONCAT(DISTINCT ',', bk.suffix) AS suffix
+    FROM t_bucket bk
+    LEFT JOIN t_bucket_ecs ecs_t
+        ON  bk.dt = ecs_t.dt
+        AND bk.apptype = ecs_t.apptype
+        AND bk.abcode = ecs_t.abcode
+        AND bk.suffix = ecs_t.suffix
+    LEFT JOIN t_bucket_arp arp_t
+        ON  bk.dt = arp_t.dt
+        AND bk.apptype = arp_t.apptype
+        AND bk.abcode = arp_t.abcode
+        AND bk.suffix = arp_t.suffix
+    GROUP BY
+        bk.dt,
+        bk.apptype,
+        bk.abcode,
+        bk.suffix_group
+)
+-- ════════════════════════════════════════════════════════════════════════════
+-- Baseline comparison layer: 5-day historical DAU2 mean + horizontal comparison against baseline bucket 89
+-- ════════════════════════════════════════════════════════════════════════════
+-- 5-day baseline DAU2 per suffix_group, scanned directly from useractive_log (nothing hard-coded):
+--   1. dau2       = distinct machinecode per (day, suffix)
+--   2. dau2_daily = mean of dau2 over the bucket's suffixes, per day
+--   3. dau2_base5 = mean of dau2_daily over the baseline days
+-- The ab_test003 predicate mirrors the one in t_dau2_bucket so the baseline counts the same
+-- user population as the daily dau2 it is divided into (dau_vs_5d). Without it the ratio
+-- compares a filtered count against an unfiltered one.
+-- NOTE(review): assumes ab_test003 was already populated on the baseline days - confirm.
+,t_dau2_base5 AS
+(
+    SELECT  suffix_group
+            ,AVG(dau2_daily) AS dau2_base5
+    FROM    (
+                SELECT  dt
+                        ,suffix_group
+                        ,AVG(dau2) AS dau2_daily
+                FROM    (
+                            SELECT  SUBSTR(sub.dt,1,8) AS dt
+                                    ,sg.suffix_group
+                                    ,sub.suffix
+                                    ,COUNT(DISTINCT sub.machinecode) AS dau2
+                            FROM    (
+                                        SELECT  dt
+                                                ,machinecode
+                                                ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+                                        FROM    loghubods.useractive_log
+                                        WHERE   dt IN ('20260307','20260308','20260309','20260310','20260311')
+                                        AND     apptype IN ("0")
+                                        -- same population filter as the daily dau2 (the IN list already excludes "ab100")
+                                        AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                                    ) sub
+                            -- only the 16 hex suffixes are kept
+                            INNER JOIN t_suffix_group sg
+                            ON      sub.suffix = sg.suffix
+                            GROUP BY SUBSTR(sub.dt,1,8)
+                                     ,sg.suffix_group
+                                     ,sub.suffix
+                        ) t_per_suffix
+                GROUP BY dt, suffix_group
+            ) t_per_day
+    GROUP BY suffix_group
+)
+-- Bucket metrics joined with the same-day dau2, plus
+-- dau_vs_5d = dau2 / dau2_base5 (NULL when the baseline is missing or 0).
+,t_combined AS
+(
+    SELECT
+        met.*,
+        cur_dau.dau2,
+        ROUND(cur_dau.dau2 / NULLIF(base5.dau2_base5, 0), 6) AS dau_vs_5d
+    FROM t_metrics met
+    LEFT JOIN t_dau2 cur_dau
+        ON  met.dt = cur_dau.dt
+        AND met.apptype = cur_dau.apptype
+        AND met.abcode = cur_dau.abcode
+        AND met.suffix_group = cur_dau.suffix_group
+    LEFT JOIN t_dau2_base5 base5
+        ON  met.suffix_group = base5.suffix_group
+)
+-- Same-day metrics of the baseline bucket (suffix_group = '89'), renamed with a ctrl_ prefix
+-- so the final SELECT can divide every bucket's value by them.
+,t_ctrl AS
+(
+    SELECT
+        dt,
+        apptype,
+        dau_vs_5d AS ctrl_dau_vs_5d,
+        exp AS ctrl_exp,
+        exp_per_dau AS ctrl_exp_per_dau,
+        str_one AS ctrl_str_one,
+        ros_one AS ctrl_ros_one,
+        str AS ctrl_str,
+        ros AS ctrl_ros,
+        vovh24 AS ctrl_vovh24,
+        str_plus AS ctrl_str_plus,
+        ros_minus AS ctrl_ros_minus,
+        bn_rov AS ctrl_bn_rov,
+        c1_rov AS ctrl_c1_rov,
+        cn_rov AS ctrl_cn_rov,
+        d1_rov AS ctrl_d1_rov,
+        dn_rov AS ctrl_dn_rov,
+        total_rov AS ctrl_total_rov,
+        ecs AS ctrl_ecs,
+        ecs_ratio AS ctrl_ecs_ratio,
+        arp AS ctrl_arp
+    FROM t_combined
+    WHERE suffix_group = '89'
+)
+-- Final output: raw metrics followed by the baseline comparison columns.
+-- Every *_diff column = current value / baseline-bucket value - 1 (NULL when the baseline value is 0 or missing).
+SELECT
+    cur.dt,
+    cur.apptype,
+    cur.abcode,
+    cur.suffix_group,
+    cur.suffix,
+    -- raw metrics
+    cur.exp_per_dau,
+    cur.str_one,
+    cur.ros_one,
+    cur.str,
+    cur.ros,
+    cur.str_plus,
+    cur.ros_minus,
+    cur.bn_rov,
+    cur.c1_rov,
+    cur.cn_rov,
+    cur.d1_rov,
+    cur.dn_rov,
+    cur.total_rov,
+    cur.vovh24,
+    cur.dau,
+    cur.exp,
+    cur.distinct_vid_cnt,
+    cur.ecs,
+    cur.ecs_ratio,
+    cur.gini,
+    cur.arp,
+    cur.is_share,
+    cur.share_cnt,
+    cur.is_return_1,
+    cur.return_n_uv,
+    cur.viewh24,
+    cur.return_n_uv_noself,
+    cur.cn,
+    cur.c1,
+    cur.dn,
+    cur.d1,
+    cur.dau2,
+    -- vertical comparison: dau2 against the 5-day historical mean
+    cur.dau_vs_5d,
+    -- horizontal comparison: each bucket against baseline bucket 89 on the same day
+    ROUND(cur.dau_vs_5d / NULLIF(base.ctrl_dau_vs_5d, 0) - 1, 6) AS dau_vs_5d_diff,
+    ROUND(cur.exp / NULLIF(base.ctrl_exp, 0) - 1, 6) AS exp_diff,
+    ROUND(cur.exp_per_dau / NULLIF(base.ctrl_exp_per_dau, 0) - 1, 6) AS exp_per_dau_diff,
+    ROUND(cur.str_one / NULLIF(base.ctrl_str_one, 0) - 1, 6) AS str_one_diff,
+    ROUND(cur.ros_one / NULLIF(base.ctrl_ros_one, 0) - 1, 6) AS ros_one_diff,
+    ROUND(cur.str / NULLIF(base.ctrl_str, 0) - 1, 6) AS str_diff,
+    ROUND(cur.ros / NULLIF(base.ctrl_ros, 0) - 1, 6) AS ros_diff,
+    ROUND(cur.vovh24 / NULLIF(base.ctrl_vovh24, 0) - 1, 6) AS vovh24_diff,
+    ROUND(cur.str_plus / NULLIF(base.ctrl_str_plus, 0) - 1, 6) AS str_plus_diff,
+    ROUND(cur.ros_minus / NULLIF(base.ctrl_ros_minus, 0) - 1, 6) AS ros_minus_diff,
+    ROUND(cur.bn_rov / NULLIF(base.ctrl_bn_rov, 0) - 1, 6) AS bn_rov_diff,
+    ROUND(cur.c1_rov / NULLIF(base.ctrl_c1_rov, 0) - 1, 6) AS c1_rov_diff,
+    ROUND(cur.cn_rov / NULLIF(base.ctrl_cn_rov, 0) - 1, 6) AS cn_rov_diff,
+    ROUND(cur.d1_rov / NULLIF(base.ctrl_d1_rov, 0) - 1, 6) AS d1_rov_diff,
+    ROUND(cur.dn_rov / NULLIF(base.ctrl_dn_rov, 0) - 1, 6) AS dn_rov_diff,
+    ROUND(cur.total_rov / NULLIF(base.ctrl_total_rov, 0) - 1, 6) AS total_rov_diff,
+    ROUND(cur.ecs / NULLIF(base.ctrl_ecs, 0) - 1, 6) AS ecs_diff,
+    ROUND(cur.ecs_ratio / NULLIF(base.ctrl_ecs_ratio, 0) - 1, 6) AS ecs_ratio_diff,
+    ROUND(cur.arp / NULLIF(base.ctrl_arp, 0) - 1, 6) AS arp_diff
+FROM t_combined cur
+LEFT JOIN t_ctrl base
+    ON  cur.dt = base.dt
+    AND cur.apptype = base.apptype
+ORDER BY
+    cur.dt DESC,
+    cur.apptype,
+    cur.abcode,
+    cur.suffix_group
+;