Просмотр исходного кода

feat(table_gen): agg_wide 对齐 cn_total/dn_total/all 为 uv+pv+exp+ror+rov 五字段结构

- cn_total/dn_total 各补 _pv/_exp,COMMENT 写明加法公式
- all 重构: 删重复 all_rovn,B链改用 bn_uv 对齐 cn/dn 口径,新增 all_pv/all_exp
- DDL 字段数 188→193(cn_total +2、dn_total +2、all 净 +1),SELECT 字段顺序同步对齐

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangxiaohui 3 недель назад
Родитель
Коммит
8100ce23ab
1 изменённый файл: 32 добавления и 21 удаление
  1. 32 21
      table_gen/loghubods.dwd_recsys_alg_exposure_agg_wide_20260209.sql

+ 32 - 21
table_gen/loghubods.dwd_recsys_alg_exposure_agg_wide_20260209.sql

@@ -13,7 +13,7 @@
 --                  每组: _real(label) + _pred(预估) + _copc + _mae + _var
 --                  _real 与漏斗字段等价: str_real=return_rate_noself, rovn_real=return_uv_noself/exposure_cnt
 --   B/C/D 链       每级: _uv + _pv + _exp + _ror + _rov
---   全链路         all_return_n_uv = B + C + D
+--   全链路         all_uv/pv/exp = B + C + D, all_ror/rov
 --
 -- rov/ror 分母推导 (逐级递推):
 --   depth 维度: depth=1 的成本=该hop入口; depth=N+1 的成本=depth=N 的输出
@@ -128,7 +128,9 @@ CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 (
     ,cn_3_exp                 BIGINT    COMMENT 'C链hop3: 回流session曝光数'
     ,cn_3_ror                 DOUBLE    COMMENT 'cn_3_uv/cn_2_uv'
     ,cn_3_rov                 DOUBLE    COMMENT 'cn_3_uv/cn_2_exp'
-    ,cn_total_uv              BIGINT    COMMENT 'C链合计UV'
+    ,cn_total_uv              BIGINT    COMMENT 'C链合计UV = cn_1_uv + cn_2_uv + cn_3_uv'
+    ,cn_total_pv              BIGINT    COMMENT 'C链合计PV = cn_1_pv + cn_2_pv + cn_3_pv'
+    ,cn_total_exp             BIGINT    COMMENT 'C链合计EXP = cn_1_exp + cn_2_exp + cn_3_exp'
     ,cn_total_ror             DOUBLE    COMMENT 'cn_total_uv/bn_uv'
     ,cn_total_rov             DOUBLE    COMMENT 'cn_total_uv/bn_exp'
     -- C链 depth拆分 hop1
@@ -197,7 +199,9 @@ CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 (
     ,dn_3_exp                 BIGINT    COMMENT 'D链hop3: 回流session曝光数'
     ,dn_3_ror                 DOUBLE    COMMENT 'dn_3_uv/dn_2_uv'
     ,dn_3_rov                 DOUBLE    COMMENT 'dn_3_uv/dn_2_exp'
-    ,dn_total_uv              BIGINT    COMMENT 'D链合计UV'
+    ,dn_total_uv              BIGINT    COMMENT 'D链合计UV = dn_1_uv + dn_2_uv + dn_3_uv'
+    ,dn_total_pv              BIGINT    COMMENT 'D链合计PV = dn_1_pv + dn_2_pv + dn_3_pv'
+    ,dn_total_exp             BIGINT    COMMENT 'D链合计EXP = dn_1_exp + dn_2_exp + dn_3_exp'
     ,dn_total_ror             DOUBLE    COMMENT 'dn_total_uv/exposure_uv'
     ,dn_total_rov             DOUBLE    COMMENT 'dn_total_uv/d0'
     -- D链 depth拆分 hop1
@@ -250,10 +254,11 @@ CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 (
     ,d3_3_rov                 DOUBLE    COMMENT 'd3_3_uv/d2_3_exp'
 
     -- ==================== 全链路 ====================
-    ,all_return_n_uv          BIGINT    COMMENT '全链路拉回UV = B + C + D'
-    ,all_rovn                 DOUBLE    COMMENT 'all_return_n_uv/exposure_cnt'
-    ,all_ror                  DOUBLE    COMMENT 'all_return_n_uv/exposure_uv'
-    ,all_rov                  DOUBLE    COMMENT 'all_return_n_uv/exposure_cnt'
+    ,all_uv                   BIGINT    COMMENT '全链路拉回UV = bn_uv + cn_total_uv + dn_total_uv'
+    ,all_pv                   BIGINT    COMMENT '全链路拉回PV = bn_pv + cn_total_pv + dn_total_pv'
+    ,all_exp                  BIGINT    COMMENT '全链路拉回EXP = bn_exp + cn_total_exp + dn_total_exp'
+    ,all_ror                  DOUBLE    COMMENT 'all_uv/exposure_uv'
+    ,all_rov                  DOUBLE    COMMENT 'all_uv/exposure_cnt'
 )
 COMMENT '曝光回流链路CUBE聚合-宽表版 (5维度: 用户分层/小时段/进入品类TOP1/推荐品类TOP10/内容idTOP1)'
 ;
@@ -487,7 +492,9 @@ SELECT
         ,SUM(CAST(cn_3_exp AS BIGINT))                                                  AS cn_3_exp
         ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_3_ror
         ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_3_rov
-        ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))  AS cn_total_uv
+        ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))    AS cn_total_uv
+        ,SUM(CAST(cn_1_pv AS BIGINT)) + SUM(CAST(cn_2_pv AS BIGINT)) + SUM(CAST(cn_3_pv AS BIGINT))    AS cn_total_pv
+        ,SUM(CAST(cn_1_exp AS BIGINT)) + SUM(CAST(cn_2_exp AS BIGINT)) + SUM(CAST(cn_3_exp AS BIGINT))  AS cn_total_exp
         ,ROUND(COALESCE(
             (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
             / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0)
@@ -564,7 +571,9 @@ SELECT
         ,SUM(CAST(dn_3_exp AS BIGINT))                                                  AS dn_3_exp
         ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_3_ror
         ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_3_rov
-        ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))  AS dn_total_uv
+        ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))    AS dn_total_uv
+        ,SUM(CAST(dn_1_pv AS BIGINT)) + SUM(CAST(dn_2_pv AS BIGINT)) + SUM(CAST(dn_3_pv AS BIGINT))    AS dn_total_pv
+        ,SUM(CAST(dn_1_exp AS BIGINT)) + SUM(CAST(dn_2_exp AS BIGINT)) + SUM(CAST(dn_3_exp AS BIGINT))  AS dn_total_exp
         ,ROUND(COALESCE(
             (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
             / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
@@ -625,28 +634,30 @@ SELECT
         ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_3_rov
 
         -- ==================== 全链路 ====================
-        ,SUM(CAST(return_n_uv_noself AS BIGINT))
+        ,SUM(CAST(bn_uv AS BIGINT))
             + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
             + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
-                                                                                        AS all_return_n_uv
+                                                                                        AS all_uv
+        ,SUM(CAST(bn_pv AS BIGINT))
+            + SUM(CAST(cn_1_pv AS BIGINT)) + SUM(CAST(cn_2_pv AS BIGINT)) + SUM(CAST(cn_3_pv AS BIGINT))
+            + SUM(CAST(dn_1_pv AS BIGINT)) + SUM(CAST(dn_2_pv AS BIGINT)) + SUM(CAST(dn_3_pv AS BIGINT))
+                                                                                        AS all_pv
+        ,SUM(CAST(bn_exp AS BIGINT))
+            + SUM(CAST(cn_1_exp AS BIGINT)) + SUM(CAST(cn_2_exp AS BIGINT)) + SUM(CAST(cn_3_exp AS BIGINT))
+            + SUM(CAST(dn_1_exp AS BIGINT)) + SUM(CAST(dn_2_exp AS BIGINT)) + SUM(CAST(dn_3_exp AS BIGINT))
+                                                                                        AS all_exp
         ,ROUND(COALESCE(
-            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+            (   SUM(CAST(bn_uv AS BIGINT))
               + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
               + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
-            ) / NULLIF(COUNT(1), 0)
-        , 0), 6)                                                                        AS all_rovn
+            ) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_ror
         ,ROUND(COALESCE(
-            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+            (   SUM(CAST(bn_uv AS BIGINT))
               + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
               + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
             ) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0)
         , 0), 6)                                                                        AS all_rov
-        ,ROUND(COALESCE(
-            (   SUM(CAST(return_n_uv_noself AS BIGINT))
-              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
-              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
-            ) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
-        , 0), 6)                                                                        AS all_ror
 
 
 -- =====================================================================