Просмотр исходного кода

feat(table_gen): 曝光基础表增加 D 链(session内后续曝光传播) d1/d2/d3 字段

新增 6 列 d_1~d_3 及对应 mids,D 链通过同 subsession 内后续曝光的
BFS 去环传播计算,与已有 B/C 多跳列对齐。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangxiaohui 4 недель назад
Родитель
Сommit
118f0cb240
1 измененных файлов с 103 добавлено и 1 удалено
  1. 103 1
      table_gen/loghubods.dwd_recsys_alg_exposure_base_20260206.sql

+ 103 - 1
table_gen/loghubods.dwd_recsys_alg_exposure_base_20260206.sql

@@ -54,10 +54,16 @@ CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_20260206
     ,c_1                       STRING COMMENT '1跳回流SUM(B)'
     ,c_2                       STRING COMMENT '2跳回流SUM(B)'
     ,c_3                       STRING COMMENT '3跳回流SUM(B)'
+    ,d_1                       STRING COMMENT 'D链1跳: 同subsession后续曝光的B之和'
+    ,d_2                       STRING COMMENT 'D链2跳: d1回流用户session内曝光的B之和'
+    ,d_3                       STRING COMMENT 'D链3跳: d2回流用户session内曝光的B之和(去环)'
     ,b_mids                    STRING COMMENT 'B对应的回流mid列表'
     ,c_1_mids                  STRING COMMENT 'C_1对应的回流mid列表'
     ,c_2_mids                  STRING COMMENT 'C_2对应的回流mid列表'
     ,c_3_mids                  STRING COMMENT 'C_3对应的回流mid列表'
+    ,d_1_mids                  STRING COMMENT 'D链1跳对应的回流mid列表'
+    ,d_2_mids                  STRING COMMENT 'D链2跳对应的回流mid列表'
+    ,d_3_mids                  STRING COMMENT 'D链3跳对应的回流mid列表'
     ,extend                    STRING
 )
 PARTITIONED BY
@@ -66,7 +72,7 @@ PARTITIONED BY
     ,hh                        STRING COMMENT '小时:04'
 )
 STORED AS ALIORC
-TBLPROPERTIES ('comment' = '推荐算法-labelmatch表-20260206更新-含多跳B/C')
+TBLPROPERTIES ('comment' = '推荐算法-labelmatch表-20260206更新-含多跳B/C/D')
 LIFECYCLE 3650
 ;
 
@@ -744,6 +750,84 @@ WITH t_return AS
     GROUP BY f.source_id
 )
 --========================================
+-- D 链: session 内后续曝光传播 (BFS 去环)
+--========================================
+,t_d1 AS (
+    SELECT  e1.id AS exposure_id
+            ,SUM(bn2.B) AS D_1
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    t_exposure_bn bn2
+    ON      e2.id = bn2.exposure_id
+    GROUP BY e1.id
+)
+,t_d1_mids AS (
+    SELECT  e1.id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_1_mids
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    t_share_return sr
+    ON      e2.id = sr.exposure_id
+    GROUP BY e1.id
+)
+,t_d1_frontier AS (
+    SELECT DISTINCT e1.id AS source_id
+           ,sr.return_subsessionid AS reached_sub
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    t_share_return sr
+    ON      e2.id = sr.exposure_id
+)
+,t_d2 AS (
+    SELECT  f.source_id AS exposure_id, SUM(bn.B) AS D_2
+    FROM    t_d1_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    GROUP BY f.source_id
+)
+,t_d2_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_2_mids
+    FROM    t_d1_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    JOIN    t_share_return sr ON bn.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+,t_d2_frontier AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_d1_frontier f1
+    JOIN    t_exposure e1 ON f1.reached_sub = e1.subsessionid
+    JOIN    t_exposure_bn bn1 ON e1.id = bn1.exposure_id
+    JOIN    t_share_return sr2 ON bn1.exposure_id = sr2.exposure_id
+    LEFT JOIN t_d1_frontier v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+,t_d3 AS (
+    SELECT  f.source_id AS exposure_id, SUM(bn.B) AS D_3
+    FROM    t_d2_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    GROUP BY f.source_id
+)
+,t_d3_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_3_mids
+    FROM    t_d2_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    JOIN    t_share_return sr ON bn.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+--========================================
 -- 以下为原有 CTE 继续
 --========================================
 ,t_share_with_label AS
@@ -879,10 +963,16 @@ WITH t_return AS
             ,COALESCE(c1_hop.C_1, 0) AS c_1
             ,COALESCE(c2_hop.C_2, 0) AS c_2
             ,COALESCE(c3_hop.C_3, 0) AS c_3
+            ,COALESCE(d1_hop.D_1, 0) AS d_1
+            ,COALESCE(d2_hop.D_2, 0) AS d_2
+            ,COALESCE(d3_hop.D_3, 0) AS d_3
             ,CONCAT_WS(',', bn_hop.B_mids) AS b_mids
             ,CONCAT_WS(',', c1m_hop.C_1_mids) AS c_1_mids
             ,CONCAT_WS(',', c2m_hop.C_2_mids) AS c_2_mids
             ,CONCAT_WS(',', c3m_hop.C_3_mids) AS c_3_mids
+            ,CONCAT_WS(',', d1m_hop.D_1_mids) AS d_1_mids
+            ,CONCAT_WS(',', d2m_hop.D_2_mids) AS d_2_mids
+            ,CONCAT_WS(',', d3m_hop.D_3_mids) AS d_3_mids
             ,JSON_FORMAT(
                         JSON_OBJECT("animationSceneType",animationSceneType,"extParams",extParams,"rootsessionid",rootsessionid_new,"versioncode",versioncode,"group_name",tc.group_name)
             ) AS extend
@@ -907,6 +997,18 @@ WITH t_return AS
     ON      ta.id = c3_hop.exposure_id
     LEFT JOIN t_c3_mids c3m_hop
     ON      ta.id = c3m_hop.exposure_id
+    LEFT JOIN t_d1 d1_hop
+    ON      ta.id = d1_hop.exposure_id
+    LEFT JOIN t_d1_mids d1m_hop
+    ON      ta.id = d1m_hop.exposure_id
+    LEFT JOIN t_d2 d2_hop
+    ON      ta.id = d2_hop.exposure_id
+    LEFT JOIN t_d2_mids d2m_hop
+    ON      ta.id = d2m_hop.exposure_id
+    LEFT JOIN t_d3 d3_hop
+    ON      ta.id = d3_hop.exposure_id
+    LEFT JOIN t_d3_mids d3m_hop
+    ON      ta.id = d3m_hop.exposure_id
 )SELECT  *
 FROM    t_exposure_share_return
 ;