Browse Source

feat: 添加用户行为表 loghubods.user_behavior 及尾号实验 SQL 更新

- 新增 table_gen/loghubods.user_behavior.sql 生产建表+写入
- 新增 tasks/00_表的开发/ 目录,含生产SQL副本和测试SQL
- 尾号实验 base_v1/v2/v3 更新实验组映射和时间范围

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangxiaohui 6 days ago
parent
commit
3dd300bfab

+ 28 - 0
table_gen/loghubods.user_behavior.sql

@@ -0,0 +1,28 @@
+-- loghubods.user_behavior: one row per share / click event read from
+-- loghubods.user_share_log, rebuilt one dt partition per run.
+-- The schema, labels and filters below are kept identical to
+-- tasks/00_表的开发/loghubods.user_behavior/00_生产.sql and to the test query
+-- 01_基本数据.sql in the same directory, so that what is tested is what is written.
+CREATE TABLE IF NOT EXISTS loghubods.user_behavior
+(
+    apptype     STRING  COMMENT '应用类型',
+    mid         STRING  COMMENT '用户标识',
+    action_type STRING  COMMENT '行为类型: 分享/回流',
+    item_id     STRING  COMMENT '视频ID',
+    ts          BIGINT  COMMENT '事件时间戳(秒)',
+    scene_json  STRING  COMMENT '场景信息(预留)',
+    ext_json    STRING  COMMENT '扩展信息(预留)'
+)
+PARTITIONED BY (dt STRING)
+;
+
+-- Overwrites the whole target partition, so re-running the same date is idempotent.
+INSERT OVERWRITE TABLE loghubods.user_behavior PARTITION (dt = '${dt}')
+SELECT  apptype
+       ,machinecode                                 AS mid
+        -- topic is restricted to share / click in the WHERE clause, so no ELSE branch is needed
+       ,CASE WHEN topic = 'share' THEN '分享'
+             WHEN topic = 'click' THEN '回流'
+        END                                         AS action_type
+        -- the video id lives in a different source column per event type
+       ,CASE WHEN topic = 'share' THEN shareobjectid
+             WHEN topic = 'click' THEN clickobjectid
+        END                                         AS item_id
+        -- NOTE(review): presumably clienttimestamp is in milliseconds, confirm against the source table
+       ,CAST(clienttimestamp / 1000 AS BIGINT)       AS ts
+        -- reserved columns, typed explicitly instead of relying on implicit NULL conversion
+       ,CAST(NULL AS STRING)                         AS scene_json
+       ,CAST(NULL AS STRING)                         AS ext_json
+FROM    loghubods.user_share_log
+WHERE   dt = '${dt}'
+AND     topic IN ('share', 'click')
+-- keep only well-formed ids: the weixin_openid prefix form or a UUID (8-4-4-4-12 hex).
+-- This also drops NULL machinecode, which the previous IS NOT NULL check covered.
+AND     (machinecode LIKE 'weixin_openid_%'
+         OR machinecode RLIKE '^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$')
+;

+ 7 - 2
tasks/00_尾号实验/base_v1.sql

@@ -5,6 +5,11 @@ WITH t_abmap AS
     UNION ALL SELECT "3", "实验组:ros损失函数优化"
     UNION ALL SELECT "4", "实验组:c1_rovn & 去掉vor实验"
     UNION ALL SELECT "5", "实验组:b0_str & 去掉vor实验"
+    UNION ALL SELECT "6", "实验组:去掉vor实验"
+    UNION ALL SELECT "e", "实验组:c1_rovn"
+    UNION ALL SELECT "f", "实验组:b0_ror + c1_rovn + 去掉vor实验"
+    UNION ALL SELECT "a", "对照组"
+    UNION ALL SELECT "b", "对照组"
     UNION ALL SELECT "c", "对照组"
     UNION ALL SELECT "d", "对照组"
 )
@@ -34,7 +39,7 @@ WITH t_abmap AS
                         ,flowpool
                 FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
                 WHERE   dt ="${dt}"
-                -- and hh between "21" and "24"
+                and hh between "15" and "24"
                 AND     apptype IN ("4")
                 AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
                 AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
@@ -56,7 +61,7 @@ WITH t_abmap AS
                         ,machinecode
                         ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
                 FROM    loghubods.useractive_log_per5min
-                WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","239999")
+                WHERE   dt BETWEEN CONCAT("${dt}","150000") AND CONCAT("${dt}","239999")
                 AND     apptype IN ("4")
             ) sub
     LEFT JOIN t_abmap m

+ 12 - 10
tasks/00_尾号实验/base_v2.sql

@@ -1,13 +1,15 @@
 WITH t_abmap AS
 (
-    SELECT "1" AS suffix, "实验组:str校准 & ros天级更新" AS abcode
-    UNION ALL SELECT "2", "实验组:str校准 & ros统计量"
-    UNION ALL SELECT "3", "实验组:ros损失函数优化"
-    UNION ALL SELECT "5", "实验组:b0_str & 去掉vor实验"
+    SELECT "0" AS suffix, "实验组:ros损失函数优化" AS abcode
+    UNION ALL SELECT "0", "实验组:ros损失函数优化"
+    UNION ALL SELECT "5", "实验组:ros损失函数优化"
+    UNION ALL SELECT "f", "实验组:ros损失函数优化"
     UNION ALL SELECT "4", "实验组:c1_rovn & 去掉vor实验"
-    UNION ALL SELECT "6", "实验组:去掉vor实验"
+    UNION ALL SELECT "6", "实验组:c1_rovn & 去掉vor实验"
+    UNION ALL SELECT "7", "实验组:c1_rovn & 去掉vor实验"
+    UNION ALL SELECT "8", "实验组:c1_rovn"
+    UNION ALL SELECT "9", "实验组:c1_rovn"
     UNION ALL SELECT "e", "实验组:c1_rovn"
-    UNION ALL SELECT "f", "实验组:b0_ror & c1_rovn & 去掉vor实验"
     UNION ALL SELECT "a", "对照组"
     UNION ALL SELECT "b", "对照组"
     UNION ALL SELECT "c", "对照组"
@@ -60,10 +62,10 @@ WITH t_abmap AS
                         ,apptype
                         ,machinecode
                         ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
-                FROM    loghubods.useractive_log
-                WHERE   dt="${dt}"
-                -- FROM    loghubods.useractive_log_per5min
-                -- WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                -- FROM    loghubods.useractive_log
+                -- WHERE   dt="${dt}"
+                FROM    loghubods.useractive_log_per5min
+                WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
                 AND     apptype IN ("4")
             ) sub
     LEFT JOIN t_abmap m

+ 2 - 0
tasks/00_尾号实验/base_v3.sql

@@ -64,6 +64,8 @@ WITH t_abmap AS
                         ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
                 FROM    loghubods.useractive_log
                 WHERE   dt="${dt}"
+                -- FROM    loghubods.useractive_log_per5min
+                -- WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
                 AND     apptype IN ("4")
             ) sub
     LEFT JOIN t_abmap m

+ 31 - 0
tasks/00_表的开发/loghubods.user_behavior/00_生产.sql

@@ -0,0 +1,31 @@
+-- Production build of loghubods.user_behavior: creates the table when it is absent,
+-- then rewrites the target dt partition from loghubods.user_share_log.
+CREATE TABLE IF NOT EXISTS loghubods.user_behavior (
+    apptype     STRING COMMENT '应用类型',
+    mid         STRING COMMENT '用户标识',
+    action_type STRING COMMENT '行为类型: 分享/回流',
+    item_id     STRING COMMENT '视频ID',
+    ts          BIGINT COMMENT '事件时间戳(秒)',
+    scene_json  STRING COMMENT '场景信息(预留)',
+    ext_json    STRING COMMENT '扩展信息(预留)'
+)
+PARTITIONED BY (dt STRING)
+;
+
+INSERT OVERWRITE TABLE loghubods.user_behavior PARTITION (dt = '${dt}')
+SELECT
+    apptype,
+    machinecode AS mid,
+    -- share maps to the first label, click to the second
+    CASE topic
+        WHEN 'share' THEN '分享'
+        WHEN 'click' THEN '回流'
+    END AS action_type,
+    -- the video id comes from a different source column per event type
+    CASE topic
+        WHEN 'share' THEN shareobjectid
+        WHEN 'click' THEN clickobjectid
+    END AS item_id,
+    CAST(clienttimestamp / 1000 AS BIGINT) AS ts,
+    -- reserved columns, always empty for now
+    CAST(NULL AS STRING) AS scene_json,
+    CAST(NULL AS STRING) AS ext_json
+FROM loghubods.user_share_log
+WHERE dt = '${dt}'
+  -- accepted ids: weixin_openid prefix form, or UUID shaped (8-4-4-4-12 hex)
+  AND (
+        machinecode RLIKE '^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'
+        OR machinecode LIKE 'weixin_openid_%'
+      )
+  AND topic IN ('share', 'click')
+;

+ 21 - 0
tasks/00_表的开发/loghubods.user_behavior/01_基本数据.sql

@@ -0,0 +1,21 @@
+-- user_behavior test: checks the SELECT logic on its own, without the INSERT.
+-- Returns at most 100 rows. There is no ORDER BY, so the sample is arbitrary.
+-- Usage: python fetch_daily.py tasks/00_表的开发/loghubods.user_behavior/01_基本数据.sql --date 20260302
+
+SELECT
+    apptype,
+    machinecode AS mid,
+    CASE topic
+        WHEN 'share' THEN '分享'
+        WHEN 'click' THEN '回流'
+    END AS action_type,
+    CASE topic
+        WHEN 'share' THEN shareobjectid
+        WHEN 'click' THEN clickobjectid
+    END AS item_id,
+    CAST(clienttimestamp / 1000 AS BIGINT) AS ts,
+    CAST(NULL AS STRING) AS scene_json,
+    CAST(NULL AS STRING) AS ext_json
+FROM loghubods.user_share_log
+WHERE dt = '${dt}'
+  -- accepted ids: weixin_openid prefix form, or UUID shaped (8-4-4-4-12 hex)
+  AND (
+        machinecode RLIKE '^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'
+        OR machinecode LIKE 'weixin_openid_%'
+      )
+  AND topic IN ('share', 'click')
+LIMIT 100
+;