Просмотр исходного кода

feat(user_relation): 新增 root_mid 字段(从 rootshareid 提取根分享者)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangxiaohui 6 дней назад
Родитель
Коммит
6c0ebba322

+ 37 - 0
tasks/00_表的开发/loghubods.user_relation/00_生产.sql

@@ -0,0 +1,37 @@
+-- Share-click relation table: sharer (from_mid) -> clicker (to_mid) for an item, partitioned by dt.
+-- NOTE(review): IF NOT EXISTS leaves an already existing table untouched (no columns are added) - confirm the deployed schema.
+-- DROP TABLE IF EXISTS loghubods.user_relation;
+CREATE TABLE IF NOT EXISTS loghubods.user_relation (
+     apptype   STRING  COMMENT '应用类型'         -- application type
+    ,from_mid  STRING  COMMENT '分享者标识'       -- sharer identifier
+    ,to_mid    STRING  COMMENT '点击者标识'       -- clicker identifier
+    ,item_id   STRING  COMMENT '视频ID'           -- video ID
+    ,ts        BIGINT  COMMENT '点击时间戳(秒)'   -- click timestamp, in seconds
+    ,root_mid  STRING  COMMENT '根分享者标识'     -- root sharer identifier
+    ,ext_json  STRING  COMMENT '扩展信息'         -- extension info
+)
+PARTITIONED BY (dt STRING)
+;
+
+-- Overwrite partition dt of loghubods.user_relation with the topic = click rows of loghubods.user_share_log.
+-- Id prefixes are tested with INSTR(..) = 1, not LIKE: an underscore in a LIKE pattern matches any single character.
+INSERT OVERWRITE TABLE loghubods.user_relation PARTITION (dt = '${dt}')
+SELECT  apptype
+       ,from_mid
+       ,machinecode                                                          AS to_mid
+       ,clickobjectid                                                        AS item_id
+       ,CAST(clienttimestamp / 1000 AS BIGINT)                               AS ts        -- presumably ms to s
+       ,CASE WHEN INSTR(root_mid, 'weixin_openid_') = 1
+                  OR root_mid RLIKE '^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'
+             THEN root_mid END                                               AS root_mid  -- NULL unless a weixin_openid_ id or a UUID
+       ,CONCAT('{"depth":', CAST(usersharedepth AS BIGINT), '}')             AS ext_json
+FROM (
+    SELECT  apptype, machinecode, clickobjectid, clienttimestamp, usersharedepth
+           ,REGEXP_REPLACE(shareid, '(-[0-9a-f]{4})?-[0-9]{13}[0-9]*$', '') AS from_mid  -- drop trailing [-hhhh]-<13 or more digits>
+           ,REGEXP_REPLACE(rootshareid, '(-[0-9a-f]{4})?-[0-9]{13}[0-9]*$', '') AS root_mid  -- same suffix rule
+    FROM    loghubods.user_share_log
+    WHERE   dt = '${dt}' AND topic = 'click'
+) t
+WHERE   INSTR(from_mid, 'weixin_openid_') = 1  -- keep rows whose sharer id is a weixin_openid_ id or a UUID
+OR      from_mid RLIKE '^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'
+;

+ 25 - 0
tasks/00_表的开发/loghubods.user_relation/01_基本数据.sql

@@ -0,0 +1,25 @@
+-- user_relation test: checks the SELECT logic on at most 100 rows (nothing is written).
+-- Usage: python fetch_daily.py tasks/00_表的开发/loghubods.user_relation/01_基本数据.sql --date 20260302
+-- NOTE(review): LIMIT without ORDER BY returns an arbitrary sample, so two runs may show different rows.
+-- Id prefixes are tested with INSTR(..) = 1, not LIKE: an underscore in a LIKE pattern matches any single character.
+
+SELECT  apptype
+       ,from_mid
+       ,machinecode                                                          AS to_mid
+       ,clickobjectid                                                        AS item_id
+       ,CAST(clienttimestamp / 1000 AS BIGINT)                               AS ts        -- presumably ms to s
+       ,CASE WHEN INSTR(root_mid, 'weixin_openid_') = 1
+                  OR root_mid RLIKE '^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'
+             THEN root_mid END                                               AS root_mid  -- NULL unless a weixin_openid_ id or a UUID
+       ,CONCAT('{"depth":', CAST(usersharedepth AS BIGINT), '}')             AS ext_json
+FROM (
+    SELECT  apptype, machinecode, clickobjectid, clienttimestamp, usersharedepth
+           ,REGEXP_REPLACE(shareid, '(-[0-9a-f]{4})?-[0-9]{13}[0-9]*$', '') AS from_mid  -- drop trailing [-hhhh]-<13 or more digits>
+           ,REGEXP_REPLACE(rootshareid, '(-[0-9a-f]{4})?-[0-9]{13}[0-9]*$', '') AS root_mid  -- same suffix rule
+    FROM    loghubods.user_share_log
+    WHERE   dt = '${dt}' AND topic = 'click'
+) t
+WHERE   INSTR(from_mid, 'weixin_openid_') = 1  -- keep rows whose sharer id is a weixin_openid_ id or a UUID
+OR      from_mid RLIKE '^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'
+LIMIT   100
+;