Explorar el Código

feat(洞察): 拆分 depth 分析为独立 SQL,新增 click top 视频查询

- 00_uv情况: 回退为原始两层(topic分组+自点/他点),移除 depth 块
- 02_click_depth分布: 独立文件,按用户 max depth 分桶(0~50逐值,>50归桶),含 uv/cnt/vid 占比
- 03_click_top视频: 视频粒度 top100 by UV,含 title/发布时间/avg_depth/max_depth
- 04_click_top视频_by_depth: 视频粒度 top100 by max_depth

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangxiaohui hace 3 semanas
padre
commit
478c942e1b

+ 21 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/00_uv情况.sql

@@ -0,0 +1,21 @@
+
+-- Daily UV / event count per topic plus a grand-total row labelled '合计' (GROUPING() tells the roll-up row apart from a NULL topic)
+SELECT dt, CASE WHEN GROUPING(topic) = 1 THEN '合计' ELSE topic END as topic,
+       count(distinct machinecode) as uv, sum(1) as cnt
+FROM loghubods.user_share_log
+WHERE dt = '${dt}'
+GROUP BY dt, topic
+GROUPING SETS ((dt, topic), (dt))
+
+UNION ALL
+
+-- Click events split again: does the first '-' segment of shareid equal the clicking machinecode?
+SELECT dt,
+       CASE WHEN split(shareid, '-')[0] = machinecode
+            THEN 'click_自点' ELSE 'click_他点' END as topic,  -- a NULL on either side of the comparison falls into the ELSE label
+       count(distinct machinecode) as uv, sum(1) as cnt
+FROM loghubods.user_share_log
+WHERE dt = '${dt}' AND topic = 'click'
+GROUP BY dt,
+       CASE WHEN split(shareid, '-')[0] = machinecode
+            THEN 'click_自点' ELSE 'click_他点' END

+ 26 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/02_click_depth分布.sql

@@ -0,0 +1,26 @@
+-- Click distribution bucketed by each user's maximum share depth
+-- Per user: the day's largest usersharedepth; depths up to 50 each get their own bucket, anything above 50 goes to bucket 51
+-- vid = distinct clickobjectid inside the bucket; users without a usable depth end up in a NULL bucket, not in bucket 51
+-- Usage: python fetch_daily.py .../02_click_depth分布.sql --date 20260210
+
+SELECT dt, depth_bucket,
+       uv,  ROUND(uv  * 100.0 / SUM(uv)  OVER(PARTITION BY dt), 2) as uv_pct,
+       cnt, ROUND(cnt * 100.0 / SUM(cnt) OVER(PARTITION BY dt), 2) as cnt_pct,
+       vid, ROUND(vid * 100.0 / SUM(vid) OVER(PARTITION BY dt), 2) as vid_pct,  -- denominator is the sum of per-bucket distinct counts
+       actual_max  -- largest real depth inside the bucket (informative for bucket 51)
+FROM (
+    SELECT t.dt,
+           CASE WHEN max_depth > 50 THEN 51 ELSE max_depth END as depth_bucket,  -- NULL stays NULL instead of being counted as >50
+           COUNT(DISTINCT machinecode) as uv,
+           COUNT(1) as cnt,  -- every click row of the users that fall into this bucket
+           COUNT(DISTINCT clickobjectid) as vid,
+           MAX(max_depth) as actual_max
+    FROM (
+        SELECT dt, machinecode, clickobjectid,
+               MAX(CAST(usersharedepth AS BIGINT)) OVER(PARTITION BY dt, machinecode) as max_depth  -- user-level max attached to each click row
+        FROM loghubods.user_share_log
+        WHERE dt = '${dt}' AND topic = 'click'
+    ) t
+    GROUP BY t.dt, CASE WHEN max_depth > 50 THEN 51 ELSE max_depth END
+) agg
+ORDER BY depth_bucket

+ 27 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/03_click_top视频.sql

@@ -0,0 +1,27 @@
+-- Click stats per video, top 100 by uv descending (ties broken by clickobjectid so reruns return the same rows), with title and publish time
+-- Usage: python fetch_daily.py .../03_click_top视频.sql --date 20260210
+
+SELECT t.dt, t.clickobjectid,
+       v.title,
+       TO_CHAR(FROM_UNIXTIME(v.publish_ts / 1000), 'yyyy-MM-dd HH:mm:ss') as publish_time,  -- NOTE(review): presumably publish_ts is epoch milliseconds; confirm
+       DATEDIFF(TO_DATE(t.dt, 'yyyyMMdd'), TO_DATE(TO_CHAR(FROM_UNIXTIME(v.publish_ts / 1000), 'yyyyMMdd'), 'yyyyMMdd'), 'dd') as days_since_pub,
+       t.uv,  ROUND(t.uv  * 100.0 / SUM(t.uv)  OVER(PARTITION BY t.dt), 2) as uv_pct,  -- share within the returned rows: the window runs after the rn filter
+       t.cnt, ROUND(t.cnt * 100.0 / SUM(t.cnt) OVER(PARTITION BY t.dt), 2) as cnt_pct,  -- same scope as uv_pct
+       t.max_depth, t.avg_depth
+FROM (
+    SELECT dt, clickobjectid,
+           COUNT(DISTINCT machinecode) as uv,
+           COUNT(1) as cnt,
+           MAX(CAST(usersharedepth AS BIGINT)) as max_depth,
+           ROUND(AVG(CAST(usersharedepth AS BIGINT)), 2) as avg_depth,  -- averaged over click rows, not over users
+           ROW_NUMBER() OVER(PARTITION BY dt ORDER BY COUNT(DISTINCT machinecode) DESC, clickobjectid) as rn  -- deterministic rank
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}' AND topic = 'click'
+    GROUP BY dt, clickobjectid
+) t
+LEFT JOIN (
+    SELECT id as vid, title, gmt_create_timestamp as publish_ts
+    FROM videoods.wx_video
+) v ON t.clickobjectid = v.vid  -- videos missing from wx_video keep NULL title / publish columns
+WHERE t.rn <= 100
+ORDER BY t.uv DESC, t.clickobjectid

+ 27 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/04_click_top视频_by_depth.sql

@@ -0,0 +1,27 @@
+-- Click stats per video, top 100 by max_depth descending (ties broken by uv descending, then clickobjectid), with title and publish time
+-- Usage: python fetch_daily.py .../04_click_top视频_by_depth.sql --date 20260210
+
+SELECT t.dt, t.clickobjectid,
+       v.title,
+       TO_CHAR(FROM_UNIXTIME(v.publish_ts / 1000), 'yyyy-MM-dd HH:mm:ss') as publish_time,  -- NOTE(review): presumably publish_ts is epoch milliseconds; confirm
+       DATEDIFF(TO_DATE(t.dt, 'yyyyMMdd'), TO_DATE(TO_CHAR(FROM_UNIXTIME(v.publish_ts / 1000), 'yyyyMMdd'), 'yyyyMMdd'), 'dd') as days_since_pub,
+       t.uv,  ROUND(t.uv  * 100.0 / SUM(t.uv)  OVER(PARTITION BY t.dt), 2) as uv_pct,  -- share within the returned rows: the window runs after the rn filter
+       t.cnt, ROUND(t.cnt * 100.0 / SUM(t.cnt) OVER(PARTITION BY t.dt), 2) as cnt_pct,  -- same scope as uv_pct
+       t.max_depth, t.avg_depth
+FROM (
+    SELECT dt, clickobjectid,
+           COUNT(DISTINCT machinecode) as uv,
+           COUNT(1) as cnt,
+           MAX(CAST(usersharedepth AS BIGINT)) as max_depth,
+           ROUND(AVG(CAST(usersharedepth AS BIGINT)), 2) as avg_depth,  -- averaged over click rows, not over users
+           ROW_NUMBER() OVER(PARTITION BY dt ORDER BY MAX(CAST(usersharedepth AS BIGINT)) DESC, COUNT(DISTINCT machinecode) DESC, clickobjectid) as rn  -- deterministic rank
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}' AND topic = 'click'
+    GROUP BY dt, clickobjectid
+) t
+LEFT JOIN (
+    SELECT id as vid, title, gmt_create_timestamp as publish_ts
+    FROM videoods.wx_video
+) v ON t.clickobjectid = v.vid  -- videos missing from wx_video keep NULL title / publish columns
+WHERE t.rn <= 100
+ORDER BY t.max_depth DESC, t.uv DESC, t.clickobjectid