Просмотр исходного кода

feat(洞察): 添加 click shareid/rootshareid 来源分布查询

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangxiaohui 3 недель назад
Родитель
Сommit
9e47761fa5

+ 64 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_rootshareid来源分布.sql

@@ -0,0 +1,64 @@
+-- Source-age distribution of the rootshareid values found in click rows.
+-- Joins the run date's click rows to share rows on rootshareid = shareid and buckets them by how many days before the run date the share's earliest dt falls; clicks with no share row in the lookback window stay in the result under a NULL share_date (LEFT JOIN).
+-- Difference from the shareid variant: clicks are matched through rootshareid instead of shareid (per the original author's note, rootshareid traces to the top of the propagation chain, i.e. the originating share).
+-- Share-side ratios: denominator = that share date's totals from daily_total (distinct shareid / machinecode / shareobjectid).
+-- Click-side ratios: denominator = SUM(...) OVER () across all output buckets. NOTE(review): for uv and vid this sums per-bucket distinct counts, so a machinecode / clickobjectid present in several buckets is counted once per bucket; confirm this is the intended denominator rather than the day's overall distinct count.
+-- vid: the share side uses shareobjectid, the click side uses clickobjectid.
+-- Usage: python fetch_daily.py "tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_rootshareid来源分布.sql" --date 20260210
+
+WITH clicks AS (
+    -- Row-level click records for the run date with a non-empty rootshareid (left unaggregated so COUNT(*) in the final SELECT counts clicks).
+    SELECT rootshareid, machinecode, clickobjectid
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}'
+      AND topic = 'click'
+      AND rootshareid IS NOT NULL AND rootshareid <> ''
+),
+share_info AS (
+    -- 90-day lookback (run date minus 90 days through the run date): one row per shareid with its earliest share dt, plus MAX(machinecode) / MAX(shareobjectid) as the single sharer / shared-object value kept for that shareid.
+    SELECT
+        shareid,
+        MIN(dt)                  AS share_date,
+        MAX(machinecode)         AS share_mid,
+        MAX(shareobjectid)       AS share_vid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -90, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY shareid
+),
+daily_total AS (
+    -- Same 90-day lookback: per-dt distinct counts of shareid / machinecode / shareobjectid over share rows (the denominators of the share-side ratios).
+    SELECT
+        dt                             AS share_date,
+        COUNT(DISTINCT shareid)        AS day_share_cnt,
+        COUNT(DISTINCT machinecode)    AS day_share_uv,
+        COUNT(DISTINCT shareobjectid)  AS day_share_vid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -90, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY dt
+)
+SELECT
+    '${dt}'                                                                                          AS dt,
+    DATEDIFF(TO_DATE('${dt}', 'yyyyMMdd'), TO_DATE(si.share_date, 'yyyyMMdd'), 'dd')                 AS `来自N天前的分享`,
+    COUNT(DISTINCT c.rootshareid)                                                                    AS `分享cnt`,
+    ROUND(COUNT(DISTINCT c.rootshareid) * 100.0 / tot.day_share_cnt, 2)                              AS `分享cnt占比`,
+    COUNT(DISTINCT si.share_mid)                                                                     AS `分享uv`,
+    ROUND(COUNT(DISTINCT si.share_mid) * 100.0 / tot.day_share_uv, 2)                                AS `分享uv占比`,
+    COUNT(DISTINCT si.share_vid)                                                                     AS `分享vid`,
+    ROUND(COUNT(DISTINCT si.share_vid) * 100.0 / tot.day_share_vid, 2)                               AS `分享vid占比`,
+    COUNT(*)                                                                                         AS `回流cnt`,
+    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 2)                                                AS `回流cnt占比`,
+    COUNT(DISTINCT c.machinecode)                                                                    AS `回流uv`,
+    ROUND(COUNT(DISTINCT c.machinecode) * 100.0 / SUM(COUNT(DISTINCT c.machinecode)) OVER(), 2)      AS `回流uv占比`,
+    COUNT(DISTINCT c.clickobjectid)                                                                  AS `回流vid`,
+    ROUND(COUNT(DISTINCT c.clickobjectid) * 100.0 / SUM(COUNT(DISTINCT c.clickobjectid)) OVER(), 2) AS `回流vid占比`
+FROM clicks c
+LEFT JOIN share_info si ON c.rootshareid = si.shareid
+LEFT JOIN daily_total tot ON si.share_date = tot.share_date
+GROUP BY si.share_date, tot.day_share_cnt, tot.day_share_uv, tot.day_share_vid
+ORDER BY `来自N天前的分享`

+ 63 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布.sql

@@ -0,0 +1,63 @@
+-- Source-age distribution of the shareid values found in click rows.
+-- Joins the run date's click rows to share rows on shareid and buckets them by how many days before the run date the share's earliest dt falls; clicks with no share row in the lookback window stay in the result under a NULL share_date (LEFT JOIN).
+-- Share-side ratios: denominator = that share date's totals from daily_total (distinct shareid / machinecode / shareobjectid).
+-- Click-side ratios: denominator = SUM(...) OVER () across all output buckets. NOTE(review): for uv and vid this sums per-bucket distinct counts, so a machinecode / clickobjectid present in several buckets is counted once per bucket; confirm this is the intended denominator rather than the day's overall distinct count.
+-- vid: the share side uses shareobjectid, the click side uses clickobjectid.
+-- Usage: python fetch_daily.py "tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布.sql" --date 20260210
+
+WITH clicks AS (
+    -- Row-level click records for the run date with a non-empty shareid (left unaggregated so COUNT(*) in the final SELECT counts clicks).
+    SELECT shareid, machinecode, clickobjectid
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}'
+      AND topic = 'click'
+      AND shareid IS NOT NULL AND shareid <> ''
+),
+share_info AS (
+    -- 90-day lookback (run date minus 90 days through the run date): one row per shareid with its earliest share dt, plus MAX(machinecode) / MAX(shareobjectid) as the single sharer / shared-object value kept for that shareid.
+    SELECT
+        shareid,
+        MIN(dt)              AS share_date,
+        MAX(machinecode)     AS share_mid,
+        MAX(shareobjectid)   AS share_vid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -90, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY shareid
+),
+daily_total AS (
+    -- Same 90-day lookback: per-dt distinct counts of shareid / machinecode / shareobjectid over share rows (the denominators of the share-side ratios).
+    SELECT
+        dt                             AS share_date,
+        COUNT(DISTINCT shareid)        AS day_share_cnt,
+        COUNT(DISTINCT machinecode)    AS day_share_uv,
+        COUNT(DISTINCT shareobjectid)  AS day_share_vid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -90, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY dt
+)
+SELECT
+    '${dt}'                                                                                          AS dt,
+    DATEDIFF(TO_DATE('${dt}', 'yyyyMMdd'), TO_DATE(si.share_date, 'yyyyMMdd'), 'dd')                 AS `来自N天前的分享`,
+    COUNT(DISTINCT c.shareid)                                                                        AS `分享cnt`,
+    ROUND(COUNT(DISTINCT c.shareid) * 100.0 / tot.day_share_cnt, 2)                                  AS `分享cnt占比`,
+    COUNT(DISTINCT si.share_mid)                                                                     AS `分享uv`,
+    ROUND(COUNT(DISTINCT si.share_mid) * 100.0 / tot.day_share_uv, 2)                                AS `分享uv占比`,
+    COUNT(DISTINCT si.share_vid)                                                                     AS `分享vid`,
+    ROUND(COUNT(DISTINCT si.share_vid) * 100.0 / tot.day_share_vid, 2)                               AS `分享vid占比`,
+    COUNT(*)                                                                                         AS `回流cnt`,
+    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 2)                                                AS `回流cnt占比`,
+    COUNT(DISTINCT c.machinecode)                                                                    AS `回流uv`,
+    ROUND(COUNT(DISTINCT c.machinecode) * 100.0 / SUM(COUNT(DISTINCT c.machinecode)) OVER(), 2)      AS `回流uv占比`,
+    COUNT(DISTINCT c.clickobjectid)                                                                  AS `回流vid`,
+    ROUND(COUNT(DISTINCT c.clickobjectid) * 100.0 / SUM(COUNT(DISTINCT c.clickobjectid)) OVER(), 2) AS `回流vid占比`
+FROM clicks c
+LEFT JOIN share_info si ON c.shareid = si.shareid
+LEFT JOIN daily_total tot ON si.share_date = tot.share_date
+GROUP BY si.share_date, tot.day_share_cnt, tot.day_share_uv, tot.day_share_vid
+ORDER BY `来自N天前的分享`

+ 56 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布_180d.sql

@@ -0,0 +1,56 @@
+-- Source-age distribution of the shareid values found in click rows (180-day lookback).
+-- Joins the run date's click rows to share rows on shareid and buckets them by how many days before the run date the share's earliest dt falls; clicks with no share row in the lookback window stay in the result under a NULL share_date (LEFT JOIN).
+-- Share-side ratios: denominator = that share date's totals from daily_total (distinct shareid / machinecode).
+-- Click-side ratios: denominator = SUM(...) OVER () across all output buckets. NOTE(review): for uv this sums per-bucket distinct counts, so a machinecode present in several buckets is counted once per bucket; confirm this is the intended denominator rather than the day's overall distinct count.
+-- Usage: python fetch_daily.py "tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布_180d.sql" --date 20260210
+
+WITH clicks AS (
+    -- Row-level click records for the run date with a non-empty shareid (left unaggregated so COUNT(*) in the final SELECT counts clicks).
+    SELECT shareid, machinecode
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}'
+      AND topic = 'click'
+      AND shareid IS NOT NULL AND shareid <> ''
+),
+share_info AS (
+    -- 180-day lookback (run date minus 180 days through the run date): one row per shareid with its earliest share dt, plus MAX(machinecode) as the single sharer value kept for that shareid.
+    SELECT
+        shareid,
+        MIN(dt)              AS share_date,
+        MAX(machinecode)     AS share_mid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -180, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY shareid
+),
+daily_total AS (
+    -- Same 180-day lookback: per-dt distinct counts of shareid and machinecode over share rows (the denominators of the share-side ratios).
+    SELECT
+        dt                             AS share_date,
+        COUNT(DISTINCT shareid)        AS day_share_cnt,
+        COUNT(DISTINCT machinecode)    AS day_share_uv
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -180, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY dt
+)
+SELECT
+    '${dt}'                                                                                          AS dt,
+    DATEDIFF(TO_DATE('${dt}', 'yyyyMMdd'), TO_DATE(si.share_date, 'yyyyMMdd'), 'dd')                 AS `来自N天前的分享`,
+    COUNT(DISTINCT c.shareid)                                                                        AS `分享cnt`,
+    ROUND(COUNT(DISTINCT c.shareid) * 100.0 / tot.day_share_cnt, 2)                                  AS `分享cnt占比`,
+    COUNT(DISTINCT si.share_mid)                                                                     AS `分享uv`,
+    ROUND(COUNT(DISTINCT si.share_mid) * 100.0 / tot.day_share_uv, 2)                                AS `分享uv占比`,
+    COUNT(*)                                                                                         AS `回流cnt`,
+    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 2)                                                AS `回流cnt占比`,
+    COUNT(DISTINCT c.machinecode)                                                                    AS `回流uv`,
+    ROUND(COUNT(DISTINCT c.machinecode) * 100.0 / SUM(COUNT(DISTINCT c.machinecode)) OVER(), 2)      AS `回流uv占比`
+FROM clicks c
+LEFT JOIN share_info si ON c.shareid = si.shareid
+LEFT JOIN daily_total tot ON si.share_date = tot.share_date
+GROUP BY si.share_date, tot.day_share_cnt, tot.day_share_uv
+ORDER BY `来自N天前的分享`