Просмотр исходного кода

feat(尾号实验): 添加 base_v3/v4 单尾号聚合 & abcode 过滤

- v3(Android)/v4(iOS) 拆为 t_bucket → t_metrics 两层聚合,消除跨尾号去重偏差
- t_dau2 同样拆为 t_dau2_bucket → AVG
- v4 useractive_log 新增 ab_test003 过滤,与曝光表人群口径一致
- v4 排除 ab5/ab6/ab7

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangxiaohui 1 неделя назад
Родитель
Commit
49a387dc48

+ 7 - 0
tasks/00_尾号实验/base_v3.json

@@ -0,0 +1,7 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "RV6DCd",
+  "sort": "dt:desc",
+  "cols": null,
+  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+}

+ 177 - 0
tasks/00_尾号实验/base_v3.sql

@@ -0,0 +1,177 @@
+WITH t_abmap AS -- Lookup table: session-id suffix character -> experiment group label (abcode).
+( -- Suffixes not listed here are labelled "other" by the COALESCE in the CTEs that join this table.
+    SELECT "0" AS suffix, "实验组:ros损失函数优化" AS abcode -- experiment group: ROS loss-function optimisation (suffixes 0, 5, f)
+    UNION ALL SELECT "5", "实验组:ros损失函数优化"
+    UNION ALL SELECT "f", "实验组:ros损失函数优化"
+    UNION ALL SELECT "4", "实验组:c1_rovn & 去掉vor实验" -- experiment group: c1_rovn & remove-vor experiment (suffixes 4, 6, 7)
+    UNION ALL SELECT "6", "实验组:c1_rovn & 去掉vor实验"
+    UNION ALL SELECT "7", "实验组:c1_rovn & 去掉vor实验"
+    UNION ALL SELECT "8", "实验组:c1_rovn" -- experiment group: c1_rovn (suffixes 8, 9, e)
+    UNION ALL SELECT "9", "实验组:c1_rovn"
+    UNION ALL SELECT "e", "实验组:c1_rovn"
+    UNION ALL SELECT "a", "对照组" -- control group (suffixes a, b, c)
+    UNION ALL SELECT "b", "对照组"
+    UNION ALL SELECT "c", "对照组"
+) -- NOTE(review): base_v3.json filters abcode!=6/e/f, yet abcode holds these labels, not suffixes — confirm the filter's intent.
+,t_base AS -- Exposure rows for apptype "4" on partition ${dt}, each tagged with its experiment label.
+(
+    SELECT  sub.* -- every column of the inner query, unchanged
+            ,COALESCE(m.abcode,"other") AS abcode -- no match in t_abmap (or NULL suffix) -> "other"
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix -- last character of extend.rootsessionid
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐" -- these four pages -> "推荐" (recommendation)
+                                WHEN page IN ("回流页","其他") THEN "非推荐" -- these two pages -> "非推荐" (non-recommendation)
+                                ELSE "其他" -- fallback; the WHERE below only admits the six pages already handled above
+                        END AS page
+                        ,mid
+                        ,vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        -- ,abcode as abcode_origin  (disabled: would also expose the table's own abcode column)
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+                WHERE   dt="${dt}" -- ${dt} is a placeholder, presumably substituted by the task runner before execution
+                AND     apptype IN ("4") -- Android according to the commit message — TODO confirm
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9") -- filters on the table's own abcode column
+                AND     abcode NOT IN ("ab100") -- redundant: "ab100" is not in the IN list above
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4" -- already guaranteed by the inner WHERE
+    AND     sub.suffix = m.suffix
+)
+-- dau2, step 1: distinct machinecode count for each individual suffix
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(ua.dt,1,8) AS dt -- first 8 characters of the partition value
+            ,ua.apptype
+            ,COALESCE(grp.abcode,"other") AS abcode -- suffix without a mapping -> "other"
+            ,ua.suffix
+            ,COUNT(DISTINCT ua.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix -- last character of extparams.rootSessionId
+                FROM    loghubods.useractive_log
+                WHERE   dt="${dt}"
+                AND     apptype IN ("4")
+            ) ua
+    LEFT JOIN t_abmap grp
+    ON      ua.apptype = "4"
+    AND     ua.suffix = grp.suffix
+    GROUP BY SUBSTR(ua.dt,1,8)
+             ,ua.apptype
+             ,COALESCE(grp.abcode,"other")
+             ,ua.suffix
+)
+-- dau2, step 2: for each experiment group, the mean of its per-suffix counts
+,t_dau2 AS
+(
+    SELECT  bk.dt
+            ,bk.apptype
+            ,bk.abcode
+            ,AVG(bk.dau2) AS dau2 -- mean over the suffixes that share this abcode
+    FROM    t_dau2_bucket bk
+    GROUP BY bk.dt
+             ,bk.apptype
+             ,bk.abcode
+)
+-- Step 1: aggregate per individual suffix, so that distinct-mid counting happens inside one suffix only
+,t_bucket AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,suffix
+            ,COALESCE(COUNT(1) / COUNT(DISTINCT mid),0) AS exp_per_dau -- rows per distinct mid
+            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one -- SUM(is_share) per row
+            ,COALESCE(SUM(return_n_uv) / SUM(is_share),0) AS ros_one -- NOTE(review): a NULL ratio becomes 0 here and then enters the AVG in t_metrics — confirm intended
+            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str -- SUM(share_cnt) per row
+            ,COALESCE(SUM(return_n_uv) / SUM(share_cnt),0) AS ros -- SUM(return_n_uv) per SUM(share_cnt)
+            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus -- SUM(is_return_1) per row
+            ,COALESCE(SUM(return_n_uv) / SUM(is_return_1),0) AS ros_minus -- SUM(return_n_uv) per SUM(is_return_1)
+            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS rovn -- SUM(return_n_uv) per row
+            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24 -- SUM(new_exposure_cnt) per row
+            ,COUNT(DISTINCT mid) AS dau -- distinct mid within this suffix
+            ,COUNT(1) AS exp -- row count within this suffix
+            ,COALESCE(SUM(is_share),0) AS is_share -- raw totals below; output names reuse the input column names
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_base
+    WHERE   page = "推荐" -- page is the recoded value produced in t_base, so only the "推荐" group is kept
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,suffix
+)
+-- Step 2: for each experiment group, the mean of the per-suffix values from t_bucket
+,t_metrics AS
+(
+    SELECT  bk.dt
+            ,bk.apptype
+            ,bk.abcode
+            ,ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau -- ratios: averaged, then rounded
+            ,ROUND(AVG(bk.str_one),6) AS str_one
+            ,ROUND(AVG(bk.ros_one),6) AS ros_one
+            ,ROUND(AVG(bk.str),6) AS str
+            ,ROUND(AVG(bk.ros),6) AS ros
+            ,ROUND(AVG(bk.str_plus),6) AS str_plus
+            ,ROUND(AVG(bk.ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bk.rovn),6) AS rovn
+            ,ROUND(AVG(bk.vovh24),6) AS vovh24
+            ,AVG(bk.dau) AS dau -- counts and totals: averaged, not rounded
+            ,AVG(bk.exp) AS exp
+            ,AVG(bk.is_share) AS is_share
+            ,AVG(bk.share_cnt) AS share_cnt
+            ,AVG(bk.is_return_1) AS is_return_1
+            ,AVG(bk.return_n_uv) AS return_n_uv
+            ,AVG(bk.viewh24) AS viewh24
+            ,AVG(bk.return_n_uv_noself) AS return_n_uv_noself
+            ,WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix -- comma-joined list of the suffixes behind this row
+    FROM    t_bucket bk
+    GROUP BY bk.dt
+             ,bk.apptype
+             ,bk.abcode
+)
+SELECT  met.dt -- final result: one row per (dt, apptype, abcode) from t_metrics, with dau2 attached
+        ,met.apptype
+        ,met.abcode
+        ,met.suffix
+        ,met.exp_per_dau
+        ,met.str_one
+        ,met.ros_one
+        ,met.str
+        ,met.ros
+        ,met.str_plus
+        ,met.ros_minus
+        ,met.rovn
+        ,met.vovh24
+        ,met.dau
+        ,met.exp
+        ,met.is_share
+        ,met.share_cnt
+        ,met.is_return_1
+        ,met.return_n_uv
+        ,met.viewh24
+        ,met.return_n_uv_noself
+        ,act.dau2 -- NULL when t_dau2 has no row for this key
+FROM    t_metrics met
+LEFT JOIN t_dau2 act
+ON      met.dt = act.dt -- NOTE(review): t_dau2.dt is SUBSTR(dt,1,8), t_metrics.dt is the raw value; equal only if ${dt} has at most 8 characters — confirm
+AND     met.apptype = act.apptype
+AND     met.abcode = act.abcode
+ORDER BY met.dt DESC,met.apptype,met.abcode -- NOTE(review): if this runs on MaxCompute, ORDER BY without LIMIT may need odps.sql.validate.orderby.limit=false — confirm
+;

+ 7 - 0
tasks/00_尾号实验/base_v4.json

@@ -0,0 +1,7 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "NFPs3X",
+  "sort": "dt:desc",
+  "cols": null,
+  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+}

+ 175 - 0
tasks/00_尾号实验/base_v4.sql

@@ -0,0 +1,175 @@
+WITH t_abmap AS -- Lookup table: session-id suffix character -> experiment group label (abcode).
+( -- Suffixes not listed here are labelled "other" by the COALESCE in the CTEs that join this table.
+    SELECT "3" AS suffix, "实验组:ros损失函数优化" AS abcode -- experiment group: ROS loss-function optimisation (suffix 3)
+    UNION ALL SELECT "4", "实验组:c1_rovn & 去掉vor实验" -- experiment group: c1_rovn & remove-vor experiment (suffix 4)
+    UNION ALL SELECT "5", "实验组:c1_rovn" -- experiment group: c1_rovn (suffix 5)
+    UNION ALL SELECT "a", "对照组" -- control group (suffixes a, b, c)
+    UNION ALL SELECT "b", "对照组"
+    UNION ALL SELECT "c", "对照组"
+) -- NOTE(review): base_v4.json filters abcode!=6/e/f, but neither the labels nor the suffixes here contain those values — confirm the filter's intent.
+,t_base AS -- Exposure rows for apptype "0" on partition ${dt}, each tagged with its experiment label.
+(
+    SELECT  sub.* -- every column of the inner query, unchanged
+            ,COALESCE(m.abcode,"other") AS abcode -- no match in t_abmap (or NULL suffix) -> "other"
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix -- last character of extend.rootsessionid
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐" -- these four pages -> "推荐" (recommendation)
+                                WHEN page IN ("回流页","其他") THEN "非推荐" -- these two pages -> "非推荐" (non-recommendation)
+                                ELSE "其他" -- fallback; the WHERE below only admits the six pages already handled above
+                        END AS page
+                        ,mid
+                        ,vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        -- ,abcode as abcode_origin  (disabled: would also expose the table's own abcode column)
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+                WHERE   dt="${dt}" -- ${dt} is a placeholder, presumably substituted by the task runner before execution
+                AND     apptype IN ("0") -- iOS according to the commit message — TODO confirm
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9") -- ab5/ab6/ab7 are deliberately absent (see commit message)
+                AND     abcode NOT IN ("ab100") -- redundant: "ab100" is not in the IN list above
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "0" -- already guaranteed by the inner WHERE
+    AND     sub.suffix = m.suffix
+)
+-- dau2, step 1: distinct machinecode count for each individual suffix
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(ua.dt,1,8) AS dt -- first 8 characters of the partition value
+            ,ua.apptype
+            ,COALESCE(grp.abcode,"other") AS abcode -- suffix without a mapping -> "other"
+            ,ua.suffix
+            ,COUNT(DISTINCT ua.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix -- last character of extparams.rootSessionId
+                FROM    loghubods.useractive_log
+                WHERE   dt="${dt}"
+                -- Disabled alternative source: loghubods.useractive_log_per5min,
+                -- read with dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                AND     apptype IN ("0")
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9") -- same ab list as the exposure filter in t_base
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') NOT IN ("ab100")
+            ) ua
+    LEFT JOIN t_abmap grp
+    ON      ua.apptype = "0"
+    AND     ua.suffix = grp.suffix
+    GROUP BY SUBSTR(ua.dt,1,8)
+             ,ua.apptype
+             ,COALESCE(grp.abcode,"other")
+             ,ua.suffix
+)
+-- dau2, step 2: for each experiment group, the mean of its per-suffix counts
+,t_dau2 AS
+(
+    SELECT  bk.dt
+            ,bk.apptype
+            ,bk.abcode
+            ,AVG(bk.dau2) AS dau2 -- mean over the suffixes that share this abcode
+    FROM    t_dau2_bucket bk
+    GROUP BY bk.dt
+             ,bk.apptype
+             ,bk.abcode
+)
+-- Step 1: aggregate per individual suffix, so that distinct-mid counting happens inside one suffix only
+,t_bucket AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,suffix
+            ,COALESCE(COUNT(1) / COUNT(DISTINCT mid),0) AS exp_per_dau -- rows per distinct mid
+            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one -- SUM(is_share) per row
+            ,COALESCE(SUM(return_n_uv) / SUM(is_share),0) AS ros_one -- NOTE(review): a NULL ratio becomes 0 here and then enters the AVG in t_metrics — confirm intended
+            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str -- SUM(share_cnt) per row
+            ,COALESCE(SUM(return_n_uv) / SUM(share_cnt),0) AS ros -- SUM(return_n_uv) per SUM(share_cnt)
+            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus -- SUM(is_return_1) per row
+            ,COALESCE(SUM(return_n_uv) / SUM(is_return_1),0) AS ros_minus -- SUM(return_n_uv) per SUM(is_return_1)
+            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS rovn -- SUM(return_n_uv) per row
+            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24 -- SUM(new_exposure_cnt) per row
+            ,COUNT(DISTINCT mid) AS dau -- distinct mid within this suffix
+            ,COUNT(1) AS exp -- row count within this suffix
+            ,COALESCE(SUM(is_share),0) AS is_share -- raw totals below; output names reuse the input column names
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_base
+    WHERE   page = "推荐" -- page is the recoded value produced in t_base, so only the "推荐" group is kept
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,suffix
+)
+-- Step 2: for each experiment group, the mean of the per-suffix values from t_bucket
+,t_metrics AS
+(
+    SELECT  bk.dt
+            ,bk.apptype
+            ,bk.abcode
+            ,ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau -- ratios: averaged, then rounded
+            ,ROUND(AVG(bk.str_one),6) AS str_one
+            ,ROUND(AVG(bk.ros_one),6) AS ros_one
+            ,ROUND(AVG(bk.str),6) AS str
+            ,ROUND(AVG(bk.ros),6) AS ros
+            ,ROUND(AVG(bk.str_plus),6) AS str_plus
+            ,ROUND(AVG(bk.ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bk.rovn),6) AS rovn
+            ,ROUND(AVG(bk.vovh24),6) AS vovh24
+            ,AVG(bk.dau) AS dau -- counts and totals: averaged, not rounded
+            ,AVG(bk.exp) AS exp
+            ,AVG(bk.is_share) AS is_share
+            ,AVG(bk.share_cnt) AS share_cnt
+            ,AVG(bk.is_return_1) AS is_return_1
+            ,AVG(bk.return_n_uv) AS return_n_uv
+            ,AVG(bk.viewh24) AS viewh24
+            ,AVG(bk.return_n_uv_noself) AS return_n_uv_noself
+            ,WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix -- comma-joined list of the suffixes behind this row
+    FROM    t_bucket bk
+    GROUP BY bk.dt
+             ,bk.apptype
+             ,bk.abcode
+)
+SELECT  met.dt -- final result: one row per (dt, apptype, abcode) from t_metrics, with dau2 attached
+        ,met.apptype
+        ,met.abcode
+        ,met.suffix
+        ,met.exp_per_dau
+        ,met.str_one
+        ,met.ros_one
+        ,met.str
+        ,met.ros
+        ,met.str_plus
+        ,met.ros_minus
+        ,met.rovn
+        ,met.vovh24
+        ,met.dau
+        ,met.exp
+        ,met.is_share
+        ,met.share_cnt
+        ,met.is_return_1
+        ,met.return_n_uv
+        ,met.viewh24
+        ,met.return_n_uv_noself
+        ,act.dau2 -- NULL when t_dau2 has no row for this key
+FROM    t_metrics met
+LEFT JOIN t_dau2 act
+ON      met.dt = act.dt -- NOTE(review): t_dau2.dt is SUBSTR(dt,1,8), t_metrics.dt is the raw value; equal only if ${dt} has at most 8 characters — confirm
+AND     met.apptype = act.apptype
+AND     met.abcode = act.abcode
+ORDER BY met.dt DESC,met.apptype,met.abcode -- NOTE(review): if this runs on MaxCompute, ORDER BY without LIMIT may need odps.sql.validate.orderby.limit=false — confirm
+;