Bläddra i källkod

feat: 更新尾号实验 base_v3/v4 系列 SQL,新增 dim/v2/v3 变体,优化 fetch_daily

- 更新 base_v3/v4/v4_v1 的 SQL 和 JSON 配置
- 新增 base_v3_dim、base_v3_dim_v2、base_v3_v3 维度分析 SQL
- 新增 base_v4_v2、base_v4_v3 实验变体
- 删除 base_v4 copy.sql 冗余文件
- 优化 fetch_daily.py 逻辑

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
yangxiaohui 1 dag sedan
förälder
incheckning
894686e968

+ 61 - 9
fetch_daily.py

@@ -163,6 +163,7 @@ def load_feishu_config(sql_file):
         "cols": None,
         "filter": None,
         "limit": None,
+        "append_cols": False,
     }
 
     root_dir = Path(__file__).parent
@@ -261,7 +262,7 @@ def column_index_to_letter(col_idx):
     return result
 
 
-def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc", cols_spec=None, filter_spec=None, limit=None):
+def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc", cols_spec=None, filter_spec=None, limit=None, append_cols=False):
     """上传 CSV 文件到飞书表格(通过模板行继承样式)
 
     第1行: 表头
@@ -276,6 +277,7 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
         cols_spec: 列映射规格,如 "dt:日期,name,value:数值"
         filter_spec: 过滤条件,dict {"字段": "值"} 或 str "字段=值,字段=值"
         limit: 上传行数上限
+        append_cols: 是否将飞书中没有的新列追加到右侧(默认 False 忽略)
     """
     from feishu import Client, LARK_HOST, APP_ID, APP_SECRET, request
 
@@ -395,7 +397,7 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
         print(f"飞书表头: {feishu_cols_str}")
         print(f"CSV表头: {header}")
 
-        # 校验字段一致性(警告但继续,以飞书表头为准)
+        # 校验字段一致性
         feishu_set = set(feishu_cols_str)
         csv_set = set(header)
 
@@ -405,14 +407,25 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
         if missing_in_csv:
             print(f"警告: CSV缺少字段(将填空值): {missing_in_csv}")
         if missing_in_feishu:
-            print(f"警告: 飞书缺少字段(将忽略): {missing_in_feishu}")
+            if append_cols:
+                print(f"新增列(将追加到右侧): {missing_in_feishu}")
+            else:
+                print(f"警告: 飞书缺少字段(将忽略): {missing_in_feishu}")
+
+        # 确定最终列顺序:飞书已有列 + (可选) CSV新增列
+        final_col_names = list(feishu_cols_str)
+        append_col_names = []
+        if append_cols and missing_in_feishu:
+            # 按 CSV 中的原始顺序追加新列
+            append_col_names = [c for c in header if c in missing_in_feishu]
+            final_col_names.extend(append_col_names)
 
-        # 按飞书表头顺序重排数据(用纯文本版本做匹配)
+        # 按最终列顺序重排数据
         csv_col_index = {name: i for i, name in enumerate(header)}
         new_converted_rows = []
         for row in converted_rows:
             new_row = []
-            for col_name in feishu_cols_str:
+            for col_name in final_col_names:
                 if col_name in csv_col_index:
                     new_row.append(row[csv_col_index[col_name]])
                 else:
@@ -420,7 +433,45 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
             new_converted_rows.append(new_row)
 
         converted_rows = new_converted_rows
-        header = feishu_cols
+
+        # 写入新增列的表头到飞书
+        if append_col_names:
+            # 先扩展列数
+            add_cols = len(append_col_names)
+            expand_headers = {
+                'Content-Type': 'application/json; charset=utf-8',
+                'Authorization': f'Bearer {access_token}'
+            }
+            expand_payload = {
+                "dimension": {
+                    "sheetId": sheet_id,
+                    "majorDimension": "COLUMNS",
+                    "length": add_cols
+                }
+            }
+            try:
+                request("POST", f"{LARK_HOST}/open-apis/sheets/v2/spreadsheets/{sheet_token}/dimension_range",
+                        expand_headers, expand_payload)
+                current_cols += add_cols
+                print(f"扩展列数: +{add_cols}列(追加新字段)")
+            except Exception as e:
+                print(f"  扩展列数失败: {e}")
+
+            # 写入新列表头
+            start_col_idx = len(feishu_cols_str) + 1
+            start_col = column_index_to_letter(start_col_idx)
+            end_col = column_index_to_letter(start_col_idx + add_cols - 1)
+            append_range = f"{sheet_id}!{start_col}1:{end_col}1"
+            client.batch_update_values(access_token, sheet_token, {
+                "valueRanges": [{"range": append_range, "values": [append_col_names]}]
+            })
+            print(f"已写入新列表头: {append_col_names}")
+
+            # header 使用飞书原始表头 + 新增列名
+            header = list(feishu_cols) + append_col_names
+        else:
+            header = feishu_cols
+
         print(f"已按飞书表头顺序重排数据")
     else:
         # 飞书表头为空,用 CSV 表头写入(飞书单次最多写100列,需分批)
@@ -682,6 +733,7 @@ def main():
         args.filter = feishu_config["filter"]
     if args.limit is None:
         args.limit = feishu_config["limit"]
+    append_cols = feishu_config.get("append_cols", False)
 
     # 打印飞书配置
     if args.feishu:
@@ -703,7 +755,7 @@ def main():
             merged_file = merge_csv_files(daily_dir)
             # 如果指定了飞书上传
             if args.feishu and merged_file:
-                upload_to_feishu(merged_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit)
+                upload_to_feishu(merged_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit, append_cols)
         else:
             print("没有可合并的数据")
         return
@@ -767,7 +819,7 @@ def main():
             print(f"数据目录: {output_file}")
             # 如果指定了飞书上传
             if args.feishu and output_file.exists():
-                upload_to_feishu(output_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit)
+                upload_to_feishu(output_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit, append_cols)
         except Exception as e:
             print(f"✗ 执行失败: {e}")
         return
@@ -807,7 +859,7 @@ def main():
     if args.feishu:
         merged_file = merge_csv_files(daily_dir)
         if merged_file:
-            upload_to_feishu(merged_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit)
+            upload_to_feishu(merged_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit, append_cols)
 
 
 if __name__ == "__main__":

+ 1 - 1
tasks/00_尾号实验/base_v3.json

@@ -3,5 +3,5 @@
   "sheet_id": "RV6DCd",
   "sort": "dt:desc",
   "cols": null,
-  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+  "filter": "abcode!=前基线,abcode!=6,abcode!=e,abcode!=f"
 }

+ 6 - 3
tasks/00_尾号实验/base_v3.sql

@@ -1,14 +1,17 @@
 WITH t_abmap AS
 (
-    SELECT "c" AS suffix, "前基线" AS abcode
+    SELECT "a" AS suffix, "前基线" AS abcode
+    UNION ALL SELECT "b", "前基线"
     UNION ALL SELECT "e", "实验组:解构特征排序str模型&召回"
     UNION ALL SELECT "f", "实验组:解构特征排序str模型&召回"
     UNION ALL SELECT "5", "实验组:解构特征排序str模型"
     UNION ALL SELECT "d", "实验组:解构特征排序str模型"
     UNION ALL SELECT "6", "实验组:bn_ros新损失函数"
     UNION ALL SELECT "7", "实验组:bn_ros新损失函数"
-    UNION ALL SELECT "a", "对照组"
-    UNION ALL SELECT "b", "对照组"
+    UNION ALL SELECT "8", "对照组"
+    UNION ALL SELECT "0", "实验组:建模目标实验"
+    UNION ALL SELECT "1", "实验组:建模目标实验"
+    UNION ALL SELECT "9", "对照组"
 )
 ,t_base AS
 (

+ 7 - 0
tasks/00_尾号实验/base_v3_dim.json

@@ -0,0 +1,7 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "bjf5jW",
+  "sort": "dt:desc",
+  "cols": null,
+  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+}

+ 383 - 0
tasks/00_尾号实验/base_v3_dim.sql

@@ -0,0 +1,383 @@
+WITH t_abmap AS  -- lookup table: rootsessionid suffix character -> experiment-group label (abcode)
+(
+    SELECT "c" AS suffix, "前基线" AS abcode
+    UNION ALL SELECT "e", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "f", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "5", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "d", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "6", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "7", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "a", "对照组"
+    UNION ALL SELECT "b", "对照组"
+)  -- NOTE(review): base_v3.sql in this same commit remaps a/b to 前基线 and 8/9 to 对照组 and adds 0/1; confirm this older mapping is intended here
+,t_base AS  -- one row per exposure record, enriched with cn/c1, dn/d1 and the mapped experiment label
+(
+    SELECT  sub.*
+            ,COALESCE(m.abcode,"other") AS abcode  -- suffixes absent from t_abmap are labelled other
+    FROM    (
+                SELECT  dt
+                        ,hh
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix  -- last character of extend.rootsessionid
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"  -- collapse raw page values into buckets
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"  -- NOTE(review): looks unreachable, the WHERE below limits page to the six values listed above; confirm
+                        END AS page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- c1/cn: return-visit UV from clicks that followed a share; one row per (machinecode, subsessionid, videoid)
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn  -- distinct b2 clickers other than the b1 sharer
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1  -- same as cn, restricted to sharedepth = 1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid  -- b: clicks whose rootshareid is one of a's shares
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode  -- b1: shares made by the clicking machine in that same subsession
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid  -- b2: clicks whose rootshareid is one of b1's shares
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- d1/dn: return flow brought by the next video viewed in the same subsession
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn  -- value from the row with the next-higher rn; 0 when there is none
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn  -- NOTE(review): partitioned by subsessionid only, while the LAG windows use mid,subsessionid; confirm
+                                            FROM    (
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1  -- keep only the earliest view per (mid, subsessionid, videoid)
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                AND     abcode NOT IN ("ab100")  -- redundant: ab100 is already excluded by the IN list above
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"  -- constant predicate; the inner WHERE already restricts apptype to 4
+    AND     sub.suffix = m.suffix
+)
+-- top-10 video ranking (ranked by exposure count aggregated across suffixes)
+,t_vid_rank AS
+(
+    SELECT * FROM (
+        SELECT  dt, hh, apptype, abcode, vid
+                ,ROW_NUMBER() OVER (PARTITION BY dt, hh, apptype, abcode ORDER BY cnt DESC) AS vid_rank  -- rank 1 = most exposed vid in that (dt, hh, apptype, abcode)
+        FROM (
+            SELECT  dt, hh, apptype, abcode, vid, COUNT(1) AS cnt
+            FROM    t_base
+            WHERE   page = "推荐" AND abcode != "other"
+            GROUP BY dt, hh, apptype, abcode, vid
+        )
+    ) WHERE vid_rank <= 10
+)
+-- video titles, limited to the vids that appear in t_vid_rank
+,t_vid_title AS
+(
+    SELECT  CAST(id AS STRING) AS vid
+            ,title
+    FROM    videoods.wx_video
+    WHERE   id IN (SELECT DISTINCT CAST(vid AS BIGINT) FROM t_vid_rank)
+)
+-- dau2: aggregated per single suffix
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(sub.dt,1,8) AS dt  -- first 8 characters of the longer dt value filtered below
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode
+            ,sub.suffix
+            ,COUNT(DISTINCT sub.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix  -- last character of extparams.rootSessionId
+                FROM    loghubods.useractive_log_per5min
+                WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                AND     apptype IN ("4")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+             ,sub.suffix
+)
+-- dau2: mean over suffixes within each experiment group
+,t_dau2 AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,AVG(dau2) AS dau2
+    FROM    t_dau2_bucket
+    GROUP BY dt
+             ,apptype
+             ,abcode
+)
+-- aggregate per single suffix (the INNER JOIN on t_vid_rank keeps only the top-10 videos)
+,t_bucket AS
+(
+    SELECT  b.dt
+            ,b.hh
+            ,b.apptype
+            ,b.abcode
+            ,b.suffix
+            ,b.vid
+            ,COALESCE(v.title,'') AS title
+            ,r.vid_rank
+            ,COALESCE(COUNT(1) / COUNT(DISTINCT b.mid),0) AS exp_per_dau  -- ratio metrics: COALESCE replaces a NULL quotient with 0
+            ,COALESCE(SUM(b.is_share) / COUNT(1),0) AS str_one
+            ,COALESCE(SUM(b.return_n_uv) / SUM(b.is_share),0) AS ros_one
+            ,COALESCE(SUM(b.share_cnt) / COUNT(1),0) AS str
+            ,COALESCE(SUM(b.return_n_uv) / SUM(b.share_cnt),0) AS ros
+            ,COALESCE(SUM(b.is_return_1) / COUNT(1),0) AS str_plus
+            ,COALESCE(SUM(b.return_n_uv) / SUM(b.is_return_1),0) AS ros_minus
+            ,COALESCE(SUM(b.return_n_uv) / COUNT(1),0) AS bn_rov
+            ,COALESCE(SUM(b.c1) / COUNT(1),0) AS c1_rov
+            ,COALESCE(SUM(b.cn) / COUNT(1),0) AS cn_rov
+            ,COALESCE(SUM(b.d1) / COUNT(1),0) AS d1_rov
+            ,COALESCE(SUM(b.dn) / COUNT(1),0) AS dn_rov
+            ,COALESCE(SUM(b.new_exposure_cnt) / COUNT(1),0) AS vovh24
+            ,COUNT(DISTINCT b.mid) AS dau  -- raw counts and sums behind the ratios above
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(b.is_share),0) AS is_share
+            ,COALESCE(SUM(b.share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(b.is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(b.return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(b.new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(b.return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(b.cn),0) AS cn
+            ,COALESCE(SUM(b.c1),0) AS c1
+            ,COALESCE(SUM(b.dn),0) AS dn
+            ,COALESCE(SUM(b.d1),0) AS d1
+    FROM    t_base b
+    INNER JOIN t_vid_rank r
+    ON      b.dt = r.dt
+    AND     b.hh = r.hh
+    AND     b.apptype = r.apptype
+    AND     b.abcode = r.abcode
+    AND     b.vid = r.vid
+    LEFT JOIN t_vid_title v
+    ON      b.vid = v.vid
+    WHERE   b.page = "推荐"
+    AND     b.abcode != "other"
+    GROUP BY b.dt
+             ,b.hh
+             ,b.apptype
+             ,b.abcode
+             ,b.suffix
+             ,b.vid
+             ,v.title
+             ,r.vid_rank
+)
+-- mean over suffixes within each experiment group
+,t_metrics AS
+(
+    SELECT  dt
+            ,hh
+            ,apptype
+            ,abcode
+            ,vid
+            ,title
+            ,vid_rank
+            ,ROUND(AVG(exp_per_dau),2) AS exp_per_dau  -- unweighted mean of the per-suffix rows from t_bucket
+            ,ROUND(AVG(str_one),6) AS str_one
+            ,ROUND(AVG(ros_one),6) AS ros_one
+            ,ROUND(AVG(str),6) AS str
+            ,ROUND(AVG(ros),6) AS ros
+            ,ROUND(AVG(str_plus),6) AS str_plus
+            ,ROUND(AVG(ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bn_rov),6) AS bn_rov
+            ,ROUND(AVG(c1_rov),6) AS c1_rov
+            ,ROUND(AVG(cn_rov),6) AS cn_rov
+            ,ROUND(AVG(d1_rov),6) AS d1_rov
+            ,ROUND(AVG(dn_rov),6) AS dn_rov
+            ,ROUND(AVG(vovh24),6) AS vovh24
+            ,AVG(dau) AS dau
+            ,AVG(exp) AS exp
+            ,AVG(is_share) AS is_share
+            ,AVG(share_cnt) AS share_cnt
+            ,AVG(is_return_1) AS is_return_1
+            ,AVG(return_n_uv) AS return_n_uv
+            ,AVG(viewh24) AS viewh24
+            ,AVG(return_n_uv_noself) AS return_n_uv_noself
+            ,AVG(cn) AS cn
+            ,AVG(c1) AS c1
+            ,AVG(dn) AS dn
+            ,AVG(d1) AS d1
+            ,WM_CONCAT(DISTINCT ',',suffix) AS suffix  -- comma-joined distinct suffixes that contributed to this row
+    FROM    t_bucket
+    GROUP BY dt
+             ,hh
+             ,apptype
+             ,abcode
+             ,vid
+             ,title
+             ,vid_rank
+)
+SELECT  a.dt
+        ,a.hh
+        ,a.apptype
+        ,a.abcode
+        ,a.vid
+        ,a.title
+        ,a.vid_rank
+        ,a.suffix
+        ,a.exp_per_dau
+        ,a.str_one
+        ,a.ros_one
+        ,a.str
+        ,a.ros
+        ,a.str_plus
+        ,a.ros_minus
+        ,a.bn_rov
+        ,a.c1_rov
+        ,a.cn_rov
+        ,a.d1_rov
+        ,a.dn_rov
+        ,a.vovh24
+        ,a.dau
+        ,a.exp
+        ,a.is_share
+        ,a.share_cnt
+        ,a.is_return_1
+        ,a.return_n_uv
+        ,a.viewh24
+        ,a.return_n_uv_noself
+        ,a.cn
+        ,a.c1
+        ,a.dn
+        ,a.d1
+        ,b.dau2
+FROM    t_metrics a
+LEFT JOIN t_dau2 b  -- dau2 is keyed by (dt, apptype, abcode) only, so it repeats on every hh/vid row of a group
+ON      a.dt = b.dt
+AND     a.apptype = b.apptype
+AND     a.abcode = b.abcode
+ORDER BY a.dt DESC,a.hh,a.apptype,a.abcode,a.vid_rank
+;

+ 430 - 0
tasks/00_尾号实验/base_v3_dim_v2.sql

@@ -0,0 +1,430 @@
+WITH t_abmap AS  -- Lookup table: (apptype, one-character suffix) -> experiment group label (abcode); joined on apptype + suffix by the CTEs below
+(
+    -- apptype = "4"
+    SELECT "4" AS apptype, "c" AS suffix, "前基线" AS abcode
+    UNION ALL SELECT "4", "e", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "4", "f", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "4", "5", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "4", "d", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "4", "6", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "4", "7", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "4", "a", "对照组"
+    UNION ALL SELECT "4", "b", "对照组"
+    -- apptype = "0" (note: suffixes 6/7 map to a different group here than for apptype "4", and 3/4 are mapped only here)
+    UNION ALL SELECT "0", "c", "前基线"
+    UNION ALL SELECT "0", "e", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "0", "f", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "0", "5", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "0", "d", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "0", "3", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "0", "4", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "0", "6", "实验组:cn_rov"
+    UNION ALL SELECT "0", "7", "实验组:cn_rov"
+    UNION ALL SELECT "0", "a", "对照组"
+    UNION ALL SELECT "0", "b", "对照组"
+)
+,t_base AS  -- Exposure rows for ${dt}, enriched with share-return counts (cn/c1, dn/d1) and the experiment group resolved from the session suffix
+(
+    SELECT  sub.*
+            ,COALESCE(m.abcode,"other") AS abcode  -- suffixes absent from t_abmap are labelled "other"
+    FROM    (
+                SELECT  dt
+                        ,hh
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix  -- last character of rootsessionid
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"  -- collapse raw page names into two buckets
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- c1/cn: return UV from clicks on shares made after a share was clicked (per sharer mid, subsessionid, video)
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn  -- distinct clickers of b1's shares, excluding b1 itself
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1  -- same, restricted to sharedepth = 1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid  -- b: clicks whose root share is a's share
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode  -- b1: shares made by the clicker in the same subsession as the click
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid  -- b2: clicks whose root share is b1's share
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- d1/dn: returns brought by the next video in the session
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn  -- ordering by rn DESC makes LAG read the row with the next-higher rn; 0 when there is none
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn  -- NOTE(review): partitioned by subsessionid only, while the LAG above partitions by mid,subsessionid -- confirm subsessionid is unique per mid
+                                            FROM    (
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1  -- keep only the earliest view of each video per (mid, subsessionid)
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0","4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     (
+                            (apptype = "4" AND abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9"))  -- per-apptype allow-list of raw abcode values
+                         OR (apptype = "0" AND abcode IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9"))
+                        )
+                AND     abcode NOT IN ("ab100")  -- NOTE(review): redundant, the allow-lists above never contain "ab100"
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = m.apptype
+    AND     sub.suffix = m.suffix
+)
+-- Top-10 videos by exposure count per (dt, hh, apptype, abcode, page); counts pool all suffixes of a group, ranked separately per page
+,t_vid_rank AS
+(
+    SELECT * FROM (
+        SELECT  dt, hh, apptype, abcode, page, vid
+                ,ROW_NUMBER() OVER (PARTITION BY dt, hh, apptype, abcode, page ORDER BY cnt DESC, vid ASC) AS vid_rank  -- vid breaks ties so the top-10 set is deterministic wherever this CTE is referenced
+        FROM (
+            SELECT  dt, hh, apptype, abcode, page, vid, COUNT(1) AS cnt  -- cnt = number of t_base rows for this video
+            FROM    t_base
+            WHERE   abcode != "other"  -- rows whose suffix is not in t_abmap are excluded from the ranking
+            GROUP BY dt, hh, apptype, abcode, page, vid
+        )
+    ) WHERE vid_rank <= 10
+)
+-- Video titles for the ranked videos
+,t_vid_title AS
+(
+    SELECT  CAST(id AS STRING) AS vid  -- cast to STRING for the later join on t_base.vid
+            ,title
+    FROM    videoods.wx_video
+    WHERE   id IN (SELECT DISTINCT CAST(vid AS BIGINT) FROM t_vid_rank)  -- restrict to videos that appear in t_vid_rank
+)
+-- dau2: distinct active machinecodes per single suffix (UNION ALL combines the two apptypes)
+,t_dau2_bucket AS
+(
+    -- apptype = "4": source is useractive_log_per5min
+    SELECT  SUBSTR(sub.dt,1,8) AS dt  -- first 8 characters of dt; presumably the yyyymmdd part of a timestamp-style partition value
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode
+            ,sub.suffix
+            ,COUNT(DISTINCT sub.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix  -- last character of rootSessionId
+                FROM    loghubods.useractive_log_per5min
+                WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                AND     apptype IN ("4")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = m.apptype
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+             ,sub.suffix
+    UNION ALL
+    -- apptype = "0": source is useractive_log, filtered on ab_test003
+    SELECT  SUBSTR(sub.dt,1,8) AS dt
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode
+            ,sub.suffix
+            ,COUNT(DISTINCT sub.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+                FROM    loghubods.useractive_log
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0")
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') NOT IN ("ab100")  -- NOTE(review): redundant, the IN list above never contains "ab100"
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = m.apptype
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+             ,sub.suffix
+)
+-- dau2: per experiment group, the mean of the per-suffix dau2 values
+,t_dau2 AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,AVG(dau2) AS dau2  -- unweighted mean over the suffix rows of t_dau2_bucket in this group
+    FROM    t_dau2_bucket
+    GROUP BY dt
+             ,apptype
+             ,abcode
+)
+-- Aggregate per single suffix (INNER JOIN t_vid_rank keeps only the top-10 videos; grouped by page)
+,t_bucket AS
+(
+    SELECT  b.dt
+            ,b.hh
+            ,b.apptype
+            ,b.abcode
+            ,b.page
+            ,b.suffix
+            ,b.vid
+            ,COALESCE(v.title,'') AS title  -- empty string when no title row was found
+            ,r.vid_rank
+            ,COALESCE(COUNT(1) / COUNT(DISTINCT b.mid),0) AS exp_per_dau  -- rows per distinct mid; in every ratio below a NULL result is reported as 0
+            ,COALESCE(SUM(b.is_share) / COUNT(1),0) AS str_one
+            ,COALESCE(SUM(b.return_n_uv) / SUM(b.is_share),0) AS ros_one
+            ,COALESCE(SUM(b.share_cnt) / COUNT(1),0) AS str
+            ,COALESCE(SUM(b.return_n_uv) / SUM(b.share_cnt),0) AS ros
+            ,COALESCE(SUM(b.is_return_1) / COUNT(1),0) AS str_plus
+            ,COALESCE(SUM(b.return_n_uv) / SUM(b.is_return_1),0) AS ros_minus
+            ,COALESCE(SUM(b.return_n_uv) / COUNT(1),0) AS bn_rov
+            ,COALESCE(SUM(b.c1) / COUNT(1),0) AS c1_rov
+            ,COALESCE(SUM(b.cn) / COUNT(1),0) AS cn_rov
+            ,COALESCE(SUM(b.d1) / COUNT(1),0) AS d1_rov
+            ,COALESCE(SUM(b.dn) / COUNT(1),0) AS dn_rov
+            ,COALESCE(SUM(b.new_exposure_cnt) / COUNT(1),0) AS vovh24
+            ,COUNT(DISTINCT b.mid) AS dau  -- raw counts/sums behind the ratios above
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(b.is_share),0) AS is_share
+            ,COALESCE(SUM(b.share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(b.is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(b.return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(b.new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(b.return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(b.cn),0) AS cn
+            ,COALESCE(SUM(b.c1),0) AS c1
+            ,COALESCE(SUM(b.dn),0) AS dn
+            ,COALESCE(SUM(b.d1),0) AS d1
+    FROM    t_base b
+    INNER JOIN t_vid_rank r  -- drops every row whose video is not ranked for its (dt, hh, apptype, abcode, page)
+    ON      b.dt = r.dt
+    AND     b.hh = r.hh
+    AND     b.apptype = r.apptype
+    AND     b.abcode = r.abcode
+    AND     b.page = r.page
+    AND     b.vid = r.vid
+    LEFT JOIN t_vid_title v
+    ON      b.vid = v.vid
+    WHERE   b.abcode != "other"
+    GROUP BY b.dt
+             ,b.hh
+             ,b.apptype
+             ,b.abcode
+             ,b.page
+             ,b.suffix
+             ,b.vid
+             ,v.title
+             ,r.vid_rank
+)
+-- Per experiment group, take the mean of the per-suffix values from t_bucket
+,t_metrics AS
+(
+    SELECT  dt
+            ,hh
+            ,apptype
+            ,abcode
+            ,page
+            ,vid
+            ,title
+            ,vid_rank
+            ,ROUND(AVG(exp_per_dau),2) AS exp_per_dau  -- unweighted mean over the suffix rows present in t_bucket, rounded to 2 decimals
+            ,ROUND(AVG(str_one),6) AS str_one  -- ratio metrics are rounded to 6 decimals
+            ,ROUND(AVG(ros_one),6) AS ros_one
+            ,ROUND(AVG(str),6) AS str
+            ,ROUND(AVG(ros),6) AS ros
+            ,ROUND(AVG(str_plus),6) AS str_plus
+            ,ROUND(AVG(ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bn_rov),6) AS bn_rov
+            ,ROUND(AVG(c1_rov),6) AS c1_rov
+            ,ROUND(AVG(cn_rov),6) AS cn_rov
+            ,ROUND(AVG(d1_rov),6) AS d1_rov
+            ,ROUND(AVG(dn_rov),6) AS dn_rov
+            ,ROUND(AVG(vovh24),6) AS vovh24
+            ,AVG(dau) AS dau  -- count/sum columns are averaged without rounding
+            ,AVG(exp) AS exp
+            ,AVG(is_share) AS is_share
+            ,AVG(share_cnt) AS share_cnt
+            ,AVG(is_return_1) AS is_return_1
+            ,AVG(return_n_uv) AS return_n_uv
+            ,AVG(viewh24) AS viewh24
+            ,AVG(return_n_uv_noself) AS return_n_uv_noself
+            ,AVG(cn) AS cn
+            ,AVG(c1) AS c1
+            ,AVG(dn) AS dn
+            ,AVG(d1) AS d1
+            ,WM_CONCAT(DISTINCT ',',suffix) AS suffix  -- comma-separated list of the distinct suffixes that contributed to this row
+    FROM    t_bucket
+    GROUP BY dt
+             ,hh
+             ,apptype
+             ,abcode
+             ,page
+             ,vid
+             ,title
+             ,vid_rank
+)
+SELECT  a.dt  -- Final result set: one row per t_metrics group (dt, hh, apptype, abcode, page, vid) plus the group's dau2
+        ,a.hh
+        ,a.apptype
+        ,a.abcode
+        ,a.page
+        ,a.vid
+        ,a.title
+        ,a.vid_rank
+        ,a.suffix
+        ,a.exp_per_dau
+        ,a.str_one
+        ,a.ros_one
+        ,a.str
+        ,a.ros
+        ,a.str_plus
+        ,a.ros_minus
+        ,a.bn_rov
+        ,a.c1_rov
+        ,a.cn_rov
+        ,a.d1_rov
+        ,a.dn_rov
+        ,a.vovh24
+        ,a.dau
+        ,a.exp
+        ,a.is_share
+        ,a.share_cnt
+        ,a.is_return_1
+        ,a.return_n_uv
+        ,a.viewh24
+        ,a.return_n_uv_noself
+        ,a.cn
+        ,a.c1
+        ,a.dn
+        ,a.d1
+        ,b.dau2  -- NULL when t_dau2 has no row for this (dt, apptype, abcode)
+FROM    t_metrics a
+LEFT JOIN t_dau2 b  -- t_dau2 has one row per (dt, apptype, abcode), so dau2 repeats across hh, page and vid rows
+ON      a.dt = b.dt
+AND     a.apptype = b.apptype
+AND     a.abcode = b.abcode
+ORDER BY a.dt DESC,a.hh,a.apptype,a.abcode,a.page,a.vid_rank
+;

+ 394 - 0
tasks/00_尾号实验/base_v3_v3.sql

@@ -0,0 +1,394 @@
+WITH t_abmap AS  -- Lookup table: one-character suffix -> experiment group label (abcode); this variant has no apptype column
+(
+    SELECT "c" AS suffix, "前基线" AS abcode
+    UNION ALL SELECT "e", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "f", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "5", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "d", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "6", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "7", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "a", "对照组"
+    UNION ALL SELECT "b", "对照组"
+)
+,t_base AS  -- apptype "4" exposure rows for ${dt}, enriched with share-return counts (cn/c1, dn/d1) and the experiment group resolved from the session suffix
+(
+    SELECT  sub.*
+            ,COALESCE(m.abcode,"other") AS abcode  -- suffixes absent from t_abmap are labelled "other"
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix  -- last character of rootsessionid
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"  -- collapse raw page names into two buckets
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,page AS page_raw  -- original, un-bucketed page name
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (
+                                -- c1/cn: return UV from clicks on shares made after a share was clicked (per sharer mid, subsessionid, video)
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn  -- distinct clickers of b1's shares, excluding b1 itself
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1  -- same, restricted to sharedepth = 1
+                                FROM    (
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid  -- b: clicks whose root share is a's share
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode  -- b1: shares made by the clicker in the same subsession as the click
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid  -- b2: clicks whose root share is b1's share
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (
+                                -- d1/dn: returns brought by the next video in the session
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn  -- ordering by rn DESC makes LAG read the row with the next-higher rn; 0 when there is none
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn  -- NOTE(review): partitioned by subsessionid only, while the LAG above partitions by mid,subsessionid -- confirm subsessionid is unique per mid
+                                            FROM    (
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1  -- keep only the earliest view of each video per (mid, subsessionid)
+                                                    ) a
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")  -- allow-list of raw abcode values
+                AND     abcode NOT IN ("ab100")  -- NOTE(review): redundant, the allow-list above never contains "ab100"
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"  -- always true here because the subquery already filters apptype IN ("4")
+    AND     sub.suffix = m.suffix
+)
+-- dau2 (total + internal/external x first-layer split): aggregate per single suffix bucket
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(sub.dt,1,8) AS dt -- first 8 characters of dt
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode -- suffixes with no t_abmap match fall into the other group
+            ,sub.suffix
+            ,COUNT(DISTINCT sub.machinecode) AS dau2 -- distinct machinecode values in this suffix bucket
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "内部" THEN sub.machinecode END) AS dau2_inner -- internal source
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "内部" AND sub.is_first_layer = 1 THEN sub.machinecode END) AS dau2_inner_first -- internal, first layer
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "内部" AND sub.is_first_layer = 0 THEN sub.machinecode END) AS dau2_inner_nonfirst -- internal, not first layer
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "外部" THEN sub.machinecode END) AS dau2_outer -- external source
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "外部" AND sub.is_first_layer = 1 THEN sub.machinecode END) AS dau2_outer_first -- external, first layer
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "外部" AND sub.is_first_layer = 0 THEN sub.machinecode END) AS dau2_outer_nonfirst -- external, not first layer
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,subsessionid
+                        ,sessionid
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix -- last character of rootSessionId
+                        ,CASE   WHEN GET_JSON_OBJECT(extparams,'$.rootSourceId') != '' AND GET_JSON_OBJECT(extparams,'$.rootSourceId') IS NOT NULL THEN "外部" -- external: rootSourceId is present and non-empty
+                                ELSE "内部" -- internal: rootSourceId is missing or empty
+                        END AS source_type
+                        ,CASE   WHEN GET_JSON_OBJECT(extparams,'$.rootSessionId') = subsessionid
+                                  OR GET_JSON_OBJECT(extparams,'$.rootSessionId') = sessionid THEN 1 -- first layer: rootSessionId equals this row subsessionid or sessionid
+                                ELSE 0
+                        END AS is_first_layer
+                FROM    loghubods.useractive_log
+                WHERE   dt="${dt}"
+                -- FROM    loghubods.useractive_log_per5min
+                -- WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                AND     apptype IN ("4")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+             ,sub.suffix
+)
+-- dau2 (total + internal/external x first-layer split): mean over suffix buckets within each experiment group
+,t_dau2 AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,AVG(dau2) AS dau2 -- every suffix bucket of the group weighs equally in the mean
+            ,AVG(dau2_inner) AS dau2_inner
+            ,AVG(dau2_inner_first) AS dau2_inner_first
+            ,AVG(dau2_inner_nonfirst) AS dau2_inner_nonfirst
+            ,AVG(dau2_outer) AS dau2_outer
+            ,AVG(dau2_outer_first) AS dau2_outer_first
+            ,AVG(dau2_outer_nonfirst) AS dau2_outer_nonfirst
+    FROM    t_dau2_bucket
+    GROUP BY dt
+             ,apptype
+             ,abcode
+)
+-- dau2 (by page): aggregate per single suffix bucket, counting distinct mid per page category from the exposure rows in t_base
+,t_dau2_page_bucket AS
+(
+    SELECT  dt
+            ,abcode
+            ,suffix
+            ,COUNT(DISTINCT CASE WHEN page = "推荐" THEN mid END) AS dau2_recommend -- distinct mid on rows whose page category is 推荐
+            ,COUNT(DISTINCT CASE WHEN page = "非推荐" THEN mid END) AS dau2_non_recommend -- distinct mid on rows whose page category is 非推荐
+            ,COUNT(DISTINCT CASE WHEN page_raw = "回流后沉浸页&内页feed" THEN mid END) AS dau2_return_immerse -- matched on page_raw rather than on the page category
+    FROM    t_base
+    GROUP BY dt
+             ,abcode
+             ,suffix
+)
+-- dau2 (by page): mean over suffix buckets within each experiment group
+,t_dau2_page AS
+(
+    SELECT  dt
+            ,abcode
+            ,AVG(dau2_recommend) AS dau2_recommend -- every suffix bucket of the group weighs equally in the mean
+            ,AVG(dau2_non_recommend) AS dau2_non_recommend
+            ,AVG(dau2_return_immerse) AS dau2_return_immerse
+    FROM    t_dau2_page_bucket
+    GROUP BY dt
+             ,abcode
+)
+-- Aggregate per single suffix bucket (UV is deduplicated within a suffix)
+,t_bucket AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,suffix
+            ,COALESCE(COUNT(1) / COUNT(DISTINCT mid),0) AS exp_per_dau -- rows per distinct mid, COALESCE turns a NULL ratio into 0
+            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one -- is_share total per row
+            ,COALESCE(SUM(return_n_uv) / SUM(is_share),0) AS ros_one -- return_n_uv total per is_share total
+            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str -- share_cnt total per row
+            ,COALESCE(SUM(return_n_uv) / SUM(share_cnt),0) AS ros -- return_n_uv total per share_cnt total
+            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus -- is_return_1 total per row
+            ,COALESCE(SUM(return_n_uv) / SUM(is_return_1),0) AS ros_minus -- return_n_uv total per is_return_1 total
+            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS bn_rov -- return_n_uv total per row
+            ,COALESCE(SUM(c1) / COUNT(1),0) AS c1_rov
+            ,COALESCE(SUM(cn) / COUNT(1),0) AS cn_rov
+            ,COALESCE(SUM(d1) / COUNT(1),0) AS d1_rov
+            ,COALESCE(SUM(dn) / COUNT(1),0) AS dn_rov
+            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24 -- new_exposure_cnt total per row
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp -- number of t_base rows in the bucket
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(cn),0) AS cn
+            ,COALESCE(SUM(c1),0) AS c1
+            ,COALESCE(SUM(dn),0) AS dn
+            ,COALESCE(SUM(d1),0) AS d1
+    FROM    t_base
+    WHERE   page = "推荐" -- only rows whose page category is 推荐 feed these metrics
+    AND     abcode != "other" -- suffixes that are not mapped in t_abmap are left out
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,suffix
+)
+-- Mean over suffix buckets within each experiment group
+,t_metrics AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,ROUND(AVG(exp_per_dau),2) AS exp_per_dau -- mean of the per-bucket ratios, not a ratio of group totals
+            ,ROUND(AVG(str_one),6) AS str_one
+            ,ROUND(AVG(ros_one),6) AS ros_one
+            ,ROUND(AVG(str),6) AS str
+            ,ROUND(AVG(ros),6) AS ros
+            ,ROUND(AVG(str_plus),6) AS str_plus
+            ,ROUND(AVG(ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bn_rov),6) AS bn_rov
+            ,ROUND(AVG(c1_rov),6) AS c1_rov
+            ,ROUND(AVG(cn_rov),6) AS cn_rov
+            ,ROUND(AVG(d1_rov),6) AS d1_rov
+            ,ROUND(AVG(dn_rov),6) AS dn_rov
+            ,ROUND(AVG(vovh24),6) AS vovh24
+            ,AVG(dau) AS dau -- the count columns below are per-bucket means as well
+            ,AVG(exp) AS exp
+            ,AVG(is_share) AS is_share
+            ,AVG(share_cnt) AS share_cnt
+            ,AVG(is_return_1) AS is_return_1
+            ,AVG(return_n_uv) AS return_n_uv
+            ,AVG(viewh24) AS viewh24
+            ,AVG(return_n_uv_noself) AS return_n_uv_noself
+            ,AVG(cn) AS cn
+            ,AVG(c1) AS c1
+            ,AVG(dn) AS dn
+            ,AVG(d1) AS d1
+            ,WM_CONCAT(DISTINCT ',',suffix) AS suffix -- comma-joined list of the distinct suffixes that make up the group
+    FROM    t_bucket
+    GROUP BY dt
+             ,apptype
+             ,abcode
+)
+SELECT  a.dt -- final result: t_metrics rows (one per dt, apptype, abcode) widened with the dau2 columns
+        ,a.apptype
+        ,a.abcode
+        ,a.suffix
+        ,a.exp_per_dau
+        ,a.str_one
+        ,a.ros_one
+        ,a.str
+        ,a.ros
+        ,a.str_plus
+        ,a.ros_minus
+        ,a.bn_rov
+        ,a.c1_rov
+        ,a.cn_rov
+        ,a.d1_rov
+        ,a.dn_rov
+        ,a.vovh24
+        ,a.dau
+        ,a.exp
+        ,a.is_share
+        ,a.share_cnt
+        ,a.is_return_1
+        ,a.return_n_uv
+        ,a.viewh24
+        ,a.return_n_uv_noself
+        ,a.cn
+        ,a.c1
+        ,a.dn
+        ,a.d1
+        ,b.dau2
+        ,b.dau2_inner
+        ,b.dau2_inner_first
+        ,b.dau2_inner_nonfirst
+        ,b.dau2_outer
+        ,b.dau2_outer_first
+        ,b.dau2_outer_nonfirst
+        ,c.dau2_recommend
+        ,c.dau2_non_recommend
+        ,c.dau2_return_immerse
+FROM    t_metrics a
+LEFT JOIN t_dau2 b -- dau2 columns come from useractive_log, NULL when the group has no match there
+ON      a.dt = b.dt
+AND     a.apptype = b.apptype
+AND     a.abcode = b.abcode
+LEFT JOIN t_dau2_page c -- t_dau2_page carries no apptype column, so it is matched on dt and abcode only
+ON      a.dt = c.dt
+AND     a.abcode = c.abcode
+ORDER BY a.dt DESC,a.apptype,a.abcode
+;

+ 0 - 173
tasks/00_尾号实验/base_v4 copy.sql

@@ -1,173 +0,0 @@
-WITH t_abmap AS
-(
-    SELECT "3" AS suffix, "实验组:ros损失函数优化" AS abcode
-    UNION ALL SELECT "4", "实验组:c1_rovn & 去掉vor实验"
-    UNION ALL SELECT "5", "实验组:c1_rovn"
-    UNION ALL SELECT "a", "对照组"
-    UNION ALL SELECT "b", "对照组"
-    UNION ALL SELECT "c", "对照组"
-)
-,t_base AS
-(
-    SELECT  sub.*
-            ,COALESCE(m.abcode,"other") AS abcode
-    FROM    (
-                SELECT  dt
-                        ,apptype
-                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
-                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
-                                WHEN page IN ("回流页","其他") THEN "非推荐"
-                                ELSE "其他"
-                        END AS page
-                        ,mid
-                        ,vid
-                        ,is_share
-                        ,share_cnt
-                        ,is_return_1
-                        ,is_return_n
-                        ,is_return_noself
-                        ,return_1_uv
-                        ,return_n_uv
-                        ,return_n_uv_noself
-                        ,new_exposure_cnt
-                        ,flowpool
-                        -- ,abcode as abcode_origin
-                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
-                WHERE   dt="${dt}"
-                AND     apptype IN ("0")
-                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
-                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
-                AND     abcode NOT IN ("ab100")
-            ) sub
-    LEFT JOIN t_abmap m
-    ON      sub.apptype = "0"
-    AND     sub.suffix = m.suffix
-)
--- dau2:按单尾号聚合
-,t_dau2_bucket AS
-(
-    SELECT  SUBSTR(sub.dt,1,8) AS dt
-            ,sub.apptype
-            ,COALESCE(m.abcode,"other") AS abcode
-            ,sub.suffix
-            ,COUNT(DISTINCT sub.machinecode) AS dau2
-    FROM    (
-                SELECT  dt
-                        ,apptype
-                        ,machinecode
-                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
-                FROM    loghubods.useractive_log
-                WHERE   dt="${dt}"
-                -- FROM    loghubods.useractive_log_per5min
-                -- WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
-                AND     apptype IN ("0")
-            ) sub
-    LEFT JOIN t_abmap m
-    ON      sub.apptype = "0"
-    AND     sub.suffix = m.suffix
-    GROUP BY SUBSTR(sub.dt,1,8)
-             ,sub.apptype
-             ,COALESCE(m.abcode,"other")
-             ,sub.suffix
-)
--- dau2:按实验组求尾号均值
-,t_dau2 AS
-(
-    SELECT  dt
-            ,apptype
-            ,abcode
-            ,AVG(dau2) AS dau2
-    FROM    t_dau2_bucket
-    GROUP BY dt
-             ,apptype
-             ,abcode
-)
--- 按单尾号聚合(尾号内 UV 去重)
-,t_bucket AS
-(
-    SELECT  dt
-            ,apptype
-            ,abcode
-            ,suffix
-            ,COALESCE(COUNT(1) / COUNT(DISTINCT mid),0) AS exp_per_dau
-            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one
-            ,COALESCE(SUM(return_n_uv) / SUM(is_share),0) AS ros_one
-            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str
-            ,COALESCE(SUM(return_n_uv) / SUM(share_cnt),0) AS ros
-            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus
-            ,COALESCE(SUM(return_n_uv) / SUM(is_return_1),0) AS ros_minus
-            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS rovn
-            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24
-            ,COUNT(DISTINCT mid) AS dau
-            ,COUNT(1) AS exp
-            ,COALESCE(SUM(is_share),0) AS is_share
-            ,COALESCE(SUM(share_cnt),0) AS share_cnt
-            ,COALESCE(SUM(is_return_1),0) AS is_return_1
-            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
-            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
-            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
-    FROM    t_base
-    WHERE   page = "推荐"
-    GROUP BY dt
-             ,apptype
-             ,abcode
-             ,suffix
-)
--- 按实验组求尾号均值
-,t_metrics AS
-(
-    SELECT  dt
-            ,apptype
-            ,abcode
-            ,ROUND(AVG(exp_per_dau),2) AS exp_per_dau
-            ,ROUND(AVG(str_one),6) AS str_one
-            ,ROUND(AVG(ros_one),6) AS ros_one
-            ,ROUND(AVG(str),6) AS str
-            ,ROUND(AVG(ros),6) AS ros
-            ,ROUND(AVG(str_plus),6) AS str_plus
-            ,ROUND(AVG(ros_minus),6) AS ros_minus
-            ,ROUND(AVG(rovn),6) AS rovn
-            ,ROUND(AVG(vovh24),6) AS vovh24
-            ,AVG(dau) AS dau
-            ,AVG(exp) AS exp
-            ,AVG(is_share) AS is_share
-            ,AVG(share_cnt) AS share_cnt
-            ,AVG(is_return_1) AS is_return_1
-            ,AVG(return_n_uv) AS return_n_uv
-            ,AVG(viewh24) AS viewh24
-            ,AVG(return_n_uv_noself) AS return_n_uv_noself
-            ,WM_CONCAT(DISTINCT ',',suffix) AS suffix
-    FROM    t_bucket
-    GROUP BY dt
-             ,apptype
-             ,abcode
-)
-SELECT  a.dt
-        ,a.apptype
-        ,a.abcode
-        ,a.suffix
-        ,a.exp_per_dau
-        ,a.str_one
-        ,a.ros_one
-        ,a.str
-        ,a.ros
-        ,a.str_plus
-        ,a.ros_minus
-        ,a.rovn
-        ,a.vovh24
-        ,a.dau
-        ,a.exp
-        ,a.is_share
-        ,a.share_cnt
-        ,a.is_return_1
-        ,a.return_n_uv
-        ,a.viewh24
-        ,a.return_n_uv_noself
-        ,b.dau2
-FROM    t_metrics a
-LEFT JOIN t_dau2 b
-ON      a.dt = b.dt
-AND     a.apptype = b.apptype
-AND     a.abcode = b.abcode
-ORDER BY a.dt DESC,a.apptype,a.abcode
-;

+ 1 - 1
tasks/00_尾号实验/base_v4.json

@@ -3,5 +3,5 @@
   "sheet_id": "NFPs3X",
   "sort": "dt:desc",
   "cols": null,
-  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+  "filter": "abcode!=前基线,abcode!=6,abcode!=e,abcode!=f"
 }

+ 2 - 1
tasks/00_尾号实验/base_v4_v1.json

@@ -3,5 +3,6 @@
   "sheet_id": "I1byJV",
   "sort": "dt:desc",
   "cols": null,
-  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+  "filter": "abcode!=前基线,abcode!=other,abcode!=e,abcode!=f"
+
 }

+ 6 - 3
tasks/00_尾号实验/base_v4_v1.sql

@@ -1,6 +1,7 @@
 WITH t_abmap AS
 (
-    SELECT "c" AS suffix, "前基线" AS abcode
+    SELECT "a" AS suffix, "前基线" AS abcode
+    UNION ALL SELECT "b", "前基线"
     UNION ALL SELECT "e", "实验组:解构特征排序str模型&召回"
     UNION ALL SELECT "f", "实验组:解构特征排序str模型&召回"
     UNION ALL SELECT "5", "实验组:解构特征排序str模型"
@@ -9,8 +10,10 @@ WITH t_abmap AS
     UNION ALL SELECT "4", "实验组:bn_ros新损失函数"
     UNION ALL SELECT "6", "实验组:cn_rov"
     UNION ALL SELECT "7", "实验组:cn_rov"
-    UNION ALL SELECT "a", "对照组"
-    UNION ALL SELECT "b", "对照组"
+    UNION ALL SELECT "0", "实验组:建模目标实验"
+    UNION ALL SELECT "1", "实验组:建模目标实验"
+    UNION ALL SELECT "8", "对照组"
+    UNION ALL SELECT "9", "对照组"
 )
 ,t_base AS
 (

+ 7 - 0
tasks/00_尾号实验/base_v4_v2.json

@@ -0,0 +1,7 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "weRL8U",
+  "sort": "dt:desc",
+  "cols": null,
+  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+}

+ 368 - 0
tasks/00_尾号实验/base_v4_v2.sql

@@ -0,0 +1,368 @@
+WITH t_abmap AS -- lookup table: suffix (last character of the root session id) -> experiment group label (abcode)
+(
+    SELECT "c" AS suffix, "前基线" AS abcode -- label reads: previous baseline
+    UNION ALL SELECT "e", "实验组:解构特征排序str模型&召回" -- labels starting with 实验组 read: experiment group
+    UNION ALL SELECT "f", "实验组:解构特征排序str模型&召回"
+    UNION ALL SELECT "5", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "d", "实验组:解构特征排序str模型"
+    UNION ALL SELECT "3", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "4", "实验组:bn_ros新损失函数"
+    UNION ALL SELECT "6", "实验组:cn_rov"
+    UNION ALL SELECT "7", "实验组:cn_rov"
+    UNION ALL SELECT "a", "对照组" -- label reads: control group
+    UNION ALL SELECT "b", "对照组"
+)
+,t_base AS -- exposure rows enriched with share-return counts (cn, c1, dn, d1) and tagged with the experiment group of their suffix
+(
+    SELECT  sub.*
+            ,COALESCE(m.abcode,"other") AS abcode -- suffixes with no t_abmap match fall into the other group
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix -- last character of rootsessionid
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐" -- page category: recommend
+                                WHEN page IN ("回流页","其他") THEN "非推荐" -- page category: non-recommend
+                                ELSE "其他" -- fallback category, the WHERE below only admits the six pages listed above
+                        END AS page
+                        ,page AS page_raw -- page value before it is folded into a category
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   ( -- cc: per shared video, clickers reached through re-shares made by the first-hop clickers
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn -- distinct b2 clickers other than the b1 sharer
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1 -- same, restricted to sharedepth 1
+                                FROM    ( -- a: share events of the day on the matching page sources
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   ( -- b: click events whose rootshareid is a share from a
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   ( -- b1: shares made by a b clicker within the same subsession
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   ( -- b2: click events whose rootshareid is a share from b1
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   ( -- dd: per viewed video, the return counts of the row that follows it in view order
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn -- rn DESC makes LAG read the row with the next higher rn, 0 when there is none
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1 -- same lookup for the sharedepth 1 count
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数 -- share count, not referenced outside this subquery
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流 -- distinct clickers other than the sharer
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1 -- same, restricted to sharedepth 1
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn -- view order inside the subsession, NOTE(review): partitioned by subsessionid only while LAG above also partitions by mid
+                                            FROM    ( -- a: earliest videoView row per mid, subsessionid and videoid
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1
+                                                    ) a
+                                            LEFT JOIN   ( -- b: shares of that video by the same machine in the same subsession
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   ( -- c: click events whose rootshareid is a share from b
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9") -- this abcode is the exposure table column, not the t_abmap label
+                AND     abcode NOT IN ("ab100") -- already implied by the IN list on the previous line
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "0"
+    AND     sub.suffix = m.suffix
+)
+-- dau2 (total): aggregate per single suffix bucket
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(sub.dt,1,8) AS dt -- first 8 characters of dt
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode -- suffixes with no t_abmap match fall into the other group
+            ,sub.suffix
+            ,COUNT(DISTINCT sub.machinecode) AS dau2 -- distinct machinecode values in this suffix bucket
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix -- last character of rootSessionId
+                FROM    loghubods.useractive_log
+                WHERE   dt="${dt}"
+                -- FROM    loghubods.useractive_log_per5min
+                -- WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                AND     apptype IN ("0")
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab5", "ab6", "ab7", "ab8","ab9") -- NOTE(review): t_base admits only ab0-ab4, ab8, ab9 while this list also admits ab5-ab7, confirm the difference is intended
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') NOT IN ("ab100") -- already implied by the IN list on the previous line
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "0"
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+             ,sub.suffix
+)
+-- dau2 (total): mean over suffix buckets within each experiment group
+,t_dau2 AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,AVG(dau2) AS dau2 -- every suffix bucket of the group weighs equally in the mean
+    FROM    t_dau2_bucket
+    GROUP BY dt
+             ,apptype
+             ,abcode
+)
+-- dau2 (by page): aggregate per single suffix bucket, counting distinct mid per page category from the exposure rows in t_base
+,t_dau2_page_bucket AS
+(
+    SELECT  dt
+            ,abcode
+            ,suffix
+            ,COUNT(DISTINCT CASE WHEN page = "推荐" THEN mid END) AS dau2_recommend -- distinct mid on recommend-category rows
+            ,COUNT(DISTINCT CASE WHEN page = "非推荐" THEN mid END) AS dau2_non_recommend -- distinct mid on non-recommend-category rows
+            ,COUNT(DISTINCT CASE WHEN page_raw = "回流后沉浸页&内页feed" THEN mid END) AS dau2_return_immerse -- matched on the raw page value, a subset of the recommend category
+    FROM    t_base
+    GROUP BY dt
+             ,abcode
+             ,suffix
+)
+-- dau2 (by page): mean over suffix buckets within each experiment group
+,t_dau2_page AS
+(
+    SELECT  dt
+            ,abcode
+            ,AVG(dau2_recommend) AS dau2_recommend -- every suffix bucket of the group weighs equally in the mean
+            ,AVG(dau2_non_recommend) AS dau2_non_recommend
+            ,AVG(dau2_return_immerse) AS dau2_return_immerse
+    FROM    t_dau2_page_bucket
+    GROUP BY dt
+             ,abcode
+)
+-- Aggregate per single suffix bucket (UV is deduplicated within a suffix)
+,t_bucket AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,suffix
+            ,COALESCE(COUNT(1) / COUNT(DISTINCT mid),0) AS exp_per_dau -- exposure rows per distinct mid, COALESCE turns a NULL ratio into 0
+            ,COALESCE(SUM(is_share) / COUNT(1),0) AS str_one -- is_share total per exposure row
+            ,COALESCE(SUM(return_n_uv) / SUM(is_share),0) AS ros_one -- return_n_uv total per is_share total
+            ,COALESCE(SUM(share_cnt) / COUNT(1),0) AS str -- share_cnt total per exposure row
+            ,COALESCE(SUM(return_n_uv) / SUM(share_cnt),0) AS ros -- return_n_uv total per share_cnt total
+            ,COALESCE(SUM(is_return_1) / COUNT(1),0) AS str_plus -- is_return_1 total per exposure row
+            ,COALESCE(SUM(return_n_uv) / SUM(is_return_1),0) AS ros_minus -- return_n_uv total per is_return_1 total
+            ,COALESCE(SUM(return_n_uv) / COUNT(1),0) AS bn_rov -- return_n_uv total per exposure row
+            ,COALESCE(SUM(c1) / COUNT(1),0) AS c1_rov
+            ,COALESCE(SUM(cn) / COUNT(1),0) AS cn_rov
+            ,COALESCE(SUM(d1) / COUNT(1),0) AS d1_rov
+            ,COALESCE(SUM(dn) / COUNT(1),0) AS dn_rov
+            ,COALESCE(SUM(new_exposure_cnt) / COUNT(1),0) AS vovh24 -- new_exposure_cnt total per exposure row
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp -- number of exposure rows in the bucket
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(cn),0) AS cn -- cn, c1, dn, d1 are NULL on rows without a cc or dd match, SUM skips those
+            ,COALESCE(SUM(c1),0) AS c1
+            ,COALESCE(SUM(dn),0) AS dn
+            ,COALESCE(SUM(d1),0) AS d1
+    FROM    t_base
+    WHERE   page = "推荐" -- only recommend-category rows feed these metrics, the other group is not excluded here
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,suffix
+)
+-- Mean over suffix buckets within each experiment group
+,t_metrics AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,ROUND(AVG(exp_per_dau),2) AS exp_per_dau -- mean of the per-bucket ratios, not a ratio of group totals
+            ,ROUND(AVG(str_one),6) AS str_one
+            ,ROUND(AVG(ros_one),6) AS ros_one
+            ,ROUND(AVG(str),6) AS str
+            ,ROUND(AVG(ros),6) AS ros
+            ,ROUND(AVG(str_plus),6) AS str_plus
+            ,ROUND(AVG(ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bn_rov),6) AS bn_rov
+            ,ROUND(AVG(c1_rov),6) AS c1_rov
+            ,ROUND(AVG(cn_rov),6) AS cn_rov
+            ,ROUND(AVG(d1_rov),6) AS d1_rov
+            ,ROUND(AVG(dn_rov),6) AS dn_rov
+            ,ROUND(AVG(vovh24),6) AS vovh24
+            ,AVG(dau) AS dau -- the count columns below are per-bucket means as well
+            ,AVG(exp) AS exp
+            ,AVG(is_share) AS is_share
+            ,AVG(share_cnt) AS share_cnt
+            ,AVG(is_return_1) AS is_return_1
+            ,AVG(return_n_uv) AS return_n_uv
+            ,AVG(viewh24) AS viewh24
+            ,AVG(return_n_uv_noself) AS return_n_uv_noself
+            ,AVG(cn) AS cn
+            ,AVG(c1) AS c1
+            ,AVG(dn) AS dn
+            ,AVG(d1) AS d1
+            ,WM_CONCAT(DISTINCT ',',suffix) AS suffix -- comma-joined list of the distinct suffixes that make up the group
+    FROM    t_bucket
+    GROUP BY dt
+             ,apptype
+             ,abcode
+)
+SELECT  a.dt -- final result: t_metrics rows (one per dt, apptype, abcode) widened with the dau2 columns
+        ,a.apptype
+        ,a.abcode
+        ,a.suffix
+        ,a.exp_per_dau
+        ,a.str_one
+        ,a.ros_one
+        ,a.str
+        ,a.ros
+        ,a.str_plus
+        ,a.ros_minus
+        ,a.bn_rov
+        ,a.c1_rov
+        ,a.cn_rov
+        ,a.d1_rov
+        ,a.dn_rov
+        ,a.vovh24
+        ,a.dau
+        ,a.exp
+        ,a.is_share
+        ,a.share_cnt
+        ,a.is_return_1
+        ,a.return_n_uv
+        ,a.viewh24
+        ,a.return_n_uv_noself
+        ,a.cn
+        ,a.c1
+        ,a.dn
+        ,a.d1
+        ,b.dau2
+        ,c.dau2_recommend
+        ,c.dau2_non_recommend
+        ,c.dau2_return_immerse
+FROM    t_metrics a
+LEFT JOIN t_dau2 b -- dau2 comes from useractive_log, NULL when the group has no match there
+ON      a.dt = b.dt
+AND     a.apptype = b.apptype
+AND     a.abcode = b.abcode
+LEFT JOIN t_dau2_page c -- t_dau2_page carries no apptype column, so it is matched on dt and abcode only
+ON      a.dt = c.dt
+AND     a.abcode = c.abcode
+ORDER BY a.dt DESC,a.apptype,a.abcode
+;

+ 8 - 0
tasks/00_尾号实验/base_v4_v3.json

@@ -0,0 +1,8 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "wG3ELn",
+  "sort": "dt:desc",
+  "cols": null,
+  "append_cols": true,
+  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+}

+ 395 - 0
tasks/00_尾号实验/base_v4_v3.sql

@@ -0,0 +1,395 @@
+-- Lookup table: single-character session-id suffix -> experiment-group label.
+-- Suffixes that are not listed here are labelled "other" by the CTEs that join on it.
+WITH t_abmap AS
+(
+    SELECT  "c" AS suffix, "前基线" AS abcode
+    UNION ALL
+    SELECT  "e" AS suffix, "实验组:解构特征排序str模型&召回" AS abcode
+    UNION ALL
+    SELECT  "f" AS suffix, "实验组:解构特征排序str模型&召回" AS abcode
+    UNION ALL
+    SELECT  "5" AS suffix, "实验组:解构特征排序str模型" AS abcode
+    UNION ALL
+    SELECT  "d" AS suffix, "实验组:解构特征排序str模型" AS abcode
+    UNION ALL
+    SELECT  "3" AS suffix, "实验组:bn_ros新损失函数" AS abcode
+    UNION ALL
+    SELECT  "4" AS suffix, "实验组:bn_ros新损失函数" AS abcode
+    UNION ALL
+    SELECT  "6" AS suffix, "实验组:cn_rov" AS abcode
+    UNION ALL
+    SELECT  "7" AS suffix, "实验组:cn_rov" AS abcode
+    UNION ALL
+    SELECT  "a" AS suffix, "对照组" AS abcode
+    UNION ALL
+    SELECT  "b" AS suffix, "对照组" AS abcode
+)
+,t_base AS  -- exposure rows for the target dt, enriched with suffix, page class, share-return counts (cn/c1, dn/d1) and experiment group
+(
+    SELECT  sub.*
+            ,COALESCE(m.abcode,"other") AS abcode  -- suffixes missing from t_abmap fall into group other
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix  -- last character of the rootsessionid field in extend
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"  -- these four pages form the recommend class
+                                WHEN page IN ("回流页","其他") THEN "非推荐"  -- these two pages form the non-recommend class
+                                ELSE "其他"  -- NOTE(review): looks unreachable, the WHERE below only admits the six pages listed above - confirm
+                        END AS page
+                        ,page AS page_raw  -- unmapped page value, kept because the alias above reuses the name page
+                        ,a.mid
+                        ,a.vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                        ,cc.cn
+                        ,cc.c1
+                        ,dd.dn
+                        ,dd.d1
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108 a
+                LEFT JOIN   (  -- cc: one row per sharer machinecode + subsessionid + shared video, with click counts reached through a second share hop
+                                SELECT  a.machinecode AS mid
+                                        ,a.subsessionid
+                                        ,a.videoid AS vid
+                                        ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS cn  -- distinct b2 clickers other than the b1 sharer, any sharedepth
+                                        ,COUNT(DISTINCT CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode THEN b2.machinecode END) AS c1  -- same count restricted to b2.sharedepth = 1
+                                FROM    (  -- a: share events of the target dt on the category / recommend / user-videos-detail page sources
+                                            SELECT  DISTINCT machinecode
+                                                    ,shareobjectid AS videoid
+                                                    ,recomTraceId
+                                                    ,subsessionid
+                                                    ,sharedepth
+                                                    ,shareid
+                                            FROM    loghubods.user_share_log
+                                            WHERE   dt = '${dt}'
+                                            AND     topic = 'share'
+                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                        ) a
+                                LEFT JOIN   (  -- b: click events whose rootshareid points at a share in a
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b
+                                ON      a.shareid = b.rootshareid
+                                LEFT JOIN   (  -- b1: share events made by the clicker of b within the same subsessionid
+                                                SELECT  DISTINCT machinecode
+                                                        ,shareobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'share'
+                                                AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                            ) b1
+                                ON      b.machinecode = b1.machinecode
+                                AND     b.subsessionid = b1.subsessionid
+                                LEFT JOIN   (  -- b2: click events whose rootshareid points at a share in b1
+                                                SELECT  DISTINCT machinecode
+                                                        ,clickobjectid
+                                                        ,recomTraceId
+                                                        ,subsessionid
+                                                        ,sharedepth
+                                                        ,shareid
+                                                        ,rootshareid
+                                                FROM    loghubods.user_share_log
+                                                WHERE   dt = '${dt}'
+                                                AND     topic = 'click'
+                                            ) b2
+                                ON      b1.shareid = b2.rootshareid
+                                GROUP BY a.machinecode
+                                         ,a.subsessionid
+                                         ,a.videoid
+                            ) cc
+                ON      a.mid = cc.mid
+                AND     a.subsessionid = cc.subsessionid
+                AND     a.vid = cc.vid
+                LEFT JOIN   (  -- dd: per first view of a video, the return counts of the view that follows it in the same mid + subsessionid
+                                SELECT  *
+                                        ,LAG(回流,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS dn  -- value of 回流 from the row with the next higher rn, 0 when there is none
+                                        ,LAG(回流1,1,0) OVER (PARTITION BY mid,subsessionid ORDER BY rn DESC) AS d1  -- same shift applied to 回流1
+                                FROM    (
+                                            SELECT  a.mid AS mid
+                                                    ,a.subsessionid
+                                                    ,a.videoid AS vid
+                                                    ,COUNT(DISTINCT b.shareid) AS 分享次数  -- distinct shares of this video in this subsession
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode THEN c.machinecode END) AS 回流  -- distinct clickers other than the sharer
+                                                    ,COUNT(DISTINCT CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1 THEN c.machinecode END) AS 回流1  -- same count restricted to c.sharedepth = 1
+                                                    ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp ASC) AS rn  -- view order by logtimestamp (NOTE(review): partition omits mid while the LAG windows above use mid,subsessionid - confirm intended)
+                                            FROM    (
+                                                        SELECT  *
+                                                        FROM    (
+                                                                    SELECT  DISTINCT mid
+                                                                            ,subsessionid
+                                                                            ,videoid
+                                                                            ,logtimestamp
+                                                                            ,ROW_NUMBER() OVER (PARTITION BY mid,subsessionid,videoid ORDER BY logtimestamp ASC) AS rn
+                                                                    FROM    loghubods.video_action_log_rp
+                                                                    WHERE   dt = '${dt}'
+                                                                    AND     businesstype = 'videoView'
+                                                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                                )
+                                                        WHERE   rn = 1  -- keep only the earliest videoView per mid + subsessionid + videoid
+                                                    ) a
+                                            LEFT JOIN   (  -- b: share events for the same mid, subsessionid and video
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,shareobjectid AS videoid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,shareid
+                                                                    ,clienttimestamp
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'share'
+                                                            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                                        ) b
+                                            ON      a.mid = b.machinecode
+                                            AND     a.subsessionid = b.subsessionid
+                                            AND     a.videoid = b.videoid
+                                            LEFT JOIN   (  -- c: click events whose rootshareid points at a share in b
+                                                            SELECT  DISTINCT machinecode
+                                                                    ,clickobjectid
+                                                                    ,recomTraceId
+                                                                    ,subsessionid
+                                                                    ,sharedepth
+                                                                    ,rootshareid
+                                                            FROM    loghubods.user_share_log
+                                                            WHERE   dt = '${dt}'
+                                                            AND     topic = 'click'
+                                                        ) c
+                                            ON      b.shareid = c.rootshareid
+                                            GROUP BY a.mid
+                                                     ,a.subsessionid
+                                                     ,a.videoid
+                                                     ,a.logtimestamp
+                                        )
+                            ) dd
+                ON      a.mid = dd.mid
+                AND     a.subsessionid = dd.subsessionid
+                AND     a.vid = dd.vid
+                WHERE   dt="${dt}"
+                AND     apptype IN ("0")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab8","ab9")
+                AND     abcode NOT IN ("ab100")  -- NOTE(review): redundant, the IN list above already excludes ab100
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "0"
+    AND     sub.suffix = m.suffix
+)
+-- dau2 (total plus internal/external x first-layer splits): aggregated per single suffix
+,t_dau2_bucket AS
+(
+    SELECT  SUBSTR(sub.dt,1,8) AS dt  -- first 8 characters of dt
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode  -- suffixes missing from t_abmap fall into group other
+            ,sub.suffix
+            ,COUNT(DISTINCT sub.machinecode) AS dau2  -- distinct machinecode over all rows of the bucket
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "内部" THEN sub.machinecode END) AS dau2_inner
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "内部" AND sub.is_first_layer = 1 THEN sub.machinecode END) AS dau2_inner_first
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "内部" AND sub.is_first_layer = 0 THEN sub.machinecode END) AS dau2_inner_nonfirst
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "外部" THEN sub.machinecode END) AS dau2_outer
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "外部" AND sub.is_first_layer = 1 THEN sub.machinecode END) AS dau2_outer_first
+            ,COUNT(DISTINCT CASE WHEN sub.source_type = "外部" AND sub.is_first_layer = 0 THEN sub.machinecode END) AS dau2_outer_nonfirst
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        ,subsessionid
+                        ,sessionid
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix  -- last character of the rootSessionId field in extparams
+                        ,CASE   WHEN GET_JSON_OBJECT(extparams,'$.rootSourceId') != '' AND GET_JSON_OBJECT(extparams,'$.rootSourceId') IS NOT NULL THEN "外部"  -- a non-empty rootSourceId marks the row as external
+                                ELSE "内部"
+                        END AS source_type
+                        ,CASE   WHEN GET_JSON_OBJECT(extparams,'$.rootSessionId') = subsessionid  -- flag is 1 when rootSessionId equals the row subsessionid or sessionid
+                                  OR GET_JSON_OBJECT(extparams,'$.rootSessionId') = sessionid THEN 1
+                                ELSE 0
+                        END AS is_first_layer
+                FROM    loghubods.useractive_log
+                WHERE   dt="${dt}"
+                -- FROM    loghubods.useractive_log_per5min
+                -- WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                AND     apptype IN ("0")
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') IN ("ab0","ab1","ab2","ab3","ab4","ab5", "ab6", "ab7", "ab8","ab9")  -- NOTE(review): admits ab5, ab6 and ab7, which the abcode filter in t_base does not - confirm the difference is intended
+                AND     GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003') NOT IN ("ab100")  -- NOTE(review): redundant, the IN list above already excludes ab100
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "0"
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+             ,sub.suffix
+)
+-- dau2 (total plus internal/external x first-layer splits): per experiment group,
+-- the mean of the per-suffix counts produced by t_dau2_bucket
+,t_dau2 AS
+(
+    SELECT  bkt.dt
+            ,bkt.apptype
+            ,bkt.abcode
+            ,AVG(bkt.dau2) AS dau2
+            ,AVG(bkt.dau2_inner) AS dau2_inner
+            ,AVG(bkt.dau2_inner_first) AS dau2_inner_first
+            ,AVG(bkt.dau2_inner_nonfirst) AS dau2_inner_nonfirst
+            ,AVG(bkt.dau2_outer) AS dau2_outer
+            ,AVG(bkt.dau2_outer_first) AS dau2_outer_first
+            ,AVG(bkt.dau2_outer_nonfirst) AS dau2_outer_nonfirst
+    FROM    t_dau2_bucket bkt
+    GROUP BY bkt.dt, bkt.apptype, bkt.abcode
+)
+-- dau2 by page: per single suffix, distinct mid counted from the exposure rows
+-- in t_base, split by page class
+,t_dau2_page_bucket AS
+(
+    SELECT  e.dt
+            ,e.abcode
+            ,e.suffix
+            ,COUNT(DISTINCT CASE WHEN e.page = "推荐" THEN e.mid END) AS dau2_recommend
+            ,COUNT(DISTINCT CASE WHEN e.page = "非推荐" THEN e.mid END) AS dau2_non_recommend
+            ,COUNT(DISTINCT CASE WHEN e.page_raw = "回流后沉浸页&内页feed" THEN e.mid END) AS dau2_return_immerse
+    FROM    t_base e
+    GROUP BY e.dt, e.abcode, e.suffix
+)
+-- dau2 by page: per experiment group, the mean of the per-suffix counts
+-- produced by t_dau2_page_bucket
+,t_dau2_page AS
+(
+    SELECT  pb.dt
+            ,pb.abcode
+            ,AVG(pb.dau2_recommend) AS dau2_recommend
+            ,AVG(pb.dau2_non_recommend) AS dau2_non_recommend
+            ,AVG(pb.dau2_return_immerse) AS dau2_return_immerse
+    FROM    t_dau2_page_bucket pb
+    GROUP BY pb.dt, pb.abcode
+)
+-- Per single suffix: ratio metrics and raw totals over the recommend-class exposure rows
+-- of t_base (UV is de-duplicated inside each suffix). COALESCE turns NULL results into 0.
+,t_bucket AS
+(
+    SELECT  e.dt
+            ,e.apptype
+            ,e.abcode
+            ,e.suffix
+            ,COALESCE(COUNT(1) / COUNT(DISTINCT e.mid),0) AS exp_per_dau
+            ,COALESCE(SUM(e.is_share) / COUNT(1),0) AS str_one
+            ,COALESCE(SUM(e.return_n_uv) / SUM(e.is_share),0) AS ros_one
+            ,COALESCE(SUM(e.share_cnt) / COUNT(1),0) AS str
+            ,COALESCE(SUM(e.return_n_uv) / SUM(e.share_cnt),0) AS ros
+            ,COALESCE(SUM(e.is_return_1) / COUNT(1),0) AS str_plus
+            ,COALESCE(SUM(e.return_n_uv) / SUM(e.is_return_1),0) AS ros_minus
+            ,COALESCE(SUM(e.return_n_uv) / COUNT(1),0) AS bn_rov
+            ,COALESCE(SUM(e.c1) / COUNT(1),0) AS c1_rov
+            ,COALESCE(SUM(e.cn) / COUNT(1),0) AS cn_rov
+            ,COALESCE(SUM(e.d1) / COUNT(1),0) AS d1_rov
+            ,COALESCE(SUM(e.dn) / COUNT(1),0) AS dn_rov
+            ,COALESCE(SUM(e.new_exposure_cnt) / COUNT(1),0) AS vovh24
+            ,COUNT(DISTINCT e.mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(e.is_share),0) AS is_share
+            ,COALESCE(SUM(e.share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(e.is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(e.return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(e.new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(e.return_n_uv_noself),0) AS return_n_uv_noself
+            ,COALESCE(SUM(e.cn),0) AS cn
+            ,COALESCE(SUM(e.c1),0) AS c1
+            ,COALESCE(SUM(e.dn),0) AS dn
+            ,COALESCE(SUM(e.d1),0) AS d1
+    FROM    t_base e
+    WHERE   e.page = "推荐"
+    GROUP BY e.dt, e.apptype, e.abcode, e.suffix
+)
+-- Per experiment group: the mean of the per-suffix values from t_bucket, plus the
+-- comma-separated list of distinct suffixes that contributed to the group
+,t_metrics AS
+(
+    SELECT  bk.dt
+            ,bk.apptype
+            ,bk.abcode
+            ,ROUND(AVG(bk.exp_per_dau),2) AS exp_per_dau
+            ,ROUND(AVG(bk.str_one),6) AS str_one
+            ,ROUND(AVG(bk.ros_one),6) AS ros_one
+            ,ROUND(AVG(bk.str),6) AS str
+            ,ROUND(AVG(bk.ros),6) AS ros
+            ,ROUND(AVG(bk.str_plus),6) AS str_plus
+            ,ROUND(AVG(bk.ros_minus),6) AS ros_minus
+            ,ROUND(AVG(bk.bn_rov),6) AS bn_rov
+            ,ROUND(AVG(bk.c1_rov),6) AS c1_rov
+            ,ROUND(AVG(bk.cn_rov),6) AS cn_rov
+            ,ROUND(AVG(bk.d1_rov),6) AS d1_rov
+            ,ROUND(AVG(bk.dn_rov),6) AS dn_rov
+            ,ROUND(AVG(bk.vovh24),6) AS vovh24
+            ,AVG(bk.dau) AS dau
+            ,AVG(bk.exp) AS exp
+            ,AVG(bk.is_share) AS is_share
+            ,AVG(bk.share_cnt) AS share_cnt
+            ,AVG(bk.is_return_1) AS is_return_1
+            ,AVG(bk.return_n_uv) AS return_n_uv
+            ,AVG(bk.viewh24) AS viewh24
+            ,AVG(bk.return_n_uv_noself) AS return_n_uv_noself
+            ,AVG(bk.cn) AS cn
+            ,AVG(bk.c1) AS c1
+            ,AVG(bk.dn) AS dn
+            ,AVG(bk.d1) AS d1
+            ,WM_CONCAT(DISTINCT ',',bk.suffix) AS suffix
+    FROM    t_bucket bk
+    GROUP BY bk.dt, bk.apptype, bk.abcode
+)
+-- Final output: t_metrics rows widened with the dau2 columns of t_dau2 and t_dau2_page.
+-- Both joins are LEFT JOINs, so a group without a match keeps NULL in those columns.
+SELECT  met.dt
+        ,met.apptype
+        ,met.abcode
+        ,met.suffix
+        ,met.exp_per_dau
+        ,met.str_one
+        ,met.ros_one
+        ,met.str
+        ,met.ros
+        ,met.str_plus
+        ,met.ros_minus
+        ,met.bn_rov
+        ,met.c1_rov
+        ,met.cn_rov
+        ,met.d1_rov
+        ,met.dn_rov
+        ,met.vovh24
+        ,met.dau
+        ,met.exp
+        ,met.is_share
+        ,met.share_cnt
+        ,met.is_return_1
+        ,met.return_n_uv
+        ,met.viewh24
+        ,met.return_n_uv_noself
+        ,met.cn
+        ,met.c1
+        ,met.dn
+        ,met.d1
+        ,act.dau2
+        ,act.dau2_inner
+        ,act.dau2_inner_first
+        ,act.dau2_inner_nonfirst
+        ,act.dau2_outer
+        ,act.dau2_outer_first
+        ,act.dau2_outer_nonfirst
+        ,pg.dau2_recommend
+        ,pg.dau2_non_recommend
+        ,pg.dau2_return_immerse
+FROM    t_metrics met
+LEFT JOIN t_dau2 act
+ON      met.dt = act.dt
+AND     met.apptype = act.apptype
+AND     met.abcode = act.abcode
+LEFT JOIN t_dau2_page pg
+ON      met.dt = pg.dt
+AND     met.abcode = pg.abcode
+ORDER BY met.dt DESC, met.apptype, met.abcode
+;