|
|
@@ -0,0 +1,483 @@
|
|
|
+-- 预处理:解析 scoresmap + page 分类
|
|
|
+-- v4: 新增 top20 vid 分组 + GROUPING SETS + 曝光占比
|
|
|
+-- v5: 新增相对对照组的变化率字段
|
|
|
+-- v6: 新增模型评估指标(AUC/GAUC/Spearman)
|
|
|
+-- t_raw: one day of samples (dt = '${dt}') restricted to apptype 0/4 and
+-- abcode ab0..ab9 that carry a scoresMap in extend_alg. Adds two columns:
+--   scoresmap : the scoresMap JSON text with backslashes stripped
+--   page_type : coarse page bucket derived from the page column
+WITH t_raw AS
+(
+    SELECT
+        *,
+        REPLACE(GET_JSON_OBJECT(extend_alg, '$.scoresMap'), "\\", "") AS scoresmap,
+        CASE
+            WHEN page IN ("回流后沉浸页&内页feed", "详情后沉浸页", "首页feed", "详情页") THEN "推荐"
+            WHEN page IN ("回流页", "其他") THEN "非推荐"
+            ELSE "其他"
+        END AS page_type
+    FROM loghubods.dwd_recsys_alg_sample_all_20250212
+    WHERE dt = '${dt}'
+      -- rows without a parsable scoresMap cannot be scored downstream
+      AND extend_alg IS NOT NULL
+      AND GET_JSON_OBJECT(extend_alg, '$.scoresMap') IS NOT NULL
+      AND apptype IN ("0", "4")
+      AND abcode IN ("ab0", "ab1", "ab2", "ab3", "ab4", "ab5", "ab6", "ab7", "ab8", "ab9")
+      AND abcode NOT IN ("ab100")
+)
|
|
|
+-- t_filtered: keep only the rows whose page_type bucket is the
+-- recommendation bucket; all columns of t_raw pass through unchanged.
+,t_filtered AS
+(
+    SELECT *
+    FROM t_raw
+    WHERE page_type IN ("推荐")
+)
|
|
|
+-- t_base: feature extraction and dimension mapping.
+--   * abcode is re-labelled into experiment-group names (only apptype "4"
+--     buckets get a specific label; everything else falls into "其他").
+--   * page is replaced by the coarse page_type bucket.
+--   * str_pred / rosn_pred / rosn_stat are read from the scoresmap JSON;
+--     rosn_pred applies the transform 1.22 * score^1.15 to NorXGBScore.
+,t_base AS
+(
+    SELECT
+        dt,
+        apptype,
+        CASE
+            WHEN apptype = "4" THEN
+                CASE
+                    WHEN abcode IN ("ab0", "ab1") THEN "实验组-先验地域降权"
+                    WHEN abcode IN ("ab6", "ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN abcode IN ("ab8", "ab9") THEN "实验组-str+校准"
+                    WHEN abcode IN ("ab2", "ab3") THEN "对照组"
+                    WHEN abcode IN ("ab4", "ab5") THEN "ab4-5"
+                    ELSE "其他"
+                END
+            ELSE "其他"
+        END AS abcode,
+        page_type AS page,
+        mid,
+        vid,
+        is_share,
+        share_cnt,
+        is_return_1,
+        is_return_n,
+        is_return_noself,
+        return_1_uv,
+        return_n_uv,
+        return_n_uv_noself,
+        new_exposure_cnt,
+        flowpool,
+        scoresmap,
+        CAST(GET_JSON_OBJECT(scoresmap, '$.fmRov') AS DOUBLE) AS str_pred,
+        1.22 * POW(CAST(GET_JSON_OBJECT(scoresmap, '$.NorXGBScore') AS DOUBLE), 1.15) AS rosn_pred,
+        CAST(GET_JSON_OBJECT(scoresmap, '$.hasReturnRovScore') AS DOUBLE) AS rosn_stat,
+        GET_JSON_OBJECT(v1_feature, '$.title') AS vid_title
+    FROM t_filtered
+)
|
|
|
+-- t_valid: drop rows where either model prediction (str_pred, rosn_pred)
+-- could not be extracted. rosn_stat is not checked here and may be NULL.
+,t_valid AS
+(
+    SELECT *
+    FROM t_base
+    WHERE NOT (str_pred IS NULL OR rosn_pred IS NULL)
+)
|
|
|
+-- t_vid_rank: exposure count per vid inside each (dt, apptype, abcode), and
+-- the vid's rank by that count (1 = most exposed).
+-- vid is used as a secondary sort key so that vids with equal exposure get
+-- a reproducible rank; without it the top-20 cut could change between runs.
+,t_vid_rank AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,vid
+            ,COUNT(1) AS vid_exp_cnt
+            ,ROW_NUMBER() OVER (
+                PARTITION BY dt, apptype, abcode
+                ORDER BY COUNT(1) DESC, vid
+            ) AS vid_rank
+    FROM    t_valid
+    GROUP BY dt, apptype, abcode, vid
+)
|
|
|
+-- t_top5_vid: the 20 highest-ranked vids per (dt, apptype, abcode).
+-- The "top5" in the name is historical; the cut-off applied here is 20.
+,t_top5_vid AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        vid,
+        vid_rank
+    FROM t_vid_rank
+    WHERE vid_rank BETWEEN 1 AND 20
+)
|
|
|
+-- t_with_top5: tag every valid sample with its top-20 vid, if any.
+-- top5_vid / top5_vid_title / top5_vid_rank are NULL for samples whose vid
+-- is not in the top-20 list of its (dt, apptype, abcode).
+,t_with_top5 AS
+(
+    SELECT
+        s.*,
+        CASE WHEN t.vid IS NULL THEN NULL ELSE s.vid END       AS top5_vid,
+        CASE WHEN t.vid IS NULL THEN NULL ELSE s.vid_title END AS top5_vid_title,
+        t.vid_rank                                             AS top5_vid_rank
+    FROM t_valid s
+    LEFT JOIN t_top5_vid t
+        ON  s.dt      = t.dt
+        AND s.apptype = t.apptype
+        AND s.abcode  = t.abcode
+        AND s.vid     = t.vid
+)
|
|
|
+-- ========== Model evaluation metrics ==========
+-- Step 1: attach the binary label and the ranks consumed by the AUC and
+-- Spearman CTEs below.
+--
+-- The three str_rank_* columns are midranks (average rank inside a group of
+-- equal str_pred values): RANK() gives the lowest position of the tie group
+-- and (group size - 1) / 2 moves it to the middle. The rank-sum AUC formula
+-- used downstream then scores a tied positive/negative pair as 0.5; with
+-- ROW_NUMBER() such pairs were ordered arbitrarily and the AUC could change
+-- between runs. These columns are DOUBLE as a result.
+--
+-- NOTE(review): the rosn_* ranks still use ROW_NUMBER(), which orders ties in
+-- an unspecified way. They are left unchanged because the Spearman shortcut
+-- formula used downstream is only valid for tie-free ranks; a tie-aware
+-- Spearman needs a Pearson correlation over midranks instead.
+,t_with_rank AS
+(
+    SELECT  *
+            -- binary label: 1 when the sample has a non-self return
+            ,CASE WHEN is_return_noself > 0 THEN 1 ELSE 0 END AS is_pos
+            -- ranks over the whole experiment group
+            ,RANK() OVER (PARTITION BY dt, apptype, abcode ORDER BY str_pred)
+                + (COUNT(1) OVER (PARTITION BY dt, apptype, abcode, str_pred) - 1) / 2.0 AS str_rank_global
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode ORDER BY rosn_pred) AS rosn_pred_rank_global
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode ORDER BY rosn_stat) AS rosn_stat_rank_global
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode ORDER BY return_n_uv_noself) AS rosn_real_rank_global
+            -- ranks per experiment group x top-20 vid
+            ,RANK() OVER (PARTITION BY dt, apptype, abcode, top5_vid ORDER BY str_pred)
+                + (COUNT(1) OVER (PARTITION BY dt, apptype, abcode, top5_vid, str_pred) - 1) / 2.0 AS str_rank_vid
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode, top5_vid ORDER BY rosn_pred) AS rosn_pred_rank_vid
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode, top5_vid ORDER BY rosn_stat) AS rosn_stat_rank_vid
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode, top5_vid ORDER BY return_n_uv_noself) AS rosn_real_rank_vid
+            -- ranks per experiment group x user (mid)
+            ,RANK() OVER (PARTITION BY dt, apptype, abcode, mid ORDER BY str_pred)
+                + (COUNT(1) OVER (PARTITION BY dt, apptype, abcode, mid, str_pred) - 1) / 2.0 AS str_rank_user
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode, mid ORDER BY rosn_pred) AS rosn_pred_rank_user
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode, mid ORDER BY rosn_stat) AS rosn_stat_rank_user
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode, mid ORDER BY return_n_uv_noself) AS rosn_real_rank_user
+    FROM    t_with_top5
+)
|
|
|
+-- Step 2: ingredients of the overall AUC per experiment group:
+-- sample count, positive/negative counts, and the rank sum of the positives.
+,t_auc_global AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        'all'                         AS vid,
+        COUNT(*)                      AS n_total,
+        SUM(is_pos)                   AS n_pos,
+        SUM(1 - is_pos)               AS n_neg,
+        -- is_pos is 0/1, so the product keeps only the positives' ranks
+        SUM(is_pos * str_rank_global) AS sum_pos_rank
+    FROM t_with_rank
+    GROUP BY dt, apptype, abcode
+)
|
|
|
+-- Overall AUC from the rank sum:
+--   (mean rank of positives - (n_pos + 1) / 2) / n_neg
+-- NULL when the group has no positives or no negatives.
+,t_auc_global_result AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        vid,
+        CASE
+            WHEN n_pos > 0 AND n_neg > 0
+                THEN ROUND((sum_pos_rank * 1.0 / n_pos - (n_pos + 1) / 2.0) / n_neg, 6)
+        END AS str_auc
+    FROM t_auc_global
+)
|
|
|
+-- Step 3: AUC ingredients per experiment group x top-20 vid.
+-- Samples whose vid is outside the top-20 list are excluded.
+,t_auc_vid AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        top5_vid                   AS vid,
+        COUNT(*)                   AS n_total,
+        SUM(is_pos)                AS n_pos,
+        SUM(1 - is_pos)            AS n_neg,
+        SUM(is_pos * str_rank_vid) AS sum_pos_rank
+    FROM t_with_rank
+    WHERE top5_vid IS NOT NULL
+    GROUP BY dt, apptype, abcode, top5_vid
+)
|
|
|
+-- Per-vid AUC; vid is cast to STRING so it can be unioned with the
+-- 'all' rows of the group-level metrics. NULL when a class is missing.
+,t_auc_vid_result AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        CAST(vid AS STRING) AS vid,
+        CASE
+            WHEN n_pos > 0 AND n_neg > 0
+                THEN ROUND((sum_pos_rank * 1.0 / n_pos - (n_pos + 1) / 2.0) / n_neg, 6)
+        END AS str_auc
+    FROM t_auc_vid
+)
|
|
|
+-- Step 4: per-user AUC ingredients (input of the group-level GAUC).
+,t_user_auc AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        mid,
+        COUNT(*)                    AS user_exp,
+        SUM(is_pos)                 AS user_n_pos,
+        SUM(1 - is_pos)             AS user_n_neg,
+        SUM(is_pos * str_rank_user) AS user_sum_pos_rank
+    FROM t_with_rank
+    GROUP BY dt, apptype, abcode, mid
+)
|
|
|
+-- Per-user AUC. A user contributes only with at least 5 samples and at
+-- least one positive and one negative; otherwise user_auc is NULL.
+-- The value is clamped to the [0, 1] range.
+,t_user_auc_valid AS
+(
+    SELECT
+        *,
+        CASE
+            WHEN user_exp >= 5 AND user_n_pos > 0 AND user_n_neg > 0
+                THEN GREATEST(
+                         0.0,
+                         LEAST(
+                             1.0,
+                             (user_sum_pos_rank * 1.0 / user_n_pos - (user_n_pos + 1) / 2.0) / user_n_neg
+                         )
+                     )
+        END AS user_auc
+    FROM t_user_auc
+)
|
|
|
+-- GAUC per experiment group: exposure-weighted mean of the per-user AUC.
+-- Users with a NULL user_auc drop out of both numerator (NULL product is
+-- skipped by SUM) and denominator (weight 0).
+,t_gauc_result AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        'all' AS vid,
+        ROUND(
+            SUM(user_exp * user_auc)
+            / NULLIF(SUM(CASE WHEN user_auc IS NULL THEN 0 ELSE user_exp END), 0),
+            6
+        ) AS str_gauc
+    FROM t_user_auc_valid
+    GROUP BY dt, apptype, abcode
+)
|
|
|
+-- Step 5: overall Spearman inputs per experiment group.
+-- Uses the shortcut formula 1 - 6 * sum(d^2) / (n * (n^2 - 1)); n and the
+-- squared rank differences are carried as DOUBLE to avoid integer overflow.
+,t_spearman_global AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        'all' AS vid,
+        CAST(COUNT(*) AS DOUBLE) AS n,
+        -- d = predicted rank - real rank; accumulate d * d
+        SUM(
+            CAST(rosn_pred_rank_global - rosn_real_rank_global AS DOUBLE)
+            * (rosn_pred_rank_global - rosn_real_rank_global)
+        ) AS sum_d2_pred,
+        SUM(
+            CAST(rosn_stat_rank_global - rosn_real_rank_global AS DOUBLE)
+            * (rosn_stat_rank_global - rosn_real_rank_global)
+        ) AS sum_d2_stat
+    FROM t_with_rank
+    GROUP BY dt, apptype, abcode
+)
|
|
|
+-- Overall Spearman per experiment group:
+--   1 - 6 * sum(d^2) / (n * (n^2 - 1))
+-- NULL when the denominator is 0 (n = 1).
+,t_spearman_global_result AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        vid,
+        ROUND(1.0 - 6.0 * sum_d2_pred / NULLIF(n * (n * n - 1), 0), 6) AS rosn_corr,
+        ROUND(1.0 - 6.0 * sum_d2_stat / NULLIF(n * (n * n - 1), 0), 6) AS rosn_stat_corr
+    FROM t_spearman_global
+)
|
|
|
+-- Step 6: Spearman inputs per experiment group x top-20 vid.
+-- Samples whose vid is outside the top-20 list are excluded.
+,t_spearman_vid AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        top5_vid AS vid,
+        CAST(COUNT(*) AS DOUBLE) AS n,
+        SUM(
+            CAST(rosn_pred_rank_vid - rosn_real_rank_vid AS DOUBLE)
+            * (rosn_pred_rank_vid - rosn_real_rank_vid)
+        ) AS sum_d2_pred,
+        SUM(
+            CAST(rosn_stat_rank_vid - rosn_real_rank_vid AS DOUBLE)
+            * (rosn_stat_rank_vid - rosn_real_rank_vid)
+        ) AS sum_d2_stat
+    FROM t_with_rank
+    WHERE top5_vid IS NOT NULL
+    GROUP BY dt, apptype, abcode, top5_vid
+)
|
|
|
+-- Per-vid Spearman (same shortcut formula as the group level); vid is cast
+-- to STRING to line up with the 'all' rows of the group-level metrics.
+,t_spearman_vid_result AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        CAST(vid AS STRING) AS vid,
+        ROUND(1.0 - 6.0 * sum_d2_pred / NULLIF(n * (n * n - 1), 0), 6) AS rosn_corr,
+        ROUND(1.0 - 6.0 * sum_d2_stat / NULLIF(n * (n * n - 1), 0), 6) AS rosn_stat_corr
+    FROM t_spearman_vid
+)
|
|
|
+-- Step 7: per-user Spearman inputs. user_exp (integer count) is the weight
+-- used later; n is the same count as DOUBLE for the formula.
+,t_user_spearman AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        mid,
+        COUNT(*)                 AS user_exp,
+        CAST(COUNT(*) AS DOUBLE) AS n,
+        SUM(
+            CAST(rosn_pred_rank_user - rosn_real_rank_user AS DOUBLE)
+            * (rosn_pred_rank_user - rosn_real_rank_user)
+        ) AS sum_d2_pred,
+        SUM(
+            CAST(rosn_stat_rank_user - rosn_real_rank_user AS DOUBLE)
+            * (rosn_stat_rank_user - rosn_real_rank_user)
+        ) AS sum_d2_stat
+    FROM t_with_rank
+    GROUP BY dt, apptype, abcode, mid
+)
|
|
|
+-- Per-user Spearman for both predictors. Users with fewer than 5 samples
+-- get NULL so that unstable small-sample values are left out.
+,t_user_spearman_valid AS
+(
+    SELECT
+        *,
+        CASE
+            WHEN n >= 5 THEN 1.0 - 6.0 * sum_d2_pred / NULLIF(n * (n * n - 1), 0)
+        END AS user_rosn_corr,
+        CASE
+            WHEN n >= 5 THEN 1.0 - 6.0 * sum_d2_stat / NULLIF(n * (n * n - 1), 0)
+        END AS user_rosn_stat_corr
+    FROM t_user_spearman
+)
|
|
|
+-- Group-level Spearman: exposure-weighted mean of the per-user values.
+-- Users with a NULL correlation contribute neither value nor weight.
+,t_gspearman_result AS
+(
+    SELECT
+        dt,
+        apptype,
+        abcode,
+        'all' AS vid,
+        ROUND(
+            SUM(user_exp * user_rosn_corr)
+            / NULLIF(SUM(CASE WHEN user_rosn_corr IS NULL THEN 0 ELSE user_exp END), 0),
+            6
+        ) AS rosn_gcorr,
+        ROUND(
+            SUM(user_exp * user_rosn_stat_corr)
+            / NULLIF(SUM(CASE WHEN user_rosn_stat_corr IS NULL THEN 0 ELSE user_exp END), 0),
+            6
+        ) AS rosn_stat_gcorr
+    FROM t_user_spearman_valid
+    GROUP BY dt, apptype, abcode
+)
|
|
|
+-- Step 8: all evaluation metrics at experiment-group granularity
+-- (vid = 'all'), assembled around the AUC rows.
+,t_metrics_global AS
+(
+    SELECT
+        auc.dt,
+        auc.apptype,
+        auc.abcode,
+        auc.vid,
+        auc.str_auc,
+        gauc.str_gauc,
+        sp.rosn_corr,
+        sp.rosn_stat_corr,
+        gsp.rosn_gcorr,
+        gsp.rosn_stat_gcorr
+    FROM t_auc_global_result auc
+    LEFT JOIN t_gauc_result gauc
+        ON  auc.dt = gauc.dt
+        AND auc.apptype = gauc.apptype
+        AND auc.abcode = gauc.abcode
+    LEFT JOIN t_spearman_global_result sp
+        ON  auc.dt = sp.dt
+        AND auc.apptype = sp.apptype
+        AND auc.abcode = sp.abcode
+    LEFT JOIN t_gspearman_result gsp
+        ON  auc.dt = gsp.dt
+        AND auc.apptype = gsp.apptype
+        AND auc.abcode = gsp.abcode
+)
|
|
|
+-- Step 9: evaluation metrics at vid granularity. Only the overall metrics
+-- exist per vid; the user-weighted ones are emitted as typed NULLs so the
+-- column layout matches t_metrics_global.
+,t_metrics_vid AS
+(
+    SELECT
+        auc.dt,
+        auc.apptype,
+        auc.abcode,
+        auc.vid,
+        auc.str_auc,
+        CAST(NULL AS DOUBLE) AS str_gauc,
+        sp.rosn_corr,
+        sp.rosn_stat_corr,
+        CAST(NULL AS DOUBLE) AS rosn_gcorr,
+        CAST(NULL AS DOUBLE) AS rosn_stat_gcorr
+    FROM t_auc_vid_result auc
+    LEFT JOIN t_spearman_vid_result sp
+        ON  auc.dt = sp.dt
+        AND auc.apptype = sp.apptype
+        AND auc.abcode = sp.abcode
+        AND auc.vid = sp.vid
+)
|
|
|
+-- Step 10: stack the group-level and vid-level metric rows.
+-- Columns are listed explicitly, in the order both inputs define them.
+,t_metrics_all AS
+(
+    SELECT
+        dt, apptype, abcode, vid,
+        str_auc, str_gauc,
+        rosn_corr, rosn_stat_corr,
+        rosn_gcorr, rosn_stat_gcorr
+    FROM t_metrics_global
+    UNION ALL
+    SELECT
+        dt, apptype, abcode, vid,
+        str_auc, str_gauc,
+        rosn_corr, rosn_stat_corr,
+        rosn_gcorr, rosn_stat_gcorr
+    FROM t_metrics_vid
+)
|
|
|
+-- ========== Aggregation of business / calibration metrics ==========
+-- t_agg produces one row per (dt, apptype, abcode) with vid = 'all', plus one
+-- row per (dt, apptype, abcode, top-20 vid). The HAVING clause drops the
+-- detail group that collects all non-top-20 samples (top5_vid IS NULL).
+--
+-- Every ratio guards its denominator with NULLIF(..., 0) so a zero
+-- denominator yields NULL (then 0 where COALESCE is applied) instead of
+-- depending on the engine's divide-by-zero behaviour.
+,t_agg AS
+(
+    SELECT  dt
+            ,COALESCE(apptype, 'sum') AS apptype
+            ,COALESCE(abcode, 'sum') AS abcode
+            ,COALESCE(CAST(top5_vid AS STRING), 'all') AS vid
+            -- title / rank only make sense on the per-vid rows
+            ,CASE WHEN GROUPING(top5_vid) = 1 THEN NULL ELSE MAX(top5_vid_title) END AS vid_title
+            ,CASE WHEN GROUPING(top5_vid) = 1 THEN NULL ELSE MAX(top5_vid_rank) END AS vid_rank
+            -- COPC: observed value divided by the mean prediction
+            ,round((SUM(is_return_noself) / COUNT(1)) / NULLIF(SUM(str_pred) / COUNT(1), 0), 4) AS str_copc
+            ,round((SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0)) / NULLIF(SUM(rosn_pred) / COUNT(1), 0), 4) AS rosn_copc
+            ,round((SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0)) / NULLIF(SUM(rosn_stat) / COUNT(1), 0), 4) AS rosn_stat_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_pred), 0), 4) AS rovn_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_stat), 0), 4) AS rovn_stat_copc
+            -- model predictions vs. observed values
+            ,round(COALESCE(SUM(is_return_noself) / COUNT(1),0),6) AS str_real
+            ,round(COALESCE(SUM(str_pred) / COUNT(1),0),6) AS str_pred
+            ,round(COALESCE(SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0),0),6) AS rosn_real
+            ,round(COALESCE(SUM(rosn_pred) / COUNT(1),0),6) AS rosn_pred
+            ,round(COALESCE(SUM(rosn_stat) / COUNT(1),0),6) AS rosn_stat
+            ,round(SUM(return_n_uv_noself) / COUNT(1), 6) AS rovn_real
+            ,round(AVG(str_pred * rosn_pred), 6) AS rovn_pred
+            ,round(AVG(str_pred * rosn_stat), 6) AS rovn_stat
+            -- mean absolute error of both rosn predictors
+            ,round(AVG(ABS(rosn_pred - return_n_uv_noself)),6) AS rosn_pred_mae
+            ,round(AVG(ABS(rosn_stat - return_n_uv_noself)),6) AS rosn_stat_mae
+            -- business metrics
+            ,round(COALESCE(COUNT(1) / NULLIF(COUNT(DISTINCT mid), 0),0),2) AS exp_per_dau
+            ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+            ,round(COALESCE(SUM(return_n_uv) / NULLIF(SUM(is_share), 0),0),6) AS ros_one
+            ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+            ,round(COALESCE(SUM(return_n_uv) / NULLIF(SUM(share_cnt), 0),0),6) AS ros
+            ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+            ,round(COALESCE(SUM(return_n_uv) / NULLIF(SUM(is_return_1), 0),0),6) AS ros_minus
+            ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+            ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+            -- raw counts
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_with_rank
+    GROUP BY dt, apptype, abcode, top5_vid
+    GROUPING SETS (
+        (dt, apptype, abcode),
+        (dt, apptype, abcode, top5_vid)
+    )
+    -- keep the group totals and the top-20 vid rows only
+    HAVING  top5_vid IS NOT NULL OR GROUPING(top5_vid) = 1
+)
|
|
|
+-- t_agg_with_metrics: attach the model evaluation metrics to each
+-- aggregated row, matched on (dt, apptype, abcode, vid).
+,t_agg_with_metrics AS
+(
+    SELECT
+        agg.*,
+        met.str_auc,
+        met.str_gauc,
+        met.rosn_corr,
+        met.rosn_gcorr,
+        met.rosn_stat_corr,
+        met.rosn_stat_gcorr
+    FROM t_agg agg
+    LEFT JOIN t_metrics_all met
+        ON  agg.dt = met.dt
+        AND agg.apptype = met.apptype
+        AND agg.abcode = met.abcode
+        AND agg.vid = met.vid
+)
|
|
|
+-- t_with_baseline: adds the exposure share and the control-group baseline
+-- (*_base) of every metric to each row.
+--   exp_pct : row exposure / exposure of the 'all' row of the same
+--             (dt, apptype, abcode)
+--   *_base  : value of the same metric on the control-group ('对照组') row
+--             with the same (dt, apptype, vid); NULL when no such row exists
+-- The baseline is fetched with a LEFT JOIN; the input has at most one
+-- control-group row per (dt, apptype, vid), so no row is duplicated.
+,t_with_baseline AS
+(
+    SELECT
+        cur.*,
+        -- exposure share
+        ROUND(
+            cur.exp * 1.0
+            / MAX(CASE WHEN cur.vid = 'all' THEN cur.exp END)
+                  OVER (PARTITION BY cur.dt, cur.apptype, cur.abcode),
+            4
+        ) AS exp_pct,
+        -- business metrics
+        ctl.exp_per_dau        AS exp_per_dau_base,
+        ctl.str_one            AS str_one_base,
+        ctl.ros_one            AS ros_one_base,
+        ctl.str                AS str_base,
+        ctl.ros                AS ros_base,
+        ctl.str_plus           AS str_plus_base,
+        ctl.ros_minus          AS ros_minus_base,
+        ctl.rovn               AS rovn_base,
+        ctl.vovh24             AS vovh24_base,
+        -- COPC metrics
+        ctl.str_copc           AS str_copc_base,
+        ctl.rosn_copc          AS rosn_copc_base,
+        ctl.rosn_stat_copc     AS rosn_stat_copc_base,
+        ctl.rovn_copc          AS rovn_copc_base,
+        ctl.rovn_stat_copc     AS rovn_stat_copc_base,
+        -- observed values
+        ctl.str_real           AS str_real_base,
+        ctl.rosn_real          AS rosn_real_base,
+        ctl.rovn_real          AS rovn_real_base,
+        -- counts
+        ctl.dau                AS dau_base,
+        ctl.exp                AS exp_base,
+        ctl.is_share           AS is_share_base,
+        ctl.share_cnt          AS share_cnt_base,
+        ctl.is_return_1        AS is_return_1_base,
+        ctl.return_n_uv        AS return_n_uv_base,
+        ctl.viewh24            AS viewh24_base,
+        ctl.return_n_uv_noself AS return_n_uv_noself_base,
+        -- model evaluation metrics
+        ctl.str_auc            AS str_auc_base,
+        ctl.str_gauc           AS str_gauc_base,
+        ctl.rosn_corr          AS rosn_corr_base,
+        ctl.rosn_gcorr         AS rosn_gcorr_base,
+        ctl.rosn_stat_corr     AS rosn_stat_corr_base,
+        ctl.rosn_stat_gcorr    AS rosn_stat_gcorr_base
+    FROM t_agg_with_metrics cur
+    LEFT JOIN
+    (
+        SELECT *
+        FROM t_agg_with_metrics
+        WHERE abcode = '对照组'
+    ) ctl
+        ON  cur.dt = ctl.dt
+        AND cur.apptype = ctl.apptype
+        AND cur.vid = ctl.vid
+)
|
|
|
+-- Final output: dimensions, calibration / prediction / evaluation metrics,
+-- business metrics, raw counts, and the change rate (*_chg) of each metric
+-- relative to the control-group baseline: (value - base) / base.
+-- A NULL or zero baseline yields a NULL change rate.
+-- The Spearman-based metrics can be negative, so their change rates divide
+-- by ABS(base): a rise from a negative baseline is then reported as a
+-- positive change instead of having its sign flipped.
+SELECT  dt
+        ,apptype
+        ,abcode
+        ,vid
+        ,vid_title
+        ,vid_rank
+        ,exp_pct
+        ,round((dau - dau_base) / NULLIF(dau_base, 0), 4) AS dau_chg
+        ,round((exp - exp_base) / NULLIF(exp_base, 0), 4) AS exp_chg
+        -- COPC
+        ,str_copc, rosn_copc, rosn_stat_copc, rovn_copc, rovn_stat_copc
+        -- model predictions vs. observed values
+        ,str_real, str_pred, rosn_real, rosn_pred, rosn_stat
+        ,rovn_real, rovn_pred, rovn_stat
+        ,rosn_pred_mae, rosn_stat_mae
+        -- ========== model evaluation metrics ==========
+        ,str_auc, str_gauc
+        ,rosn_corr, rosn_gcorr
+        ,rosn_stat_corr, rosn_stat_gcorr
+        -- business metrics
+        ,exp_per_dau, str_one, ros_one, str, ros, str_plus, ros_minus, rovn, vovh24
+        -- counts
+        ,dau, exp, is_share, share_cnt, is_return_1, return_n_uv, viewh24, return_n_uv_noself
+        -- ========== change-rate columns ==========
+        -- business metrics
+        ,round((exp_per_dau - exp_per_dau_base) / NULLIF(exp_per_dau_base, 0), 4) AS exp_per_dau_chg
+        ,round((str_one - str_one_base) / NULLIF(str_one_base, 0), 4) AS str_one_chg
+        ,round((ros_one - ros_one_base) / NULLIF(ros_one_base, 0), 4) AS ros_one_chg
+        ,round((str - str_base) / NULLIF(str_base, 0), 4) AS str_chg
+        ,round((ros - ros_base) / NULLIF(ros_base, 0), 4) AS ros_chg
+        ,round((str_plus - str_plus_base) / NULLIF(str_plus_base, 0), 4) AS str_plus_chg
+        ,round((ros_minus - ros_minus_base) / NULLIF(ros_minus_base, 0), 4) AS ros_minus_chg
+        ,round((rovn - rovn_base) / NULLIF(rovn_base, 0), 4) AS rovn_chg
+        ,round((vovh24 - vovh24_base) / NULLIF(vovh24_base, 0), 4) AS vovh24_chg
+        -- COPC
+        ,round((str_copc - str_copc_base) / NULLIF(str_copc_base, 0), 4) AS str_copc_chg
+        ,round((rosn_copc - rosn_copc_base) / NULLIF(rosn_copc_base, 0), 4) AS rosn_copc_chg
+        ,round((rosn_stat_copc - rosn_stat_copc_base) / NULLIF(rosn_stat_copc_base, 0), 4) AS rosn_stat_copc_chg
+        ,round((rovn_copc - rovn_copc_base) / NULLIF(rovn_copc_base, 0), 4) AS rovn_copc_chg
+        ,round((rovn_stat_copc - rovn_stat_copc_base) / NULLIF(rovn_stat_copc_base, 0), 4) AS rovn_stat_copc_chg
+        -- observed values
+        ,round((str_real - str_real_base) / NULLIF(str_real_base, 0), 4) AS str_real_chg
+        ,round((rosn_real - rosn_real_base) / NULLIF(rosn_real_base, 0), 4) AS rosn_real_chg
+        ,round((rovn_real - rovn_real_base) / NULLIF(rovn_real_base, 0), 4) AS rovn_real_chg
+        -- model evaluation metrics (correlations divide by ABS(base), see header)
+        ,round((str_auc - str_auc_base) / NULLIF(str_auc_base, 0), 4) AS str_auc_chg
+        ,round((str_gauc - str_gauc_base) / NULLIF(str_gauc_base, 0), 4) AS str_gauc_chg
+        ,round((rosn_corr - rosn_corr_base) / NULLIF(ABS(rosn_corr_base), 0), 4) AS rosn_corr_chg
+        ,round((rosn_gcorr - rosn_gcorr_base) / NULLIF(ABS(rosn_gcorr_base), 0), 4) AS rosn_gcorr_chg
+        ,round((rosn_stat_corr - rosn_stat_corr_base) / NULLIF(ABS(rosn_stat_corr_base), 0), 4) AS rosn_stat_corr_chg
+        ,round((rosn_stat_gcorr - rosn_stat_gcorr_base) / NULLIF(ABS(rosn_stat_gcorr_base), 0), 4) AS rosn_stat_gcorr_chg
+        -- counts
+        ,round((is_share - is_share_base) / NULLIF(is_share_base, 0), 4) AS is_share_chg
+        ,round((share_cnt - share_cnt_base) / NULLIF(share_cnt_base, 0), 4) AS share_cnt_chg
+        ,round((is_return_1 - is_return_1_base) / NULLIF(is_return_1_base, 0), 4) AS is_return_1_chg
+        ,round((return_n_uv - return_n_uv_base) / NULLIF(return_n_uv_base, 0), 4) AS return_n_uv_chg
+        ,round((viewh24 - viewh24_base) / NULLIF(viewh24_base, 0), 4) AS viewh24_chg
+        ,round((return_n_uv_noself - return_n_uv_noself_base) / NULLIF(return_n_uv_noself_base, 0), 4) AS return_n_uv_noself_chg
+FROM    t_with_baseline
+-- vid is the last key so rows with equal exposure come out in a stable order
+ORDER BY dt DESC, apptype, abcode, exp DESC, vid
+;
|