|
@@ -0,0 +1,211 @@
|
|
|
|
|
+-- ════════════════════════════════════════════════════════════════════════════
|
|
|
|
|
+-- 模型预估诊断 v1:按天 × 策略,分析 str / ros 预估均值、COPC、负数占比
|
|
|
|
|
+--
|
|
|
|
|
+-- 目标:诊断线上 ros (NorXGBScore) 出现负数的问题
|
|
|
|
|
+-- - str / ros 预估均值(原始 + 经过 1.22*pow(x,1.15) 变换后)
|
|
|
|
|
+-- - COPC 校准度(actual / predicted)
|
|
|
|
|
+-- - ros 负数占比 (NorXGBScore < 0)
|
|
|
|
|
+-- - 极值诊断(min/max)
|
|
|
|
|
+--
|
|
|
|
|
+-- 字段含义:
|
|
|
|
|
+-- - str_pred = scoresMap.fmRov (XGB str 预测)
|
|
|
|
|
+-- - ros_pred = scoresMap.NorXGBScore (XGB ros 原始预测,可能为负)
|
|
|
|
|
+-- - ros_pred_tx = 1.22 * pow(NorXGBScore, 1.15) (线上变换后,负数 → NULL)
|
|
|
|
|
+--
|
|
|
|
|
+-- 数据源:loghubods.dwd_recsys_alg_sample_all_20250212
|
|
|
|
|
+-- 分组:dt × apptype × abcode (策略, 通过尾号→策略映射)
|
|
|
|
|
+-- ════════════════════════════════════════════════════════════════════════════
|
|
|
|
|
+-- Static lookup: last character of rootsessionid (suffix) -> traffic bucket (suffix_group).
+-- Each bucket is made of exactly two suffix characters.
+WITH t_suffix_group AS
+(
+    -- bucket "ab"
+    SELECT "a" AS suffix, "ab" AS suffix_group
+    UNION ALL
+    SELECT "b" AS suffix, "ab" AS suffix_group
+    -- bucket "01"
+    UNION ALL
+    SELECT "0" AS suffix, "01" AS suffix_group
+    UNION ALL
+    SELECT "1" AS suffix, "01" AS suffix_group
+    -- bucket "2c"
+    UNION ALL
+    SELECT "2" AS suffix, "2c" AS suffix_group
+    UNION ALL
+    SELECT "c" AS suffix, "2c" AS suffix_group
+    -- bucket "34"
+    UNION ALL
+    SELECT "3" AS suffix, "34" AS suffix_group
+    UNION ALL
+    SELECT "4" AS suffix, "34" AS suffix_group
+    -- bucket "5d"
+    UNION ALL
+    SELECT "5" AS suffix, "5d" AS suffix_group
+    UNION ALL
+    SELECT "d" AS suffix, "5d" AS suffix_group
+    -- bucket "67"
+    UNION ALL
+    SELECT "6" AS suffix, "67" AS suffix_group
+    UNION ALL
+    SELECT "7" AS suffix, "67" AS suffix_group
+    -- bucket "89"
+    UNION ALL
+    SELECT "8" AS suffix, "89" AS suffix_group
+    UNION ALL
+    SELECT "9" AS suffix, "89" AS suffix_group
+    -- bucket "ef"
+    UNION ALL
+    SELECT "e" AS suffix, "ef" AS suffix_group
+    UNION ALL
+    SELECT "f" AS suffix, "ef" AS suffix_group
+)
|
|
|
|
|
+-- Static lookup: (apptype, suffix_group, date range) -> strategy label (abcode).
+--   start_dt / end_dt are inclusive yyyymmdd strings; "29991231" marks an open-ended range.
+--   t_base joins on `'${dt}' BETWEEN start_dt AND end_dt`, so the ranges of one
+--   (apptype, suffix_group) pair must never overlap: an overlap makes every sample
+--   match two rows and be counted under both strategies.
+--   Pairs / dates with no row here fall back to "对照组" in t_base.
+,t_experiment_map AS
+(
+    -- ── apptype = 4 ──────────────────────────────────────────────────
+    SELECT "4" AS apptype, "ab" AS suffix_group, "实验组:变更str*ros建模目标实验 有问题" AS abcode, "20260413" AS start_dt, "20260415" AS end_dt
+    UNION ALL SELECT "4", "ab", "实验组:变更str*ros建模目标实验", "20260416", "29991231"
+    UNION ALL SELECT "4", "01", "实验组:变更str*ros建模目标实验", "20260320", "20260410"
+    UNION ALL SELECT "4", "01", "实验组:变更str*ros建模目标实验 有问题", "20260411", "20260415"
+    UNION ALL SELECT "4", "01", "实验组:变更str*ros建模目标实验", "20260416", "29991231"
+    UNION ALL SELECT "4", "67", "实验组:变更str*ros建模目标实验", "20260330", "20260410"
+    UNION ALL SELECT "4", "67", "实验组:变更str*ros建模目标实验 有问题", "20260411", "20260415"
+    UNION ALL SELECT "4", "67", "实验组:变更str*ros建模目标实验", "20260416", "29991231"
+    UNION ALL SELECT "4", "5d", "实验组:变更str*ros建模目标实验", "20260407", "20260410"
+    UNION ALL SELECT "4", "5d", "实验组:变更str*ros建模目标实验 有问题", "20260411", "20260415"
+    UNION ALL SELECT "4", "5d", "实验组:变更str*ros建模目标实验", "20260416", "29991231"
+    UNION ALL SELECT "4", "34", "实验组:变更str*ros建模目标实验", "20260407", "20260410"
+    UNION ALL SELECT "4", "34", "实验组:变更str*ros建模目标实验 有问题", "20260411", "20260415"
+    UNION ALL SELECT "4", "34", "实验组:变更str*ros建模目标实验", "20260416", "29991231"
+    UNION ALL SELECT "4", "67", "实验组:bn_ros新损失函数", "20260311", "20260319"
+    UNION ALL SELECT "4", "5d", "实验组:解构特征排序str模型", "20260314", "20260406"
+    UNION ALL SELECT "4", "ef", "实验组:解构特征排序str模型&召回", "20260314", "20260320"
+    UNION ALL SELECT "4", "ef", "实验组:DNN模型", "20260407", "29991231"
+    UNION ALL SELECT "4", "2c", "实验组:DNN模型-调参", "20260413", "29991231"
+    -- Open-ended sentinel must be the 8-digit "29991231" like every other row.
+    UNION ALL SELECT "4", "89", "对照组", "20260301", "29991231"
+
+    -- ── apptype = 0 ──────────────────────────────────────────────────
+    -- The "有问题" window closes on 20260415, the day before the next "ab" strategy
+    -- starts, so the two "ab" ranges do not overlap.
+    UNION ALL SELECT "0", "ab", "实验组:变更str*ros建模目标实验 有问题", "20260413", "20260415"
+    UNION ALL SELECT "0", "ab", "实验组:DNN模型-调参", "20260416", "29991231"
+    UNION ALL SELECT "0", "01", "实验组:变更str*ros建模目标实验", "20260320", "20260410"
+    UNION ALL SELECT "0", "01", "实验组:变更str*ros建模目标实验 有问题", "20260411", "20260415"
+    UNION ALL SELECT "0", "01", "实验组:变更str*ros建模目标实验", "20260416", "29991231"
+    UNION ALL SELECT "0", "34", "实验组:变更str*ros建模目标实验", "20260330", "20260410"
+    UNION ALL SELECT "0", "34", "实验组:变更str*ros建模目标实验 有问题", "20260411", "20260415"
+    UNION ALL SELECT "0", "34", "实验组:DNN模型-调参", "20260416", "29991231"
+    UNION ALL SELECT "0", "67", "实验组:变更str*ros建模目标实验", "20260330", "20260410"
+    UNION ALL SELECT "0", "67", "实验组:变更str*ros建模目标实验 有问题", "20260411", "20260415"
+    UNION ALL SELECT "0", "67", "实验组:DNN模型-调参", "20260416", "29991231"
+    UNION ALL SELECT "0", "5d", "实验组:变更str*ros建模目标实验", "20260407", "20260410"
+    UNION ALL SELECT "0", "5d", "实验组:变更str*ros建模目标实验 有问题", "20260411", "20260415"
+    UNION ALL SELECT "0", "5d", "实验组:DNN模型-调参", "20260416", "29991231"
+    UNION ALL SELECT "0", "ef", "实验组:DNN模型-调参", "20260410", "29991231"
+    UNION ALL SELECT "0", "2c", "实验组:DNN模型", "20260413", "29991231"
+    UNION ALL SELECT "0", "89", "对照组", "20260301", "29991231"
+)
|
|
|
|
|
+-- Extract the model-score payload and the session-id suffix for one day of samples.
|
|
|
|
|
+-- Key fields:
|
|
|
|
|
+--   scoresMap.fmRov        = XGB str prediction (present under every strategy)
|
|
|
|
|
+--   scoresMap.NorXGBScore  = raw XGB ros prediction (present under XGB strategies)
|
|
|
|
|
+--   scoresMap.NorDNNScore  = raw DNN ros prediction (present under DNN strategies)
|
|
|
|
|
+--   Online transform: 1.22 * pow(x, 1.15), x < 0 → NULL (not applied in this CTE)
|
|
|
|
|
+,t_scores AS
|
|
|
|
|
+(
|
|
|
|
|
+    SELECT  dt
|
|
|
|
|
+            ,apptype
|
|
|
|
|
+            ,mid
|
|
|
|
|
+            ,vid
|
|
|
|
|
+            ,is_share
|
|
|
|
|
+            ,share_cnt
|
|
|
|
|
+            ,is_return_noself
|
|
|
|
|
+            ,return_n_uv
|
|
|
|
|
+            ,return_n_uv_noself
|
|
|
|
|
+            -- suffix = last character of extend.rootsessionid (1 char starting at position LENGTH)
+            ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),
|
|
|
|
|
+                    LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
|
|
|
|
|
+            -- Pull the scoresMap string out once (with backslash escapes removed) so that later steps parse it instead of repeating this GET_JSON_OBJECT
|
|
|
|
|
+            ,REPLACE(GET_JSON_OBJECT(extend_alg,'$.scoresMap'),"\\","") AS scoresmap_clean
|
|
|
|
|
+    FROM    loghubods.dwd_recsys_alg_sample_all_20250212
|
|
|
|
|
+    WHERE   dt = '${dt}'
|
|
|
|
|
+    AND     apptype IN ("0","4")
|
|
|
|
|
+    AND     extend_alg IS NOT NULL
|
|
|
|
|
+)
|
|
|
|
|
+-- Parse the individual model scores out of the cleaned scoresMap JSON string.
+-- A score that is absent from the JSON comes out as NULL.
+,t_scores2 AS
+(
+    SELECT  src.dt
+            ,src.apptype
+            ,src.mid
+            ,src.vid
+            ,src.is_share
+            ,src.share_cnt
+            ,src.is_return_noself
+            ,src.return_n_uv
+            ,src.return_n_uv_noself
+            ,src.suffix
+            -- str prediction ($.fmRov)
+            ,CAST(GET_JSON_OBJECT(src.scoresmap_clean, '$.fmRov') AS DOUBLE)        AS str_pred
+            -- raw XGB ros prediction ($.NorXGBScore)
+            ,CAST(GET_JSON_OBJECT(src.scoresmap_clean, '$.NorXGBScore') AS DOUBLE)  AS xgb_ros_raw
+            -- raw DNN ros prediction ($.NorDNNScore)
+            ,CAST(GET_JSON_OBJECT(src.scoresmap_clean, '$.NorDNNScore') AS DOUBLE)  AS dnn_ros_raw
+    FROM    t_scores src
+)
|
|
|
|
|
+-- Attach the traffic bucket and the strategy label (abcode) to every scored sample.
+--   * The INNER JOIN drops samples whose suffix is not listed in t_suffix_group.
+--   * Samples with no experiment row covering '${dt}' are labelled "对照组".
+,t_base AS
+(
+    SELECT  sc.*
+            ,grp.suffix_group
+            ,COALESCE(em.abcode,"对照组") AS abcode
+    FROM    t_scores2 sc
+    INNER JOIN t_suffix_group grp
+            ON  sc.suffix = grp.suffix
+    LEFT JOIN t_experiment_map em
+            ON  sc.apptype = em.apptype
+            AND grp.suffix_group = em.suffix_group
+            -- inclusive yyyymmdd string comparison against the run date
+            AND '${dt}' BETWEEN em.start_dt AND em.end_dt
+)
|
|
|
|
|
+-- Aggregate per day × apptype × strategy (abcode): coverage counts, actual rates,
+-- mean predictions, COPC (actual / predicted), negative-score ratios, extremes and
+-- the DNN ros score distribution.
+-- Every ratio divides by NULLIF(..., 0), so an empty denominator yields NULL
+-- instead of a division error.
+,t_metrics AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            -- ── Sample size & field coverage (COUNT(col) counts non-NULL values) ──
+            ,COUNT(1)                                                   AS exp_cnt
+            ,COUNT(str_pred)                                            AS str_pred_cnt
+            ,COUNT(xgb_ros_raw)                                         AS xgb_cnt
+            ,COUNT(dnn_ros_raw)                                         AS dnn_cnt
+
+            -- ── Actual values (over all samples of the group) ──
+            ,ROUND(SUM(is_share) / COUNT(1), 6)                         AS str_actual
+            ,ROUND(SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself),0), 6) AS ros_actual
+            ,ROUND(SUM(return_n_uv_noself) / COUNT(1), 6)               AS rov_actual
+
+            -- ── Mean predictions (AVG skips NULL scores) ──
+            ,ROUND(AVG(str_pred), 6)                                    AS str_pred_mean
+            ,ROUND(AVG(xgb_ros_raw), 6)                                 AS xgb_ros_mean
+            ,ROUND(AVG(dnn_ros_raw), 6)                                 AS dnn_ros_mean
+
+            -- ── COPC = actual / predicted ──
+            -- Both sides are computed on the SAME rows: AVG(pred) ignores rows whose
+            -- prediction is NULL, so the actual side is restricted to rows where that
+            -- prediction is present. Otherwise partial score coverage skews the ratio.
+            -- str COPC: share rate over rows with a str prediction / mean str prediction
+            ,ROUND(
+                (SUM(CASE WHEN str_pred IS NOT NULL THEN is_share END) / NULLIF(COUNT(str_pred),0))
+                / NULLIF(AVG(str_pred),0),
+             4)                                                         AS str_copc
+            -- XGB ros COPC: rows with is_return_noself = 1 and a non-NULL XGB ros score
+            ,ROUND(
+                AVG(CASE WHEN is_return_noself=1 AND xgb_ros_raw IS NOT NULL THEN return_n_uv_noself END)
+                / NULLIF(AVG(CASE WHEN is_return_noself=1 THEN xgb_ros_raw END),0),
+             4)                                                         AS xgb_ros_copc
+            -- DNN ros COPC: rows with is_return_noself = 1 and a non-NULL DNN ros score
+            ,ROUND(
+                AVG(CASE WHEN is_return_noself=1 AND dnn_ros_raw IS NOT NULL THEN return_n_uv_noself END)
+                / NULLIF(AVG(CASE WHEN is_return_noself=1 THEN dnn_ros_raw END),0),
+             4)                                                         AS dnn_ros_copc
+
+            -- ── Share of negative scores (among rows where the score is non-NULL) ──
+            ,ROUND(SUM(CASE WHEN xgb_ros_raw < 0 THEN 1 ELSE 0 END)
+                   / NULLIF(COUNT(xgb_ros_raw),0),
+             6)                                                         AS xgb_neg_ratio
+            ,ROUND(SUM(CASE WHEN dnn_ros_raw < 0 THEN 1 ELSE 0 END)
+                   / NULLIF(COUNT(dnn_ros_raw),0),
+             6)                                                         AS dnn_neg_ratio
+            -- str negative share (sanity check)
+            ,ROUND(SUM(CASE WHEN str_pred < 0 THEN 1 ELSE 0 END)
+                   / NULLIF(COUNT(str_pred),0),
+             6)                                                         AS str_neg_ratio
+
+            -- ── Extremes ──
+            ,ROUND(MIN(xgb_ros_raw), 6)                                 AS xgb_ros_min
+            ,ROUND(MAX(xgb_ros_raw), 6)                                 AS xgb_ros_max
+            ,ROUND(MIN(dnn_ros_raw), 6)                                 AS dnn_ros_min
+            ,ROUND(MAX(dnn_ros_raw), 6)                                 AS dnn_ros_max
+            ,ROUND(MIN(str_pred), 6)                                    AS str_pred_min
+            ,ROUND(MAX(str_pred), 6)                                    AS str_pred_max
+
+            -- ── DNN ros distribution buckets (focus on the negative range) ──
+            -- Buckets: (-inf,-0.1) [-0.1,0) [0,0.5) [0.5,2) [2,+inf); shares of non-NULL DNN scores.
+            ,ROUND(SUM(CASE WHEN dnn_ros_raw < -0.1 THEN 1 ELSE 0 END)
+                   / NULLIF(COUNT(dnn_ros_raw),0),
+             6)                                                         AS dnn_lt_neg01
+            ,ROUND(SUM(CASE WHEN dnn_ros_raw >= -0.1 AND dnn_ros_raw < 0 THEN 1 ELSE 0 END)
+                   / NULLIF(COUNT(dnn_ros_raw),0),
+             6)                                                         AS dnn_neg01_to_0
+            ,ROUND(SUM(CASE WHEN dnn_ros_raw >= 0 AND dnn_ros_raw < 0.5 THEN 1 ELSE 0 END)
+                   / NULLIF(COUNT(dnn_ros_raw),0),
+             6)                                                         AS dnn_0_to_05
+            ,ROUND(SUM(CASE WHEN dnn_ros_raw >= 0.5 AND dnn_ros_raw < 2 THEN 1 ELSE 0 END)
+                   / NULLIF(COUNT(dnn_ros_raw),0),
+             6)                                                         AS dnn_05_to_2
+            ,ROUND(SUM(CASE WHEN dnn_ros_raw >= 2 THEN 1 ELSE 0 END)
+                   / NULLIF(COUNT(dnn_ros_raw),0),
+             6)                                                         AS dnn_gte_2
+    FROM    t_base
+    GROUP BY dt, apptype, abcode
+)
|
|
|
|
|
+-- Final output: one row per day × apptype × strategy, newest day first.
+SELECT  dt
+        ,apptype
+        ,abcode
+        ,exp_cnt
+        ,str_pred_cnt
+        ,xgb_cnt
+        ,dnn_cnt
+        ,str_actual
+        ,ros_actual
+        ,rov_actual
+        ,str_pred_mean
+        ,xgb_ros_mean
+        ,dnn_ros_mean
+        ,str_copc
+        ,xgb_ros_copc
+        ,dnn_ros_copc
+        ,xgb_neg_ratio
+        ,dnn_neg_ratio
+        ,str_neg_ratio
+        ,xgb_ros_min
+        ,xgb_ros_max
+        ,dnn_ros_min
+        ,dnn_ros_max
+        ,str_pred_min
+        ,str_pred_max
+        ,dnn_lt_neg01
+        ,dnn_neg01_to_0
+        ,dnn_0_to_05
+        ,dnn_05_to_2
+        ,dnn_gte_2
+FROM    t_metrics
+ORDER BY dt DESC
+         ,apptype
+         ,abcode
+;
|