test_1.sql 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. WITH
  2. -- 用户拉活量分层
  3. t_user_type AS (
  4. SELECT DISTINCT type, openid
  5. FROM loghubods.mid_share_return_people_1year
  6. WHERE dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
  7. AND type IS NOT NULL
  8. AND type != 'S_ALL'
  9. AND type NOT REGEXP 'R50'
  10. )
  11. -- 模型预估分数
  12. ,t_score AS (
  13. SELECT apptype
  14. ,videoid
  15. ,recommendtraceid
  16. ,scoresmap
  17. ,sortscore
  18. FROM (
  19. SELECT apptype
  20. ,videoid
  21. ,recommendtraceid
  22. ,scoresmap
  23. ,sortscore
  24. ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid) AS rn
  25. FROM loghubods.statistics_log_hour
  26. WHERE dt = '${dt}'
  27. AND scoresmap IS NOT NULL
  28. )
  29. WHERE rn = 1
  30. )
  31. -- 宽表
  32. ,t_wide AS (
  33. SELECT base.*
  34. ,CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户'
  35. WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50'
  36. WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330'
  37. ELSE e.type
  38. END AS user_type
  39. ,vt_head.merge_leve2 AS head_merge_leve2
  40. ,vt_vid.merge_leve2 AS vid_merge_leve2
  41. ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
  42. ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
  43. ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
  44. ,e1.sortscore
  45. ,CASE
  46. WHEN CAST(hh AS INT) BETWEEN 0 AND 3 THEN '00-03'
  47. WHEN CAST(hh AS INT) BETWEEN 4 AND 7 THEN '04-07'
  48. WHEN CAST(hh AS INT) BETWEEN 8 AND 11 THEN '08-11'
  49. WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
  50. WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
  51. WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
  52. ELSE '-'
  53. END AS hh_bucket
  54. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  55. LEFT JOIN t_user_type e
  56. ON SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
  57. LEFT JOIN loghubods.video_merge_tag vt_head
  58. ON base.headvideoid = vt_head.videoid
  59. LEFT JOIN loghubods.video_merge_tag vt_vid
  60. ON base.vid = vt_vid.videoid
  61. LEFT JOIN t_score e1
  62. ON base.apptype = e1.apptype
  63. AND base.vid = e1.videoid
  64. AND base.recomtraceid = e1.recommendtraceid
  65. WHERE base.dt = '${dt}'
  66. )
  67. SELECT
  68. -- ==================== 维度列 ====================
  69. CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END AS user_type
  70. ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END AS hh_bucket
  71. ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
  72. ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END AS vid_merge_leve2
  73. -- ==================== 基础流量 ====================
  74. ,COUNT(1) AS exposure_cnt
  75. ,COUNT(DISTINCT mid) AS exposure_uv
  76. ,COUNT(DISTINCT vid) AS vid_cnt
  77. ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4) AS exposure_per_user
  78. -- ==================== 分享 ====================
  79. ,SUM(CAST(is_share AS BIGINT)) AS share_exposure_cnt
  80. ,SUM(CAST(share_cnt AS BIGINT)) AS share_cnt
  81. -- ==================== STR 指标 ====================
  82. ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS str_real
  83. ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6) AS str_pred
  84. ,ROUND(
  85. (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
  86. / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
  87. , 4) AS str_copc
  88. ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6) AS str_mae
  89. ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6) AS str_var
  90. -- ==================== ROSN 指标 ====================
  91. ,ROUND(COALESCE(
  92. SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
  93. , 0), 6) AS rosn_real
  94. ,ROUND(COALESCE(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6) AS rosn_pred
  95. ,ROUND(
  96. (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
  97. / NULLIF(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
  98. , 4) AS rosn_copc
  99. ,ROUND(AVG(rosn_pred_origin), 6) AS rosn_pred_origin
  100. ,ROUND(AVG(
  101. CASE WHEN CAST(is_return_noself AS BIGINT) = 1
  102. THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
  103. END
  104. ), 6) AS rosn_mae
  105. ,ROUND(VARIANCE(
  106. CASE WHEN CAST(is_return_noself AS BIGINT) = 1
  107. THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
  108. END
  109. ), 6) AS rosn_var
  110. -- ==================== ROVN 指标 ====================
  111. ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS rovn
  112. ,ROUND(AVG(str_pred * rosn_pred), 6) AS rovn_pred
  113. ,ROUND(
  114. (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
  115. / NULLIF(AVG(str_pred * rosn_pred), 0)
  116. , 4) AS rovn_copc
  117. ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6) AS rovn_mae
  118. ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6) AS rovn_var
  119. ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6) AS sortscore_avg
  120. -- ==================== B链 ====================
  121. ,SUM(CAST(bn_exp AS BIGINT)) AS bn_exp
  122. ,SUM(CAST(bn_pv AS BIGINT)) AS bn_pv
  123. ,SUM(CAST(bn_uv AS BIGINT)) AS bn_uv
  124. ,SUM(CAST(b1_exp AS BIGINT)) AS b1_exp
  125. ,SUM(CAST(b1_pv AS BIGINT)) AS b1_pv
  126. ,SUM(CAST(b1_uv AS BIGINT)) AS b1_uv
  127. ,SUM(CAST(b2_exp AS BIGINT)) AS b2_exp
  128. ,SUM(CAST(b2_pv AS BIGINT)) AS b2_pv
  129. ,SUM(CAST(b2_uv AS BIGINT)) AS b2_uv
  130. ,SUM(CAST(b3_exp AS BIGINT)) AS b3_exp
  131. ,SUM(CAST(b3_pv AS BIGINT)) AS b3_pv
  132. ,SUM(CAST(b3_uv AS BIGINT)) AS b3_uv
  133. -- ==================== C链 (全量depth, 按hop) ====================
  134. ,SUM(CAST(cn_1_exp AS BIGINT)) AS cn_1_exp
  135. ,SUM(CAST(cn_1_pv AS BIGINT)) AS cn_1_pv
  136. ,SUM(CAST(cn_1_uv AS BIGINT)) AS cn_1_uv
  137. ,SUM(CAST(cn_2_exp AS BIGINT)) AS cn_2_exp
  138. ,SUM(CAST(cn_2_pv AS BIGINT)) AS cn_2_pv
  139. ,SUM(CAST(cn_2_uv AS BIGINT)) AS cn_2_uv
  140. ,SUM(CAST(cn_3_exp AS BIGINT)) AS cn_3_exp
  141. ,SUM(CAST(cn_3_pv AS BIGINT)) AS cn_3_pv
  142. ,SUM(CAST(cn_3_uv AS BIGINT)) AS cn_3_uv
  143. ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)) AS cn_total_uv
  144. -- ==================== D链 (全量depth, 按hop) ====================
  145. ,SUM(CAST(d0 AS BIGINT)) AS d0
  146. ,SUM(CAST(dn_1_exp AS BIGINT)) AS dn_1_exp
  147. ,SUM(CAST(dn_1_pv AS BIGINT)) AS dn_1_pv
  148. ,SUM(CAST(dn_1_uv AS BIGINT)) AS dn_1_uv
  149. ,SUM(CAST(dn_2_exp AS BIGINT)) AS dn_2_exp
  150. ,SUM(CAST(dn_2_pv AS BIGINT)) AS dn_2_pv
  151. ,SUM(CAST(dn_2_uv AS BIGINT)) AS dn_2_uv
  152. ,SUM(CAST(dn_3_exp AS BIGINT)) AS dn_3_exp
  153. ,SUM(CAST(dn_3_pv AS BIGINT)) AS dn_3_pv
  154. ,SUM(CAST(dn_3_uv AS BIGINT)) AS dn_3_uv
  155. ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)) AS dn_total_uv
  156. -- ==================== 全链路 ====================
  157. ,SUM(CAST(return_n_uv_noself AS BIGINT))
  158. + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
  159. + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
  160. AS all_return_n_uv
  161. ,ROUND(COALESCE(
  162. ( SUM(CAST(return_n_uv_noself AS BIGINT))
  163. + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
  164. + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
  165. ) / NULLIF(COUNT(1), 0)
  166. , 0), 6) AS all_rovn
  167. -- =====================================================================
  168. -- FROM + GROUP BY CUBE
  169. -- =====================================================================
  170. FROM t_wide
  171. GROUP BY CUBE(
  172. user_type
  173. ,hh_bucket
  174. ,head_merge_leve2
  175. ,vid_merge_leve2
  176. )
  177. ORDER BY exposure_cnt DESC
  178. limit 1000
  179. ;