test_2.sql 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. WITH
  2. -- 用户拉活量分层
  3. t_user_type AS (
  4. SELECT DISTINCT type, openid
  5. FROM loghubods.mid_share_return_people_1year
  6. WHERE dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
  7. AND type IS NOT NULL
  8. AND type != 'S_ALL'
  9. AND type NOT REGEXP 'R50'
  10. )
  11. -- 模型预估分数
  12. ,t_score AS (
  13. SELECT apptype
  14. ,videoid
  15. ,recommendtraceid
  16. ,scoresmap
  17. ,sortscore
  18. FROM (
  19. SELECT apptype
  20. ,videoid
  21. ,recommendtraceid
  22. ,scoresmap
  23. ,sortscore
  24. ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid) AS rn
  25. FROM loghubods.statistics_log_hour
  26. WHERE dt LIKE '${dt}%'
  27. AND scoresmap IS NOT NULL
  28. )
  29. WHERE rn = 1
  30. )
  31. -- TOP1 进入内容品类(场): 按回流人数取 TOP1
  32. ,t_top_head_cate AS (
  33. SELECT vt.merge_leve2
  34. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  35. JOIN loghubods.video_merge_tag vt ON base.headvideoid = vt.videoid
  36. WHERE base.dt = '${dt}'
  37. AND vt.merge_leve2 IS NOT NULL
  38. GROUP BY vt.merge_leve2
  39. ORDER BY SUM(CAST(is_return_noself AS BIGINT)) DESC
  40. LIMIT 1
  41. )
  42. -- TOP10 推荐内容品类(货): 按曝光次数取 TOP10
  43. ,t_top_vid_cate AS (
  44. SELECT vt.merge_leve2
  45. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  46. JOIN loghubods.video_merge_tag vt ON base.vid = vt.videoid
  47. WHERE base.dt = '${dt}'
  48. AND vt.merge_leve2 IS NOT NULL
  49. GROUP BY vt.merge_leve2
  50. ORDER BY COUNT(1) DESC
  51. LIMIT 10
  52. )
  53. -- TOP1 内容id(货): 每品类按曝光次数取 TOP1 (曝光>10w)
  54. ,t_top_vid AS (
  55. SELECT merge_leve2, vid
  56. FROM (
  57. SELECT vt.merge_leve2
  58. ,base.vid
  59. ,COUNT(1) AS exp_cnt
  60. ,ROW_NUMBER() OVER (PARTITION BY vt.merge_leve2 ORDER BY COUNT(1) DESC) AS rk
  61. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  62. JOIN loghubods.video_merge_tag vt ON base.vid = vt.videoid
  63. WHERE base.dt = '${dt}'
  64. AND vt.merge_leve2 IS NOT NULL
  65. GROUP BY vt.merge_leve2, base.vid
  66. HAVING exp_cnt > 100000
  67. )
  68. WHERE rk <= 1
  69. )
  70. -- 宽表
  71. ,t_wide AS (
  72. SELECT base.*
  73. ,CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户'
  74. WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50'
  75. WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330'
  76. ELSE e.type
  77. END AS user_type
  78. ,CASE WHEN th.merge_leve2 IS NOT NULL THEN vt_head.merge_leve2 ELSE '其他' END AS head_merge_leve2
  79. ,CASE WHEN tv.merge_leve2 IS NOT NULL THEN vt_vid.merge_leve2 ELSE '其他' END AS vid_merge_leve2
  80. ,CASE WHEN ti.vid IS NOT NULL THEN base.vid ELSE '其他' END AS vid_id
  81. ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
  82. ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
  83. ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
  84. ,e1.sortscore
  85. ,CASE
  86. WHEN CAST(hh AS INT) BETWEEN 0 AND 3 THEN '00-03'
  87. WHEN CAST(hh AS INT) BETWEEN 4 AND 7 THEN '04-07'
  88. WHEN CAST(hh AS INT) BETWEEN 8 AND 11 THEN '08-11'
  89. WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
  90. WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
  91. WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
  92. ELSE '-'
  93. END AS hh_bucket
  94. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  95. LEFT JOIN t_user_type e
  96. ON SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
  97. LEFT JOIN loghubods.video_merge_tag vt_head
  98. ON base.headvideoid = vt_head.videoid
  99. LEFT JOIN loghubods.video_merge_tag vt_vid
  100. ON base.vid = vt_vid.videoid
  101. LEFT JOIN t_score e1
  102. ON base.apptype = e1.apptype
  103. AND base.vid = e1.videoid
  104. AND base.recomtraceid = e1.recommendtraceid
  105. LEFT JOIN t_top_head_cate th
  106. ON vt_head.merge_leve2 = th.merge_leve2
  107. LEFT JOIN t_top_vid_cate tv
  108. ON vt_vid.merge_leve2 = tv.merge_leve2
  109. LEFT JOIN t_top_vid ti
  110. ON base.vid = ti.vid
  111. WHERE base.dt = '${dt}'
  112. )
  113. SELECT
  114. -- ==================== 维度列 ====================
  115. '${dt}' AS dt
  116. ,CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END AS user_type
  117. ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END AS hh_bucket
  118. ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
  119. ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END AS vid_merge_leve2
  120. ,CASE WHEN GROUPING(vid_id) = 1 THEN 'SUM' ELSE NVL(vid_id, 'SUM') END AS vid_id
  121. -- ==================== 基础流量 ====================
  122. ,COUNT(1) AS exposure_cnt
  123. ,COUNT(DISTINCT mid) AS exposure_uv
  124. ,COUNT(DISTINCT vid) AS vid_cnt
  125. ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4) AS exposure_per_user
  126. -- ==================== 分享 ====================
  127. ,SUM(CAST(is_share AS BIGINT)) AS share_exposure_cnt
  128. ,SUM(CAST(share_cnt AS BIGINT)) AS share_cnt
  129. -- ==================== STR 指标 ====================
  130. ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS str_real
  131. ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6) AS str_pred
  132. ,ROUND(
  133. (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
  134. / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
  135. , 4) AS str_copc
  136. ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6) AS str_mae
  137. ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6) AS str_var
  138. -- ==================== ROSN 指标 ====================
  139. ,ROUND(COALESCE(
  140. SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
  141. , 0), 6) AS rosn_real
  142. ,ROUND(COALESCE(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6) AS rosn_pred
  143. ,ROUND(
  144. (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
  145. / NULLIF(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
  146. , 4) AS rosn_copc
  147. ,ROUND(AVG(rosn_pred_origin), 6) AS rosn_pred_origin
  148. ,ROUND(AVG(
  149. CASE WHEN CAST(is_return_noself AS BIGINT) = 1
  150. THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
  151. END
  152. ), 6) AS rosn_mae
  153. ,ROUND(VARIANCE(
  154. CASE WHEN CAST(is_return_noself AS BIGINT) = 1
  155. THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
  156. END
  157. ), 6) AS rosn_var
  158. -- ==================== ROVN 指标 ====================
  159. ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS rovn
  160. ,ROUND(AVG(str_pred * rosn_pred), 6) AS rovn_pred
  161. ,ROUND(
  162. (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
  163. / NULLIF(AVG(str_pred * rosn_pred), 0)
  164. , 4) AS rovn_copc
  165. ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6) AS rovn_mae
  166. ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6) AS rovn_var
  167. ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6) AS sortscore_avg
  168. -- ==================== B链 ====================
  169. ,SUM(CAST(bn_exp AS BIGINT)) AS bn_exp
  170. ,SUM(CAST(bn_pv AS BIGINT)) AS bn_pv
  171. ,SUM(CAST(bn_uv AS BIGINT)) AS bn_uv
  172. ,SUM(CAST(b1_exp AS BIGINT)) AS b1_exp
  173. ,SUM(CAST(b1_pv AS BIGINT)) AS b1_pv
  174. ,SUM(CAST(b1_uv AS BIGINT)) AS b1_uv
  175. ,SUM(CAST(b2_exp AS BIGINT)) AS b2_exp
  176. ,SUM(CAST(b2_pv AS BIGINT)) AS b2_pv
  177. ,SUM(CAST(b2_uv AS BIGINT)) AS b2_uv
  178. ,SUM(CAST(b3_exp AS BIGINT)) AS b3_exp
  179. ,SUM(CAST(b3_pv AS BIGINT)) AS b3_pv
  180. ,SUM(CAST(b3_uv AS BIGINT)) AS b3_uv
  181. -- ==================== C链 (全量depth, 按hop) ====================
  182. ,SUM(CAST(cn_1_exp AS BIGINT)) AS cn_1_exp
  183. ,SUM(CAST(cn_1_pv AS BIGINT)) AS cn_1_pv
  184. ,SUM(CAST(cn_1_uv AS BIGINT)) AS cn_1_uv
  185. ,SUM(CAST(cn_2_exp AS BIGINT)) AS cn_2_exp
  186. ,SUM(CAST(cn_2_pv AS BIGINT)) AS cn_2_pv
  187. ,SUM(CAST(cn_2_uv AS BIGINT)) AS cn_2_uv
  188. ,SUM(CAST(cn_3_exp AS BIGINT)) AS cn_3_exp
  189. ,SUM(CAST(cn_3_pv AS BIGINT)) AS cn_3_pv
  190. ,SUM(CAST(cn_3_uv AS BIGINT)) AS cn_3_uv
  191. ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)) AS cn_total_uv
  192. -- ==================== C链 (按depth拆分) ====================
  193. ,SUM(CAST(c1_1_exp AS BIGINT)) AS c1_1_exp
  194. ,SUM(CAST(c1_1_pv AS BIGINT)) AS c1_1_pv
  195. ,SUM(CAST(c1_1_uv AS BIGINT)) AS c1_1_uv
  196. ,SUM(CAST(c1_2_exp AS BIGINT)) AS c1_2_exp
  197. ,SUM(CAST(c1_2_pv AS BIGINT)) AS c1_2_pv
  198. ,SUM(CAST(c1_2_uv AS BIGINT)) AS c1_2_uv
  199. ,SUM(CAST(c1_3_exp AS BIGINT)) AS c1_3_exp
  200. ,SUM(CAST(c1_3_pv AS BIGINT)) AS c1_3_pv
  201. ,SUM(CAST(c1_3_uv AS BIGINT)) AS c1_3_uv
  202. ,SUM(CAST(c2_1_exp AS BIGINT)) AS c2_1_exp
  203. ,SUM(CAST(c2_1_pv AS BIGINT)) AS c2_1_pv
  204. ,SUM(CAST(c2_1_uv AS BIGINT)) AS c2_1_uv
  205. ,SUM(CAST(c2_2_exp AS BIGINT)) AS c2_2_exp
  206. ,SUM(CAST(c2_2_pv AS BIGINT)) AS c2_2_pv
  207. ,SUM(CAST(c2_2_uv AS BIGINT)) AS c2_2_uv
  208. ,SUM(CAST(c2_3_exp AS BIGINT)) AS c2_3_exp
  209. ,SUM(CAST(c2_3_pv AS BIGINT)) AS c2_3_pv
  210. ,SUM(CAST(c2_3_uv AS BIGINT)) AS c2_3_uv
  211. ,SUM(CAST(c3_1_exp AS BIGINT)) AS c3_1_exp
  212. ,SUM(CAST(c3_1_pv AS BIGINT)) AS c3_1_pv
  213. ,SUM(CAST(c3_1_uv AS BIGINT)) AS c3_1_uv
  214. ,SUM(CAST(c3_2_exp AS BIGINT)) AS c3_2_exp
  215. ,SUM(CAST(c3_2_pv AS BIGINT)) AS c3_2_pv
  216. ,SUM(CAST(c3_2_uv AS BIGINT)) AS c3_2_uv
  217. ,SUM(CAST(c3_3_exp AS BIGINT)) AS c3_3_exp
  218. ,SUM(CAST(c3_3_pv AS BIGINT)) AS c3_3_pv
  219. ,SUM(CAST(c3_3_uv AS BIGINT)) AS c3_3_uv
  220. -- ==================== D链 (全量depth, 按hop) ====================
  221. ,SUM(CAST(d0 AS BIGINT)) AS d0
  222. ,SUM(CAST(dn_1_exp AS BIGINT)) AS dn_1_exp
  223. ,SUM(CAST(dn_1_pv AS BIGINT)) AS dn_1_pv
  224. ,SUM(CAST(dn_1_uv AS BIGINT)) AS dn_1_uv
  225. ,SUM(CAST(dn_2_exp AS BIGINT)) AS dn_2_exp
  226. ,SUM(CAST(dn_2_pv AS BIGINT)) AS dn_2_pv
  227. ,SUM(CAST(dn_2_uv AS BIGINT)) AS dn_2_uv
  228. ,SUM(CAST(dn_3_exp AS BIGINT)) AS dn_3_exp
  229. ,SUM(CAST(dn_3_pv AS BIGINT)) AS dn_3_pv
  230. ,SUM(CAST(dn_3_uv AS BIGINT)) AS dn_3_uv
  231. ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)) AS dn_total_uv
  232. -- ==================== D链 (按depth拆分) ====================
  233. ,SUM(CAST(d1_1_exp AS BIGINT)) AS d1_1_exp
  234. ,SUM(CAST(d1_1_pv AS BIGINT)) AS d1_1_pv
  235. ,SUM(CAST(d1_1_uv AS BIGINT)) AS d1_1_uv
  236. ,SUM(CAST(d1_2_exp AS BIGINT)) AS d1_2_exp
  237. ,SUM(CAST(d1_2_pv AS BIGINT)) AS d1_2_pv
  238. ,SUM(CAST(d1_2_uv AS BIGINT)) AS d1_2_uv
  239. ,SUM(CAST(d1_3_exp AS BIGINT)) AS d1_3_exp
  240. ,SUM(CAST(d1_3_pv AS BIGINT)) AS d1_3_pv
  241. ,SUM(CAST(d1_3_uv AS BIGINT)) AS d1_3_uv
  242. ,SUM(CAST(d2_1_exp AS BIGINT)) AS d2_1_exp
  243. ,SUM(CAST(d2_1_pv AS BIGINT)) AS d2_1_pv
  244. ,SUM(CAST(d2_1_uv AS BIGINT)) AS d2_1_uv
  245. ,SUM(CAST(d2_2_exp AS BIGINT)) AS d2_2_exp
  246. ,SUM(CAST(d2_2_pv AS BIGINT)) AS d2_2_pv
  247. ,SUM(CAST(d2_2_uv AS BIGINT)) AS d2_2_uv
  248. ,SUM(CAST(d2_3_exp AS BIGINT)) AS d2_3_exp
  249. ,SUM(CAST(d2_3_pv AS BIGINT)) AS d2_3_pv
  250. ,SUM(CAST(d2_3_uv AS BIGINT)) AS d2_3_uv
  251. ,SUM(CAST(d3_1_exp AS BIGINT)) AS d3_1_exp
  252. ,SUM(CAST(d3_1_pv AS BIGINT)) AS d3_1_pv
  253. ,SUM(CAST(d3_1_uv AS BIGINT)) AS d3_1_uv
  254. ,SUM(CAST(d3_2_exp AS BIGINT)) AS d3_2_exp
  255. ,SUM(CAST(d3_2_pv AS BIGINT)) AS d3_2_pv
  256. ,SUM(CAST(d3_2_uv AS BIGINT)) AS d3_2_uv
  257. ,SUM(CAST(d3_3_exp AS BIGINT)) AS d3_3_exp
  258. ,SUM(CAST(d3_3_pv AS BIGINT)) AS d3_3_pv
  259. ,SUM(CAST(d3_3_uv AS BIGINT)) AS d3_3_uv
  260. -- ==================== 全链路 ====================
  261. ,SUM(CAST(return_n_uv_noself AS BIGINT))
  262. + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
  263. + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
  264. AS all_return_n_uv
  265. ,ROUND(COALESCE(
  266. ( SUM(CAST(return_n_uv_noself AS BIGINT))
  267. + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
  268. + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
  269. ) / NULLIF(COUNT(1), 0)
  270. , 0), 6) AS all_rovn
  271. -- =====================================================================
  272. -- FROM + GROUP BY CUBE
  273. -- =====================================================================
  274. FROM t_wide
  275. GROUP BY CUBE(
  276. user_type
  277. ,hh_bucket
  278. ,head_merge_leve2
  279. ,vid_merge_leve2
  280. ,vid_id
  281. )
  282. ORDER BY exposure_cnt DESC
  283. ;