loghubods.dwd_recsys_alg_exposure_agg_wide_20260209.sql 50 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678
  1. -- =====================================================================
  2. -- 曝光回流链路 CUBE 聚合表 (宽表版, 含用户/品类维度 + 模型预估 + 全链路漏斗)
  3. -- 维度: user_type × hh_bucket × head_merge_leve2 × vid_merge_leve2 × vid_id (CUBE)
  4. -- 依赖: base_20260209 → (JOIN user_type + video_merge_tag + t_score) → CUBE 聚合
  5. -- 参考: de.sql + dwd_recsys_alg_exposure_agg_20260209
  6. -- =====================================================================
  7. --
  8. -- 指标分区:
  9. -- 基础流量 exposure_cnt / exposure_uv / vid_cnt / exposure_per_user
  10. -- 分享&回流漏斗 share_exposure_cnt → share_cnt → return_exposure_cnt → return_uv
  11. -- + 4 个 rate (share_rate / return_rate / return_rate_noself / share_return_rate)
  12. -- 模型预估 STR (曝光→非自身回流概率) / ROSN (条件回流UV) / ROVN (STR×ROSN)
  13. -- 每组: _real(label) + _pred(预估) + _copc + _mae + _var
  14. -- _real 与漏斗字段等价: str_real=return_rate_noself, rovn_real=return_uv_noself/exposure_cnt
  15. -- B/C/D 链 每级: _uv + _pv + _exp + _ror + _rov
  16. -- 全链路 all_uv/pv/exp = B + C + D, all_ror/rov
  17. --
  18. -- rov/ror 分母推导 (逐级递推):
  19. -- depth 维度: depth=1 的成本=该hop入口; depth=N+1 的成本=depth=N 的输出
  20. -- hop 维度: hop1 的成本=链路入口; hop N+1 的成本=hop N 全量depth的输出
  21. --
  22. -- B链: bn/b1 → rov=uv/COUNT(1), ror=uv/COUNT(DISTINCT mid)
  23. -- b2 → rov=uv/b1_exp, ror=uv/b1_uv
  24. -- b3 → rov=uv/b2_exp, ror=uv/b2_uv
  25. -- C链全量: cn_1 → rov=uv/bn_exp, ror=uv/bn_uv
  26. -- cn_2 → rov=uv/cn_1_exp, ror=uv/cn_1_uv
  27. -- cn_3 → rov=uv/cn_2_exp, ror=uv/cn_2_uv
  28. -- C链depth拆分: cX_Y → rov=uv/上级exp, ror=uv/上级uv (X=depth, Y=hop)
  29. -- hop1: c1_1→bn, c2_1→c1_1, c3_1→c2_1
  30. -- hop2: c1_2→cn_1, c2_2→c1_2, c3_2→c2_2
  31. -- hop3: c1_3→cn_2, c2_3→c1_3, c3_3→c2_3
  32. -- D链全量: dn_1 → rov=uv/d0, ror=uv/COUNT(DISTINCT mid)
  33. -- dn_2 → rov=uv/dn_1_exp, ror=uv/dn_1_uv
  34. -- dn_3 → rov=uv/dn_2_exp, ror=uv/dn_2_uv
  35. -- D链depth拆分: dX_Y (同 C 链模式)
  36. -- hop1: d1_1→d0/mid, d2_1→d1_1, d3_1→d2_1
  37. -- hop2: d1_2→dn_1, d2_2→d1_2, d3_2→d2_2
  38. -- hop3: d1_3→dn_2, d2_3→d1_3, d3_3→d2_3
  39. -- 全链路: all → rov=uv/COUNT(1), ror=uv/COUNT(DISTINCT mid)
  40. -- =====================================================================
  41. -- DROP TABLE IF EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209;
  42. CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 (
  43. -- ==================== 维度列 ====================
  44. dt STRING COMMENT '日期'
  45. ,user_type STRING COMMENT '用户拉活量分层(R0&新用户/R1-50/R_180_330等,汇总为SUM)'
  46. ,hh_bucket STRING COMMENT '小时段(00-03/04-07/.../20-23,汇总为SUM)'
  47. ,head_merge_leve2 STRING COMMENT '进入内容品类(headvideoid品类,汇总为SUM)'
  48. ,vid_merge_leve2 STRING COMMENT '推荐内容品类(vid品类,TOP10曝光+其他,汇总为SUM)'
  49. ,vid_id STRING COMMENT '内容id(品类曝光TOP1+其他,汇总为SUM)'
  50. -- ==================== 基础流量 ====================
  51. ,exposure_cnt BIGINT COMMENT '曝光次数'
  52. ,exposure_uv BIGINT COMMENT '曝光人数(mid去重)'
  53. ,vid_cnt BIGINT COMMENT '视频个数(vid去重)'
  54. ,exposure_per_user DOUBLE COMMENT '人均曝光次数 = 曝光次数/曝光人数'
  55. -- ==================== 分享 & 回流漏斗 ====================
  56. ,share_exposure_cnt BIGINT COMMENT '产生分享的曝光数'
  57. ,share_cnt BIGINT COMMENT '分享总次数'
  58. ,return_exposure_cnt BIGINT COMMENT '产生回流的曝光数(含自身) = SUM(is_return_n)'
  59. ,return_exposure_cnt_noself BIGINT COMMENT '产生回流的曝光数(非自身) = SUM(is_return_noself)'
  60. ,return_uv BIGINT COMMENT '回流人数(含自身) = SUM(return_n_uv)'
  61. ,return_uv_noself BIGINT COMMENT '回流人数(非自身) = SUM(return_n_uv_noself)'
  62. ,share_rate DOUBLE COMMENT '分享率 = share_exposure_cnt/exposure_cnt'
  63. ,return_rate DOUBLE COMMENT '回流率(含自身) = return_exposure_cnt/exposure_cnt'
  64. ,return_rate_noself DOUBLE COMMENT '回流率(非自身) = return_exposure_cnt_noself/exposure_cnt'
  65. ,share_return_rate DOUBLE COMMENT '分享→回流转化率(非自身) = return_exposure_cnt_noself/share_exposure_cnt'
  66. -- ==================== 模型预估: STR (曝光→非自身回流概率) ====================
  67. ,str_real DOUBLE COMMENT '= return_rate_noself, 模型label'
  68. ,str_pred DOUBLE COMMENT 'STR预估 = SUM(str_pred)/exposure_cnt'
  69. ,str_copc DOUBLE COMMENT 'STR copc = str_real/str_pred'
  70. ,str_mae DOUBLE COMMENT 'STR MAE = AVG(|str_pred - str_real|)'
  71. ,str_var DOUBLE COMMENT 'STR VAR = VARIANCE(str_pred - str_real)'
  72. -- ==================== 模型预估: ROSN (条件回流UV, 非自身) ====================
  73. ,rosn_real DOUBLE COMMENT '= return_uv_noself/return_exposure_cnt_noself, 模型label'
  74. ,rosn_pred DOUBLE COMMENT 'ROSN预估 = SUM(rosn_pred WHERE is_return_noself=1)/SUM(is_return_noself)'
  75. ,rosn_copc DOUBLE COMMENT 'ROSN copc = rosn_real/rosn_pred'
  76. ,rosn_pred_origin DOUBLE COMMENT 'ROSN原始预估均值 = AVG(rosn_pred_origin)'
  77. ,rosn_mae DOUBLE COMMENT 'ROSN MAE = AVG(|rosn_pred - rosn_real|) WHERE is_return_noself=1'
  78. ,rosn_var DOUBLE COMMENT 'ROSN VAR = VARIANCE(rosn_pred - rosn_real) WHERE is_return_noself=1'
  79. -- ==================== 模型预估: ROVN (STR×ROSN) ====================
  80. ,rovn_real DOUBLE COMMENT '= return_uv_noself/exposure_cnt, 模型label'
  81. ,rovn_pred DOUBLE COMMENT 'rovn预估 = AVG(str_pred*rosn_pred)'
  82. ,rovn_copc DOUBLE COMMENT 'rovn copc = rovn_real/rovn_pred'
  83. ,rovn_mae DOUBLE COMMENT 'rovn MAE = AVG(|rovn_pred - rovn_real|)'
  84. ,rovn_var DOUBLE COMMENT 'rovn VAR = VARIANCE(rovn_pred - rovn_real)'
  85. ,sortscore_avg DOUBLE COMMENT 'sortscore均值'
  86. -- ==================== B链 (分享→点击) ====================
  87. ,bn_uv BIGINT COMMENT 'B链全量: 回流去重人数'
  88. ,bn_pv BIGINT COMMENT 'B链全量: 回流点击次数'
  89. ,bn_exp BIGINT COMMENT 'B链全量: 回流session曝光数'
  90. ,bn_ror DOUBLE COMMENT 'bn_uv/exposure_uv'
  91. ,bn_rov DOUBLE COMMENT 'bn_uv/exposure_cnt'
  92. ,b1_uv BIGINT COMMENT 'B链depth=1: 回流去重人数'
  93. ,b1_pv BIGINT COMMENT 'B链depth=1: 回流点击次数'
  94. ,b1_exp BIGINT COMMENT 'B链depth=1: 回流session曝光数'
  95. ,b1_ror DOUBLE COMMENT 'b1_uv/exposure_uv'
  96. ,b1_rov DOUBLE COMMENT 'b1_uv/exposure_cnt'
  97. ,b2_uv BIGINT COMMENT 'B链depth=2: 回流去重人数'
  98. ,b2_pv BIGINT COMMENT 'B链depth=2: 回流点击次数'
  99. ,b2_exp BIGINT COMMENT 'B链depth=2: 回流session曝光数'
  100. ,b2_ror DOUBLE COMMENT 'b2_uv/b1_uv'
  101. ,b2_rov DOUBLE COMMENT 'b2_uv/b1_exp'
  102. ,b3_uv BIGINT COMMENT 'B链depth=3: 回流去重人数'
  103. ,b3_pv BIGINT COMMENT 'B链depth=3: 回流点击次数'
  104. ,b3_exp BIGINT COMMENT 'B链depth=3: 回流session曝光数'
  105. ,b3_ror DOUBLE COMMENT 'b3_uv/b2_uv'
  106. ,b3_rov DOUBLE COMMENT 'b3_uv/b2_exp'
  107. -- ==================== C链 (全量depth, 按hop) ====================
  108. ,cn_1_uv BIGINT COMMENT 'C链hop1: 回流去重人数'
  109. ,cn_1_pv BIGINT COMMENT 'C链hop1: 回流点击次数'
  110. ,cn_1_exp BIGINT COMMENT 'C链hop1: 回流session曝光数'
  111. ,cn_1_ror DOUBLE COMMENT 'cn_1_uv/bn_uv'
  112. ,cn_1_rov DOUBLE COMMENT 'cn_1_uv/bn_exp'
  113. ,cn_2_uv BIGINT COMMENT 'C链hop2: 回流去重人数'
  114. ,cn_2_pv BIGINT COMMENT 'C链hop2: 回流点击次数'
  115. ,cn_2_exp BIGINT COMMENT 'C链hop2: 回流session曝光数'
  116. ,cn_2_ror DOUBLE COMMENT 'cn_2_uv/cn_1_uv'
  117. ,cn_2_rov DOUBLE COMMENT 'cn_2_uv/cn_1_exp'
  118. ,cn_3_uv BIGINT COMMENT 'C链hop3: 回流去重人数'
  119. ,cn_3_pv BIGINT COMMENT 'C链hop3: 回流点击次数'
  120. ,cn_3_exp BIGINT COMMENT 'C链hop3: 回流session曝光数'
  121. ,cn_3_ror DOUBLE COMMENT 'cn_3_uv/cn_2_uv'
  122. ,cn_3_rov DOUBLE COMMENT 'cn_3_uv/cn_2_exp'
  123. ,cn_total_uv BIGINT COMMENT 'C链合计UV = cn_1_uv + cn_2_uv + cn_3_uv'
  124. ,cn_total_pv BIGINT COMMENT 'C链合计PV = cn_1_pv + cn_2_pv + cn_3_pv'
  125. ,cn_total_exp BIGINT COMMENT 'C链合计EXP = cn_1_exp + cn_2_exp + cn_3_exp'
  126. ,cn_total_ror DOUBLE COMMENT 'cn_total_uv/bn_uv'
  127. ,cn_total_rov DOUBLE COMMENT 'cn_total_uv/bn_exp'
  128. -- C链 depth拆分 hop1
  129. ,c1_1_uv BIGINT COMMENT 'C链d1-hop1: 回流去重人数'
  130. ,c1_1_pv BIGINT COMMENT 'C链d1-hop1: 回流点击次数'
  131. ,c1_1_exp BIGINT COMMENT 'C链d1-hop1: 回流session曝光数'
  132. ,c1_1_ror DOUBLE COMMENT 'c1_1_uv/bn_uv'
  133. ,c1_1_rov DOUBLE COMMENT 'c1_1_uv/bn_exp'
  134. ,c2_1_uv BIGINT COMMENT 'C链d2-hop1: 回流去重人数'
  135. ,c2_1_pv BIGINT COMMENT 'C链d2-hop1: 回流点击次数'
  136. ,c2_1_exp BIGINT COMMENT 'C链d2-hop1: 回流session曝光数'
  137. ,c2_1_ror DOUBLE COMMENT 'c2_1_uv/c1_1_uv'
  138. ,c2_1_rov DOUBLE COMMENT 'c2_1_uv/c1_1_exp'
  139. ,c3_1_uv BIGINT COMMENT 'C链d3-hop1: 回流去重人数'
  140. ,c3_1_pv BIGINT COMMENT 'C链d3-hop1: 回流点击次数'
  141. ,c3_1_exp BIGINT COMMENT 'C链d3-hop1: 回流session曝光数'
  142. ,c3_1_ror DOUBLE COMMENT 'c3_1_uv/c2_1_uv'
  143. ,c3_1_rov DOUBLE COMMENT 'c3_1_uv/c2_1_exp'
  144. -- C链 depth拆分 hop2
  145. ,c1_2_uv BIGINT COMMENT 'C链d1-hop2: 回流去重人数'
  146. ,c1_2_pv BIGINT COMMENT 'C链d1-hop2: 回流点击次数'
  147. ,c1_2_exp BIGINT COMMENT 'C链d1-hop2: 回流session曝光数'
  148. ,c1_2_ror DOUBLE COMMENT 'c1_2_uv/cn_1_uv'
  149. ,c1_2_rov DOUBLE COMMENT 'c1_2_uv/cn_1_exp'
  150. ,c2_2_uv BIGINT COMMENT 'C链d2-hop2: 回流去重人数'
  151. ,c2_2_pv BIGINT COMMENT 'C链d2-hop2: 回流点击次数'
  152. ,c2_2_exp BIGINT COMMENT 'C链d2-hop2: 回流session曝光数'
  153. ,c2_2_ror DOUBLE COMMENT 'c2_2_uv/c1_2_uv'
  154. ,c2_2_rov DOUBLE COMMENT 'c2_2_uv/c1_2_exp'
  155. ,c3_2_uv BIGINT COMMENT 'C链d3-hop2: 回流去重人数'
  156. ,c3_2_pv BIGINT COMMENT 'C链d3-hop2: 回流点击次数'
  157. ,c3_2_exp BIGINT COMMENT 'C链d3-hop2: 回流session曝光数'
  158. ,c3_2_ror DOUBLE COMMENT 'c3_2_uv/c2_2_uv'
  159. ,c3_2_rov DOUBLE COMMENT 'c3_2_uv/c2_2_exp'
  160. -- C链 depth拆分 hop3
  161. ,c1_3_uv BIGINT COMMENT 'C链d1-hop3: 回流去重人数'
  162. ,c1_3_pv BIGINT COMMENT 'C链d1-hop3: 回流点击次数'
  163. ,c1_3_exp BIGINT COMMENT 'C链d1-hop3: 回流session曝光数'
  164. ,c1_3_ror DOUBLE COMMENT 'c1_3_uv/cn_2_uv'
  165. ,c1_3_rov DOUBLE COMMENT 'c1_3_uv/cn_2_exp'
  166. ,c2_3_uv BIGINT COMMENT 'C链d2-hop3: 回流去重人数'
  167. ,c2_3_pv BIGINT COMMENT 'C链d2-hop3: 回流点击次数'
  168. ,c2_3_exp BIGINT COMMENT 'C链d2-hop3: 回流session曝光数'
  169. ,c2_3_ror DOUBLE COMMENT 'c2_3_uv/c1_3_uv'
  170. ,c2_3_rov DOUBLE COMMENT 'c2_3_uv/c1_3_exp'
  171. ,c3_3_uv BIGINT COMMENT 'C链d3-hop3: 回流去重人数'
  172. ,c3_3_pv BIGINT COMMENT 'C链d3-hop3: 回流点击次数'
  173. ,c3_3_exp BIGINT COMMENT 'C链d3-hop3: 回流session曝光数'
  174. ,c3_3_ror DOUBLE COMMENT 'c3_3_uv/c2_3_uv'
  175. ,c3_3_rov DOUBLE COMMENT 'c3_3_uv/c2_3_exp'
  176. -- ==================== D链 (session内后续曝光传播) ====================
  177. ,d0 BIGINT COMMENT 'D链初始成本: session内后续曝光数'
  178. ,dn_1_uv BIGINT COMMENT 'D链hop1: 回流去重人数'
  179. ,dn_1_pv BIGINT COMMENT 'D链hop1: 回流点击次数'
  180. ,dn_1_exp BIGINT COMMENT 'D链hop1: 回流session曝光数'
  181. ,dn_1_ror DOUBLE COMMENT 'dn_1_uv/exposure_uv'
  182. ,dn_1_rov DOUBLE COMMENT 'dn_1_uv/d0'
  183. ,dn_2_uv BIGINT COMMENT 'D链hop2: 回流去重人数'
  184. ,dn_2_pv BIGINT COMMENT 'D链hop2: 回流点击次数'
  185. ,dn_2_exp BIGINT COMMENT 'D链hop2: 回流session曝光数'
  186. ,dn_2_ror DOUBLE COMMENT 'dn_2_uv/dn_1_uv'
  187. ,dn_2_rov DOUBLE COMMENT 'dn_2_uv/dn_1_exp'
  188. ,dn_3_uv BIGINT COMMENT 'D链hop3: 回流去重人数'
  189. ,dn_3_pv BIGINT COMMENT 'D链hop3: 回流点击次数'
  190. ,dn_3_exp BIGINT COMMENT 'D链hop3: 回流session曝光数'
  191. ,dn_3_ror DOUBLE COMMENT 'dn_3_uv/dn_2_uv'
  192. ,dn_3_rov DOUBLE COMMENT 'dn_3_uv/dn_2_exp'
  193. ,dn_total_uv BIGINT COMMENT 'D链合计UV = dn_1_uv + dn_2_uv + dn_3_uv'
  194. ,dn_total_pv BIGINT COMMENT 'D链合计PV = dn_1_pv + dn_2_pv + dn_3_pv'
  195. ,dn_total_exp BIGINT COMMENT 'D链合计EXP = dn_1_exp + dn_2_exp + dn_3_exp'
  196. ,dn_total_ror DOUBLE COMMENT 'dn_total_uv/exposure_uv'
  197. ,dn_total_rov DOUBLE COMMENT 'dn_total_uv/d0'
  198. -- D链 depth拆分 hop1
  199. ,d1_1_uv BIGINT COMMENT 'D链d1-hop1: 回流去重人数'
  200. ,d1_1_pv BIGINT COMMENT 'D链d1-hop1: 回流点击次数'
  201. ,d1_1_exp BIGINT COMMENT 'D链d1-hop1: 回流session曝光数'
  202. ,d1_1_ror DOUBLE COMMENT 'd1_1_uv/exposure_uv'
  203. ,d1_1_rov DOUBLE COMMENT 'd1_1_uv/d0'
  204. ,d2_1_uv BIGINT COMMENT 'D链d2-hop1: 回流去重人数'
  205. ,d2_1_pv BIGINT COMMENT 'D链d2-hop1: 回流点击次数'
  206. ,d2_1_exp BIGINT COMMENT 'D链d2-hop1: 回流session曝光数'
  207. ,d2_1_ror DOUBLE COMMENT 'd2_1_uv/d1_1_uv'
  208. ,d2_1_rov DOUBLE COMMENT 'd2_1_uv/d1_1_exp'
  209. ,d3_1_uv BIGINT COMMENT 'D链d3-hop1: 回流去重人数'
  210. ,d3_1_pv BIGINT COMMENT 'D链d3-hop1: 回流点击次数'
  211. ,d3_1_exp BIGINT COMMENT 'D链d3-hop1: 回流session曝光数'
  212. ,d3_1_ror DOUBLE COMMENT 'd3_1_uv/d2_1_uv'
  213. ,d3_1_rov DOUBLE COMMENT 'd3_1_uv/d2_1_exp'
  214. -- D链 depth拆分 hop2
  215. ,d1_2_uv BIGINT COMMENT 'D链d1-hop2: 回流去重人数'
  216. ,d1_2_pv BIGINT COMMENT 'D链d1-hop2: 回流点击次数'
  217. ,d1_2_exp BIGINT COMMENT 'D链d1-hop2: 回流session曝光数'
  218. ,d1_2_ror DOUBLE COMMENT 'd1_2_uv/dn_1_uv'
  219. ,d1_2_rov DOUBLE COMMENT 'd1_2_uv/dn_1_exp'
  220. ,d2_2_uv BIGINT COMMENT 'D链d2-hop2: 回流去重人数'
  221. ,d2_2_pv BIGINT COMMENT 'D链d2-hop2: 回流点击次数'
  222. ,d2_2_exp BIGINT COMMENT 'D链d2-hop2: 回流session曝光数'
  223. ,d2_2_ror DOUBLE COMMENT 'd2_2_uv/d1_2_uv'
  224. ,d2_2_rov DOUBLE COMMENT 'd2_2_uv/d1_2_exp'
  225. ,d3_2_uv BIGINT COMMENT 'D链d3-hop2: 回流去重人数'
  226. ,d3_2_pv BIGINT COMMENT 'D链d3-hop2: 回流点击次数'
  227. ,d3_2_exp BIGINT COMMENT 'D链d3-hop2: 回流session曝光数'
  228. ,d3_2_ror DOUBLE COMMENT 'd3_2_uv/d2_2_uv'
  229. ,d3_2_rov DOUBLE COMMENT 'd3_2_uv/d2_2_exp'
  230. -- D链 depth拆分 hop3
  231. ,d1_3_uv BIGINT COMMENT 'D链d1-hop3: 回流去重人数'
  232. ,d1_3_pv BIGINT COMMENT 'D链d1-hop3: 回流点击次数'
  233. ,d1_3_exp BIGINT COMMENT 'D链d1-hop3: 回流session曝光数'
  234. ,d1_3_ror DOUBLE COMMENT 'd1_3_uv/dn_2_uv'
  235. ,d1_3_rov DOUBLE COMMENT 'd1_3_uv/dn_2_exp'
  236. ,d2_3_uv BIGINT COMMENT 'D链d2-hop3: 回流去重人数'
  237. ,d2_3_pv BIGINT COMMENT 'D链d2-hop3: 回流点击次数'
  238. ,d2_3_exp BIGINT COMMENT 'D链d2-hop3: 回流session曝光数'
  239. ,d2_3_ror DOUBLE COMMENT 'd2_3_uv/d1_3_uv'
  240. ,d2_3_rov DOUBLE COMMENT 'd2_3_uv/d1_3_exp'
  241. ,d3_3_uv BIGINT COMMENT 'D链d3-hop3: 回流去重人数'
  242. ,d3_3_pv BIGINT COMMENT 'D链d3-hop3: 回流点击次数'
  243. ,d3_3_exp BIGINT COMMENT 'D链d3-hop3: 回流session曝光数'
  244. ,d3_3_ror DOUBLE COMMENT 'd3_3_uv/d2_3_uv'
  245. ,d3_3_rov DOUBLE COMMENT 'd3_3_uv/d2_3_exp'
  246. -- ==================== 全链路 ====================
  247. ,all_uv BIGINT COMMENT '全链路拉回UV = bn_uv + cn_total_uv + dn_total_uv'
  248. ,all_pv BIGINT COMMENT '全链路拉回PV = bn_pv + cn_total_pv + dn_total_pv'
  249. ,all_exp BIGINT COMMENT '全链路拉回EXP = bn_exp + cn_total_exp + dn_total_exp'
  250. ,all_ror DOUBLE COMMENT 'all_uv/exposure_uv'
  251. ,all_rov DOUBLE COMMENT 'all_uv/exposure_cnt'
  252. )
  253. COMMENT '曝光回流链路CUBE聚合-宽表版 (5维度: 用户分层/小时段/进入品类TOP1/推荐品类TOP10/内容idTOP1)'
  254. ;
  255. -- =====================================================================
  256. -- INSERT: 从 wide 宽表聚合, CUBE 全维度组合
  257. -- =====================================================================
  258. -- SELECT * FROM loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 WHERE dt = '${dt}' ORDER BY exposure_cnt DESC LIMIT 100;
  259. INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_agg_wide_20260209
  260. WITH
  261. -- 用户拉活量分层
  262. t_user_type AS (
  263. SELECT DISTINCT type, openid
  264. FROM loghubods.mid_share_return_people_1year
  265. WHERE dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
  266. AND type IS NOT NULL
  267. AND type != 'S_ALL'
  268. AND type NOT REGEXP 'R50'
  269. )
  270. -- 模型预估分数
  271. ,t_score AS (
  272. SELECT apptype
  273. ,videoid
  274. ,recommendtraceid
  275. ,scoresmap
  276. ,sortscore
  277. FROM (
  278. SELECT apptype
  279. ,videoid
  280. ,recommendtraceid
  281. ,scoresmap
  282. ,sortscore
  283. ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid) AS rn
  284. FROM loghubods.statistics_log_hour
  285. WHERE dt LIKE '${dt}%'
  286. AND scoresmap IS NOT NULL
  287. )
  288. WHERE rn = 1
  289. )
  290. -- TOP1 进入内容品类(场): 按回流人数取 TOP1
  291. ,t_top_head_cate AS (
  292. SELECT vt.merge_leve2
  293. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  294. JOIN loghubods.video_merge_tag vt ON base.headvideoid = vt.videoid
  295. WHERE base.dt = '${dt}'
  296. AND vt.merge_leve2 IS NOT NULL
  297. GROUP BY vt.merge_leve2
  298. ORDER BY SUM(CAST(is_return_noself AS BIGINT)) DESC
  299. LIMIT 1
  300. )
  301. -- TOP10 推荐内容品类(货): 按曝光次数取 TOP10
  302. ,t_top_vid_cate AS (
  303. SELECT vt.merge_leve2
  304. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  305. JOIN loghubods.video_merge_tag vt ON base.vid = vt.videoid
  306. WHERE base.dt = '${dt}'
  307. AND vt.merge_leve2 IS NOT NULL
  308. GROUP BY vt.merge_leve2
  309. ORDER BY COUNT(1) DESC
  310. LIMIT 10
  311. )
  312. -- TOP1 内容id(货): 每品类按曝光次数取 TOP1 (曝光>10w)
  313. ,t_top_vid AS (
  314. SELECT merge_leve2, vid
  315. FROM (
  316. SELECT vt.merge_leve2
  317. ,base.vid
  318. ,COUNT(1) AS exp_cnt
  319. ,ROW_NUMBER() OVER (PARTITION BY vt.merge_leve2 ORDER BY COUNT(1) DESC) AS rk
  320. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  321. JOIN loghubods.video_merge_tag vt ON base.vid = vt.videoid
  322. WHERE base.dt = '${dt}'
  323. AND vt.merge_leve2 IS NOT NULL
  324. GROUP BY vt.merge_leve2, base.vid
  325. HAVING exp_cnt > 100000
  326. )
  327. WHERE rk <= 1
  328. )
  329. -- 宽表
  330. ,t_wide AS (
  331. SELECT base.*
  332. ,CASE WHEN e.type IS NULL OR e.type = 'R_0' THEN 'R0&新用户'
  333. WHEN e.type IN ('R_1','R_2_10','R_10_50') THEN 'R1-50'
  334. WHEN e.type IN ('R_50_100','R_100_180','R_180_330') THEN 'R_180_330'
  335. ELSE e.type
  336. END AS user_type
  337. ,CASE WHEN th.merge_leve2 IS NOT NULL THEN vt_head.merge_leve2 ELSE '其他' END AS head_merge_leve2
  338. ,CASE WHEN tv.merge_leve2 IS NOT NULL THEN vt_vid.merge_leve2 ELSE '其他' END AS vid_merge_leve2
  339. ,CASE WHEN ti.vid IS NOT NULL THEN base.vid ELSE '其他' END AS vid_id
  340. ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
  341. ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
  342. ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
  343. ,e1.sortscore
  344. ,CASE
  345. WHEN CAST(hh AS INT) BETWEEN 0 AND 3 THEN '00-03'
  346. WHEN CAST(hh AS INT) BETWEEN 4 AND 7 THEN '04-07'
  347. WHEN CAST(hh AS INT) BETWEEN 8 AND 11 THEN '08-11'
  348. WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
  349. WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
  350. WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
  351. ELSE '-'
  352. END AS hh_bucket
  353. FROM loghubods.dwd_recsys_alg_exposure_base_20260209 base
  354. LEFT JOIN t_user_type e
  355. ON SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
  356. LEFT JOIN loghubods.video_merge_tag vt_head
  357. ON base.headvideoid = vt_head.videoid
  358. LEFT JOIN loghubods.video_merge_tag vt_vid
  359. ON base.vid = vt_vid.videoid
  360. LEFT JOIN t_score e1
  361. ON base.apptype = e1.apptype
  362. AND base.vid = e1.videoid
  363. AND base.recomtraceid = e1.recommendtraceid
  364. LEFT JOIN t_top_head_cate th
  365. ON vt_head.merge_leve2 = th.merge_leve2
  366. LEFT JOIN t_top_vid_cate tv
  367. ON vt_vid.merge_leve2 = tv.merge_leve2
  368. LEFT JOIN t_top_vid ti
  369. ON base.vid = ti.vid
  370. WHERE base.dt = '${dt}'
  371. )
  372. SELECT
  373. -- ==================== 维度列 ====================
  374. '${dt}' AS dt
  375. ,CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END AS user_type
  376. ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END AS hh_bucket
  377. ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
  378. ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END AS vid_merge_leve2
  379. ,CASE WHEN GROUPING(vid_id) = 1 THEN 'SUM' ELSE NVL(vid_id, 'SUM') END AS vid_id
  380. -- ==================== 基础流量 ====================
  381. ,COUNT(1) AS exposure_cnt
  382. ,COUNT(DISTINCT mid) AS exposure_uv
  383. ,COUNT(DISTINCT vid) AS vid_cnt
  384. ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4) AS exposure_per_user
  385. -- ==================== 分享 & 回流漏斗 ====================
  386. ,SUM(CAST(is_share AS BIGINT)) AS share_exposure_cnt
  387. ,SUM(CAST(share_cnt AS BIGINT)) AS share_cnt
  388. ,SUM(CAST(is_return_n AS BIGINT)) AS return_exposure_cnt
  389. ,SUM(CAST(is_return_noself AS BIGINT)) AS return_exposure_cnt_noself
  390. ,SUM(CAST(return_n_uv AS BIGINT)) AS return_uv
  391. ,SUM(CAST(return_n_uv_noself AS BIGINT)) AS return_uv_noself
  392. ,ROUND(COALESCE(SUM(CAST(is_share AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6) AS share_rate
  393. ,ROUND(COALESCE(SUM(CAST(is_return_n AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6) AS return_rate
  394. ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6) AS return_rate_noself
  395. ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(CAST(SUM(CAST(is_share AS BIGINT)) AS DOUBLE), 0), 0), 6) AS share_return_rate
  396. -- ==================== 模型预估: STR (曝光→非自身回流概率) ====================
  397. ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS str_real
  398. ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6) AS str_pred
  399. ,ROUND(
  400. (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
  401. / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
  402. , 4) AS str_copc
  403. ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6) AS str_mae
  404. ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6) AS str_var
  405. -- ==================== 模型预估: ROSN (条件回流UV, 非自身) ====================
  406. ,ROUND(COALESCE(
  407. SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
  408. , 0), 6) AS rosn_real
  409. ,ROUND(COALESCE(SUM(CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred END) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6) AS rosn_pred
  410. ,ROUND(
  411. (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
  412. / NULLIF(SUM(CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred END) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
  413. , 4) AS rosn_copc
  414. ,ROUND(AVG(rosn_pred_origin), 6) AS rosn_pred_origin
  415. ,ROUND(AVG(
  416. CASE WHEN CAST(is_return_noself AS BIGINT) = 1
  417. THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
  418. END
  419. ), 6) AS rosn_mae
  420. ,ROUND(VARIANCE(
  421. CASE WHEN CAST(is_return_noself AS BIGINT) = 1
  422. THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
  423. END
  424. ), 6) AS rosn_var
  425. -- ==================== 模型预估: ROVN (STR×ROSN) ====================
  426. ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6) AS rovn_real
  427. ,ROUND(AVG(str_pred * rosn_pred), 6) AS rovn_pred
  428. ,ROUND(
  429. (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
  430. / NULLIF(AVG(str_pred * rosn_pred), 0)
  431. , 4) AS rovn_copc
  432. ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6) AS rovn_mae
  433. ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6) AS rovn_var
  434. ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6) AS sortscore_avg
  435. -- ==================== B链 ====================
  436. ,SUM(CAST(bn_uv AS BIGINT)) AS bn_uv
  437. ,SUM(CAST(bn_pv AS BIGINT)) AS bn_pv
  438. ,SUM(CAST(bn_exp AS BIGINT)) AS bn_exp
  439. ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6) AS bn_ror
  440. ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6) AS bn_rov
  441. ,SUM(CAST(b1_uv AS BIGINT)) AS b1_uv
  442. ,SUM(CAST(b1_pv AS BIGINT)) AS b1_pv
  443. ,SUM(CAST(b1_exp AS BIGINT)) AS b1_exp
  444. ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6) AS b1_ror
  445. ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6) AS b1_rov
  446. ,SUM(CAST(b2_uv AS BIGINT)) AS b2_uv
  447. ,SUM(CAST(b2_pv AS BIGINT)) AS b2_pv
  448. ,SUM(CAST(b2_exp AS BIGINT)) AS b2_exp
  449. ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS b2_ror
  450. ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS b2_rov
  451. ,SUM(CAST(b3_uv AS BIGINT)) AS b3_uv
  452. ,SUM(CAST(b3_pv AS BIGINT)) AS b3_pv
  453. ,SUM(CAST(b3_exp AS BIGINT)) AS b3_exp
  454. ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS b3_ror
  455. ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS b3_rov
  456. -- ==================== C链 (全量depth, 按hop) ====================
  457. ,SUM(CAST(cn_1_uv AS BIGINT)) AS cn_1_uv
  458. ,SUM(CAST(cn_1_pv AS BIGINT)) AS cn_1_pv
  459. ,SUM(CAST(cn_1_exp AS BIGINT)) AS cn_1_exp
  460. ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS cn_1_ror
  461. ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS cn_1_rov
  462. ,SUM(CAST(cn_2_uv AS BIGINT)) AS cn_2_uv
  463. ,SUM(CAST(cn_2_pv AS BIGINT)) AS cn_2_pv
  464. ,SUM(CAST(cn_2_exp AS BIGINT)) AS cn_2_exp
  465. ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS cn_2_ror
  466. ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS cn_2_rov
  467. ,SUM(CAST(cn_3_uv AS BIGINT)) AS cn_3_uv
  468. ,SUM(CAST(cn_3_pv AS BIGINT)) AS cn_3_pv
  469. ,SUM(CAST(cn_3_exp AS BIGINT)) AS cn_3_exp
  470. ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS cn_3_ror
  471. ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS cn_3_rov
  472. ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)) AS cn_total_uv
  473. ,SUM(CAST(cn_1_pv AS BIGINT)) + SUM(CAST(cn_2_pv AS BIGINT)) + SUM(CAST(cn_3_pv AS BIGINT)) AS cn_total_pv
  474. ,SUM(CAST(cn_1_exp AS BIGINT)) + SUM(CAST(cn_2_exp AS BIGINT)) + SUM(CAST(cn_3_exp AS BIGINT)) AS cn_total_exp
  475. ,ROUND(COALESCE(
  476. (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
  477. / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0)
  478. , 0), 6) AS cn_total_ror
  479. ,ROUND(COALESCE(
  480. (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
  481. / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0)
  482. , 0), 6) AS cn_total_rov
  483. -- ==================== C链 (按depth拆分) ====================
  484. -- hop1
  485. ,SUM(CAST(c1_1_uv AS BIGINT)) AS c1_1_uv
  486. ,SUM(CAST(c1_1_pv AS BIGINT)) AS c1_1_pv
  487. ,SUM(CAST(c1_1_exp AS BIGINT)) AS c1_1_exp
  488. ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c1_1_ror
  489. ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c1_1_rov
  490. ,SUM(CAST(c2_1_uv AS BIGINT)) AS c2_1_uv
  491. ,SUM(CAST(c2_1_pv AS BIGINT)) AS c2_1_pv
  492. ,SUM(CAST(c2_1_exp AS BIGINT)) AS c2_1_exp
  493. ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c2_1_ror
  494. ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c2_1_rov
  495. ,SUM(CAST(c3_1_uv AS BIGINT)) AS c3_1_uv
  496. ,SUM(CAST(c3_1_pv AS BIGINT)) AS c3_1_pv
  497. ,SUM(CAST(c3_1_exp AS BIGINT)) AS c3_1_exp
  498. ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c3_1_ror
  499. ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c3_1_rov
  500. -- hop2
  501. ,SUM(CAST(c1_2_uv AS BIGINT)) AS c1_2_uv
  502. ,SUM(CAST(c1_2_pv AS BIGINT)) AS c1_2_pv
  503. ,SUM(CAST(c1_2_exp AS BIGINT)) AS c1_2_exp
  504. ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c1_2_ror
  505. ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c1_2_rov
  506. ,SUM(CAST(c2_2_uv AS BIGINT)) AS c2_2_uv
  507. ,SUM(CAST(c2_2_pv AS BIGINT)) AS c2_2_pv
  508. ,SUM(CAST(c2_2_exp AS BIGINT)) AS c2_2_exp
  509. ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c2_2_ror
  510. ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c2_2_rov
  511. ,SUM(CAST(c3_2_uv AS BIGINT)) AS c3_2_uv
  512. ,SUM(CAST(c3_2_pv AS BIGINT)) AS c3_2_pv
  513. ,SUM(CAST(c3_2_exp AS BIGINT)) AS c3_2_exp
  514. ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c3_2_ror
  515. ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c3_2_rov
  516. -- hop3
  517. ,SUM(CAST(c1_3_uv AS BIGINT)) AS c1_3_uv
  518. ,SUM(CAST(c1_3_pv AS BIGINT)) AS c1_3_pv
  519. ,SUM(CAST(c1_3_exp AS BIGINT)) AS c1_3_exp
  520. ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c1_3_ror
  521. ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c1_3_rov
  522. ,SUM(CAST(c2_3_uv AS BIGINT)) AS c2_3_uv
  523. ,SUM(CAST(c2_3_pv AS BIGINT)) AS c2_3_pv
  524. ,SUM(CAST(c2_3_exp AS BIGINT)) AS c2_3_exp
  525. ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c2_3_ror
  526. ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c2_3_rov
  527. ,SUM(CAST(c3_3_uv AS BIGINT)) AS c3_3_uv
  528. ,SUM(CAST(c3_3_pv AS BIGINT)) AS c3_3_pv
  529. ,SUM(CAST(c3_3_exp AS BIGINT)) AS c3_3_exp
  530. ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c3_3_ror
  531. ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS c3_3_rov
  532. -- ==================== D链 (全量depth, 按hop) ====================
  533. ,SUM(CAST(d0 AS BIGINT)) AS d0
  534. ,SUM(CAST(dn_1_uv AS BIGINT)) AS dn_1_uv
  535. ,SUM(CAST(dn_1_pv AS BIGINT)) AS dn_1_pv
  536. ,SUM(CAST(dn_1_exp AS BIGINT)) AS dn_1_exp
  537. ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6) AS dn_1_ror
  538. ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6) AS dn_1_rov
  539. ,SUM(CAST(dn_2_uv AS BIGINT)) AS dn_2_uv
  540. ,SUM(CAST(dn_2_pv AS BIGINT)) AS dn_2_pv
  541. ,SUM(CAST(dn_2_exp AS BIGINT)) AS dn_2_exp
  542. ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS dn_2_ror
  543. ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS dn_2_rov
  544. ,SUM(CAST(dn_3_uv AS BIGINT)) AS dn_3_uv
  545. ,SUM(CAST(dn_3_pv AS BIGINT)) AS dn_3_pv
  546. ,SUM(CAST(dn_3_exp AS BIGINT)) AS dn_3_exp
  547. ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS dn_3_ror
  548. ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS dn_3_rov
  549. ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)) AS dn_total_uv
  550. ,SUM(CAST(dn_1_pv AS BIGINT)) + SUM(CAST(dn_2_pv AS BIGINT)) + SUM(CAST(dn_3_pv AS BIGINT)) AS dn_total_pv
  551. ,SUM(CAST(dn_1_exp AS BIGINT)) + SUM(CAST(dn_2_exp AS BIGINT)) + SUM(CAST(dn_3_exp AS BIGINT)) AS dn_total_exp
  552. ,ROUND(COALESCE(
  553. (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
  554. / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
  555. , 0), 6) AS dn_total_ror
  556. ,ROUND(COALESCE(
  557. (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
  558. / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0)
  559. , 0), 6) AS dn_total_rov
  560. -- ==================== D链 (按depth拆分) ====================
  561. -- hop1
  562. ,SUM(CAST(d1_1_uv AS BIGINT)) AS d1_1_uv
  563. ,SUM(CAST(d1_1_pv AS BIGINT)) AS d1_1_pv
  564. ,SUM(CAST(d1_1_exp AS BIGINT)) AS d1_1_exp
  565. ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6) AS d1_1_ror
  566. ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d1_1_rov
  567. ,SUM(CAST(d2_1_uv AS BIGINT)) AS d2_1_uv
  568. ,SUM(CAST(d2_1_pv AS BIGINT)) AS d2_1_pv
  569. ,SUM(CAST(d2_1_exp AS BIGINT)) AS d2_1_exp
  570. ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d2_1_ror
  571. ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d2_1_rov
  572. ,SUM(CAST(d3_1_uv AS BIGINT)) AS d3_1_uv
  573. ,SUM(CAST(d3_1_pv AS BIGINT)) AS d3_1_pv
  574. ,SUM(CAST(d3_1_exp AS BIGINT)) AS d3_1_exp
  575. ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d3_1_ror
  576. ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d3_1_rov
  577. -- hop2
  578. ,SUM(CAST(d1_2_uv AS BIGINT)) AS d1_2_uv
  579. ,SUM(CAST(d1_2_pv AS BIGINT)) AS d1_2_pv
  580. ,SUM(CAST(d1_2_exp AS BIGINT)) AS d1_2_exp
  581. ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d1_2_ror
  582. ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d1_2_rov
  583. ,SUM(CAST(d2_2_uv AS BIGINT)) AS d2_2_uv
  584. ,SUM(CAST(d2_2_pv AS BIGINT)) AS d2_2_pv
  585. ,SUM(CAST(d2_2_exp AS BIGINT)) AS d2_2_exp
  586. ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d2_2_ror
  587. ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d2_2_rov
  588. ,SUM(CAST(d3_2_uv AS BIGINT)) AS d3_2_uv
  589. ,SUM(CAST(d3_2_pv AS BIGINT)) AS d3_2_pv
  590. ,SUM(CAST(d3_2_exp AS BIGINT)) AS d3_2_exp
  591. ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d3_2_ror
  592. ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d3_2_rov
  593. -- hop3
  594. ,SUM(CAST(d1_3_uv AS BIGINT)) AS d1_3_uv
  595. ,SUM(CAST(d1_3_pv AS BIGINT)) AS d1_3_pv
  596. ,SUM(CAST(d1_3_exp AS BIGINT)) AS d1_3_exp
  597. ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d1_3_ror
  598. ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d1_3_rov
  599. ,SUM(CAST(d2_3_uv AS BIGINT)) AS d2_3_uv
  600. ,SUM(CAST(d2_3_pv AS BIGINT)) AS d2_3_pv
  601. ,SUM(CAST(d2_3_exp AS BIGINT)) AS d2_3_exp
  602. ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d2_3_ror
  603. ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d2_3_rov
  604. ,SUM(CAST(d3_3_uv AS BIGINT)) AS d3_3_uv
  605. ,SUM(CAST(d3_3_pv AS BIGINT)) AS d3_3_pv
  606. ,SUM(CAST(d3_3_exp AS BIGINT)) AS d3_3_exp
  607. ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d3_3_ror
  608. ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6) AS d3_3_rov
  609. -- ==================== 全链路 ====================
  610. ,SUM(CAST(bn_uv AS BIGINT))
  611. + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
  612. + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
  613. AS all_uv
  614. ,SUM(CAST(bn_pv AS BIGINT))
  615. + SUM(CAST(cn_1_pv AS BIGINT)) + SUM(CAST(cn_2_pv AS BIGINT)) + SUM(CAST(cn_3_pv AS BIGINT))
  616. + SUM(CAST(dn_1_pv AS BIGINT)) + SUM(CAST(dn_2_pv AS BIGINT)) + SUM(CAST(dn_3_pv AS BIGINT))
  617. AS all_pv
  618. ,SUM(CAST(bn_exp AS BIGINT))
  619. + SUM(CAST(cn_1_exp AS BIGINT)) + SUM(CAST(cn_2_exp AS BIGINT)) + SUM(CAST(cn_3_exp AS BIGINT))
  620. + SUM(CAST(dn_1_exp AS BIGINT)) + SUM(CAST(dn_2_exp AS BIGINT)) + SUM(CAST(dn_3_exp AS BIGINT))
  621. AS all_exp
  622. ,ROUND(COALESCE(
  623. ( SUM(CAST(bn_uv AS BIGINT))
  624. + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
  625. + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
  626. ) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
  627. , 0), 6) AS all_ror
  628. ,ROUND(COALESCE(
  629. ( SUM(CAST(bn_uv AS BIGINT))
  630. + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
  631. + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
  632. ) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0)
  633. , 0), 6) AS all_rov
  634. -- =====================================================================
  635. -- FROM + GROUP BY CUBE
  636. -- =====================================================================
  637. FROM t_wide
  638. GROUP BY CUBE(
  639. user_type
  640. ,hh_bucket
  641. ,head_merge_leve2
  642. ,vid_merge_leve2
  643. ,vid_id
  644. )
  645. ORDER BY exposure_cnt DESC
  646. ;