loghubods.mid_global_feature_20250212.sql 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. -- Task: 01_全局用户特征_20250212 ID: 1023819179 Type: ODPS_SQL
  2. CREATE TABLE IF NOT EXISTS loghubods.mid_global_feature_20250212
  3. (
  4. mid STRING COMMENT 'mid'
  5. ,feature STRING COMMENT '特征JSON'
  6. )
  7. COMMENT '全部曝光场景下的mid特征'
  8. PARTITIONED BY
  9. (
  10. dt STRING COMMENT '天'
  11. ,hh STRING COMMENT '小时'
  12. )
  13. LIFECYCLE 30
  14. ;
  15. INSERT OVERWRITE TABLE loghubods.mid_global_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')
  16. WITH t_exp AS -- 曝光特征
  17. (
  18. SELECT vid
  19. ,mid
  20. ,apptype
  21. ,page
  22. ,recommendpagetype
  23. ,province
  24. ,hotsencetype
  25. ,machineinfo_brand AS brand
  26. ,is_share
  27. ,return_n_uv
  28. ,return_1_uv
  29. ,share_cnt
  30. ,is_return_1
  31. ,new_exposure_cnt
  32. ,GET_JSON_OBJECT(extend, "$.animationSceneType") AS animationSceneType
  33. ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now
  34. ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - CAST(ts AS BIGINT) AS ts_diff
  35. FROM loghubods.dwd_recsys_alg_exposure_base_20250108
  36. WHERE CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
  37. AND apptype NOT IN ("12")
  38. )
  39. ,t_agg AS
  40. (
  41. SELECT mid
  42. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN 1 ELSE 0 END) AS exp_1h
  43. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN 1 ELSE 0 END) AS exp_3h
  44. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN 1 ELSE 0 END) AS exp_6h
  45. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN 1 ELSE 0 END) AS exp_12h
  46. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN 1 ELSE 0 END) AS exp_24h
  47. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN 1 ELSE 0 END) AS exp_72h
  48. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN 1 ELSE 0 END) AS exp_168h
  49. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN is_share ELSE 0 END) AS is_share_1h
  50. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN is_share ELSE 0 END) AS is_share_3h
  51. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN is_share ELSE 0 END) AS is_share_6h
  52. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN is_share ELSE 0 END) AS is_share_12h
  53. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN is_share ELSE 0 END) AS is_share_24h
  54. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN is_share ELSE 0 END) AS is_share_72h
  55. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN is_share ELSE 0 END) AS is_share_168h
  56. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_cnt_1h
  57. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_cnt_3h
  58. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_cnt_6h
  59. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_cnt_12h
  60. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_cnt_24h
  61. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_cnt_72h
  62. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_cnt_168h
  63. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN is_return_1 ELSE 0 END) AS is_return_1_1h
  64. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN is_return_1 ELSE 0 END) AS is_return_1_3h
  65. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN is_return_1 ELSE 0 END) AS is_return_1_6h
  66. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN is_return_1 ELSE 0 END) AS is_return_1_12h
  67. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN is_return_1 ELSE 0 END) AS is_return_1_24h
  68. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN is_return_1 ELSE 0 END) AS is_return_1_72h
  69. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN is_return_1 ELSE 0 END) AS is_return_1_168h
  70. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN return_1_uv ELSE 0 END) AS return_1_uv_1h
  71. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN return_1_uv ELSE 0 END) AS return_1_uv_3h
  72. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN return_1_uv ELSE 0 END) AS return_1_uv_6h
  73. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN return_1_uv ELSE 0 END) AS return_1_uv_12h
  74. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN return_1_uv ELSE 0 END) AS return_1_uv_24h
  75. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN return_1_uv ELSE 0 END) AS return_1_uv_72h
  76. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN return_1_uv ELSE 0 END) AS return_1_uv_168h
  77. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_1h -- 曝光的分享次数当作用户的分享次数
  78. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_3h
  79. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_6h
  80. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_12h
  81. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_24h
  82. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_72h
  83. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_168h
  84. ,SUM(
  85. CASE WHEN ts_diff <= 3600 * 1
  86. AND page IN ("回流页","详情页")
  87. AND animationSceneType IS NULL THEN 1 ELSE 0 END
  88. ) AS click_1h -- 回流页的曝光当作用户的回流点击次数
  89. ,SUM(
  90. CASE WHEN ts_diff <= 3600 * 3
  91. AND page IN ("回流页","详情页")
  92. AND animationSceneType IS NULL THEN 1 ELSE 0 END
  93. ) AS click_3h
  94. ,SUM(
  95. CASE WHEN ts_diff <= 3600 * 6
  96. AND page IN ("回流页","详情页")
  97. AND animationSceneType IS NULL THEN 1 ELSE 0 END
  98. ) AS click_6h
  99. ,SUM(
  100. CASE WHEN ts_diff <= 3600 * 12
  101. AND page IN ("回流页","详情页")
  102. AND animationSceneType IS NULL THEN 1 ELSE 0 END
  103. ) AS click_12h
  104. ,SUM(
  105. CASE WHEN ts_diff <= 3600 * 24
  106. AND page IN ("回流页","详情页")
  107. AND animationSceneType IS NULL THEN 1 ELSE 0 END
  108. ) AS click_24h
  109. ,SUM(
  110. CASE WHEN ts_diff <= 3600 * 72
  111. AND page IN ("回流页","详情页")
  112. AND animationSceneType IS NULL THEN 1 ELSE 0 END
  113. ) AS click_72h
  114. ,SUM(
  115. CASE WHEN ts_diff <= 3600 * 168
  116. AND page IN ("回流页","详情页")
  117. AND animationSceneType IS NULL THEN 1 ELSE 0 END
  118. ) AS click_168h
  119. FROM t_exp
  120. WHERE ts_diff >= 0
  121. GROUP BY mid
  122. )
  123. ,t_index AS
  124. (
  125. SELECT *
  126. ,ROUND(COALESCE(is_share_1h / exp_1h,0),6) AS str_one_1h
  127. ,ROUND(COALESCE(return_1_uv_1h / is_share_1h,0),6) AS ros_one_1h
  128. ,ROUND(COALESCE(share_cnt_1h / exp_1h,0),6) AS str_1h
  129. ,ROUND(COALESCE(return_1_uv_1h / share_cnt_1h,0),6) AS ros_1h
  130. ,ROUND(COALESCE(is_return_1_1h / exp_1h,0),6) AS str_plus_1h
  131. ,ROUND(COALESCE(return_1_uv_1h / is_return_1_1h,0),6) AS ros_minus_1h
  132. ,ROUND(COALESCE(return_1_uv_1h / exp_1h,0),6) AS rovn_1h
  133. ,ROUND(COALESCE(is_share_3h / exp_3h,0),6) AS str_one_3h
  134. ,ROUND(COALESCE(return_1_uv_3h / is_share_3h,0),6) AS ros_one_3h
  135. ,ROUND(COALESCE(share_cnt_3h / exp_3h,0),6) AS str_3h
  136. ,ROUND(COALESCE(return_1_uv_3h / share_cnt_3h,0),6) AS ros_3h
  137. ,ROUND(COALESCE(is_return_1_3h / exp_3h,0),6) AS str_plus_3h
  138. ,ROUND(COALESCE(return_1_uv_3h / is_return_1_3h,0),6) AS ros_minus_3h
  139. ,ROUND(COALESCE(return_1_uv_3h / exp_3h,0),6) AS rovn_3h
  140. ,ROUND(COALESCE(is_share_6h / exp_6h,0),6) AS str_one_6h
  141. ,ROUND(COALESCE(return_1_uv_6h / is_share_6h,0),6) AS ros_one_6h
  142. ,ROUND(COALESCE(share_cnt_6h / exp_6h,0),6) AS str_6h
  143. ,ROUND(COALESCE(return_1_uv_6h / share_cnt_6h,0),6) AS ros_6h
  144. ,ROUND(COALESCE(is_return_1_6h / exp_6h,0),6) AS str_plus_6h
  145. ,ROUND(COALESCE(return_1_uv_6h / is_return_1_6h,0),6) AS ros_minus_6h
  146. ,ROUND(COALESCE(return_1_uv_6h / exp_6h,0),6) AS rovn_6h
  147. ,ROUND(COALESCE(is_share_12h / exp_12h,0),6) AS str_one_12h
  148. ,ROUND(COALESCE(return_1_uv_12h / is_share_12h,0),6) AS ros_one_12h
  149. ,ROUND(COALESCE(share_cnt_12h / exp_12h,0),6) AS str_12h
  150. ,ROUND(COALESCE(return_1_uv_12h / share_cnt_12h,0),6) AS ros_12h
  151. ,ROUND(COALESCE(is_return_1_12h / exp_12h,0),6) AS str_plus_12h
  152. ,ROUND(COALESCE(return_1_uv_12h / is_return_1_12h,0),6) AS ros_minus_12h
  153. ,ROUND(COALESCE(return_1_uv_12h / exp_12h,0),6) AS rovn_12h
  154. ,ROUND(COALESCE(is_share_24h / exp_24h,0),6) AS str_one_24h
  155. ,ROUND(COALESCE(return_1_uv_24h / is_share_24h,0),6) AS ros_one_24h
  156. ,ROUND(COALESCE(share_cnt_24h / exp_24h,0),6) AS str_24h
  157. ,ROUND(COALESCE(return_1_uv_24h / share_cnt_24h,0),6) AS ros_24h
  158. ,ROUND(COALESCE(is_return_1_24h / exp_24h,0),6) AS str_plus_24h
  159. ,ROUND(COALESCE(return_1_uv_24h / is_return_1_24h,0),6) AS ros_minus_24h
  160. ,ROUND(COALESCE(return_1_uv_24h / exp_24h,0),6) AS rovn_24h
  161. ,ROUND(COALESCE(is_share_72h / exp_72h,0),6) AS str_one_72h
  162. ,ROUND(COALESCE(return_1_uv_72h / is_share_72h,0),6) AS ros_one_72h
  163. ,ROUND(COALESCE(share_cnt_72h / exp_72h,0),6) AS str_72h
  164. ,ROUND(COALESCE(return_1_uv_72h / share_cnt_72h,0),6) AS ros_72h
  165. ,ROUND(COALESCE(is_return_1_72h / exp_72h,0),6) AS str_plus_72h
  166. ,ROUND(COALESCE(return_1_uv_72h / is_return_1_72h,0),6) AS ros_minus_72h
  167. ,ROUND(COALESCE(return_1_uv_72h / exp_72h,0),6) AS rovn_72h
  168. ,ROUND(COALESCE(is_share_168h / exp_168h,0),6) AS str_one_168h
  169. ,ROUND(COALESCE(return_1_uv_168h / is_share_168h,0),6) AS ros_one_168h
  170. ,ROUND(COALESCE(share_cnt_168h / exp_168h,0),6) AS str_168h
  171. ,ROUND(COALESCE(return_1_uv_168h / share_cnt_168h,0),6) AS ros_168h
  172. ,ROUND(COALESCE(is_return_1_168h / exp_168h,0),6) AS str_plus_168h
  173. ,ROUND(COALESCE(return_1_uv_168h / is_return_1_168h,0),6) AS ros_minus_168h
  174. ,ROUND(COALESCE(return_1_uv_168h / exp_168h,0),6) AS rovn_168h
  175. FROM t_agg
  176. )
  177. ,t_result AS (
  178. SELECT mid
  179. ,JSON_FORMAT(JSON_OBJECT(
  180. "exp_1h", CAST(exp_1h AS STRING),
  181. "exp_3h", CAST(exp_3h AS STRING),
  182. "exp_6h", CAST(exp_6h AS STRING),
  183. "exp_12h", CAST(exp_12h AS STRING),
  184. "exp_24h", CAST(exp_24h AS STRING),
  185. "exp_72h", CAST(exp_72h AS STRING),
  186. "exp_168h", CAST(exp_168h AS STRING),
  187. "is_share_1h", CAST(is_share_1h AS STRING),
  188. "is_share_3h", CAST(is_share_3h AS STRING),
  189. "is_share_6h", CAST(is_share_6h AS STRING),
  190. "is_share_12h", CAST(is_share_12h AS STRING),
  191. "is_share_24h", CAST(is_share_24h AS STRING),
  192. "is_share_72h", CAST(is_share_72h AS STRING),
  193. "is_share_168h", CAST(is_share_168h AS STRING),
  194. "share_cnt_1h", CAST(share_cnt_1h AS STRING),
  195. "share_cnt_3h", CAST(share_cnt_3h AS STRING),
  196. "share_cnt_6h", CAST(share_cnt_6h AS STRING),
  197. "share_cnt_12h", CAST(share_cnt_12h AS STRING),
  198. "share_cnt_24h", CAST(share_cnt_24h AS STRING),
  199. "share_cnt_72h", CAST(share_cnt_72h AS STRING),
  200. "share_cnt_168h", CAST(share_cnt_168h AS STRING),
  201. "is_return_1_1h", CAST(is_return_1_1h AS STRING),
  202. "is_return_1_3h", CAST(is_return_1_3h AS STRING),
  203. "is_return_1_6h", CAST(is_return_1_6h AS STRING),
  204. "is_return_1_12h", CAST(is_return_1_12h AS STRING),
  205. "is_return_1_24h", CAST(is_return_1_24h AS STRING),
  206. "is_return_1_72h", CAST(is_return_1_72h AS STRING),
  207. "is_return_1_168h", CAST(is_return_1_168h AS STRING),
  208. "return_1_uv_1h", CAST(return_1_uv_1h AS STRING),
  209. "return_1_uv_3h", CAST(return_1_uv_3h AS STRING),
  210. "return_1_uv_6h", CAST(return_1_uv_6h AS STRING),
  211. "return_1_uv_12h", CAST(return_1_uv_12h AS STRING),
  212. "return_1_uv_24h", CAST(return_1_uv_24h AS STRING),
  213. "return_1_uv_72h", CAST(return_1_uv_72h AS STRING),
  214. "return_1_uv_168h", CAST(return_1_uv_168h AS STRING),
  215. "share_1h", CAST(share_1h AS STRING),
  216. "share_3h", CAST(share_3h AS STRING),
  217. "share_6h", CAST(share_6h AS STRING),
  218. "share_12h", CAST(share_12h AS STRING),
  219. "share_24h", CAST(share_24h AS STRING),
  220. "share_72h", CAST(share_72h AS STRING),
  221. "share_168h", CAST(share_168h AS STRING),
  222. "click_1h", CAST(click_1h AS STRING),
  223. "click_3h", CAST(click_3h AS STRING),
  224. "click_6h", CAST(click_6h AS STRING),
  225. "click_12h", CAST(click_12h AS STRING),
  226. "click_24h", CAST(click_24h AS STRING),
  227. "click_72h", CAST(click_72h AS STRING),
  228. "click_168h", CAST(click_168h AS STRING),
  229. "str_one_1h", CAST(str_one_1h AS STRING),
  230. -- "ros_one_1h", CAST(ros_one_1h AS STRING),
  231. -- "str_1h", CAST(str_1h AS STRING),
  232. -- "ros_1h", CAST(ros_1h AS STRING),
  233. -- "str_plus_1h", CAST(str_plus_1h AS STRING),
  234. -- "ros_minus_1h", CAST(ros_minus_1h AS STRING),
  235. -- "rovn_1h", CAST(rovn_1h AS STRING),
  236. -- "str_one_3h", CAST(str_one_3h AS STRING),
  237. -- "ros_one_3h", CAST(ros_one_3h AS STRING),
  238. -- "str_3h", CAST(str_3h AS STRING),
  239. -- "ros_3h", CAST(ros_3h AS STRING),
  240. -- "str_plus_3h", CAST(str_plus_3h AS STRING),
  241. -- "ros_minus_3h", CAST(ros_minus_3h AS STRING),
  242. -- "rovn_3h", CAST(rovn_3h AS STRING),
  243. -- "str_one_6h", CAST(str_one_6h AS STRING),
  244. -- "ros_one_6h", CAST(ros_one_6h AS STRING),
  245. -- "str_6h", CAST(str_6h AS STRING),
  246. -- "ros_6h", CAST(ros_6h AS STRING),
  247. -- "str_plus_6h", CAST(str_plus_6h AS STRING),
  248. -- "ros_minus_6h", CAST(ros_minus_6h AS STRING),
  249. -- "rovn_6h", CAST(rovn_6h AS STRING),
  250. -- "str_one_12h", CAST(str_one_12h AS STRING),
  251. -- "ros_one_12h", CAST(ros_one_12h AS STRING),
  252. -- "str_12h", CAST(str_12h AS STRING),
  253. -- "ros_12h", CAST(ros_12h AS STRING),
  254. -- "str_plus_12h", CAST(str_plus_12h AS STRING),
  255. -- "ros_minus_12h", CAST(ros_minus_12h AS STRING),
  256. -- "rovn_12h", CAST(rovn_12h AS STRING),
  257. -- "str_one_24h", CAST(str_one_24h AS STRING),
  258. -- "ros_one_24h", CAST(ros_one_24h AS STRING),
  259. -- "str_24h", CAST(str_24h AS STRING),
  260. -- "ros_24h", CAST(ros_24h AS STRING),
  261. -- "str_plus_24h", CAST(str_plus_24h AS STRING),
  262. -- "ros_minus_24h", CAST(ros_minus_24h AS STRING),
  263. -- "rovn_24h", CAST(rovn_24h AS STRING),
  264. -- "str_one_72h", CAST(str_one_72h AS STRING),
  265. -- "ros_one_72h", CAST(ros_one_72h AS STRING),
  266. -- "str_72h", CAST(str_72h AS STRING),
  267. -- "ros_72h", CAST(ros_72h AS STRING),
  268. -- "str_plus_72h", CAST(str_plus_72h AS STRING),
  269. -- "ros_minus_72h", CAST(ros_minus_72h AS STRING),
  270. -- "rovn_72h", CAST(rovn_72h AS STRING),
  271. -- "str_one_168h", CAST(str_one_168h AS STRING),
  272. -- "ros_one_168h", CAST(ros_one_168h AS STRING),
  273. -- "str_168h", CAST(str_168h AS STRING),
  274. -- "ros_168h", CAST(ros_168h AS STRING),
  275. -- "str_plus_168h", CAST(str_plus_168h AS STRING),
  276. -- "ros_minus_168h", CAST(ros_minus_168h AS STRING),
  277. "rovn_168h", CAST(rovn_168h AS STRING)
  278. ))
  279. FROM t_index
  280. )SELECT *
  281. FROM t_result
  282. ;