loghubods.alg_vid_global_feature_20250212.sql 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. -- Task: 01_全局视频特征_20250211 ID: 1023781563 Type: ODPS_SQL
  2. CREATE TABLE IF NOT EXISTS loghubods.alg_vid_global_feature_20250212
  3. (
  4. vid STRING COMMENT '视频id'
  5. ,feature STRING COMMENT '特征JSON'
  6. )
  7. COMMENT '00_全局视频特征'
  8. PARTITIONED BY
  9. (
  10. dt STRING COMMENT '天'
  11. ,hh STRING COMMENT '小时'
  12. )
  13. LIFECYCLE 30
  14. ;
  15. INSERT OVERWRITE TABLE loghubods.alg_vid_global_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')
  16. WITH t_base AS
  17. (
  18. SELECT vid
  19. ,is_share
  20. ,share_cnt
  21. ,is_return_1
  22. ,return_1_uv
  23. ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now
  24. ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - CAST(ts AS BIGINT) AS ts_diff
  25. FROM loghubods.dwd_recsys_alg_exposure_base_20250108
  26. WHERE CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
  27. AND apptype NOT IN ("12")
  28. )
  29. ,t_agg AS
  30. (
  31. SELECT vid
  32. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN 1 ELSE 0 END) AS exp_1h
  33. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN 1 ELSE 0 END) AS exp_3h
  34. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN 1 ELSE 0 END) AS exp_6h
  35. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN 1 ELSE 0 END) AS exp_12h
  36. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN 1 ELSE 0 END) AS exp_24h
  37. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN 1 ELSE 0 END) AS exp_72h
  38. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN 1 ELSE 0 END) AS exp_168h
  39. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN is_share ELSE 0 END) AS is_share_1h
  40. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN is_share ELSE 0 END) AS is_share_3h
  41. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN is_share ELSE 0 END) AS is_share_6h
  42. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN is_share ELSE 0 END) AS is_share_12h
  43. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN is_share ELSE 0 END) AS is_share_24h
  44. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN is_share ELSE 0 END) AS is_share_72h
  45. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN is_share ELSE 0 END) AS is_share_168h
  46. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_cnt_1h
  47. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_cnt_3h
  48. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_cnt_6h
  49. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_cnt_12h
  50. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_cnt_24h
  51. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_cnt_72h
  52. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_cnt_168h
  53. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN is_return_1 ELSE 0 END) AS is_return_1_1h
  54. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN is_return_1 ELSE 0 END) AS is_return_1_3h
  55. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN is_return_1 ELSE 0 END) AS is_return_1_6h
  56. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN is_return_1 ELSE 0 END) AS is_return_1_12h
  57. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN is_return_1 ELSE 0 END) AS is_return_1_24h
  58. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN is_return_1 ELSE 0 END) AS is_return_1_72h
  59. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN is_return_1 ELSE 0 END) AS is_return_1_168h
  60. ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN return_1_uv ELSE 0 END) AS return_1_uv_1h
  61. ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN return_1_uv ELSE 0 END) AS return_1_uv_3h
  62. ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN return_1_uv ELSE 0 END) AS return_1_uv_6h
  63. ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN return_1_uv ELSE 0 END) AS return_1_uv_12h
  64. ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN return_1_uv ELSE 0 END) AS return_1_uv_24h
  65. ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN return_1_uv ELSE 0 END) AS return_1_uv_72h
  66. ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN return_1_uv ELSE 0 END) AS return_1_uv_168h
  67. FROM t_base
  68. WHERE ts_diff >= 0
  69. GROUP BY vid
  70. )
  71. ,t_index AS
  72. (
  73. SELECT *
  74. ,ROUND(COALESCE(is_share_1h / exp_1h,0),6) AS str_one_1h
  75. ,ROUND(COALESCE(return_1_uv_1h / is_share_1h,0),6) AS ros_one_1h
  76. ,ROUND(COALESCE(share_cnt_1h / exp_1h,0),6) AS str_1h
  77. ,ROUND(COALESCE(return_1_uv_1h / share_cnt_1h,0),6) AS ros_1h
  78. ,ROUND(COALESCE(is_return_1_1h / exp_1h,0),6) AS str_plus_1h
  79. ,ROUND(COALESCE(return_1_uv_1h / is_return_1_1h,0),6) AS ros_minus_1h
  80. ,ROUND(COALESCE(return_1_uv_1h / exp_1h,0),6) AS rovn_1h
  81. ,ROUND(COALESCE(is_share_3h / exp_3h,0),6) AS str_one_3h
  82. ,ROUND(COALESCE(return_1_uv_3h / is_share_3h,0),6) AS ros_one_3h
  83. ,ROUND(COALESCE(share_cnt_3h / exp_3h,0),6) AS str_3h
  84. ,ROUND(COALESCE(return_1_uv_3h / share_cnt_3h,0),6) AS ros_3h
  85. ,ROUND(COALESCE(is_return_1_3h / exp_3h,0),6) AS str_plus_3h
  86. ,ROUND(COALESCE(return_1_uv_3h / is_return_1_3h,0),6) AS ros_minus_3h
  87. ,ROUND(COALESCE(return_1_uv_3h / exp_3h,0),6) AS rovn_3h
  88. ,ROUND(COALESCE(is_share_6h / exp_6h,0),6) AS str_one_6h
  89. ,ROUND(COALESCE(return_1_uv_6h / is_share_6h,0),6) AS ros_one_6h
  90. ,ROUND(COALESCE(share_cnt_6h / exp_6h,0),6) AS str_6h
  91. ,ROUND(COALESCE(return_1_uv_6h / share_cnt_6h,0),6) AS ros_6h
  92. ,ROUND(COALESCE(is_return_1_6h / exp_6h,0),6) AS str_plus_6h
  93. ,ROUND(COALESCE(return_1_uv_6h / is_return_1_6h,0),6) AS ros_minus_6h
  94. ,ROUND(COALESCE(return_1_uv_6h / exp_6h,0),6) AS rovn_6h
  95. ,ROUND(COALESCE(is_share_12h / exp_12h,0),6) AS str_one_12h
  96. ,ROUND(COALESCE(return_1_uv_12h / is_share_12h,0),6) AS ros_one_12h
  97. ,ROUND(COALESCE(share_cnt_12h / exp_12h,0),6) AS str_12h
  98. ,ROUND(COALESCE(return_1_uv_12h / share_cnt_12h,0),6) AS ros_12h
  99. ,ROUND(COALESCE(is_return_1_12h / exp_12h,0),6) AS str_plus_12h
  100. ,ROUND(COALESCE(return_1_uv_12h / is_return_1_12h,0),6) AS ros_minus_12h
  101. ,ROUND(COALESCE(return_1_uv_12h / exp_12h,0),6) AS rovn_12h
  102. ,ROUND(COALESCE(is_share_24h / exp_24h,0),6) AS str_one_24h
  103. ,ROUND(COALESCE(return_1_uv_24h / is_share_24h,0),6) AS ros_one_24h
  104. ,ROUND(COALESCE(share_cnt_24h / exp_24h,0),6) AS str_24h
  105. ,ROUND(COALESCE(return_1_uv_24h / share_cnt_24h,0),6) AS ros_24h
  106. ,ROUND(COALESCE(is_return_1_24h / exp_24h,0),6) AS str_plus_24h
  107. ,ROUND(COALESCE(return_1_uv_24h / is_return_1_24h,0),6) AS ros_minus_24h
  108. ,ROUND(COALESCE(return_1_uv_24h / exp_24h,0),6) AS rovn_24h
  109. ,ROUND(COALESCE(is_share_72h / exp_72h,0),6) AS str_one_72h
  110. ,ROUND(COALESCE(return_1_uv_72h / is_share_72h,0),6) AS ros_one_72h
  111. ,ROUND(COALESCE(share_cnt_72h / exp_72h,0),6) AS str_72h
  112. ,ROUND(COALESCE(return_1_uv_72h / share_cnt_72h,0),6) AS ros_72h
  113. ,ROUND(COALESCE(is_return_1_72h / exp_72h,0),6) AS str_plus_72h
  114. ,ROUND(COALESCE(return_1_uv_72h / is_return_1_72h,0),6) AS ros_minus_72h
  115. ,ROUND(COALESCE(return_1_uv_72h / exp_72h,0),6) AS rovn_72h
  116. ,ROUND(COALESCE(is_share_168h / exp_168h,0),6) AS str_one_168h
  117. ,ROUND(COALESCE(return_1_uv_168h / is_share_168h,0),6) AS ros_one_168h
  118. ,ROUND(COALESCE(share_cnt_168h / exp_168h,0),6) AS str_168h
  119. ,ROUND(COALESCE(return_1_uv_168h / share_cnt_168h,0),6) AS ros_168h
  120. ,ROUND(COALESCE(is_return_1_168h / exp_168h,0),6) AS str_plus_168h
  121. ,ROUND(COALESCE(return_1_uv_168h / is_return_1_168h,0),6) AS ros_minus_168h
  122. ,ROUND(COALESCE(return_1_uv_168h / exp_168h,0),6) AS rovn_168h
  123. FROM t_agg
  124. )
  125. ,t_result AS
  126. (
  127. SELECT vid
  128. ,JSON_FORMAT(JSON_OBJECT(
  129. "exp_1h", CAST(exp_1h AS STRING),
  130. "exp_3h", CAST(exp_3h AS STRING),
  131. "exp_6h", CAST(exp_6h AS STRING),
  132. "exp_12h", CAST(exp_12h AS STRING),
  133. "exp_24h", CAST(exp_24h AS STRING),
  134. "exp_72h", CAST(exp_72h AS STRING),
  135. "exp_168h", CAST(exp_168h AS STRING),
  136. "is_share_1h", CAST(is_share_1h AS STRING),
  137. "is_share_3h", CAST(is_share_3h AS STRING),
  138. "is_share_6h", CAST(is_share_6h AS STRING),
  139. "is_share_12h", CAST(is_share_12h AS STRING),
  140. "is_share_24h", CAST(is_share_24h AS STRING),
  141. "is_share_72h", CAST(is_share_72h AS STRING),
  142. "is_share_168h", CAST(is_share_168h AS STRING),
  143. "share_cnt_1h", CAST(share_cnt_1h AS STRING),
  144. "share_cnt_3h", CAST(share_cnt_3h AS STRING),
  145. "share_cnt_6h", CAST(share_cnt_6h AS STRING),
  146. "share_cnt_12h", CAST(share_cnt_12h AS STRING),
  147. "share_cnt_24h", CAST(share_cnt_24h AS STRING),
  148. "share_cnt_72h", CAST(share_cnt_72h AS STRING),
  149. "share_cnt_168h", CAST(share_cnt_168h AS STRING),
  150. "is_return_1_1h", CAST(is_return_1_1h AS STRING),
  151. "is_return_1_3h", CAST(is_return_1_3h AS STRING),
  152. "is_return_1_6h", CAST(is_return_1_6h AS STRING),
  153. "is_return_1_12h", CAST(is_return_1_12h AS STRING),
  154. "is_return_1_24h", CAST(is_return_1_24h AS STRING),
  155. "is_return_1_72h", CAST(is_return_1_72h AS STRING),
  156. "is_return_1_168h", CAST(is_return_1_168h AS STRING),
  157. "return_1_uv_1h", CAST(return_1_uv_1h AS STRING),
  158. "return_1_uv_3h", CAST(return_1_uv_3h AS STRING),
  159. "return_1_uv_6h", CAST(return_1_uv_6h AS STRING),
  160. "return_1_uv_12h", CAST(return_1_uv_12h AS STRING),
  161. "return_1_uv_24h", CAST(return_1_uv_24h AS STRING),
  162. "return_1_uv_72h", CAST(return_1_uv_72h AS STRING),
  163. "return_1_uv_168h", CAST(return_1_uv_168h AS STRING),
  164. "str_one_1h", CAST(str_one_1h AS STRING),
  165. -- "ros_one_1h", CAST(ros_one_1h AS STRING),
  166. -- "str_1h", CAST(str_1h AS STRING),
  167. -- "ros_1h", CAST(ros_1h AS STRING),
  168. -- "str_plus_1h", CAST(str_plus_1h AS STRING),
  169. -- "ros_minus_1h", CAST(ros_minus_1h AS STRING),
  170. -- "rovn_1h", CAST(rovn_1h AS STRING),
  171. -- "str_one_3h", CAST(str_one_3h AS STRING),
  172. -- "ros_one_3h", CAST(ros_one_3h AS STRING),
  173. -- "str_3h", CAST(str_3h AS STRING),
  174. -- "ros_3h", CAST(ros_3h AS STRING),
  175. -- "str_plus_3h", CAST(str_plus_3h AS STRING),
  176. -- "ros_minus_3h", CAST(ros_minus_3h AS STRING),
  177. -- "rovn_3h", CAST(rovn_3h AS STRING),
  178. -- "str_one_6h", CAST(str_one_6h AS STRING),
  179. -- "ros_one_6h", CAST(ros_one_6h AS STRING),
  180. -- "str_6h", CAST(str_6h AS STRING),
  181. -- "ros_6h", CAST(ros_6h AS STRING),
  182. -- "str_plus_6h", CAST(str_plus_6h AS STRING),
  183. -- "ros_minus_6h", CAST(ros_minus_6h AS STRING),
  184. -- "rovn_6h", CAST(rovn_6h AS STRING),
  185. -- "str_one_12h", CAST(str_one_12h AS STRING),
  186. -- "ros_one_12h", CAST(ros_one_12h AS STRING),
  187. -- "str_12h", CAST(str_12h AS STRING),
  188. -- "ros_12h", CAST(ros_12h AS STRING),
  189. -- "str_plus_12h", CAST(str_plus_12h AS STRING),
  190. -- "ros_minus_12h", CAST(ros_minus_12h AS STRING),
  191. -- "rovn_12h", CAST(rovn_12h AS STRING),
  192. -- "str_one_24h", CAST(str_one_24h AS STRING),
  193. -- "ros_one_24h", CAST(ros_one_24h AS STRING),
  194. -- "str_24h", CAST(str_24h AS STRING),
  195. -- "ros_24h", CAST(ros_24h AS STRING),
  196. -- "str_plus_24h", CAST(str_plus_24h AS STRING),
  197. -- "ros_minus_24h", CAST(ros_minus_24h AS STRING),
  198. -- "rovn_24h", CAST(rovn_24h AS STRING),
  199. -- "str_one_72h", CAST(str_one_72h AS STRING),
  200. -- "ros_one_72h", CAST(ros_one_72h AS STRING),
  201. -- "str_72h", CAST(str_72h AS STRING),
  202. -- "ros_72h", CAST(ros_72h AS STRING),
  203. -- "str_plus_72h", CAST(str_plus_72h AS STRING),
  204. -- "ros_minus_72h", CAST(ros_minus_72h AS STRING),
  205. -- "rovn_72h", CAST(rovn_72h AS STRING),
  206. -- "str_one_168h", CAST(str_one_168h AS STRING),
  207. -- "ros_one_168h", CAST(ros_one_168h AS STRING),
  208. -- "str_168h", CAST(str_168h AS STRING),
  209. -- "ros_168h", CAST(ros_168h AS STRING),
  210. -- "str_plus_168h", CAST(str_plus_168h AS STRING),
  211. -- "ros_minus_168h", CAST(ros_minus_168h AS STRING),
  212. "rovn_168h", CAST(rovn_168h AS STRING)
  213. ))
  214. FROM t_index
  215. )
  216. SELECT *
  217. FROM t_result
  218. ;