| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219 |
- -- Task: 01_全局视频特征_20250211 ID: 1023781563 Type: ODPS_SQL
- CREATE TABLE IF NOT EXISTS loghubods.alg_vid_global_feature_20250212
- (
- vid STRING COMMENT '视频id'
- ,feature STRING COMMENT '特征JSON'
- )
- COMMENT '00_全局视频特征'
- PARTITIONED BY
- (
- dt STRING COMMENT '天'
- ,hh STRING COMMENT '小时'
- )
- LIFECYCLE 30
- ;
- INSERT OVERWRITE TABLE loghubods.alg_vid_global_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')
- WITH t_base AS
- (
- SELECT vid
- ,is_share
- ,share_cnt
- ,is_return_1
- ,return_1_uv
- ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now
- ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - CAST(ts AS BIGINT) AS ts_diff
- FROM loghubods.dwd_recsys_alg_exposure_base_20250108
- WHERE CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
- AND apptype NOT IN ("12")
- )
- ,t_agg AS
- (
- SELECT vid
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN 1 ELSE 0 END) AS exp_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN 1 ELSE 0 END) AS exp_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN 1 ELSE 0 END) AS exp_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN 1 ELSE 0 END) AS exp_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN 1 ELSE 0 END) AS exp_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN 1 ELSE 0 END) AS exp_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN 1 ELSE 0 END) AS exp_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN is_share ELSE 0 END) AS is_share_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN is_share ELSE 0 END) AS is_share_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN is_share ELSE 0 END) AS is_share_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN is_share ELSE 0 END) AS is_share_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN is_share ELSE 0 END) AS is_share_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN is_share ELSE 0 END) AS is_share_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN is_share ELSE 0 END) AS is_share_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_cnt_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_cnt_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_cnt_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_cnt_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_cnt_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_cnt_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_cnt_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN is_return_1 ELSE 0 END) AS is_return_1_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN is_return_1 ELSE 0 END) AS is_return_1_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN is_return_1 ELSE 0 END) AS is_return_1_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN is_return_1 ELSE 0 END) AS is_return_1_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN is_return_1 ELSE 0 END) AS is_return_1_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN is_return_1 ELSE 0 END) AS is_return_1_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN is_return_1 ELSE 0 END) AS is_return_1_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN return_1_uv ELSE 0 END) AS return_1_uv_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN return_1_uv ELSE 0 END) AS return_1_uv_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN return_1_uv ELSE 0 END) AS return_1_uv_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN return_1_uv ELSE 0 END) AS return_1_uv_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN return_1_uv ELSE 0 END) AS return_1_uv_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN return_1_uv ELSE 0 END) AS return_1_uv_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN return_1_uv ELSE 0 END) AS return_1_uv_168h
- FROM t_base
- WHERE ts_diff >= 0
- GROUP BY vid
- )
- ,t_index AS
- (
- SELECT *
- ,ROUND(COALESCE(is_share_1h / exp_1h,0),6) AS str_one_1h
- ,ROUND(COALESCE(return_1_uv_1h / is_share_1h,0),6) AS ros_one_1h
- ,ROUND(COALESCE(share_cnt_1h / exp_1h,0),6) AS str_1h
- ,ROUND(COALESCE(return_1_uv_1h / share_cnt_1h,0),6) AS ros_1h
- ,ROUND(COALESCE(is_return_1_1h / exp_1h,0),6) AS str_plus_1h
- ,ROUND(COALESCE(return_1_uv_1h / is_return_1_1h,0),6) AS ros_minus_1h
- ,ROUND(COALESCE(return_1_uv_1h / exp_1h,0),6) AS rovn_1h
- ,ROUND(COALESCE(is_share_3h / exp_3h,0),6) AS str_one_3h
- ,ROUND(COALESCE(return_1_uv_3h / is_share_3h,0),6) AS ros_one_3h
- ,ROUND(COALESCE(share_cnt_3h / exp_3h,0),6) AS str_3h
- ,ROUND(COALESCE(return_1_uv_3h / share_cnt_3h,0),6) AS ros_3h
- ,ROUND(COALESCE(is_return_1_3h / exp_3h,0),6) AS str_plus_3h
- ,ROUND(COALESCE(return_1_uv_3h / is_return_1_3h,0),6) AS ros_minus_3h
- ,ROUND(COALESCE(return_1_uv_3h / exp_3h,0),6) AS rovn_3h
- ,ROUND(COALESCE(is_share_6h / exp_6h,0),6) AS str_one_6h
- ,ROUND(COALESCE(return_1_uv_6h / is_share_6h,0),6) AS ros_one_6h
- ,ROUND(COALESCE(share_cnt_6h / exp_6h,0),6) AS str_6h
- ,ROUND(COALESCE(return_1_uv_6h / share_cnt_6h,0),6) AS ros_6h
- ,ROUND(COALESCE(is_return_1_6h / exp_6h,0),6) AS str_plus_6h
- ,ROUND(COALESCE(return_1_uv_6h / is_return_1_6h,0),6) AS ros_minus_6h
- ,ROUND(COALESCE(return_1_uv_6h / exp_6h,0),6) AS rovn_6h
- ,ROUND(COALESCE(is_share_12h / exp_12h,0),6) AS str_one_12h
- ,ROUND(COALESCE(return_1_uv_12h / is_share_12h,0),6) AS ros_one_12h
- ,ROUND(COALESCE(share_cnt_12h / exp_12h,0),6) AS str_12h
- ,ROUND(COALESCE(return_1_uv_12h / share_cnt_12h,0),6) AS ros_12h
- ,ROUND(COALESCE(is_return_1_12h / exp_12h,0),6) AS str_plus_12h
- ,ROUND(COALESCE(return_1_uv_12h / is_return_1_12h,0),6) AS ros_minus_12h
- ,ROUND(COALESCE(return_1_uv_12h / exp_12h,0),6) AS rovn_12h
- ,ROUND(COALESCE(is_share_24h / exp_24h,0),6) AS str_one_24h
- ,ROUND(COALESCE(return_1_uv_24h / is_share_24h,0),6) AS ros_one_24h
- ,ROUND(COALESCE(share_cnt_24h / exp_24h,0),6) AS str_24h
- ,ROUND(COALESCE(return_1_uv_24h / share_cnt_24h,0),6) AS ros_24h
- ,ROUND(COALESCE(is_return_1_24h / exp_24h,0),6) AS str_plus_24h
- ,ROUND(COALESCE(return_1_uv_24h / is_return_1_24h,0),6) AS ros_minus_24h
- ,ROUND(COALESCE(return_1_uv_24h / exp_24h,0),6) AS rovn_24h
- ,ROUND(COALESCE(is_share_72h / exp_72h,0),6) AS str_one_72h
- ,ROUND(COALESCE(return_1_uv_72h / is_share_72h,0),6) AS ros_one_72h
- ,ROUND(COALESCE(share_cnt_72h / exp_72h,0),6) AS str_72h
- ,ROUND(COALESCE(return_1_uv_72h / share_cnt_72h,0),6) AS ros_72h
- ,ROUND(COALESCE(is_return_1_72h / exp_72h,0),6) AS str_plus_72h
- ,ROUND(COALESCE(return_1_uv_72h / is_return_1_72h,0),6) AS ros_minus_72h
- ,ROUND(COALESCE(return_1_uv_72h / exp_72h,0),6) AS rovn_72h
- ,ROUND(COALESCE(is_share_168h / exp_168h,0),6) AS str_one_168h
- ,ROUND(COALESCE(return_1_uv_168h / is_share_168h,0),6) AS ros_one_168h
- ,ROUND(COALESCE(share_cnt_168h / exp_168h,0),6) AS str_168h
- ,ROUND(COALESCE(return_1_uv_168h / share_cnt_168h,0),6) AS ros_168h
- ,ROUND(COALESCE(is_return_1_168h / exp_168h,0),6) AS str_plus_168h
- ,ROUND(COALESCE(return_1_uv_168h / is_return_1_168h,0),6) AS ros_minus_168h
- ,ROUND(COALESCE(return_1_uv_168h / exp_168h,0),6) AS rovn_168h
- FROM t_agg
- )
- ,t_result AS
- (
- SELECT vid
- ,JSON_FORMAT(JSON_OBJECT(
- "exp_1h", CAST(exp_1h AS STRING),
- "exp_3h", CAST(exp_3h AS STRING),
- "exp_6h", CAST(exp_6h AS STRING),
- "exp_12h", CAST(exp_12h AS STRING),
- "exp_24h", CAST(exp_24h AS STRING),
- "exp_72h", CAST(exp_72h AS STRING),
- "exp_168h", CAST(exp_168h AS STRING),
- "is_share_1h", CAST(is_share_1h AS STRING),
- "is_share_3h", CAST(is_share_3h AS STRING),
- "is_share_6h", CAST(is_share_6h AS STRING),
- "is_share_12h", CAST(is_share_12h AS STRING),
- "is_share_24h", CAST(is_share_24h AS STRING),
- "is_share_72h", CAST(is_share_72h AS STRING),
- "is_share_168h", CAST(is_share_168h AS STRING),
- "share_cnt_1h", CAST(share_cnt_1h AS STRING),
- "share_cnt_3h", CAST(share_cnt_3h AS STRING),
- "share_cnt_6h", CAST(share_cnt_6h AS STRING),
- "share_cnt_12h", CAST(share_cnt_12h AS STRING),
- "share_cnt_24h", CAST(share_cnt_24h AS STRING),
- "share_cnt_72h", CAST(share_cnt_72h AS STRING),
- "share_cnt_168h", CAST(share_cnt_168h AS STRING),
- "is_return_1_1h", CAST(is_return_1_1h AS STRING),
- "is_return_1_3h", CAST(is_return_1_3h AS STRING),
- "is_return_1_6h", CAST(is_return_1_6h AS STRING),
- "is_return_1_12h", CAST(is_return_1_12h AS STRING),
- "is_return_1_24h", CAST(is_return_1_24h AS STRING),
- "is_return_1_72h", CAST(is_return_1_72h AS STRING),
- "is_return_1_168h", CAST(is_return_1_168h AS STRING),
- "return_1_uv_1h", CAST(return_1_uv_1h AS STRING),
- "return_1_uv_3h", CAST(return_1_uv_3h AS STRING),
- "return_1_uv_6h", CAST(return_1_uv_6h AS STRING),
- "return_1_uv_12h", CAST(return_1_uv_12h AS STRING),
- "return_1_uv_24h", CAST(return_1_uv_24h AS STRING),
- "return_1_uv_72h", CAST(return_1_uv_72h AS STRING),
- "return_1_uv_168h", CAST(return_1_uv_168h AS STRING),
- "str_one_1h", CAST(str_one_1h AS STRING),
- -- "ros_one_1h", CAST(ros_one_1h AS STRING),
- -- "str_1h", CAST(str_1h AS STRING),
- -- "ros_1h", CAST(ros_1h AS STRING),
- -- "str_plus_1h", CAST(str_plus_1h AS STRING),
- -- "ros_minus_1h", CAST(ros_minus_1h AS STRING),
- -- "rovn_1h", CAST(rovn_1h AS STRING),
- -- "str_one_3h", CAST(str_one_3h AS STRING),
- -- "ros_one_3h", CAST(ros_one_3h AS STRING),
- -- "str_3h", CAST(str_3h AS STRING),
- -- "ros_3h", CAST(ros_3h AS STRING),
- -- "str_plus_3h", CAST(str_plus_3h AS STRING),
- -- "ros_minus_3h", CAST(ros_minus_3h AS STRING),
- -- "rovn_3h", CAST(rovn_3h AS STRING),
- -- "str_one_6h", CAST(str_one_6h AS STRING),
- -- "ros_one_6h", CAST(ros_one_6h AS STRING),
- -- "str_6h", CAST(str_6h AS STRING),
- -- "ros_6h", CAST(ros_6h AS STRING),
- -- "str_plus_6h", CAST(str_plus_6h AS STRING),
- -- "ros_minus_6h", CAST(ros_minus_6h AS STRING),
- -- "rovn_6h", CAST(rovn_6h AS STRING),
- -- "str_one_12h", CAST(str_one_12h AS STRING),
- -- "ros_one_12h", CAST(ros_one_12h AS STRING),
- -- "str_12h", CAST(str_12h AS STRING),
- -- "ros_12h", CAST(ros_12h AS STRING),
- -- "str_plus_12h", CAST(str_plus_12h AS STRING),
- -- "ros_minus_12h", CAST(ros_minus_12h AS STRING),
- -- "rovn_12h", CAST(rovn_12h AS STRING),
- -- "str_one_24h", CAST(str_one_24h AS STRING),
- -- "ros_one_24h", CAST(ros_one_24h AS STRING),
- -- "str_24h", CAST(str_24h AS STRING),
- -- "ros_24h", CAST(ros_24h AS STRING),
- -- "str_plus_24h", CAST(str_plus_24h AS STRING),
- -- "ros_minus_24h", CAST(ros_minus_24h AS STRING),
- -- "rovn_24h", CAST(rovn_24h AS STRING),
- -- "str_one_72h", CAST(str_one_72h AS STRING),
- -- "ros_one_72h", CAST(ros_one_72h AS STRING),
- -- "str_72h", CAST(str_72h AS STRING),
- -- "ros_72h", CAST(ros_72h AS STRING),
- -- "str_plus_72h", CAST(str_plus_72h AS STRING),
- -- "ros_minus_72h", CAST(ros_minus_72h AS STRING),
- -- "rovn_72h", CAST(rovn_72h AS STRING),
- -- "str_one_168h", CAST(str_one_168h AS STRING),
- -- "ros_one_168h", CAST(ros_one_168h AS STRING),
- -- "str_168h", CAST(str_168h AS STRING),
- -- "ros_168h", CAST(ros_168h AS STRING),
- -- "str_plus_168h", CAST(str_plus_168h AS STRING),
- -- "ros_minus_168h", CAST(ros_minus_168h AS STRING),
- "rovn_168h", CAST(rovn_168h AS STRING)
- ))
- FROM t_index
- )
- SELECT *
- FROM t_result
- ;
|