| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283 |
- -- Task: 01_全局用户特征_20250212 ID: 1023819179 Type: ODPS_SQL
- CREATE TABLE IF NOT EXISTS loghubods.mid_global_feature_20250212
- (
- mid STRING COMMENT 'mid'
- ,feature STRING COMMENT '特征JSON'
- )
- COMMENT '全部曝光场景下的mid特征'
- PARTITIONED BY
- (
- dt STRING COMMENT '天'
- ,hh STRING COMMENT '小时'
- )
- LIFECYCLE 30
- ;
- INSERT OVERWRITE TABLE loghubods.mid_global_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')
- WITH t_exp AS -- 曝光特征
- (
- SELECT vid
- ,mid
- ,apptype
- ,page
- ,recommendpagetype
- ,province
- ,hotsencetype
- ,machineinfo_brand AS brand
- ,is_share
- ,return_n_uv
- ,return_1_uv
- ,share_cnt
- ,is_return_1
- ,new_exposure_cnt
- ,GET_JSON_OBJECT(extend, "$.animationSceneType") AS animationSceneType
- ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now
- ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - CAST(ts AS BIGINT) AS ts_diff
- FROM loghubods.dwd_recsys_alg_exposure_base_20250108
- WHERE CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
- AND apptype NOT IN ("12")
- )
- ,t_agg AS
- (
- SELECT mid
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN 1 ELSE 0 END) AS exp_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN 1 ELSE 0 END) AS exp_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN 1 ELSE 0 END) AS exp_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN 1 ELSE 0 END) AS exp_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN 1 ELSE 0 END) AS exp_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN 1 ELSE 0 END) AS exp_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN 1 ELSE 0 END) AS exp_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN is_share ELSE 0 END) AS is_share_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN is_share ELSE 0 END) AS is_share_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN is_share ELSE 0 END) AS is_share_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN is_share ELSE 0 END) AS is_share_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN is_share ELSE 0 END) AS is_share_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN is_share ELSE 0 END) AS is_share_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN is_share ELSE 0 END) AS is_share_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_cnt_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_cnt_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_cnt_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_cnt_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_cnt_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_cnt_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_cnt_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN is_return_1 ELSE 0 END) AS is_return_1_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN is_return_1 ELSE 0 END) AS is_return_1_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN is_return_1 ELSE 0 END) AS is_return_1_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN is_return_1 ELSE 0 END) AS is_return_1_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN is_return_1 ELSE 0 END) AS is_return_1_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN is_return_1 ELSE 0 END) AS is_return_1_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN is_return_1 ELSE 0 END) AS is_return_1_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN return_1_uv ELSE 0 END) AS return_1_uv_1h
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN return_1_uv ELSE 0 END) AS return_1_uv_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN return_1_uv ELSE 0 END) AS return_1_uv_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN return_1_uv ELSE 0 END) AS return_1_uv_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN return_1_uv ELSE 0 END) AS return_1_uv_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN return_1_uv ELSE 0 END) AS return_1_uv_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN return_1_uv ELSE 0 END) AS return_1_uv_168h
- ,SUM(CASE WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_1h -- 曝光的分享次数当作用户的分享次数
- ,SUM(CASE WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_3h
- ,SUM(CASE WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_6h
- ,SUM(CASE WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_12h
- ,SUM(CASE WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_24h
- ,SUM(CASE WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_72h
- ,SUM(CASE WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_168h
- ,SUM(
- CASE WHEN ts_diff <= 3600 * 1
- AND page IN ("回流页","详情页")
- AND animationSceneType IS NULL THEN 1 ELSE 0 END
- ) AS click_1h -- 回流页的曝光当作用户的回流点击次数
- ,SUM(
- CASE WHEN ts_diff <= 3600 * 3
- AND page IN ("回流页","详情页")
- AND animationSceneType IS NULL THEN 1 ELSE 0 END
- ) AS click_3h
- ,SUM(
- CASE WHEN ts_diff <= 3600 * 6
- AND page IN ("回流页","详情页")
- AND animationSceneType IS NULL THEN 1 ELSE 0 END
- ) AS click_6h
- ,SUM(
- CASE WHEN ts_diff <= 3600 * 12
- AND page IN ("回流页","详情页")
- AND animationSceneType IS NULL THEN 1 ELSE 0 END
- ) AS click_12h
- ,SUM(
- CASE WHEN ts_diff <= 3600 * 24
- AND page IN ("回流页","详情页")
- AND animationSceneType IS NULL THEN 1 ELSE 0 END
- ) AS click_24h
- ,SUM(
- CASE WHEN ts_diff <= 3600 * 72
- AND page IN ("回流页","详情页")
- AND animationSceneType IS NULL THEN 1 ELSE 0 END
- ) AS click_72h
- ,SUM(
- CASE WHEN ts_diff <= 3600 * 168
- AND page IN ("回流页","详情页")
- AND animationSceneType IS NULL THEN 1 ELSE 0 END
- ) AS click_168h
- FROM t_exp
- WHERE ts_diff >= 0
- GROUP BY mid
- )
- ,t_index AS
- (
- SELECT *
- ,ROUND(COALESCE(is_share_1h / exp_1h,0),6) AS str_one_1h
- ,ROUND(COALESCE(return_1_uv_1h / is_share_1h,0),6) AS ros_one_1h
- ,ROUND(COALESCE(share_cnt_1h / exp_1h,0),6) AS str_1h
- ,ROUND(COALESCE(return_1_uv_1h / share_cnt_1h,0),6) AS ros_1h
- ,ROUND(COALESCE(is_return_1_1h / exp_1h,0),6) AS str_plus_1h
- ,ROUND(COALESCE(return_1_uv_1h / is_return_1_1h,0),6) AS ros_minus_1h
- ,ROUND(COALESCE(return_1_uv_1h / exp_1h,0),6) AS rovn_1h
- ,ROUND(COALESCE(is_share_3h / exp_3h,0),6) AS str_one_3h
- ,ROUND(COALESCE(return_1_uv_3h / is_share_3h,0),6) AS ros_one_3h
- ,ROUND(COALESCE(share_cnt_3h / exp_3h,0),6) AS str_3h
- ,ROUND(COALESCE(return_1_uv_3h / share_cnt_3h,0),6) AS ros_3h
- ,ROUND(COALESCE(is_return_1_3h / exp_3h,0),6) AS str_plus_3h
- ,ROUND(COALESCE(return_1_uv_3h / is_return_1_3h,0),6) AS ros_minus_3h
- ,ROUND(COALESCE(return_1_uv_3h / exp_3h,0),6) AS rovn_3h
- ,ROUND(COALESCE(is_share_6h / exp_6h,0),6) AS str_one_6h
- ,ROUND(COALESCE(return_1_uv_6h / is_share_6h,0),6) AS ros_one_6h
- ,ROUND(COALESCE(share_cnt_6h / exp_6h,0),6) AS str_6h
- ,ROUND(COALESCE(return_1_uv_6h / share_cnt_6h,0),6) AS ros_6h
- ,ROUND(COALESCE(is_return_1_6h / exp_6h,0),6) AS str_plus_6h
- ,ROUND(COALESCE(return_1_uv_6h / is_return_1_6h,0),6) AS ros_minus_6h
- ,ROUND(COALESCE(return_1_uv_6h / exp_6h,0),6) AS rovn_6h
- ,ROUND(COALESCE(is_share_12h / exp_12h,0),6) AS str_one_12h
- ,ROUND(COALESCE(return_1_uv_12h / is_share_12h,0),6) AS ros_one_12h
- ,ROUND(COALESCE(share_cnt_12h / exp_12h,0),6) AS str_12h
- ,ROUND(COALESCE(return_1_uv_12h / share_cnt_12h,0),6) AS ros_12h
- ,ROUND(COALESCE(is_return_1_12h / exp_12h,0),6) AS str_plus_12h
- ,ROUND(COALESCE(return_1_uv_12h / is_return_1_12h,0),6) AS ros_minus_12h
- ,ROUND(COALESCE(return_1_uv_12h / exp_12h,0),6) AS rovn_12h
- ,ROUND(COALESCE(is_share_24h / exp_24h,0),6) AS str_one_24h
- ,ROUND(COALESCE(return_1_uv_24h / is_share_24h,0),6) AS ros_one_24h
- ,ROUND(COALESCE(share_cnt_24h / exp_24h,0),6) AS str_24h
- ,ROUND(COALESCE(return_1_uv_24h / share_cnt_24h,0),6) AS ros_24h
- ,ROUND(COALESCE(is_return_1_24h / exp_24h,0),6) AS str_plus_24h
- ,ROUND(COALESCE(return_1_uv_24h / is_return_1_24h,0),6) AS ros_minus_24h
- ,ROUND(COALESCE(return_1_uv_24h / exp_24h,0),6) AS rovn_24h
- ,ROUND(COALESCE(is_share_72h / exp_72h,0),6) AS str_one_72h
- ,ROUND(COALESCE(return_1_uv_72h / is_share_72h,0),6) AS ros_one_72h
- ,ROUND(COALESCE(share_cnt_72h / exp_72h,0),6) AS str_72h
- ,ROUND(COALESCE(return_1_uv_72h / share_cnt_72h,0),6) AS ros_72h
- ,ROUND(COALESCE(is_return_1_72h / exp_72h,0),6) AS str_plus_72h
- ,ROUND(COALESCE(return_1_uv_72h / is_return_1_72h,0),6) AS ros_minus_72h
- ,ROUND(COALESCE(return_1_uv_72h / exp_72h,0),6) AS rovn_72h
- ,ROUND(COALESCE(is_share_168h / exp_168h,0),6) AS str_one_168h
- ,ROUND(COALESCE(return_1_uv_168h / is_share_168h,0),6) AS ros_one_168h
- ,ROUND(COALESCE(share_cnt_168h / exp_168h,0),6) AS str_168h
- ,ROUND(COALESCE(return_1_uv_168h / share_cnt_168h,0),6) AS ros_168h
- ,ROUND(COALESCE(is_return_1_168h / exp_168h,0),6) AS str_plus_168h
- ,ROUND(COALESCE(return_1_uv_168h / is_return_1_168h,0),6) AS ros_minus_168h
- ,ROUND(COALESCE(return_1_uv_168h / exp_168h,0),6) AS rovn_168h
- FROM t_agg
- )
- ,t_result AS (
- SELECT mid
- ,JSON_FORMAT(JSON_OBJECT(
- "exp_1h", CAST(exp_1h AS STRING),
- "exp_3h", CAST(exp_3h AS STRING),
- "exp_6h", CAST(exp_6h AS STRING),
- "exp_12h", CAST(exp_12h AS STRING),
- "exp_24h", CAST(exp_24h AS STRING),
- "exp_72h", CAST(exp_72h AS STRING),
- "exp_168h", CAST(exp_168h AS STRING),
- "is_share_1h", CAST(is_share_1h AS STRING),
- "is_share_3h", CAST(is_share_3h AS STRING),
- "is_share_6h", CAST(is_share_6h AS STRING),
- "is_share_12h", CAST(is_share_12h AS STRING),
- "is_share_24h", CAST(is_share_24h AS STRING),
- "is_share_72h", CAST(is_share_72h AS STRING),
- "is_share_168h", CAST(is_share_168h AS STRING),
- "share_cnt_1h", CAST(share_cnt_1h AS STRING),
- "share_cnt_3h", CAST(share_cnt_3h AS STRING),
- "share_cnt_6h", CAST(share_cnt_6h AS STRING),
- "share_cnt_12h", CAST(share_cnt_12h AS STRING),
- "share_cnt_24h", CAST(share_cnt_24h AS STRING),
- "share_cnt_72h", CAST(share_cnt_72h AS STRING),
- "share_cnt_168h", CAST(share_cnt_168h AS STRING),
- "is_return_1_1h", CAST(is_return_1_1h AS STRING),
- "is_return_1_3h", CAST(is_return_1_3h AS STRING),
- "is_return_1_6h", CAST(is_return_1_6h AS STRING),
- "is_return_1_12h", CAST(is_return_1_12h AS STRING),
- "is_return_1_24h", CAST(is_return_1_24h AS STRING),
- "is_return_1_72h", CAST(is_return_1_72h AS STRING),
- "is_return_1_168h", CAST(is_return_1_168h AS STRING),
- "return_1_uv_1h", CAST(return_1_uv_1h AS STRING),
- "return_1_uv_3h", CAST(return_1_uv_3h AS STRING),
- "return_1_uv_6h", CAST(return_1_uv_6h AS STRING),
- "return_1_uv_12h", CAST(return_1_uv_12h AS STRING),
- "return_1_uv_24h", CAST(return_1_uv_24h AS STRING),
- "return_1_uv_72h", CAST(return_1_uv_72h AS STRING),
- "return_1_uv_168h", CAST(return_1_uv_168h AS STRING),
- "share_1h", CAST(share_1h AS STRING),
- "share_3h", CAST(share_3h AS STRING),
- "share_6h", CAST(share_6h AS STRING),
- "share_12h", CAST(share_12h AS STRING),
- "share_24h", CAST(share_24h AS STRING),
- "share_72h", CAST(share_72h AS STRING),
- "share_168h", CAST(share_168h AS STRING),
- "click_1h", CAST(click_1h AS STRING),
- "click_3h", CAST(click_3h AS STRING),
- "click_6h", CAST(click_6h AS STRING),
- "click_12h", CAST(click_12h AS STRING),
- "click_24h", CAST(click_24h AS STRING),
- "click_72h", CAST(click_72h AS STRING),
- "click_168h", CAST(click_168h AS STRING),
- "str_one_1h", CAST(str_one_1h AS STRING),
- -- "ros_one_1h", CAST(ros_one_1h AS STRING),
- -- "str_1h", CAST(str_1h AS STRING),
- -- "ros_1h", CAST(ros_1h AS STRING),
- -- "str_plus_1h", CAST(str_plus_1h AS STRING),
- -- "ros_minus_1h", CAST(ros_minus_1h AS STRING),
- -- "rovn_1h", CAST(rovn_1h AS STRING),
- -- "str_one_3h", CAST(str_one_3h AS STRING),
- -- "ros_one_3h", CAST(ros_one_3h AS STRING),
- -- "str_3h", CAST(str_3h AS STRING),
- -- "ros_3h", CAST(ros_3h AS STRING),
- -- "str_plus_3h", CAST(str_plus_3h AS STRING),
- -- "ros_minus_3h", CAST(ros_minus_3h AS STRING),
- -- "rovn_3h", CAST(rovn_3h AS STRING),
- -- "str_one_6h", CAST(str_one_6h AS STRING),
- -- "ros_one_6h", CAST(ros_one_6h AS STRING),
- -- "str_6h", CAST(str_6h AS STRING),
- -- "ros_6h", CAST(ros_6h AS STRING),
- -- "str_plus_6h", CAST(str_plus_6h AS STRING),
- -- "ros_minus_6h", CAST(ros_minus_6h AS STRING),
- -- "rovn_6h", CAST(rovn_6h AS STRING),
- -- "str_one_12h", CAST(str_one_12h AS STRING),
- -- "ros_one_12h", CAST(ros_one_12h AS STRING),
- -- "str_12h", CAST(str_12h AS STRING),
- -- "ros_12h", CAST(ros_12h AS STRING),
- -- "str_plus_12h", CAST(str_plus_12h AS STRING),
- -- "ros_minus_12h", CAST(ros_minus_12h AS STRING),
- -- "rovn_12h", CAST(rovn_12h AS STRING),
- -- "str_one_24h", CAST(str_one_24h AS STRING),
- -- "ros_one_24h", CAST(ros_one_24h AS STRING),
- -- "str_24h", CAST(str_24h AS STRING),
- -- "ros_24h", CAST(ros_24h AS STRING),
- -- "str_plus_24h", CAST(str_plus_24h AS STRING),
- -- "ros_minus_24h", CAST(ros_minus_24h AS STRING),
- -- "rovn_24h", CAST(rovn_24h AS STRING),
- -- "str_one_72h", CAST(str_one_72h AS STRING),
- -- "ros_one_72h", CAST(ros_one_72h AS STRING),
- -- "str_72h", CAST(str_72h AS STRING),
- -- "ros_72h", CAST(ros_72h AS STRING),
- -- "str_plus_72h", CAST(str_plus_72h AS STRING),
- -- "ros_minus_72h", CAST(ros_minus_72h AS STRING),
- -- "rovn_72h", CAST(rovn_72h AS STRING),
- -- "str_one_168h", CAST(str_one_168h AS STRING),
- -- "ros_one_168h", CAST(ros_one_168h AS STRING),
- -- "str_168h", CAST(str_168h AS STRING),
- -- "ros_168h", CAST(ros_168h AS STRING),
- -- "str_plus_168h", CAST(str_plus_168h AS STRING),
- -- "ros_minus_168h", CAST(ros_minus_168h AS STRING),
- "rovn_168h", CAST(rovn_168h AS STRING)
- ))
- FROM t_index
- )SELECT *
- FROM t_result
- ;
|