|
@@ -0,0 +1,202 @@
|
|
|
|
|
+--@exclude_input=loghubods.video_action_log_flow_new
|
|
|
|
|
+-- =====================================================================
|
|
|
|
|
+-- 上游曝光去重视图表 (行级, 每行 = 一次去重后的曝光)
|
|
|
|
|
+-- 版本: 20250402 (最早版本, 纯曝光去重, 无分享/回流)
|
|
|
|
|
+-- LIFECYCLE 3 (仅保留 3 天, 作为下游 base 表的输入中间层)
|
|
|
|
|
+-- =====================================================================
|
|
|
|
|
+--
|
|
|
|
|
+-- 数据源: video_action_log_flow_new (businesstype=videoView)
|
|
|
|
|
+-- 过滤: apptype NOT IN ('12'), mid/videoid IS NOT NULL
|
|
|
|
|
+--
|
|
|
|
|
+-- 去重逻辑:
|
|
|
|
|
+-- ROW_NUMBER OVER (PARTITION BY dthh,apptype,uid,mid,vid,sessionid,subsessionid,pagesource
|
|
|
|
|
+-- ORDER BY logtimestamp) = 1
|
|
|
|
|
+--
|
|
|
|
|
+-- 输出拆分 (UNION ALL):
|
|
|
|
|
+-- 非 share 页: pagesource NOT REGEXP "-pages/user-videos-share$", rn=1
|
|
|
|
|
+-- share 页: pagesource REGEXP "-pages/user-videos-share$" (不过滤 rn, 保留全部)
|
|
|
|
|
+--
|
|
|
|
|
+-- id 生成: CONCAT(dthh, ":", subsessionid, ":", dthh_id)
|
|
|
|
|
+-- dthh_id = ROW_NUMBER OVER (PARTITION BY dthh,subsessionid ORDER BY clienttimestamp DESC)
|
|
|
|
|
+--
|
|
|
|
|
+-- 与 base_20260209 的关系:
|
|
|
|
|
+-- base_20260209.t_exposure CTE 复用了本表的去重逻辑
|
|
|
|
|
+-- 本表仅输出曝光字段, base 表在此基础上 JOIN 分享/回流/B/C/D 链
|
|
|
|
|
+-- =====================================================================
|
|
|
|
|
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_view_20250402
|
|
|
|
|
+(
|
|
|
|
|
+ dthh_id string
|
|
|
|
|
+ ,dthh string
|
|
|
|
|
+ ,apptype string
|
|
|
|
|
+ ,uid string
|
|
|
|
|
+ ,mid string
|
|
|
|
|
+ ,vid string
|
|
|
|
|
+ ,sessionid string
|
|
|
|
|
+ ,subsessionid string
|
|
|
|
|
+ ,rootsessionid_new string
|
|
|
|
|
+ ,pagesource string
|
|
|
|
|
+ ,recommendlogvo string-- 推荐算法的返回结果日志存在这个字段中
|
|
|
|
|
+ ,abcode string-- 推荐算法的ab分组
|
|
|
|
|
+ ,recommendpagetype string -- 三种回流头部;两种下滑-沉浸页下滑和feed下滑
|
|
|
|
|
+ ,recomtraceid string
|
|
|
|
|
+ ,headvideoid string
|
|
|
|
|
+ ,rootsourceid string
|
|
|
|
|
+ ,hotsencetype string
|
|
|
|
|
+ ,animationSceneType string
|
|
|
|
|
+ ,extParams string
|
|
|
|
|
+ ,flowpool string-- 14#68#3#1735262438476#2
|
|
|
|
|
+ ,level string
|
|
|
|
|
+ ,clientip string
|
|
|
|
|
+ ,machineinfo_brand string
|
|
|
|
|
+ ,machineinfo_model string
|
|
|
|
|
+ ,machineinfo_system string
|
|
|
|
|
+ ,machineinfo_wechatversion string
|
|
|
|
|
+ ,machineinfo_sdkversion string
|
|
|
|
|
+ ,province string
|
|
|
|
|
+ ,city string
|
|
|
|
|
+ ,versioncode string
|
|
|
|
|
+ , ts string
|
|
|
|
|
+ ,rn string
|
|
|
|
|
+ ,id string
|
|
|
|
|
+)
|
|
|
|
|
+PARTITIONED BY
|
|
|
|
|
+(
|
|
|
|
|
+ dt STRING COMMENT '日期:20240105'
|
|
|
|
|
+ ,hh STRING COMMENT '小时:04'
|
|
|
|
|
+)
|
|
|
|
|
+STORED AS ALIORC
|
|
|
|
|
+TBLPROPERTIES ('columnar.nested.type' = 'true','comment' = '推荐算法-labelmatch表-上游曝光表')
|
|
|
|
|
+LIFECYCLE 3
|
|
|
|
|
+;
|
|
|
|
|
+SET hive.exec.dynamic.partition = true
|
|
|
|
|
+;
|
|
|
|
|
+
|
|
|
|
|
+SET hive.exec.dynamic.partition.mode = nonstrict
|
|
|
|
|
+;
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_base_view_20250402 PARTITION (dt,hh)
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+WITH t_exposure_base AS
|
|
|
|
|
+(
|
|
|
|
|
+ SELECT ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),subsessionid ORDER BY clienttimestamp DESC ) AS dthh_id
|
|
|
|
|
+ ,CONCAT(year,month,day,hour) AS dthh
|
|
|
|
|
+ ,apptype
|
|
|
|
|
+ ,uid
|
|
|
|
|
+ ,mid
|
|
|
|
|
+ ,videoid AS vid
|
|
|
|
|
+ ,sessionid
|
|
|
|
|
+ ,subsessionid
|
|
|
|
|
+ ,rootsessionid_new
|
|
|
|
|
+ ,pagesource
|
|
|
|
|
+ ,recommendlogvo -- 推荐算法的返回结果日志存在这个字段中
|
|
|
|
|
+ ,COALESCE(GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003'),"unknown") AS abcode -- 推荐算法的ab分组
|
|
|
|
|
+ ,GET_JSON_OBJECT(extparams,'$.recommendPageType') AS recommendpagetype -- 三种回流头部;两种下滑-沉浸页下滑和feed下滑
|
|
|
|
|
+ ,GET_JSON_OBJECT(extparams,'$.recomTraceId') AS recomtraceid
|
|
|
|
|
+ ,CASE WHEN GET_JSON_OBJECT(extParams,'$.head_videoid') IS NOT NULL THEN GET_JSON_OBJECT(extParams,'$.head_videoid')
|
|
|
|
|
+ ELSE GET_JSON_OBJECT(extParams,'$.head_videoId')
|
|
|
|
|
+ END AS headvideoid
|
|
|
|
|
+ ,GET_JSON_OBJECT(extParams,'$.rootSourceId') AS rootsourceid
|
|
|
|
|
+ ,COALESCE(hotsencetype,sencetype,"other") AS hotsencetype
|
|
|
|
|
+ ,GET_JSON_OBJECT(extParams,'$.animationSceneType') AS animationSceneType
|
|
|
|
|
+ ,extParams AS extParams
|
|
|
|
|
+ ,flowpool -- 14#68#3#1735262438476#2
|
|
|
|
|
+ ,SPLIT(flowpool,'#')[2] AS level
|
|
|
|
|
+ ,clientip
|
|
|
|
|
+ ,machineinfo_brand
|
|
|
|
|
+ ,machineinfo_model
|
|
|
|
|
+ ,machineinfo_system
|
|
|
|
|
+ ,machineinfo_wechatversion
|
|
|
|
|
+ ,machineinfo_sdkversion
|
|
|
|
|
+ ,ANALYSISIP(clientip,"region") AS province
|
|
|
|
|
+ ,ANALYSISIP(clientip,"city") AS city
|
|
|
|
|
+ ,versioncode
|
|
|
|
|
+ ,CAST(logtimestamp / 1000 AS BIGINT) AS ts
|
|
|
|
|
+ ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,uid,mid,videoid,sessionid,subsessionid,pagesource ORDER BY logtimestamp ) AS rn
|
|
|
|
|
+ ,CONCAT(year,month,day) AS dt
|
|
|
|
|
+ ,hour AS hh
|
|
|
|
|
+ FROM loghubods.video_action_log_flow_new
|
|
|
|
|
+ WHERE CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
|
|
|
|
|
+ AND businesstype IN ('videoView')
|
|
|
|
|
+ AND apptype IS NOT NULL
|
|
|
|
|
+ AND apptype NOT IN ('12')
|
|
|
|
|
+ AND mid IS NOT NULL
|
|
|
|
|
+ AND videoid IS NOT NULL
|
|
|
|
|
+)
|
|
|
|
|
+SELECT dthh_id
|
|
|
|
|
+ ,dthh
|
|
|
|
|
+ ,apptype
|
|
|
|
|
+ ,uid
|
|
|
|
|
+ ,mid
|
|
|
|
|
+ ,vid
|
|
|
|
|
+ ,sessionid
|
|
|
|
|
+ ,subsessionid
|
|
|
|
|
+ ,rootsessionid_new
|
|
|
|
|
+ ,pagesource
|
|
|
|
|
+ ,recommendlogvo
|
|
|
|
|
+ ,abcode
|
|
|
|
|
+ ,recommendpagetype
|
|
|
|
|
+ ,recomtraceid
|
|
|
|
|
+ ,headvideoid
|
|
|
|
|
+ ,rootsourceid
|
|
|
|
|
+ ,hotsencetype
|
|
|
|
|
+ ,animationscenetype
|
|
|
|
|
+ ,extparams
|
|
|
|
|
+ ,flowpool
|
|
|
|
|
+ ,level
|
|
|
|
|
+ ,clientip
|
|
|
|
|
+ ,machineinfo_brand
|
|
|
|
|
+ ,machineinfo_model
|
|
|
|
|
+ ,machineinfo_system
|
|
|
|
|
+ ,machineinfo_wechatversion
|
|
|
|
|
+ ,machineinfo_sdkversion
|
|
|
|
|
+ ,province
|
|
|
|
|
+ ,city
|
|
|
|
|
+ ,versioncode
|
|
|
|
|
+ ,ts
|
|
|
|
|
+ ,rn
|
|
|
|
|
+ ,CONCAT(dthh,":",subsessionid,":",dthh_id) AS id
|
|
|
|
|
+ ,dt
|
|
|
|
|
+ ,hh
|
|
|
|
|
+FROM t_exposure_base
|
|
|
|
|
+WHERE pagesource NOT REGEXP "-pages/user-videos-share$"
|
|
|
|
|
+AND rn = 1
|
|
|
|
|
+UNION ALL
|
|
|
|
|
+SELECT dthh_id
|
|
|
|
|
+ ,dthh
|
|
|
|
|
+ ,apptype
|
|
|
|
|
+ ,uid
|
|
|
|
|
+ ,mid
|
|
|
|
|
+ ,vid
|
|
|
|
|
+ ,sessionid
|
|
|
|
|
+ ,subsessionid
|
|
|
|
|
+ ,rootsessionid_new
|
|
|
|
|
+ ,pagesource
|
|
|
|
|
+ ,recommendlogvo
|
|
|
|
|
+ ,abcode
|
|
|
|
|
+ ,recommendpagetype
|
|
|
|
|
+ ,recomtraceid
|
|
|
|
|
+ ,headvideoid
|
|
|
|
|
+ ,rootsourceid
|
|
|
|
|
+ ,hotsencetype
|
|
|
|
|
+ ,animationscenetype
|
|
|
|
|
+ ,extparams
|
|
|
|
|
+ ,flowpool
|
|
|
|
|
+ ,level
|
|
|
|
|
+ ,clientip
|
|
|
|
|
+ ,machineinfo_brand
|
|
|
|
|
+ ,machineinfo_model
|
|
|
|
|
+ ,machineinfo_system
|
|
|
|
|
+ ,machineinfo_wechatversion
|
|
|
|
|
+ ,machineinfo_sdkversion
|
|
|
|
|
+ ,province
|
|
|
|
|
+ ,city
|
|
|
|
|
+ ,versioncode
|
|
|
|
|
+ ,ts
|
|
|
|
|
+ ,rn
|
|
|
|
|
+ ,CONCAT(dthh,":",subsessionid,":",dthh_id) AS id
|
|
|
|
|
+ ,dt
|
|
|
|
|
+ ,hh
|
|
|
|
|
+FROM t_exposure_base
|
|
|
|
|
+WHERE pagesource REGEXP "-pages/user-videos-share$"
|