|
|
@@ -0,0 +1,163 @@
|
|
|
+-- 品类命中分析 - 明细版本
|
|
|
+-- 输出每条记录的用户历史品类和频次,方便debug
|
|
|
+
|
|
|
+-- Step 1: explode each user's level-1 category history into one row per entry.
+-- The '$.c1_s' value inside `feature` is handled as a JSON array of objects;
+-- the '$.na' value of every object is emitted as history_cat1.
+WITH user_cate1_exploded AS (
+    SELECT
+        src.mid,
+        get_json_object(piece.json_obj, '$.na') AS history_cat1
+    FROM (
+        -- Pull the raw array text out once for the dt = '${end}' partition.
+        SELECT
+            mid,
+            get_json_object(feature, '$.c1_s') AS c1_raw
+        FROM loghubods.alg_recsys_feature_user_share_return_stat
+        WHERE dt = '${end}'
+    ) src
+    LATERAL VIEW explode(
+        split(
+            regexp_replace(
+                regexp_replace(
+                    -- Replace escaped quotes with plain quotes.
+                    regexp_replace(src.c1_raw, '\\\\\"', '\"'),
+                    -- Strip the enclosing [ and ].
+                    '^\\[|\\]$', ''
+                ),
+                -- Rewrite the "},{" element boundary as "}|{" so split() can cut on '|'.
+                '\\},\\{', '}|{'
+            ),
+            '\\|'
+        )
+    ) piece AS json_obj
+    -- Skip users without history (missing or empty array) and empty fragments.
+    WHERE src.c1_raw IS NOT NULL
+      AND src.c1_raw != '[]'
+      AND piece.json_obj IS NOT NULL
+      AND piece.json_obj != ''
+)
|
|
|
+
|
|
|
+-- Step 2: per-user distinct level-1 categories, both as an array
+-- (history_cat1_list) and as a comma-joined string for display.
+,user_cate1_agg AS (
+    SELECT
+        grouped.mid,
+        grouped.history_cat1_list,
+        concat_ws(',', grouped.history_cat1_list) AS 用户历史一级品类
+    FROM (
+        -- One row per mid holding the de-duplicated category names.
+        SELECT
+            mid,
+            collect_set(history_cat1) AS history_cat1_list
+        FROM user_cate1_exploded
+        WHERE history_cat1 IS NOT NULL
+        GROUP BY mid
+    ) grouped
+)
|
|
|
+
|
|
|
+-- Per-user level-1 category frequencies: one "category:count" entry per
+-- distinct category, joined with '|' into a single display string.
+,user_cate1_freq AS (
+    SELECT
+        mid,
+        -- sort_array fixes the entry order; collect_list alone returns the
+        -- entries in whatever order the rows arrive, so the string could
+        -- differ between runs over the same data.
+        concat_ws('|', sort_array(collect_list(cat_freq))) AS 用户历史一级品类频次
+    FROM (
+        -- Count how many history entries each user has per category.
+        SELECT
+            mid,
+            concat(history_cat1, ':', cast(count(*) AS string)) AS cat_freq
+        FROM user_cate1_exploded
+        WHERE history_cat1 IS NOT NULL
+        GROUP BY mid, history_cat1
+    ) t
+    GROUP BY mid
+)
|
|
|
+
|
|
|
+-- Step 3: explode each user's level-2 category history into one row per entry.
+-- The '$.c2_s' value inside `feature` is handled as a JSON array of objects;
+-- the '$.na' value of every object is emitted as history_cat2.
+,user_cate2_exploded AS (
+    SELECT
+        src.mid,
+        get_json_object(piece.json_obj, '$.na') AS history_cat2
+    FROM (
+        -- Pull the raw array text out once for the dt = '${end}' partition.
+        SELECT
+            mid,
+            get_json_object(feature, '$.c2_s') AS c2_raw
+        FROM loghubods.alg_recsys_feature_user_share_return_stat
+        WHERE dt = '${end}'
+    ) src
+    LATERAL VIEW explode(
+        split(
+            regexp_replace(
+                regexp_replace(
+                    -- Replace escaped quotes with plain quotes.
+                    regexp_replace(src.c2_raw, '\\\\\"', '\"'),
+                    -- Strip the enclosing [ and ].
+                    '^\\[|\\]$', ''
+                ),
+                -- Rewrite the "},{" element boundary as "}|{" so split() can cut on '|'.
+                '\\},\\{', '}|{'
+            ),
+            '\\|'
+        )
+    ) piece AS json_obj
+    -- Skip users without history (missing or empty array) and empty fragments.
+    WHERE src.c2_raw IS NOT NULL
+      AND src.c2_raw != '[]'
+      AND piece.json_obj IS NOT NULL
+      AND piece.json_obj != ''
+)
|
|
|
+
|
|
|
+-- Step 4: per-user distinct level-2 categories, both as an array
+-- (history_cat2_list) and as a comma-joined string for display.
+,user_cate2_agg AS (
+    SELECT
+        grouped.mid,
+        grouped.history_cat2_list,
+        concat_ws(',', grouped.history_cat2_list) AS 用户历史二级品类
+    FROM (
+        -- One row per mid holding the de-duplicated category names.
+        SELECT
+            mid,
+            collect_set(history_cat2) AS history_cat2_list
+        FROM user_cate2_exploded
+        WHERE history_cat2 IS NOT NULL
+        GROUP BY mid
+    ) grouped
+)
|
|
|
+
|
|
|
+-- Per-user level-2 category frequencies: one "category:count" entry per
+-- distinct category, joined with '|' into a single display string.
+,user_cate2_freq AS (
+    SELECT
+        mid,
+        -- sort_array fixes the entry order; collect_list alone returns the
+        -- entries in whatever order the rows arrive, so the string could
+        -- differ between runs over the same data.
+        concat_ws('|', sort_array(collect_list(cat_freq))) AS 用户历史二级品类频次
+    FROM (
+        -- Count how many history entries each user has per category.
+        SELECT
+            mid,
+            concat(history_cat2, ':', cast(count(*) AS string)) AS cat_freq
+        FROM user_cate2_exploded
+        WHERE history_cat2 IS NOT NULL
+        GROUP BY mid, history_cat2
+    ) t
+    GROUP BY mid
+)
|
|
|
+
|
|
|
+-- Step 5: base rows for the analysis, limited to dt within [start, end],
+-- usersharedepth = 0 and a non-null videoid.
+,base_data AS (
+    SELECT
+        dt,
+        channel,
+        mid,
+        再分享merge一级品类 AS 再分享一级品类,
+        再分享merge二级品类 AS 再分享二级品类,
+        再分享群聊回流uv,
+        再分享单聊回流uv,
+        是否原视频
+    FROM loghubods.opengid_base_data
+    -- Quote the substituted dates so dt is compared against string literals,
+    -- consistent with the dt = '${end}' filters on the user-history table.
+    -- Left unquoted, the value is parsed as a number (or as arithmetic if it
+    -- contains dashes), forcing an implicit cast on dt.
+    WHERE dt >= '${start}'
+      AND dt <= '${end}'
+      AND usersharedepth = 0
+      AND videoid IS NOT NULL
+)
|
|
|
+
|
|
|
+-- Step 6: detail output. Every base_data row is kept (LEFT JOINs) and
+-- enriched with the user's level-1 / level-2 category history; each history
+-- CTE is grouped by mid, so the joins add at most one match per row.
+SELECT
+    base.dt,
+    base.channel,
+    base.mid,
+    base.再分享一级品类,
+    base.再分享二级品类,
+    cat1.用户历史一级品类,
+    cat1_freq.用户历史一级品类频次,
+    cat2.用户历史二级品类,
+    cat2_freq.用户历史二级品类频次,
+    -- Level-1 hit flag: '无历史' when no history row joined, '命中' when the
+    -- reshared category is in the user's history list, otherwise '未命中'.
+    CASE
+        WHEN cat1.history_cat1_list IS NULL THEN '无历史'
+        WHEN array_contains(cat1.history_cat1_list, base.再分享一级品类) THEN '命中'
+        ELSE '未命中'
+    END AS 一级品类命中,
+    -- Level-2 hit flag, same three outcomes as above.
+    CASE
+        WHEN cat2.history_cat2_list IS NULL THEN '无历史'
+        WHEN array_contains(cat2.history_cat2_list, base.再分享二级品类) THEN '命中'
+        ELSE '未命中'
+    END AS 二级品类命中,
+    -- NOTE(review): the sum is NULL when either uv column is NULL; confirm
+    -- whether that is intended.
+    base.再分享群聊回流uv + base.再分享单聊回流uv AS 裂变uv,
+    base.是否原视频
+FROM base_data base
+LEFT JOIN user_cate1_agg cat1
+    ON base.mid = cat1.mid
+LEFT JOIN user_cate1_freq cat1_freq
+    ON base.mid = cat1_freq.mid
+LEFT JOIN user_cate2_agg cat2
+    ON base.mid = cat2.mid
+LEFT JOIN user_cate2_freq cat2_freq
+    ON base.mid = cat2_freq.mid
+ORDER BY base.dt, base.channel, base.mid
+;
|