现在有一个推荐场景的问题, 我的推荐预估的是 str、ros, vor 是统计出来的,最终的排序公式是str * ros * vor = vov str用的是 fm 模型,ros用的是 xgb模型,vor 用的是 24 小时的统计量; 现在有一个问题,最近看到一些头部的 item,vov 低,但是给的曝光量很多,现在要你分析具体的原因; 我会给你一个 sql: 模型:
exp 是曝光的意思;具体 sql 如下,请你基于以下 sql 来分析具体低vov高曝光的原因是什么?这个问题的影响面有哪些?怎么解决:
WITH tab_base AS (
SELECT *
,((0.059 * fmRovOrigin) / (1 - (1 - 0.059) * fmRovOrigin)) AS online_fmrov
,((0.036 * fmRovOrigin) / (1 - (1 - 0.036) * fmRovOrigin)) AS real_fmrov
FROM (
SELECT dt
,hh
,vid
,is_share
,share_cnt
,is_return_1
,is_return_n
,is_return_noself
,return_1_uv
,return_n_uv
,return_n_uv_noself
,new_exposure_cnt
,CAST(score AS DOUBLE) AS score
,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRov') AS DOUBLE) AS fmRov
,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRovOrigin') AS DOUBLE) AS fmRovOrigin
,CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE) AS NorXGBScore
,CAST(GET_JSON_OBJECT(scoresmap,'$.vor') AS DOUBLE) AS vor
,CAST(GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS hasReturnRovScore
FROM (
SELECT dt
,hh
,vid
,is_share
,share_cnt
,is_return_1
,is_return_n
,is_return_noself
,return_1_uv
,return_n_uv
,return_n_uv_noself
,new_exposure_cnt
,score
,REPLACE(GET_JSON_OBJECT(extend_alg,'$.scoresMap'),"\\","") AS scoresmap
FROM loghubods.dwd_recsys_alg_sample_all_20250212
WHERE dt BETWEEN '${start_dt}' AND '${end_dt}'
AND hh BETWEEN '${start_hh}' AND '${end_hh}'
AND apptype = '${apptype}'
--AND vid IN ('62421458','55931081','62955809','58807530')
AND vid IN ('62967014','63159658','62151288')
AND extend_alg IS NOT NULL
AND GET_JSON_OBJECT(extend_alg,'$.scoresMap') IS NOT NULL
AND page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页")
AND abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
AND abcode NOT IN ("ab100")
)
WHERE GET_JSON_OBJECT(scoresmap,'$.fmRov') IS NOT NULL
AND GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') IS NOT NULL
AND GET_JSON_OBJECT(scoresmap,'$.vor') IS NOT NULL
AND GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') IS NOT NULL
)
) ,tab_pre AS (
SELECT dt --,hh
,vid
,COUNT(1) AS cnt
,AVG(score) AS score
,AVG(fmRov) AS fmRov
,AVG(online_fmrov) AS online_fmrov
,AVG(real_fmrov) AS real_fmrov
,AVG(1.22 * pow(NorXGBScore,1.15)) AS NorXGBScore
,AVG(vor) AS vor
,AVG(hasReturnRovScore) AS hasReturnRovScore
,STDDEV(score) AS std_score
,STDDEV(fmRov) AS std_fmRov
,STDDEV(NorXGBScore) AS std_NorXGBScore
,STDDEV(vor) AS std_vor
,STDDEV(hasReturnRovScore) AS std_hasReturnRovScore
FROM tab_base
GROUP BY dt -- ,hh
,vid
) ,tab_post AS (
SELECT dt --,hh
,vid
,COUNT(1) AS exp
,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
,round(COALESCE(SUM(is_return_noself) / COUNT(1),0),6) AS str_plus_noself
,round(COALESCE(SUM(return_n_uv_noself) / SUM(is_return_noself),0),6) AS ros_minus_noself
FROM tab_base
GROUP BY dt -- ,hh
,vid
) SELECT t1.*
,t2.*
FROM tab_post t1 LEFT JOIN tab_pre t2 ON t1.dt = t2.dt --AND t1.hh = t2.hh AND t1.vid = t2.vid ORDER BY t1.dt,t1.vid ;