new.md 5.6 KB

现在有一个推荐场景的问题, 我的推荐预估的是 str、ros, vor 是统计出来的,最终的排序公式是str * ros * vor = vov str用的是 fm 模型,ros用的是 xgb模型,vor 用的是 24 小时的统计量; 现在有一个问题,最近看到一些头部的 item,vov 低,但是给的曝光量很多,现在要你分析具体的原因; 我会给你一个 sql: exp 是曝光的意思;具体 sql 如下,请你基于以下 sql 来分析具体低vov高曝光的原因是什么?这个问题的影响面有哪些?怎么解决:

模型:

  1. fmRov = str
  2. fmRovOrigin = str采样还原前
  3. NorXGBScore = ros
  4. vor=vor 真实
    1. Str = str-plus-noself
    2. Ros = ros-minus-noself
    3. Rov = return-n-uv-noself / exp
    4. Vov = new-exposure-cnt / exp

WITH tab_base AS (

SELECT  *
        ,((0.059 * fmRovOrigin) / (1 - (1 - 0.059) * fmRovOrigin)) AS online_fmrov
        ,((0.036 * fmRovOrigin) / (1 - (1 - 0.036) * fmRovOrigin)) AS real_fmrov
FROM    (
            SELECT  dt
                    ,hh
                    ,vid
                    ,is_share
                    ,share_cnt
                    ,is_return_1
                    ,is_return_n
                    ,is_return_noself
                    ,return_1_uv
                    ,return_n_uv
                    ,return_n_uv_noself
                    ,new_exposure_cnt
                    ,CAST(score AS DOUBLE) AS score
                    ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRov') AS DOUBLE) AS fmRov
                    ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRovOrigin') AS DOUBLE) AS fmRovOrigin
                    ,CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE) AS NorXGBScore
                    ,CAST(GET_JSON_OBJECT(scoresmap,'$.vor') AS DOUBLE) AS vor
                    ,CAST(GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS hasReturnRovScore
            FROM    (
                        SELECT  dt
                                ,hh
                                ,vid
                                ,is_share
                                ,share_cnt
                                ,is_return_1
                                ,is_return_n
                                ,is_return_noself
                                ,return_1_uv
                                ,return_n_uv
                                ,return_n_uv_noself
                                ,new_exposure_cnt
                                ,score
                                ,REPLACE(GET_JSON_OBJECT(extend_alg,'$.scoresMap'),"\\","") AS scoresmap
                        FROM    loghubods.dwd_recsys_alg_sample_all_20250212
                        WHERE   dt BETWEEN '${start_dt}' AND '${end_dt}'
                        AND     hh BETWEEN '${start_hh}' AND '${end_hh}'
                        AND     apptype = '${apptype}'
                        --AND     vid IN ('62421458','55931081','62955809','58807530') 
                        AND     vid IN ('62967014','63159658','62151288')
                        AND     extend_alg IS NOT NULL
                        AND     GET_JSON_OBJECT(extend_alg,'$.scoresMap') IS NOT NULL
                        AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页")
                        AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
                        AND     abcode NOT IN ("ab100")
                    ) 
            WHERE   GET_JSON_OBJECT(scoresmap,'$.fmRov') IS NOT NULL
            AND     GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') IS NOT NULL
            AND     GET_JSON_OBJECT(scoresmap,'$.vor') IS NOT NULL
            AND     GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') IS NOT NULL
        ) 

) ,tab_pre AS (

SELECT  dt --,hh
        ,vid
        ,COUNT(1) AS cnt
        ,AVG(score) AS score
        ,AVG(fmRov) AS fmRov
        ,AVG(online_fmrov) AS online_fmrov
        ,AVG(real_fmrov) AS real_fmrov
        ,AVG(1.22 * pow(NorXGBScore,1.15)) AS NorXGBScore
        ,AVG(vor) AS vor
        ,AVG(hasReturnRovScore) AS hasReturnRovScore
        ,STDDEV(score) AS std_score
        ,STDDEV(fmRov) AS std_fmRov
        ,STDDEV(NorXGBScore) AS std_NorXGBScore
        ,STDDEV(vor) AS std_vor
        ,STDDEV(hasReturnRovScore) AS std_hasReturnRovScore
FROM    tab_base
GROUP BY dt -- ,hh
         ,vid

) ,tab_post AS (

SELECT  dt --,hh
        ,vid
        ,COUNT(1) AS exp
        ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
        ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
        ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
        ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
        ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
        ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
        ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
        ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
        ,round(COALESCE(SUM(is_return_noself) / COUNT(1),0),6) AS str_plus_noself
        ,round(COALESCE(SUM(return_n_uv_noself) / SUM(is_return_noself),0),6) AS ros_minus_noself
FROM    tab_base
GROUP BY dt -- ,hh
         ,vid

) SELECT t1.*

    ,t2.*

FROM tab_post t1 LEFT JOIN tab_pre t2 ON t1.dt = t2.dt --AND t1.hh = t2.hh AND t1.vid = t2.vid ORDER BY t1.dt,t1.vid ;