Просмотр исходного кода

feat(头部品类分析): 区分headvideoid为空情况,增加整体汇总行

- SQL区分三种情况:headvideoid为空、未关联头部、实际品类
- 下钻表格增加rn=0整体汇总行,展示各人群整体指标
- 新增headvideoid分布子查询,分析空值占比(约13-20%)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui 2 месяцев назад
Родитель
Сommit
612add6082

+ 50 - 0
tasks/人群品类曝光分析/头部品类分析/headvideoid分布/query.sql

@@ -0,0 +1,50 @@
+-- Analyze the share of rows whose headvideoid is empty, per dt and crowd.
+WITH t_rec AS ( -- base rows from the exposure table with derived in_out / layer / page_rec
+    SELECT  dt
+            ,mid -- NOTE(review): mid, subsessionid and page are projected here but not referenced by t_agg
+            ,subsessionid
+            ,headvideoid
+            ,CASE WHEN rootsourceid = '' OR rootsourceid IS NULL THEN '内部' ELSE '外部' END AS in_out -- empty or NULL rootsourceid => '内部', anything else => '外部'
+            ,GET_JSON_OBJECT(extend,"$.extParams.userShareDepth") AS layer -- value at extParams.userShareDepth inside the extend JSON
+            ,page
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他" -- not reachable in this query: the WHERE below limits page to the six values handled above
+            END AS page_rec
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt BETWEEN "${start}" AND "${end}" -- BETWEEN is inclusive on both ends; presumably ${start}/${end} are substituted before execution — confirm
+    AND     apptype IN ('4','0')
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+)
+,t_agg AS ( -- row counts per dt x crowd x headvideoid status, restricted to page_rec = '推荐'
+    SELECT  dt
+            ,CASE   WHEN in_out = '内部' THEN '内部' -- branches are evaluated in order, so '内部' wins regardless of layer
+                    WHEN layer = '0' THEN '外部0层'
+                    WHEN CAST(layer AS INT) > 0 THEN '外部裂变' -- NOTE(review): result for a non-numeric layer depends on the engine's CAST semantics — confirm
+                    ELSE '其他' -- remaining '外部' rows whose layer matched neither branch above (e.g. NULL)
+            END AS crowd
+            ,CASE   WHEN headvideoid IS NULL OR headvideoid = '' THEN 'headvideoid为空' -- NULL and empty string are counted together
+                    ELSE 'headvideoid有值'
+            END AS headvid_status
+            ,COUNT(1) AS cnt -- number of rows in the group
+    FROM    t_rec
+    WHERE   page_rec = '推荐'
+    GROUP BY dt -- the two CASE expressions below repeat the SELECT list verbatim and must be kept in sync with it
+            ,CASE   WHEN in_out = '内部' THEN '内部'
+                    WHEN layer = '0' THEN '外部0层'
+                    WHEN CAST(layer AS INT) > 0 THEN '外部裂变'
+                    ELSE '其他'
+            END
+            ,CASE   WHEN headvideoid IS NULL OR headvideoid = '' THEN 'headvideoid为空'
+                    ELSE 'headvideoid有值'
+            END
+)
+SELECT  dt
+        ,crowd
+        ,headvid_status
+        ,cnt
+        ,round(cnt / SUM(cnt) OVER (PARTITION BY dt, crowd), 4) AS pct -- this status's share of the (dt, crowd) total, rounded to 4 decimals
+FROM    t_agg
+WHERE   crowd <> '其他' -- drop the fallback crowd bucket from the output
+ORDER BY dt DESC, crowd, headvid_status
+;

+ 3 - 1
tasks/人群品类曝光分析/头部品类分析/query.sql

@@ -41,6 +41,7 @@ WITH t_head AS (
     SELECT  r.dt
             ,r.mid
             ,r.subsessionid
+            ,r.headvideoid
             ,r.rec_vid
             ,r.ts
             ,r.rec_in_out
@@ -78,7 +79,8 @@ WITH t_head AS (
             END AS crowd
             ,head_vid
             ,head_cate1
-            ,CASE   WHEN head_vid IS NULL THEN '未关联头部'
+            ,CASE   WHEN headvideoid IS NULL OR headvideoid = '' THEN 'headvideoid为空'
+                    WHEN head_vid IS NULL THEN '未关联头部'
                     WHEN head_cate2 IS NULL OR head_cate2 = '' THEN 'unknown'
                     ELSE head_cate2
             END AS head_cate2

+ 52 - 7
tasks/人群品类曝光分析/头部品类分析/visualize.py

@@ -133,6 +133,18 @@ def calc_head_drill_data(date=None):
     for crowd in crowd_list:
         crowd_df = agg_all[agg_all['crowd'] == crowd]
         result['all'][crowd] = {}
+        # 计算整体汇总
+        total_exp = int(crowd_df['exp'].sum())
+        total_share = crowd_df['share_cnt'].sum()
+        total_return = crowd_df['return_n_uv'].sum()
+        total_new_exp = crowd_df['new_exposure_cnt'].sum()
+        result['all'][crowd]['_total'] = {
+            'exp': total_exp,
+            'str': round(total_share / (total_exp + 1), 4),
+            'ros': round(total_return / (total_share + 1), 4),
+            'rovn': round(total_return / (total_exp + 1), 4),
+            'vov': round(total_new_exp / (total_exp + 1), 4),
+        }
         for _, row in crowd_df.iterrows():
             result['all'][crowd][row['rec_cate2']] = {
                 'exp': int(row['exp']),
@@ -148,6 +160,18 @@ def calc_head_drill_data(date=None):
         for crowd in crowd_list:
             crowd_df = agg[(agg['head_cate2'] == head_cate) & (agg['crowd'] == crowd)]
             result[head_cate][crowd] = {}
+            # 计算该头部品类下的整体汇总
+            total_exp = int(crowd_df['exp'].sum())
+            total_share = crowd_df['share_cnt'].sum()
+            total_return = crowd_df['return_n_uv'].sum()
+            total_new_exp = crowd_df['new_exposure_cnt'].sum()
+            result[head_cate][crowd]['_total'] = {
+                'exp': total_exp,
+                'str': round(total_share / (total_exp + 1), 4),
+                'ros': round(total_return / (total_share + 1), 4),
+                'rovn': round(total_return / (total_exp + 1), 4),
+                'vov': round(total_new_exp / (total_exp + 1), 4),
+            }
             for _, row in crowd_df.iterrows():
                 result[head_cate][crowd][row['rec_cate2']] = {
                     'exp': int(row['exp']),
@@ -706,18 +730,27 @@ html_content = f"""<!DOCTYPE html>
             return;
         }}
 
-        // 为每个人群计算 Top N
+        // 为每个人群计算 Top N 和整体汇总
         const crowdTopN = {{}};
+        const crowdTotal = {{}};
         crowdList.forEach(crowd => {{
             const items = [];
             if (data[crowd]) {{
                 for (const cat in data[crowd]) {{
-                    items.push({{
-                        cat: cat,
-                        sortVal: data[crowd][cat][sortBy] || 0,
-                        showVal: data[crowd][cat][showMetric] || 0,
-                        exp: data[crowd][cat].exp || 0
-                    }});
+                    if (cat === '_total') {{
+                        // 保存整体汇总
+                        crowdTotal[crowd] = {{
+                            exp: data[crowd][cat].exp || 0,
+                            showVal: data[crowd][cat][showMetric] || 0
+                        }};
+                    }} else {{
+                        items.push({{
+                            cat: cat,
+                            sortVal: data[crowd][cat][sortBy] || 0,
+                            showVal: data[crowd][cat][showMetric] || 0,
+                            exp: data[crowd][cat].exp || 0
+                        }});
+                    }}
                 }}
             }}
             items.sort((a, b) => b.sortVal - a.sortVal);
@@ -776,6 +809,18 @@ html_content = f"""<!DOCTYPE html>
             if (crowdTopN[crowd].length === 0) {{
                 html += `<tr><td colspan="${{colSpan}}" style="color:#999">无数据</td></tr>`;
             }} else {{
+                // 先添加整体汇总行 (rn=0)
+                if (crowdTotal[crowd]) {{
+                    const totalExp = parseInt(crowdTotal[crowd].exp).toLocaleString();
+                    const totalMetric = (crowdTotal[crowd].showVal * 100).toFixed(1) + '%';
+                    html += `<tr style="background:#f5f5f5;font-weight:bold">
+                        <td class="rn">0</td>
+                        <td class="cat" style="background:#e0e0e0">整体</td>
+                        <td class="val">${{totalExp}}</td>
+                        ${{showMetric !== 'exp' ? `<td class="val">${{totalMetric}}</td>` : ''}}
+                    </tr>`;
+                }}
+                // 添加 Top N 品类
                 crowdTopN[crowd].forEach((item, i) => {{
                     const expDisplay = parseInt(item.exp).toLocaleString();
                     const metricDisplay = (item.showVal * 100).toFixed(1) + '%';