Sfoglia il codice sorgente

feat(头部品类分析): 新增反向下钻和双向亲和度

- Tab 1 新增反向下钻:选择推荐品类,查看从哪些头部品类进入
- Tab 3 新增按列基准亲和度:推荐某品类时,从哪进入更好
- 更新亲和度说明,解释两种计算方式的业务含义

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui 2 mesi fa
parent
commit
24f32b1938
1 file modificato con 337 aggiunte e 13 eliminazioni
  1. 337 13
      tasks/承接/头部品类与承接品类分析/visualize.py

+ 337 - 13
tasks/承接/头部品类与承接品类分析/visualize.py

@@ -233,6 +233,91 @@ def calc_head_drill_data(date=None):
 
     return {'heads': ['all'] + head_list, 'data': result}
 
+# 计算推荐品类下钻数据(反向:选择推荐品类,看从哪些头部品类进入)
+def calc_rec_drill_data(date=None):
+    ch_df = df.copy()
+    if date and date != 'all':
+        ch_df = ch_df[ch_df['dt'].astype(str) == str(date)]
+    if len(ch_df) == 0:
+        return None
+
+    agg = ch_df.groupby(['rec_cate2', 'crowd', 'head_cate2']).agg({
+        'exp': 'sum',
+        'share_cnt': 'sum',
+        'return_n_uv': 'sum',
+        'new_exposure_cnt': 'sum',
+    }).reset_index()
+
+    agg['str'] = agg['share_cnt'] / (agg['exp'] + 1)
+    agg['ros'] = agg['return_n_uv'] / (agg['share_cnt'] + 1)
+    agg['rovn'] = agg['return_n_uv'] / (agg['exp'] + 1)
+    agg['vov'] = agg['new_exposure_cnt'] / (agg['exp'] + 1)
+
+    result = {}
+
+    # all 选项(不区分推荐品类)
+    agg_all = ch_df.groupby(['crowd', 'head_cate2']).agg({
+        'exp': 'sum', 'share_cnt': 'sum', 'return_n_uv': 'sum', 'new_exposure_cnt': 'sum',
+    }).reset_index()
+    agg_all['str'] = agg_all['share_cnt'] / (agg_all['exp'] + 1)
+    agg_all['ros'] = agg_all['return_n_uv'] / (agg_all['share_cnt'] + 1)
+    agg_all['rovn'] = agg_all['return_n_uv'] / (agg_all['exp'] + 1)
+    agg_all['vov'] = agg_all['new_exposure_cnt'] / (agg_all['exp'] + 1)
+
+    result['all'] = {}
+    for crowd in crowd_list:
+        crowd_df = agg_all[agg_all['crowd'] == crowd]
+        result['all'][crowd] = {}
+        total_exp = int(crowd_df['exp'].sum())
+        total_share = crowd_df['share_cnt'].sum()
+        total_return = crowd_df['return_n_uv'].sum()
+        total_new_exp = crowd_df['new_exposure_cnt'].sum()
+        result['all'][crowd]['_total'] = {
+            'exp': total_exp,
+            'str': round(total_share / (total_exp + 1), 4),
+            'ros': round(total_return / (total_share + 1), 4),
+            'rovn': round(total_return / (total_exp + 1), 4),
+            'vov': round(total_new_exp / (total_exp + 1), 4),
+        }
+        for _, row in crowd_df.iterrows():
+            result['all'][crowd][row['head_cate2']] = {
+                'exp': int(row['exp']),
+                'str': round(row['str'], 4),
+                'ros': round(row['ros'], 4),
+                'rovn': round(row['rovn'], 4),
+                'vov': round(row['vov'], 4),
+            }
+
+    for rec_cate in agg['rec_cate2'].unique():
+        result[rec_cate] = {}
+        for crowd in crowd_list:
+            crowd_df = agg[(agg['rec_cate2'] == rec_cate) & (agg['crowd'] == crowd)]
+            result[rec_cate][crowd] = {}
+            total_exp = int(crowd_df['exp'].sum())
+            total_share = crowd_df['share_cnt'].sum()
+            total_return = crowd_df['return_n_uv'].sum()
+            total_new_exp = crowd_df['new_exposure_cnt'].sum()
+            result[rec_cate][crowd]['_total'] = {
+                'exp': total_exp,
+                'str': round(total_share / (total_exp + 1), 4),
+                'ros': round(total_return / (total_share + 1), 4),
+                'rovn': round(total_return / (total_exp + 1), 4),
+                'vov': round(total_new_exp / (total_exp + 1), 4),
+            }
+            for _, row in crowd_df.iterrows():
+                result[rec_cate][crowd][row['head_cate2']] = {
+                    'exp': int(row['exp']),
+                    'str': round(row['str'], 4),
+                    'ros': round(row['ros'], 4),
+                    'rovn': round(row['rovn'], 4),
+                    'vov': round(row['vov'], 4),
+                }
+
+    rec_exp = ch_df.groupby('rec_cate2')['exp'].sum().sort_values(ascending=False)
+    rec_list = rec_exp.index.tolist()
+
+    return {'recs': ['all'] + rec_list, 'data': result}
+
 # 预计算Tab1数据
 all_data = {}
 for crowd in crowd_list:
@@ -248,6 +333,13 @@ for dt in date_options:
     if drill:
         head_drill_data[dt] = drill
 
+# Reverse drill-down: precompute the per-date payload keyed by recommended
+# category; dates for which calc_rec_drill_data returns a falsy value are skipped.
+rec_drill_data = {}
+for dt in date_options:
+    drill = calc_rec_drill_data(dt)
+    if drill:
+        rec_drill_data[dt] = drill
+
 # ========== Tab2: 品类一致性数据 ==========
 df_valid = df[~df['head_cate2'].isin(['headvideoid为空', '未匹配品类'])].copy()
 df_valid['is_same_cate'] = df_valid['head_cate2'] == df_valid['rec_cate2']
@@ -325,34 +417,45 @@ base_matrix = calc_affinity_matrix(df_valid, EXP_THRESHOLD_TOTAL)
 fixed_cate_list = base_matrix['rows'] if base_matrix else []
 
 def calc_affinity_matrix_fixed(data_df, exp_threshold, fixed_list):
+    """Build affinity / vov / exp matrices over a fixed category list.
+
+    Here vov is sum(new_exposure_cnt) / sum(exp) of a group of rows.
+
+    For every (head_cate2, rec_cate2) pair whose summed exp is at least
+    `exp_threshold`, two affinities are computed:
+
+    * ``affinity``     = pair vov / vov of all rows with that head_cate2
+    * ``affinity_col`` = pair vov / vov of all rows with that rec_cate2
+
+    Either affinity is 0 when its baseline is not greater than 0.
+
+    Returns a dict with ``rows`` and ``cols`` (both `fixed_list`) and the
+    nested dicts ``affinity``, ``affinity_col``, ``vov`` and ``exp``, each
+    indexed as ``[head][rec]`` for every head and rec in `fixed_list`.
+    """
+    # Row baseline: average vov of each head (entry) category
     head_baseline = data_df.groupby('head_cate2').apply(
         lambda x: x['new_exposure_cnt'].sum() / x['exp'].sum(), include_groups=False
     ).to_dict()
+    # Column baseline: average vov of each recommended category
+    rec_baseline = data_df.groupby('rec_cate2').apply(
+        lambda x: x['new_exposure_cnt'].sum() / x['exp'].sum(), include_groups=False
+    ).to_dict()
 
     affinity_dict = {}
     for (head, rec), grp in data_df.groupby(['head_cate2', 'rec_cate2']):
         if grp['exp'].sum() >= exp_threshold:
             pair_vov = grp['new_exposure_cnt'].sum() / grp['exp'].sum()
-            baseline = head_baseline.get(head, 1)
-            affinity = pair_vov / baseline if baseline > 0 else 0
+            baseline_row = head_baseline.get(head, 1)
+            baseline_col = rec_baseline.get(rec, 1)
+            affinity = pair_vov / baseline_row if baseline_row > 0 else 0
+            affinity_col = pair_vov / baseline_col if baseline_col > 0 else 0
             affinity_dict[(head, rec)] = {
                 'vov': round(pair_vov, 4),
                 'affinity': round(affinity, 2),
+                'affinity_col': round(affinity_col, 2),
                 'exp': int(grp['exp'].sum())
             }
 
-    result = {'rows': fixed_list, 'cols': fixed_list, 'affinity': {}, 'vov': {}, 'exp': {}}
+    result = {'rows': fixed_list, 'cols': fixed_list, 'affinity': {}, 'affinity_col': {}, 'vov': {}, 'exp': {}}
     for head in fixed_list:
         result['affinity'][head] = {}
+        result['affinity_col'][head] = {}
         result['vov'][head] = {}
         result['exp'][head] = {}
         for rec in fixed_list:
             if (head, rec) in affinity_dict:
                 result['affinity'][head][rec] = float(affinity_dict[(head, rec)]['affinity'])
+                result['affinity_col'][head][rec] = float(affinity_dict[(head, rec)]['affinity_col'])
                 result['vov'][head][rec] = float(affinity_dict[(head, rec)]['vov'])
                 result['exp'][head][rec] = int(affinity_dict[(head, rec)]['exp'])
             else:
+                # Pairs that are absent or below the exposure threshold are zero-filled
+                # so every [head][rec] cell of the fixed grid exists.
                 result['affinity'][head][rec] = 0
+                result['affinity_col'][head][rec] = 0
                 result['vov'][head][rec] = 0
                 result['exp'][head][rec] = 0
     return result
@@ -415,6 +518,7 @@ for date in date_list_aff:
 # 转为JSON
 data_json = json.dumps(all_data, ensure_ascii=False)
 head_drill_json = json.dumps(head_drill_data, ensure_ascii=False)
+rec_drill_json = json.dumps(rec_drill_data, ensure_ascii=False)
 crowd_list_json = json.dumps(crowd_list, ensure_ascii=False)
 dates_json = json.dumps(date_options)
 consistency_json = json.dumps(consistency_data, ensure_ascii=False)
@@ -643,6 +747,54 @@ html_content = f"""<!DOCTYPE html>
                 </div>
                 <div class="compare-section" id="drill-section"></div>
             </div>
+
+            <!-- 反向下钻表格 -->
+            <div style="margin-top: 30px; border-top: 2px solid #e0e0e0; padding-top: 20px;">
+                <h3 style="margin-bottom: 15px; font-size: 16px; color: #333;">推荐某品类后,用户是从哪些品类进入的?</h3>
+                <div class="controls">
+                    <div class="control-group">
+                        <label>推荐品类:</label>
+                        <select id="rec-drill-rec" onchange="updateRecDrill()"></select>
+                    </div>
+                    <div class="control-group">
+                        <label>排序:</label>
+                        <select id="rec-drill-sort" onchange="updateRecDrill()">
+                            <option value="exp" selected>exp</option>
+                            <option value="str">str</option>
+                            <option value="ros">ros</option>
+                            <option value="rovn">rovn</option>
+                            <option value="vov">vov</option>
+                        </select>
+                    </div>
+                    <div class="control-group">
+                        <label>展示:</label>
+                        <select id="rec-drill-metric" onchange="updateRecDrill()">
+                            <option value="exp">exp</option>
+                            <option value="str">str</option>
+                            <option value="ros">ros</option>
+                            <option value="rovn">rovn</option>
+                            <option value="vov" selected>vov</option>
+                        </select>
+                    </div>
+                    <div class="control-group">
+                        <label>Top:</label>
+                        <select id="rec-drill-topn" onchange="updateRecDrill()">
+                            <option value="5">5</option>
+                            <option value="10" selected>10</option>
+                            <option value="15">15</option>
+                            <option value="20">20</option>
+                        </select>
+                    </div>
+                    <div class="control-group date-switcher">
+                        <label>日期:</label>
+                        <button onclick="switchRecDrillDate(-1)">◀</button>
+                        <select id="rec-drill-date" onchange="initRecDrill()">{date_options_html}</select>
+                        <button onclick="switchRecDrillDate(1)">▶</button>
+                        <button id="rec-drill-play-btn" class="play-btn" onclick="toggleRecDrillPlay()">▶</button>
+                    </div>
+                </div>
+                <div class="compare-section" id="rec-drill-section"></div>
+            </div>
         </div>
 
         <!-- Tab 2: 品类一致性 -->
@@ -677,13 +829,14 @@ html_content = f"""<!DOCTYPE html>
         <!-- Tab 3: 品类亲和性矩阵 -->
         <div id="tab-affinity" class="tab-content">
             <div class="insight-box">
-                <h5>亲和性 = 这个组合的表现 / 进入品类的平均表现</h5>
+                <h5>两种亲和性计算方式</h5>
                 <p>
-                <strong>举例</strong>:用户从「搞笑段子」进入,平均裂变率 0.4<br>
-                • 推荐「搞笑段子→搞笑段子」裂变率 0.8,亲和性 = 0.8/0.4 = <span style="color:#2e7d32;font-weight:bold">2.0 ✓ 更对味</span><br>
-                • 推荐「搞笑段子→历史名人」裂变率 0.2,亲和性 = 0.2/0.4 = <span style="color:#c62828;font-weight:bold">0.5 ✗ 不对味</span><br><br>
-                <strong>颜色</strong>:<span style="background:#c8e6c9;padding:2px 6px;border-radius:3px">绿色=高亲和</span>
-                <span style="background:#ffcdd2;padding:2px 6px;border-radius:3px;margin-left:10px">红色=低亲和</span>
+                <strong>按行基准</strong>:组合 vov / 进入品类平均 vov → 「从A品类进入,推什么更好?」<br>
+                <span style="color:#666;font-size:12px">举例:用户从「搞笑」进入(平均vov 0.4),推「搞笑→美食」vov 0.8,亲和性=0.8/0.4=2.0</span><br><br>
+                <strong>按列基准</strong>:组合 vov / 推荐品类平均 vov → 「推B品类,从哪进入更好?」<br>
+                <span style="color:#666;font-size:12px">举例:推「美食」(平均vov 0.5),从「搞笑→美食」vov 0.8,亲和性=0.8/0.5=1.6</span><br><br>
+                <strong>颜色</strong>:<span style="background:#c8e6c9;padding:2px 6px;border-radius:3px">绿色=高亲和(>1)</span>
+                <span style="background:#ffcdd2;padding:2px 6px;border-radius:3px;margin-left:10px">红色=低亲和(<1)</span>
                 </p>
             </div>
 
@@ -707,7 +860,8 @@ html_content = f"""<!DOCTYPE html>
                 <div class="control-group">
                     <label>显示指标:</label>
                     <select id="aff-metric" onchange="updateAffMatrix()">
-                        <option value="affinity" selected>亲和性 (affinity)</option>
+                        <option value="affinity" selected>亲和性-按行 (从这品类进入,推什么更好)</option>
+                        <option value="affinity_col">亲和性-按列 (推这品类,从哪进入更好)</option>
                         <option value="vov">裂变率 (vov)</option>
                         <option value="exp">曝光量 (exp)</option>
                     </select>
@@ -764,6 +918,7 @@ html_content = f"""<!DOCTYPE html>
     // Data
     const allData = {data_json};
     const headDrillData = {head_drill_json};
+    const recDrillData = {rec_drill_json};
     const crowdList = {crowd_list_json};
     const dates = {dates_json};
     const consistencyData = {consistency_json};
@@ -772,7 +927,7 @@ html_content = f"""<!DOCTYPE html>
     const dateListAff = {date_list_aff_json};
 
     const crowdColors = {{ '内部': '#4CAF50', '外部0层': '#2196F3', '外部裂变': '#FF9800' }};
-    let playInterval1 = null, drillPlayInterval = null, affPlayInterval = null, rankPlayInterval = null;
+    let playInterval1 = null, drillPlayInterval = null, recDrillPlayInterval = null, affPlayInterval = null, rankPlayInterval = null;
     let currentRowOrder = null, currentColOrder = null;
     let sortState = {{ row: null, col: null, asc: true }};
     let lastCrowd = null, lastDate = null;
@@ -1145,6 +1300,173 @@ html_content = f"""<!DOCTYPE html>
         }}
     }}
 
+    // ========== 反向下钻:选择推荐品类,看从哪些头部品类进入 ==========
+    function initRecDrill() {{
+        const date = document.getElementById('rec-drill-date').value;
+        const recSelect = document.getElementById('rec-drill-rec');
+
+        if (!recDrillData[date]) {{
+            recSelect.innerHTML = '<option value="">无数据</option>';
+            return;
+        }}
+
+        const recs = recDrillData[date].recs;
+        recSelect.innerHTML = recs.map((r, i) => {{
+            const label = r === 'all' ? '全部(不区分推荐品类)' : `#${{i}} ${{r}}`;
+            return `<option value="${{r}}">${{label}}</option>`;
+        }}).join('');
+
+        updateRecDrill();
+    }}
+
+    function updateRecDrill() {{
+        const date = document.getElementById('rec-drill-date').value;
+        const recCate = document.getElementById('rec-drill-rec').value;
+        const sortBy = document.getElementById('rec-drill-sort').value;
+        const showMetric = document.getElementById('rec-drill-metric').value;
+        const topN = parseInt(document.getElementById('rec-drill-topn').value);
+
+        if (!recDrillData[date] || !recCate) {{
+            document.getElementById('rec-drill-section').innerHTML = '<p>无数据</p>';
+            return;
+        }}
+
+        const data = recDrillData[date].data[recCate];
+        if (!data) {{
+            document.getElementById('rec-drill-section').innerHTML = '<p>该推荐品类无数据</p>';
+            return;
+        }}
+
+        const crowdTopN = {{}};
+        const crowdTotal = {{}};
+        crowdList.forEach(crowd => {{
+            const items = [];
+            if (data[crowd]) {{
+                for (const cat in data[crowd]) {{
+                    if (cat === '_total') {{
+                        crowdTotal[crowd] = {{ exp: data[crowd][cat].exp || 0, showVal: data[crowd][cat][showMetric] || 0 }};
+                    }} else {{
+                        items.push({{
+                            cat: cat,
+                            sortVal: data[crowd][cat][sortBy] || 0,
+                            showVal: data[crowd][cat][showMetric] || 0,
+                            exp: data[crowd][cat].exp || 0
+                        }});
+                    }}
+                }}
+            }}
+            items.sort((a, b) => b.sortVal - a.sortVal);
+            crowdTopN[crowd] = items.slice(0, topN);
+        }});
+
+        const allCats = new Set();
+        crowdList.forEach(crowd => {{ crowdTopN[crowd].forEach(item => allCats.add(item.cat)); }});
+        const catList = Array.from(allCats);
+
+        const catColors = {{}};
+        const colorPalette = ['#FFCDD2', '#F8BBD0', '#E1BEE7', '#D1C4E9', '#C5CAE9', '#BBDEFB', '#B3E5FC', '#B2EBF2', '#B2DFDB', '#C8E6C9', '#DCEDC8', '#F0F4C3', '#FFF9C4', '#FFECB3', '#FFE0B2', '#FFCCBC', '#D7CCC8', '#CFD8DC', '#BCAAA4', '#B0BEC5'];
+        catList.forEach((cat, i) => {{ catColors[cat] = colorPalette[i % colorPalette.length]; }});
+
+        let maxVal = 0, minVal = Infinity;
+        crowdList.forEach(crowd => {{
+            crowdTopN[crowd].forEach(item => {{
+                if (item.showVal > maxVal) maxVal = item.showVal;
+                if (item.showVal < minVal) minVal = item.showVal;
+            }});
+        }});
+        if (minVal === Infinity) minVal = 0;
+
+        function getValueColor(val) {{
+            if (maxVal === minVal) return '#C8E6C9';
+            const ratio = (val - minVal) / (maxVal - minVal);
+            const r = Math.round(200 - ratio * 120);
+            const g = Math.round(230 - ratio * 80);
+            const b = Math.round(201 - ratio * 120);
+            return `rgb(${{r}},${{g}},${{b}})`;
+        }}
+
+        let html = '';
+        crowdList.forEach(crowd => {{
+            const colSpan = showMetric === 'exp' ? 3 : 4;
+            html += `<div class="crowd-block">
+                <table>
+                    <thead>
+                        <tr><th colspan="${{colSpan}}" style="background:${{crowdColors[crowd]}};color:white">${{crowd}}</th></tr>
+                        <tr><th class="rn">rn</th><th>进入品类</th><th>exp</th>${{showMetric !== 'exp' ? `<th>${{showMetric}}</th>` : ''}}</tr>
+                    </thead>
+                    <tbody>`;
+
+            if (crowdTopN[crowd].length === 0) {{
+                html += `<tr><td colspan="${{colSpan}}" style="color:#999">无数据</td></tr>`;
+            }} else {{
+                if (crowdTotal[crowd]) {{
+                    const totalExp = parseInt(crowdTotal[crowd].exp).toLocaleString();
+                    const totalMetric = (crowdTotal[crowd].showVal * 100).toFixed(1) + '%';
+                    html += `<tr style="background:#f5f5f5;font-weight:bold">
+                        <td class="rn">0</td>
+                        <td class="cat" style="background:#e0e0e0">整体</td>
+                        <td class="val">${{totalExp}}</td>
+                        ${{showMetric !== 'exp' ? `<td class="val">${{totalMetric}}</td>` : ''}}
+                    </tr>`;
+                }}
+                crowdTopN[crowd].forEach((item, i) => {{
+                    const expDisplay = parseInt(item.exp).toLocaleString();
+                    const metricDisplay = (item.showVal * 100).toFixed(1) + '%';
+                    const valColor = getValueColor(item.showVal);
+                    const catColor = catColors[item.cat];
+                    const catAttr = item.cat.replace(/"/g, '&quot;');
+                    html += `<tr>
+                        <td class="rn">${{i + 1}}</td>
+                        <td class="cat" style="background:${{catColor}}" data-cat="${{catAttr}}" onmouseenter="highlightCat(this)" onmouseleave="unhighlightCat()">${{item.cat}}</td>
+                        <td class="val">${{expDisplay}}</td>
+                        ${{showMetric !== 'exp' ? `<td class="val" style="background:${{valColor}}">${{metricDisplay}}</td>` : ''}}
+                    </tr>`;
+                }});
+            }}
+            html += `</tbody></table></div>`;
+        }});
+
+        document.getElementById('rec-drill-section').innerHTML = html;
+    }}
+
+    function switchRecDrillDate(delta) {{
+        const select = document.getElementById('rec-drill-date');
+        const idx = dates.indexOf(select.value);
+        const newIdx = idx + delta;
+        if (newIdx >= 0 && newIdx < dates.length) {{
+            select.value = dates[newIdx];
+            initRecDrill();
+        }}
+    }}
+
+    function toggleRecDrillPlay() {{
+        const btn = document.getElementById('rec-drill-play-btn');
+        if (recDrillPlayInterval) {{
+            clearInterval(recDrillPlayInterval);
+            recDrillPlayInterval = null;
+            btn.classList.remove('playing');
+            btn.textContent = '▶';
+        }} else {{
+            btn.classList.add('playing');
+            btn.textContent = '⏸';
+            let idx = 0;
+            const play = () => {{
+                if (idx >= dates.length) {{
+                    clearInterval(recDrillPlayInterval);
+                    recDrillPlayInterval = null;
+                    btn.classList.remove('playing');
+                    btn.textContent = '▶';
+                    return;
+                }}
+                document.getElementById('rec-drill-date').value = dates[idx];
+                initRecDrill();
+                idx++;
+            }};
+            play();
+            recDrillPlayInterval = setInterval(play, 1500);
+        }}
+    }}
+
     function highlightCat(el) {{
         const cat = el.getAttribute('data-cat');
         document.querySelectorAll('.cat[data-cat]').forEach(cell => {{
@@ -1229,7 +1551,8 @@ html_content = f"""<!DOCTYPE html>
         }}));
 
         let maxVal, minVal = 0;
-        if (metric === 'affinity') {{
+        const isAffinity = (metric === 'affinity' || metric === 'affinity_col');
+        if (isAffinity) {{
             maxVal = 2; minVal = 0.5;
         }} else if (metric === 'vov') {{
             allVals.sort((a, b) => a - b);
@@ -1240,7 +1563,7 @@ html_content = f"""<!DOCTYPE html>
         }}
 
         function getColor(val) {{
-            if (metric === 'affinity') {{
+            if (isAffinity) {{
                 if (val >= 1) {{
                     const ratio = Math.min((val - 1) / (maxVal - 1), 1);
                     return `rgb(${{Math.round(200 - ratio * 200)}}, ${{Math.round(230 - ratio * 30)}}, ${{Math.round(200 - ratio * 200)}})`;
@@ -1425,6 +1748,7 @@ html_content = f"""<!DOCTYPE html>
     // Initialize
     updateMatrix1();
     initHeadDrill();
+    initRecDrill();
     initConsistency();
     updateAffMatrix();
     initRanking();