Просмотр исходного кода

fix(渠道效果分析): 修复可视化脚本并增加多矩阵视图

- 修复f-string语法错误
- 增加回流率、点击UV、进入推荐率三个矩阵视图
- 每个矩阵带颜色编码图例

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui 2 месяцев назад
Родитель
Коммит
ee5b55915f
2 изменённых файла: 251 добавлено и 130 удалено
  1. 1 1
      tables/loghubods/opengid_base_data.txt
  2. 250 129
      tasks/渠道效果分析/visualize.py

+ 1 - 1
tables/loghubods/opengid_base_data.txt

@@ -1,7 +1,7 @@
 表名: loghubods.opengid_base_data
 注释: (无)
 创建时间: 2025-10-28 16:34:36
-最后修改: 2026-01-05 05:09:17
+最后修改: 2026-01-05 13:38:09
 
 ============================================================
 字段名                            类型              注释

+ 250 - 129
tasks/渠道效果分析/visualize.py

@@ -2,19 +2,17 @@
 # coding=utf-8
 """
 渠道效果分析可视化
-输出交互式 HTML 报告
+输出交互式 HTML 报告,包含多个矩阵视图
 """
 import pandas as pd
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
+import json
 from pathlib import Path
 
 task_dir = Path(__file__).parent
 output_dir = task_dir / "output"
 
-# 找到最新的原始数据文件(排除分析结果文件)
-csv_files = [f for f in output_dir.glob("*.csv")
-             if f.stem.count('_') == 1]  # 只保留 日期_日期.csv 格式(只有1个下划线)
+# 找到最新的原始数据文件
+csv_files = [f for f in output_dir.glob("*.csv") if f.stem.count('_') == 1]
 if not csv_files:
     print("没有找到数据文件,请先运行 query.sql")
     exit(1)
@@ -41,7 +39,7 @@ for ch in channel_stats['channel']:
     channel_stats.loc[channel_stats['channel'] == ch, '进入推荐率'] = (ch_df['进入推荐率'] * ch_df['点击uv']).sum() / uv
     channel_stats.loc[channel_stats['channel'] == ch, '再分享回流率'] = (ch_df['再分享回流率'] * ch_df['点击uv']).sum() / uv
 
-channel_stats = channel_stats.sort_values('点击uv', ascending=True)
+channel_stats = channel_stats.sort_values('点击uv', ascending=False)
 
 # 渠道×品类
 channel_category = df.groupby(['channel', 'merge一级品类']).apply(
@@ -63,131 +61,144 @@ daily = df.groupby(['dt', 'channel']).apply(
 ).reset_index()
 
 # ============================================================
-# 创建可视化
+# 准备图表数据
 # ============================================================
 
-# 1. 渠道整体表现 - 横向条形图
-fig1 = make_subplots(rows=1, cols=2, subplot_titles=('点击UV', '再分享回流率'),
-                     specs=[[{"type": "bar"}, {"type": "bar"}]])
-
-fig1.add_trace(
-    go.Bar(y=channel_stats['channel'], x=channel_stats['点击uv'],
-           orientation='h', name='点击UV', marker_color='steelblue'),
-    row=1, col=1
-)
-
-fig1.add_trace(
-    go.Bar(y=channel_stats['channel'], x=channel_stats['再分享回流率'],
-           orientation='h', name='回流率', marker_color='coral',
-           text=[f"{x:.1%}" for x in channel_stats['再分享回流率']], textposition='outside'),
-    row=1, col=2
-)
-
-fig1.update_layout(height=500, title_text="渠道整体表现", showlegend=False)
-fig1.update_xaxes(tickformat=',', row=1, col=1)
-fig1.update_xaxes(tickformat='.0%', row=1, col=2)
-
-# 2. 渠道×品类热力图
-main_channels = channel_stats.nlargest(8, '点击uv')['channel'].tolist()
+# 主要渠道(UV > 10000)
+main_channels = channel_stats[channel_stats['点击uv'] > 10000]['channel'].tolist()[:8]
 valid_categories = pivot_uv[pivot_uv.sum(axis=1) >= 1000].index.tolist()
-
-heatmap_data = pivot_ror.loc[valid_categories, [c for c in main_channels if c in pivot_ror.columns]]
-
-fig2 = go.Figure(data=go.Heatmap(
-    z=heatmap_data.values,
-    x=[c[:12] for c in heatmap_data.columns],
-    y=heatmap_data.index,
-    colorscale='RdYlGn',
-    text=[[f"{v:.1%}" if pd.notna(v) else "" for v in row] for row in heatmap_data.values],
-    texttemplate="%{text}",
-    textfont={"size": 10},
-    hovertemplate="渠道: %{x}<br>品类: %{y}<br>回流率: %{z:.1%}<extra></extra>"
-))
-
-fig2.update_layout(
-    title="渠道×品类 回流率热力图",
-    height=600,
-    xaxis_title="渠道",
-    yaxis_title="品类"
-)
-
-# 3. 每日趋势折线图
-fig3 = go.Figure()
-
-top_channels = channel_stats.nlargest(6, '点击uv')['channel'].tolist()
-colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
-
-for i, ch in enumerate(top_channels):
+heatmap_cols = [c for c in main_channels if c in pivot_ror.columns]
+
+# 1. 渠道表格行
+channel_rows = []
+for _, row in channel_stats.iterrows():
+    bar_width = min(row['再分享回流率'] * 200, 100)
+    channel_rows.append(
+        f"<tr><td>{row['channel']}</td>"
+        f"<td>{int(row['点击uv']):,}</td>"
+        f"<td>{row['再分享回流率']:.1%}</td>"
+        f"<td><div style='background:#007bff;height:20px;width:{bar_width}%'></div></td></tr>"
+    )
+
+# 2. 回流率热力图
+def get_cell_class(val):
+    if val is None:
+        return ""
+    if val > 30:
+        return "high"
+    if val > 15:
+        return "medium"
+    return "low"
+
+ror_header = "<tr><th>品类</th>" + "".join([f"<th>{c[:10]}</th>" for c in heatmap_cols]) + "</tr>"
+ror_rows = []
+for cat in valid_categories:
+    cells = [f"<td>{str(cat)[:12]}</td>"]
+    for ch in heatmap_cols:
+        if ch in pivot_ror.columns and pd.notna(pivot_ror.loc[cat, ch]):
+            val = pivot_ror.loc[cat, ch] * 100
+            cls = get_cell_class(val)
+            cells.append(f'<td class="{cls}">{val:.1f}%</td>')
+        else:
+            cells.append("<td>-</td>")
+    ror_rows.append("<tr>" + "".join(cells) + "</tr>")
+
+# 3. UV分布热力图
+def get_uv_class(val):
+    if val >= 50000:
+        return "high"
+    if val >= 10000:
+        return "medium"
+    if val >= 1000:
+        return "low"
+    return ""
+
+uv_header = "<tr><th>品类</th>" + "".join([f"<th>{c[:10]}</th>" for c in heatmap_cols]) + "</tr>"
+uv_rows = []
+for cat in valid_categories:
+    cells = [f"<td>{str(cat)[:12]}</td>"]
+    for ch in heatmap_cols:
+        if ch in pivot_uv.columns:
+            val = pivot_uv.loc[cat, ch]
+            if val > 0:
+                cls = get_uv_class(val)
+                cells.append(f'<td class="{cls}">{int(val):,}</td>')
+            else:
+                cells.append("<td>-</td>")
+        else:
+            cells.append("<td>-</td>")
+    uv_rows.append("<tr>" + "".join(cells) + "</tr>")
+
+# 4. 进入推荐率热力图
+pivot_recommend = df.groupby(['channel', 'merge一级品类']).apply(
+    lambda x: (x['进入推荐率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0,
+    include_groups=False
+).unstack()
+
+def get_recommend_class(val):
+    if val >= 0.8:
+        return "high"
+    if val >= 0.7:
+        return "medium"
+    return "low"
+
+recommend_rows = []
+for cat in valid_categories:
+    cells = [f"<td>{str(cat)[:12]}</td>"]
+    for ch in heatmap_cols:
+        if ch in pivot_recommend.columns and cat in pivot_recommend.index and pd.notna(pivot_recommend.loc[cat, ch]):
+            val = pivot_recommend.loc[cat, ch]
+            cls = get_recommend_class(val)
+            cells.append(f'<td class="{cls}">{val:.1%}</td>')
+        else:
+            cells.append("<td>-</td>")
+    recommend_rows.append("<tr>" + "".join(cells) + "</tr>")
+
+# 5. 趋势数据
+top_channels = channel_stats.head(6)['channel'].tolist()
+trend_data = {}
+for ch in top_channels:
     ch_daily = daily[daily['channel'] == ch].sort_values('dt')
-    fig3.add_trace(go.Scatter(
-        x=ch_daily['dt'].astype(str),
-        y=ch_daily['回流率'],
-        mode='lines+markers',
-        name=ch[:15],
-        line=dict(color=colors[i % len(colors)]),
-        hovertemplate=f"{ch}<br>日期: %{{x}}<br>回流率: %{{y:.1%}}<extra></extra>"
-    ))
-
-fig3.update_layout(
-    title="渠道每日回流率趋势",
-    height=400,
-    xaxis_title="日期",
-    yaxis_title="回流率",
-    yaxis_tickformat='.0%',
-    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
-)
-
-# 4. UV vs 回流率散点图
-fig4 = go.Figure()
-
-fig4.add_trace(go.Scatter(
-    x=channel_category['点击uv'],
-    y=channel_category['回流率'],
-    mode='markers',
-    marker=dict(
-        size=8,
-        color=channel_category['回流率'],
-        colorscale='Viridis',
-        showscale=True,
-        colorbar=dict(title="回流率")
-    ),
-    text=channel_category.apply(lambda r: f"{r['channel'][:10]}<br>{r['merge一级品类']}", axis=1),
-    hovertemplate="%{text}<br>UV: %{x:,}<br>回流率: %{y:.1%}<extra></extra>"
-))
-
-fig4.update_layout(
-    title="渠道×品类 UV vs 回流率 (寻找高UV高回流组合)",
-    height=500,
-    xaxis_title="点击UV",
-    yaxis_title="回流率",
-    xaxis_type="log",
-    yaxis_tickformat='.0%'
-)
-
-# 添加参考线
-median_ror = channel_category['回流率'].median()
-fig4.add_hline(y=median_ror, line_dash="dash", line_color="gray",
-               annotation_text=f"中位数: {median_ror:.1%}")
+    trend_data[ch] = {
+        'dates': [str(d) for d in ch_daily['dt'].tolist()],
+        'values': [round(x * 100, 1) for x in ch_daily['回流率'].tolist()]
+    }
+
+# 6. 散点图数据
+scatter_data = []
+for _, row in channel_category.iterrows():
+    if row['点击uv'] >= 100:
+        scatter_data.append({
+            'x': int(row['点击uv']),
+            'y': round(row['回流率'] * 100, 1),
+            'channel': row['channel'][:12],
+            'category': str(row['merge一级品类'])[:10] if pd.notna(row['merge一级品类']) else ''
+        })
 
 # ============================================================
 # 生成 HTML
 # ============================================================
 
-html_content = f"""
-<!DOCTYPE html>
+total_uv = int(df['点击uv'].sum())
+avg_ror = (df['再分享回流率'] * df['点击uv']).sum() / df['点击uv'].sum()
+channel_count = df['channel'].nunique()
+category_count = df['merge一级品类'].nunique()
+date_range = f"{df['dt'].min()} ~ {df['dt'].max()}"
+
+html_content = f"""<!DOCTYPE html>
 <html>
 <head>
     <meta charset="utf-8">
     <title>渠道效果分析报告</title>
-    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
     <style>
         body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
                margin: 20px; background: #f5f5f5; }}
         .container {{ max-width: 1400px; margin: 0 auto; }}
         h1 {{ color: #333; border-bottom: 2px solid #007bff; padding-bottom: 10px; }}
         h2 {{ color: #555; margin-top: 30px; }}
-        .chart {{ background: white; padding: 20px; margin: 20px 0;
-                  border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
+        .chart-container {{ background: white; padding: 20px; margin: 20px 0;
+                           border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
         .summary {{ background: white; padding: 20px; margin: 20px 0;
                    border-radius: 8px; display: flex; gap: 20px; flex-wrap: wrap; }}
         .stat-card {{ flex: 1; min-width: 150px; padding: 15px;
@@ -195,50 +206,159 @@ html_content = f"""
                      border-radius: 8px; color: white; text-align: center; }}
         .stat-card h3 {{ margin: 0; font-size: 24px; }}
         .stat-card p {{ margin: 5px 0 0; opacity: 0.9; }}
+        table {{ width: 100%; border-collapse: collapse; margin: 10px 0; }}
+        th, td {{ padding: 8px 12px; text-align: left; border-bottom: 1px solid #ddd; }}
+        th {{ background: #f8f9fa; font-weight: 600; position: sticky; top: 0; }}
+        tr:hover {{ background: #f5f5f5; }}
+        .heatmap {{ overflow-x: auto; }}
+        .heatmap table {{ font-size: 12px; }}
+        .heatmap td {{ text-align: center; min-width: 80px; }}
+        .high {{ background: #28a745; color: white; }}
+        .medium {{ background: #ffc107; }}
+        .low {{ background: #dc3545; color: white; }}
+        canvas {{ max-height: 400px; }}
+        .matrix-section {{ margin-bottom: 40px; }}
+        .legend {{ font-size: 12px; margin: 10px 0; }}
+        .legend span {{ padding: 2px 8px; margin-right: 10px; border-radius: 3px; }}
     </style>
 </head>
 <body>
     <div class="container">
         <h1>渠道效果分析报告</h1>
-        <p>数据范围: {df['dt'].min()} ~ {df['dt'].max()}</p>
+        <p>数据范围: {date_range}</p>
 
         <div class="summary">
             <div class="stat-card">
-                <h3>{int(df['点击uv'].sum()):,}</h3>
+                <h3>{total_uv:,}</h3>
                 <p>总点击UV</p>
             </div>
             <div class="stat-card">
-                <h3>{(df['再分享回流率'] * df['点击uv']).sum() / df['点击uv'].sum():.1%}</h3>
+                <h3>{avg_ror:.1%}</h3>
                 <p>平均回流率</p>
             </div>
             <div class="stat-card">
-                <h3>{df['channel'].nunique()}</h3>
+                <h3>{channel_count}</h3>
                 <p>渠道数</p>
             </div>
             <div class="stat-card">
-                <h3>{df['merge一级品类'].nunique()}</h3>
+                <h3>{category_count}</h3>
                 <p>品类数</p>
             </div>
         </div>
 
         <h2>1. 渠道整体表现</h2>
-        <div class="chart" id="chart1"></div>
+        <div class="chart-container">
+            <table>
+                <tr><th>渠道</th><th>点击UV</th><th>回流率</th><th>可视化</th></tr>
+                {"".join(channel_rows)}
+            </table>
+        </div>
 
-        <h2>2. 渠道×品类 回流率热力图</h2>
-        <div class="chart" id="chart2"></div>
+        <h2>2. 渠道×品类 回流率矩阵</h2>
+        <div class="chart-container heatmap matrix-section">
+            <div class="legend">
+                <span class="high">高 &gt;30%</span>
+                <span class="medium">中 15-30%</span>
+                <span class="low">低 &lt;15%</span>
+            </div>
+            <table>
+                {ror_header}
+                {"".join(ror_rows)}
+            </table>
+        </div>
 
-        <h2>3. 每日回流率趋势</h2>
-        <div class="chart" id="chart3"></div>
+        <h2>3. 渠道×品类 点击UV矩阵</h2>
+        <div class="chart-container heatmap matrix-section">
+            <div class="legend">
+                <span class="high">高 &gt;5万</span>
+                <span class="medium">中 1-5万</span>
+                <span class="low">低 &lt;1万</span>
+            </div>
+            <table>
+                {uv_header}
+                {"".join(uv_rows)}
+            </table>
+        </div>
 
-        <h2>4. UV vs 回流率 (寻找优质组合)</h2>
-        <div class="chart" id="chart4"></div>
+        <h2>4. 渠道×品类 进入推荐率矩阵</h2>
+        <div class="chart-container heatmap matrix-section">
+            <div class="legend">
+                <span class="high">高 &gt;80%</span>
+                <span class="medium">中 70-80%</span>
+                <span class="low">低 &lt;70%</span>
+            </div>
+            <table>
+                {ror_header}
+                {"".join(recommend_rows)}
+            </table>
+        </div>
+
+        <h2>5. 每日回流率趋势</h2>
+        <div class="chart-container">
+            <canvas id="trendChart"></canvas>
+        </div>
+
+        <h2>6. UV vs 回流率 散点分布</h2>
+        <div class="chart-container">
+            <canvas id="scatterChart"></canvas>
+        </div>
     </div>
 
     <script>
-        Plotly.newPlot('chart1', {fig1.to_json()}.data, {fig1.to_json()}.layout);
-        Plotly.newPlot('chart2', {fig2.to_json()}.data, {fig2.to_json()}.layout);
-        Plotly.newPlot('chart3', {fig3.to_json()}.data, {fig3.to_json()}.layout);
-        Plotly.newPlot('chart4', {fig4.to_json()}.data, {fig4.to_json()}.layout);
+        // 趋势图
+        const trendCtx = document.getElementById('trendChart').getContext('2d');
+        const trendData = {json.dumps(trend_data, ensure_ascii=False)};
+        const colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b'];
+        const datasets = Object.keys(trendData).map((ch, i) => ({{
+            label: ch.substring(0, 15),
+            data: trendData[ch].values,
+            borderColor: colors[i % colors.length],
+            tension: 0.1,
+            fill: false
+        }}));
+        new Chart(trendCtx, {{
+            type: 'line',
+            data: {{
+                labels: trendData[Object.keys(trendData)[0]].dates,
+                datasets: datasets
+            }},
+            options: {{
+                responsive: true,
+                plugins: {{ legend: {{ position: 'top' }} }},
+                scales: {{ y: {{ title: {{ display: true, text: '回流率(%)' }} }} }}
+            }}
+        }});
+
+        // 散点图
+        const scatterCtx = document.getElementById('scatterChart').getContext('2d');
+        const scatterData = {json.dumps(scatter_data, ensure_ascii=False)};
+        new Chart(scatterCtx, {{
+            type: 'scatter',
+            data: {{
+                datasets: [{{
+                    label: '渠道×品类',
+                    data: scatterData.map(d => ({{ x: d.x, y: d.y }})),
+                    backgroundColor: 'rgba(54, 162, 235, 0.5)'
+                }}]
+            }},
+            options: {{
+                responsive: true,
+                scales: {{
+                    x: {{ type: 'logarithmic', title: {{ display: true, text: '点击UV' }} }},
+                    y: {{ title: {{ display: true, text: '回流率(%)' }} }}
+                }},
+                plugins: {{
+                    tooltip: {{
+                        callbacks: {{
+                            label: (ctx) => {{
+                                const d = scatterData[ctx.dataIndex];
+                                return d.channel + ' - ' + d.category + ': UV=' + d.x + ', 回流率=' + d.y + '%';
+                            }}
+                        }}
+                    }}
+                }}
+            }}
+        }});
     </script>
 </body>
 </html>
@@ -249,3 +369,4 @@ with open(html_file, 'w', encoding='utf-8') as f:
     f.write(html_content)
 
 print(f"\nHTML 报告已生成: {html_file}")
+print(f"包含矩阵: 回流率、点击UV、进入推荐率")