Преглед изворног кода

feat(渠道效果分析): 新增可视化脚本生成HTML报告

- 渠道整体表现横向条形图
- 渠道×品类回流率热力图
- 每日回流率趋势折线图
- UV vs 回流率散点图(寻找优质组合)
- 使用 plotly 生成交互式图表

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui пре 2 месеци
родитељ
комит
75a13e9515
1 измењених фајлова са 251 додато и 0 уклоњено
  1. 251 0
      tasks/渠道效果分析/visualize.py

+ 251 - 0
tasks/渠道效果分析/visualize.py

@@ -0,0 +1,251 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+渠道效果分析可视化
+输出交互式 HTML 报告
+"""
+import pandas as pd
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from pathlib import Path
+
+# All paths are anchored to the directory that contains this script.
+task_dir = Path(__file__).parent
+output_dir = task_dir / "output"
+
+# Pick the most recently modified raw-data export. Analysis result files are
+# excluded by keeping only stems that contain exactly one underscore (the
+# <date>_<date>.csv naming pattern).
+csv_files = [f for f in output_dir.glob("*.csv") if f.stem.count('_') == 1]
+if not csv_files:
+    print("没有找到数据文件,请先运行 query.sql")
+    # `exit` is a helper injected by the `site` module for interactive use and
+    # is missing under `python -S` or in frozen builds; raising SystemExit
+    # terminates with the same status code everywhere.
+    raise SystemExit(1)
+
+latest_file = max(csv_files, key=lambda path: path.stat().st_mtime)
+df = pd.read_csv(latest_file)
+
+print(f"分析文件: {latest_file.name}")
+print(f"时间范围: {df['dt'].min()} ~ {df['dt'].max()}")
+
+# ============================================================
+# Data preparation
+# ============================================================
+
+# Per-channel totals plus click-UV-weighted average rates.
+# The weighted numerators are computed row-wise once and summed in the same
+# groupby as the UV totals, instead of re-filtering `df` once per channel.
+_channel_sums = df.assign(
+    _rec_weighted=df['进入推荐率'] * df['点击uv'],
+    _ror_weighted=df['再分享回流率'] * df['点击uv'],
+).groupby('channel')[['点击uv', '再分享回流uv', '_rec_weighted', '_ror_weighted']].sum()
+_channel_uv = _channel_sums['点击uv']
+
+channel_stats = _channel_sums[['点击uv', '再分享回流uv']].reset_index()
+# A channel with no click UV gets a rate of 0 rather than NaN, matching how the
+# channel x category and daily aggregates treat empty groups.
+# `.to_numpy()` assigns by position: `channel_stats` was re-indexed by
+# `reset_index()`, while the sums are still indexed by channel (same row order).
+channel_stats['进入推荐率'] = (
+    (_channel_sums['_rec_weighted'] / _channel_uv).where(_channel_uv > 0, 0).to_numpy()
+)
+channel_stats['再分享回流率'] = (
+    (_channel_sums['_ror_weighted'] / _channel_uv).where(_channel_uv > 0, 0).to_numpy()
+)
+
+# Ascending order so the largest channel is drawn at the top of the
+# horizontal bar chart.
+channel_stats = channel_stats.sort_values('点击uv', ascending=True)
+
+
+
+def _weighted_return_rate(frame, keys):
+    """Aggregate click UV and the UV-weighted re-share return rate per group.
+
+    Args:
+        frame: DataFrame with '点击uv' and '再分享回流率' columns plus the
+            grouping columns named in ``keys``.
+        keys: List of column names to group by.
+
+    Returns:
+        DataFrame with the grouping columns, '点击uv' (group total) and
+        '回流率' (sum of rate * UV divided by total UV; 0 when the group has
+        no positive UV).
+    """
+    sums = (
+        frame.assign(_weighted=frame['再分享回流率'] * frame['点击uv'])
+        .groupby(keys)[['点击uv', '_weighted']]
+        .sum()
+    )
+    uv = sums['点击uv']
+    # Division by a zero total yields NaN/inf; `where` replaces those with 0.
+    rate = (sums['_weighted'] / uv).where(uv > 0, 0)
+    return pd.DataFrame({'点击uv': uv, '回流率': rate}).reset_index()
+
+
+# Channel x category
+channel_category = _weighted_return_rate(df, ['channel', 'merge一级品类'])
+
+# Wide tables: one row per category, one column per channel. Missing
+# combinations stay NaN in the rate table and become 0 in the UV table.
+pivot_ror = channel_category.pivot(index='merge一级品类', columns='channel', values='回流率')
+pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
+
+# Daily trend per channel
+daily = _weighted_return_rate(df, ['dt', 'channel'])
+
+# ============================================================
+# Build the visualizations
+# ============================================================
+
+# 1. Overall channel performance - two side-by-side horizontal bar charts:
+#    click UV on the left, re-share return rate on the right.
+channel_names = channel_stats['channel']
+channel_ror = channel_stats['再分享回流率']
+
+uv_bars = go.Bar(
+    x=channel_stats['点击uv'],
+    y=channel_names,
+    orientation='h',
+    name='点击UV',
+    marker_color='steelblue',
+)
+ror_bars = go.Bar(
+    x=channel_ror,
+    y=channel_names,
+    orientation='h',
+    name='回流率',
+    marker_color='coral',
+    # Print each rate as a percentage label next to its bar.
+    text=[f"{rate:.1%}" for rate in channel_ror],
+    textposition='outside',
+)
+
+fig1 = make_subplots(
+    rows=1,
+    cols=2,
+    subplot_titles=('点击UV', '再分享回流率'),
+    specs=[[{"type": "bar"}, {"type": "bar"}]],
+)
+fig1.add_trace(uv_bars, row=1, col=1)
+fig1.add_trace(ror_bars, row=1, col=2)
+
+fig1.update_layout(height=500, title_text="渠道整体表现", showlegend=False)
+# Thousands separators for the UV axis, whole percentages for the rate axis.
+for subplot_col, axis_format in ((1, ','), (2, '.0%')):
+    fig1.update_xaxes(tickformat=axis_format, row=1, col=subplot_col)
+
+# 2. Channel x category heatmap
+# Restricted to the 8 channels with the most click UV and to categories whose
+# click UV summed across channels is at least 1000.
+main_channels = channel_stats.nlargest(8, '点击uv')['channel'].tolist()
+category_uv_totals = pivot_uv.sum(axis=1)
+valid_categories = pivot_uv[category_uv_totals >= 1000].index.tolist()
+
+heatmap_columns = [c for c in main_channels if c in pivot_ror.columns]
+heatmap_data = pivot_ror.loc[valid_categories, heatmap_columns]
+
+# Cell labels: percentage text, blank where a channel/category pair has no data.
+cell_labels = [
+    ["" if pd.isna(v) else f"{v:.1%}" for v in row]
+    for row in heatmap_data.values
+]
+
+heatmap_trace = go.Heatmap(
+    z=heatmap_data.values,
+    # Channel names are cut to 12 characters for the axis labels.
+    x=[c[:12] for c in heatmap_data.columns],
+    y=heatmap_data.index,
+    colorscale='RdYlGn',
+    text=cell_labels,
+    texttemplate="%{text}",
+    textfont={"size": 10},
+    hovertemplate="渠道: %{x}<br>品类: %{y}<br>回流率: %{z:.1%}<extra></extra>",
+)
+fig2 = go.Figure(data=heatmap_trace)
+
+fig2.update_layout(
+    title="渠道×品类 回流率热力图",
+    height=600,
+    xaxis_title="渠道",
+    yaxis_title="品类",
+)
+
+# 3. Daily trend line chart - one line per channel for the 6 channels with
+#    the most click UV.
+top_channels = channel_stats.nlargest(6, '点击uv')['channel'].tolist()
+colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
+
+fig3 = go.Figure()
+for idx, channel_name in enumerate(top_channels):
+    channel_daily = daily[daily['channel'] == channel_name].sort_values('dt')
+    trend_line = go.Scatter(
+        x=channel_daily['dt'].astype(str),
+        y=channel_daily['回流率'],
+        mode='lines+markers',
+        # Legend entry is shortened to 15 characters; the hover text keeps
+        # the full channel name.
+        name=channel_name[:15],
+        line={"color": colors[idx % len(colors)]},
+        hovertemplate=f"{channel_name}<br>日期: %{{x}}<br>回流率: %{{y:.1%}}<extra></extra>",
+    )
+    fig3.add_trace(trend_line)
+
+fig3.update_layout(
+    title="渠道每日回流率趋势",
+    height=400,
+    xaxis_title="日期",
+    yaxis_title="回流率",
+    yaxis_tickformat='.0%',
+    # Horizontal legend, right-aligned just above the plot area.
+    legend={
+        "orientation": "h",
+        "yanchor": "bottom",
+        "y": 1.02,
+        "xanchor": "right",
+        "x": 1,
+    },
+)
+
+# 4. Click UV vs return rate scatter - one point per channel/category pair.
+# Hover label: channel name (first 10 characters) above the category name.
+point_labels = [
+    f"{channel_name[:10]}<br>{category}"
+    for channel_name, category in zip(
+        channel_category['channel'], channel_category['merge一级品类']
+    )
+]
+
+uv_vs_ror = go.Scatter(
+    x=channel_category['点击uv'],
+    y=channel_category['回流率'],
+    mode='markers',
+    marker={
+        "size": 8,
+        # Colour encodes the same return rate as the y axis.
+        "color": channel_category['回流率'],
+        "colorscale": 'Viridis',
+        "showscale": True,
+        "colorbar": {"title": "回流率"},
+    },
+    text=point_labels,
+    hovertemplate="%{text}<br>UV: %{x:,}<br>回流率: %{y:.1%}<extra></extra>",
+)
+
+fig4 = go.Figure()
+fig4.add_trace(uv_vs_ror)
+
+fig4.update_layout(
+    title="渠道×品类 UV vs 回流率 (寻找高UV高回流组合)",
+    height=500,
+    xaxis_title="点击UV",
+    yaxis_title="回流率",
+    xaxis_type="log",
+    yaxis_tickformat='.0%',
+)
+
+# Reference line at the median return rate across all channel/category pairs.
+median_ror = channel_category['回流率'].median()
+fig4.add_hline(
+    y=median_ror,
+    line_dash="dash",
+    line_color="gray",
+    annotation_text=f"中位数: {median_ror:.1%}",
+)
+
+# ============================================================
+# Generate the HTML report
+# ============================================================
+
+# Imported here so this section carries its own dependency.
+from plotly.offline import get_plotlyjs_version
+
+# Load the plotly.js release bundled with the installed plotly.py. The
+# "plotly-latest" CDN alias is frozen at plotly.js v1.58.5, which predates
+# newer trace attributes used above (e.g. heatmap `texttemplate`) and may not
+# understand the JSON that a current plotly.py emits.
+plotlyjs_url = f"https://cdn.plot.ly/plotly-{get_plotlyjs_version()}.min.js"
+
+# Serialize every figure exactly once. Each payload is embedded a single time
+# and bound to a function-local JS variable, instead of being inlined twice
+# (once for `.data`, once for `.layout`).
+chart_figures = (('chart1', fig1), ('chart2', fig2), ('chart3', fig3), ('chart4', fig4))
+plot_calls = "\n".join(
+    f"        (function () {{ var fig = {fig.to_json()}; "
+    f"Plotly.newPlot('{div_id}', fig.data, fig.layout); }})();"
+    for div_id, fig in chart_figures
+)
+
+# Headline numbers for the summary cards.
+date_min = df['dt'].min()
+date_max = df['dt'].max()
+total_uv = df['点击uv'].sum()
+# Overall return rate, weighted by click UV.
+overall_ror = (df['再分享回流率'] * df['点击uv']).sum() / total_uv
+channel_count = df['channel'].nunique()
+category_count = df['merge一级品类'].nunique()
+
+# Doubled braces in the CSS are literal braces escaped for the f-string.
+html_content = f"""
+<!DOCTYPE html>
+<html>
+<head>
+    <meta charset="utf-8">
+    <title>渠道效果分析报告</title>
+    <script src="{plotlyjs_url}"></script>
+    <style>
+        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+               margin: 20px; background: #f5f5f5; }}
+        .container {{ max-width: 1400px; margin: 0 auto; }}
+        h1 {{ color: #333; border-bottom: 2px solid #007bff; padding-bottom: 10px; }}
+        h2 {{ color: #555; margin-top: 30px; }}
+        .chart {{ background: white; padding: 20px; margin: 20px 0;
+                  border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
+        .summary {{ background: white; padding: 20px; margin: 20px 0;
+                   border-radius: 8px; display: flex; gap: 20px; flex-wrap: wrap; }}
+        .stat-card {{ flex: 1; min-width: 150px; padding: 15px;
+                     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                     border-radius: 8px; color: white; text-align: center; }}
+        .stat-card h3 {{ margin: 0; font-size: 24px; }}
+        .stat-card p {{ margin: 5px 0 0; opacity: 0.9; }}
+    </style>
+</head>
+<body>
+    <div class="container">
+        <h1>渠道效果分析报告</h1>
+        <p>数据范围: {date_min} ~ {date_max}</p>
+
+        <div class="summary">
+            <div class="stat-card">
+                <h3>{int(total_uv):,}</h3>
+                <p>总点击UV</p>
+            </div>
+            <div class="stat-card">
+                <h3>{overall_ror:.1%}</h3>
+                <p>平均回流率</p>
+            </div>
+            <div class="stat-card">
+                <h3>{channel_count}</h3>
+                <p>渠道数</p>
+            </div>
+            <div class="stat-card">
+                <h3>{category_count}</h3>
+                <p>品类数</p>
+            </div>
+        </div>
+
+        <h2>1. 渠道整体表现</h2>
+        <div class="chart" id="chart1"></div>
+
+        <h2>2. 渠道×品类 回流率热力图</h2>
+        <div class="chart" id="chart2"></div>
+
+        <h2>3. 每日回流率趋势</h2>
+        <div class="chart" id="chart3"></div>
+
+        <h2>4. UV vs 回流率 (寻找优质组合)</h2>
+        <div class="chart" id="chart4"></div>
+    </div>
+
+    <script>
+{plot_calls}
+    </script>
+</body>
+</html>
+"""
+
+# The report is written next to the source CSV, named after it.
+html_file = output_dir / f"{latest_file.stem}_报告.html"
+html_file.write_text(html_content, encoding='utf-8')
+
+print(f"\nHTML 报告已生成: {html_file}")