|
|
@@ -0,0 +1,251 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# coding=utf-8
|
|
|
+"""
|
|
|
+渠道效果分析可视化
|
|
|
+输出交互式 HTML 报告
|
|
|
+"""
|
|
|
+import pandas as pd
|
|
|
+import plotly.graph_objects as go
|
|
|
+from plotly.subplots import make_subplots
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
# Locate the directory that holds query exports, relative to this script.
task_dir = Path(__file__).parent
output_dir = task_dir / "output"

# Pick the most recently modified raw-data export. Only CSV files whose stem
# contains exactly one underscore are considered (per the original note this
# is the <date>_<date>.csv naming of raw exports, which excludes derived
# analysis-result files).
csv_files = [f for f in output_dir.glob("*.csv") if f.stem.count('_') == 1]
if not csv_files:
    # `exit()` is a helper injected by the `site` module for interactive
    # sessions and is not guaranteed to exist (e.g. `python -S`, frozen
    # builds). Raising SystemExit with a message prints it to stderr and
    # terminates with exit status 1.
    raise SystemExit("没有找到数据文件,请先运行 query.sql")

latest_file = max(csv_files, key=lambda x: x.stat().st_mtime)
df = pd.read_csv(latest_file)

print(f"分析文件: {latest_file.name}")
print(f"时间范围: {df['dt'].min()} ~ {df['dt'].max()}")
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# 数据准备
|
|
|
+# ============================================================
|
|
|
+
|
|
|
# Per-channel totals plus click-UV-weighted average rates.
#
# The weighted numerators (rate * click UV) are computed once on the full
# frame and summed in a single groupby pass, instead of re-filtering the
# whole frame for every channel.
_weighted = df.assign(
    _rec_num=df['进入推荐率'] * df['点击uv'],
    _ror_num=df['再分享回流率'] * df['点击uv'],
)
channel_stats = _weighted.groupby('channel').agg(**{
    '点击uv': ('点击uv', 'sum'),
    '再分享回流uv': ('再分享回流uv', 'sum'),
    '_rec_num': ('_rec_num', 'sum'),
    '_ror_num': ('_ror_num', 'sum'),
}).reset_index()

# Channels with zero total click UV get a rate of 0 rather than NaN/inf,
# consistent with the guarded weighted averages used for the
# channel x category and daily aggregates.
_channel_uv = channel_stats['点击uv']
channel_stats['进入推荐率'] = (
    channel_stats.pop('_rec_num') / _channel_uv
).where(_channel_uv > 0, 0)
channel_stats['再分享回流率'] = (
    channel_stats.pop('_ror_num') / _channel_uv
).where(_channel_uv > 0, 0)

# Ascending order so the largest channel ends up at the top of the
# horizontal bar chart.
channel_stats = channel_stats.sort_values('点击uv', ascending=True)
|
|
|
+
|
|
|
def _weighted_return_rate(frame, keys):
    """Aggregate click UV and the UV-weighted return rate per group.

    Args:
        frame: DataFrame with the columns '点击uv' and '再分享回流率' plus
            every column named in ``keys``.
        keys: Column names to group by.

    Returns:
        DataFrame with one row per group and the columns ``keys``,
        '点击uv' (summed) and '回流率' (sum of rate * UV divided by summed
        UV; 0 when the group's summed UV is not positive).
    """
    keys = list(keys)
    work = frame[keys + ['点击uv']].copy()
    work['_ror_num'] = frame['再分享回流率'] * frame['点击uv']
    # Vectorised sums replace a per-group Python lambda, which also removes
    # the dependency on groupby.apply(include_groups=...).
    out = work.groupby(keys, as_index=False)[['点击uv', '_ror_num']].sum()
    group_uv = out['点击uv']
    out['回流率'] = (out.pop('_ror_num') / group_uv).where(group_uv > 0, 0)
    return out


# Channel x category
channel_category = _weighted_return_rate(df, ['channel', 'merge一级品类'])

# Wide tables (category rows, channel columns). Missing combinations stay NaN
# in the rate table and become 0 in the UV table.
pivot_ror = channel_category.pivot(index='merge一级品类', columns='channel', values='回流率')
pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)

# Daily trend per channel
daily = _weighted_return_rate(df, ['dt', 'channel'])
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# 创建可视化
|
|
|
+# ============================================================
|
|
|
+
|
|
|
# 1. Overall channel performance: two horizontal bar charts side by side
#    (click UV on the left, re-share return rate on the right).
channel_names = channel_stats['channel']
return_rates = channel_stats['再分享回流率']

fig1 = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=('点击UV', '再分享回流率'),
    specs=[[{"type": "bar"}, {"type": "bar"}]],
)

uv_bar = go.Bar(
    y=channel_names,
    x=channel_stats['点击uv'],
    orientation='h',
    name='点击UV',
    marker_color='steelblue',
)
rate_bar = go.Bar(
    y=channel_names,
    x=return_rates,
    orientation='h',
    name='回流率',
    marker_color='coral',
    # Percentage label drawn outside the end of each bar.
    text=[format(rate, ".1%") for rate in return_rates],
    textposition='outside',
)

fig1.add_trace(uv_bar, row=1, col=1)
fig1.add_trace(rate_bar, row=1, col=2)

fig1.update_layout(height=500, title_text="渠道整体表现", showlegend=False)
# Thousands separators for the UV axis, whole percentages for the rate axis.
fig1.update_xaxes(tickformat=',', row=1, col=1)
fig1.update_xaxes(tickformat='.0%', row=1, col=2)
|
|
|
+
|
|
|
# 2. Channel x category return-rate heatmap.
# Columns: the 8 channels with the most click UV.
# Rows: categories whose click UV summed over all channels is at least 1000.
main_channels = channel_stats.nlargest(8, '点击uv')['channel'].tolist()
category_uv = pivot_uv.sum(axis=1)
valid_categories = category_uv[category_uv >= 1000].index.tolist()

heatmap_columns = [c for c in main_channels if c in pivot_ror.columns]
heatmap_data = pivot_ror.loc[valid_categories, heatmap_columns]

# Cell annotations: formatted percentage, or an empty string where the
# channel/category combination has no value.
cell_labels = []
for row in heatmap_data.values:
    cell_labels.append([f"{v:.1%}" if pd.notna(v) else "" for v in row])

heatmap = go.Heatmap(
    z=heatmap_data.values,
    # Channel names are cut to 12 characters for the axis labels.
    x=[c[:12] for c in heatmap_data.columns],
    y=heatmap_data.index,
    colorscale='RdYlGn',
    text=cell_labels,
    texttemplate="%{text}",
    textfont={"size": 10},
    hovertemplate="渠道: %{x}<br>品类: %{y}<br>回流率: %{z:.1%}<extra></extra>",
)
fig2 = go.Figure(data=heatmap)

fig2.update_layout(
    title="渠道×品类 回流率热力图",
    height=600,
    xaxis_title="渠道",
    yaxis_title="品类",
)
|
|
|
+
|
|
|
# 3. Daily return-rate trend: one line per channel for the 6 channels with
#    the most click UV.
fig3 = go.Figure()

top_channels = channel_stats.nlargest(6, '点击uv')['channel'].tolist()
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
palette_size = len(colors)

for position, channel_name in enumerate(top_channels):
    # Rows for this channel, ordered by date so the line is drawn left to right.
    series = daily.loc[daily['channel'] == channel_name].sort_values('dt')
    trace = go.Scatter(
        x=series['dt'].astype(str),
        y=series['回流率'],
        mode='lines+markers',
        # Legend entry is cut to 15 characters; the hover shows the full name.
        name=channel_name[:15],
        line={'color': colors[position % palette_size]},
        hovertemplate=f"{channel_name}<br>日期: %{{x}}<br>回流率: %{{y:.1%}}<extra></extra>",
    )
    fig3.add_trace(trace)

# Horizontal legend placed above the plot area, right-aligned.
legend_layout = {
    'orientation': "h",
    'yanchor': "bottom",
    'y': 1.02,
    'xanchor': "right",
    'x': 1,
}
fig3.update_layout(
    title="渠道每日回流率趋势",
    height=400,
    xaxis_title="日期",
    yaxis_title="回流率",
    yaxis_tickformat='.0%',
    legend=legend_layout,
)
|
|
|
+
|
|
|
# 4. Click UV vs return rate scatter, one point per channel x category pair.
def _point_label(row):
    """Build the hover label: channel (first 10 characters) over category."""
    return f"{row['channel'][:10]}<br>{row['merge一级品类']}"


pair_rates = channel_category['回流率']

pair_scatter = go.Scatter(
    x=channel_category['点击uv'],
    y=pair_rates,
    mode='markers',
    marker={
        'size': 8,
        # Points are coloured by the same return rate shown on the y axis.
        'color': pair_rates,
        'colorscale': 'Viridis',
        'showscale': True,
        'colorbar': {'title': "回流率"},
    },
    text=channel_category.apply(_point_label, axis=1),
    hovertemplate="%{text}<br>UV: %{x:,}<br>回流率: %{y:.1%}<extra></extra>",
)

fig4 = go.Figure()
fig4.add_trace(pair_scatter)

fig4.update_layout(
    title="渠道×品类 UV vs 回流率 (寻找高UV高回流组合)",
    height=500,
    xaxis_title="点击UV",
    yaxis_title="回流率",
    xaxis_type="log",
    yaxis_tickformat='.0%',
)

# Reference line at the median return rate across all pairs.
median_ror = pair_rates.median()
fig4.add_hline(
    y=median_ror,
    line_dash="dash",
    line_color="gray",
    annotation_text=f"中位数: {median_ror:.1%}",
)
|
|
|
+
|
|
|
+# ============================================================
|
|
|
+# 生成 HTML
|
|
|
+# ============================================================
|
|
|
+
|
|
|
# Headline numbers for the summary cards.
total_uv = df['点击uv'].sum()
# UV-weighted overall return rate; 0 when there is no click UV at all so the
# card never shows "nan%".
overall_ror = (df['再分享回流率'] * df['点击uv']).sum() / total_uv if total_uv > 0 else 0

# Render every figure to an embeddable <div> fragment exactly once.
# The first fragment also carries the <script> tag that loads plotly.js from
# the CDN at the version matching the installed plotly package. The
# "plotly-latest" CDN alias is frozen at plotly.js v1.58.5, which predates
# features used above such as heatmap `texttemplate`, so it must not be used.
chart_html = [
    fig.to_html(full_html=False, include_plotlyjs=('cdn' if idx == 0 else False))
    for idx, fig in enumerate((fig1, fig2, fig3, fig4))
]

html_content = f"""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>渠道效果分析报告</title>
    <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
               margin: 20px; background: #f5f5f5; }}
        .container {{ max-width: 1400px; margin: 0 auto; }}
        h1 {{ color: #333; border-bottom: 2px solid #007bff; padding-bottom: 10px; }}
        h2 {{ color: #555; margin-top: 30px; }}
        .chart {{ background: white; padding: 20px; margin: 20px 0;
                 border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); }}
        .summary {{ background: white; padding: 20px; margin: 20px 0;
                   border-radius: 8px; display: flex; gap: 20px; flex-wrap: wrap; }}
        .stat-card {{ flex: 1; min-width: 150px; padding: 15px;
                     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                     border-radius: 8px; color: white; text-align: center; }}
        .stat-card h3 {{ margin: 0; font-size: 24px; }}
        .stat-card p {{ margin: 5px 0 0; opacity: 0.9; }}
    </style>
</head>
<body>
    <div class="container">
        <h1>渠道效果分析报告</h1>
        <p>数据范围: {df['dt'].min()} ~ {df['dt'].max()}</p>

        <div class="summary">
            <div class="stat-card">
                <h3>{int(total_uv):,}</h3>
                <p>总点击UV</p>
            </div>
            <div class="stat-card">
                <h3>{overall_ror:.1%}</h3>
                <p>平均回流率</p>
            </div>
            <div class="stat-card">
                <h3>{df['channel'].nunique()}</h3>
                <p>渠道数</p>
            </div>
            <div class="stat-card">
                <h3>{df['merge一级品类'].nunique()}</h3>
                <p>品类数</p>
            </div>
        </div>

        <h2>1. 渠道整体表现</h2>
        <div class="chart" id="chart1">{chart_html[0]}</div>

        <h2>2. 渠道×品类 回流率热力图</h2>
        <div class="chart" id="chart2">{chart_html[1]}</div>

        <h2>3. 每日回流率趋势</h2>
        <div class="chart" id="chart3">{chart_html[2]}</div>

        <h2>4. UV vs 回流率 (寻找优质组合)</h2>
        <div class="chart" id="chart4">{chart_html[3]}</div>
    </div>
</body>
</html>
"""

# The report is written next to the source CSV, named after it.
html_file = output_dir / f"{latest_file.stem}_报告.html"
html_file.write_text(html_content, encoding='utf-8')

print(f"\nHTML 报告已生成: {html_file}")
|