|
|
@@ -0,0 +1,187 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# coding=utf-8
|
|
|
+"""
|
|
|
+渠道效果分析
|
|
|
+分析不同渠道的整体表现、渠道×品类表现
|
|
|
+"""
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
# Locate this task's output directory relative to the script itself.
task_dir = Path(__file__).parent
output_dir = task_dir / "output"

# Collect raw CSV exports; skip previously generated analysis files
# (their names carry the '_分析' suffix).
csv_files = [f for f in output_dir.glob("*.csv") if '_分析' not in f.name]
if not csv_files:
    print("没有找到数据文件,请先运行 query.sql")
    # raise SystemExit instead of exit(): exit() is injected by the `site`
    # module and is not guaranteed to exist (e.g. when run with `python -S`).
    raise SystemExit(1)

# Analyze the most recently modified export.
latest_file = max(csv_files, key=lambda x: x.stat().st_mtime)
df = pd.read_csv(latest_file)
|
|
|
+
|
|
|
# Accumulates every emitted report line so the whole log can be written
# to a text file at the end of the run.
lines = []


def log(text=""):
    """Buffer *text* for the saved report and echo it to stdout."""
    lines.append(text)
    print(text)
|
|
|
+
|
|
|
+
|
|
|
# Report header: which export file is being analyzed and the `dt`
# (date) span it covers.
log(f"分析文件: {latest_file.name}")
log(f"时间范围: {df['dt'].min()} ~ {df['dt'].max()}")
log()
|
|
|
+
|
|
|
# ============================================================
# Section 1: overall performance per channel
# ============================================================
log("=" * 80)
log("一、渠道整体表现")
log("=" * 80)
log()

# Collapse the per-day rows into one totals row per channel.
_uv_cols = ['点击uv', '进入推荐uv', '再分享回流uv', '原视频回流uv', '推荐回流uv']
channel_stats = df.groupby('channel')[_uv_cols].sum().reset_index()

# Conversion ratios. NOTE(review): the "+ 10" denominators look like
# additive smoothing to damp rates for tiny channels, while 进入推荐率
# uses the raw denominator — presumably intentional; confirm upstream.
clicks = channel_stats['点击uv']
channel_stats['进入推荐率'] = channel_stats['进入推荐uv'] / clicks
channel_stats['再分享回流率'] = channel_stats['再分享回流uv'] / (clicks + 10)
channel_stats['原视频回流率'] = channel_stats['原视频回流uv'] / (clicks + 10)
channel_stats['推荐回流率'] = channel_stats['推荐回流uv'] / (clicks + 10)
channel_stats = channel_stats.sort_values('点击uv', ascending=False)

header = f"{'渠道':<25} {'点击UV':>12} {'进入推荐率':>10} {'回流率':>10} {'原视频':>8} {'推荐':>8}"
log(header)
log("-" * 80)

# One formatted row per channel, largest first.
for _, row in channel_stats.iterrows():
    log(
        f"{row['channel']:<25} {int(row['点击uv']):>12,}"
        f" {row['进入推荐率']:>10.1%} {row['再分享回流率']:>10.1%}"
        f" {row['原视频回流率']:>8.1%} {row['推荐回流率']:>8.1%}"
    )

log()
|
|
|
+
|
|
|
# ============================================================
# Section 2: daily return-rate trend per channel
# ============================================================
log("=" * 80)
log("二、渠道每日回流率趋势")
log("=" * 80)
log()

# Totals per (day, channel), then the smoothed daily return rate.
daily_channel = (
    df.groupby(['dt', 'channel'])[['点击uv', '再分享回流uv']]
    .sum()
    .reset_index()
)
daily_channel['回流率'] = daily_channel['再分享回流uv'] / (daily_channel['点击uv'] + 10)

# Restrict the trend table to major channels (total click UV > 10k).
main_channels = channel_stats.loc[channel_stats['点击uv'] > 10000, 'channel'].tolist()
dates = sorted(df['dt'].unique())

# Column headers show the last 4 characters of each date value.
header = f"{'渠道':<25}" + "".join(f"{str(d)[-4:]:>8}" for d in dates)
log(header)
log("-" * 80)

# One row per major channel; '--' marks dates with no data for it.
for ch in main_channels:
    ch_data = daily_channel[daily_channel['channel'] == ch].set_index('dt')
    cells = [f"{ch:<25}"]
    for d in dates:
        if d in ch_data.index:
            cells.append(f"{ch_data.loc[d, '回流率']:>8.1%}")
        else:
            cells.append(f"{'--':>8}")
    log("".join(cells))

log()
|
|
|
+
|
|
|
# ============================================================
# Section 3: channel × category effectiveness matrix
# ============================================================
log("=" * 80)
log("三、渠道×品类效果矩阵(回流率)")
log("=" * 80)
log()

# Totals per (channel, top-level category), plus the smoothed return rate.
channel_category = (
    df.groupby(['channel', 'merge一级品类'])[['点击uv', '再分享回流uv']]
    .sum()
    .reset_index()
)
channel_category['回流率'] = (
    channel_category['再分享回流uv'] / (channel_category['点击uv'] + 10)
)
|
|
|
+
|
|
|
# Pivot into category-rows × channel-columns matrices: click UV
# (absent combinations become 0) and return rate (absent become NaN).
pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
pivot_ror = channel_category.pivot(index='merge一级品类', columns='channel', values='回流率')

# Drop small-sample categories; cap channels at the 8 largest by UV.
min_uv = 1000
valid_categories = pivot_uv.index[pivot_uv.sum(axis=1) >= min_uv]
valid_channels = main_channels[:8]

log(f"品类数: {len(valid_categories)}, 渠道数: {len(valid_channels)}")
log()
|
|
|
+
|
|
|
# For every qualifying category, report the channel with the highest
# smoothed return rate among the major channels.
log("【各品类最佳渠道】")
log("-" * 80)

for category in valid_categories:
    if category not in pivot_ror.index:
        continue
    candidate_cols = [c for c in valid_channels if c in pivot_ror.columns]
    rates = pivot_ror.loc[category, candidate_cols].dropna()
    if len(rates) == 0:
        continue
    # Total click UV for the category across ALL channels, not just the
    # candidates shown here.
    total_clicks = int(pivot_uv.loc[category].sum())
    label = str(category)[:15] if pd.notna(category) else '(空)'
    log(f"  {label:<17} UV={total_clicks:>8,} → {rates.idxmax():<20} 回流率={rates.max():.1%}")

log()
|
|
|
+
|
|
|
# For every major channel, report the qualifying category with the
# highest smoothed return rate.
log("【各渠道最佳品类】")
log("-" * 80)

for ch in valid_channels:
    if ch not in pivot_ror.columns:
        continue
    rates = pivot_ror.loc[valid_categories, ch].dropna()
    if len(rates) == 0:
        continue
    best_cat = rates.idxmax()
    clicks_total = int(channel_stats[channel_stats['channel'] == ch]['点击uv'].values[0])
    label = str(best_cat)[:15] if pd.notna(best_cat) else '(空)'
    log(f"  {ch:<25} UV={clicks_total:>10,} → {label:<15} 回流率={rates.max():.1%}")

log()
|
|
|
+
|
|
|
# ============================================================
# Section 4: grand totals across all dates and channels
# ============================================================
log("=" * 80)
log("四、汇总统计")
log("=" * 80)
log()

# Overall ratios use the raw click-UV denominator (no +10 smoothing
# here, unlike the per-channel tables).
total_uv = df['点击uv'].sum()
total_recommend = df['进入推荐uv'].sum()
total_return = df['再分享回流uv'].sum()

summary_rows = [
    f"总点击UV: {int(total_uv):,}",
    f"总进入推荐UV: {int(total_recommend):,} ({total_recommend/total_uv:.1%})",
    f"总再分享回流UV: {int(total_return):,} ({total_return/total_uv:.1%})",
    f"渠道数: {df['channel'].nunique()}",
    f"品类数: {df['merge一级品类'].nunique()}",
]
for row_text in summary_rows:
    log(row_text)
log()
|
|
|
+
|
|
|
# Persist the accumulated report next to the source CSV. Note the final
# "saved" line is logged after writing, so it appears on stdout only.
result_file = output_dir / f"{latest_file.stem}_分析.txt"
result_file.write_text("\n".join(lines), encoding='utf-8')
log(f"结果已保存: {result_file}")
|