|
@@ -39,18 +39,21 @@ log("一、渠道整体表现")
|
|
|
log("=" * 80)
|
|
log("=" * 80)
|
|
|
log()
|
|
log()
|
|
|
|
|
|
|
|
|
|
+# 按渠道汇总(加权平均)
|
|
|
channel_stats = df.groupby('channel').agg({
|
|
channel_stats = df.groupby('channel').agg({
|
|
|
'点击uv': 'sum',
|
|
'点击uv': 'sum',
|
|
|
- '进入推荐uv': 'sum',
|
|
|
|
|
- '再分享回流uv': 'sum',
|
|
|
|
|
- '原视频回流uv': 'sum',
|
|
|
|
|
- '推荐回流uv': 'sum'
|
|
|
|
|
|
|
+ '再分享回流uv': 'sum'
|
|
|
}).reset_index()
|
|
}).reset_index()
|
|
|
|
|
|
|
|
-channel_stats['进入推荐率'] = channel_stats['进入推荐uv'] / channel_stats['点击uv']
|
|
|
|
|
-channel_stats['再分享回流率'] = channel_stats['再分享回流uv'] / (channel_stats['点击uv'] + 10)
|
|
|
|
|
-channel_stats['原视频回流率'] = channel_stats['原视频回流uv'] / (channel_stats['点击uv'] + 10)
|
|
|
|
|
-channel_stats['推荐回流率'] = channel_stats['推荐回流uv'] / (channel_stats['点击uv'] + 10)
|
|
|
|
|
|
|
+# 用加权方式计算整体指标
|
|
|
|
|
+for ch in channel_stats['channel']:
|
|
|
|
|
+ ch_df = df[df['channel'] == ch]
|
|
|
|
|
+ uv = ch_df['点击uv'].sum()
|
|
|
|
|
+ channel_stats.loc[channel_stats['channel'] == ch, '进入推荐率'] = (ch_df['进入推荐率'] * ch_df['点击uv']).sum() / uv
|
|
|
|
|
+ channel_stats.loc[channel_stats['channel'] == ch, '再分享回流率'] = (ch_df['再分享回流率'] * ch_df['点击uv']).sum() / uv
|
|
|
|
|
+ channel_stats.loc[channel_stats['channel'] == ch, '原视频再分享回流率'] = (ch_df['原视频再分享回流率'] * ch_df['点击uv']).sum() / uv
|
|
|
|
|
+ channel_stats.loc[channel_stats['channel'] == ch, '推荐再分享回流率'] = (ch_df['推荐再分享回流率'] * ch_df['点击uv']).sum() / uv
|
|
|
|
|
+
|
|
|
channel_stats = channel_stats.sort_values('点击uv', ascending=False)
|
|
channel_stats = channel_stats.sort_values('点击uv', ascending=False)
|
|
|
|
|
|
|
|
header = f"{'渠道':<25} {'点击UV':>12} {'进入推荐率':>10} {'回流率':>10} {'原视频':>8} {'推荐':>8}"
|
|
header = f"{'渠道':<25} {'点击UV':>12} {'进入推荐率':>10} {'回流率':>10} {'原视频':>8} {'推荐':>8}"
|
|
@@ -58,7 +61,7 @@ log(header)
|
|
|
log("-" * 80)
|
|
log("-" * 80)
|
|
|
|
|
|
|
|
for _, row in channel_stats.iterrows():
|
|
for _, row in channel_stats.iterrows():
|
|
|
- log(f"{row['channel']:<25} {int(row['点击uv']):>12,} {row['进入推荐率']:>10.1%} {row['再分享回流率']:>10.1%} {row['原视频回流率']:>8.1%} {row['推荐回流率']:>8.1%}")
|
|
|
|
|
|
|
+ log(f"{row['channel']:<25} {int(row['点击uv']):>12,} {row['进入推荐率']:>10.1%} {row['再分享回流率']:>10.1%} {row['原视频再分享回流率']:>8.1%} {row['推荐再分享回流率']:>8.1%}")
|
|
|
|
|
|
|
|
log()
|
|
log()
|
|
|
|
|
|
|
@@ -71,13 +74,13 @@ log("=" * 80)
|
|
|
log()
|
|
log()
|
|
|
|
|
|
|
|
# 按日期和渠道汇总
|
|
# 按日期和渠道汇总
|
|
|
-daily_channel = df.groupby(['dt', 'channel']).agg({
|
|
|
|
|
- '点击uv': 'sum',
|
|
|
|
|
- '再分享回流uv': 'sum'
|
|
|
|
|
-}).reset_index()
|
|
|
|
|
-daily_channel['回流率'] = daily_channel['再分享回流uv'] / (daily_channel['点击uv'] + 10)
|
|
|
|
|
|
|
+daily_channel = df.groupby(['dt', 'channel']).apply(
|
|
|
|
|
+ lambda x: pd.Series({
|
|
|
|
|
+ '点击uv': x['点击uv'].sum(),
|
|
|
|
|
+ '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum()
|
|
|
|
|
+ })
|
|
|
|
|
+).reset_index()
|
|
|
|
|
|
|
|
-# 只看主要渠道(UV > 10000)
|
|
|
|
|
main_channels = channel_stats[channel_stats['点击uv'] > 10000]['channel'].tolist()
|
|
main_channels = channel_stats[channel_stats['点击uv'] > 10000]['channel'].tolist()
|
|
|
dates = sorted(df['dt'].unique())
|
|
dates = sorted(df['dt'].unique())
|
|
|
|
|
|
|
@@ -107,25 +110,23 @@ log("=" * 80)
|
|
|
log()
|
|
log()
|
|
|
|
|
|
|
|
# 按渠道和品类汇总
|
|
# 按渠道和品类汇总
|
|
|
-channel_category = df.groupby(['channel', 'merge一级品类']).agg({
|
|
|
|
|
- '点击uv': 'sum',
|
|
|
|
|
- '再分享回流uv': 'sum'
|
|
|
|
|
-}).reset_index()
|
|
|
|
|
-channel_category['回流率'] = channel_category['再分享回流uv'] / (channel_category['点击uv'] + 10)
|
|
|
|
|
|
|
+channel_category = df.groupby(['channel', 'merge一级品类']).apply(
|
|
|
|
|
+ lambda x: pd.Series({
|
|
|
|
|
+ '点击uv': x['点击uv'].sum(),
|
|
|
|
|
+ '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
|
|
|
|
|
+ })
|
|
|
|
|
+).reset_index()
|
|
|
|
|
|
|
|
-# 创建透视表
|
|
|
|
|
pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
|
|
pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
|
|
|
pivot_ror = channel_category.pivot(index='merge一级品类', columns='channel', values='回流率')
|
|
pivot_ror = channel_category.pivot(index='merge一级品类', columns='channel', values='回流率')
|
|
|
|
|
|
|
|
-# 过滤小样本
|
|
|
|
|
min_uv = 1000
|
|
min_uv = 1000
|
|
|
valid_categories = pivot_uv[pivot_uv.sum(axis=1) >= min_uv].index
|
|
valid_categories = pivot_uv[pivot_uv.sum(axis=1) >= min_uv].index
|
|
|
-valid_channels = main_channels[:8] # 取前8个主要渠道
|
|
|
|
|
|
|
+valid_channels = main_channels[:8]
|
|
|
|
|
|
|
|
log(f"品类数: {len(valid_categories)}, 渠道数: {len(valid_channels)}")
|
|
log(f"品类数: {len(valid_categories)}, 渠道数: {len(valid_channels)}")
|
|
|
log()
|
|
log()
|
|
|
|
|
|
|
|
-# 各品类最佳渠道
|
|
|
|
|
log("【各品类最佳渠道】")
|
|
log("【各品类最佳渠道】")
|
|
|
log("-" * 80)
|
|
log("-" * 80)
|
|
|
|
|
|
|
@@ -143,7 +144,6 @@ for category in valid_categories:
|
|
|
|
|
|
|
|
log()
|
|
log()
|
|
|
|
|
|
|
|
-# 各渠道最佳品类
|
|
|
|
|
log("【各渠道最佳品类】")
|
|
log("【各渠道最佳品类】")
|
|
|
log("-" * 80)
|
|
log("-" * 80)
|
|
|
|
|
|
|
@@ -162,20 +162,52 @@ for ch in valid_channels:
|
|
|
log()
|
|
log()
|
|
|
|
|
|
|
|
# ============================================================
|
|
# ============================================================
|
|
|
-# 四、汇总统计
|
|
|
|
|
|
|
+# 四、渠道×二级品类效果
|
|
|
|
|
+# ============================================================
|
|
|
|
|
+log("=" * 80)
|
|
|
|
|
+log("四、渠道×二级品类效果(Top 20)")
|
|
|
|
|
+log("=" * 80)
|
|
|
|
|
+log()
|
|
|
|
|
+
|
|
|
|
|
+# 按渠道和二级品类汇总
|
|
|
|
|
+if 'merge二级品类' in df.columns:
|
|
|
|
|
+ channel_cat2 = df.groupby(['channel', 'merge一级品类', 'merge二级品类']).apply(
|
|
|
|
|
+ lambda x: pd.Series({
|
|
|
|
|
+ '点击uv': x['点击uv'].sum(),
|
|
|
|
|
+ '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
|
|
|
|
|
+ })
|
|
|
|
|
+ ).reset_index()
|
|
|
|
|
+
|
|
|
|
|
+ # 过滤小样本,取回流率最高的
|
|
|
|
|
+ cat2_filtered = channel_cat2[channel_cat2['点击uv'] >= 500].nlargest(20, '回流率')
|
|
|
|
|
+
|
|
|
|
|
+ header = f"{'渠道':<20} {'一级品类':<12} {'二级品类':<15} {'UV':>10} {'回流率':>8}"
|
|
|
|
|
+ log(header)
|
|
|
|
|
+ log("-" * 80)
|
|
|
|
|
+
|
|
|
|
|
+ for _, row in cat2_filtered.iterrows():
|
|
|
|
|
+ cat1 = str(row['merge一级品类'])[:10] if pd.notna(row['merge一级品类']) else ''
|
|
|
|
|
+ cat2 = str(row['merge二级品类'])[:12] if pd.notna(row['merge二级品类']) else ''
|
|
|
|
|
+ log(f"{row['channel']:<20} {cat1:<12} {cat2:<15} {int(row['点击uv']):>10,} {row['回流率']:>8.1%}")
|
|
|
|
|
+
|
|
|
|
|
+ log()
|
|
|
|
|
+
|
|
|
|
|
+# ============================================================
|
|
|
|
|
+# 五、汇总统计
|
|
|
# ============================================================
|
|
# ============================================================
|
|
|
log("=" * 80)
|
|
log("=" * 80)
|
|
|
-log("四、汇总统计")
|
|
|
|
|
|
|
+log("五、汇总统计")
|
|
|
log("=" * 80)
|
|
log("=" * 80)
|
|
|
log()
|
|
log()
|
|
|
|
|
|
|
|
total_uv = df['点击uv'].sum()
|
|
total_uv = df['点击uv'].sum()
|
|
|
-total_recommend = df['进入推荐uv'].sum()
|
|
|
|
|
total_return = df['再分享回流uv'].sum()
|
|
total_return = df['再分享回流uv'].sum()
|
|
|
|
|
+avg_recommend = (df['进入推荐率'] * df['点击uv']).sum() / total_uv
|
|
|
|
|
+avg_return = (df['再分享回流率'] * df['点击uv']).sum() / total_uv
|
|
|
|
|
|
|
|
log(f"总点击UV: {int(total_uv):,}")
|
|
log(f"总点击UV: {int(total_uv):,}")
|
|
|
-log(f"总进入推荐UV: {int(total_recommend):,} ({total_recommend/total_uv:.1%})")
|
|
|
|
|
-log(f"总再分享回流UV: {int(total_return):,} ({total_return/total_uv:.1%})")
|
|
|
|
|
|
|
+log(f"平均进入推荐率: {avg_recommend:.1%}")
|
|
|
|
|
+log(f"平均再分享回流率: {avg_return:.1%}")
|
|
|
log(f"渠道数: {df['channel'].nunique()}")
|
|
log(f"渠道数: {df['channel'].nunique()}")
|
|
|
log(f"品类数: {df['merge一级品类'].nunique()}")
|
|
log(f"品类数: {df['merge一级品类'].nunique()}")
|
|
|
log()
|
|
log()
|