|
|
@@ -78,7 +78,7 @@ daily_channel = df.groupby(['dt', 'channel']).apply(
|
|
|
lambda x: pd.Series({
|
|
|
'点击uv': x['点击uv'].sum(),
|
|
|
'回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum()
|
|
|
- })
|
|
|
+ }), include_groups=False
|
|
|
).reset_index()
|
|
|
|
|
|
main_channels = channel_stats[channel_stats['点击uv'] > 10000]['channel'].tolist()
|
|
|
@@ -114,7 +114,7 @@ channel_category = df.groupby(['channel', 'merge一级品类']).apply(
|
|
|
lambda x: pd.Series({
|
|
|
'点击uv': x['点击uv'].sum(),
|
|
|
'回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
|
|
|
- })
|
|
|
+ }), include_groups=False
|
|
|
).reset_index()
|
|
|
|
|
|
pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
|
|
|
@@ -195,7 +195,7 @@ log()
|
|
|
# 四、渠道×二级品类效果
|
|
|
# ============================================================
|
|
|
log("=" * 80)
|
|
|
-log("四、渠道×二级品类效果(Top 20)")
|
|
|
+log("四、渠道×二级品类效果")
|
|
|
log("=" * 80)
|
|
|
log()
|
|
|
|
|
|
@@ -205,21 +205,80 @@ if 'merge二级品类' in df.columns:
|
|
|
lambda x: pd.Series({
|
|
|
'点击uv': x['点击uv'].sum(),
|
|
|
'回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
|
|
|
- })
|
|
|
+ }), include_groups=False
|
|
|
).reset_index()
|
|
|
|
|
|
- # 过滤小样本,取回流率最高的
|
|
|
- cat2_filtered = channel_cat2[channel_cat2['点击uv'] >= 500].nlargest(20, '回流率')
|
|
|
+ # 二级品类汇总(跨渠道)
|
|
|
+ cat2_stats = channel_cat2.groupby(['merge一级品类', 'merge二级品类']).agg({
|
|
|
+ '点击uv': 'sum'
|
|
|
+ }).reset_index()
|
|
|
+ cat2_stats = cat2_stats[cat2_stats['点击uv'] >= 500].sort_values('点击uv', ascending=False)
|
|
|
+
|
|
|
+ # 二级品类的回流率 pivot
|
|
|
+ pivot_cat2_ror = channel_cat2.pivot_table(
|
|
|
+ index=['merge一级品类', 'merge二级品类'],
|
|
|
+ columns='channel',
|
|
|
+ values='回流率'
|
|
|
+ )
|
|
|
+ pivot_cat2_uv = channel_cat2.pivot_table(
|
|
|
+ index=['merge一级品类', 'merge二级品类'],
|
|
|
+ columns='channel',
|
|
|
+ values='点击uv',
|
|
|
+ fill_value=0
|
|
|
+ )
|
|
|
+
|
|
|
+ valid_cat2 = [(row['merge一级品类'], row['merge二级品类']) for _, row in cat2_stats.head(30).iterrows()]
|
|
|
+
|
|
|
+ log("【各二级品类 Top 5 渠道】")
|
|
|
+ log("-" * 80)
|
|
|
|
|
|
- header = f"{'渠道':<20} {'一级品类':<12} {'二级品类':<15} {'UV':>10} {'回流率':>8}"
|
|
|
- log(header)
|
|
|
+ for cat1, cat2 in valid_cat2:
|
|
|
+ if (cat1, cat2) not in pivot_cat2_ror.index:
|
|
|
+ continue
|
|
|
+ row = pivot_cat2_ror.loc[(cat1, cat2), [c for c in valid_channels if c in pivot_cat2_ror.columns]].dropna().sort_values(ascending=False)
|
|
|
+ if len(row) == 0:
|
|
|
+ continue
|
|
|
+ cat2_name = f"{str(cat1)[:8]}/{str(cat2)[:10]}" if pd.notna(cat2) else str(cat1)[:15]
|
|
|
+ cat2_total_uv = int(pivot_cat2_uv.loc[(cat1, cat2)].sum())
|
|
|
+ log(f" {cat2_name:<20} 总UV={cat2_total_uv:>10,}")
|
|
|
+ for ch in row.head(5).index:
|
|
|
+ ch_name = ch[:20] if pd.notna(ch) else ''
|
|
|
+ ch_uv = int(pivot_cat2_uv.loc[(cat1, cat2), ch]) if ch in pivot_cat2_uv.columns else 0
|
|
|
+ log(f" {ch_name:<20} UV={ch_uv:>8,} 回流率={row[ch]:.1%}")
|
|
|
+ log()
|
|
|
+
|
|
|
+ log()
|
|
|
+
|
|
|
+ log("【各渠道 Top 5 二级品类】")
|
|
|
log("-" * 80)
|
|
|
|
|
|
+ for ch in valid_channels:
|
|
|
+ if ch not in pivot_cat2_ror.columns:
|
|
|
+ continue
|
|
|
+ col = pivot_cat2_ror.loc[valid_cat2, ch].dropna().sort_values(ascending=False)
|
|
|
+ if len(col) == 0:
|
|
|
+ continue
|
|
|
+ ch_uv = int(channel_stats[channel_stats['channel'] == ch]['点击uv'].values[0])
|
|
|
+ log(f" {ch:<25} UV={ch_uv:>10,}")
|
|
|
+ for (cat1, cat2) in col.head(5).index:
|
|
|
+ cat2_name = f"{str(cat1)[:8]}/{str(cat2)[:10]}" if pd.notna(cat2) else str(cat1)[:15]
|
|
|
+ cat2_uv = int(pivot_cat2_uv.loc[(cat1, cat2), ch]) if ch in pivot_cat2_uv.columns else 0
|
|
|
+ log(f" {cat2_name:<20} UV={cat2_uv:>8,} 回流率={col[(cat1, cat2)]:.1%}")
|
|
|
+ log()
|
|
|
+
|
|
|
+ log()
|
|
|
+
|
|
|
+ # 回流率 Top 20
|
|
|
+ log("【回流率 Top 20 渠道×二级品类组合】")
|
|
|
+ log("-" * 80)
|
|
|
+ cat2_filtered = channel_cat2[channel_cat2['点击uv'] >= 500].nlargest(20, '回流率')
|
|
|
+ header = f"{'渠道':<20} {'一级品类':<10} {'二级品类':<12} {'UV':>10} {'回流率':>8}"
|
|
|
+ log(header)
|
|
|
+ log("-" * 80)
|
|
|
for _, row in cat2_filtered.iterrows():
|
|
|
cat1 = str(row['merge一级品类'])[:10] if pd.notna(row['merge一级品类']) else ''
|
|
|
cat2 = str(row['merge二级品类'])[:12] if pd.notna(row['merge二级品类']) else ''
|
|
|
- log(f"{row['channel']:<20} {cat1:<12} {cat2:<15} {int(row['点击uv']):>10,} {row['回流率']:>8.1%}")
|
|
|
-
|
|
|
+ log(f"{row['channel']:<20} {cat1:<10} {cat2:<12} {int(row['点击uv']):>10,} {row['回流率']:>8.1%}")
|
|
|
log()
|
|
|
|
|
|
# ============================================================
|