Просмотр исходного кода

feat(渠道效果分析): 增加二级品类Top 5分析

- 各二级品类 Top 5 渠道(UV + 回流率)
- 各渠道 Top 5 二级品类(UV + 回流率)
- 回流率 Top 20 渠道×二级品类组合
- 修复pandas deprecation warnings

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui 2 месяца назад
Родитель
Коммит
d45b483c03
1 изменённый файл: 69 добавлено и 10 удалено
  1. 69 10
      tasks/渠道效果分析/analyze.py

+ 69 - 10
tasks/渠道效果分析/analyze.py

@@ -78,7 +78,7 @@ daily_channel = df.groupby(['dt', 'channel']).apply(
     lambda x: pd.Series({
     lambda x: pd.Series({
         '点击uv': x['点击uv'].sum(),
         '点击uv': x['点击uv'].sum(),
         '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum()
         '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum()
-    })
+    }), include_groups=False
 ).reset_index()
 ).reset_index()
 
 
 main_channels = channel_stats[channel_stats['点击uv'] > 10000]['channel'].tolist()
 main_channels = channel_stats[channel_stats['点击uv'] > 10000]['channel'].tolist()
@@ -114,7 +114,7 @@ channel_category = df.groupby(['channel', 'merge一级品类']).apply(
     lambda x: pd.Series({
     lambda x: pd.Series({
         '点击uv': x['点击uv'].sum(),
         '点击uv': x['点击uv'].sum(),
         '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
         '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
-    })
+    }), include_groups=False
 ).reset_index()
 ).reset_index()
 
 
 pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
 pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
@@ -195,7 +195,7 @@ log()
 # 四、渠道×二级品类效果
 # 四、渠道×二级品类效果
 # ============================================================
 # ============================================================
 log("=" * 80)
 log("=" * 80)
-log("四、渠道×二级品类效果(Top 20)")
+log("四、渠道×二级品类效果")
 log("=" * 80)
 log("=" * 80)
 log()
 log()
 
 
@@ -205,21 +205,80 @@ if 'merge二级品类' in df.columns:
         lambda x: pd.Series({
         lambda x: pd.Series({
             '点击uv': x['点击uv'].sum(),
             '点击uv': x['点击uv'].sum(),
             '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
             '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
-        })
+        }), include_groups=False
     ).reset_index()
     ).reset_index()
 
 
-    # 过滤小样本,取回流率最高的
-    cat2_filtered = channel_cat2[channel_cat2['点击uv'] >= 500].nlargest(20, '回流率')
+    # 二级品类汇总(跨渠道)
+    cat2_stats = channel_cat2.groupby(['merge一级品类', 'merge二级品类']).agg({
+        '点击uv': 'sum'
+    }).reset_index()
+    cat2_stats = cat2_stats[cat2_stats['点击uv'] >= 500].sort_values('点击uv', ascending=False)
+
+    # 二级品类的回流率 pivot
+    pivot_cat2_ror = channel_cat2.pivot_table(
+        index=['merge一级品类', 'merge二级品类'],
+        columns='channel',
+        values='回流率'
+    )
+    pivot_cat2_uv = channel_cat2.pivot_table(
+        index=['merge一级品类', 'merge二级品类'],
+        columns='channel',
+        values='点击uv',
+        fill_value=0
+    )
+
+    valid_cat2 = [(row['merge一级品类'], row['merge二级品类']) for _, row in cat2_stats.head(30).iterrows()]
+
+    log("【各二级品类 Top 5 渠道】")
+    log("-" * 80)
 
 
-    header = f"{'渠道':<20} {'一级品类':<12} {'二级品类':<15} {'UV':>10} {'回流率':>8}"
-    log(header)
+    for cat1, cat2 in valid_cat2:
+        if (cat1, cat2) not in pivot_cat2_ror.index:
+            continue
+        row = pivot_cat2_ror.loc[(cat1, cat2), [c for c in valid_channels if c in pivot_cat2_ror.columns]].dropna().sort_values(ascending=False)
+        if len(row) == 0:
+            continue
+        cat2_name = f"{str(cat1)[:8]}/{str(cat2)[:10]}" if pd.notna(cat2) else str(cat1)[:15]
+        cat2_total_uv = int(pivot_cat2_uv.loc[(cat1, cat2)].sum())
+        log(f"  {cat2_name:<20} 总UV={cat2_total_uv:>10,}")
+        for ch in row.head(5).index:
+            ch_name = ch[:20] if pd.notna(ch) else ''
+            ch_uv = int(pivot_cat2_uv.loc[(cat1, cat2), ch]) if ch in pivot_cat2_uv.columns else 0
+            log(f"      {ch_name:<20} UV={ch_uv:>8,} 回流率={row[ch]:.1%}")
+        log()
+
+    log()
+
+    log("【各渠道 Top 5 二级品类】")
     log("-" * 80)
     log("-" * 80)
 
 
+    for ch in valid_channels:
+        if ch not in pivot_cat2_ror.columns:
+            continue
+        col = pivot_cat2_ror.loc[valid_cat2, ch].dropna().sort_values(ascending=False)
+        if len(col) == 0:
+            continue
+        ch_uv = int(channel_stats[channel_stats['channel'] == ch]['点击uv'].values[0])
+        log(f"  {ch:<25} UV={ch_uv:>10,}")
+        for (cat1, cat2) in col.head(5).index:
+            cat2_name = f"{str(cat1)[:8]}/{str(cat2)[:10]}" if pd.notna(cat2) else str(cat1)[:15]
+            cat2_uv = int(pivot_cat2_uv.loc[(cat1, cat2), ch]) if ch in pivot_cat2_uv.columns else 0
+            log(f"      {cat2_name:<20} UV={cat2_uv:>8,} 回流率={col[(cat1, cat2)]:.1%}")
+        log()
+
+    log()
+
+    # 回流率 Top 20
+    log("【回流率 Top 20 渠道×二级品类组合】")
+    log("-" * 80)
+    cat2_filtered = channel_cat2[channel_cat2['点击uv'] >= 500].nlargest(20, '回流率')
+    header = f"{'渠道':<20} {'一级品类':<10} {'二级品类':<12} {'UV':>10} {'回流率':>8}"
+    log(header)
+    log("-" * 80)
     for _, row in cat2_filtered.iterrows():
     for _, row in cat2_filtered.iterrows():
         cat1 = str(row['merge一级品类'])[:10] if pd.notna(row['merge一级品类']) else ''
         cat1 = str(row['merge一级品类'])[:10] if pd.notna(row['merge一级品类']) else ''
         cat2 = str(row['merge二级品类'])[:12] if pd.notna(row['merge二级品类']) else ''
         cat2 = str(row['merge二级品类'])[:12] if pd.notna(row['merge二级品类']) else ''
-        log(f"{row['channel']:<20} {cat1:<12} {cat2:<15} {int(row['点击uv']):>10,} {row['回流率']:>8.1%}")
-
+        log(f"{row['channel']:<20} {cat1:<10} {cat2:<12} {int(row['点击uv']):>10,} {row['回流率']:>8.1%}")
     log()
     log()
 
 
 # ============================================================
 # ============================================================