Просмотр исходного кода

fix(渠道效果分析): SQL直接输出核心指标,增加二级品类分析

- SQL 直接计算进入推荐率、回流率等指标
- 增加 merge二级品类 维度
- 新增渠道×二级品类效果 Top 20

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui 2 месяца назад
Родитель
Коммит
3a96e3b5f4
2 изменённых файла: 74 добавлено и 36 удалено
  1. 62 30
      tasks/渠道效果分析/analyze.py
  2. 12 6
      tasks/渠道效果分析/query.sql

+ 62 - 30
tasks/渠道效果分析/analyze.py

@@ -39,18 +39,21 @@ log("一、渠道整体表现")
 log("=" * 80)
 log()
 
+# 按渠道汇总(加权平均)
 channel_stats = df.groupby('channel').agg({
     '点击uv': 'sum',
-    '进入推荐uv': 'sum',
-    '再分享回流uv': 'sum',
-    '原视频回流uv': 'sum',
-    '推荐回流uv': 'sum'
+    '再分享回流uv': 'sum'
 }).reset_index()
 
-channel_stats['进入推荐率'] = channel_stats['进入推荐uv'] / channel_stats['点击uv']
-channel_stats['再分享回流率'] = channel_stats['再分享回流uv'] / (channel_stats['点击uv'] + 10)
-channel_stats['原视频回流率'] = channel_stats['原视频回流uv'] / (channel_stats['点击uv'] + 10)
-channel_stats['推荐回流率'] = channel_stats['推荐回流uv'] / (channel_stats['点击uv'] + 10)
+# 用加权方式计算整体指标
+for ch in channel_stats['channel']:
+    ch_df = df[df['channel'] == ch]
+    uv = ch_df['点击uv'].sum()
+    channel_stats.loc[channel_stats['channel'] == ch, '进入推荐率'] = (ch_df['进入推荐率'] * ch_df['点击uv']).sum() / uv
+    channel_stats.loc[channel_stats['channel'] == ch, '再分享回流率'] = (ch_df['再分享回流率'] * ch_df['点击uv']).sum() / uv
+    channel_stats.loc[channel_stats['channel'] == ch, '原视频再分享回流率'] = (ch_df['原视频再分享回流率'] * ch_df['点击uv']).sum() / uv
+    channel_stats.loc[channel_stats['channel'] == ch, '推荐再分享回流率'] = (ch_df['推荐再分享回流率'] * ch_df['点击uv']).sum() / uv
+
 channel_stats = channel_stats.sort_values('点击uv', ascending=False)
 
 header = f"{'渠道':<25} {'点击UV':>12} {'进入推荐率':>10} {'回流率':>10} {'原视频':>8} {'推荐':>8}"
@@ -58,7 +61,7 @@ log(header)
 log("-" * 80)
 
 for _, row in channel_stats.iterrows():
-    log(f"{row['channel']:<25} {int(row['点击uv']):>12,} {row['进入推荐率']:>10.1%} {row['再分享回流率']:>10.1%} {row['原视频回流率']:>8.1%} {row['推荐回流率']:>8.1%}")
+    log(f"{row['channel']:<25} {int(row['点击uv']):>12,} {row['进入推荐率']:>10.1%} {row['再分享回流率']:>10.1%} {row['原视频再分享回流率']:>8.1%} {row['推荐再分享回流率']:>8.1%}")
 
 log()
 
@@ -71,13 +74,13 @@ log("=" * 80)
 log()
 
 # 按日期和渠道汇总
-daily_channel = df.groupby(['dt', 'channel']).agg({
-    '点击uv': 'sum',
-    '再分享回流uv': 'sum'
-}).reset_index()
-daily_channel['回流率'] = daily_channel['再分享回流uv'] / (daily_channel['点击uv'] + 10)
+daily_channel = df.groupby(['dt', 'channel']).apply(
+    lambda x: pd.Series({
+        '点击uv': x['点击uv'].sum(),
+        '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum()
+    })
+).reset_index()
 
-# 只看主要渠道(UV > 10000)
 main_channels = channel_stats[channel_stats['点击uv'] > 10000]['channel'].tolist()
 dates = sorted(df['dt'].unique())
 
@@ -107,25 +110,23 @@ log("=" * 80)
 log()
 
 # 按渠道和品类汇总
-channel_category = df.groupby(['channel', 'merge一级品类']).agg({
-    '点击uv': 'sum',
-    '再分享回流uv': 'sum'
-}).reset_index()
-channel_category['回流率'] = channel_category['再分享回流uv'] / (channel_category['点击uv'] + 10)
+channel_category = df.groupby(['channel', 'merge一级品类']).apply(
+    lambda x: pd.Series({
+        '点击uv': x['点击uv'].sum(),
+        '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
+    })
+).reset_index()
 
-# 创建透视表
 pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
 pivot_ror = channel_category.pivot(index='merge一级品类', columns='channel', values='回流率')
 
-# 过滤小样本
 min_uv = 1000
 valid_categories = pivot_uv[pivot_uv.sum(axis=1) >= min_uv].index
-valid_channels = main_channels[:8]  # 取前8个主要渠道
+valid_channels = main_channels[:8]
 
 log(f"品类数: {len(valid_categories)}, 渠道数: {len(valid_channels)}")
 log()
 
-# 各品类最佳渠道
 log("【各品类最佳渠道】")
 log("-" * 80)
 
@@ -143,7 +144,6 @@ for category in valid_categories:
 
 log()
 
-# 各渠道最佳品类
 log("【各渠道最佳品类】")
 log("-" * 80)
 
@@ -162,20 +162,52 @@ for ch in valid_channels:
 log()
 
 # ============================================================
-# 四、汇总统计
+# 四、渠道×二级品类效果
+# ============================================================
+log("=" * 80)
+log("四、渠道×二级品类效果(Top 20)")
+log("=" * 80)
+log()
+
+# 按渠道和二级品类汇总
+if 'merge二级品类' in df.columns:
+    channel_cat2 = df.groupby(['channel', 'merge一级品类', 'merge二级品类']).apply(
+        lambda x: pd.Series({
+            '点击uv': x['点击uv'].sum(),
+            '回流率': (x['再分享回流率'] * x['点击uv']).sum() / x['点击uv'].sum() if x['点击uv'].sum() > 0 else 0
+        })
+    ).reset_index()
+
+    # 过滤小样本,取回流率最高的
+    cat2_filtered = channel_cat2[channel_cat2['点击uv'] >= 500].nlargest(20, '回流率')
+
+    header = f"{'渠道':<20} {'一级品类':<12} {'二级品类':<15} {'UV':>10} {'回流率':>8}"
+    log(header)
+    log("-" * 80)
+
+    for _, row in cat2_filtered.iterrows():
+        cat1 = str(row['merge一级品类'])[:10] if pd.notna(row['merge一级品类']) else ''
+        cat2 = str(row['merge二级品类'])[:12] if pd.notna(row['merge二级品类']) else ''
+        log(f"{row['channel']:<20} {cat1:<12} {cat2:<15} {int(row['点击uv']):>10,} {row['回流率']:>8.1%}")
+
+    log()
+
+# ============================================================
+# 五、汇总统计
 # ============================================================
 log("=" * 80)
-log("四、汇总统计")
+log("五、汇总统计")
 log("=" * 80)
 log()
 
 total_uv = df['点击uv'].sum()
-total_recommend = df['进入推荐uv'].sum()
 total_return = df['再分享回流uv'].sum()
+avg_recommend = (df['进入推荐率'] * df['点击uv']).sum() / total_uv
+avg_return = (df['再分享回流率'] * df['点击uv']).sum() / total_uv
 
 log(f"总点击UV: {int(total_uv):,}")
-log(f"总进入推荐UV: {int(total_recommend):,} ({total_recommend/total_uv:.1%})")
-log(f"总再分享回流UV: {int(total_return):,} ({total_return/total_uv:.1%})")
+log(f"平均进入推荐率: {avg_recommend:.1%}")
+log(f"平均再分享回流率: {avg_return:.1%}")
 log(f"渠道数: {df['channel'].nunique()}")
 log(f"品类数: {df['merge一级品类'].nunique()}")
 log()

+ 12 - 6
tasks/渠道效果分析/query.sql

@@ -4,19 +4,25 @@
 SELECT  dt
         ,channel
         ,merge一级品类
+        ,merge二级品类
         ,COUNT(DISTINCT mid) AS 点击uv
-        ,COUNT(DISTINCT CASE WHEN 是否进入推荐 = '1' THEN mid END) AS 进入推荐uv
+        ,COUNT(DISTINCT CASE WHEN 是否进入推荐 = '1' THEN mid END) / COUNT(DISTINCT mid) AS 进入推荐率
+        ,(SUM(CASE WHEN 再分享群聊回流uv > 0 THEN 再分享群聊回流uv ELSE 0 END)
+          + SUM(CASE WHEN 再分享单聊回流uv > 0 THEN 再分享单聊回流uv ELSE 0 END)
+         ) / (COUNT(DISTINCT mid) + 10) AS 再分享回流率
+        ,(SUM(CASE WHEN 是否原视频 = '是' THEN 再分享群聊回流uv ELSE 0 END)
+          + SUM(CASE WHEN 是否原视频 = '是' THEN 再分享单聊回流uv ELSE 0 END)
+         ) / (COUNT(DISTINCT mid) + 10) AS 原视频再分享回流率
+        ,(SUM(CASE WHEN 是否原视频 = '否' THEN 再分享群聊回流uv ELSE 0 END)
+          + SUM(CASE WHEN 是否原视频 = '否' THEN 再分享单聊回流uv ELSE 0 END)
+         ) / (COUNT(DISTINCT mid) + 10) AS 推荐再分享回流率
         ,SUM(CASE WHEN 再分享群聊回流uv > 0 THEN 再分享群聊回流uv ELSE 0 END)
          + SUM(CASE WHEN 再分享单聊回流uv > 0 THEN 再分享单聊回流uv ELSE 0 END) AS 再分享回流uv
-        ,SUM(CASE WHEN 是否原视频 = '是' THEN 再分享群聊回流uv ELSE 0 END)
-         + SUM(CASE WHEN 是否原视频 = '是' THEN 再分享单聊回流uv ELSE 0 END) AS 原视频回流uv
-        ,SUM(CASE WHEN 是否原视频 = '否' THEN 再分享群聊回流uv ELSE 0 END)
-         + SUM(CASE WHEN 是否原视频 = '否' THEN 再分享单聊回流uv ELSE 0 END) AS 推荐回流uv
 FROM    loghubods.opengid_base_data
 WHERE   dt >= ${start}
 AND     dt <= ${end}
 AND     usersharedepth = 0
 AND     videoid IS NOT NULL
-GROUP BY dt, channel, merge一级品类
+GROUP BY dt, channel, merge一级品类, merge二级品类
 ORDER BY dt, channel, 点击uv DESC
 ;