Procházet zdrojové kódy

feat: 新增渠道效果分析任务

- 渠道整体表现(UV、进入推荐率、回流率)
- 渠道每日回流率趋势
- 渠道×品类效果矩阵
- 各品类最佳渠道、各渠道最佳品类

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui před 2 měsíci
rodič
revize
7b3a2bcc9d

+ 187 - 0
tasks/渠道效果分析/analyze.py

@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+渠道效果分析
+分析不同渠道的整体表现、渠道×品类表现
+"""
+import pandas as pd
+import numpy as np
+from pathlib import Path
+
+task_dir = Path(__file__).parent
+output_dir = task_dir / "output"
+
+csv_files = [f for f in output_dir.glob("*.csv") if '_分析' not in f.name]
+if not csv_files:
+    print("没有找到数据文件,请先运行 query.sql")
+    exit(1)
+
+latest_file = max(csv_files, key=lambda x: x.stat().st_mtime)
+df = pd.read_csv(latest_file)
+
+lines = []
+
+
+def log(text=""):
+    print(text)
+    lines.append(text)
+
+
+log(f"分析文件: {latest_file.name}")
+log(f"时间范围: {df['dt'].min()} ~ {df['dt'].max()}")
+log()
+
+# ============================================================
+# 一、渠道整体表现
+# ============================================================
+log("=" * 80)
+log("一、渠道整体表现")
+log("=" * 80)
+log()
+
+channel_stats = df.groupby('channel').agg({
+    '点击uv': 'sum',
+    '进入推荐uv': 'sum',
+    '再分享回流uv': 'sum',
+    '原视频回流uv': 'sum',
+    '推荐回流uv': 'sum'
+}).reset_index()
+
+channel_stats['进入推荐率'] = channel_stats['进入推荐uv'] / channel_stats['点击uv']
+channel_stats['再分享回流率'] = channel_stats['再分享回流uv'] / (channel_stats['点击uv'] + 10)
+channel_stats['原视频回流率'] = channel_stats['原视频回流uv'] / (channel_stats['点击uv'] + 10)
+channel_stats['推荐回流率'] = channel_stats['推荐回流uv'] / (channel_stats['点击uv'] + 10)
+channel_stats = channel_stats.sort_values('点击uv', ascending=False)
+
+header = f"{'渠道':<25} {'点击UV':>12} {'进入推荐率':>10} {'回流率':>10} {'原视频':>8} {'推荐':>8}"
+log(header)
+log("-" * 80)
+
+for _, row in channel_stats.iterrows():
+    log(f"{row['channel']:<25} {int(row['点击uv']):>12,} {row['进入推荐率']:>10.1%} {row['再分享回流率']:>10.1%} {row['原视频回流率']:>8.1%} {row['推荐回流率']:>8.1%}")
+
+log()
+
+# ============================================================
+# 二、渠道每日趋势
+# ============================================================
+log("=" * 80)
+log("二、渠道每日回流率趋势")
+log("=" * 80)
+log()
+
+# 按日期和渠道汇总
+daily_channel = df.groupby(['dt', 'channel']).agg({
+    '点击uv': 'sum',
+    '再分享回流uv': 'sum'
+}).reset_index()
+daily_channel['回流率'] = daily_channel['再分享回流uv'] / (daily_channel['点击uv'] + 10)
+
+# 只看主要渠道(UV > 10000)
+main_channels = channel_stats[channel_stats['点击uv'] > 10000]['channel'].tolist()
+dates = sorted(df['dt'].unique())
+
+header = f"{'渠道':<25}" + "".join([f"{str(d)[-4:]:>8}" for d in dates])
+log(header)
+log("-" * 80)
+
+for ch in main_channels:
+    ch_data = daily_channel[daily_channel['channel'] == ch].set_index('dt')
+    row_str = f"{ch:<25}"
+    for d in dates:
+        if d in ch_data.index:
+            rate = ch_data.loc[d, '回流率']
+            row_str += f"{rate:>8.1%}"
+        else:
+            row_str += f"{'--':>8}"
+    log(row_str)
+
+log()
+
+# ============================================================
+# 三、渠道×品类效果矩阵
+# ============================================================
+log("=" * 80)
+log("三、渠道×品类效果矩阵(回流率)")
+log("=" * 80)
+log()
+
+# 按渠道和品类汇总
+channel_category = df.groupby(['channel', 'merge一级品类']).agg({
+    '点击uv': 'sum',
+    '再分享回流uv': 'sum'
+}).reset_index()
+channel_category['回流率'] = channel_category['再分享回流uv'] / (channel_category['点击uv'] + 10)
+
+# 创建透视表
+pivot_uv = channel_category.pivot(index='merge一级品类', columns='channel', values='点击uv').fillna(0)
+pivot_ror = channel_category.pivot(index='merge一级品类', columns='channel', values='回流率')
+
+# 过滤小样本
+min_uv = 1000
+valid_categories = pivot_uv[pivot_uv.sum(axis=1) >= min_uv].index
+valid_channels = main_channels[:8]  # 取前8个主要渠道
+
+log(f"品类数: {len(valid_categories)}, 渠道数: {len(valid_channels)}")
+log()
+
+# 各品类最佳渠道
+log("【各品类最佳渠道】")
+log("-" * 80)
+
+for category in valid_categories:
+    if category not in pivot_ror.index:
+        continue
+    row = pivot_ror.loc[category, [c for c in valid_channels if c in pivot_ror.columns]].dropna()
+    if len(row) == 0:
+        continue
+    best_ch = row.idxmax()
+    best_ror = row.max()
+    cat_uv = int(pivot_uv.loc[category].sum())
+    cat_name = str(category)[:15] if pd.notna(category) else '(空)'
+    log(f"  {cat_name:<17} UV={cat_uv:>8,} → {best_ch:<20} 回流率={best_ror:.1%}")
+
+log()
+
+# 各渠道最佳品类
+log("【各渠道最佳品类】")
+log("-" * 80)
+
+for ch in valid_channels:
+    if ch not in pivot_ror.columns:
+        continue
+    col = pivot_ror.loc[valid_categories, ch].dropna()
+    if len(col) == 0:
+        continue
+    best_cat = col.idxmax()
+    best_ror = col.max()
+    ch_uv = int(channel_stats[channel_stats['channel'] == ch]['点击uv'].values[0])
+    cat_name = str(best_cat)[:15] if pd.notna(best_cat) else '(空)'
+    log(f"  {ch:<25} UV={ch_uv:>10,} → {cat_name:<15} 回流率={best_ror:.1%}")
+
+log()
+
+# ============================================================
+# 四、汇总统计
+# ============================================================
+log("=" * 80)
+log("四、汇总统计")
+log("=" * 80)
+log()
+
+total_uv = df['点击uv'].sum()
+total_recommend = df['进入推荐uv'].sum()
+total_return = df['再分享回流uv'].sum()
+
+log(f"总点击UV: {int(total_uv):,}")
+log(f"总进入推荐UV: {int(total_recommend):,} ({total_recommend/total_uv:.1%})")
+log(f"总再分享回流UV: {int(total_return):,} ({total_return/total_uv:.1%})")
+log(f"渠道数: {df['channel'].nunique()}")
+log(f"品类数: {df['merge一级品类'].nunique()}")
+log()
+
+# 保存
+result_file = output_dir / f"{latest_file.stem}_分析.txt"
+with open(result_file, 'w', encoding='utf-8') as f:
+    f.write("\n".join(lines))
+log(f"结果已保存: {result_file}")

+ 22 - 0
tasks/渠道效果分析/query.sql

@@ -0,0 +1,22 @@
+-- Channel effectiveness analysis
+-- Analyses overall performance per channel and per channel x category.
+-- Emits one row per (dt, channel, merge一级品类).
+
+SELECT  dt
+        ,channel
+        ,merge一级品类
+        -- number of distinct mid values in the group
+        ,COUNT(DISTINCT mid) AS 点击uv
+        -- number of distinct mid values on rows where 是否进入推荐 = '1'
+        ,COUNT(DISTINCT CASE WHEN 是否进入推荐 = '1' THEN mid END) AS 进入推荐uv
+        -- 再分享群聊回流uv plus 再分享单聊回流uv; only values > 0 are added
+        ,SUM(CASE WHEN 再分享群聊回流uv > 0 THEN 再分享群聊回流uv ELSE 0 END)
+         + SUM(CASE WHEN 再分享单聊回流uv > 0 THEN 再分享单聊回流uv ELSE 0 END) AS 再分享回流uv
+        -- the same two columns, restricted to rows where 是否原视频 = '是'
+        -- NOTE(review): no "> 0" guard here, unlike 再分享回流uv above; confirm this is intended
+        ,SUM(CASE WHEN 是否原视频 = '是' THEN 再分享群聊回流uv ELSE 0 END)
+         + SUM(CASE WHEN 是否原视频 = '是' THEN 再分享单聊回流uv ELSE 0 END) AS 原视频回流uv
+        -- the same two columns, restricted to rows where 是否原视频 = '否'
+        ,SUM(CASE WHEN 是否原视频 = '否' THEN 再分享群聊回流uv ELSE 0 END)
+         + SUM(CASE WHEN 是否原视频 = '否' THEN 再分享单聊回流uv ELSE 0 END) AS 推荐回流uv
+FROM    loghubods.opengid_base_data
+-- ${start} / ${end} are placeholders; presumably substituted with the date
+-- range before the query is executed (TODO confirm quoting against the dt type)
+WHERE   dt >= ${start}
+AND     dt <= ${end}
+-- NOTE(review): usersharedepth = 0 presumably keeps only first-level shares; confirm
+AND     usersharedepth = 0
+AND     videoid IS NOT NULL
+GROUP BY dt, channel, merge一级品类
+ORDER BY dt, channel, 点击uv DESC
+;