Explorar o código

refactor(渠道效果分析): 统一使用四位小数格式,增加气泡图

- 比率统一改为四位小数格式(如 0.1000);注意:趋势图数据仍为乘以 100 后保留一位小数的百分数值
- 新增气泡图:X轴=UV占比,Y轴=回流率,气泡大小=UV量级
- 更新热力图阈值判断(使用小数)
- 更新图例和坐标轴标签

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
yangxiaohui hai 2 meses
pai
achega
3ff1a3f811
Modificáronse 2 ficheiros con 87 adicións e 36 borrados
  1. 10 10
      tasks/渠道效果分析/analyze.py
  2. 77 26
      tasks/渠道效果分析/visualize.py

+ 10 - 10
tasks/渠道效果分析/analyze.py

@@ -61,7 +61,7 @@ log(header)
 log("-" * 80)
 
 for _, row in channel_stats.iterrows():
-    log(f"{row['channel']:<25} {int(row['点击uv']):>12,} {row['进入推荐率']:>10.1%} {row['再分享回流率']:>10.1%} {row['原视频再分享回流率']:>8.1%} {row['推荐再分享回流率']:>8.1%}")
+    log(f"{row['channel']:<25} {int(row['点击uv']):>12,} {row['进入推荐率']:>10.4f} {row['再分享回流率']:>10.4f} {row['原视频再分享回流率']:>8.4f} {row['推荐再分享回流率']:>8.4f}")
 
 log()
 
@@ -94,7 +94,7 @@ for ch in main_channels:
     for d in dates:
         if d in ch_data.index:
             rate = ch_data.loc[d, '回流率']
-            row_str += f"{rate:>8.1%}"
+            row_str += f"{rate:>8.4f}"
         else:
             row_str += f"{'--':>8}"
     log(row_str)
@@ -145,7 +145,7 @@ for category in valid_categories:
     for ch in valid_channels:
         if ch in pivot_ror.columns and pd.notna(pivot_ror.loc[category, ch]):
             rate = pivot_ror.loc[category, ch]
-            row_str += f"{rate:>12.1%}"
+            row_str += f"{rate:>12.4f}"
         else:
             row_str += f"{'--':>12}"
     log(row_str)
@@ -167,7 +167,7 @@ for category in valid_categories:
     for ch in row.head(5).index:
         ch_name = ch[:20] if pd.notna(ch) else ''
         ch_uv = int(pivot_uv.loc[category, ch]) if ch in pivot_uv.columns else 0
-        log(f"      {ch_name:<20} UV={ch_uv:>8,} 回流率={row[ch]:.1%}")
+        log(f"      {ch_name:<20} UV={ch_uv:>8,} 回流率={row[ch]:.4f}")
     log()
 
 log()
@@ -186,7 +186,7 @@ for ch in valid_channels:
     for cat in col.head(5).index:
         cat_name = str(cat)[:15] if pd.notna(cat) else '(空)'
         cat_uv = int(pivot_uv.loc[cat, ch]) if ch in pivot_uv.columns else 0
-        log(f"      {cat_name:<17} UV={cat_uv:>8,} 回流率={col[cat]:.1%}")
+        log(f"      {cat_name:<17} UV={cat_uv:>8,} 回流率={col[cat]:.4f}")
     log()
 
 log()
@@ -244,7 +244,7 @@ if 'merge二级品类' in df.columns:
         for ch in row.head(5).index:
             ch_name = ch[:20] if pd.notna(ch) else ''
             ch_uv = int(pivot_cat2_uv.loc[(cat1, cat2), ch]) if ch in pivot_cat2_uv.columns else 0
-            log(f"      {ch_name:<20} UV={ch_uv:>8,} 回流率={row[ch]:.1%}")
+            log(f"      {ch_name:<20} UV={ch_uv:>8,} 回流率={row[ch]:.4f}")
         log()
 
     log()
@@ -263,7 +263,7 @@ if 'merge二级品类' in df.columns:
         for (cat1, cat2) in col.head(5).index:
             cat2_name = f"{str(cat1)[:8]}/{str(cat2)[:10]}" if pd.notna(cat2) else str(cat1)[:15]
             cat2_uv = int(pivot_cat2_uv.loc[(cat1, cat2), ch]) if ch in pivot_cat2_uv.columns else 0
-            log(f"      {cat2_name:<20} UV={cat2_uv:>8,} 回流率={col[(cat1, cat2)]:.1%}")
+            log(f"      {cat2_name:<20} UV={cat2_uv:>8,} 回流率={col[(cat1, cat2)]:.4f}")
         log()
 
     log()
@@ -278,7 +278,7 @@ if 'merge二级品类' in df.columns:
     for _, row in cat2_filtered.iterrows():
         cat1 = str(row['merge一级品类'])[:10] if pd.notna(row['merge一级品类']) else ''
         cat2 = str(row['merge二级品类'])[:12] if pd.notna(row['merge二级品类']) else ''
-        log(f"{row['channel']:<20} {cat1:<10} {cat2:<12} {int(row['点击uv']):>10,} {row['回流率']:>8.1%}")
+        log(f"{row['channel']:<20} {cat1:<10} {cat2:<12} {int(row['点击uv']):>10,} {row['回流率']:>8.4f}")
     log()
 
 # ============================================================
@@ -295,8 +295,8 @@ avg_recommend = (df['进入推荐率'] * df['点击uv']).sum() / total_uv
 avg_return = (df['再分享回流率'] * df['点击uv']).sum() / total_uv
 
 log(f"总点击UV: {int(total_uv):,}")
-log(f"平均进入推荐率: {avg_recommend:.1%}")
-log(f"平均再分享回流率: {avg_return:.1%}")
+log(f"平均进入推荐率: {avg_recommend:.4f}")
+log(f"平均再分享回流率: {avg_return:.4f}")
 log(f"渠道数: {df['channel'].nunique()}")
 log(f"品类数: {df['merge一级品类'].nunique()}")
 log()

+ 77 - 26
tasks/渠道效果分析/visualize.py

@@ -76,7 +76,7 @@ for _, row in channel_stats.iterrows():
     channel_rows.append(
         f"<tr><td>{row['channel']}</td>"
         f"<td>{int(row['点击uv']):,}</td>"
-        f"<td>{row['再分享回流率']:.1%}</td>"
+        f"<td>{row['再分享回流率']:.4f}</td>"
         f"<td><div style='background:#007bff;height:20px;width:{bar_width}%'></div></td></tr>"
     )
 
@@ -84,9 +84,9 @@ for _, row in channel_stats.iterrows():
 def get_cell_class(val):
     if val is None:
         return ""
-    if val > 30:
+    if val > 0.30:
         return "high"
-    if val > 15:
+    if val > 0.15:
         return "medium"
     return "low"
 
@@ -96,9 +96,9 @@ for cat in valid_categories:
     cells = [f"<td>{str(cat)[:12]}</td>"]
     for ch in heatmap_cols:
         if ch in pivot_ror.columns and pd.notna(pivot_ror.loc[cat, ch]):
-            val = pivot_ror.loc[cat, ch] * 100
+            val = pivot_ror.loc[cat, ch]
             cls = get_cell_class(val)
-            cells.append(f'<td class="{cls}">{val:.1f}%</td>')
+            cells.append(f'<td class="{cls}">{val:.4f}</td>')
         else:
             cells.append("<td>-</td>")
     ror_rows.append("<tr>" + "".join(cells) + "</tr>")
@@ -149,7 +149,7 @@ for cat in valid_categories:
         if ch in pivot_recommend.columns and cat in pivot_recommend.index and pd.notna(pivot_recommend.loc[cat, ch]):
             val = pivot_recommend.loc[cat, ch]
             cls = get_recommend_class(val)
-            cells.append(f'<td class="{cls}">{val:.1%}</td>')
+            cells.append(f'<td class="{cls}">{val:.4f}</td>')
         else:
             cells.append("<td>-</td>")
     recommend_rows.append("<tr>" + "".join(cells) + "</tr>")
@@ -185,9 +185,9 @@ for cat2 in valid_cat2_labels:
     cells = [f"<td>{cat2[:15]}</td>"]
     for ch in heatmap_cols:
         if ch in pivot_cat2_ror.columns and pd.notna(pivot_cat2_ror.loc[cat2, ch]):
-            val = pivot_cat2_ror.loc[cat2, ch] * 100
+            val = pivot_cat2_ror.loc[cat2, ch]
             cls = get_cell_class(val)
-            cells.append(f'<td class="{cls}">{val:.1f}%</td>')
+            cells.append(f'<td class="{cls}">{val:.4f}</td>')
         else:
             cells.append("<td>-</td>")
     cat2_ror_rows.append("<tr>" + "".join(cells) + "</tr>")
@@ -220,17 +220,30 @@ for ch in top_channels:
         'values': [round(x * 100, 1) for x in ch_daily['回流率'].tolist()]
     }
 
-# 6. 散点图数据
+# 7. 散点图数据
 scatter_data = []
 for _, row in channel_category.iterrows():
     if row['点击uv'] >= 100:
         scatter_data.append({
             'x': int(row['点击uv']),
-            'y': round(row['回流率'] * 100, 1),
+            'y': round(row['回流率'], 4),
             'channel': row['channel'][:12],
             'category': str(row['merge一级品类'])[:10] if pd.notna(row['merge一级品类']) else ''
         })
 
+# 8. 气泡图数据(渠道:占比 vs 回流率)
+total_uv_all = df['点击uv'].sum()
+bubble_data = []
+for _, row in channel_stats.iterrows():
+    share = row['点击uv'] / total_uv_all
+    bubble_data.append({
+        'x': round(share, 4),  # UV占比
+        'y': round(row['再分享回流率'], 4),  # 回流率
+        'r': max(5, min(50, row['点击uv'] / 20000)),  # 气泡大小
+        'uv': int(row['点击uv']),
+        'channel': row['channel'][:15]
+    })
+
 # ============================================================
 # 生成 HTML
 # ============================================================
@@ -289,7 +302,7 @@ html_content = f"""<!DOCTYPE html>
                 <p>总点击UV</p>
             </div>
             <div class="stat-card">
-                <h3>{avg_ror:.1%}</h3>
+                <h3>{avg_ror:.4f}</h3>
                 <p>平均回流率</p>
             </div>
             <div class="stat-card">
@@ -313,9 +326,9 @@ html_content = f"""<!DOCTYPE html>
         <h2>2. 渠道×品类 回流率矩阵</h2>
         <div class="chart-container heatmap matrix-section">
             <div class="legend">
-                <span class="high">高 &gt;30%</span>
-                <span class="medium">中 15-30%</span>
-                <span class="low">低 &lt;15%</span>
+                <span class="high">高 &gt;0.30</span>
+                <span class="medium">中 0.15-0.30</span>
+                <span class="low">低 &lt;0.15</span>
             </div>
             <table>
                 {ror_header}
@@ -339,9 +352,9 @@ html_content = f"""<!DOCTYPE html>
         <h2>4. 渠道×品类 进入推荐率矩阵</h2>
         <div class="chart-container heatmap matrix-section">
             <div class="legend">
-                <span class="high">高 &gt;80%</span>
-                <span class="medium">中 70-80%</span>
-                <span class="low">低 &lt;70%</span>
+                <span class="high">高 &gt;0.80</span>
+                <span class="medium">中 0.70-0.80</span>
+                <span class="low">低 &lt;0.70</span>
             </div>
             <table>
                 {ror_header}
@@ -352,9 +365,9 @@ html_content = f"""<!DOCTYPE html>
         <h2>5. 渠道×二级品类 回流率矩阵</h2>
         <div class="chart-container heatmap matrix-section">
             <div class="legend">
-                <span class="high">高 &gt;30%</span>
-                <span class="medium">中 15-30%</span>
-                <span class="low">低 &lt;15%</span>
+                <span class="high">高 &gt;0.30</span>
+                <span class="medium">中 0.15-0.30</span>
+                <span class="low">低 &lt;0.15</span>
             </div>
             <table>
                 {cat2_ror_header}
@@ -375,22 +388,60 @@ html_content = f"""<!DOCTYPE html>
             </table>
         </div>
 
-        <h2>7. 每日回流率趋势</h2>
+        <h2>7. 渠道占比 vs 回流率(气泡图)</h2>
+        <div class="chart-container">
+            <canvas id="bubbleChart"></canvas>
+            <p style="font-size:12px;color:#666;margin-top:10px;">X轴=UV占比,Y轴=回流率,气泡大小=UV量级</p>
+        </div>
+
+        <h2>8. 每日回流率趋势</h2>
         <div class="chart-container">
             <canvas id="trendChart"></canvas>
         </div>
 
-        <h2>8. UV vs 回流率 散点分布</h2>
+        <h2>9. UV vs 回流率 散点分布</h2>
         <div class="chart-container">
             <canvas id="scatterChart"></canvas>
         </div>
     </div>
 
     <script>
+        const colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'];
+
+        // 气泡图
+        const bubbleCtx = document.getElementById('bubbleChart').getContext('2d');
+        const bubbleData = {json.dumps(bubble_data, ensure_ascii=False)};
+        new Chart(bubbleCtx, {{
+            type: 'bubble',
+            data: {{
+                datasets: bubbleData.map((d, i) => ({{
+                    label: d.channel,
+                    data: [{{ x: d.x, y: d.y, r: d.r }}],
+                    backgroundColor: colors[i % colors.length] + '80'
+                }}))
+            }},
+            options: {{
+                responsive: true,
+                scales: {{
+                    x: {{ title: {{ display: true, text: 'UV占比' }} }},
+                    y: {{ title: {{ display: true, text: '回流率' }} }}
+                }},
+                plugins: {{
+                    tooltip: {{
+                        callbacks: {{
+                            label: (ctx) => {{
+                                const d = bubbleData[ctx.datasetIndex];
+                                return d.channel + ': 占比=' + (d.x * 100).toFixed(1) + '%, 回流率=' + d.y.toFixed(4) + ', UV=' + d.uv.toLocaleString();
+                            }}
+                        }}
+                    }}
+                }}
+            }}
+        }});
+
         // 趋势图
         const trendCtx = document.getElementById('trendChart').getContext('2d');
         const trendData = {json.dumps(trend_data, ensure_ascii=False)};
-        const colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b'];
         const datasets = Object.keys(trendData).map((ch, i) => ({{
             label: ch.substring(0, 15),
             data: trendData[ch].values,
@@ -407,7 +458,7 @@ html_content = f"""<!DOCTYPE html>
             options: {{
                 responsive: true,
                 plugins: {{ legend: {{ position: 'top' }} }},
-                scales: {{ y: {{ title: {{ display: true, text: '回流率(%)' }} }} }}
+                scales: {{ y: {{ title: {{ display: true, text: '回流率' }} }} }}
             }}
         }});
 
@@ -427,14 +478,14 @@ html_content = f"""<!DOCTYPE html>
                 responsive: true,
                 scales: {{
                     x: {{ type: 'logarithmic', title: {{ display: true, text: '点击UV' }} }},
-                    y: {{ title: {{ display: true, text: '回流率(%)' }} }}
+                    y: {{ title: {{ display: true, text: '回流率' }} }}
                 }},
                 plugins: {{
                     tooltip: {{
                         callbacks: {{
                             label: (ctx) => {{
                                 const d = scatterData[ctx.dataIndex];
-                                return d.channel + ' - ' + d.category + ': UV=' + d.x + ', 回流率=' + d.y + '%';
+                                return d.channel + ' - ' + d.category + ': UV=' + d.x + ', 回流率=' + d.y;
                             }}
                         }}
                     }}