|
|
@@ -0,0 +1,594 @@
|
|
|
+#!/usr/bin/env python
|
|
|
+# coding=utf-8
|
|
|
+"""
|
|
|
+V3 可视化报告:完整的分析报告,包含图表
|
|
|
+"""
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+from pathlib import Path
|
|
|
+import glob
|
|
|
+
|
|
|
# Directory next to this script; the input CSVs live in a sub-directory of it
# and the generated report is written into it.
output_dir = Path(__file__).parent / "output"

# =============================================================================
# Data loading
# =============================================================================
data_dir = output_dir / "v3_扩展特征"
all_files = sorted(glob.glob(str(data_dir / "*.csv")))
if not all_files:
    # Without this check pd.concat([]) fails further down with the opaque
    # message "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {data_dir}")

# The file stem is parsed as an integer and stored in the 'dt' column
# (presumably a date such as YYYYMMDD -- confirm against the producer).
dfs = []
for f in all_files:
    df_tmp = pd.read_csv(f)
    df_tmp['dt'] = int(Path(f).stem)
    dfs.append(df_tmp)
df = pd.concat(dfs, ignore_index=True)
|
|
|
+
|
|
|
# =============================================================================
# Compute all metrics
# =============================================================================

# Overall COPC: sum of the real values divided by the sum of the predictions.
ros_copc = df['real_ros'].sum() / df['pred_ros'].sum()
str_copc = df['real_str'].sum() / df['pred_str'].sum()

# COPC per real-ROS segment.  Every bucket is the half-open interval
# [low, high).  The last bucket is unbounded above so that rows with
# real_ros >= 100 are not silently left out of the '>10' segment.
segments = []
for label, low, high in [('0-1', 0, 1), ('1-2', 1, 2), ('2-3', 2, 3), ('3-4', 3, 4),
                         ('4-5', 4, 5), ('5-6', 5, 6), ('6-8', 6, 8), ('8-10', 8, 10),
                         ('>10', 10, float('inf'))]:
    subset = df[(df['real_ros'] >= low) & (df['real_ros'] < high)]
    # Empty buckets are omitted from the report entirely.
    if len(subset) > 0:
        copc = subset['real_ros'].sum() / subset['pred_ros'].sum()
        segments.append({
            'label': label,
            'count': len(subset),
            'pred': round(subset['pred_ros'].mean(), 2),
            'real': round(subset['real_ros'].mean(), 2),
            'bias': round(subset['ros_bias_pct'].mean(), 1),
            'copc': round(copc, 3)
        })
|
|
|
+
|
|
|
def _rank_quality(subset):
    """Return ``(corr, problem_pct)`` describing ranking quality in *subset*.

    Both score columns are converted to descending percentile ranks, so the
    highest score gets the smallest rank.  ``corr`` is the correlation between
    the two rank series.  ``problem_pct`` is the percentage of rows whose
    predicted rank is within the top 25% while the real rank is in the
    bottom 50%.
    """
    pred_rank = subset['pred_score'].rank(ascending=False, pct=True)
    real_rank = subset['real_score'].rank(ascending=False, pct=True)
    corr = pred_rank.corr(real_rank)
    problem_pct = ((pred_rank <= 0.25) & (real_rank > 0.5)).mean() * 100
    return corr, problem_pct


# Per-page statistics
total_exp_all = df['total_exp'].sum()  # loop-invariant denominator
page_stats = []
# NaN pages are skipped: `df['page'] == NaN` never matches, so they would
# only produce an empty subset and a row full of NaN values.
for page in df['page'].dropna().unique():
    subset = df[df['page'] == page]
    copc = subset['real_ros'].sum() / subset['pred_ros'].sum()
    corr, problem_pct = _rank_quality(subset)
    page_stats.append({
        'page': page,
        'count': len(subset),
        'exp_pct': round(subset['total_exp'].sum() / total_exp_all * 100, 1),
        'copc': round(copc, 3),
        'bias': round(subset['ros_bias_pct'].mean(), 1),
        'corr': round(corr, 3),
        'problem_pct': round(problem_pct, 1)
    })

# Daily trend, in ascending 'dt' order
daily_data = []
for dt in sorted(df['dt'].unique()):
    subset = df[df['dt'] == dt]
    copc = subset['real_ros'].sum() / subset['pred_ros'].sum()
    corr, problem_pct = _rank_quality(subset)
    daily_data.append({
        'dt': str(dt),
        'copc': round(copc, 3),
        'bias': round(subset['ros_bias_pct'].mean(), 1),
        'corr': round(corr, 3),
        'problem_pct': round(problem_pct, 1),
        'n': len(subset)
    })
|
|
|
+
|
|
|
# Problem videos: rank every row within its own day, then flag the rows whose
# predicted rank is in the top 25% while the real rank is in the bottom 50%.
by_day = df.groupby('dt')
for score_col, rank_col in (('pred_score', 'pred_rank_daily'),
                            ('real_score', 'real_rank_daily')):
    df[rank_col] = by_day[score_col].rank(ascending=False, pct=True)

df['is_problem'] = df['pred_rank_daily'].le(0.25) & df['real_rank_daily'].gt(0.5)

# Split the data set into flagged and unflagged rows.
problem = df.loc[df['is_problem']]
normal = df.loc[~df['is_problem']]
|
|
|
+
|
|
|
# Exposure volume vs. bias: split rows into five exposure quantile bins and
# report the mean ROS bias of each bin.
exp_labels = ['极低', '低', '中', '高', '极高']
try:
    exp_bins = pd.qcut(df['total_exp'], q=5, labels=exp_labels)
except ValueError:
    # Heavily tied exposure values can make quantile edges coincide, in which
    # case qcut refuses to build five labelled bins.  Binning the first-seen
    # ranks instead keeps the edges unique; ties are then split by row order.
    exp_bins = pd.qcut(df['total_exp'].rank(method='first'), q=5, labels=exp_labels)

exp_bias_data = []
for label in exp_labels:
    subset = df[exp_bins == label]
    exp_bias_data.append({
        'label': label,
        'bias': round(subset['ros_bias_pct'].mean(), 1),
        'count': len(subset)
    })
|
|
|
+
|
|
|
# Most frequently flagged videos: the ten 'vid' values with the most problem
# rows, with statistics computed over those problem rows.
vid_problem_count = problem.groupby('vid').size().sort_values(ascending=False)
top_problem_vids = []
for vid, cnt in vid_problem_count.head(10).items():
    vid_data = df[df['vid'] == vid]
    prob_data = problem[problem['vid'] == vid]
    # prob_data is never empty here (vid comes from `problem`), but mode()
    # is empty when every page value is NaN, so guard on the mode itself.
    page_mode = prob_data['page'].mode()
    top_problem_vids.append({
        'vid': str(vid),
        'problem_cnt': int(cnt),
        # Number of rows for this vid in the full data set.
        'total_days': len(vid_data),
        'pred_ros': round(prob_data['pred_ros'].mean(), 2),
        'real_ros': round(prob_data['real_ros'].mean(), 2),
        'bias': round(prob_data['ros_bias_pct'].mean(), 1),
        # Stored as str because the table renderer slices this value.
        'page': str(page_mode.iloc[0]) if not page_mode.empty else '',
        'exp': int(prob_data['total_exp'].sum())
    })
|
|
|
+
|
|
|
+# =============================================================================
|
|
|
+# 辅助函数:生成HTML片段
|
|
|
+# =============================================================================
|
|
|
def get_bias_badge(bias):
    """Return an HTML badge ``<span>`` for a bias percentage.

    Values above 50 use the ``badge-danger`` class and values above 0 use
    ``badge-warning``; both are rendered with an explicit leading ``+``.
    Everything else uses ``badge-success`` and is rendered without a prefix.
    The number is always shown with one decimal place.
    """
    if bias > 50:
        css_class, prefix = 'badge-danger', '+'
    elif bias > 0:
        css_class, prefix = 'badge-warning', '+'
    else:
        css_class, prefix = 'badge-success', ''
    return f'<span class="badge {css_class}">{prefix}{bias:.1f}%</span>'
|
|
|
+
|
|
|
def get_bar_class(bias):
    """Return the bar CSS class: ``'positive'`` if *bias* > 0, else ``'negative'``."""
    if bias > 0:
        return 'positive'
    return 'negative'
|
|
|
+
|
|
|
def gen_segment_rows():
    """Render one ``<tr>`` per entry of the module-level ``segments`` list.

    Returns the rows joined by newlines, with the columns label, count,
    mean prediction, mean real value, bias badge and COPC.
    """
    def _row(seg):
        # The bias column is shown as a coloured badge, the rest as plain text.
        bias_cell = get_bias_badge(seg['bias'])
        return f'''<tr>
            <td>{seg['label']}</td>
            <td>{seg['count']}</td>
            <td>{seg['pred']}</td>
            <td>{seg['real']}</td>
            <td>{bias_cell}</td>
            <td>{seg['copc']}</td>
        </tr>'''

    return '\n'.join(_row(seg) for seg in segments)
|
|
|
+
|
|
|
def gen_segment_bars():
    """Render one horizontal bias bar per entry of the module-level ``segments``.

    The fill width is ``abs(bias) / 2`` percent, capped at 100, and the fill
    is labelled with the signed, rounded bias.  Returns the bars joined by
    newlines.
    """
    def _bar(seg):
        bias = seg['bias']
        fill_class = get_bar_class(bias)
        fill_width = min(abs(bias) / 2, 100)
        return f'''<div class="bar-row">
            <div class="bar-label">{seg['label']}</div>
            <div class="bar-track">
                <div class="bar-fill {fill_class}" style="width: {fill_width}%;">
                    {bias:+.0f}%
                </div>
            </div>
            <div class="bar-value">n={seg['count']}</div>
        </div>'''

    return '\n'.join(_bar(seg) for seg in segments)
|
|
|
+
|
|
|
def gen_page_rows():
    """Render one ``<tr>`` per entry of the module-level ``page_stats`` list.

    Rows are ordered by descending exposure share.  The page name comes from
    the input data, so it is HTML-escaped before being placed in the markup.
    The problem-ratio badge is ``badge-danger`` above 10 and ``badge-warning``
    otherwise.  Returns the rows joined by newlines.
    """
    from html import escape

    rows = []
    for p in sorted(page_stats, key=lambda x: -x['exp_pct']):
        bias_badge = get_bias_badge(p['bias'])
        prob_cls = 'badge-danger' if p['problem_pct'] > 10 else 'badge-warning'
        page_name = escape(str(p['page']))
        rows.append(f'''<tr>
            <td>{page_name}</td>
            <td>{p['count']}</td>
            <td>{p['exp_pct']}%</td>
            <td>{p['copc']}</td>
            <td>{bias_badge}</td>
            <td>{p['corr']}</td>
            <td><span class="badge {prob_cls}">{p['problem_pct']}%</span></td>
        </tr>''')
    return '\n'.join(rows)
|
|
|
+
|
|
|
def gen_timeline_bars(max_copc=2.5):
    """Render one vertical bar per entry of the module-level ``daily_data``.

    Args:
        max_copc: COPC value that maps to a full-height (100%) bar.

    Returns:
        The bar ``<div>`` elements joined by newlines.  Each bar carries a
        tooltip with the date, COPC, signed bias and sample count.
    """
    bars = []
    for d in daily_data:
        # Clamp to [0, 100] so a COPC above max_copc cannot overflow the
        # chart, mirroring the width clamp of the horizontal bars.
        height = min(max(d['copc'] / max_copc * 100, 0), 100)
        bars.append(f'''<div class="timeline-bar" style="height: {height:.1f}%;">
            <div class="tooltip">
                日期: {d['dt']}<br>
                COPC: {d['copc']}<br>
                偏差: {d['bias']:+.1f}%<br>
                样本: {d['n']}
            </div>
        </div>''')
    return '\n'.join(bars)
|
|
|
+
|
|
|
def gen_exp_bias_bars():
    """Render one horizontal bias bar per entry of ``exp_bias_data``.

    The fill width is ``abs(bias) / 0.6`` percent, capped at 100, and the
    fill is labelled with the signed, rounded bias.  Returns the bars joined
    by newlines.
    """
    def _bar(entry):
        bias = entry['bias']
        fill_class = get_bar_class(bias)
        fill_width = min(abs(bias) / 0.6, 100)
        return f'''<div class="bar-row">
            <div class="bar-label">{entry['label']}</div>
            <div class="bar-track">
                <div class="bar-fill {fill_class}" style="width: {fill_width}%;">
                    {bias:+.0f}%
                </div>
            </div>
            <div class="bar-value">n={entry['count']}</div>
        </div>'''

    return '\n'.join(_bar(entry) for entry in exp_bias_data)
|
|
|
+
|
|
|
def gen_problem_vid_rows():
    """Render one ``<tr>`` per entry of the module-level ``top_problem_vids``.

    The vid and page values originate from the input data, so both are
    HTML-escaped.  The page is converted to ``str`` before being truncated to
    12 characters, so a non-string value (e.g. NaN) cannot raise on slicing;
    truncation happens before escaping so an entity is never cut in half.
    Returns the rows joined by newlines.
    """
    from html import escape

    rows = []
    for v in top_problem_vids:
        bias_badge = get_bias_badge(v['bias'])
        vid_text = escape(str(v['vid']))
        page_text = escape(str(v['page'])[:12])
        rows.append(f'''<tr>
            <td>{vid_text}</td>
            <td><span class="badge badge-danger">{v['problem_cnt']}次</span></td>
            <td>{v['total_days']}</td>
            <td>{v['pred_ros']}</td>
            <td>{v['real_ros']}</td>
            <td>{bias_badge}</td>
            <td style="font-size:12px;">{page_text}</td>
            <td>{v['exp']:,}</td>
        </tr>''')
    return '\n'.join(rows)
|
|
|
+
|
|
|
# Summary statistics used by the report template.
all_real_ros = df['real_ros']
# NOTE(review): the high-ROS bias uses a strict `> 6`, while the share
# computed below uses `>= 6` -- confirm which boundary is intended.
low_ros_bias = df.loc[all_real_ros < 2, 'ros_bias_pct'].mean()
high_ros_bias = df.loc[all_real_ros > 6, 'ros_bias_pct'].mean()

# Share of flagged rows, by row count and by exposure.
problem_pct = len(problem) / len(df) * 100
problem_exp_pct = problem['total_exp'].sum() / df['total_exp'].sum() * 100

# Aggregates over the per-day entries.
daily_copc_values = [day['copc'] for day in daily_data]
copc_mean = np.mean(daily_copc_values)
copc_std = np.std(daily_copc_values)
avg_problem_pct = np.mean([day['problem_pct'] for day in daily_data])


def _ros_band_shares(frame):
    """Return the percentage of rows with real_ros < 2, in [2, 4) and >= 6."""
    ros = frame['real_ros']
    below_2 = ros.lt(2).mean() * 100
    from_2_to_4 = (ros.ge(2) & ros.lt(4)).mean() * 100
    at_least_6 = ros.ge(6).mean() * 100
    return below_2, from_2_to_4, at_least_6


problem_ros_lt2_pct, problem_ros_24_pct, problem_ros_gt6_pct = _ros_band_shares(problem)
normal_ros_lt2_pct, normal_ros_24_pct, normal_ros_gt6_pct = _ros_band_shares(normal)
|
|
|
+
|
|
|
# =============================================================================
# Build the HTML report
# =============================================================================
# The template is a single f-string; literal CSS braces are doubled.
# Changes relative to a plain interpolation of the computed values:
#   * low_ros_bias is rendered with the `+` format flag, so a negative value
#     shows as "-N%" instead of "+-N%";
#   * bar widths are formatted with one decimal instead of a raw float repr;
#   * literal "<" / ">" in text nodes are written as &lt; / &gt;.
# NOTE(review): several narrative figures below are hard-coded text rather
# than computed values (74%, 1.79, 0.12~0.35, p=0.93, +48%, -22%, +47%,
# 1.79~1.99, 0.32, "30天") -- confirm they still match the loaded data.
html_content = f'''<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>低VoV高曝光问题深度分析报告 V3</title>
    <style>
        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
        body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; background: #f5f7fa; color: #333; line-height: 1.6; }}
        .container {{ max-width: 1200px; margin: 0 auto; padding: 20px; }}
        .header {{ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%); color: white; padding: 60px 40px; text-align: center; border-radius: 16px; margin-bottom: 30px; }}
        .header h1 {{ font-size: 36px; margin-bottom: 16px; font-weight: 700; }}
        .header .subtitle {{ font-size: 18px; opacity: 0.9; }}
        .header .meta {{ margin-top: 24px; display: flex; justify-content: center; gap: 40px; font-size: 14px; opacity: 0.8; }}
        .card {{ background: white; border-radius: 12px; padding: 30px; margin-bottom: 24px; box-shadow: 0 2px 8px rgba(0,0,0,0.06); }}
        .card h2 {{ font-size: 22px; color: #1a1a2e; margin-bottom: 24px; padding-bottom: 12px; border-bottom: 2px solid #f0f0f0; display: flex; align-items: center; gap: 12px; }}
        .card h2 .num {{ background: #0f3460; color: white; width: 32px; height: 32px; border-radius: 8px; display: flex; align-items: center; justify-content: center; font-size: 16px; }}
        .card h3 {{ font-size: 18px; color: #444; margin: 24px 0 16px; }}
        .exec-summary {{ display: grid; grid-template-columns: repeat(4, 1fr); gap: 20px; margin-bottom: 30px; }}
        .summary-item {{ background: white; border-radius: 12px; padding: 24px; text-align: center; box-shadow: 0 2px 8px rgba(0,0,0,0.06); }}
        .summary-item .value {{ font-size: 36px; font-weight: 700; margin-bottom: 8px; }}
        .summary-item .label {{ font-size: 14px; color: #666; }}
        .summary-item.warning .value {{ color: #e74c3c; }}
        .summary-item.good .value {{ color: #27ae60; }}
        .summary-item.neutral .value {{ color: #3498db; }}
        table {{ width: 100%; border-collapse: collapse; margin: 16px 0; }}
        th, td {{ padding: 14px 16px; text-align: left; border-bottom: 1px solid #eee; }}
        th {{ background: #f8f9fa; font-weight: 600; color: #555; font-size: 13px; text-transform: uppercase; }}
        tr:hover {{ background: #fafbfc; }}
        .chart-container {{ margin: 20px 0; }}
        .bar-chart {{ display: flex; flex-direction: column; gap: 12px; }}
        .bar-row {{ display: flex; align-items: center; gap: 16px; }}
        .bar-label {{ width: 80px; text-align: right; font-size: 14px; color: #666; }}
        .bar-track {{ flex: 1; height: 32px; background: #f0f0f0; border-radius: 4px; position: relative; overflow: hidden; }}
        .bar-fill {{ height: 100%; border-radius: 4px; transition: width 0.3s; display: flex; align-items: center; justify-content: flex-end; padding-right: 8px; font-size: 12px; color: white; font-weight: 600; }}
        .bar-fill.positive {{ background: linear-gradient(90deg, #e74c3c 0%, #c0392b 100%); }}
        .bar-fill.negative {{ background: linear-gradient(90deg, #27ae60 0%, #1e8449 100%); }}
        .bar-fill.neutral {{ background: linear-gradient(90deg, #3498db 0%, #2980b9 100%); }}
        .bar-value {{ width: 80px; font-size: 14px; font-weight: 600; }}
        .timeline-chart {{ height: 200px; display: flex; align-items: flex-end; gap: 4px; padding: 20px 0; }}
        .timeline-bar {{ flex: 1; background: #3498db; border-radius: 3px 3px 0 0; transition: all 0.2s; cursor: pointer; position: relative; }}
        .timeline-bar:hover {{ background: #2980b9; }}
        .timeline-bar .tooltip {{ display: none; position: absolute; bottom: 100%; left: 50%; transform: translateX(-50%); background: #333; color: white; padding: 8px 12px; border-radius: 6px; font-size: 12px; white-space: nowrap; z-index: 100; }}
        .timeline-bar:hover .tooltip {{ display: block; }}
        .badge {{ display: inline-block; padding: 4px 12px; border-radius: 20px; font-size: 12px; font-weight: 600; }}
        .badge-danger {{ background: #ffebee; color: #c0392b; }}
        .badge-warning {{ background: #fff8e1; color: #f57c00; }}
        .badge-success {{ background: #e8f5e9; color: #27ae60; }}
        .badge-info {{ background: #e3f2fd; color: #1976d2; }}
        .highlight-box {{ background: #fff8e1; border-left: 4px solid #ffc107; padding: 20px; margin: 20px 0; border-radius: 0 8px 8px 0; }}
        .highlight-box.danger {{ background: #ffebee; border-color: #e74c3c; }}
        .highlight-box.success {{ background: #e8f5e9; border-color: #27ae60; }}
        .highlight-box h4 {{ color: #856404; margin-bottom: 8px; }}
        .highlight-box.danger h4 {{ color: #c0392b; }}
        .grid-2 {{ display: grid; grid-template-columns: 1fr 1fr; gap: 30px; }}
        .grid-3 {{ display: grid; grid-template-columns: repeat(3, 1fr); gap: 20px; }}
        .comparison {{ display: grid; grid-template-columns: 1fr 1fr; gap: 30px; margin: 20px 0; }}
        .comparison-item {{ padding: 20px; border-radius: 8px; }}
        .comparison-item.problem {{ background: #ffebee; }}
        .comparison-item.normal {{ background: #e8f5e9; }}
        .comparison-item h4 {{ margin-bottom: 12px; }}
        .comparison-item .stat {{ font-size: 24px; font-weight: 700; margin-bottom: 4px; }}
        .comparison-item .label {{ font-size: 13px; color: #666; }}
        .conclusion {{ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); color: white; padding: 40px; border-radius: 12px; }}
        .conclusion h2 {{ color: white; border-bottom-color: rgba(255,255,255,0.2); }}
        .conclusion ul {{ margin-left: 20px; }}
        .conclusion li {{ margin-bottom: 16px; }}
        .conclusion .highlight {{ background: rgba(255,255,255,0.15); padding: 3px 10px; border-radius: 4px; }}
        .recommendations {{ display: grid; grid-template-columns: repeat(2, 1fr); gap: 20px; margin-top: 24px; }}
        .rec-item {{ background: rgba(255,255,255,0.1); padding: 20px; border-radius: 8px; }}
        .rec-item h4 {{ margin-bottom: 12px; font-size: 16px; }}
        .rec-item p {{ font-size: 14px; opacity: 0.9; }}
        .rec-item .tag {{ font-size: 12px; background: rgba(255,255,255,0.2); padding: 2px 8px; border-radius: 4px; margin-bottom: 12px; display: inline-block; }}
        @media (max-width: 768px) {{
            .exec-summary {{ grid-template-columns: repeat(2, 1fr); }}
            .grid-2, .comparison {{ grid-template-columns: 1fr; }}
            .recommendations {{ grid-template-columns: 1fr; }}
        }}
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>低VoV高曝光问题深度分析报告</h1>
            <p class="subtitle">V3 完整版 | 多维度分析 + 特征归因 + 趋势追踪</p>
            <div class="meta">
                <span>分析周期: {df['dt'].min()} ~ {df['dt'].max()}</span>
                <span>样本量: {len(df):,} 条</span>
                <span>视频数: {df['vid'].nunique():,} 个</span>
                <span>场景数: {df['page'].nunique()} 个</span>
            </div>
        </div>

        <div class="exec-summary">
            <div class="summary-item warning">
                <div class="value">{ros_copc:.2f}</div>
                <div class="label">ROS COPC<br><small>整体预估偏低</small></div>
            </div>
            <div class="summary-item neutral">
                <div class="value">{problem_pct:.1f}%</div>
                <div class="label">排序失准比例<br><small>预估Top25实际Bottom50</small></div>
            </div>
            <div class="summary-item warning">
                <div class="value">{low_ros_bias:+.0f}%</div>
                <div class="label">低ROS视频偏差<br><small>真实ROS&lt;2被高估</small></div>
            </div>
            <div class="summary-item good">
                <div class="value">{high_ros_bias:.0f}%</div>
                <div class="label">高ROS视频偏差<br><small>真实ROS&gt;6被低估</small></div>
            </div>
        </div>

        <div class="card">
            <h2><span class="num">1</span>核心问题:回归均值</h2>
            <div class="highlight-box danger">
                <h4>关键发现</h4>
                <p>模型预估存在严重的<strong>回归均值</strong>问题:低ROS视频被高估,高ROS视频被低估。这导致排序失准,低质量视频获得过多曝光。</p>
            </div>
            <h3>按真实ROS分段的COPC</h3>
            <table>
                <thead>
                    <tr><th>真实ROS区间</th><th>样本数</th><th>预估ROS</th><th>真实ROS</th><th>偏差</th><th>COPC</th></tr>
                </thead>
                <tbody>
                    {gen_segment_rows()}
                </tbody>
            </table>
            <h3>偏差分布可视化</h3>
            <div class="chart-container">
                <div class="bar-chart">
                    {gen_segment_bars()}
                </div>
            </div>
        </div>

        <div class="card">
            <h2><span class="num">2</span>分场景分析</h2>
            <p>不同场景(Page)的预估效果存在明显差异,说明模型泛化性不足。</p>
            <table>
                <thead>
                    <tr><th>场景</th><th>样本数</th><th>曝光占比</th><th>ROS COPC</th><th>ROS偏差</th><th>排序相关性</th><th>问题比例</th></tr>
                </thead>
                <tbody>
                    {gen_page_rows()}
                </tbody>
            </table>
            <div class="highlight-box">
                <h4>场景差异要点</h4>
                <ul style="margin-left: 20px;">
                    <li>「回流后沉浸页」占比最大(74%),COPC相对较好(1.79)</li>
                    <li>「详情后沉浸页」和「首页feed」问题比例较高(&gt;10%)</li>
                    <li>不同场景的排序相关性差异明显(0.12~0.35),需要分场景优化</li>
                </ul>
            </div>
        </div>

        <div class="card">
            <h2><span class="num">3</span>时间趋势分析</h2>
            <h3>每日ROS COPC趋势</h3>
            <div class="chart-container">
                <div class="timeline-chart">
                    {gen_timeline_bars()}
                </div>
                <div style="display:flex; justify-content:space-between; font-size:12px; color:#999; margin-top:8px;">
                    <span>{daily_data[0]['dt']}</span>
                    <span>{daily_data[-1]['dt']}</span>
                </div>
            </div>
            <div class="grid-3" style="margin-top: 24px;">
                <div class="summary-item">
                    <div class="value" style="font-size:28px;">{copc_mean:.2f}</div>
                    <div class="label">COPC均值</div>
                </div>
                <div class="summary-item">
                    <div class="value" style="font-size:28px;">{copc_std:.2f}</div>
                    <div class="label">COPC标准差</div>
                </div>
                <div class="summary-item">
                    <div class="value" style="font-size:28px;">{avg_problem_pct:.1f}%</div>
                    <div class="label">平均问题比例</div>
                </div>
            </div>
            <div class="highlight-box success">
                <h4>趋势稳定性</h4>
                <p>时间趋势检验显示,COPC没有显著的上升或下降趋势(p=0.93),说明问题是系统性的,而非临时性。</p>
            </div>
        </div>

        <div class="card">
            <h2><span class="num">4</span>问题视频深入剖析</h2>
            <div class="comparison">
                <div class="comparison-item problem">
                    <h4>问题视频</h4>
                    <div class="stat">{len(problem)}</div>
                    <div class="label">条记录({len(problem)/len(df)*100:.1f}%)</div>
                    <ul style="margin-top:16px; margin-left:20px; font-size:14px;">
                        <li>预估ROS: {problem['pred_ros'].mean():.2f}</li>
                        <li>真实ROS: {problem['real_ros'].mean():.2f}</li>
                        <li>ROS偏差: <strong style="color:#c0392b;">{problem['ros_bias_pct'].mean():+.1f}%</strong></li>
                    </ul>
                </div>
                <div class="comparison-item normal">
                    <h4>正常视频</h4>
                    <div class="stat">{len(normal)}</div>
                    <div class="label">条记录({len(normal)/len(df)*100:.1f}%)</div>
                    <ul style="margin-top:16px; margin-left:20px; font-size:14px;">
                        <li>预估ROS: {normal['pred_ros'].mean():.2f}</li>
                        <li>真实ROS: {normal['real_ros'].mean():.2f}</li>
                        <li>ROS偏差: <strong style="color:#27ae60;">{normal['ros_bias_pct'].mean():+.1f}%</strong></li>
                    </ul>
                </div>
            </div>

            <h3>问题视频的真实ROS分布</h3>
            <div class="grid-2">
                <div>
                    <strong>问题视频</strong>
                    <div class="bar-chart" style="margin-top:12px;">
                        <div class="bar-row">
                            <div class="bar-label" style="width:50px;">ROS&lt;2</div>
                            <div class="bar-track">
                                <div class="bar-fill positive" style="width: {problem_ros_lt2_pct:.1f}%;">{problem_ros_lt2_pct:.0f}%</div>
                            </div>
                        </div>
                        <div class="bar-row">
                            <div class="bar-label" style="width:50px;">2-4</div>
                            <div class="bar-track">
                                <div class="bar-fill neutral" style="width: {problem_ros_24_pct:.1f}%;">{problem_ros_24_pct:.0f}%</div>
                            </div>
                        </div>
                        <div class="bar-row">
                            <div class="bar-label" style="width:50px;">&gt;6</div>
                            <div class="bar-track">
                                <div class="bar-fill negative" style="width: {problem_ros_gt6_pct:.1f}%;">{problem_ros_gt6_pct:.0f}%</div>
                            </div>
                        </div>
                    </div>
                </div>
                <div>
                    <strong>正常视频</strong>
                    <div class="bar-chart" style="margin-top:12px;">
                        <div class="bar-row">
                            <div class="bar-label" style="width:50px;">ROS&lt;2</div>
                            <div class="bar-track">
                                <div class="bar-fill positive" style="width: {normal_ros_lt2_pct:.1f}%;">{normal_ros_lt2_pct:.0f}%</div>
                            </div>
                        </div>
                        <div class="bar-row">
                            <div class="bar-label" style="width:50px;">2-4</div>
                            <div class="bar-track">
                                <div class="bar-fill neutral" style="width: {normal_ros_24_pct:.1f}%;">{normal_ros_24_pct:.0f}%</div>
                            </div>
                        </div>
                        <div class="bar-row">
                            <div class="bar-label" style="width:50px;">&gt;6</div>
                            <div class="bar-track">
                                <div class="bar-fill negative" style="width: {normal_ros_gt6_pct:.1f}%;">{normal_ros_gt6_pct:.0f}%</div>
                            </div>
                        </div>
                    </div>
                </div>
            </div>

            <h3 style="margin-top:30px;">高频问题视频 Top10</h3>
            <table>
                <thead>
                    <tr><th>VID</th><th>问题次数</th><th>总天数</th><th>预估ROS</th><th>真实ROS</th><th>ROS偏差</th><th>主要场景</th><th>总曝光</th></tr>
                </thead>
                <tbody>
                    {gen_problem_vid_rows()}
                </tbody>
            </table>
        </div>

        <div class="card">
            <h2><span class="num">5</span>特征归因分析</h2>
            <p>分析什么因素与预估偏差相关,帮助定位问题根源。</p>
            <h3>曝光量 vs ROS偏差</h3>
            <div class="chart-container">
                <div class="bar-chart">
                    {gen_exp_bias_bars()}
                </div>
            </div>
            <div class="highlight-box">
                <h4>特征归因要点</h4>
                <ul style="margin-left: 20px;">
                    <li><strong>曝光量越低,偏差越大</strong>:极低曝光的视频偏差+48%,极高曝光的偏差-22%</li>
                    <li><strong>STR越高,ROS偏差越大</strong>:高STR视频的ROS偏差+47%</li>
                    <li>这说明模型对「低频+高互动」的视频预估不准</li>
                </ul>
            </div>
        </div>

        <div class="card conclusion">
            <h2><span class="num" style="background:rgba(255,255,255,0.2);">6</span>综合结论与建议</h2>
            <h3 style="color:white; margin-top:24px;">问题根因</h3>
            <ul>
                <li><strong>回归均值问题</strong>:模型对极端值预估不准确,低ROS被高估<span class="highlight">{low_ros_bias:+.0f}%</span>,高ROS被低估<span class="highlight">{high_ros_bias:.0f}%</span></li>
                <li><strong>场景泛化不足</strong>:不同Page的COPC差异显著(1.79~1.99)</li>
                <li><strong>系统性问题</strong>:问题在30天内持续存在,无改善趋势</li>
            </ul>
            <h3 style="color:white; margin-top:30px;">量化影响</h3>
            <ul>
                <li>排序失准比例:<span class="highlight">{problem_pct:.1f}%</span>的视频被错误排序到头部</li>
                <li>排序相关性:<span class="highlight">0.32</span>(理想值为1.0)</li>
                <li>问题视频占总曝光:<span class="highlight">{problem_exp_pct:.1f}%</span></li>
            </ul>
            <h3 style="color:white; margin-top:30px;">可执行建议</h3>
            <div class="recommendations">
                <div class="rec-item">
                    <span class="tag">短期</span>
                    <h4>分段COPC校准</h4>
                    <p>对不同预估ROS区间单独校准,降低回归均值问题</p>
                </div>
                <div class="rec-item">
                    <span class="tag">短期</span>
                    <h4>低曝光惩罚</h4>
                    <p>对低曝光视频的预估增加不确定性惩罚</p>
                </div>
                <div class="rec-item">
                    <span class="tag">中期</span>
                    <h4>分场景建模</h4>
                    <p>针对不同Page场景训练独立校准层或独立模型</p>
                </div>
                <div class="rec-item">
                    <span class="tag">中期</span>
                    <h4>极端值样本加权</h4>
                    <p>在模型训练时增加极端值样本权重,改善尾部预估</p>
                </div>
            </div>
            <h3 style="color:white; margin-top:30px;">监控指标</h3>
            <ul>
                <li>日常监控「预估排名 vs 真实排名相关系数」</li>
                <li>分段COPC监控(ROS&lt;2, 2-4, &gt;6)</li>
                <li>分场景问题比例监控</li>
            </ul>
        </div>
    </div>
</body>
</html>
'''
|
|
|
+
|
|
|
# Write the rendered report as UTF-8 next to the input data and print where
# it was saved.
html_path = output_dir / "v3_完整报告.html"
html_path.write_text(html_content, encoding='utf-8')

print(f"V3 完整报告已生成: {html_path}")
|