|
|
@@ -64,6 +64,21 @@ if sim_col not in df.columns:
|
|
|
print(f"缺少相似度列: {sim_col}")
|
|
|
exit(1)
|
|
|
|
|
|
+# 计算三种回流率的辅助函数
|
|
|
def calc_return_rates(group, click):
    """Compute the overall, head and recommendation return metrics.

    For each of the three return-UV columns the column is summed over
    ``group``; a column that is absent from ``group`` contributes 0.
    Each sum is divided by ``click`` to obtain a rate rounded to four
    decimals. When ``click`` is not positive the rate is reported as 0.

    Args:
        group: DataFrame (or groupby sub-frame) that may contain the
            return-UV columns.
        click: Click count used as the denominator of every rate.

    Returns:
        dict with the keys ``overall_uv``, ``overall_rate``, ``head_uv``,
        ``head_rate``, ``rec_uv`` and ``rec_rate``, in that order.
    """
    # Output-key prefix -> source column holding the matching return UV.
    metric_columns = (
        ('overall', '整体回流uv'),
        ('head', '头部回流uv'),
        ('rec', '推荐回流uv'),
    )
    has_clicks = click > 0

    rates = {}
    for prefix, column in metric_columns:
        if column in group.columns:
            uv_total = group[column].sum()
        else:
            uv_total = 0

        if has_clicks:
            ratio = float(uv_total / click)
        else:
            # Avoid dividing by zero; keep the plain integer 0 fallback.
            ratio = 0

        rates[f'{prefix}_uv'] = int(uv_total)
        rates[f'{prefix}_rate'] = round(ratio, 4)
    return rates
|
|
|
+
|
|
|
# ========== 视角1:分享标题 → 视频 ==========
|
|
|
def build_share_title_view(input_df, min_video_titles=2, top_n=1000):
|
|
|
"""
|
|
|
@@ -102,20 +117,17 @@ def build_share_title_view(input_df, min_video_titles=2, top_n=1000):
|
|
|
channels_data = []
|
|
|
for channel, ch_group in vid_group.groupby('channel'):
|
|
|
click = int(ch_group['点击uv'].sum())
|
|
|
- return_uv = ch_group['原视频回流uv'].sum() if '原视频回流uv' in ch_group.columns else 0
|
|
|
- return_rate = return_uv / click if click > 0 else 0
|
|
|
- # 记录该渠道涉及的日期
|
|
|
+ rates = calc_return_rates(ch_group, click)
|
|
|
ch_dates = ch_group['dt'].unique().tolist() if 'dt' in ch_group.columns else []
|
|
|
channels_data.append({
|
|
|
'channel': str(channel) if pd.notna(channel) else '未知',
|
|
|
'click': click,
|
|
|
- 'return_uv': int(return_uv),
|
|
|
- 'return_rate': round(float(return_rate), 4),
|
|
|
- 'dates': ch_dates
|
|
|
+ 'dates': ch_dates,
|
|
|
+ **rates
|
|
|
})
|
|
|
|
|
|
vid_click = int(vid_group['点击uv'].sum())
|
|
|
- vid_return = vid_group['原视频回流uv'].sum() if '原视频回流uv' in vid_group.columns else 0
|
|
|
+ vid_rates = calc_return_rates(vid_group, vid_click)
|
|
|
# 获取二级品类(取第一条)
|
|
|
category = ''
|
|
|
if 'merge二级品类' in vid_group.columns:
|
|
|
@@ -125,15 +137,14 @@ def build_share_title_view(input_df, min_video_titles=2, top_n=1000):
|
|
|
'videoid': str(int(videoid)) if pd.notna(videoid) else '',
|
|
|
'category': category,
|
|
|
'total_click': vid_click,
|
|
|
- 'return_uv': int(vid_return),
|
|
|
- 'return_rate': round(float(vid_return / vid_click) if vid_click > 0 else 0, 4),
|
|
|
+ **vid_rates,
|
|
|
'channels': sorted(channels_data, key=lambda x: x['click'], reverse=True)
|
|
|
})
|
|
|
|
|
|
# 计算该视频标题的相似度(取平均)和回流率
|
|
|
sim = vt_group[sim_col].mean()
|
|
|
vt_click = int(vt_group['点击uv'].sum())
|
|
|
- vt_return = vt_group['原视频回流uv'].sum() if '原视频回流uv' in vt_group.columns else 0
|
|
|
+ vt_rates = calc_return_rates(vt_group, vt_click)
|
|
|
|
|
|
# 统计品类分布(每个品类有多少个不同视频ID)
|
|
|
categories_dist = []
|
|
|
@@ -149,20 +160,18 @@ def build_share_title_view(input_df, min_video_titles=2, top_n=1000):
|
|
|
'sim': round(float(sim), 2),
|
|
|
'categories': categories_dist,
|
|
|
'total_click': vt_click,
|
|
|
- 'return_uv': int(vt_return),
|
|
|
- 'return_rate': round(float(vt_return / vt_click) if vt_click > 0 else 0, 4),
|
|
|
+ **vt_rates,
|
|
|
'videos': sorted(videos_data, key=lambda x: x['total_click'], reverse=True)
|
|
|
})
|
|
|
|
|
|
st_click = int(title_data['点击uv'].sum())
|
|
|
- st_return = title_data['原视频回流uv'].sum() if '原视频回流uv' in title_data.columns else 0
|
|
|
+ st_rates = calc_return_rates(title_data, st_click)
|
|
|
|
|
|
result.append({
|
|
|
'share_title': str(share_title)[:80] if pd.notna(share_title) else '',
|
|
|
'cover': cover,
|
|
|
'total_click': st_click,
|
|
|
- 'return_uv': int(st_return),
|
|
|
- 'return_rate': round(float(st_return / st_click) if st_click > 0 else 0, 4),
|
|
|
+ **st_rates,
|
|
|
'video_titles': sorted(video_titles_data, key=lambda x: x['sim'], reverse=True)
|
|
|
})
|
|
|
|
|
|
@@ -209,29 +218,26 @@ def build_video_view(input_df, min_share_titles=2, top_n=1000):
|
|
|
channels_data = []
|
|
|
for channel, ch_group in st_group.groupby('channel'):
|
|
|
click = int(ch_group['点击uv'].sum())
|
|
|
- return_uv = ch_group['原视频回流uv'].sum() if '原视频回流uv' in ch_group.columns else 0
|
|
|
- return_rate = return_uv / click if click > 0 else 0
|
|
|
+ ch_rates = calc_return_rates(ch_group, click)
|
|
|
channels_data.append({
|
|
|
'channel': str(channel) if pd.notna(channel) else '未知',
|
|
|
'click': click,
|
|
|
- 'return_uv': int(return_uv),
|
|
|
- 'return_rate': round(float(return_rate), 4)
|
|
|
+ **ch_rates
|
|
|
})
|
|
|
|
|
|
st_click = int(st_group['点击uv'].sum())
|
|
|
- st_return = st_group['原视频回流uv'].sum() if '原视频回流uv' in st_group.columns else 0
|
|
|
+ st_rates = calc_return_rates(st_group, st_click)
|
|
|
share_titles_data.append({
|
|
|
'share_title': str(share_title)[:80] if pd.notna(share_title) else '',
|
|
|
'cover': cover,
|
|
|
'sim': round(float(sim), 2),
|
|
|
'total_click': st_click,
|
|
|
- 'return_uv': int(st_return),
|
|
|
- 'return_rate': round(float(st_return / st_click) if st_click > 0 else 0, 4),
|
|
|
+ **st_rates,
|
|
|
'channels': sorted(channels_data, key=lambda x: x['click'], reverse=True)
|
|
|
})
|
|
|
|
|
|
v_click = int(video_data['点击uv'].sum())
|
|
|
- v_return = video_data['原视频回流uv'].sum() if '原视频回流uv' in video_data.columns else 0
|
|
|
+ v_rates = calc_return_rates(video_data, v_click)
|
|
|
# 获取二级品类(取第一条)
|
|
|
category = ''
|
|
|
if 'merge二级品类' in video_data.columns:
|
|
|
@@ -242,8 +248,7 @@ def build_video_view(input_df, min_share_titles=2, top_n=1000):
|
|
|
'video_title': str(video_title)[:60] if pd.notna(video_title) else '',
|
|
|
'category': category,
|
|
|
'total_click': v_click,
|
|
|
- 'return_uv': int(v_return),
|
|
|
- 'return_rate': round(float(v_return / v_click) if v_click > 0 else 0, 4),
|
|
|
+ **v_rates,
|
|
|
'share_titles': sorted(share_titles_data, key=lambda x: x['sim'], reverse=True)
|
|
|
})
|
|
|
|
|
|
@@ -472,6 +477,17 @@ html_content = f"""<!DOCTYPE html>
|
|
|
}};
|
|
|
}}
|
|
|
|
|
|
+ // 重算三种回流率
|
|
|
+ function recalcRates(item, children, clickKey = 'click') {{
|
|
|
+ item.overall_uv = children.reduce((s, c) => s + (c.overall_uv || 0), 0);
|
|
|
+ item.head_uv = children.reduce((s, c) => s + (c.head_uv || 0), 0);
|
|
|
+ item.rec_uv = children.reduce((s, c) => s + (c.rec_uv || 0), 0);
|
|
|
+ const click = item.total_click || 0;
|
|
|
+ item.overall_rate = click > 0 ? item.overall_uv / click : 0;
|
|
|
+ item.head_rate = click > 0 ? item.head_uv / click : 0;
|
|
|
+ item.rec_rate = click > 0 ? item.rec_uv / click : 0;
|
|
|
+ }}
|
|
|
+
|
|
|
// 按渠道过滤数据并重新计算统计(递归)
|
|
|
function filterByChannel(data, channel) {{
|
|
|
if (channel === 'all') return JSON.parse(JSON.stringify(data));
|
|
|
@@ -488,20 +504,17 @@ html_content = f"""<!DOCTYPE html>
|
|
|
v.channels = v.channels.filter(ch => ch.channel === channel);
|
|
|
// 重算视频层统计
|
|
|
v.total_click = v.channels.reduce((s, c) => s + c.click, 0);
|
|
|
- v.return_uv = v.channels.reduce((s, c) => s + c.return_uv, 0);
|
|
|
- v.return_rate = v.total_click > 0 ? v.return_uv / v.total_click : 0;
|
|
|
+ recalcRates(v, v.channels);
|
|
|
return v;
|
|
|
}}).filter(v => v.channels.length > 0);
|
|
|
// 重算视频标题层统计
|
|
|
vt.total_click = vt.videos.reduce((s, v) => s + v.total_click, 0);
|
|
|
- vt.return_uv = vt.videos.reduce((s, v) => s + v.return_uv, 0);
|
|
|
- vt.return_rate = vt.total_click > 0 ? vt.return_uv / vt.total_click : 0;
|
|
|
+ recalcRates(vt, vt.videos);
|
|
|
return vt;
|
|
|
}}).filter(vt => vt.videos.length > 0);
|
|
|
// 重算分享标题层统计
|
|
|
st.total_click = st.video_titles.reduce((s, vt) => s + vt.total_click, 0);
|
|
|
- st.return_uv = st.video_titles.reduce((s, vt) => s + vt.return_uv, 0);
|
|
|
- st.return_rate = st.total_click > 0 ? st.return_uv / st.total_click : 0;
|
|
|
+ recalcRates(st, st.video_titles);
|
|
|
return st;
|
|
|
}}).filter(st => st.video_titles.length > 0);
|
|
|
// 按点击重新排序
|
|
|
@@ -517,14 +530,12 @@ html_content = f"""<!DOCTYPE html>
|
|
|
st.channels = st.channels.filter(ch => ch.channel === channel);
|
|
|
// 重算分享标题层统计
|
|
|
st.total_click = st.channels.reduce((s, c) => s + c.click, 0);
|
|
|
- st.return_uv = st.channels.reduce((s, c) => s + c.return_uv, 0);
|
|
|
- st.return_rate = st.total_click > 0 ? st.return_uv / st.total_click : 0;
|
|
|
+ recalcRates(st, st.channels);
|
|
|
return st;
|
|
|
}}).filter(st => st.channels.length > 0);
|
|
|
// 重算视频层统计
|
|
|
v.total_click = v.share_titles.reduce((s, st) => s + st.total_click, 0);
|
|
|
- v.return_uv = v.share_titles.reduce((s, st) => s + st.return_uv, 0);
|
|
|
- v.return_rate = v.total_click > 0 ? v.return_uv / v.total_click : 0;
|
|
|
+ recalcRates(v, v.share_titles);
|
|
|
return v;
|
|
|
}}).filter(v => v.share_titles.length > 0);
|
|
|
// 按点击重新排序
|
|
|
@@ -545,24 +556,14 @@ html_content = f"""<!DOCTYPE html>
|
|
|
let filteredShare = JSON.parse(JSON.stringify(shareData));
|
|
|
let filteredVideo = JSON.parse(JSON.stringify(videoData));
|
|
|
|
|
|
- // 视角1:筛选包含该品类视频的分享标题
|
|
|
+ // 视角1:筛选包含该品类视频的分享标题(但保留所有内容不过滤)
|
|
|
if (filteredShare.share_titles) {{
|
|
|
- filteredShare.share_titles = filteredShare.share_titles.map(st => {{
|
|
|
- st.video_titles = st.video_titles.map(vt => {{
|
|
|
- // 筛选该品类的视频
|
|
|
- vt.videos = vt.videos.filter(v => v.category === category);
|
|
|
- // 重算视频标题层统计
|
|
|
- vt.total_click = vt.videos.reduce((s, v) => s + v.total_click, 0);
|
|
|
- vt.return_uv = vt.videos.reduce((s, v) => s + v.return_uv, 0);
|
|
|
- vt.return_rate = vt.total_click > 0 ? vt.return_uv / vt.total_click : 0;
|
|
|
- return vt;
|
|
|
- }}).filter(vt => vt.videos.length > 0);
|
|
|
- // 重算分享标题层统计
|
|
|
- st.total_click = st.video_titles.reduce((s, vt) => s + vt.total_click, 0);
|
|
|
- st.return_uv = st.video_titles.reduce((s, vt) => s + vt.return_uv, 0);
|
|
|
- st.return_rate = st.total_click > 0 ? st.return_uv / st.total_click : 0;
|
|
|
- return st;
|
|
|
- }}).filter(st => st.video_titles.length > 0);
|
|
|
+ filteredShare.share_titles = filteredShare.share_titles.filter(st => {{
|
|
|
+ // 检查是否包含该品类的视频
|
|
|
+ return st.video_titles.some(vt =>
|
|
|
+ vt.videos.some(v => v.category === category)
|
|
|
+ );
|
|
|
+ }});
|
|
|
filteredShare.share_titles.sort((a, b) => b.total_click - a.total_click);
|
|
|
filteredShare.count = filteredShare.share_titles.length;
|
|
|
}}
|
|
|
@@ -727,6 +728,15 @@ html_content = f"""<!DOCTYPE html>
|
|
|
return rate >= q66 ? 'good' : (rate <= q33 ? 'bad' : '');
|
|
|
}}
|
|
|
|
|
|
+ // 渲染三种回流率
|
|
|
+ function renderRates(item) {{
|
|
|
+ return `
|
|
|
+ <span class="rate" style="${{rateGradient(item.overall_rate)}}">整体 ${{(item.overall_rate * 100).toFixed(1)}}%</span>
|
|
|
+ <span class="rate" style="${{rateGradient(item.head_rate)}}">头部 ${{(item.head_rate * 100).toFixed(1)}}%</span>
|
|
|
+ <span class="rate" style="${{rateGradient(item.rec_rate)}}">推荐 ${{(item.rec_rate * 100).toFixed(1)}}%</span>
|
|
|
+ `;
|
|
|
+ }}
|
|
|
+
|
|
|
// 渲染渠道明细
|
|
|
function renderChannels(channels) {{
|
|
|
return channels.map(ch => `
|
|
|
@@ -734,8 +744,7 @@ html_content = f"""<!DOCTYPE html>
|
|
|
<span class="channel-name">${{ch.channel}}</span>
|
|
|
<div class="channel-metrics">
|
|
|
<span>点击 ${{ch.click.toLocaleString()}}</span>
|
|
|
- <span>回流 ${{ch.return_uv.toLocaleString()}}</span>
|
|
|
- <span class="rate" style="${{rateGradient(ch.return_rate)}}">回流率 ${{(ch.return_rate * 100).toFixed(1)}}%</span>
|
|
|
+ ${{renderRates(ch)}}
|
|
|
</div>
|
|
|
</div>
|
|
|
`).join('');
|
|
|
@@ -761,7 +770,7 @@ html_content = f"""<!DOCTYPE html>
|
|
|
<div class="item-meta">
|
|
|
<span class="count">·${{st.video_titles.length}}</span>
|
|
|
<span>点击 ${{st.total_click.toLocaleString()}}</span>
|
|
|
- <span class="rate" style="${{rateGradient(st.return_rate)}}">回流率 ${{(st.return_rate * 100).toFixed(1)}}%</span>
|
|
|
+ ${{renderRates(st)}}
|
|
|
</div>
|
|
|
</div>
|
|
|
<div class="item-children">
|
|
|
@@ -778,7 +787,7 @@ html_content = f"""<!DOCTYPE html>
|
|
|
<span class="count">·${{vt.videos.length}}</span>
|
|
|
<span class="sim ${{simClass(vt.sim)}}">相似度 ${{vt.sim.toFixed(2)}}</span>
|
|
|
<span>点击 ${{vt.total_click.toLocaleString()}}</span>
|
|
|
- <span class="rate" style="${{rateGradient(vt.return_rate)}}">回流率 ${{(vt.return_rate * 100).toFixed(1)}}%</span>
|
|
|
+ ${{renderRates(vt)}}
|
|
|
</div>
|
|
|
</div>
|
|
|
<div class="item-children">
|
|
|
@@ -794,7 +803,7 @@ html_content = f"""<!DOCTYPE html>
|
|
|
<div class="item-meta">
|
|
|
<span class="count">·${{v.channels.length}}</span>
|
|
|
<span>点击 ${{v.total_click.toLocaleString()}}</span>
|
|
|
- <span class="rate" style="${{rateGradient(v.return_rate)}}">回流率 ${{(v.return_rate * 100).toFixed(1)}}%</span>
|
|
|
+ ${{renderRates(v)}}
|
|
|
</div>
|
|
|
</div>
|
|
|
<div class="item-children">
|
|
|
@@ -837,7 +846,7 @@ html_content = f"""<!DOCTYPE html>
|
|
|
<div class="item-meta">
|
|
|
<span class="count">·${{v.share_titles.length}}</span>
|
|
|
<span>点击 ${{v.total_click.toLocaleString()}}</span>
|
|
|
- <span class="rate" style="${{rateGradient(v.return_rate)}}">回流率 ${{(v.return_rate * 100).toFixed(1)}}%</span>
|
|
|
+ ${{renderRates(v)}}
|
|
|
</div>
|
|
|
</div>
|
|
|
<div class="item-children">
|
|
|
@@ -852,7 +861,7 @@ html_content = f"""<!DOCTYPE html>
|
|
|
<span class="count">·${{st.channels.length}}</span>
|
|
|
<span class="sim ${{simClass(st.sim)}}">相似度 ${{st.sim.toFixed(2)}}</span>
|
|
|
<span>点击 ${{st.total_click.toLocaleString()}}</span>
|
|
|
- <span class="rate" style="${{rateGradient(st.return_rate)}}">回流率 ${{(st.return_rate * 100).toFixed(1)}}%</span>
|
|
|
+ ${{renderRates(st)}}
|
|
|
</div>
|
|
|
</div>
|
|
|
<div class="item-children">
|