5 месяцев назад · 8a9f206654
--- a/fetch_daily.py
+++ b/fetch_daily.py
@@ -0,0 +1,182 @@
 
				+#!/usr/bin/env python
			
 
				+# coding=utf-8
			
 
				+"""
			
 
				+按天增量获取数据 - 通用版本
			
 
				+支持并发获取，自动跳过已有数据
			
 
				+
			
 
				+用法:
			
 
				+    python fetch_daily.py tasks/xxx/query.sql                    # 获取最近7天
			
 
				+    python fetch_daily.py tasks/xxx/query.sql --days 30          # 获取最近30天
			
 
				+    python fetch_daily.py tasks/xxx/query.sql --start 20260101 --end 20260107
			
 
				+    python fetch_daily.py tasks/xxx/query.sql --date 20260105    # 单天
			
 
				+    python fetch_daily.py tasks/xxx/query.sql --force            # 强制重新获取
			
 
				+    python fetch_daily.py tasks/xxx/query.sql --workers 10       # 设置并发数
			
 
				+"""
			
 
				+import argparse
			
 
				+import sys
			
 
				+from datetime import datetime, timedelta
			
 
				+from pathlib import Path
			
 
				+from concurrent.futures import ThreadPoolExecutor, as_completed
			
 
				+import threading
			
 
				+
			
 
				+sys.path.insert(0, str(Path(__file__).parent / "lib"))
			
 
				+from odps_module import ODPSClient
			
 
				+
			
 
				+# 线程安全的计数器
			
 
				+counter_lock = threading.Lock()
			
 
				+success_count = 0
			
 
				+fail_count = 0
			
 
				+
			
 
				+
			
 
				+def get_existing_dates(daily_dir):
			
 
				+    """获取已下载的日期列表"""
			
 
				+    existing = set()
			
 
				+    if not daily_dir.exists():
			
 
				+        return existing
			
 
				+    for f in daily_dir.glob("*.csv"):
			
 
				+        try:
			
 
				+            dt = f.stem
			
 
				+            if len(dt) == 8 and dt.isdigit():
			
 
				+                existing.add(dt)
			
 
				+        except:
			
 
				+            pass
			
 
				+    return existing
			
 
				+
			
 
				+
			
 
				+def get_date_range(start_str, end_str):
			
 
				+    """生成日期范围列表"""
			
 
				+    start = datetime.strptime(start_str, "%Y%m%d")
			
 
				+    end = datetime.strptime(end_str, "%Y%m%d")
			
 
				+    dates = []
			
 
				+    current = start
			
 
				+    while current <= end:
			
 
				+        dates.append(current.strftime("%Y%m%d"))
			
 
				+        current += timedelta(days=1)
			
 
				+    return dates
			
 
				+
			
 
				+
			
 
				+def fetch_single_day(dt, sql_template, daily_dir):
			
 
				+    """获取单天数据"""
			
 
				+    global success_count, fail_count
			
 
				+
			
 
				+    try:
			
 
				+        client = ODPSClient()
			
 
				+        sql = sql_template.replace("${dt}", dt)
			
 
				+        df = client.execute_sql(sql)
			
 
				+
			
 
				+        output_file = daily_dir / f"{dt}.csv"
			
 
				+
			
 
				+        if df is not None and len(df) > 0:
			
 
				+            df.to_csv(output_file, index=False)
			
 
				+            with counter_lock:
			
 
				+                success_count += 1
			
 
				+            return (dt, "success", len(df))
			
 
				+        elif df is not None:
			
 
				+            df.to_csv(output_file, index=False)
			
 
				+            with counter_lock:
			
 
				+                success_count += 1
			
 
				+            return (dt, "empty", 0)
			
 
				+        else:
			
 
				+            with counter_lock:
			
 
				+                fail_count += 1
			
 
				+            return (dt, "fail", 0)
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        with counter_lock:
			
 
				+            fail_count += 1
			
 
				+        return (dt, "error", str(e))
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    global success_count, fail_count
			
 
				+
			
 
				+    parser = argparse.ArgumentParser(description="按天增量获取数据")
			
 
				+    parser.add_argument("sql_file", type=str, help="SQL文件路径")
			
 
				+    parser.add_argument("--days", type=int, default=7, help="获取最近N天 (默认7)")
			
 
				+    parser.add_argument("--start", type=str, help="开始日期 YYYYMMDD")
			
 
				+    parser.add_argument("--end", type=str, help="结束日期 YYYYMMDD")
			
 
				+    parser.add_argument("--date", type=str, help="单天日期 YYYYMMDD")
			
 
				+    parser.add_argument("--force", action="store_true", help="强制重新获取")
			
 
				+    parser.add_argument("--workers", type=int, default=5, help="并发数 (默认5)")
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    # 解析 SQL 文件路径
			
 
				+    sql_file = Path(args.sql_file).resolve()
			
 
				+    if not sql_file.exists():
			
 
				+        print(f"错误: 找不到 {sql_file}")
			
 
				+        return
			
 
				+
			
 
				+    # 输出目录：SQL 同目录下的 output/SQL文件名/
			
 
				+    output_dir = sql_file.parent / "output"
			
 
				+    daily_dir = output_dir / sql_file.stem
			
 
				+    daily_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    print(f"SQL文件: {sql_file}")
			
 
				+    print(f"数据目录: {daily_dir}")
			
 
				+
			
 
				+    # 确定日期范围
			
 
				+    if args.date:
			
 
				+        target_dates = [args.date]
			
 
				+    elif args.start and args.end:
			
 
				+        target_dates = get_date_range(args.start, args.end)
			
 
				+    else:
			
 
				+        today = datetime.now()
			
 
				+        end_date = (today - timedelta(days=1)).strftime("%Y%m%d")
			
 
				+        start_date = (today - timedelta(days=args.days)).strftime("%Y%m%d")
			
 
				+        target_dates = get_date_range(start_date, end_date)
			
 
				+
			
 
				+    print(f"目标日期: {target_dates[0]} ~ {target_dates[-1]} ({len(target_dates)}天)")
			
 
				+
			
 
				+    # 检查已有数据
			
 
				+    existing_dates = get_existing_dates(daily_dir)
			
 
				+    print(f"已有数据: {len(existing_dates)}天")
			
 
				+
			
 
				+    # 确定需要获取的日期
			
 
				+    if args.force:
			
 
				+        missing_dates = target_dates
			
 
				+        print(f"强制模式: 重新获取所有 {len(missing_dates)} 天")
			
 
				+    else:
			
 
				+        missing_dates = [d for d in target_dates if d not in existing_dates]
			
 
				+        print(f"需要获取: {len(missing_dates)}天")
			
 
				+
			
 
				+    if not missing_dates:
			
 
				+        print("没有需要获取的数据，退出")
			
 
				+        return
			
 
				+
			
 
				+    # 读取 SQL 模板
			
 
				+    sql_template = sql_file.read_text(encoding="utf-8")
			
 
				+
			
 
				+    # 重置计数器
			
 
				+    success_count = 0
			
 
				+    fail_count = 0
			
 
				+
			
 
				+    # 并发获取
			
 
				+    workers = min(args.workers, len(missing_dates))
			
 
				+    print(f"\n开始获取 (并发数: {workers})...")
			
 
				+
			
 
				+    with ThreadPoolExecutor(max_workers=workers) as executor:
			
 
				+        futures = {
			
 
				+            executor.submit(fetch_single_day, dt, sql_template, daily_dir): dt
			
 
				+            for dt in missing_dates
			
 
				+        }
			
 
				+
			
 
				+        completed = 0
			
 
				+        for future in as_completed(futures):
			
 
				+            completed += 1
			
 
				+            dt, status, info = future.result()
			
 
				+
			
 
				+            if status == "success":
			
 
				+                print(f"  [{completed}/{len(missing_dates)}] ✓ {dt}: {info} 行")
			
 
				+            elif status == "empty":
			
 
				+                print(f"  [{completed}/{len(missing_dates)}] ⚠ {dt}: 无数据")
			
 
				+            elif status == "error":
			
 
				+                print(f"  [{completed}/{len(missing_dates)}] ✗ {dt}: {info}")
			
 
				+            else:
			
 
				+                print(f"  [{completed}/{len(missing_dates)}] ✗ {dt}: 失败")
			
 
				+
			
 
				+    print(f"\n完成! 成功: {success_count}, 失败: {fail_count}")
			
 
				+    print(f"数据目录: {daily_dir}")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/tasks/00_表的洞察/loghubods.alg_vid_feature_basic_info/01_基本数据.sql
+++ b/tasks/00_表的洞察/loghubods.alg_vid_feature_basic_info/01_基本数据.sql
@@ -0,0 +1,7 @@
 
				+-- 视频特征表样本数据查看
			
 
				+-- 使用: python fetch_daily.py tasks/00_表的洞察/loghubods.alg_vid_feature_basic_info/01_基本数据.sql --date 20260107
			
 
				+
			
 
				+SELECT *
			
 
				+FROM loghubods.alg_vid_feature_basic_info
			
 
				+WHERE dt = '${dt}'
			
 
				+LIMIT 100
			
--- a/tasks/00_表的洞察/loghubods.dwd_recsys_alg_exposure_base/01_基本数据.sql
+++ b/tasks/00_表的洞察/loghubods.dwd_recsys_alg_exposure_base/01_基本数据.sql
@@ -0,0 +1,7 @@
 
				+-- 曝光表样本数据查看
			
 
				+-- 使用: python fetch_daily.py tasks/00_表的洞察/loghubods.dwd_recsys_alg_exposure_base/01_基本数据.sql --date 20260107
			
 
				+
			
 
				+SELECT *
			
 
				+FROM loghubods.dwd_recsys_alg_exposure_base_20250108
			
 
				+WHERE dt = '${dt}'
			
 
				+LIMIT 100
			
--- a/tasks/00_表的洞察/loghubods.dwd_recsys_alg_sample_all/01_基本数据.sql
+++ b/tasks/00_表的洞察/loghubods.dwd_recsys_alg_sample_all/01_基本数据.sql
@@ -0,0 +1,7 @@
 
				+-- 推荐全量样本表数据查看
			
 
				+-- 使用: python fetch_daily.py tasks/00_表的洞察/loghubods.dwd_recsys_alg_sample_all/01_基本数据.sql --date 20260106
			
 
				+
			
 
				+SELECT *
			
 
				+FROM loghubods.dwd_recsys_alg_sample_all_20250212
			
 
				+WHERE dt = '${dt}'
			
 
				+LIMIT 100
			
--- a/tasks/archive/opengid原始分享数据探索/analyze.py
+++ b/tasks/archive/opengid原始分享数据探索/analyze.py
--- a/tasks/archive/opengid原始分享数据探索/query.sql
+++ b/tasks/archive/opengid原始分享数据探索/query.sql
--- a/tasks/archive/opengid数据探索/analyze.py
+++ b/tasks/archive/opengid数据探索/analyze.py
--- a/tasks/archive/opengid数据探索/query.sql
+++ b/tasks/archive/opengid数据探索/query.sql
--- a/tasks/archive/人群品类曝光分析/.DS_Store
+++ b/tasks/archive/人群品类曝光分析/.DS_Store
--- a/tasks/archive/人群品类曝光分析/query.sql
+++ b/tasks/archive/人群品类曝光分析/query.sql
--- a/tasks/archive/人群品类曝光分析/头部关联分析/.DS_Store
+++ b/tasks/archive/人群品类曝光分析/头部关联分析/.DS_Store
--- a/tasks/archive/人群品类曝光分析/头部关联分析/query.sql
+++ b/tasks/archive/人群品类曝光分析/头部关联分析/query.sql
--- a/tasks/archive/人群品类曝光分析/头部关联分析/query_v2_放宽条件.sql
+++ b/tasks/archive/人群品类曝光分析/头部关联分析/query_v2_放宽条件.sql
--- a/tasks/archive/人群品类曝光分析/头部关联分析/query_关联率对比.sql
+++ b/tasks/archive/人群品类曝光分析/头部关联分析/query_关联率对比.sql
--- a/tasks/archive/人群品类曝光分析/头部品类分析/.DS_Store
+++ b/tasks/archive/人群品类曝光分析/头部品类分析/.DS_Store
--- a/tasks/archive/人群品类曝光分析/头部品类分析/headvideoid分布/query.sql
+++ b/tasks/archive/人群品类曝光分析/头部品类分析/headvideoid分布/query.sql
--- a/tasks/archive/人群品类曝光分析/头部品类分析/query.sql
+++ b/tasks/archive/人群品类曝光分析/头部品类分析/query.sql
--- a/tasks/archive/人群品类曝光分析/头部品类分析/visualize.py
+++ b/tasks/archive/人群品类曝光分析/头部品类分析/visualize.py
--- a/tasks/archive/人群品类曝光分析/头部品类分析_简化版/analyze_category_correlation.py
+++ b/tasks/archive/人群品类曝光分析/头部品类分析_简化版/analyze_category_correlation.py
--- a/tasks/archive/人群品类曝光分析/头部品类分析_简化版/query.sql
+++ b/tasks/archive/人群品类曝光分析/头部品类分析_简化版/query.sql
--- a/tasks/archive/人群品类曝光分析/头部品类分析_简化版/visualize.py
+++ b/tasks/archive/人群品类曝光分析/头部品类分析_简化版/visualize.py
--- a/tasks/archive/人群品类曝光分析/头部品类分析_简化版/visualize_combined.py
+++ b/tasks/archive/人群品类曝光分析/头部品类分析_简化版/visualize_combined.py
--- a/tasks/archive/人群品类曝光分析/头部品类分析_简化版/visualize_correlation.py
+++ b/tasks/archive/人群品类曝光分析/头部品类分析_简化版/visualize_correlation.py
--- a/tasks/archive/人群品类曝光分析/头部品类分析_过滤小量/.DS_Store
+++ b/tasks/archive/人群品类曝光分析/头部品类分析_过滤小量/.DS_Store
--- a/tasks/archive/人群品类曝光分析/头部品类分析_过滤小量/query.sql
+++ b/tasks/archive/人群品类曝光分析/头部品类分析_过滤小量/query.sql
--- a/tasks/archive/人群品类曝光分析/头部品类分析_过滤小量/visualize.py
+++ b/tasks/archive/人群品类曝光分析/头部品类分析_过滤小量/visualize.py
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v10_关联率排查.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v10_关联率排查.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v11_放宽条件.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v11_放宽条件.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v2.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v2.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v3.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v3.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v4.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v4.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v5.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v5.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v6.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v6.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v7.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v7.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v8.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v8.sql
--- a/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v9.sql
+++ b/tasks/archive/人群品类曝光分析/数据膨胀排查/query_v9.sql
--- a/tasks/archive/公众号投流素材缺失排查/analyze.py
+++ b/tasks/archive/公众号投流素材缺失排查/analyze.py
--- a/tasks/archive/公众号投流素材缺失排查/query.sql
+++ b/tasks/archive/公众号投流素材缺失排查/query.sql
--- a/tasks/archive/品类再分享分析/.DS_Store
+++ b/tasks/archive/品类再分享分析/.DS_Store
--- a/tasks/archive/品类再分享分析/README.md
+++ b/tasks/archive/品类再分享分析/README.md
--- a/tasks/archive/品类再分享分析/query.sql
+++ b/tasks/archive/品类再分享分析/query.sql
--- a/tasks/archive/品类再分享分析/visualize.py
+++ b/tasks/archive/品类再分享分析/visualize.py
--- a/tasks/archive/品类命中分析/.DS_Store
+++ b/tasks/archive/品类命中分析/.DS_Store
--- a/tasks/archive/品类命中分析/query.sql
+++ b/tasks/archive/品类命中分析/query.sql
--- a/tasks/archive/品类命中分析/query_debug.sql
+++ b/tasks/archive/品类命中分析/query_debug.sql
--- a/tasks/archive/品类命中分析/query_detail.sql
+++ b/tasks/archive/品类命中分析/query_detail.sql
--- a/tasks/archive/品类命中分析/visualize.py
+++ b/tasks/archive/品类命中分析/visualize.py
--- a/tasks/archive/推荐样本表探索/query.sql
+++ b/tasks/archive/推荐样本表探索/query.sql
--- a/tasks/archive/曝光样本表探索/daily_stats.sql
+++ b/tasks/archive/曝光样本表探索/daily_stats.sql
--- a/tasks/archive/曝光样本表探索/query.sql
+++ b/tasks/archive/曝光样本表探索/query.sql
--- a/tasks/archive/渠道再分享回流/query.sql
+++ b/tasks/archive/渠道再分享回流/query.sql
--- a/tasks/archive/渠道场景分布/analyze.py
+++ b/tasks/archive/渠道场景分布/analyze.py
--- a/tasks/archive/渠道场景分布/query.sql
+++ b/tasks/archive/渠道场景分布/query.sql
--- a/tasks/archive/渠道场景效果分析/README.md
+++ b/tasks/archive/渠道场景效果分析/README.md
--- a/tasks/archive/渠道场景效果分析/query.sql
+++ b/tasks/archive/渠道场景效果分析/query.sql
--- a/tasks/archive/渠道场景效果分析/visualize.py
+++ b/tasks/archive/渠道场景效果分析/visualize.py
--- a/tasks/archive/渠道效果分析/.DS_Store
+++ b/tasks/archive/渠道效果分析/.DS_Store
--- a/tasks/archive/渠道效果分析/README.md
+++ b/tasks/archive/渠道效果分析/README.md
--- a/tasks/archive/渠道效果分析/analyze.py
+++ b/tasks/archive/渠道效果分析/analyze.py
--- a/tasks/archive/渠道效果分析/query.sql
+++ b/tasks/archive/渠道效果分析/query.sql
--- a/tasks/archive/渠道效果分析/visualize.py
+++ b/tasks/archive/渠道效果分析/visualize.py
--- a/tasks/archive/渠道用户量统计/analyze.py
+++ b/tasks/archive/渠道用户量统计/analyze.py
--- a/tasks/archive/渠道用户量统计/query.sql
+++ b/tasks/archive/渠道用户量统计/query.sql
--- a/tasks/archive/素材字段分析/analyze.py
+++ b/tasks/archive/素材字段分析/analyze.py
--- a/tasks/archive/素材字段分析/query.sql
+++ b/tasks/archive/素材字段分析/query.sql
--- a/tasks/archive/素材视频内容分析/.DS_Store
+++ b/tasks/archive/素材视频内容分析/.DS_Store
--- a/tasks/archive/素材视频内容分析/README.md
+++ b/tasks/archive/素材视频内容分析/README.md
--- a/tasks/archive/素材视频内容分析/analyze.py
+++ b/tasks/archive/素材视频内容分析/analyze.py
--- a/tasks/archive/素材视频内容分析/query.sql
+++ b/tasks/archive/素材视频内容分析/query.sql
--- a/tasks/archive/素材视频内容分析/visualize.py
+++ b/tasks/archive/素材视频内容分析/visualize.py
--- a/tasks/archive/素材视频内容分析/visualize_html.py
+++ b/tasks/archive/素材视频内容分析/visualize_html.py
--- a/tasks/archive/素材视频匹配分析/analyze.py
+++ b/tasks/archive/素材视频匹配分析/analyze.py
--- a/tasks/archive/素材视频匹配分析/query.sql
+++ b/tasks/archive/素材视频匹配分析/query.sql
--- a/tasks/archive/素材视频维度分析/analyze.py
+++ b/tasks/archive/素材视频维度分析/analyze.py
--- a/tasks/archive/素材视频维度分析/analyze_match.py
+++ b/tasks/archive/素材视频维度分析/analyze_match.py
--- a/tasks/archive/素材视频维度分析/analyze_material_fields.py
+++ b/tasks/archive/素材视频维度分析/analyze_material_fields.py
--- a/tasks/archive/素材视频维度分析/query.sql
+++ b/tasks/archive/素材视频维度分析/query.sql
--- a/tasks/archive/表关联验证/query.sql
+++ b/tasks/archive/表关联验证/query.sql
--- a/tasks/archive/表关联验证/query_overall.sql
+++ b/tasks/archive/表关联验证/query_overall.sql
--- a/tasks/archive/表关联验证/内外部UV_subsession/query.sql
+++ b/tasks/archive/表关联验证/内外部UV_subsession/query.sql
--- a/tasks/archive/表关联验证/内外部验证_subsession/query.sql
+++ b/tasks/archive/表关联验证/内外部验证_subsession/query.sql
--- a/tasks/archive/表关联验证/冲突排查/query.sql
+++ b/tasks/archive/表关联验证/冲突排查/query.sql
--- a/tasks/archive/表结构查询_video_dimension_detail_add_column.csv
+++ b/tasks/archive/表结构查询_video_dimension_detail_add_column.csv
--- a/tasks/archive/视频二级品类分析/README.md
+++ b/tasks/archive/视频二级品类分析/README.md
--- a/tasks/archive/视频二级品类分析/analyze.py
+++ b/tasks/archive/视频二级品类分析/analyze.py
--- a/tasks/archive/视频二级品类分析/query.sql
+++ b/tasks/archive/视频二级品类分析/query.sql
--- a/tasks/archive/视频维度分析/query.sql
+++ b/tasks/archive/视频维度分析/query.sql
--- a/tasks/archive/视频维度详情分析/README.md
+++ b/tasks/archive/视频维度详情分析/README.md
--- a/tasks/archive/视频维度详情分析/analyze.py
+++ b/tasks/archive/视频维度详情分析/analyze.py
--- a/tasks/archive/视频维度详情分析/query.sql
+++ b/tasks/archive/视频维度详情分析/query.sql
--- a/tasks/头部/进入前的I与头部I的相关性分析/analyze.py
+++ b/tasks/头部/进入前的I与头部I的相关性分析/analyze.py
@@ -0,0 +1,364 @@
 
				+#!/usr/bin/env python
			
 
				+# coding=utf-8
			
 
				+"""
			
 
				+素材视频内容分析
			
 
				+分析视频内容特征（关键词、口播、引导）对传播效果的影响
			
 
				+包含：文章标题/分享标题与视频标题的相似度计算
			
 
				+"""
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+from pathlib import Path
			
 
				+import sys
			
 
				+sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
			
 
				+from lib.text_embedding_api import compare_phrases_batch
			
 
				+
			
 
				+# 找到最新的输出文件（支持子目录）
			
 
				+task_dir = Path(__file__).parent
			
 
				+output_dir = task_dir / "output"
			
 
				+csv_files = list(output_dir.glob("**/*.csv"))
			
 
				+if not csv_files:
			
 
				+    print("没有找到数据文件，请先运行 fetch_data.py")
			
 
				+    exit(1)
			
 
				+
			
 
				+latest_file = max(csv_files, key=lambda x: x.stat().st_mtime)
			
 
				+print(f"读取文件: {latest_file}")
			
 
				+df = pd.read_csv(latest_file)
			
 
				+
			
 
				+# 输出结果收集
			
 
				+lines = []
			
 
				+
			
 
				+
			
 
				+def log(text=""):
			
 
				+    print(text)
			
 
				+    lines.append(text)
			
 
				+
			
 
				+
			
 
				+log(f"分析文件: {latest_file.name}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 计算标题相似度
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("计算标题相似度...")
			
 
				+log("=" * 70)
			
 
				+
			
 
				+# 准备所有相似度计算对
			
 
				+# 1. 文章标题 vs 视频标题
			
 
				+# 2. 分享标题 vs 视频标题
			
 
				+# 3. 文章标题 vs 视频口播
			
 
				+# 4. 分享标题 vs 视频口播
			
 
				+# 5. 文章标题 vs 一级品类
			
 
				+# 6. 文章标题 vs 二级品类
			
 
				+# 7. 分享标题 vs 一级品类
			
 
				+# 8. 分享标题 vs 二级品类
			
 
				+
			
 
				+similarity_configs = [
			
 
				+    ('文章标题', 'title', '文章标题_视频标题_相似度'),
			
 
				+    ('分享标题', 'title', '分享标题_视频标题_相似度'),
			
 
				+    ('文章标题', '视频口播', '文章标题_口播_相似度'),
			
 
				+    ('分享标题', '视频口播', '分享标题_口播_相似度'),
			
 
				+    ('文章标题', 'merge一级品类', '文章标题_一级品类_相似度'),
			
 
				+    ('文章标题', 'merge二级品类', '文章标题_二级品类_相似度'),
			
 
				+    ('分享标题', 'merge一级品类', '分享标题_一级品类_相似度'),
			
 
				+    ('分享标题', 'merge二级品类', '分享标题_二级品类_相似度'),
			
 
				+]
			
 
				+
			
 
				+BATCH_SIZE = 500
			
 
				+
			
 
				+for col1, col2, result_col in similarity_configs:
			
 
				+    # 初始化结果列
			
 
				+    df[result_col] = np.nan
			
 
				+
			
 
				+    # 准备配对数据
			
 
				+    pairs = []
			
 
				+    valid_indices = []
			
 
				+
			
 
				+    for idx, row in df.iterrows():
			
 
				+        text1 = str(row[col1]) if pd.notna(row[col1]) and row[col1] != '' else ''
			
 
				+        text2 = str(row[col2]) if pd.notna(row[col2]) and row[col2] != '' else ''
			
 
				+
			
 
				+        if text1 and text2:
			
 
				+            pairs.append((text1, text2))
			
 
				+            valid_indices.append(idx)
			
 
				+
			
 
				+    if not pairs:
			
 
				+        log(f"{result_col}: 无有效数据")
			
 
				+        continue
			
 
				+
			
 
				+    log(f"计算 {result_col}: {len(pairs)} 对")
			
 
				+
			
 
				+    # 批量计算
			
 
				+    scores = []
			
 
				+    for i in range(0, len(pairs), BATCH_SIZE):
			
 
				+        batch = pairs[i:i+BATCH_SIZE]
			
 
				+        results = compare_phrases_batch(batch)
			
 
				+        scores.extend([r['相似度'] for r in results])
			
 
				+        if (i + BATCH_SIZE) % 5000 == 0:
			
 
				+            log(f"  已处理 {min(i+BATCH_SIZE, len(pairs))}/{len(pairs)}")
			
 
				+
			
 
				+    # 写入结果
			
 
				+    for idx, score in zip(valid_indices, scores):
			
 
				+        df.at[idx, result_col] = score
			
 
				+
			
 
				+    log(f"  覆盖率: {df[result_col].notna().mean():.1%}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 基本信息
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("基本信息")
			
 
				+log("=" * 70)
			
 
				+log(f"记录数: {len(df):,}")
			
 
				+log(f"视频数: {df['videoid'].nunique():,}")
			
 
				+log(f"总点击uv: {df['点击uv'].sum():,}")
			
 
				+log(f"总回流uv: {df['再分享回流uv'].sum():,}")
			
 
				+log()
			
 
				+
			
 
				+# 字段覆盖率
			
 
				+log("新增字段覆盖率:")
			
 
				+for col in ['视频关键词', '视频口播', '视频主题', '传播性判断', '是否有片尾引导']:
			
 
				+    if col in df.columns:
			
 
				+        coverage = df[col].notna().sum() / len(df)
			
 
				+        log(f"  {col}: {coverage:.1%}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 传播性判断 vs 实际效果
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("AI 传播性判断 vs 实际效果")
			
 
				+log("=" * 70)
			
 
				+if '传播性判断' in df.columns:
			
 
				+    spread_stats = df.groupby('传播性判断').agg({
			
 
				+        'videoid': 'nunique',
			
 
				+        '点击uv': 'sum',
			
 
				+        '再分享回流uv': 'sum'
			
 
				+    }).rename(columns={'videoid': '视频数'})
			
 
				+    spread_stats['回流率'] = spread_stats['再分享回流uv'] / (spread_stats['点击uv'] + 10)
			
 
				+    spread_stats = spread_stats.sort_values('点击uv', ascending=False)
			
 
				+
			
 
				+    log(f"{'传播性判断':<15} {'视频数':>8} {'点击uv':>12} {'回流uv':>12} {'回流率':>10}")
			
 
				+    log("-" * 65)
			
 
				+    for spread, row in spread_stats.iterrows():
			
 
				+        spread_name = str(spread)[:13] if pd.notna(spread) else '(空)'
			
 
				+        log(f"{spread_name:<15} {int(row['视频数']):>8,} {int(row['点击uv']):>12,} {int(row['再分享回流uv']):>12,} {row['回流率']:>10.2%}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 片尾引导效果分析
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("片尾引导效果分析")
			
 
				+log("=" * 70)
			
 
				+if '是否有片尾引导' in df.columns:
			
 
				+    guide_stats = df.groupby('是否有片尾引导').agg({
			
 
				+        'videoid': 'nunique',
			
 
				+        '点击uv': 'sum',
			
 
				+        '再分享回流uv': 'sum'
			
 
				+    }).rename(columns={'videoid': '视频数'})
			
 
				+    guide_stats['回流率'] = guide_stats['再分享回流uv'] / (guide_stats['点击uv'] + 10)
			
 
				+
			
 
				+    log(f"{'是否有引导':<15} {'视频数':>8} {'点击uv':>12} {'回流率':>10}")
			
 
				+    log("-" * 50)
			
 
				+    for guide, row in guide_stats.iterrows():
			
 
				+        guide_name = str(guide)[:13] if pd.notna(guide) else '(空)'
			
 
				+        log(f"{guide_name:<15} {int(row['视频数']):>8,} {int(row['点击uv']):>12,} {row['回流率']:>10.2%}")
			
 
				+log()
			
 
				+
			
 
				+# 引导强度分析
			
 
				+if '引导强度' in df.columns:
			
 
				+    log("引导强度细分:")
			
 
				+    strength_stats = df.groupby('引导强度').agg({
			
 
				+        'videoid': 'nunique',
			
 
				+        '点击uv': 'sum',
			
 
				+        '再分享回流uv': 'sum'
			
 
				+    }).rename(columns={'videoid': '视频数'})
			
 
				+    strength_stats['回流率'] = strength_stats['再分享回流uv'] / (strength_stats['点击uv'] + 10)
			
 
				+    strength_stats = strength_stats.sort_values('点击uv', ascending=False)
			
 
				+
			
 
				+    for strength, row in strength_stats.iterrows():
			
 
				+        strength_name = str(strength)[:20] if pd.notna(strength) else '(空)'
			
 
				+        log(f"  {strength_name:<22} 视频数={int(row['视频数']):>5}, 回流率={row['回流率']:.2%}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 情感倾向分析
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("情感倾向效果分析")
			
 
				+log("=" * 70)
			
 
				+if '情感倾向' in df.columns:
			
 
				+    emotion_stats = df.groupby('情感倾向').agg({
			
 
				+        'videoid': 'nunique',
			
 
				+        '点击uv': 'sum',
			
 
				+        '再分享回流uv': 'sum'
			
 
				+    }).rename(columns={'videoid': '视频数'})
			
 
				+    emotion_stats['回流率'] = emotion_stats['再分享回流uv'] / (emotion_stats['点击uv'] + 10)
			
 
				+    emotion_stats = emotion_stats.sort_values('点击uv', ascending=False).head(10)
			
 
				+
			
 
				+    log(f"{'情感倾向':<20} {'视频数':>8} {'点击uv':>12} {'回流率':>10}")
			
 
				+    log("-" * 55)
			
 
				+    for emotion, row in emotion_stats.iterrows():
			
 
				+        emotion_name = str(emotion)[:18] if pd.notna(emotion) else '(空)'
			
 
				+        log(f"{emotion_name:<20} {int(row['视频数']):>8,} {int(row['点击uv']):>12,} {row['回流率']:>10.2%}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 视频风格分析
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("视频风格效果分析（Top 15）")
			
 
				+log("=" * 70)
			
 
				+if '视频风格' in df.columns:
			
 
				+    style_stats = df.groupby('视频风格').agg({
			
 
				+        'videoid': 'nunique',
			
 
				+        '点击uv': 'sum',
			
 
				+        '再分享回流uv': 'sum'
			
 
				+    }).rename(columns={'videoid': '视频数'})
			
 
				+    style_stats['回流率'] = style_stats['再分享回流uv'] / (style_stats['点击uv'] + 10)
			
 
				+    style_stats = style_stats.sort_values('点击uv', ascending=False).head(15)
			
 
				+
			
 
				+    log(f"{'视频风格':<25} {'视频数':>8} {'点击uv':>12} {'回流率':>10}")
			
 
				+    log("-" * 60)
			
 
				+    for style, row in style_stats.iterrows():
			
 
				+        style_name = str(style)[:23] if pd.notna(style) else '(空)'
			
 
				+        log(f"{style_name:<25} {int(row['视频数']):>8,} {int(row['点击uv']):>12,} {row['回流率']:>10.2%}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 高回流视频内容特征
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("高回流视频内容特征（回流率≥30%，点击≥1000）")
			
 
				+log("=" * 70)
			
 
				+high_return = df[(df['再分享回流率'] >= 0.3) & (df['点击uv'] >= 1000)]
			
 
				+log(f"符合条件视频数: {len(high_return)}")
			
 
				+log()
			
 
				+
			
 
				+if len(high_return) > 0:
			
 
				+    # 传播性分布
			
 
				+    if '传播性判断' in high_return.columns:
			
 
				+        spread_dist = high_return['传播性判断'].value_counts(normalize=True)
			
 
				+        log("传播性判断分布:")
			
 
				+        for spread, pct in spread_dist.items():
			
 
				+            log(f"  {spread}: {pct:.1%}")
			
 
				+        log()
			
 
				+
			
 
				+    # 引导分布
			
 
				+    if '是否有片尾引导' in high_return.columns:
			
 
				+        guide_dist = high_return['是否有片尾引导'].value_counts(normalize=True)
			
 
				+        log("片尾引导分布:")
			
 
				+        for guide, pct in guide_dist.items():
			
 
				+            log(f"  {guide}: {pct:.1%}")
			
 
				+        log()
			
 
				+
			
 
				+    # Top 视频样例
			
 
				+    log("Top 10 高回流视频:")
			
 
				+    log("-" * 70)
			
 
				+    top_return = high_return.nlargest(10, '再分享回流uv')
			
 
				+    for _, row in top_return.iterrows():
			
 
				+        title = str(row['title'])[:40] if pd.notna(row['title']) else '(无标题)'
			
 
				+        keywords = str(row['视频关键词'])[:50] if pd.notna(row['视频关键词']) else ''
			
 
				+        log(f"  {title}")
			
 
				+        log(f"    关键词: {keywords}")
			
 
				+        log(f"    点击uv={int(row['点击uv'])}, 回流率={row['再分享回流率']:.1%}, 传播性={row['传播性判断']}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 关键词词频分析
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("视频关键词词频（Top 30）")
			
 
				+log("=" * 70)
			
 
				+if '视频关键词' in df.columns:
			
 
				+    # 提取所有关键词
			
 
				+    all_keywords = []
			
 
				+    for kw in df['视频关键词'].dropna():
			
 
				+        if isinstance(kw, str):
			
 
				+            # 按常见分隔符拆分
			
 
				+            for sep in [',', '，', '、', ';', '；']:
			
 
				+                kw = kw.replace(sep, ',')
			
 
				+            all_keywords.extend([k.strip() for k in kw.split(',') if k.strip()])
			
 
				+
			
 
				+    from collections import Counter
			
 
				+    kw_counts = Counter(all_keywords).most_common(30)
			
 
				+    for kw, cnt in kw_counts:
			
 
				+        log(f"  {kw}: {cnt}")
			
 
				+log()
			
 
				+
			
 
				+# ============================================================
			
 
				+# 标题相似度效果分析
			
 
				+# ============================================================
			
 
				+log("=" * 70)
			
 
				+log("标题相似度效果分析")
			
 
				+log("=" * 70)
			
 
				+
			
 
				+# 所有相似度指标
			
 
				+similarity_cols = [
			
 
				+    ('文章标题_视频标题_相似度', '文章标题 vs 视频标题'),
			
 
				+    ('分享标题_视频标题_相似度', '分享标题 vs 视频标题'),
			
 
				+    ('文章标题_口播_相似度', '文章标题 vs 视频口播'),
			
 
				+    ('分享标题_口播_相似度', '分享标题 vs 视频口播'),
			
 
				+    ('文章标题_一级品类_相似度', '文章标题 vs 一级品类'),
			
 
				+    ('文章标题_二级品类_相似度', '文章标题 vs 二级品类'),
			
 
				+    ('分享标题_一级品类_相似度', '分享标题 vs 一级品类'),
			
 
				+    ('分享标题_二级品类_相似度', '分享标题 vs 二级品类'),
			
 
				+]
			
 
				+
			
 
				+# 相似度汇总统计
			
 
				+log("\n相似度汇总统计:")
			
 
				+log(f"{'指标':<30} {'均值':>8} {'中位数':>8} {'标准差':>8} {'覆盖率':>8}")
			
 
				+log("-" * 70)
			
 
				+for col, label in similarity_cols:
			
 
				+    if col in df.columns and df[col].notna().any():
			
 
				+        mean_val = df[col].mean()
			
 
				+        median_val = df[col].median()
			
 
				+        std_val = df[col].std()
			
 
				+        coverage = df[col].notna().mean()
			
 
				+        log(f"{label:<30} {mean_val:>8.3f} {median_val:>8.3f} {std_val:>8.3f} {coverage:>8.1%}")
			
 
				+
			
 
				+# 逐个分析相似度与回流率的关系
			
 
				+for col, label in similarity_cols:
			
 
				+    if col not in df.columns or not df[col].notna().any():
			
 
				+        continue
			
 
				+
			
 
				+    log(f"\n{label} vs 回流率:")
			
 
				+
			
 
				+    # 按相似度分组
			
 
				+    group_col = f'{col}_分组'
			
 
				+    df[group_col] = pd.cut(
			
 
				+        df[col],
			
 
				+        bins=[0, 0.3, 0.5, 0.7, 0.9, 1.0],
			
 
				+        labels=['低(0-0.3)', '较低(0.3-0.5)', '中等(0.5-0.7)', '较高(0.7-0.9)', '高(0.9-1)']
			
 
				+    )
			
 
				+
			
 
				+    sim_effect = df.groupby(group_col, observed=True).agg({
			
 
				+        'videoid': 'count',
			
 
				+        '点击uv': 'sum',
			
 
				+        '再分享回流uv': 'sum'
			
 
				+    }).rename(columns={'videoid': '记录数'})
			
 
				+    sim_effect['回流率'] = sim_effect['再分享回流uv'] / (sim_effect['点击uv'] + 10)
			
 
				+
			
 
				+    log(f"{'相似度分组':<20} {'记录数':>8} {'点击uv':>12} {'回流率':>10}")
			
 
				+    log("-" * 55)
			
 
				+    for group, row in sim_effect.iterrows():
			
 
				+        log(f"{str(group):<20} {int(row['记录数']):>8,} {int(row['点击uv']):>12,} {row['回流率']:>10.2%}")
			
 
				+
			
 
				+    # 清理临时列
			
 
				+    df.drop(columns=[group_col], inplace=True)
			
 
				+
			
 
				+log()
			
 
				+
			
 
				+# 保存带相似度的数据
			
 
				+output_with_sim = output_dir / f"{latest_file.stem}_含相似度.csv"
			
 
				+df.to_csv(output_with_sim, index=False)
			
 
				+log(f"含相似度数据已保存到: {output_with_sim}")
			
 
				+
			
 
				+# 保存分析结果
			
 
				+result_file = output_dir / f"{latest_file.stem}_分析.txt"
			
 
				+with open(result_file, 'w', encoding='utf-8') as f:
			
 
				+    f.write("\n".join(lines))
			
 
				+
			
 
				+log(f"分析结果已保存到: {result_file}")
			
--- a/tasks/头部/进入前的I与头部I的相关性分析/fetch_data.py
+++ b/tasks/头部/进入前的I与头部I的相关性分析/fetch_data.py
@@ -0,0 +1,184 @@
 
				+#!/usr/bin/env python
			
 
				+# coding=utf-8
			
 
				+"""
			
 
				+增量数据获取脚本
			
 
				+按天获取数据，支持增量更新和并发获取
			
 
				+
			
 
				+用法:
			
 
				+    python fetch_data.py 素材与头部视频相关性.sql              # 获取最近7天
			
 
				+    python fetch_data.py 素材与头部视频相关性.sql --days 30    # 获取最近30天
			
 
				+    python fetch_data.py 素材与头部视频相关性.sql --date 20260105
			
 
				+    python fetch_data.py 素材与头部视频相关性.sql --force      # 强制重新获取
			
 
				+"""
			
 
				+import argparse
			
 
				+import sys
			
 
				+from datetime import datetime, timedelta
			
 
				+from pathlib import Path
			
 
				+from concurrent.futures import ThreadPoolExecutor, as_completed
			
 
				+import threading
			
 
				+
			
 
				+# Add the repository-level lib directory to sys.path (tasks/头部/进入前的I与头部I的相关性分析 -> <repo root>/lib)
			
 
				+sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "lib"))
			
 
				+from odps_module import ODPSClient
			
 
				+
			
 
				+task_dir = Path(__file__).parent
			
 
				+output_dir = task_dir / "output"
			
 
				+
			
 
				+# 线程安全的计数器
			
 
				+counter_lock = threading.Lock()
			
 
				+success_count = 0
			
 
				+fail_count = 0
			
 
				+
			
 
				+
			
 
				+def get_existing_dates(daily_dir):
			
 
				+    """获取已下载的日期列表"""
			
 
				+    existing = set()
			
 
				+    if not daily_dir.exists():
			
 
				+        return existing
			
 
				+    for f in daily_dir.glob("*.csv"):
			
 
				+        try:
			
 
				+            dt = f.stem
			
 
				+            if len(dt) == 8 and dt.isdigit():
			
 
				+                existing.add(dt)
			
 
				+        except:
			
 
				+            pass
			
 
				+    return existing
			
 
				+
			
 
				+
			
 
				+def get_date_range(start_str, end_str):
			
 
				+    """生成日期范围列表"""
			
 
				+    start = datetime.strptime(start_str, "%Y%m%d")
			
 
				+    end = datetime.strptime(end_str, "%Y%m%d")
			
 
				+    dates = []
			
 
				+    current = start
			
 
				+    while current <= end:
			
 
				+        dates.append(current.strftime("%Y%m%d"))
			
 
				+        current += timedelta(days=1)
			
 
				+    return dates
			
 
				+
			
 
				+
			
 
				+def fetch_single_day(dt, sql_template, daily_dir):
			
 
				+    """获取单天数据（每个线程创建自己的客户端）"""
			
 
				+    global success_count, fail_count
			
 
				+
			
 
				+    try:
			
 
				+        client = ODPSClient()
			
 
				+        sql = sql_template.replace("${dt}", dt)
			
 
				+        df = client.execute_sql(sql)
			
 
				+
			
 
				+        output_file = daily_dir / f"{dt}.csv"
			
 
				+
			
 
				+        if df is not None and len(df) > 0:
			
 
				+            df.to_csv(output_file, index=False)
			
 
				+            with counter_lock:
			
 
				+                success_count += 1
			
 
				+            return (dt, "success", len(df))
			
 
				+        elif df is not None:
			
 
				+            df.to_csv(output_file, index=False)
			
 
				+            with counter_lock:
			
 
				+                success_count += 1
			
 
				+            return (dt, "empty", 0)
			
 
				+        else:
			
 
				+            with counter_lock:
			
 
				+                fail_count += 1
			
 
				+            return (dt, "fail", 0)
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        with counter_lock:
			
 
				+            fail_count += 1
			
 
				+        return (dt, "error", str(e))
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    global success_count, fail_count
			
 
				+
			
 
				+    parser = argparse.ArgumentParser(description="增量获取品类数据")
			
 
				+    parser.add_argument("sql_file", type=str, help="SQL文件名 (如: 品类组合_按天.sql)")
			
 
				+    parser.add_argument("--days", type=int, default=7, help="获取最近N天 (默认7)")
			
 
				+    parser.add_argument("--start", type=str, help="开始日期 YYYYMMDD")
			
 
				+    parser.add_argument("--end", type=str, help="结束日期 YYYYMMDD")
			
 
				+    parser.add_argument("--date", type=str, help="单天日期 YYYYMMDD")
			
 
				+    parser.add_argument("--force", action="store_true", help="强制重新获取")
			
 
				+    parser.add_argument("--workers", type=int, default=5, help="并发数 (默认5)")
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    # 解析SQL文件路径和数据目录
			
 
				+    sql_file = task_dir / args.sql_file
			
 
				+    if not sql_file.exists():
			
 
				+        print(f"错误: 找不到 {sql_file}")
			
 
				+        return
			
 
				+
			
 
				+    # 数据目录 = output / SQL文件名（去掉.sql后缀）
			
 
				+    data_dir_name = sql_file.stem  # 如 "品类组合_按天"
			
 
				+    daily_dir = output_dir / data_dir_name
			
 
				+    daily_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    print(f"SQL文件: {sql_file.name}")
			
 
				+    print(f"数据目录: {daily_dir}")
			
 
				+
			
 
				+    # 确定日期范围
			
 
				+    if args.date:
			
 
				+        target_dates = [args.date]
			
 
				+    elif args.start and args.end:
			
 
				+        target_dates = get_date_range(args.start, args.end)
			
 
				+    else:
			
 
				+        today = datetime.now()
			
 
				+        end_date = (today - timedelta(days=1)).strftime("%Y%m%d")
			
 
				+        start_date = (today - timedelta(days=args.days)).strftime("%Y%m%d")
			
 
				+        target_dates = get_date_range(start_date, end_date)
			
 
				+
			
 
				+    print(f"目标日期: {target_dates[0]} ~ {target_dates[-1]} ({len(target_dates)}天)")
			
 
				+
			
 
				+    # 检查已有数据
			
 
				+    existing_dates = get_existing_dates(daily_dir)
			
 
				+    print(f"已有数据: {len(existing_dates)}天")
			
 
				+
			
 
				+    # 确定需要获取的日期
			
 
				+    if args.force:
			
 
				+        missing_dates = target_dates
			
 
				+        print(f"强制模式: 重新获取所有 {len(missing_dates)} 天")
			
 
				+    else:
			
 
				+        missing_dates = [d for d in target_dates if d not in existing_dates]
			
 
				+        print(f"需要获取: {len(missing_dates)}天")
			
 
				+
			
 
				+    if not missing_dates:
			
 
				+        print("没有需要获取的数据，退出")
			
 
				+        return
			
 
				+
			
 
				+    # 读取SQL模板
			
 
				+    sql_template = sql_file.read_text(encoding="utf-8")
			
 
				+
			
 
				+    # 重置计数器
			
 
				+    success_count = 0
			
 
				+    fail_count = 0
			
 
				+
			
 
				+    # 并发获取
			
 
				+    workers = min(args.workers, len(missing_dates))
			
 
				+    print(f"\n开始获取 (并发数: {workers})...")
			
 
				+
			
 
				+    with ThreadPoolExecutor(max_workers=workers) as executor:
			
 
				+        futures = {
			
 
				+            executor.submit(fetch_single_day, dt, sql_template, daily_dir): dt
			
 
				+            for dt in missing_dates
			
 
				+        }
			
 
				+
			
 
				+        completed = 0
			
 
				+        for future in as_completed(futures):
			
 
				+            completed += 1
			
 
				+            dt, status, info = future.result()
			
 
				+
			
 
				+            if status == "success":
			
 
				+                print(f"  [{completed}/{len(missing_dates)}] ✓ {dt}: {info} 行")
			
 
				+            elif status == "empty":
			
 
				+                print(f"  [{completed}/{len(missing_dates)}] ⚠ {dt}: 无数据")
			
 
				+            elif status == "error":
			
 
				+                print(f"  [{completed}/{len(missing_dates)}] ✗ {dt}: {info}")
			
 
				+            else:
			
 
				+                print(f"  [{completed}/{len(missing_dates)}] ✗ {dt}: 失败")
			
 
				+
			
 
				+    print(f"\n完成! 成功: {success_count}, 失败: {fail_count}")
			
 
				+    print(f"数据目录: {daily_dir}")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/tasks/头部/进入前的I与头部I的相关性分析/visualize.py
+++ b/tasks/头部/进入前的I与头部I的相关性分析/visualize.py
@@ -0,0 +1,649 @@
 
				+#!/usr/bin/env python
			
 
				+# coding=utf-8
			
 
				+"""
			
 
				+素材视频内容分析 - HTML 可视化（简化版：相似度 vs 回流率）
			
 
				+"""
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+from pathlib import Path
			
 
				+import json
			
 
				+
			
 
				+# 找到最新的含相似度文件
			
 
				+task_dir = Path(__file__).parent
			
 
				+output_dir = task_dir / "output"
			
 
				+csv_files = list(output_dir.glob("*含相似度*.csv"))
			
 
				+if not csv_files:
			
 
				+    csv_files = list(output_dir.glob("*.csv"))
			
 
				+if not csv_files:
			
 
				+    print("没有找到数据文件")
			
 
				+    exit(1)
			
 
				+
			
 
				+latest_file = max(csv_files, key=lambda x: x.stat().st_mtime)
			
 
				+print(f"读取文件: {latest_file.name}")
			
 
				+df = pd.read_csv(latest_file)
			
 
				+
			
 
				+# 相似度列配置
			
 
				+similarity_cols = [
			
 
				+    ('分享标题_视频标题_相似度', '分享标题与视频标题'),
			
 
				+    ('分享标题_口播_相似度', '分享标题与口播内容'),
			
 
				+    ('分享标题_一级品类_相似度', '分享标题与一级品类'),
			
 
				+    ('分享标题_二级品类_相似度', '分享标题与二级品类'),
			
 
				+    ('文章标题_视频标题_相似度', '文章标题与视频标题'),
			
 
				+    ('文章标题_口播_相似度', '文章标题与口播内容'),
			
 
				+]
			
 
				+
			
 
				+# 过滤有效的相似度列
			
 
				+valid_cols = [(col, label) for col, label in similarity_cols
			
 
				+              if col in df.columns and df[col].notna().sum() > 100]
			
 
				+
			
 
				+print(f"有效相似度指标: {len(valid_cols)} 个")
			
 
				+
			
 
				+# 回流率字段
			
 
				+rate_cols = ['再分享回流率', '原视频再分享回流率', '推荐再分享回流率']
			
 
				+rate_cols = [c for c in rate_cols if c in df.columns]
			
 
				+
			
 
				+# 准备原始数据表（取关键字段）- 调整列顺序
			
 
				+table_cols = ['dt', 'channel', 'hotsencetype', '合作方名', '公众号名']  # 日期、渠道、场景、合作方、公众号在最前
			
 
				+table_cols += ['文章标题', '分享封面', '分享标题', 'title', 'videoid']  # 标题、封面和视频ID
			
 
				+table_cols += rate_cols  # 三个回流率
			
 
				+table_cols += ['点击uv']  # 点击量
			
 
				+table_cols += [col for col, _ in valid_cols]  # 相似度
			
 
				+table_cols += ['merge一级品类', 'merge二级品类']  # 品类在后
			
 
				+table_cols = [c for c in table_cols if c in df.columns]
			
 
				+
			
 
				+# 过滤有相似度数据的记录
			
 
				+raw_df = df[df[[col for col, _ in valid_cols[:2]]].notna().any(axis=1)].copy()
			
 
				+
			
 
				+# 计算分享标题聚合UV
			
 
				+share_title_uv = raw_df.groupby('分享标题')['点击uv'].transform('sum')
			
 
				+raw_df['分享标题聚合UV'] = share_title_uv
			
 
				+
			
 
				+# 按分享标题聚合UV排序，再按点击UV排序，取前2000条
			
 
				+raw_df = raw_df.sort_values(['分享标题聚合UV', '点击uv'], ascending=[False, False]).head(2000)
			
 
				+
			
 
				+# 更新table_cols，加入聚合UV
			
 
				+table_cols_with_agg = table_cols.copy()
			
 
				+# 在点击uv后面插入分享标题聚合UV
			
 
				+if '点击uv' in table_cols_with_agg:
			
 
				+    idx = table_cols_with_agg.index('点击uv')
			
 
				+    table_cols_with_agg.insert(idx, '分享标题聚合UV')
			
 
				+
			
 
				+raw_data = raw_df[table_cols_with_agg].fillna('').to_dict('records')
			
 
				+table_cols = table_cols_with_agg
			
 
				+
			
 
				+# 相似度分组统计
			
 
				+bins = [0, 0.3, 0.5, 0.7, 0.9, 1.0]
			
 
				+labels_bin = ['0-0.3', '0.3-0.5', '0.5-0.7', '0.7-0.9', '0.9-1.0']
			
 
				+
			
 
				+group_stats = []
			
 
				+for col, label in valid_cols:
			
 
				+    df['_group'] = pd.cut(df[col], bins=bins, labels=labels_bin)
			
 
				+
			
 
				+    stats = []
			
 
				+    for grp in labels_bin:
			
 
				+        grp_df = df[df['_group'] == grp]
			
 
				+        if len(grp_df) == 0:
			
 
				+            continue
			
 
				+
			
 
				+        row = {
			
 
				+            'group': grp,
			
 
				+            'count': len(grp_df),
			
 
				+            'click_uv': int(grp_df['点击uv'].sum()),
			
 
				+        }
			
 
				+
			
 
				+        # 计算加权平均回流率（保持原始小数）
			
 
				+        for rate_col in rate_cols:
			
 
				+            weighted = (grp_df[rate_col] * grp_df['点击uv']).sum()
			
 
				+            total_click = grp_df['点击uv'].sum()
			
 
				+            row[rate_col] = round(weighted / (total_click + 1), 4) if total_click > 0 else 0
			
 
				+
			
 
				+        stats.append(row)
			
 
				+
			
 
				+    group_stats.append({
			
 
				+        'label': label,
			
 
				+        'col': col,
			
 
				+        'stats': stats
			
 
				+    })
			
 
				+
			
 
				+if '_group' in df.columns:
			
 
				+    df.drop(columns=['_group'], inplace=True)
			
 
				+
			
 
				+# 列名映射（用于表头显示）
			
 
				+col_labels = {col: label for col, label in valid_cols}
			
 
				+col_labels.update({
			
 
				+    'dt': '日期',
			
 
				+    'channel': '渠道',
			
 
				+    'hotsencetype': '场景类型',
			
 
				+    '合作方名': '合作方',
			
 
				+    '公众号名': '公众号',
			
 
				+    '文章标题': '文章标题',
			
 
				+    '分享标题': '分享标题',
			
 
				+    '分享封面': '分享封面',
			
 
				+    'title': '视频标题',
			
 
				+    'videoid': '视频ID',
			
 
				+    'merge一级品类': '一级品类',
			
 
				+    'merge二级品类': '二级品类',
			
 
				+    '分享标题聚合UV': '分享标题聚合UV',
			
 
				+    '点击uv': '点击UV',
			
 
				+    '再分享回流率': '再分享回流率',
			
 
				+    '原视频再分享回流率': '原视频回流率',
			
 
				+    '推荐再分享回流率': '推荐回流率',
			
 
				+})
			
 
				+
			
 
				+# 获取筛选项的唯一值
			
 
				+date_list = sorted(df['dt'].dropna().unique().tolist()) if 'dt' in df.columns else []
			
 
				+channel_list = sorted(df['channel'].dropna().unique().tolist()) if 'channel' in df.columns else []
			
 
				+hotsencetype_list = sorted(df['hotsencetype'].dropna().unique().tolist()) if 'hotsencetype' in df.columns else []
			
 
				+partner_list = sorted(df['合作方名'].dropna().unique().tolist()) if '合作方名' in df.columns else []
			
 
				+account_list = sorted(df['公众号名'].dropna().unique().tolist()) if '公众号名' in df.columns else []
			
 
				+
			
 
				+# 生成 HTML
			
 
				+html_content = f'''<!DOCTYPE html>
			
 
				+<html lang="zh-CN">
			
 
				+<head>
			
 
				+    <meta charset="UTF-8">
			
 
				+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
			
 
				+    <title>相似度 vs 回流率分析</title>
			
 
				+    <script src="https://cdn.jsdelivr.net/npm/echarts@5.4.3/dist/echarts.min.js"></script>
			
 
				+    <style>
			
 
				+        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
			
 
				+        body {{
			
 
				+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
			
 
				+            background: #f5f7fa;
			
 
				+            padding: 20px;
			
 
				+        }}
			
 
				+        .container {{ max-width: 1600px; margin: 0 auto; }}
			
 
				+        h1 {{ text-align: center; color: #333; margin-bottom: 20px; }}
			
 
				+        .section {{
			
 
				+            background: white;
			
 
				+            border-radius: 12px;
			
 
				+            padding: 20px;
			
 
				+            margin-bottom: 20px;
			
 
				+            box-shadow: 0 2px 8px rgba(0,0,0,0.08);
			
 
				+        }}
			
 
				+        .section h2 {{
			
 
				+            color: #333;
			
 
				+            margin-bottom: 15px;
			
 
				+            padding-bottom: 10px;
			
 
				+            border-bottom: 2px solid #667eea;
			
 
				+            display: inline-block;
			
 
				+        }}
			
 
				+
			
 
				+        /* 图表网格 */
			
 
				+        .chart-grid {{
			
 
				+            display: grid;
			
 
				+            grid-template-columns: repeat(3, 1fr);
			
 
				+            gap: 15px;
			
 
				+        }}
			
 
				+        .chart-item {{ height: 300px; }}
			
 
				+
			
 
				+        /* 可排序表格 */
			
 
				+        .table-controls {{
			
 
				+            display: flex;
			
 
				+            gap: 15px;
			
 
				+            margin-bottom: 15px;
			
 
				+            flex-wrap: wrap;
			
 
				+            align-items: center;
			
 
				+        }}
			
 
				+        .table-controls input {{
			
 
				+            padding: 8px 12px;
			
 
				+            border: 1px solid #ddd;
			
 
				+            border-radius: 6px;
			
 
				+            width: 300px;
			
 
				+        }}
			
 
				+        .table-controls select {{
			
 
				+            padding: 8px 12px;
			
 
				+            border: 1px solid #ddd;
			
 
				+            border-radius: 6px;
			
 
				+        }}
			
 
				+        .table-wrapper {{
			
 
				+            overflow-x: auto;
			
 
				+            max-height: 600px;
			
 
				+            overflow-y: auto;
			
 
				+        }}
			
 
				+        table {{
			
 
				+            width: 100%;
			
 
				+            border-collapse: collapse;
			
 
				+            font-size: 13px;
			
 
				+        }}
			
 
				+        th {{
			
 
				+            background: #667eea;
			
 
				+            color: white;
			
 
				+            padding: 10px 8px;
			
 
				+            text-align: left;
			
 
				+            cursor: pointer;
			
 
				+            user-select: none;
			
 
				+            white-space: nowrap;
			
 
				+            position: sticky;
			
 
				+            top: 0;
			
 
				+            z-index: 10;
			
 
				+        }}
			
 
				+        th:hover {{ background: #5a6fd6; }}
			
 
				+        th .sort-icon {{ margin-left: 5px; opacity: 0.5; }}
			
 
				+        th.sorted .sort-icon {{ opacity: 1; }}
			
 
				+        td {{
			
 
				+            padding: 8px;
			
 
				+            border-bottom: 1px solid #eee;
			
 
				+            max-width: 250px;
			
 
				+            overflow: hidden;
			
 
				+            text-overflow: ellipsis;
			
 
				+            white-space: nowrap;
			
 
				+        }}
			
 
				+        td.wrap {{
			
 
				+            white-space: normal;
			
 
				+            word-break: break-word;
			
 
				+            min-width: 180px;
			
 
				+            max-width: 220px;
			
 
				+        }}
			
 
				+        tr:hover {{ background: #f8f9fa; }}
			
 
				+        tr:nth-child(even) {{ background: #fafbfc; }}
			
 
				+        tr:nth-child(even):hover {{ background: #f0f1f2; }}
			
 
				+        .num {{ text-align: right; font-family: monospace; }}
			
 
				+        .highlight {{ background: #fff3cd !important; }}
			
 
				+
			
 
				+        /* 图片预览模态框 */
			
 
				+        .img-modal {{
			
 
				+            display: none;
			
 
				+            position: fixed;
			
 
				+            top: 0;
			
 
				+            left: 0;
			
 
				+            width: 100%;
			
 
				+            height: 100%;
			
 
				+            background: rgba(0,0,0,0.8);
			
 
				+            z-index: 1000;
			
 
				+            cursor: pointer;
			
 
				+            justify-content: center;
			
 
				+            align-items: center;
			
 
				+        }}
			
 
				+        .img-modal img {{
			
 
				+            max-width: 90%;
			
 
				+            max-height: 90%;
			
 
				+            border-radius: 8px;
			
 
				+            box-shadow: 0 4px 20px rgba(0,0,0,0.3);
			
 
				+        }}
			
 
				+        .img-modal.show {{ display: flex; }}
			
 
				+
			
 
				+        /* 统计表格 */
			
 
				+        .stats-table {{ margin-top: 10px; }}
			
 
				+        .stats-table th {{ background: #5a6fd6; font-size: 12px; }}
			
 
				+        .stats-table td {{ font-size: 12px; padding: 6px 8px; }}
			
 
				+
			
 
				+        @media (max-width: 1200px) {{
			
 
				+            .chart-grid {{ grid-template-columns: repeat(2, 1fr); }}
			
 
				+        }}
			
 
				+        @media (max-width: 768px) {{
			
 
				+            .chart-grid {{ grid-template-columns: 1fr; }}
			
 
				+        }}
			
 
				+    </style>
			
 
				+</head>
			
 
				+<body>
			
 
				+    <!-- 图片预览模态框 -->
			
 
				+    <div id="imgModal" class="img-modal" onclick="closeImgModal()">
			
 
				+        <img id="modalImg" src="" alt="预览图片">
			
 
				+    </div>
			
 
				+
			
 
				+    <div class="container">
			
 
				+        <h1>相似度 vs 回流率分析</h1>
			
 
				+
			
 
				+        <!-- 分组统计图表 -->
			
 
				+        <div class="section">
			
 
				+            <h2>相似度分组 vs 回流率</h2>
			
 
				+            <div class="chart-grid">
			
 
				+                {' '.join(f'<div id="chart_{i}" class="chart-item"></div>' for i in range(len(group_stats)))}
			
 
				+            </div>
			
 
				+        </div>
			
 
				+
			
 
				+        <!-- 分组统计表格 -->
			
 
				+        <div class="section">
			
 
				+            <h2>分组详细数据</h2>
			
 
				+            <div id="statsTablesContainer"></div>
			
 
				+        </div>
			
 
				+
			
 
				+        <!-- 原始数据表 -->
			
 
				+        <div class="section">
			
 
				+            <h2>原始数据（Top 2000 by 点击UV）</h2>
			
 
				+            <div class="table-controls">
			
 
				+                <select id="dateFilter">
			
 
				+                    <option value="">全部日期</option>
			
 
				+                    {' '.join(f'<option value="{d}">{d}</option>' for d in date_list)}
			
 
				+                </select>
			
 
				+                <select id="channelFilter">
			
 
				+                    <option value="">全部渠道</option>
			
 
				+                    {' '.join(f'<option value="{c}">{c}</option>' for c in channel_list)}
			
 
				+                </select>
			
 
				+                <select id="hotsencetypeFilter">
			
 
				+                    <option value="">全部场景</option>
			
 
				+                    {' '.join(f'<option value="{h}">{h}</option>' for h in hotsencetype_list)}
			
 
				+                </select>
			
 
				+                <select id="partnerFilter">
			
 
				+                    <option value="">全部合作方</option>
			
 
				+                    {' '.join(f'<option value="{p}">{p}</option>' for p in partner_list)}
			
 
				+                </select>
			
 
				+                <select id="accountFilter">
			
 
				+                    <option value="">全部公众号</option>
			
 
				+                    {' '.join(f'<option value="{a}">{a}</option>' for a in account_list)}
			
 
				+                </select>
			
 
				+                <input type="text" id="searchInput" placeholder="搜索标题...">
			
 
				+                <select id="simFilter">
			
 
				+                    <option value="">全部相似度</option>
			
 
				+                    <option value="high">高相似度 (≥0.7)</option>
			
 
				+                    <option value="mid">中相似度 (0.3-0.7)</option>
			
 
				+                    <option value="low">低相似度 (<0.3)</option>
			
 
				+                </select>
			
 
				+                <label>点击UV ≥ <input type="number" id="minUvInput" value="100" min="0" style="width:80px;padding:8px;border:1px solid #ddd;border-radius:6px;"></label>
			
 
				+                <span id="rowCount" style="color:#666;"></span>
			
 
				+            </div>
			
 
				+            <div class="table-wrapper">
			
 
				+                <table id="dataTable">
			
 
				+                    <thead>
			
 
				+                        <tr id="headerRow"></tr>
			
 
				+                    </thead>
			
 
				+                    <tbody id="tableBody"></tbody>
			
 
				+                </table>
			
 
				+            </div>
			
 
				+        </div>
			
 
				+    </div>
			
 
				+
			
 
				+    <script>
			
 
				+        // 数据
			
 
				+        const rawData = {json.dumps(raw_data, ensure_ascii=False, default=str)};
			
 
				+        const groupStats = {json.dumps(group_stats, ensure_ascii=False)};
			
 
				+        const tableCols = {json.dumps(table_cols, ensure_ascii=False)};
			
 
				+        const colLabels = {json.dumps(col_labels, ensure_ascii=False)};
			
 
				+        const rateCols = {json.dumps(rate_cols, ensure_ascii=False)};
			
 
				+        const validCols = {json.dumps([col for col, _ in valid_cols], ensure_ascii=False)};
			
 
				+
			
 
				+        // 当前排序状态
			
 
				+        let currentSort = {{ col: '分享标题聚合UV', dir: 'desc' }};
			
 
				+        let filteredData = [...rawData];
			
 
				+
			
 
				+        // 渲染图表
			
 
				+        function renderCharts() {{
			
 
				+            groupStats.forEach((gs, idx) => {{
			
 
				+                const chart = echarts.init(document.getElementById('chart_' + idx));
			
 
				+                const groups = gs.stats.map(s => s.group);
			
 
				+
			
 
				+                const series = rateCols.map((rc, i) => ({{
			
 
				+                    name: colLabels[rc] || rc,
			
 
				+                    type: 'bar',
			
 
				+                    data: gs.stats.map(s => s[rc] || 0),
			
 
				+                    itemStyle: {{ color: ['#667eea', '#f093fb', '#43e97b'][i] }}
			
 
				+                }}));
			
 
				+
			
 
				+                chart.setOption({{
			
 
				+                    title: {{ text: gs.label, left: 'center', textStyle: {{ fontSize: 14 }} }},
			
 
				+                    tooltip: {{ trigger: 'axis' }},
			
 
				+                    legend: {{ bottom: 0, textStyle: {{ fontSize: 10 }} }},
			
 
				+                    xAxis: {{ type: 'category', data: groups, axisLabel: {{ fontSize: 11 }} }},
			
 
				+                    yAxis: {{ type: 'value', name: '回流率(%)', axisLabel: {{ fontSize: 10 }} }},
			
 
				+                    series: series,
			
 
				+                    grid: {{ top: 50, bottom: 60, left: 50, right: 20 }}
			
 
				+                }});
			
 
				+            }});
			
 
				+        }}
			
 
				+
			
 
				+        // 渲染统计表格
			
 
				+        function renderStatsTables() {{
			
 
				+            const container = document.getElementById('statsTablesContainer');
			
 
				+            let html = '<div style="display:grid;grid-template-columns:repeat(2,1fr);gap:20px;">';
			
 
				+
			
 
				+            groupStats.forEach(gs => {{
			
 
				+                html += `<div>
			
 
				+                    <h4 style="margin-bottom:10px;color:#333;">${{gs.label}}</h4>
			
 
				+                    <table class="stats-table">
			
 
				+                        <tr>
			
 
				+                            <th>相似度区间</th>
			
 
				+                            <th>记录数</th>
			
 
				+                            <th>点击UV</th>
			
 
				+                            ${{rateCols.map(rc => `<th>${{colLabels[rc] || rc}}</th>`).join('')}}
			
 
				+                        </tr>`;
			
 
				+
			
 
				+                gs.stats.forEach(row => {{
			
 
				+                    html += `<tr>
			
 
				+                        <td>${{row.group}}</td>
			
 
				+                        <td class="num">${{row.count.toLocaleString()}}</td>
			
 
				+                        <td class="num">${{row.click_uv.toLocaleString()}}</td>
			
 
				+                        ${{rateCols.map(rc => `<td class="num">${{(row[rc] || 0).toFixed(4)}}</td>`).join('')}}
			
 
				+                    </tr>`;
			
 
				+                }});
			
 
				+
			
 
				+                html += '</table></div>';
			
 
				+            }});
			
 
				+
			
 
				+            html += '</div>';
			
 
				+            container.innerHTML = html;
			
 
				+        }}
			
 
				+
			
 
				+        // 渲染表头
			
 
				+        function renderHeader() {{
			
 
				+            const headerRow = document.getElementById('headerRow');
			
 
				+            headerRow.innerHTML = tableCols.map(col => {{
			
 
				+                const label = colLabels[col] || col;
			
 
				+                const isSorted = currentSort.col === col;
			
 
				+                const icon = isSorted ? (currentSort.dir === 'asc' ? '▲' : '▼') : '▼';
			
 
				+                return `<th class="${{isSorted ? 'sorted' : ''}}" onclick="sortBy('${{col}}')">
			
 
				+                    ${{label}}<span class="sort-icon">${{icon}}</span>
			
 
				+                </th>`;
			
 
				+            }}).join('');
			
 
				+        }}
			
 
				+
			
 
				+        // 计算每列的最大最小值（用于渐变）
			
 
				+        function getColumnRange(data, col) {{
			
 
				+            const vals = data.map(r => r[col]).filter(v => typeof v === 'number' && !isNaN(v));
			
 
				+            if (vals.length === 0) return {{ min: 0, max: 1 }};
			
 
				+            return {{ min: Math.min(...vals), max: Math.max(...vals) }};
			
 
				+        }}
			
 
				+
			
 
				+        // 根据值获取渐变背景色（绿色系）
			
 
				+        function getGradientColor(val, min, max) {{
			
 
				+            if (typeof val !== 'number' || isNaN(val)) return '';
			
 
				+            const ratio = max > min ? (val - min) / (max - min) : 0;
			
 
				+            // 绿色系，alpha 从 0.05 到 0.6
			
 
				+            const alpha = 0.05 + ratio * 0.55;
			
 
				+            return `rgba(34, 197, 94, ${{alpha}})`;
			
 
				+        }}
			
 
				+
			
 
				+        // 渲染表格数据
			
 
				+        function renderTable() {{
			
 
				+            const tbody = document.getElementById('tableBody');
			
 
				+            const search = document.getElementById('searchInput').value.toLowerCase();
			
 
				+            const simFilter = document.getElementById('simFilter').value;
			
 
				+
			
 
				+            // 筛选条件
			
 
				+            const minUv = parseInt(document.getElementById('minUvInput').value) || 0;
			
 
				+            const dateFilter = document.getElementById('dateFilter').value;
			
 
				+            const channelFilter = document.getElementById('channelFilter').value;
			
 
				+            const hotsencetypeFilter = document.getElementById('hotsencetypeFilter').value;
			
 
				+            const partnerFilter = document.getElementById('partnerFilter').value;
			
 
				+            const accountFilter = document.getElementById('accountFilter').value;
			
 
				+
			
 
				+            // 过滤
			
 
				+            filteredData = rawData.filter(row => {{
			
 
				+                // 日期过滤（转字符串比较）
			
 
				+                if (dateFilter && String(row['dt']) !== dateFilter) return false;
			
 
				+
			
 
				+                // 渠道过滤
			
 
				+                if (channelFilter && row['channel'] !== channelFilter) return false;
			
 
				+
			
 
				+                // 场景类型过滤（转字符串比较）
			
 
				+                if (hotsencetypeFilter && String(row['hotsencetype']) !== hotsencetypeFilter) return false;
			
 
				+
			
 
				+                // 合作方过滤
			
 
				+                if (partnerFilter && row['合作方名'] !== partnerFilter) return false;
			
 
				+
			
 
				+                // 公众号过滤
			
 
				+                if (accountFilter && row['公众号名'] !== accountFilter) return false;
			
 
				+
			
 
				+                // 点击UV过滤
			
 
				+                if (row['点击uv'] < minUv) return false;
			
 
				+
			
 
				+                // 搜索过滤
			
 
				+                if (search) {{
			
 
				+                    const title1 = (row['分享标题'] || '').toLowerCase();
			
 
				+                    const title2 = (row['title'] || '').toLowerCase();
			
 
				+                    if (!title1.includes(search) && !title2.includes(search)) return false;
			
 
				+                }}
			
 
				+
			
 
				+                // 相似度过滤
			
 
				+                if (simFilter) {{
			
 
				+                    const simVal = validCols.map(c => row[c]).find(v => v !== '' && v !== null);
			
 
				+                    if (simVal === undefined) return false;
			
 
				+                    if (simFilter === 'high' && simVal < 0.7) return false;
			
 
				+                    if (simFilter === 'mid' && (simVal < 0.3 || simVal >= 0.7)) return false;
			
 
				+                    if (simFilter === 'low' && simVal >= 0.3) return false;
			
 
				+                }}
			
 
				+
			
 
				+                return true;
			
 
				+            }});
			
 
				+
			
 
				+            // 排序
			
 
				+            filteredData.sort((a, b) => {{
			
 
				+                let va = a[currentSort.col];
			
 
				+                let vb = b[currentSort.col];
			
 
				+
			
 
				+                // 处理空值
			
 
				+                if (va === '' || va === null) va = currentSort.dir === 'asc' ? Infinity : -Infinity;
			
 
				+                if (vb === '' || vb === null) vb = currentSort.dir === 'asc' ? Infinity : -Infinity;
			
 
				+
			
 
				+                // 数值比较
			
 
				+                if (typeof va === 'number' && typeof vb === 'number') {{
			
 
				+                    return currentSort.dir === 'asc' ? va - vb : vb - va;
			
 
				+                }}
			
 
				+
			
 
				+                // 字符串比较
			
 
				+                va = String(va);
			
 
				+                vb = String(vb);
			
 
				+                return currentSort.dir === 'asc' ? va.localeCompare(vb) : vb.localeCompare(va);
			
 
				+            }});
			
 
				+
			
 
				+            // 计算全局列范围（用于渐变）
			
 
				+            const globalRanges = {{}};
			
 
				+            tableCols.forEach(col => {{
			
 
				+                globalRanges[col] = getColumnRange(filteredData, col);
			
 
				+            }});
			
 
				+
			
 
				+            // 渲染行
			
 
				+            tbody.innerHTML = filteredData.map(row => {{
			
 
				+                return '<tr>' + tableCols.map(col => {{
			
 
				+                    let val = row[col];
			
 
				+                    const isNum = typeof val === 'number';
			
 
				+
			
 
				+                    if (val === '' || val === null || val === undefined) {{
			
 
				+                        return '<td>-</td>';
			
 
				+                    }}
			
 
				+
			
 
				+                    // 分享封面 - 显示为图片缩略图，点击放大预览
			
 
				+                    if (col === '分享封面') {{
			
 
				+                        return `<td><img src="${{val}}" style="max-width:80px;max-height:60px;cursor:pointer;border-radius:4px;" onclick="showImgModal('${{val}}')" onerror="this.style.display='none'"></td>`;
			
 
				+                    }}
			
 
				+
			
 
				+                    // videoid - 显示为超链接（优先处理，避免被数字判断拦截）
			
 
				+                    if (col === 'videoid') {{
			
 
				+                        return `<td><a href="https://admin.piaoquantv.com/cms/post-detail/${{val}}/detail" target="_blank" style="color:#667eea;text-decoration:none;">${{val}}</a></td>`;
			
 
				+                    }}
			
 
				+
			
 
				+                    // 日期、场景类型 - 强制显示为字符串
			
 
				+                    if (col === 'dt' || col === 'hotsencetype') {{
			
 
				+                        return `<td>${{val}}</td>`;
			
 
				+                    }}
			
 
				+
			
 
				+                    if (isNum) {{
			
 
				+                        const range = globalRanges[col] || {{ min: 0, max: 1 }};
			
 
				+                        let displayVal = '';
			
 
				+                        let needGradient = false;
			
 
				+
			
 
				+                        // 相似度列 - 需要渐变
			
 
				+                        if (col.includes('相似度')) {{
			
 
				+                            displayVal = val.toFixed(3);
			
 
				+                            needGradient = true;
			
 
				+                        }}
			
 
				+                        // 回流率列 - 需要渐变
			
 
				+                        else if (col.includes('回流率')) {{
			
 
				+                            displayVal = val.toFixed(4);
			
 
				+                            needGradient = true;
			
 
				+                        }}
			
 
				+                        // UV列 - 不需要渐变
			
 
				+                        else if (col.includes('UV') || col.includes('uv')) {{
			
 
				+                            displayVal = Math.round(val).toLocaleString();
			
 
				+                        }}
			
 
				+                        else {{
			
 
				+                            displayVal = val.toFixed(2);
			
 
				+                        }}
			
 
				+
			
 
				+                        const bgColor = needGradient ? getGradientColor(val, range.min, range.max) : '';
			
 
				+                        const style = bgColor ? `style="background:${{bgColor}}"` : '';
			
 
				+                        return `<td class="num" ${{style}}>${{displayVal}}</td>`;
			
 
				+                    }}
			
 
				+
			
 
				+                    // 标题列 - 允许换行不截断
			
 
				+                    if (col === 'title' || col === '分享标题' || col === '文章标题') {{
			
 
				+                        return `<td class="wrap">${{val}}</td>`;
			
 
				+                    }}
			
 
				+
			
 
				+                    // 其他文本列，截断显示
			
 
				+                    const displayVal = String(val).substring(0, 40) + (String(val).length > 40 ? '...' : '');
			
 
				+                    return `<td title="${{val}}">${{displayVal}}</td>`;
			
 
				+                }}).join('') + '</tr>';
			
 
				+            }}).join('');
			
 
				+
			
 
				+            document.getElementById('rowCount').textContent = `显示 ${{filteredData.length}} 条`;
			
 
				+        }}
			
 
				+
			
 
				+        // 排序
			
 
				+        function sortBy(col) {{
			
 
				+            if (currentSort.col === col) {{
			
 
				+                currentSort.dir = currentSort.dir === 'asc' ? 'desc' : 'asc';
			
 
				+            }} else {{
			
 
				+                currentSort.col = col;
			
 
				+                currentSort.dir = 'desc';
			
 
				+            }}
			
 
				+            renderHeader();
			
 
				+            renderTable();
			
 
				+        }}
			
 
				+
			
 
				+        // 图片预览功能
			
 
				+        function showImgModal(src) {{
			
 
				+            document.getElementById('modalImg').src = src;
			
 
				+            document.getElementById('imgModal').classList.add('show');
			
 
				+        }}
			
 
				+        function closeImgModal() {{
			
 
				+            document.getElementById('imgModal').classList.remove('show');
			
 
				+        }}
			
 
				+        // ESC 关闭预览
			
 
				+        document.addEventListener('keydown', (e) => {{
			
 
				+            if (e.key === 'Escape') closeImgModal();
			
 
				+        }});
			
 
				+
			
 
				+        // 事件绑定
			
 
				+        document.getElementById('searchInput').addEventListener('input', renderTable);
			
 
				+        document.getElementById('simFilter').addEventListener('change', renderTable);
			
 
				+        document.getElementById('minUvInput').addEventListener('input', renderTable);
			
 
				+        document.getElementById('dateFilter').addEventListener('change', renderTable);
			
 
				+        document.getElementById('channelFilter').addEventListener('change', renderTable);
			
 
				+        document.getElementById('hotsencetypeFilter').addEventListener('change', renderTable);
			
 
				+        document.getElementById('partnerFilter').addEventListener('change', renderTable);
			
 
				+        document.getElementById('accountFilter').addEventListener('change', renderTable);
			
 
				+
			
 
				+        // 初始化
			
 
				+        renderCharts();
			
 
				+        renderStatsTables();
			
 
				+        renderHeader();
			
 
				+        renderTable();
			
 
				+
			
 
				+        // 响应式
			
 
				+        window.addEventListener('resize', () => {{
			
 
				+            groupStats.forEach((_, idx) => {{
			
 
				+                echarts.getInstanceByDom(document.getElementById('chart_' + idx))?.resize();
			
 
				+            }});
			
 
				+        }});
			
 
				+    </script>
			
 
				+</body>
			
 
				+</html>
			
 
				+'''
			
 
				+
			
 
				+# 保存 HTML
			
 
				+output_path = output_dir / '素材视频内容分析.html'
			
 
				+with open(output_path, 'w', encoding='utf-8') as f:
			
 
				+    f.write(html_content)
			
 
				+
			
 
				+print(f"\n已保存: {output_path}")
			
--- a/tasks/头部/进入前的I与头部I的相关性分析/进入前的I与头部I的相关性分析.sql
+++ b/tasks/头部/进入前的I与头部I的相关性分析/进入前的I与头部I的相关性分析.sql
@@ -0,0 +1,101 @@
 
				+-- 素材与头部视频相关性分析（单日查询）
			
 
				+-- 参数: ${dt} - 日期，格式 YYYYMMDD
			
 
				+-- JOIN 视频基础信息（关键词、口播等），分析素材与视频内容的匹配关系
			
 
				+
			
 
				+SELECT  a.dt
			
 
				+        ,a.channel
			
 
				+        ,a.hotsencetype
			
 
				+        ,a.合作方名
			
 
				+        ,a.公众号名
			
 
				+        -- 素材维度
			
 
				+        ,a.rootsourceid
			
 
				+        ,a.文章标题
			
 
				+        ,a.分享标题
			
 
				+        ,a.分享封面
			
 
				+        -- 视频基础信息
			
 
				+        ,a.videoid
			
 
				+        ,a.title
			
 
				+        ,a.merge一级品类
			
 
				+        ,a.merge二级品类
			
 
				+        -- 视频内容信息（来自 video_dimension 表）
			
 
				+        ,b.视频关键词
			
 
				+        ,b.视频口播
			
 
				+        ,b.视频主题
			
 
				+        ,b.视频场景
			
 
				+        ,b.情感倾向
			
 
				+        ,b.视频风格
			
 
				+        ,b.传播性判断
			
 
				+        ,b.推测观众年龄段
			
 
				+        ,b.是否有片尾引导
			
 
				+        ,b.引导强度
			
 
				+        -- 核心指标
			
 
				+        ,COUNT(DISTINCT a.mid) AS 点击uv
			
 
				+        ,COUNT(DISTINCT CASE WHEN a.是否进入推荐 = '1' THEN a.mid END) / COUNT(DISTINCT a.mid) AS 进入推荐率
			
 
				+        ,(SUM(CASE WHEN a.再分享群聊回流uv > 0 THEN a.再分享群聊回流uv ELSE 0 END)
			
 
				+          + SUM(CASE WHEN a.再分享单聊回流uv > 0 THEN a.再分享单聊回流uv ELSE 0 END)
			
 
				+         ) / (COUNT(DISTINCT a.mid) + 10) AS 再分享回流率
			
 
				+        ,(SUM(CASE WHEN a.是否原视频 = '是' THEN a.再分享群聊回流uv END)
			
 
				+          + SUM(CASE WHEN a.是否原视频 = '是' THEN a.再分享单聊回流uv END)
			
 
				+         ) / (COUNT(DISTINCT a.mid) + 10) AS 原视频再分享回流率
			
 
				+        ,(SUM(CASE WHEN a.是否原视频 = '否' THEN a.再分享群聊回流uv END)
			
 
				+          + SUM(CASE WHEN a.是否原视频 = '否' THEN a.再分享单聊回流uv END)
			
 
				+         ) / (COUNT(DISTINCT a.mid) + 10) AS 推荐再分享回流率
			
 
				+        ,SUM(CASE WHEN a.再分享群聊回流uv > 0 THEN a.再分享群聊回流uv ELSE 0 END)
			
 
				+         + SUM(CASE WHEN a.再分享单聊回流uv > 0 THEN a.再分享单聊回流uv ELSE 0 END) AS 再分享回流uv
			
 
				+FROM    loghubods.opengid_base_data a
			
 
				+LEFT JOIN (
			
 
				+    SELECT  视频id
			
 
				+            ,视频关键词
			
 
				+            ,视频口播
			
 
				+            ,视频主题
			
 
				+            ,视频场景
			
 
				+            ,情感倾向
			
 
				+            ,视频风格
			
 
				+            ,传播性判断
			
 
				+            ,推测观众年龄段
			
 
				+            ,是否有片尾引导
			
 
				+            ,引导强度
			
 
				+    FROM    loghubods.video_dimension_detail_add_column
			
 
				+    WHERE   dt = '${dt}'
			
 
				+    GROUP BY 视频id
			
 
				+             ,视频关键词
			
 
				+             ,视频口播
			
 
				+             ,视频主题
			
 
				+             ,视频场景
			
 
				+             ,情感倾向
			
 
				+             ,视频风格
			
 
				+             ,传播性判断
			
 
				+             ,推测观众年龄段
			
 
				+             ,是否有片尾引导
			
 
				+             ,引导强度
			
 
				+) b ON a.videoid = b.视频id
			
 
				+WHERE   a.dt = '${dt}'
			
 
				+AND     a.usersharedepth = 0
			
 
				+AND     a.videoid IS NOT NULL
			
 
				+AND     (a.文章标题 IS NOT NULL AND a.文章标题 != '' OR a.分享标题 IS NOT NULL AND a.分享标题 != '')
			
 
				+GROUP BY a.dt
			
 
				+         ,a.channel
			
 
				+         ,a.hotsencetype
			
 
				+         ,a.合作方名
			
 
				+         ,a.公众号名
			
 
				+         ,a.rootsourceid
			
 
				+         ,a.文章标题
			
 
				+         ,a.分享标题
			
 
				+         ,a.分享封面
			
 
				+         ,a.videoid
			
 
				+         ,a.title
			
 
				+         ,a.merge一级品类
			
 
				+         ,a.merge二级品类
			
 
				+         ,b.视频关键词
			
 
				+         ,b.视频口播
			
 
				+         ,b.视频主题
			
 
				+         ,b.视频场景
			
 
				+         ,b.情感倾向
			
 
				+         ,b.视频风格
			
 
				+         ,b.传播性判断
			
 
				+         ,b.推测观众年龄段
			
 
				+         ,b.是否有片尾引导
			
 
				+         ,b.引导强度
			
 
				+ORDER BY 点击uv DESC
			
 
				+LIMIT   50000
			
 
				+;
			
--- a/tasks/承接/头部品类与承接品类分析/.DS_Store
+++ b/tasks/承接/头部品类与承接品类分析/.DS_Store
--- a/tasks/承接/头部品类与承接品类分析/_archive/analyze_category_correlation.py
+++ b/tasks/承接/头部品类与承接品类分析/_archive/analyze_category_correlation.py
@@ -0,0 +1,288 @@
 
				+#!/usr/bin/env python
			
 
				+# coding=utf-8
			
 
				+"""
			
 
				+品类承接裂变率分析
			
 
				+分析目标:
			
 
				+1. 进入/承接品类一致时，承接裂变率(vov)是否更高
			
 
				+2. 不同品类组合间的承接裂变率是否存在稳定的相关性
			
 
				+"""
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+from pathlib import Path
			
 
				+from scipy import stats
			
 
				+import matplotlib.pyplot as plt
			
 
				+import matplotlib
			
 
				+matplotlib.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'PingFang SC']
			
 
				+matplotlib.rcParams['axes.unicode_minus'] = False
			
 
				+
			
 
				+# ========== 数据加载 ==========
			
 
				+task_dir = Path(__file__).parent
			
 
				+output_dir = task_dir / "output"
			
 
				+
			
 
				+csv_files = [f for f in output_dir.glob("query_*.csv") if not f.name.endswith('.html')]
			
 
				+if not csv_files:
			
 
				+    print("没有找到数据文件，请先运行 query.sql")
			
 
				+    exit(1)
			
 
				+
			
 
				+latest_file = max(csv_files, key=lambda x: x.stat().st_mtime)
			
 
				+df = pd.read_csv(latest_file)
			
 
				+
			
 
				+print("=" * 60)
			
 
				+print("品类承接裂变率分析")
			
 
				+print("=" * 60)
			
 
				+print(f"数据文件: {latest_file.name}")
			
 
				+print(f"时间范围: {df['dt'].min()} ~ {df['dt'].max()}")
			
 
				+print(f"记录数: {len(df)}")
			
 
				+print()
			
 
				+
			
 
				+# 过滤掉 headvideoid为空 的记录（无法判断进入品类）
			
 
				+df_valid = df[~df['head_cate2'].isin(['headvideoid为空', '未匹配品类'])].copy()
			
 
				+print(f"有效记录数(排除headvideoid为空/未匹配): {len(df_valid)}")
			
 
				+
			
 
				+# ========== 分析1: 品类一致性分析 ==========
			
 
				+print("\n" + "=" * 60)
			
 
				+print("分析1: 进入/承接品类一致性 vs 承接裂变率(vov)")
			
 
				+print("=" * 60)
			
 
				+
			
 
				+# 标记是否为同品类
			
 
				+df_valid['is_same_cate'] = df_valid['head_cate2'] == df_valid['rec_cate2']
			
 
				+
			
 
				+# 按人群分组分析
			
 
				+for crowd in ['内部', '外部0层', '外部裂变']:
			
 
				+    crowd_df = df_valid[df_valid['crowd'] == crowd]
			
 
				+    if len(crowd_df) == 0:
			
 
				+        continue
			
 
				+
			
 
				+    # 同品类 vs 跨品类
			
 
				+    same_cate = crowd_df[crowd_df['is_same_cate']]
			
 
				+    diff_cate = crowd_df[~crowd_df['is_same_cate']]
			
 
				+
			
 
				+    # 加权平均 vov (按曝光量加权)
			
 
				+    same_vov = (same_cate['new_exposure_cnt'].sum() / same_cate['exp'].sum()) if same_cate['exp'].sum() > 0 else 0
			
 
				+    diff_vov = (diff_cate['new_exposure_cnt'].sum() / diff_cate['exp'].sum()) if diff_cate['exp'].sum() > 0 else 0
			
 
				+
			
 
				+    print(f"\n【{crowd}】")
			
 
				+    print(f"  同品类承接: 曝光 {same_cate['exp'].sum():,.0f}, vov = {same_vov:.4f}")
			
 
				+    print(f"  跨品类承接: 曝光 {diff_cate['exp'].sum():,.0f}, vov = {diff_vov:.4f}")
			
 
				+    print(f"  同品类/跨品类 vov比值: {same_vov/diff_vov:.2f}x" if diff_vov > 0 else "  跨品类无数据")
			
 
				+
			
 
				+    # 统计检验: Mann-Whitney U检验 (非参数检验)
			
 
				+    if len(same_cate) >= 5 and len(diff_cate) >= 5:
			
 
				+        stat, pvalue = stats.mannwhitneyu(same_cate['vov'], diff_cate['vov'], alternative='greater')
			
 
				+        print(f"  Mann-Whitney U检验 (同品类vov > 跨品类vov): p-value = {pvalue:.4f}")
			
 
				+        print(f"  结论: {'显著' if pvalue < 0.05 else '不显著'} (α=0.05)")
			
 
				+
			
 
				+# 整体汇总
			
 
				+print("\n【整体汇总】")
			
 
				+same_all = df_valid[df_valid['is_same_cate']]
			
 
				+diff_all = df_valid[~df_valid['is_same_cate']]
			
 
				+same_vov_all = same_all['new_exposure_cnt'].sum() / same_all['exp'].sum()
			
 
				+diff_vov_all = diff_all['new_exposure_cnt'].sum() / diff_all['exp'].sum()
			
 
				+print(f"  同品类承接: 曝光 {same_all['exp'].sum():,.0f}, vov = {same_vov_all:.4f}")
			
 
				+print(f"  跨品类承接: 曝光 {diff_all['exp'].sum():,.0f}, vov = {diff_vov_all:.4f}")
			
 
				+print(f"  同品类/跨品类 vov比值: {same_vov_all/diff_vov_all:.2f}x")
			
 
				+
			
 
				+# ========== 分析2: 品类组合稳定性分析 ==========
			
 
				+print("\n" + "=" * 60)
			
 
				+print("分析2: 品类组合间的承接裂变率稳定性相关性")
			
 
				+print("=" * 60)
			
 
				+
			
 
				+# 2.1 跨日期稳定性: 同一品类组合在不同日期的vov相关性
			
 
				+print("\n【2.1 跨日期稳定性】")
			
 
				+print("分析同一品类组合在不同日期的vov是否稳定")
			
 
				+
			
 
				+dates = sorted(df_valid['dt'].unique())
			
 
				+if len(dates) >= 2:
			
 
				+    # 创建品类组合 pivot table
			
 
				+    df_valid['cate_pair'] = df_valid['head_cate2'] + ' → ' + df_valid['rec_cate2']
			
 
				+
			
 
				+    # 按日期和品类组合汇总
			
 
				+    daily_vov = df_valid.groupby(['dt', 'cate_pair']).apply(
			
 
				+        lambda x: x['new_exposure_cnt'].sum() / x['exp'].sum()
			
 
				+    ).unstack(level=0)
			
 
				+
			
 
				+    # 计算相邻日期间的相关性
			
 
				+    date_correlations = []
			
 
				+    for i in range(len(dates) - 1):
			
 
				+        d1, d2 = dates[i], dates[i+1]
			
 
				+        if d1 in daily_vov.columns and d2 in daily_vov.columns:
			
 
				+            valid_pairs = daily_vov[[d1, d2]].dropna()
			
 
				+            if len(valid_pairs) >= 10:
			
 
				+                corr, pval = stats.pearsonr(valid_pairs[d1], valid_pairs[d2])
			
 
				+                date_correlations.append({'date1': d1, 'date2': d2, 'corr': corr, 'pval': pval, 'n': len(valid_pairs)})
			
 
				+
			
 
				+    if date_correlations:
			
 
				+        corr_df = pd.DataFrame(date_correlations)
			
 
				+        print(f"  相邻日期vov相关性:")
			
 
				+        for _, row in corr_df.iterrows():
			
 
				+            print(f"    {row['date1']} vs {row['date2']}: r={row['corr']:.3f}, p={row['pval']:.4f}, n={row['n']}")
			
 
				+        print(f"  平均相关系数: {corr_df['corr'].mean():.3f}")
			
 
				+        print(f"  结论: 品类组合的vov在跨日期间{'高度稳定' if corr_df['corr'].mean() > 0.7 else '较为稳定' if corr_df['corr'].mean() > 0.5 else '不太稳定'}")
			
 
				+
			
 
				+# 2.2 跨人群稳定性: 同一品类组合在不同人群的vov相关性
			
 
				+print("\n【2.2 跨人群稳定性】")
			
 
				+print("分析同一品类组合在不同人群的vov排序是否一致")
			
 
				+
			
 
				+crowd_vov = df_valid.groupby(['crowd', 'cate_pair']).apply(
			
 
				+    lambda x: x['new_exposure_cnt'].sum() / x['exp'].sum()
			
 
				+).unstack(level=0)
			
 
				+
			
 
				+crowds = ['内部', '外部0层', '外部裂变']
			
 
				+crowd_pairs = [(crowds[i], crowds[j]) for i in range(len(crowds)) for j in range(i+1, len(crowds))]
			
 
				+
			
 
				+for c1, c2 in crowd_pairs:
			
 
				+    if c1 in crowd_vov.columns and c2 in crowd_vov.columns:
			
 
				+        valid = crowd_vov[[c1, c2]].dropna()
			
 
				+        if len(valid) >= 10:
			
 
				+            corr, pval = stats.pearsonr(valid[c1], valid[c2])
			
 
				+            spearman_corr, spearman_pval = stats.spearmanr(valid[c1], valid[c2])
			
 
				+            print(f"  {c1} vs {c2}:")
			
 
				+            print(f"    Pearson r = {corr:.3f} (p={pval:.4f})")
			
 
				+            print(f"    Spearman ρ = {spearman_corr:.3f} (p={spearman_pval:.4f})")
			
 
				+            print(f"    样本数: {len(valid)} 品类组合")
			
 
				+
			
 
				+# 2.3 高/低裂变品类组合识别
			
 
				+print("\n【2.3 稳定的高/低裂变品类组合】")
			
 
				+print("识别在所有人群中都表现稳定的品类组合")
			
 
				+
			
 
				+# 计算每个品类组合在所有人群的平均vov
			
 
				+overall_vov = df_valid.groupby('cate_pair').apply(
			
 
				+    lambda x: pd.Series({
			
 
				+        'vov': x['new_exposure_cnt'].sum() / x['exp'].sum(),
			
 
				+        'exp': x['exp'].sum(),
			
 
				+        'crowd_count': x['crowd'].nunique()
			
 
				+    })
			
 
				+)
			
 
				+
			
 
				+# 只看在多个人群都有数据的组合
			
 
				+stable_pairs = overall_vov[overall_vov['crowd_count'] >= 2].copy()
			
 
				+stable_pairs = stable_pairs.sort_values('vov', ascending=False)
			
 
				+
			
 
				+print(f"\n  Top 10 高裂变品类组合 (vov最高):")
			
 
				+for i, (pair, row) in enumerate(stable_pairs.head(10).iterrows(), 1):
			
 
				+    print(f"    {i}. {pair}: vov={row['vov']:.4f}, 曝光={row['exp']:,.0f}")
			
 
				+
			
 
				+print(f"\n  Top 10 低裂变品类组合 (vov最低):")
			
 
				+for i, (pair, row) in enumerate(stable_pairs.tail(10).iloc[::-1].iterrows(), 1):
			
 
				+    print(f"    {i}. {pair}: vov={row['vov']:.4f}, 曝光={row['exp']:,.0f}")
			
 
				+
			
 
				+# ========== 分析3: 品类亲和性矩阵 ==========
			
 
				+print("\n" + "=" * 60)
			
 
				+print("分析3: 品类亲和性矩阵 (进入品类 → 承接品类)")
			
 
				+print("=" * 60)
			
 
				+
			
 
				+# 计算每个head_cate2的基准vov
			
 
				+head_baseline = df_valid.groupby('head_cate2').apply(
			
 
				+    lambda x: x['new_exposure_cnt'].sum() / x['exp'].sum()
			
 
				+).to_dict()
			
 
				+
			
 
				+# 计算亲和性: 特定组合vov / 进入品类基准vov
			
 
				+affinity_data = []
			
 
				+for (head, rec), grp in df_valid.groupby(['head_cate2', 'rec_cate2']):
			
 
				+    if grp['exp'].sum() >= 10000:  # 只看曝光量足够的组合
			
 
				+        pair_vov = grp['new_exposure_cnt'].sum() / grp['exp'].sum()
			
 
				+        baseline = head_baseline.get(head, 1)
			
 
				+        affinity = pair_vov / baseline if baseline > 0 else 0
			
 
				+        affinity_data.append({
			
 
				+            'head_cate2': head,
			
 
				+            'rec_cate2': rec,
			
 
				+            'vov': pair_vov,
			
 
				+            'baseline_vov': baseline,
			
 
				+            'affinity': affinity,
			
 
				+            'exp': grp['exp'].sum()
			
 
				+        })
			
 
				+
			
 
				+affinity_df = pd.DataFrame(affinity_data)
			
 
				+
			
 
				+print("\n  高亲和组合 (affinity > 1.2, 即vov比基准高20%):")
			
 
				+high_affinity = affinity_df[affinity_df['affinity'] > 1.2].sort_values('affinity', ascending=False).head(15)
			
 
				+for _, row in high_affinity.iterrows():
			
 
				+    print(f"    {row['head_cate2']} → {row['rec_cate2']}: affinity={row['affinity']:.2f}, vov={row['vov']:.4f}")
			
 
				+
			
 
				+print("\n  低亲和组合 (affinity < 0.8, 即vov比基准低20%):")
			
 
				+low_affinity = affinity_df[affinity_df['affinity'] < 0.8].sort_values('affinity').head(15)
			
 
				+for _, row in low_affinity.iterrows():
			
 
				+    print(f"    {row['head_cate2']} → {row['rec_cate2']}: affinity={row['affinity']:.2f}, vov={row['vov']:.4f}")
			
 
				+
			
 
				+# ========== 可视化 ==========
			
 
				+print("\n" + "=" * 60)
			
 
				+print("生成可视化图表...")
			
 
				+print("=" * 60)
			
 
				+
			
 
				+fig, axes = plt.subplots(2, 2, figsize=(14, 12))
			
 
				+
			
 
				+# 图1: 同品类 vs 跨品类 vov对比
			
 
				+ax1 = axes[0, 0]
			
 
				+crowds = ['内部', '外部0层', '外部裂变']
			
 
				+same_vovs = []
			
 
				+diff_vovs = []
			
 
				+for crowd in crowds:
			
 
				+    crowd_df = df_valid[df_valid['crowd'] == crowd]
			
 
				+    same = crowd_df[crowd_df['is_same_cate']]
			
 
				+    diff = crowd_df[~crowd_df['is_same_cate']]
			
 
				+    same_vovs.append(same['new_exposure_cnt'].sum() / same['exp'].sum() if same['exp'].sum() > 0 else 0)
			
 
				+    diff_vovs.append(diff['new_exposure_cnt'].sum() / diff['exp'].sum() if diff['exp'].sum() > 0 else 0)
			
 
				+
			
 
				+x = np.arange(len(crowds))
			
 
				+width = 0.35
			
 
				+ax1.bar(x - width/2, same_vovs, width, label='同品类承接', color='#4CAF50')
			
 
				+ax1.bar(x + width/2, diff_vovs, width, label='跨品类承接', color='#2196F3')
			
 
				+ax1.set_ylabel('承接裂变率 (vov)')
			
 
				+ax1.set_title('同品类 vs 跨品类 承接裂变率对比')
			
 
				+ax1.set_xticks(x)
			
 
				+ax1.set_xticklabels(crowds)
			
 
				+ax1.legend()
			
 
				+ax1.grid(axis='y', alpha=0.3)
			
 
				+
			
 
				+# 图2: 品类组合vov分布
			
 
				+ax2 = axes[0, 1]
			
 
				+ax2.hist(stable_pairs['vov'], bins=30, edgecolor='black', alpha=0.7, color='#FF9800')
			
 
				+ax2.axvline(stable_pairs['vov'].median(), color='red', linestyle='--', label=f'中位数: {stable_pairs["vov"].median():.4f}')
			
 
				+ax2.axvline(stable_pairs['vov'].mean(), color='blue', linestyle='--', label=f'均值: {stable_pairs["vov"].mean():.4f}')
			
 
				+ax2.set_xlabel('承接裂变率 (vov)')
			
 
				+ax2.set_ylabel('品类组合数')
			
 
				+ax2.set_title('品类组合vov分布')
			
 
				+ax2.legend()
			
 
				+
			
 
				+# 图3: 跨人群vov相关性散点图 (内部 vs 外部0层)
			
 
				+ax3 = axes[1, 0]
			
 
				+if '内部' in crowd_vov.columns and '外部0层' in crowd_vov.columns:
			
 
				+    valid = crowd_vov[['内部', '外部0层']].dropna()
			
 
				+    ax3.scatter(valid['内部'], valid['外部0层'], alpha=0.5, s=30)
			
 
				+    # 添加对角线
			
 
				+    max_val = max(valid['内部'].max(), valid['外部0层'].max())
			
 
				+    ax3.plot([0, max_val], [0, max_val], 'r--', alpha=0.5, label='y=x')
			
 
				+    ax3.set_xlabel('内部 vov')
			
 
				+    ax3.set_ylabel('外部0层 vov')
			
 
				+    ax3.set_title('跨人群vov相关性 (内部 vs 外部0层)')
			
 
				+    corr, _ = stats.pearsonr(valid['内部'], valid['外部0层'])
			
 
				+    ax3.text(0.05, 0.95, f'r = {corr:.3f}', transform=ax3.transAxes, fontsize=12, verticalalignment='top')
			
 
				+    ax3.legend()
			
 
				+
			
 
				+# 图4: 亲和性分布
			
 
				+ax4 = axes[1, 1]
			
 
				+ax4.hist(affinity_df['affinity'], bins=30, edgecolor='black', alpha=0.7, color='#9C27B0')
			
 
				+ax4.axvline(1.0, color='red', linestyle='--', label='基准线 (affinity=1)')
			
 
				+ax4.set_xlabel('亲和性 (vov / 基准vov)')
			
 
				+ax4.set_ylabel('品类组合数')
			
 
				+ax4.set_title('品类亲和性分布')
			
 
				+ax4.legend()
			
 
				+
			
 
				+plt.tight_layout()
			
 
				+plt.savefig(output_dir / 'category_correlation_analysis.png', dpi=150, bbox_inches='tight')
			
 
				+print(f"图表已保存: {output_dir / 'category_correlation_analysis.png'}")
			
 
				+
			
 
				+# ========== 导出分析结果 ==========
			
 
				+print("\n导出分析结果...")
			
 
				+
			
 
				+# 导出品类组合vov排名
			
 
				+stable_pairs.to_csv(output_dir / 'category_pair_vov_ranking.csv')
			
 
				+print(f"品类组合vov排名: {output_dir / 'category_pair_vov_ranking.csv'}")
			
 
				+
			
 
				+# 导出亲和性矩阵
			
 
				+affinity_df.to_csv(output_dir / 'category_affinity_matrix.csv', index=False)
			
 
				+print(f"品类亲和性矩阵: {output_dir / 'category_affinity_matrix.csv'}")
			
 
				+
			
 
				+print("\n" + "=" * 60)
			
 
				+print("分析完成!")
			
 
				+print("=" * 60)
			
--- a/tasks/承接/头部品类与承接品类分析/_archive/query_range.sql
+++ b/tasks/承接/头部品类与承接品类分析/_archive/query_range.sql
@@ -0,0 +1,86 @@
 
				+-- 简化版：直接用 headvideoid 和 vid 关联品类表获取品类
			
 
				+-- 不需要 join 头部视频表
			
 
				+WITH t_rec AS (
			
 
				+    SELECT  dt
			
 
				+            ,mid
			
 
				+            ,subsessionid
			
 
				+            ,headvideoid
			
 
				+            ,vid AS rec_vid
			
 
				+            ,ts
			
 
				+            ,CASE WHEN rootsourceid = '' OR rootsourceid IS NULL THEN '内部' ELSE '外部' END AS in_out
			
 
				+            ,page
			
 
				+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
			
 
				+                    WHEN page IN ("回流页","其他") THEN "非推荐"
			
 
				+                    ELSE "其他"
			
 
				+            END AS page_rec
			
 
				+            ,share_cnt
			
 
				+            ,return_n_uv
			
 
				+            ,new_exposure_cnt
			
 
				+            ,GET_JSON_OBJECT(extend,"$.extParams.userShareDepth") AS layer
			
 
				+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
			
 
				+    WHERE   dt BETWEEN "${start}" AND "${end}"
			
 
				+    AND     apptype IN ('4','0')
			
 
				+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
			
 
				+)
			
 
				+,t_vid_info AS (
			
 
				+    -- 视频品类信息表
			
 
				+    SELECT  vid
			
 
				+            ,COALESCE(GET_JSON_OBJECT(feature,"$.merge_second_level_cate"),"unknown") AS cate2
			
 
				+    FROM    (
			
 
				+                SELECT  vid
			
 
				+                        ,feature
			
 
				+                        ,ROW_NUMBER() OVER (PARTITION BY vid ORDER BY dt DESC, hh DESC) AS rn
			
 
				+                FROM    loghubods.alg_vid_feature_basic_info
			
 
				+                WHERE   CONCAT(dt,hh) BETWEEN CONCAT("${start}","00") AND CONCAT("${end}","23")
			
 
				+            )
			
 
				+    WHERE   rn = 1
			
 
				+)
			
 
				+,t_joined AS (
			
 
				+    SELECT  r.dt
			
 
				+            ,CASE   WHEN r.in_out = '内部' THEN '内部'
			
 
				+                    WHEN r.layer = '0' THEN '外部0层'
			
 
				+                    WHEN CAST(r.layer AS INT) > 0 THEN '外部裂变'
			
 
				+                    ELSE '其他'
			
 
				+            END AS crowd
			
 
				+            ,CASE   WHEN r.headvideoid IS NULL OR r.headvideoid = '' THEN 'headvideoid为空'
			
 
				+                    WHEN h.cate2 IS NULL THEN '未匹配品类'
			
 
				+                    ELSE h.cate2
			
 
				+            END AS head_cate2
			
 
				+            ,COALESCE(v.cate2, 'unknown') AS rec_cate2
			
 
				+            ,r.share_cnt
			
 
				+            ,r.return_n_uv
			
 
				+            ,r.new_exposure_cnt
			
 
				+    FROM    t_rec r
			
 
				+    LEFT JOIN t_vid_info h ON r.headvideoid = h.vid
			
 
				+    LEFT JOIN t_vid_info v ON r.rec_vid = v.vid
			
 
				+    WHERE   r.page_rec = '推荐'
			
 
				+)
			
 
				+,t_final AS (
			
 
				+    SELECT  dt
			
 
				+            ,crowd
			
 
				+            ,head_cate2
			
 
				+            ,rec_cate2
			
 
				+            ,SUM(1) AS exp
			
 
				+            ,SUM(share_cnt) AS share_cnt
			
 
				+            ,SUM(return_n_uv) AS return_n_uv
			
 
				+            ,SUM(new_exposure_cnt) AS new_exposure_cnt
			
 
				+    FROM    t_joined
			
 
				+    GROUP BY dt, crowd, head_cate2, rec_cate2
			
 
				+)
			
 
				+SELECT  dt
			
 
				+        ,crowd
			
 
				+        ,head_cate2
			
 
				+        ,rec_cate2
			
 
				+        ,exp
			
 
				+        ,share_cnt
			
 
				+        ,return_n_uv
			
 
				+        ,new_exposure_cnt
			
 
				+        ,round(COALESCE(share_cnt / exp, 0), 4) AS str
			
 
				+        ,round(COALESCE(return_n_uv / share_cnt, 0), 4) AS ros
			
 
				+        ,round(COALESCE(return_n_uv / exp, 0), 4) AS rovn
			
 
				+        ,round(COALESCE(new_exposure_cnt / exp, 0), 4) AS vov
			
 
				+FROM    t_final
			
 
				+WHERE   crowd <> '其他'
			
 
				+AND     exp >= 1000
			
 
				+ORDER BY dt DESC, crowd, exp DESC
			
 
				+;
			
--- a/tasks/承接/头部品类与承接品类分析/_archive/visualize.py
+++ b/tasks/承接/头部品类与承接品类分析/_archive/visualize.py
@@ -0,0 +1,874 @@
 
				+#!/usr/bin/env python
			
 
				+# coding=utf-8
			
 
				+"""
			
 
				+头部品类分析可视化
			
 
				+Tab 1: Matrix - 头部品类 × 推荐品类矩阵
			
 
				+Tab 2: Compare - Top 10 品类人群对比
			
 
				+"""
			
 
				+import pandas as pd
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+
			
 
				+task_dir = Path(__file__).parent
			
 
				+output_dir = task_dir / "output"
			
 
				+
			
 
				+# 找到最新的原始数据文件
			
 
				+csv_files = [f for f in output_dir.glob("query_*.csv")]
			
 
				+if not csv_files:
			
 
				+    print("没有找到数据文件，请先运行 query.sql")
			
 
				+    exit(1)
			
 
				+
			
 
				+latest_file = max(csv_files, key=lambda x: x.stat().st_mtime)
			
 
				+df = pd.read_csv(latest_file)
			
 
				+
			
 
				+print(f"分析文件: {latest_file.name}")
			
 
				+print(f"时间范围: {df['dt'].min()} ~ {df['dt'].max()}")
			
 
				+
			
 
				+# 日期列表
			
 
				+all_dates = sorted([str(d) for d in df['dt'].unique()])
			
 
				+date_options = ['all'] + all_dates
			
 
				+latest_date = all_dates[-1] if all_dates else 'all'
			
 
				+print(f"日期数: {len(all_dates)}")
			
 
				+
			
 
				+# 人群列表
			
 
				+crowd_list = ['内部', '外部0层', '外部裂变']
			
 
				+print(f"人群: {crowd_list}")
			
 
				+
			
 
				+# 曝光阈值
			
 
				+EXP_THRESHOLD = 1000
			
 
				+
			
 
				+# 计算人群×日期的矩阵数据
			
 
				+def calc_matrix_data(crowd, date=None):
			
 
				+    ch_df = df[df['crowd'] == crowd].copy()
			
 
				+    if date and date != 'all':
			
 
				+        ch_df = ch_df[ch_df['dt'].astype(str) == str(date)]
			
 
				+    if len(ch_df) == 0:
			
 
				+        return None
			
 
				+
			
 
				+    row_col = 'head_cate2'
			
 
				+    col_col = 'rec_cate2'
			
 
				+
			
 
				+    matrix = ch_df.groupby([row_col, col_col]).agg({
			
 
				+        'exp': 'sum',
			
 
				+        'share_cnt': 'sum',
			
 
				+        'return_n_uv': 'sum',
			
 
				+        'new_exposure_cnt': 'sum',
			
 
				+    }).reset_index()
			
 
				+
			
 
				+    matrix = matrix[matrix['exp'] >= EXP_THRESHOLD]
			
 
				+    if len(matrix) == 0:
			
 
				+        return None
			
 
				+
			
 
				+    matrix['str'] = matrix['share_cnt'] / (matrix['exp'] + 1)
			
 
				+    matrix['ros'] = matrix['return_n_uv'] / (matrix['share_cnt'] + 1)
			
 
				+    matrix['rovn'] = matrix['return_n_uv'] / (matrix['exp'] + 1)
			
 
				+    matrix['vov'] = matrix['new_exposure_cnt'] / (matrix['exp'] + 1)
			
 
				+
			
 
				+    exp_pivot = matrix.pivot(index=row_col, columns=col_col, values='exp').fillna(0)
			
 
				+    str_pivot = matrix.pivot(index=row_col, columns=col_col, values='str').fillna(0)
			
 
				+    ros_pivot = matrix.pivot(index=row_col, columns=col_col, values='ros').fillna(0)
			
 
				+    rovn_pivot = matrix.pivot(index=row_col, columns=col_col, values='rovn').fillna(0)
			
 
				+    vov_pivot = matrix.pivot(index=row_col, columns=col_col, values='vov').fillna(0)
			
 
				+
			
 
				+    row_order = exp_pivot.sum(axis=1).sort_values(ascending=False).index.tolist()
			
 
				+    col_order = exp_pivot.sum(axis=0).sort_values(ascending=False).index.tolist()
			
 
				+
			
 
				+    def to_dict(pivot, is_int=False):
			
 
				+        return {str(r): {str(c): int(pivot.loc[r, c]) if is_int else round(float(pivot.loc[r, c]), 4) if c in pivot.columns else 0 for c in col_order} for r in row_order}
			
 
				+
			
 
				+    total_exp = int(ch_df['exp'].sum())
			
 
				+    total_share = int(ch_df['share_cnt'].sum())
			
 
				+    total_return = int(ch_df['return_n_uv'].sum())
			
 
				+
			
 
				+    return {
			
 
				+        'rows': row_order,
			
 
				+        'cols': col_order,
			
 
				+        'exp': to_dict(exp_pivot, is_int=True),
			
 
				+        'str': to_dict(str_pivot),
			
 
				+        'ros': to_dict(ros_pivot),
			
 
				+        'rovn': to_dict(rovn_pivot),
			
 
				+        'vov': to_dict(vov_pivot),
			
 
				+        'total_exp': total_exp,
			
 
				+        'total_str': round(total_share / (total_exp + 1), 4),
			
 
				+        'total_rovn': round(total_return / (total_exp + 1), 4),
			
 
				+    }
			
 
				+
			
 
				+# 计算头部品类下钻数据：head_cate2 -> crowd -> rec_cate2
			
 
				+def calc_head_drill_data(date=None):
			
 
				+    ch_df = df.copy()
			
 
				+    if date and date != 'all':
			
 
				+        ch_df = ch_df[ch_df['dt'].astype(str) == str(date)]
			
 
				+    if len(ch_df) == 0:
			
 
				+        return None
			
 
				+
			
 
				+    # 按 head_cate2 + crowd + rec_cate2 聚合
			
 
				+    agg = ch_df.groupby(['head_cate2', 'crowd', 'rec_cate2']).agg({
			
 
				+        'exp': 'sum',
			
 
				+        'share_cnt': 'sum',
			
 
				+        'return_n_uv': 'sum',
			
 
				+        'new_exposure_cnt': 'sum',
			
 
				+    }).reset_index()
			
 
				+
			
 
				+    agg['str'] = agg['share_cnt'] / (agg['exp'] + 1)
			
 
				+    agg['ros'] = agg['return_n_uv'] / (agg['share_cnt'] + 1)
			
 
				+    agg['rovn'] = agg['return_n_uv'] / (agg['exp'] + 1)
			
 
				+    agg['vov'] = agg['new_exposure_cnt'] / (agg['exp'] + 1)
			
 
				+
			
 
				+    # 构建嵌套字典: head_cate2 -> crowd -> {rec_cate2: metrics}
			
 
				+    result = {}
			
 
				+
			
 
				+    # 添加 "all" 选项：不区分头部品类，按 crowd + rec_cate2 聚合
			
 
				+    agg_all = ch_df.groupby(['crowd', 'rec_cate2']).agg({
			
 
				+        'exp': 'sum',
			
 
				+        'share_cnt': 'sum',
			
 
				+        'return_n_uv': 'sum',
			
 
				+        'new_exposure_cnt': 'sum',
			
 
				+    }).reset_index()
			
 
				+    agg_all['str'] = agg_all['share_cnt'] / (agg_all['exp'] + 1)
			
 
				+    agg_all['ros'] = agg_all['return_n_uv'] / (agg_all['share_cnt'] + 1)
			
 
				+    agg_all['rovn'] = agg_all['return_n_uv'] / (agg_all['exp'] + 1)
			
 
				+    agg_all['vov'] = agg_all['new_exposure_cnt'] / (agg_all['exp'] + 1)
			
 
				+
			
 
				+    result['all'] = {}
			
 
				+    for crowd in crowd_list:
			
 
				+        crowd_df = agg_all[agg_all['crowd'] == crowd]
			
 
				+        result['all'][crowd] = {}
			
 
				+        # 计算整体汇总
			
 
				+        total_exp = int(crowd_df['exp'].sum())
			
 
				+        total_share = crowd_df['share_cnt'].sum()
			
 
				+        total_return = crowd_df['return_n_uv'].sum()
			
 
				+        total_new_exp = crowd_df['new_exposure_cnt'].sum()
			
 
				+        result['all'][crowd]['_total'] = {
			
 
				+            'exp': total_exp,
			
 
				+            'str': round(total_share / (total_exp + 1), 4),
			
 
				+            'ros': round(total_return / (total_share + 1), 4),
			
 
				+            'rovn': round(total_return / (total_exp + 1), 4),
			
 
				+            'vov': round(total_new_exp / (total_exp + 1), 4),
			
 
				+        }
			
 
				+        for _, row in crowd_df.iterrows():
			
 
				+            result['all'][crowd][row['rec_cate2']] = {
			
 
				+                'exp': int(row['exp']),
			
 
				+                'str': round(row['str'], 4),
			
 
				+                'ros': round(row['ros'], 4),
			
 
				+                'rovn': round(row['rovn'], 4),
			
 
				+                'vov': round(row['vov'], 4),
			
 
				+            }
			
 
				+
			
 
				+    # 按头部品类聚合
			
 
				+    for head_cate in agg['head_cate2'].unique():
			
 
				+        result[head_cate] = {}
			
 
				+        for crowd in crowd_list:
			
 
				+            crowd_df = agg[(agg['head_cate2'] == head_cate) & (agg['crowd'] == crowd)]
			
 
				+            result[head_cate][crowd] = {}
			
 
				+            # 计算该头部品类下的整体汇总
			
 
				+            total_exp = int(crowd_df['exp'].sum())
			
 
				+            total_share = crowd_df['share_cnt'].sum()
			
 
				+            total_return = crowd_df['return_n_uv'].sum()
			
 
				+            total_new_exp = crowd_df['new_exposure_cnt'].sum()
			
 
				+            result[head_cate][crowd]['_total'] = {
			
 
				+                'exp': total_exp,
			
 
				+                'str': round(total_share / (total_exp + 1), 4),
			
 
				+                'ros': round(total_return / (total_share + 1), 4),
			
 
				+                'rovn': round(total_return / (total_exp + 1), 4),
			
 
				+                'vov': round(total_new_exp / (total_exp + 1), 4),
			
 
				+            }
			
 
				+            for _, row in crowd_df.iterrows():
			
 
				+                result[head_cate][crowd][row['rec_cate2']] = {
			
 
				+                    'exp': int(row['exp']),
			
 
				+                    'str': round(row['str'], 4),
			
 
				+                    'ros': round(row['ros'], 4),
			
 
				+                    'rovn': round(row['rovn'], 4),
			
 
				+                    'vov': round(row['vov'], 4),
			
 
				+                }
			
 
				+
			
 
				+    # 获取所有头部品类列表（按总曝光排序）
			
 
				+    head_exp = ch_df.groupby('head_cate2')['exp'].sum().sort_values(ascending=False)
			
 
				+    head_list = head_exp.index.tolist()
			
 
				+
			
 
				+    return {
			
 
				+        'heads': ['all'] + head_list,  # all 放在最前面
			
 
				+        'data': result
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+# 预计算所有数据
			
 
				+all_data = {}
			
 
				+for crowd in crowd_list:
			
 
				+    all_data[crowd] = {}
			
 
				+    for dt in date_options:
			
 
				+        matrix = calc_matrix_data(crowd, dt)
			
 
				+        if matrix:
			
 
				+            all_data[crowd][dt] = matrix
			
 
				+
			
 
				+# 预计算头部品类下钻数据
			
 
				+head_drill_data = {}
			
 
				+for dt in date_options:
			
 
				+    drill = calc_head_drill_data(dt)
			
 
				+    if drill:
			
 
				+        head_drill_data[dt] = drill
			
 
				+
			
 
				+# 转为JSON
			
 
				+data_json = json.dumps(all_data, ensure_ascii=False)
			
 
				+head_drill_json = json.dumps(head_drill_data, ensure_ascii=False)
			
 
				+crowd_list_json = json.dumps(crowd_list, ensure_ascii=False)
			
 
				+dates_json = json.dumps(date_options)
			
 
				+
			
 
				+# 日期选项HTML
			
 
				+date_options_html = "".join([
			
 
				+    f'<option value="{dt}" {"selected" if dt == latest_date else ""}>'
			
 
				+    f'{"all" if dt == "all" else dt}</option>'
			
 
				+    for dt in date_options
			
 
				+])
			
 
				+
			
 
				+# 人群选项HTML
			
 
				+crowd_options_html = "".join([
			
 
				+    f'<option value="{c}">{c}</option>'
			
 
				+    for c in crowd_list
			
 
				+])
			
 
				+
			
 
				+html_content = f"""<!DOCTYPE html>
			
 
				+<html>
			
 
				+<head>
			
 
				+    <meta charset="utf-8">
			
 
				+    <title>头部品类分析</title>
			
 
				+    <style>
			
 
				+        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
			
 
				+        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
			
 
				+               background: #f5f5f5; padding: 20px; }}
			
 
				+        .container {{ max-width: 1600px; margin: 0 auto; background: white;
			
 
				+                     border-radius: 8px; padding: 20px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }}
			
 
				+        h1 {{ font-size: 24px; margin-bottom: 20px; color: #333; }}
			
 
				+        .controls {{ display: flex; gap: 20px; margin-bottom: 20px; align-items: center; flex-wrap: wrap; }}
			
 
				+        .controls .date-switcher {{ margin-left: auto; }}
			
 
				+        .play-btn {{ background: #4CAF50; color: white; border: none; border-radius: 4px; padding: 6px 12px; font-size: 14px; }}
			
 
				+        .play-btn:hover {{ background: #45a049; }}
			
 
				+        .play-btn.playing {{ background: #f44336; }}
			
 
				+        .control-group {{ display: flex; align-items: center; gap: 8px; }}
			
 
				+        .control-group label {{ font-weight: 500; color: #666; }}
			
 
				+        select {{ padding: 8px 12px; border: 1px solid #ddd; border-radius: 4px; font-size: 14px; min-width: 120px; }}
			
 
				+        .summary {{ display: flex; gap: 20px; margin-bottom: 20px; }}
			
 
				+        .stat-card {{ background: #f8f9fa; padding: 15px 20px; border-radius: 6px; text-align: center; }}
			
 
				+        .stat-card h4 {{ font-size: 24px; color: #28a745; margin-bottom: 5px; }}
			
 
				+        .stat-card p {{ font-size: 12px; color: #666; }}
			
 
				+        .matrix-container {{ overflow-x: auto; max-height: 600px; overflow-y: auto; }}
			
 
				+        table {{ border-collapse: collapse; font-size: 11px; }}
			
 
				+        th, td {{ border: 1px solid #e0e0e0; padding: 4px 6px; text-align: center; white-space: nowrap; }}
			
 
				+        th {{ background: #f5f5f5; font-weight: 600; position: sticky; top: 0; z-index: 1; }}
			
 
				+        th:first-child {{ position: sticky; left: 0; z-index: 3; }}
			
 
				+        td:first-child {{ background: #f5f5f5; font-weight: 500; position: sticky; left: 0; z-index: 1; text-align: left; }}
			
 
				+        .corner-cell {{
			
 
				+            position: relative;
			
 
				+            width: 100px;
			
 
				+            height: 50px;
			
 
				+            background: linear-gradient(to top right, #f5f5f5 49.5%, #ccc 49.5%, #ccc 50.5%, #f5f5f5 50.5%);
			
 
				+        }}
			
 
				+        .corner-cell .row-label {{
			
 
				+            position: absolute;
			
 
				+            bottom: 4px;
			
 
				+            left: 4px;
			
 
				+            font-size: 10px;
			
 
				+            color: #666;
			
 
				+        }}
			
 
				+        .corner-cell .col-label {{
			
 
				+            position: absolute;
			
 
				+            top: 4px;
			
 
				+            right: 4px;
			
 
				+            font-size: 10px;
			
 
				+            color: #666;
			
 
				+        }}
			
 
				+        .legend {{ font-size: 12px; color: #666; margin-bottom: 10px; }}
			
 
				+        .date-switcher {{ display: flex; align-items: center; gap: 5px; }}
			
 
				+        .date-switcher button {{ padding: 5px 10px; border: 1px solid #ddd; background: white;
			
 
				+                                cursor: pointer; border-radius: 3px; }}
			
 
				+        .date-switcher button:hover {{ background: #f0f0f0; }}
			
 
				+        .play-btn.playing {{ background: #28a745; color: white; }}
			
 
				+        /* Compare tab styles */
			
 
				+        .chart-container {{ width: 100%; overflow-x: auto; }}
			
 
				+        .bar-chart {{ min-width: 800px; }}
			
 
				+        .bar-group {{ display: flex; align-items: flex-end; gap: 4px; margin-bottom: 8px; }}
			
 
				+        .bar {{ min-width: 60px; text-align: center; font-size: 10px; color: white;
			
 
				+               border-radius: 3px 3px 0 0; transition: all 0.3s; cursor: pointer; }}
			
 
				+        .bar:hover {{ opacity: 0.8; }}
			
 
				+        .bar-label {{ font-size: 11px; color: #333; margin-bottom: 5px; font-weight: 500; }}
			
 
				+        .chart-legend {{ display: flex; gap: 20px; margin-bottom: 15px; }}
			
 
				+        .legend-item {{ display: flex; align-items: center; gap: 5px; font-size: 12px; }}
			
 
				+        .legend-color {{ width: 16px; height: 16px; border-radius: 3px; }}
			
 
				+        .compare-table {{ width: 100%; border-collapse: collapse; }}
			
 
				+        .compare-table th {{ background: #f5f5f5; padding: 8px 10px; text-align: center; font-weight: 600; border: 1px solid #ddd; }}
			
 
				+        .compare-table td {{ padding: 6px 8px; border: 1px solid #eee; text-align: center; }}
			
 
				+        .compare-table .crowd-header {{ background: #e8e8e8; font-size: 14px; }}
			
 
				+        .compare-table .cat-cell {{ text-align: left; padding-left: 10px; }}
			
 
				+        .compare-section {{ display: flex; gap: 20px; }}
			
 
				+        .crowd-block {{ flex: 1; min-width: 250px; }}
			
 
				+        .crowd-block table {{ width: 100%; border-collapse: collapse; }}
			
 
				+        .crowd-block th {{ background: #f0f0f0; padding: 8px; border: 1px solid #ddd; }}
			
 
				+        .crowd-block td {{ padding: 6px 8px; border: 1px solid #eee; }}
			
 
				+        .crowd-block .rn {{ width: 40px; text-align: center; color: #666; }}
			
 
				+        .crowd-block .cat {{ text-align: left; cursor: pointer; transition: all 0.2s; }}
			
 
				+        .crowd-block .val {{ text-align: right; font-family: monospace; }}
			
 
				+        .crowd-block .cat.highlight {{
			
 
				+            font-weight: bold;
			
 
				+        }}
			
 
				+        .crowd-block tr.row-highlight {{
			
 
				+            outline: 2px solid #1565C0;
			
 
				+            outline-offset: -1px;
			
 
				+        }}
			
 
				+    </style>
			
 
				+</head>
			
 
				+<body>
			
 
				+    <div class="container">
			
 
				+        <h1>头部品类 → 推荐品类</h1>
			
 
				+
			
 
				+        <!-- Matrix Tab -->
			
 
				+        <div id="tab-matrix">
			
 
				+            <div class="controls">
			
 
				+                <div class="control-group">
			
 
				+                    <label>人群:</label>
			
 
				+                    <select id="crowd-select" onchange="updateMatrix()">
			
 
				+                        {crowd_options_html}
			
 
				+                    </select>
			
 
				+                </div>
			
 
				+                <div class="control-group">
			
 
				+                    <label>指标:</label>
			
 
				+                    <select id="metric-select" onchange="updateMatrix()">
			
 
				+                        <option value="exp">exp</option>
			
 
				+                        <option value="str">str</option>
			
 
				+                        <option value="ros">ros</option>
			
 
				+                        <option value="rovn">rovn</option>
			
 
				+                        <option value="vov" selected>vov</option>
			
 
				+                    </select>
			
 
				+                </div>
			
 
				+                <div class="control-group date-switcher">
			
 
				+                    <label>日期:</label>
			
 
				+                    <button onclick="switchDate(-1)">◀</button>
			
 
				+                    <select id="date-select" onchange="updateMatrix()">
			
 
				+                        {date_options_html}
			
 
				+                    </select>
			
 
				+                    <button onclick="switchDate(1)">▶</button>
			
 
				+                    <button id="play-btn" class="play-btn" onclick="togglePlay()">▶</button>
			
 
				+                </div>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="summary" id="summary"></div>
			
 
				+
			
 
				+            <div class="legend">
			
 
				+                行=头部品类，列=推荐品类 | 颜色越深=数值越高 | 点击表头排序
			
 
				+                <button onclick="resetSort()" style="margin-left:15px;padding:3px 10px;cursor:pointer;">重置</button>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="matrix-container">
			
 
				+                <table id="matrix-table">
			
 
				+                    <thead id="matrix-header"></thead>
			
 
				+                    <tbody id="matrix-body"></tbody>
			
 
				+                </table>
			
 
				+            </div>
			
 
				+
			
 
				+            <!-- 头部品类下钻表格 -->
			
 
				+            <div style="margin-top: 30px; border-top: 2px solid #e0e0e0; padding-top: 20px;">
			
 
				+                <h3 style="margin-bottom: 15px; font-size: 16px; color: #333;">头部品类下钻：各人群推荐品类 Top N</h3>
			
 
				+                <div class="controls">
			
 
				+                    <div class="control-group">
			
 
				+                        <label>头部品类:</label>
			
 
				+                        <select id="drill-head" onchange="updateHeadDrill()">
			
 
				+                        </select>
			
 
				+                    </div>
			
 
				+                    <div class="control-group">
			
 
				+                        <label>排序:</label>
			
 
				+                        <select id="drill-sort" onchange="updateHeadDrill()">
			
 
				+                            <option value="exp" selected>exp</option>
			
 
				+                            <option value="str">str</option>
			
 
				+                            <option value="ros">ros</option>
			
 
				+                            <option value="rovn">rovn</option>
			
 
				+                            <option value="vov">vov</option>
			
 
				+                        </select>
			
 
				+                    </div>
			
 
				+                    <div class="control-group">
			
 
				+                        <label>展示:</label>
			
 
				+                        <select id="drill-metric" onchange="updateHeadDrill()">
			
 
				+                            <option value="exp">exp</option>
			
 
				+                            <option value="str">str</option>
			
 
				+                            <option value="ros">ros</option>
			
 
				+                            <option value="rovn">rovn</option>
			
 
				+                            <option value="vov" selected>vov</option>
			
 
				+                        </select>
			
 
				+                    </div>
			
 
				+                    <div class="control-group">
			
 
				+                        <label>Top:</label>
			
 
				+                        <select id="drill-topn" onchange="updateHeadDrill()">
			
 
				+                            <option value="5">5</option>
			
 
				+                            <option value="10" selected>10</option>
			
 
				+                            <option value="15">15</option>
			
 
				+                            <option value="20">20</option>
			
 
				+                        </select>
			
 
				+                    </div>
			
 
				+                    <div class="control-group date-switcher">
			
 
				+                        <label>日期:</label>
			
 
				+                        <button onclick="switchDrillDate(-1)">◀</button>
			
 
				+                        <select id="drill-date" onchange="updateHeadDrill()">
			
 
				+                            {date_options_html}
			
 
				+                        </select>
			
 
				+                        <button onclick="switchDrillDate(1)">▶</button>
			
 
				+                        <button id="drill-play-btn" class="play-btn" onclick="toggleDrillPlay()">▶</button>
			
 
				+                    </div>
			
 
				+                </div>
			
 
				+                <div class="compare-section" id="drill-section"></div>
			
 
				+            </div>
			
 
				+        </div>
			
 
				+
			
 
				+    </div>
			
 
				+
			
 
				+    <script>
			
 
				+    const allData = {data_json};
			
 
				+    const headDrillData = {head_drill_json};
			
 
				+    const crowdList = {crowd_list_json};
			
 
				+    const dates = {dates_json};
			
 
				+    const crowdColors = {{ '内部': '#4CAF50', '外部0层': '#2196F3', '外部裂变': '#FF9800' }};
			
 
				+    let playInterval = null;
			
 
				+    let drillPlayInterval = null;
			
 
				+    let currentRowOrder = null;
			
 
				+    let currentColOrder = null;
			
 
				+    let sortState = {{ row: null, col: null, asc: true }};
			
 
				+    let lastCrowd = null;
			
 
				+    let lastDate = null;
			
 
				+
			
 
				+    function getGradient(val, maxVal, minVal = 0) {{
			
 
				+        if (val <= minVal || maxVal <= minVal) return '#f8f9fa';
			
 
				+        const ratio = Math.min((val - minVal) / (maxVal - minVal), 1);
			
 
				+        const r = Math.round(255 - ratio * 215);
			
 
				+        const g = Math.round(255 - ratio * 88);
			
 
				+        const b = Math.round(255 - ratio * 186);
			
 
				+        return `rgb(${{r}},${{g}},${{b}})`;
			
 
				+    }}
			
 
				+
			
 
				+    function updateMatrix() {{
			
 
				+        const crowd = document.getElementById('crowd-select').value;
			
 
				+        const metric = document.getElementById('metric-select').value;
			
 
				+        const date = document.getElementById('date-select').value;
			
 
				+
			
 
				+        if (!allData[crowd] || !allData[crowd][date]) {{
			
 
				+            document.getElementById('summary').innerHTML = '<div class="stat-card"><h4>-</h4><p>no data</p></div>';
			
 
				+            document.getElementById('matrix-header').innerHTML = '';
			
 
				+            document.getElementById('matrix-body').innerHTML = '';
			
 
				+            return;
			
 
				+        }}
			
 
				+
			
 
				+        const data = allData[crowd][date];
			
 
				+
			
 
				+        document.getElementById('summary').innerHTML = `
			
 
				+            <div class="stat-card"><h4>${{data.total_exp.toLocaleString()}}</h4><p>总 exp</p></div>
			
 
				+            <div class="stat-card"><h4>${{data.total_str.toFixed(4)}}</h4><p>总 str</p></div>
			
 
				+            <div class="stat-card"><h4>${{data.total_rovn.toFixed(4)}}</h4><p>总 rovn</p></div>
			
 
				+            <div class="stat-card"><h4>${{data.rows.length}}</h4><p>头部品类数</p></div>
			
 
				+            <div class="stat-card"><h4>${{data.cols.length}}</h4><p>推荐品类数</p></div>
			
 
				+        `;
			
 
				+
			
 
				+        const metricData = data[metric];
			
 
				+        const allVals = [];
			
 
				+        data.rows.forEach(r => data.cols.forEach(c => {{
			
 
				+            const val = metricData[r]?.[c] || 0;
			
 
				+            if (val > 0) allVals.push(val);
			
 
				+        }}));
			
 
				+        allVals.sort((a, b) => a - b);
			
 
				+
			
 
				+        const p95Idx = Math.floor(allVals.length * 0.95);
			
 
				+        let maxVal = allVals.length > 0 ? allVals[Math.min(p95Idx, allVals.length - 1)] : 0;
			
 
				+        const thresholds = {{ exp: 10000, str: 0.1, ros: 0.5, rovn: 0.05, vov: 0.3 }};
			
 
				+        maxVal = Math.max(maxVal, thresholds[metric] || 0.1);
			
 
				+
			
 
				+        // 切换人群或日期时，重置排序，使用新数据的 exp 排序
			
 
				+        if (crowd !== lastCrowd || date !== lastDate) {{
			
 
				+            currentRowOrder = null;
			
 
				+            currentColOrder = null;
			
 
				+            sortState = {{ row: null, col: null, asc: true }};
			
 
				+            lastCrowd = crowd;
			
 
				+            lastDate = date;
			
 
				+        }}
			
 
				+
			
 
				+        if (!currentRowOrder) currentRowOrder = [...data.rows];
			
 
				+        if (!currentColOrder) currentColOrder = [...data.cols];
			
 
				+
			
 
				+        const rows = currentRowOrder.filter(r => data.rows.includes(r));
			
 
				+        const cols = currentColOrder.filter(c => data.cols.includes(c));
			
 
				+
			
 
				+        const expData = data.exp;
			
 
				+        const rowExpTotals = {{}};
			
 
				+        const colExpTotals = {{}};
			
 
				+        rows.forEach(r => {{ rowExpTotals[r] = cols.reduce((sum, c) => sum + (expData[r]?.[c] || 0), 0); }});
			
 
				+        cols.forEach(c => {{ colExpTotals[c] = rows.reduce((sum, r) => sum + (expData[r]?.[c] || 0), 0); }});
			
 
				+
			
 
				+        // 计算原始排名（按exp排序）
			
 
				+        const origRowOrder = [...data.rows];
			
 
				+        const origColOrder = [...data.cols];
			
 
				+
			
 
				+        document.getElementById('matrix-header').innerHTML = `
			
 
				+            <tr>
			
 
				+                <th class="corner-cell" style="cursor:pointer" onclick="sortByRowSum()">
			
 
				+                    <span class="row-label">头部品类 ↓</span>
			
 
				+                    <span class="col-label">推荐品类 →</span>
			
 
				+                </th>
			
 
				+                ${{cols.map((c, i) => {{
			
 
				+                    const origRank = origColOrder.indexOf(c) + 1;
			
 
				+                    return `<th style="cursor:pointer" onclick="sortByCol('${{c}}')" title="推荐品类: ${{c}}&#10;exp排名: #${{origRank}}&#10;exp: ${{colExpTotals[c].toLocaleString()}}">#${{origRank}} ${{c}}</th>`;
			
 
				+                }}).join('')}}
			
 
				+            </tr>
			
 
				+        `;
			
 
				+
			
 
				+        document.getElementById('matrix-body').innerHTML = rows.map((r, ri) => {{
			
 
				+            const origRowRank = origRowOrder.indexOf(r) + 1;
			
 
				+            const cells = cols.map(c => {{
			
 
				+                const val = metricData[r]?.[c] || 0;
			
 
				+                const cellExp = expData[r]?.[c] || 0;
			
 
				+                const bg = getGradient(val, maxVal);
			
 
				+                const display = metric === 'exp' ? parseInt(val).toLocaleString() : val.toFixed(4);
			
 
				+                const rowPct = rowExpTotals[r] > 0 ? (cellExp / rowExpTotals[r] * 100).toFixed(1) : '0.0';
			
 
				+                const colPct = colExpTotals[c] > 0 ? (cellExp / colExpTotals[c] * 100).toFixed(1) : '0.0';
			
 
				+                return `<td style="background:${{bg}}" title="头部: ${{r}}&#10;推荐: ${{c}}&#10;${{metric}}: ${{display}}&#10;exp: ${{cellExp.toLocaleString()}}&#10;横向占比: ${{rowPct}}%&#10;纵向占比: ${{colPct}}%">${{display}}</td>`;
			
 
				+            }}).join('');
			
 
				+            return `<tr><td style="cursor:pointer;background:#f5f5f5" onclick="sortByRow('${{r}}')" title="头部品类: ${{r}}&#10;exp排名: #${{origRowRank}}&#10;exp: ${{rowExpTotals[r].toLocaleString()}}">#${{origRowRank}} ${{r}}</td>${{cells}}</tr>`;
			
 
				+        }}).join('');
			
 
				+    }}
			
 
				+
			
 
				+    function switchDate(delta) {{
			
 
				+        const select = document.getElementById('date-select');
			
 
				+        const idx = dates.indexOf(select.value);
			
 
				+        const newIdx = idx + delta;
			
 
				+        if (newIdx >= 0 && newIdx < dates.length) {{
			
 
				+            select.value = dates[newIdx];
			
 
				+            updateMatrix();
			
 
				+        }}
			
 
				+    }}
			
 
				+
			
 
				+    function switchDrillDate(delta) {{
			
 
				+        const select = document.getElementById('drill-date');
			
 
				+        const idx = dates.indexOf(select.value);
			
 
				+        const newIdx = idx + delta;
			
 
				+        if (newIdx >= 0 && newIdx < dates.length) {{
			
 
				+            select.value = dates[newIdx];
			
 
				+            // 触发 change 事件以更新头部品类列表
			
 
				+            select.dispatchEvent(new Event('change'));
			
 
				+        }}
			
 
				+    }}
			
 
				+
			
 
				+    function toggleDrillPlay() {{
			
 
				+        const btn = document.getElementById('drill-play-btn');
			
 
				+        if (drillPlayInterval) {{
			
 
				+            clearInterval(drillPlayInterval);
			
 
				+            drillPlayInterval = null;
			
 
				+            btn.classList.remove('playing');
			
 
				+            btn.textContent = '▶';
			
 
				+        }} else {{
			
 
				+            btn.classList.add('playing');
			
 
				+            btn.textContent = '⏸';
			
 
				+            let idx = 0;
			
 
				+            const play = () => {{
			
 
				+                if (idx >= dates.length) {{
			
 
				+                    clearInterval(drillPlayInterval);
			
 
				+                    drillPlayInterval = null;
			
 
				+                    btn.classList.remove('playing');
			
 
				+                    btn.textContent = '▶';
			
 
				+                    return;
			
 
				+                }}
			
 
				+                document.getElementById('drill-date').value = dates[idx];
			
 
				+                document.getElementById('drill-date').dispatchEvent(new Event('change'));
			
 
				+                idx++;
			
 
				+            }};
			
 
				+            play();
			
 
				+            drillPlayInterval = setInterval(play, 1500);
			
 
				+        }}
			
 
				+    }}
			
 
				+
			
 
				+    function togglePlay() {{
			
 
				+        const btn = document.getElementById('play-btn');
			
 
				+        if (playInterval) {{
			
 
				+            clearInterval(playInterval);
			
 
				+            playInterval = null;
			
 
				+            btn.classList.remove('playing');
			
 
				+            btn.textContent = '▶';
			
 
				+        }} else {{
			
 
				+            btn.classList.add('playing');
			
 
				+            btn.textContent = '⏸';
			
 
				+            let idx = 0;
			
 
				+            const play = () => {{
			
 
				+                if (idx >= dates.length) {{
			
 
				+                    clearInterval(playInterval);
			
 
				+                    playInterval = null;
			
 
				+                    btn.classList.remove('playing');
			
 
				+                    btn.textContent = '▶';
			
 
				+                    return;
			
 
				+                }}
			
 
				+                document.getElementById('date-select').value = dates[idx];
			
 
				+                updateMatrix();
			
 
				+                idx++;
			
 
				+            }};
			
 
				+            play();
			
 
				+            playInterval = setInterval(play, 1500);
			
 
				+        }}
			
 
				+    }}
			
 
				+
			
 
				+    function getCurrentData() {{
			
 
				+        const crowd = document.getElementById('crowd-select').value;
			
 
				+        const date = document.getElementById('date-select').value;
			
 
				+        const metric = document.getElementById('metric-select').value;
			
 
				+        if (!allData[crowd] || !allData[crowd][date]) return null;
			
 
				+        return {{ data: allData[crowd][date], metric }};
			
 
				+    }}
			
 
				+
			
 
				+    function sortByRowSum() {{
			
 
				+        const result = getCurrentData();
			
 
				+        if (!result) return;
			
 
				+        const {{ data, metric }} = result;
			
 
				+        const metricData = data[metric];
			
 
				+        const rowSums = {{}};
			
 
				+        data.rows.forEach(r => {{ rowSums[r] = data.cols.reduce((sum, c) => sum + (metricData[r]?.[c] || 0), 0); }});
			
 
				+        sortState.asc = sortState.row === 'sum' ? !sortState.asc : false;
			
 
				+        sortState.row = 'sum';
			
 
				+        currentRowOrder = [...data.rows].sort((a, b) => sortState.asc ? rowSums[a] - rowSums[b] : rowSums[b] - rowSums[a]);
			
 
				+        updateMatrix();
			
 
				+    }}
			
 
				+
			
 
				+    function sortByCol(colName) {{
			
 
				+        const result = getCurrentData();
			
 
				+        if (!result) return;
			
 
				+        const {{ data, metric }} = result;
			
 
				+        const metricData = data[metric];
			
 
				+        sortState.asc = sortState.col === colName ? !sortState.asc : false;
			
 
				+        sortState.col = colName;
			
 
				+        currentRowOrder = [...data.rows].sort((a, b) => {{
			
 
				+            const va = metricData[a]?.[colName] || 0;
			
 
				+            const vb = metricData[b]?.[colName] || 0;
			
 
				+            return sortState.asc ? va - vb : vb - va;
			
 
				+        }});
			
 
				+        updateMatrix();
			
 
				+    }}
			
 
				+
			
 
				+    function sortByRow(rowName) {{
			
 
				+        const result = getCurrentData();
			
 
				+        if (!result) return;
			
 
				+        const {{ data, metric }} = result;
			
 
				+        const metricData = data[metric];
			
 
				+        sortState.asc = sortState.row === rowName ? !sortState.asc : false;
			
 
				+        sortState.row = rowName;
			
 
				+        currentColOrder = [...data.cols].sort((a, b) => {{
			
 
				+            const va = metricData[rowName]?.[a] || 0;
			
 
				+            const vb = metricData[rowName]?.[b] || 0;
			
 
				+            return sortState.asc ? va - vb : vb - va;
			
 
				+        }});
			
 
				+        updateMatrix();
			
 
				+    }}
			
 
				+
			
 
				+    function resetSort() {{
			
 
				+        currentRowOrder = null;
			
 
				+        currentColOrder = null;
			
 
				+        sortState = {{ row: null, col: null, asc: true }};
			
 
				+        updateMatrix();
			
 
				+    }}
			
 
				+
			
 
				+    function highlightCat(el) {{
			
 
				+        const cat = el.getAttribute('data-cat');
			
 
				+        document.querySelectorAll('.cat[data-cat]').forEach(cell => {{
			
 
				+            if (cell.getAttribute('data-cat') === cat) {{
			
 
				+                cell.classList.add('highlight');
			
 
				+                cell.closest('tr').classList.add('row-highlight');
			
 
				+            }}
			
 
				+        }});
			
 
				+    }}
			
 
				+
			
 
				+    function unhighlightCat() {{
			
 
				+        document.querySelectorAll('.cat.highlight').forEach(cell => {{
			
 
				+            cell.classList.remove('highlight');
			
 
				+            cell.closest('tr').classList.remove('row-highlight');
			
 
				+        }});
			
 
				+    }}
			
 
				+
			
 
				+    // 初始化头部品类下钻
			
 
				+    function initHeadDrill() {{
			
 
				+        const date = document.getElementById('drill-date').value;
			
 
				+        const headSelect = document.getElementById('drill-head');
			
 
				+
			
 
				+        if (!headDrillData[date]) {{
			
 
				+            headSelect.innerHTML = '<option value="">无数据</option>';
			
 
				+            return;
			
 
				+        }}
			
 
				+
			
 
				+        const heads = headDrillData[date].heads;
			
 
				+        headSelect.innerHTML = heads.map((h, i) => {{
			
 
				+            const label = h === 'all' ? '全部（不区分头部品类）' : `#${{i}} ${{h}}`;
			
 
				+            return `<option value="${{h}}">${{label}}</option>`;
			
 
				+        }}).join('');
			
 
				+
			
 
				+        updateHeadDrill();
			
 
				+    }}
			
 
				+
			
 
				+    function updateHeadDrill() {{
			
 
				+        const date = document.getElementById('drill-date').value;
			
 
				+        const headCate = document.getElementById('drill-head').value;
			
 
				+        const sortBy = document.getElementById('drill-sort').value;
			
 
				+        const showMetric = document.getElementById('drill-metric').value;
			
 
				+        const topN = parseInt(document.getElementById('drill-topn').value);
			
 
				+
			
 
				+        // 检查日期变化，更新头部品类列表
			
 
				+        const headSelect = document.getElementById('drill-head');
			
 
				+        if (headDrillData[date] && headSelect.options.length > 0) {{
			
 
				+            const currentHeads = headDrillData[date].heads;
			
 
				+            const firstOption = headSelect.options[0]?.value;
			
 
				+            if (currentHeads[0] !== firstOption) {{
			
 
				+                headSelect.innerHTML = currentHeads.map((h, i) => {{
			
 
				+                    const label = h === 'all' ? '全部（不区分头部品类）' : `#${{i}} ${{h}}`;
			
 
				+                    return `<option value="${{h}}" ${{h === headCate ? 'selected' : ''}}>${{label}}</option>`;
			
 
				+                }}).join('');
			
 
				+            }}
			
 
				+        }}
			
 
				+
			
 
				+        if (!headDrillData[date] || !headCate) {{
			
 
				+            document.getElementById('drill-section').innerHTML = '<p>无数据</p>';
			
 
				+            return;
			
 
				+        }}
			
 
				+
			
 
				+        const data = headDrillData[date].data[headCate];
			
 
				+        if (!data) {{
			
 
				+            document.getElementById('drill-section').innerHTML = '<p>该头部品类无数据</p>';
			
 
				+            return;
			
 
				+        }}
			
 
				+
			
 
				+        // 为每个人群计算 Top N 和整体汇总
			
 
				+        const crowdTopN = {{}};
			
 
				+        const crowdTotal = {{}};
			
 
				+        crowdList.forEach(crowd => {{
			
 
				+            const items = [];
			
 
				+            if (data[crowd]) {{
			
 
				+                for (const cat in data[crowd]) {{
			
 
				+                    if (cat === '_total') {{
			
 
				+                        // 保存整体汇总
			
 
				+                        crowdTotal[crowd] = {{
			
 
				+                            exp: data[crowd][cat].exp || 0,
			
 
				+                            showVal: data[crowd][cat][showMetric] || 0
			
 
				+                        }};
			
 
				+                    }} else {{
			
 
				+                        items.push({{
			
 
				+                            cat: cat,
			
 
				+                            sortVal: data[crowd][cat][sortBy] || 0,
			
 
				+                            showVal: data[crowd][cat][showMetric] || 0,
			
 
				+                            exp: data[crowd][cat].exp || 0
			
 
				+                        }});
			
 
				+                    }}
			
 
				+                }}
			
 
				+            }}
			
 
				+            items.sort((a, b) => b.sortVal - a.sortVal);
			
 
				+            crowdTopN[crowd] = items.slice(0, topN);
			
 
				+        }});
			
 
				+
			
 
				+        // 收集所有品类用于颜色映射
			
 
				+        const allCats = new Set();
			
 
				+        crowdList.forEach(crowd => {{
			
 
				+            crowdTopN[crowd].forEach(item => allCats.add(item.cat));
			
 
				+        }});
			
 
				+        const catList = Array.from(allCats);
			
 
				+
			
 
				+        const catColors = {{}};
			
 
				+        const colorPalette = [
			
 
				+            '#FFCDD2', '#F8BBD0', '#E1BEE7', '#D1C4E9', '#C5CAE9',
			
 
				+            '#BBDEFB', '#B3E5FC', '#B2EBF2', '#B2DFDB', '#C8E6C9',
			
 
				+            '#DCEDC8', '#F0F4C3', '#FFF9C4', '#FFECB3', '#FFE0B2',
			
 
				+            '#FFCCBC', '#D7CCC8', '#CFD8DC', '#BCAAA4', '#B0BEC5'
			
 
				+        ];
			
 
				+        catList.forEach((cat, i) => {{
			
 
				+            catColors[cat] = colorPalette[i % colorPalette.length];
			
 
				+        }});
			
 
				+
			
 
				+        // 计算指标渐变范围
			
 
				+        let maxVal = 0, minVal = Infinity;
			
 
				+        crowdList.forEach(crowd => {{
			
 
				+            crowdTopN[crowd].forEach(item => {{
			
 
				+                if (item.showVal > maxVal) maxVal = item.showVal;
			
 
				+                if (item.showVal < minVal) minVal = item.showVal;
			
 
				+            }});
			
 
				+        }});
			
 
				+        if (minVal === Infinity) minVal = 0;
			
 
				+
			
 
				+        function getValueColor(val) {{
			
 
				+            if (maxVal === minVal) return '#C8E6C9';
			
 
				+            const ratio = (val - minVal) / (maxVal - minVal);
			
 
				+            const r = Math.round(200 - ratio * 120);
			
 
				+            const g = Math.round(230 - ratio * 80);
			
 
				+            const b = Math.round(201 - ratio * 120);
			
 
				+            return `rgb(${{r}},${{g}},${{b}})`;
			
 
				+        }}
			
 
				+
			
 
				+        // 生成表格
			
 
				+        let html = '';
			
 
				+        crowdList.forEach(crowd => {{
			
 
				+            const colSpan = showMetric === 'exp' ? 3 : 4;
			
 
				+            html += `<div class="crowd-block">
			
 
				+                <table>
			
 
				+                    <thead>
			
 
				+                        <tr><th colspan="${{colSpan}}" style="background:${{crowdColors[crowd]}};color:white">${{crowd}}</th></tr>
			
 
				+                        <tr><th class="rn">rn</th><th>推荐品类</th><th>exp</th>${{showMetric !== 'exp' ? `<th>${{showMetric}}</th>` : ''}}</tr>
			
 
				+                    </thead>
			
 
				+                    <tbody>`;
			
 
				+
			
 
				+            if (crowdTopN[crowd].length === 0) {{
			
 
				+                html += `<tr><td colspan="${{colSpan}}" style="color:#999">无数据</td></tr>`;
			
 
				+            }} else {{
			
 
				+                // 先添加整体汇总行 (rn=0)
			
 
				+                if (crowdTotal[crowd]) {{
			
 
				+                    const totalExp = parseInt(crowdTotal[crowd].exp).toLocaleString();
			
 
				+                    const totalMetric = (crowdTotal[crowd].showVal * 100).toFixed(1) + '%';
			
 
				+                    html += `<tr style="background:#f5f5f5;font-weight:bold">
			
 
				+                        <td class="rn">0</td>
			
 
				+                        <td class="cat" style="background:#e0e0e0">整体</td>
			
 
				+                        <td class="val">${{totalExp}}</td>
			
 
				+                        ${{showMetric !== 'exp' ? `<td class="val">${{totalMetric}}</td>` : ''}}
			
 
				+                    </tr>`;
			
 
				+                }}
			
 
				+                // 添加 Top N 品类
			
 
				+                crowdTopN[crowd].forEach((item, i) => {{
			
 
				+                    const expDisplay = parseInt(item.exp).toLocaleString();
			
 
				+                    const metricDisplay = (item.showVal * 100).toFixed(1) + '%';
			
 
				+                    const valColor = getValueColor(item.showVal);
			
 
				+                    const catColor = catColors[item.cat];
			
 
				+                    const catAttr = item.cat.replace(/"/g, '&quot;');
			
 
				+                    html += `<tr>
			
 
				+                        <td class="rn">${{i + 1}}</td>
			
 
				+                        <td class="cat" style="background:${{catColor}}" data-cat="${{catAttr}}" onmouseenter="highlightCat(this)" onmouseleave="unhighlightCat()">${{item.cat}}</td>
			
 
				+                        <td class="val">${{expDisplay}}</td>
			
 
				+                        ${{showMetric !== 'exp' ? `<td class="val" style="background:${{valColor}}">${{metricDisplay}}</td>` : ''}}
			
 
				+                    </tr>`;
			
 
				+                }});
			
 
				+            }}
			
 
				+
			
 
				+            html += `</tbody></table></div>`;
			
 
				+        }});
			
 
				+
			
 
				+        document.getElementById('drill-section').innerHTML = html;
			
 
				+    }}
			
 
				+
			
 
				+    // 监听日期变化，更新头部品类列表
			
 
				+    document.getElementById('drill-date').addEventListener('change', function() {{
			
 
				+        const date = this.value;
			
 
				+        const headSelect = document.getElementById('drill-head');
			
 
				+        const currentHead = headSelect.value;
			
 
				+
			
 
				+        if (headDrillData[date]) {{
			
 
				+            const heads = headDrillData[date].heads;
			
 
				+            headSelect.innerHTML = heads.map((h, i) => {{
			
 
				+                const label = h === 'all' ? '全部（不区分头部品类）' : `#${{i}} ${{h}}`;
			
 
				+                return `<option value="${{h}}" ${{h === currentHead ? 'selected' : ''}}>${{label}}</option>`;
			
 
				+            }}).join('');
			
 
				+        }} else {{
			
 
				+            headSelect.innerHTML = '<option value="">无数据</option>';
			
 
				+        }}
			
 
				+        updateHeadDrill();
			
 
				+    }});
			
 
				+
			
 
				+    updateMatrix();
			
 
				+    initHeadDrill();
			
 
				+    </script>
			
 
				+</body>
			
 
				+</html>
			
 
				+"""
			
 
				+
			
 
				+html_file = output_dir / f"{latest_file.stem}_头部品类分析.html"
			
 
				+with open(html_file, 'w', encoding='utf-8') as f:
			
 
				+    f.write(html_content)
			
 
				+
			
 
				+print(f"\nHTML 报告已生成: {html_file}")
			
--- a/tasks/承接/头部品类与承接品类分析/_archive/visualize_correlation.py
+++ b/tasks/承接/头部品类与承接品类分析/_archive/visualize_correlation.py
@@ -0,0 +1,768 @@
 
				+#!/usr/bin/env python
			
 
				+# coding=utf-8
			
 
				+"""
			
 
				+品类承接裂变率相关性分析 - HTML可视化
			
 
				+Tab 1: 品类一致性分析 - 同品类vs跨品类vov对比
			
 
				+Tab 2: 品类组合稳定性 - 跨人群相关性散点图
			
 
				+Tab 3: 品类亲和性矩阵 - 热力图
			
 
				+"""
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+
			
 
				+task_dir = Path(__file__).parent
			
 
				+output_dir = task_dir / "output"
			
 
				+
			
 
				+# 找到最新的原始数据文件
			
 
				+csv_files = [f for f in output_dir.glob("query_*.csv") if not f.name.endswith('.html')]
			
 
				+if not csv_files:
			
 
				+    print("没有找到数据文件，请先运行 query.sql")
			
 
				+    exit(1)
			
 
				+
			
 
				+latest_file = max(csv_files, key=lambda x: x.stat().st_mtime)
			
 
				+df = pd.read_csv(latest_file)
			
 
				+
			
 
				+print(f"分析文件: {latest_file.name}")
			
 
				+print(f"时间范围: {df['dt'].min()} ~ {df['dt'].max()}")
			
 
				+
			
 
				+# 过滤掉 headvideoid为空 的记录
			
 
				+df_valid = df[~df['head_cate2'].isin(['headvideoid为空', '未匹配品类'])].copy()
			
 
				+df_valid['is_same_cate'] = df_valid['head_cate2'] == df_valid['rec_cate2']
			
 
				+df_valid['cate_pair'] = df_valid['head_cate2'] + ' → ' + df_valid['rec_cate2']
			
 
				+
			
 
				+crowd_list = ['内部', '外部0层', '外部裂变']
			
 
				+date_list = ['全部'] + sorted([str(d) for d in df_valid['dt'].unique()])
			
 
				+EXP_THRESHOLD = 10000  # 亲和性矩阵的曝光阈值（全部天数）
			
 
				+EXP_THRESHOLD_DAILY = 1000  # 单日曝光阈值
			
 
				+
			
 
				+# ========== 1. 品类一致性数据 ==========
			
 
				+consistency_data = {'crowds': crowd_list, 'same': [], 'diff': [], 'ratio': []}
			
 
				+for crowd in crowd_list:
			
 
				+    crowd_df = df_valid[df_valid['crowd'] == crowd]
			
 
				+    same = crowd_df[crowd_df['is_same_cate']]
			
 
				+    diff = crowd_df[~crowd_df['is_same_cate']]
			
 
				+    same_vov = same['new_exposure_cnt'].sum() / same['exp'].sum() if same['exp'].sum() > 0 else 0
			
 
				+    diff_vov = diff['new_exposure_cnt'].sum() / diff['exp'].sum() if diff['exp'].sum() > 0 else 0
			
 
				+    consistency_data['same'].append(round(same_vov, 4))
			
 
				+    consistency_data['diff'].append(round(diff_vov, 4))
			
 
				+    consistency_data['ratio'].append(round(same_vov / diff_vov, 2) if diff_vov > 0 else 0)
			
 
				+
			
 
				+# 整体
			
 
				+same_all = df_valid[df_valid['is_same_cate']]
			
 
				+diff_all = df_valid[~df_valid['is_same_cate']]
			
 
				+consistency_data['total_same'] = round(same_all['new_exposure_cnt'].sum() / same_all['exp'].sum(), 4)
			
 
				+consistency_data['total_diff'] = round(diff_all['new_exposure_cnt'].sum() / diff_all['exp'].sum(), 4)
			
 
				+consistency_data['total_ratio'] = round(consistency_data['total_same'] / consistency_data['total_diff'], 2)
			
 
				+
			
 
				+# 同品类曝光占比
			
 
				+consistency_data['same_exp'] = [int(df_valid[(df_valid['crowd'] == c) & df_valid['is_same_cate']]['exp'].sum()) for c in crowd_list]
			
 
				+consistency_data['diff_exp'] = [int(df_valid[(df_valid['crowd'] == c) & ~df_valid['is_same_cate']]['exp'].sum()) for c in crowd_list]
			
 
				+
			
 
				+# ========== 2. 品类亲和性矩阵（按人群分开 + 整体） ==========
			
 
				+def calc_affinity_matrix(data_df, exp_threshold=EXP_THRESHOLD):
			
 
				+    """计算亲和性矩阵数据"""
			
 
				+    # 计算每个head_cate2的基准vov
			
 
				+    head_baseline = data_df.groupby('head_cate2').apply(
			
 
				+        lambda x: x['new_exposure_cnt'].sum() / x['exp'].sum(), include_groups=False
			
 
				+    ).to_dict()
			
 
				+
			
 
				+    affinity_list = []
			
 
				+    for (head, rec), grp in data_df.groupby(['head_cate2', 'rec_cate2']):
			
 
				+        if grp['exp'].sum() >= exp_threshold:
			
 
				+            pair_vov = grp['new_exposure_cnt'].sum() / grp['exp'].sum()
			
 
				+            baseline = head_baseline.get(head, 1)
			
 
				+            affinity = pair_vov / baseline if baseline > 0 else 0
			
 
				+            affinity_list.append({
			
 
				+                'head': head, 'rec': rec,
			
 
				+                'vov': round(pair_vov, 4),
			
 
				+                'baseline': round(baseline, 4),
			
 
				+                'affinity': round(affinity, 2),
			
 
				+                'exp': int(grp['exp'].sum())
			
 
				+            })
			
 
				+
			
 
				+    if not affinity_list:
			
 
				+        return None
			
 
				+
			
 
				+    aff_df = pd.DataFrame(affinity_list)
			
 
				+
			
 
				+    # 构建矩阵数据 - 行列使用相同品类列表，方便看对角线（同品类承接）
			
 
				+    # 合并 head 和 rec 的曝光量，按总曝光排序
			
 
				+    head_exp = aff_df.groupby('head')['exp'].sum()
			
 
				+    rec_exp = aff_df.groupby('rec')['exp'].sum()
			
 
				+    all_cates = set(head_exp.index) | set(rec_exp.index)
			
 
				+    cate_total_exp = {c: head_exp.get(c, 0) + rec_exp.get(c, 0) for c in all_cates}
			
 
				+    cate_list = sorted(cate_total_exp.keys(), key=lambda x: cate_total_exp[x], reverse=True)[:30]
			
 
				+
			
 
				+    # 行列使用相同顺序
			
 
				+    head_list = cate_list
			
 
				+    rec_list = cate_list
			
 
				+
			
 
				+    result = {'rows': head_list, 'cols': rec_list, 'affinity': {}, 'vov': {}, 'exp': {}}
			
 
				+    for head in head_list:
			
 
				+        result['affinity'][head] = {}
			
 
				+        result['vov'][head] = {}
			
 
				+        result['exp'][head] = {}
			
 
				+        for rec in rec_list:
			
 
				+            row = aff_df[(aff_df['head'] == head) & (aff_df['rec'] == rec)]
			
 
				+            if len(row) > 0:
			
 
				+                result['affinity'][head][rec] = float(row.iloc[0]['affinity'])
			
 
				+                result['vov'][head][rec] = float(row.iloc[0]['vov'])
			
 
				+                result['exp'][head][rec] = int(row.iloc[0]['exp'])
			
 
				+            else:
			
 
				+                result['affinity'][head][rec] = 0
			
 
				+                result['vov'][head][rec] = 0
			
 
				+                result['exp'][head][rec] = 0
			
 
				+    return result
			
 
				+
			
 
				+# 先计算全部+整体的矩阵，获取固定的行列顺序
			
 
				+base_matrix = calc_affinity_matrix(df_valid, EXP_THRESHOLD)
			
 
				+fixed_cate_list = base_matrix['rows'] if base_matrix else []
			
 
				+
			
 
				+def calc_affinity_matrix_fixed(data_df, exp_threshold, fixed_list):
			
 
				+    """计算亲和性矩阵数据，使用固定的行列顺序"""
			
 
				+    head_baseline = data_df.groupby('head_cate2').apply(
			
 
				+        lambda x: x['new_exposure_cnt'].sum() / x['exp'].sum(), include_groups=False
			
 
				+    ).to_dict()
			
 
				+
			
 
				+    affinity_dict = {}
			
 
				+    for (head, rec), grp in data_df.groupby(['head_cate2', 'rec_cate2']):
			
 
				+        if grp['exp'].sum() >= exp_threshold:
			
 
				+            pair_vov = grp['new_exposure_cnt'].sum() / grp['exp'].sum()
			
 
				+            baseline = head_baseline.get(head, 1)
			
 
				+            affinity = pair_vov / baseline if baseline > 0 else 0
			
 
				+            affinity_dict[(head, rec)] = {
			
 
				+                'vov': round(pair_vov, 4),
			
 
				+                'affinity': round(affinity, 2),
			
 
				+                'exp': int(grp['exp'].sum())
			
 
				+            }
			
 
				+
			
 
				+    # 使用固定的行列顺序
			
 
				+    result = {'rows': fixed_list, 'cols': fixed_list, 'affinity': {}, 'vov': {}, 'exp': {}}
			
 
				+    for head in fixed_list:
			
 
				+        result['affinity'][head] = {}
			
 
				+        result['vov'][head] = {}
			
 
				+        result['exp'][head] = {}
			
 
				+        for rec in fixed_list:
			
 
				+            if (head, rec) in affinity_dict:
			
 
				+                result['affinity'][head][rec] = float(affinity_dict[(head, rec)]['affinity'])
			
 
				+                result['vov'][head][rec] = float(affinity_dict[(head, rec)]['vov'])
			
 
				+                result['exp'][head][rec] = int(affinity_dict[(head, rec)]['exp'])
			
 
				+            else:
			
 
				+                result['affinity'][head][rec] = 0
			
 
				+                result['vov'][head][rec] = 0
			
 
				+                result['exp'][head][rec] = 0
			
 
				+    return result
			
 
				+
			
 
				+# 计算各日期×人群的矩阵（使用固定行列顺序）
			
 
				+matrix_data = {}
			
 
				+for date in date_list:
			
 
				+    matrix_data[date] = {}
			
 
				+    if date == '全部':
			
 
				+        date_df = df_valid
			
 
				+        threshold = EXP_THRESHOLD
			
 
				+    else:
			
 
				+        date_df = df_valid[df_valid['dt'].astype(str) == date]
			
 
				+        threshold = EXP_THRESHOLD_DAILY
			
 
				+
			
 
				+    # 整体
			
 
				+    matrix_data[date]['整体'] = calc_affinity_matrix_fixed(date_df, threshold, fixed_cate_list)
			
 
				+    # 各人群
			
 
				+    for crowd in crowd_list:
			
 
				+        matrix_data[date][crowd] = calc_affinity_matrix_fixed(
			
 
				+            date_df[date_df['crowd'] == crowd], threshold, fixed_cate_list
			
 
				+        )
			
 
				+
			
 
				+# ========== 4. Top品类组合排名（按人群分开 + 整体） ==========
			
 
				+def calc_ranking(data_df, min_exp=1000):
			
 
				+    """计算品类组合排名"""
			
 
				+    pair_vov = data_df.groupby('cate_pair').apply(
			
 
				+        lambda x: pd.Series({
			
 
				+            'vov': x['new_exposure_cnt'].sum() / x['exp'].sum(),
			
 
				+            'exp': int(x['exp'].sum()),
			
 
				+        }), include_groups=False
			
 
				+    )
			
 
				+    pair_vov = pair_vov[pair_vov['exp'] >= min_exp]
			
 
				+    if len(pair_vov) == 0:
			
 
				+        return {'high': [], 'low': []}
			
 
				+
			
 
				+    all_high = pair_vov.sort_values('vov', ascending=False).head(100)
			
 
				+    all_low = pair_vov.sort_values('vov', ascending=True).head(100)
			
 
				+
			
 
				+    return {
			
 
				+        'high': [{'pair': idx, 'vov': float(round(row['vov'], 4)), 'exp': int(row['exp'])} for idx, row in all_high.iterrows()],
			
 
				+        'low': [{'pair': idx, 'vov': float(round(row['vov'], 4)), 'exp': int(row['exp'])} for idx, row in all_low.iterrows()]
			
 
				+    }
			
 
				+
			
 
				+# 计算各日期×人群的排名
			
 
				+ranking_data = {}
			
 
				+for date in date_list:
			
 
				+    ranking_data[date] = {}
			
 
				+    if date == '全部':
			
 
				+        date_df = df_valid
			
 
				+        min_exp = 1000
			
 
				+    else:
			
 
				+        date_df = df_valid[df_valid['dt'].astype(str) == date]
			
 
				+        min_exp = 100  # 单日阈值更低
			
 
				+
			
 
				+    ranking_data[date]['整体'] = calc_ranking(date_df, min_exp)
			
 
				+    for crowd in crowd_list:
			
 
				+        ranking_data[date][crowd] = calc_ranking(date_df[date_df['crowd'] == crowd], min_exp)
			
 
				+
			
 
				+# 转为JSON
			
 
				+consistency_json = json.dumps(consistency_data, ensure_ascii=False)
			
 
				+matrix_json = json.dumps(matrix_data, ensure_ascii=False)
			
 
				+ranking_json = json.dumps(ranking_data, ensure_ascii=False)
			
 
				+dates_json = json.dumps(date_list, ensure_ascii=False)
			
 
				+
			
 
				+# 日期选项HTML
			
 
				+date_options_html = "".join([f'<option value="{d}" {"selected" if d == "全部" else ""}>{d}</option>' for d in date_list])
			
 
				+
			
 
				+html_content = f"""<!DOCTYPE html>
			
 
				+<html>
			
 
				+<head>
			
 
				+    <meta charset="utf-8">
			
 
				+    <title>品类承接裂变率相关性分析</title>
			
 
				+    <style>
			
 
				+        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
			
 
				+        body {{ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
			
 
				+               background: #f5f5f5; padding: 20px; }}
			
 
				+        .container {{ max-width: 1600px; margin: 0 auto; background: white;
			
 
				+                     border-radius: 8px; padding: 20px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }}
			
 
				+        h1 {{ font-size: 24px; margin-bottom: 10px; color: #333; }}
			
 
				+        .subtitle {{ color: #666; margin-bottom: 20px; font-size: 14px; }}
			
 
				+
			
 
				+        /* Tabs */
			
 
				+        .tabs {{ display: flex; gap: 5px; margin-bottom: 20px; border-bottom: 2px solid #e0e0e0; }}
			
 
				+        .tab {{ padding: 10px 20px; cursor: pointer; border: none; background: none;
			
 
				+               font-size: 14px; color: #666; border-bottom: 2px solid transparent; margin-bottom: -2px; }}
			
 
				+        .tab:hover {{ color: #333; }}
			
 
				+        .tab.active {{ color: #1976D2; border-bottom-color: #1976D2; font-weight: 500; }}
			
 
				+        .tab-content {{ display: none; }}
			
 
				+        .tab-content.active {{ display: block; }}
			
 
				+
			
 
				+        /* Summary cards */
			
 
				+        .summary {{ display: flex; gap: 15px; margin-bottom: 25px; flex-wrap: wrap; }}
			
 
				+        .stat-card {{ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
			
 
				+                     padding: 15px 20px; border-radius: 8px; text-align: center; color: white; min-width: 140px; }}
			
 
				+        .stat-card.green {{ background: linear-gradient(135deg, #11998e 0%, #38ef7d 100%); }}
			
 
				+        .stat-card.orange {{ background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); }}
			
 
				+        .stat-card.blue {{ background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%); }}
			
 
				+        .stat-card h4 {{ font-size: 28px; margin-bottom: 5px; }}
			
 
				+        .stat-card p {{ font-size: 12px; opacity: 0.9; }}
			
 
				+
			
 
				+        /* Bar chart */
			
 
				+        .chart-section {{ margin-bottom: 30px; }}
			
 
				+        .chart-title {{ font-size: 16px; font-weight: 500; margin-bottom: 15px; color: #333; }}
			
 
				+        .bar-chart {{ display: flex; gap: 30px; align-items: flex-end; justify-content: center; padding: 20px; }}
			
 
				+        .bar-group {{ text-align: center; }}
			
 
				+        .bar-pair {{ display: flex; gap: 8px; align-items: flex-end; height: 200px; }}
			
 
				+        .bar {{ width: 50px; border-radius: 4px 4px 0 0; transition: all 0.3s; cursor: pointer; position: relative; }}
			
 
				+        .bar:hover {{ opacity: 0.8; }}
			
 
				+        .bar-value {{ position: absolute; top: -25px; left: 50%; transform: translateX(-50%); font-size: 12px; font-weight: 500; white-space: nowrap; }}
			
 
				+        .bar-label {{ margin-top: 10px; font-size: 13px; color: #333; }}
			
 
				+        .bar-ratio {{ font-size: 11px; color: #666; margin-top: 3px; }}
			
 
				+        .legend {{ display: flex; gap: 20px; justify-content: center; margin-bottom: 15px; }}
			
 
				+        .legend-item {{ display: flex; align-items: center; gap: 6px; font-size: 13px; }}
			
 
				+        .legend-color {{ width: 16px; height: 16px; border-radius: 3px; }}
			
 
				+
			
 
				+        /* Scatter plot */
			
 
				+        .scatter-container {{ display: flex; gap: 20px; flex-wrap: wrap; }}
			
 
				+        .scatter-box {{ flex: 1; min-width: 350px; background: #f8f9fa; border-radius: 8px; padding: 15px; }}
			
 
				+        .scatter-title {{ font-size: 14px; font-weight: 500; margin-bottom: 10px; }}
			
 
				+        .scatter-stats {{ font-size: 12px; color: #666; margin-bottom: 10px; }}
			
 
				+        .scatter-canvas {{ width: 100%; height: 300px; position: relative; background: white; border: 1px solid #e0e0e0; border-radius: 4px; }}
			
 
				+
			
 
				+        /* Matrix */
			
 
				+        .matrix-container {{ overflow-x: auto; max-height: 600px; overflow-y: auto; }}
			
 
				+        table {{ border-collapse: collapse; font-size: 11px; }}
			
 
				+        th, td {{ border: 1px solid #e0e0e0; padding: 4px 6px; text-align: center; white-space: nowrap; }}
			
 
				+        th {{ background: #f5f5f5; font-weight: 600; position: sticky; top: 0; z-index: 1; }}
			
 
				+        th:first-child {{ position: sticky; left: 0; z-index: 3; }}
			
 
				+        td:first-child {{ background: #f5f5f5; font-weight: 500; position: sticky; left: 0; z-index: 1; text-align: left; }}
			
 
				+        .corner-cell {{ background: linear-gradient(to top right, #f5f5f5 49.5%, #ccc 49.5%, #ccc 50.5%, #f5f5f5 50.5%); }}
			
 
				+
			
 
				+        /* Controls */
			
 
				+        .controls {{ display: flex; gap: 15px; margin-bottom: 15px; align-items: center; flex-wrap: wrap; }}
			
 
				+        .control-group {{ display: flex; align-items: center; gap: 6px; }}
			
 
				+        .control-group label {{ font-size: 13px; color: #666; }}
			
 
				+        select {{ padding: 6px 10px; border: 1px solid #ddd; border-radius: 4px; font-size: 13px; }}
			
 
				+        .date-switcher button {{ padding: 5px 10px; border: 1px solid #ddd; background: white; cursor: pointer; border-radius: 3px; }}
			
 
				+        .date-switcher button:hover {{ background: #f0f0f0; }}
			
 
				+        .play-btn {{ background: #4CAF50; color: white; border: none; border-radius: 4px; padding: 5px 12px; font-size: 12px; cursor: pointer; }}
			
 
				+        .play-btn:hover {{ background: #45a049; }}
			
 
				+        .play-btn.playing {{ background: #f44336; }}
			
 
				+        /* Matrix highlight */
			
 
				+        th.highlight, td.row-header.highlight {{ background: #bbdefb !important; }}
			
 
				+        td.cell-highlight {{ outline: 2px solid #1565C0; outline-offset: -1px; }}
			
 
				+
			
 
				+        /* Ranking table */
			
 
				+        .ranking-section {{ display: flex; gap: 30px; }}
			
 
				+        .ranking-box {{ flex: 1; }}
			
 
				+        .ranking-box h4 {{ font-size: 14px; margin-bottom: 10px; padding: 8px; border-radius: 4px; }}
			
 
				+        .ranking-box.high h4 {{ background: #e8f5e9; color: #2e7d32; }}
			
 
				+        .ranking-box.low h4 {{ background: #ffebee; color: #c62828; }}
			
 
				+        .ranking-table {{ width: 100%; border-collapse: collapse; }}
			
 
				+        .ranking-table th {{ background: #f5f5f5; padding: 8px; text-align: left; font-size: 12px; }}
			
 
				+        .ranking-table td {{ padding: 6px 8px; border-bottom: 1px solid #eee; font-size: 12px; }}
			
 
				+        .ranking-table .rn {{ width: 30px; color: #999; }}
			
 
				+        .ranking-table .vov {{ font-family: monospace; text-align: right; }}
			
 
				+        .ranking-table .exp {{ color: #666; text-align: right; }}
			
 
				+
			
 
				+        /* Insight box */
			
 
				+        .insight-box {{ background: #e3f2fd; border-left: 4px solid #1976D2; padding: 15px; margin: 20px 0; border-radius: 0 8px 8px 0; }}
			
 
				+        .insight-box h5 {{ color: #1565C0; margin-bottom: 8px; font-size: 14px; }}
			
 
				+        .insight-box p {{ color: #333; font-size: 13px; line-height: 1.6; }}
			
 
				+    </style>
			
 
				+</head>
			
 
				+<body>
			
 
				+    <div class="container">
			
 
				+        <h1>品类承接裂变率相关性分析</h1>
			
 
				+        <p class="subtitle">分析进入品类与承接品类的关系对裂变效果的影响</p>
			
 
				+
			
 
				+        <div class="tabs">
			
 
				+            <button class="tab active" onclick="switchTab('consistency')">品类一致性</button>
			
 
				+            <button class="tab" onclick="switchTab('affinity')">品类亲和性矩阵</button>
			
 
				+            <button class="tab" onclick="switchTab('ranking')">品类组合排名</button>
			
 
				+        </div>
			
 
				+
			
 
				+        <!-- Tab 1: 品类一致性 -->
			
 
				+        <div id="tab-consistency" class="tab-content active">
			
 
				+            <div class="summary">
			
 
				+                <div class="stat-card green">
			
 
				+                    <h4 id="same-vov">-</h4>
			
 
				+                    <p>同品类承接 vov</p>
			
 
				+                </div>
			
 
				+                <div class="stat-card orange">
			
 
				+                    <h4 id="diff-vov">-</h4>
			
 
				+                    <p>跨品类承接 vov</p>
			
 
				+                </div>
			
 
				+                <div class="stat-card blue">
			
 
				+                    <h4 id="vov-ratio">-</h4>
			
 
				+                    <p>同/跨品类比值</p>
			
 
				+                </div>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="insight-box">
			
 
				+                <h5>核心发现</h5>
			
 
				+                <p>同品类承接（进入品类=承接品类）的裂变率显著高于跨品类承接，约为 <strong id="insight-ratio">-</strong> 倍。
			
 
				+                这说明用户对同类内容有更强的分享意愿，推荐系统在品类匹配上有优化空间。</p>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="chart-section">
			
 
				+                <div class="chart-title">各人群同品类 vs 跨品类 vov 对比</div>
			
 
				+                <div class="legend">
			
 
				+                    <div class="legend-item"><div class="legend-color" style="background:#4CAF50"></div>同品类承接</div>
			
 
				+                    <div class="legend-item"><div class="legend-color" style="background:#2196F3"></div>跨品类承接</div>
			
 
				+                </div>
			
 
				+                <div class="bar-chart" id="consistency-chart"></div>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="chart-section">
			
 
				+                <div class="chart-title">同品类曝光占比</div>
			
 
				+                <div id="exp-ratio-chart" style="display:flex;gap:20px;justify-content:center;"></div>
			
 
				+            </div>
			
 
				+        </div>
			
 
				+
			
 
				+        <!-- Tab 2: 品类亲和性矩阵 -->
			
 
				+        <div id="tab-affinity" class="tab-content">
			
 
				+            <div class="insight-box">
			
 
				+                <h5>亲和性 = 这个组合的表现 / 进入品类的平均表现</h5>
			
 
				+                <p>
			
 
				+                <strong>举例</strong>：用户从「搞笑段子」进入，平均裂变率 0.4<br>
			
 
				+                • 推荐「搞笑段子→搞笑段子」裂变率 0.8，亲和性 = 0.8/0.4 = <span style="color:#2e7d32;font-weight:bold">2.0 ✓ 更对味</span><br>
			
 
				+                • 推荐「搞笑段子→历史名人」裂变率 0.2，亲和性 = 0.2/0.4 = <span style="color:#c62828;font-weight:bold">0.5 ✗ 不对味</span><br><br>
			
 
				+                <strong>颜色</strong>：<span style="background:#c8e6c9;padding:2px 6px;border-radius:3px">绿色=高亲和</span>
			
 
				+                <span style="background:#ffcdd2;padding:2px 6px;border-radius:3px;margin-left:10px">红色=低亲和</span>
			
 
				+                </p>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="controls">
			
 
				+                <div class="control-group date-switcher">
			
 
				+                    <label>日期:</label>
			
 
				+                    <button onclick="switchMatrixDate(-1)">◀</button>
			
 
				+                    <select id="matrix-date" onchange="updateMatrix()">
			
 
				+                        {date_options_html}
			
 
				+                    </select>
			
 
				+                    <button onclick="switchMatrixDate(1)">▶</button>
			
 
				+                    <button id="matrix-play-btn" class="play-btn" onclick="toggleMatrixPlay()">▶ 播放</button>
			
 
				+                </div>
			
 
				+                <div class="control-group">
			
 
				+                    <label>人群:</label>
			
 
				+                    <select id="matrix-crowd" onchange="updateMatrix()">
			
 
				+                        <option value="整体" selected>整体</option>
			
 
				+                        <option value="内部">内部</option>
			
 
				+                        <option value="外部0层">外部0层</option>
			
 
				+                        <option value="外部裂变">外部裂变</option>
			
 
				+                    </select>
			
 
				+                </div>
			
 
				+                <div class="control-group">
			
 
				+                    <label>显示指标:</label>
			
 
				+                    <select id="matrix-metric" onchange="updateMatrix()">
			
 
				+                        <option value="affinity" selected>亲和性 (affinity)</option>
			
 
				+                        <option value="vov">裂变率 (vov)</option>
			
 
				+                        <option value="exp">曝光量 (exp)</option>
			
 
				+                    </select>
			
 
				+                </div>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="matrix-container">
			
 
				+                <table id="affinity-table">
			
 
				+                    <thead id="affinity-header"></thead>
			
 
				+                    <tbody id="affinity-body"></tbody>
			
 
				+                </table>
			
 
				+            </div>
			
 
				+        </div>
			
 
				+
			
 
				+        <!-- Tab 3: 品类组合排名 -->
			
 
				+        <div id="tab-ranking" class="tab-content">
			
 
				+            <div class="insight-box">
			
 
				+                <h5>筛选条件</h5>
			
 
				+                <p>仅展示在 ≥2 个人群中都有数据且曝光量 ≥1000 的品类组合，确保结果稳定可靠。</p>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="controls">
			
 
				+                <div class="control-group date-switcher">
			
 
				+                    <label>日期:</label>
			
 
				+                    <button onclick="switchRankingDate(-1)">◀</button>
			
 
				+                    <select id="ranking-date" onchange="initRanking()">
			
 
				+                        {date_options_html}
			
 
				+                    </select>
			
 
				+                    <button onclick="switchRankingDate(1)">▶</button>
			
 
				+                    <button id="ranking-play-btn" class="play-btn" onclick="toggleRankingPlay()">▶ 播放</button>
			
 
				+                </div>
			
 
				+                <div class="control-group">
			
 
				+                    <label>人群:</label>
			
 
				+                    <select id="ranking-crowd" onchange="initRanking()">
			
 
				+                        <option value="整体" selected>整体</option>
			
 
				+                        <option value="内部">内部</option>
			
 
				+                        <option value="外部0层">外部0层</option>
			
 
				+                        <option value="外部裂变">外部裂变</option>
			
 
				+                    </select>
			
 
				+                </div>
			
 
				+                <div class="control-group">
			
 
				+                    <label>展示数量:</label>
			
 
				+                    <select id="ranking-topn" onchange="initRanking()">
			
 
				+                        <option value="20">Top 20</option>
			
 
				+                        <option value="50">Top 50</option>
			
 
				+                        <option value="100">Top 100</option>
			
 
				+                    </select>
			
 
				+                </div>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="ranking-section">
			
 
				+                <div class="ranking-box high">
			
 
				+                    <h4>Top 20 高裂变品类组合</h4>
			
 
				+                    <table class="ranking-table" id="high-ranking"></table>
			
 
				+                </div>
			
 
				+                <div class="ranking-box low">
			
 
				+                    <h4>Top 20 低裂变品类组合</h4>
			
 
				+                    <table class="ranking-table" id="low-ranking"></table>
			
 
				+                </div>
			
 
				+            </div>
			
 
				+        </div>
			
 
				+    </div>
			
 
				+
			
 
				+    <script>
			
 
    // Data payloads substituted into the page by the surrounding Python template
    // (each placeholder is replaced with a literal when the report is generated).
    const consistencyData = {consistency_json};
    const matrixData = {matrix_json};
    const rankingData = {ranking_json};
    // Ordered list of the values offered by the date selectors; used for prev/next and auto-play.
    const dateList = {dates_json};

    // setInterval handles for the two auto-play loops; null while nothing is playing.
    let matrixPlayInterval = null;
    let rankingPlayInterval = null;
			
 
				+
			
 
				+    // Tab switching
			
 
    // Show the tab identified by tabId: its button and its content panel become
    // the only elements carrying the "active" class.
    function switchTab(tabId) {{
        // Reset every tab button and every panel in one pass.
        for (const el of document.querySelectorAll('.tab, .tab-content')) {{
            el.classList.remove('active');
        }}
        // The button has no id of its own; it is located through its inline onclick attribute.
        const tabButton = document.querySelector(`[onclick="switchTab('${{tabId}}')"]`);
        tabButton.classList.add('active');
        const panel = document.getElementById(`tab-${{tabId}}`);
        panel.classList.add('active');
    }}
			
 
				+
			
 
				+    // Initialize consistency chart
			
 
				+    function initConsistency() {{
			
 
				+        const data = consistencyData;
			
 
				+        document.getElementById('same-vov').textContent = data.total_same.toFixed(4);
			
 
				+        document.getElementById('diff-vov').textContent = data.total_diff.toFixed(4);
			
 
				+        document.getElementById('vov-ratio').textContent = data.total_ratio.toFixed(2) + 'x';
			
 
				+        document.getElementById('insight-ratio').textContent = data.total_ratio.toFixed(2);
			
 
				+
			
 
				+        const maxVov = Math.max(...data.same, ...data.diff);
			
 
				+        const chartHtml = data.crowds.map((crowd, i) => {{
			
 
				+            const sameH = Math.round(data.same[i] / maxVov * 180);
			
 
				+            const diffH = Math.round(data.diff[i] / maxVov * 180);
			
 
				+            return `
			
 
				+                <div class="bar-group">
			
 
				+                    <div class="bar-pair">
			
 
				+                        <div class="bar" style="height:${{sameH}}px;background:#4CAF50">
			
 
				+                            <span class="bar-value">${{data.same[i].toFixed(4)}}</span>
			
 
				+                        </div>
			
 
				+                        <div class="bar" style="height:${{diffH}}px;background:#2196F3">
			
 
				+                            <span class="bar-value">${{data.diff[i].toFixed(4)}}</span>
			
 
				+                        </div>
			
 
				+                    </div>
			
 
				+                    <div class="bar-label">${{crowd}}</div>
			
 
				+                    <div class="bar-ratio">${{data.ratio[i]}}x</div>
			
 
				+                </div>
			
 
				+            `;
			
 
				+        }}).join('');
			
 
				+        document.getElementById('consistency-chart').innerHTML = chartHtml;
			
 
				+
			
 
				+        // Exp ratio
			
 
				+        const expHtml = data.crowds.map((crowd, i) => {{
			
 
				+            const total = data.same_exp[i] + data.diff_exp[i];
			
 
				+            const sameRatio = total > 0 ? (data.same_exp[i] / total * 100).toFixed(1) : 0;
			
 
				+            return `
			
 
				+                <div style="text-align:center">
			
 
				+                    <div style="font-size:13px;margin-bottom:5px">${{crowd}}</div>
			
 
				+                    <div style="width:150px;height:20px;background:#e0e0e0;border-radius:10px;overflow:hidden">
			
 
				+                        <div style="width:${{sameRatio}}%;height:100%;background:#4CAF50"></div>
			
 
				+                    </div>
			
 
				+                    <div style="font-size:11px;color:#666;margin-top:3px">同品类占比: ${{sameRatio}}%</div>
			
 
				+                </div>
			
 
				+            `;
			
 
				+        }}).join('');
			
 
				+        document.getElementById('exp-ratio-chart').innerHTML = expHtml;
			
 
				+    }}
			
 
				+
			
 
				+    // Matrix
			
 
				+    function updateMatrix() {{
			
 
				+        const date = document.getElementById('matrix-date').value;
			
 
				+        const crowd = document.getElementById('matrix-crowd').value;
			
 
				+        const metric = document.getElementById('matrix-metric').value;
			
 
				+
			
 
				+        if (!matrixData[date] || !matrixData[date][crowd]) {{
			
 
				+            document.getElementById('affinity-header').innerHTML = '<tr><th>无数据</th></tr>';
			
 
				+            document.getElementById('affinity-body').innerHTML = '';
			
 
				+            return;
			
 
				+        }}
			
 
				+
			
 
				+        const data = matrixData[date][crowd];
			
 
				+        const metricData = data[metric];
			
 
				+
			
 
				+        // Calculate color range
			
 
				+        const allVals = [];
			
 
				+        data.rows.forEach(r => data.cols.forEach(c => {{
			
 
				+            const val = metricData[r]?.[c] || 0;
			
 
				+            if (val > 0) allVals.push(val);
			
 
				+        }}));
			
 
				+
			
 
				+        let maxVal, minVal = 0;
			
 
				+        if (metric === 'affinity') {{
			
 
				+            maxVal = 2; minVal = 0.5;
			
 
				+        }} else if (metric === 'vov') {{
			
 
				+            allVals.sort((a, b) => a - b);
			
 
				+            maxVal = allVals[Math.floor(allVals.length * 0.95)] || 1;
			
 
				+        }} else {{
			
 
				+            allVals.sort((a, b) => a - b);
			
 
				+            maxVal = allVals[Math.floor(allVals.length * 0.9)] || 100000;
			
 
				+        }}
			
 
				+
			
 
				+        function getColor(val) {{
			
 
				+            if (metric === 'affinity') {{
			
 
				+                if (val >= 1) {{
			
 
				+                    const ratio = Math.min((val - 1) / (maxVal - 1), 1);
			
 
				+                    return `rgb(${{Math.round(200 - ratio * 200)}}, ${{Math.round(230 - ratio * 30)}}, ${{Math.round(200 - ratio * 200)}})`;
			
 
				+                }} else {{
			
 
				+                    const ratio = Math.min((1 - val) / (1 - minVal), 1);
			
 
				+                    return `rgb(${{Math.round(230 - ratio * 30)}}, ${{Math.round(200 - ratio * 200)}}, ${{Math.round(200 - ratio * 200)}})`;
			
 
				+                }}
			
 
				+            }} else {{
			
 
				+                const ratio = Math.min(val / maxVal, 1);
			
 
				+                return `rgb(${{Math.round(255 - ratio * 215)}}, ${{Math.round(255 - ratio * 88)}}, ${{Math.round(255 - ratio * 186)}})`;
			
 
				+            }}
			
 
				+        }}
			
 
				+
			
 
				+        const expData = data.exp;
			
 
				+
			
 
				+        // 计算每行和每列的总曝光量
			
 
				+        const rowTotals = {{}};
			
 
				+        const colTotals = {{}};
			
 
				+        data.rows.forEach(r => {{
			
 
				+            rowTotals[r] = data.cols.reduce((sum, c) => sum + (expData[r]?.[c] || 0), 0);
			
 
				+        }});
			
 
				+        data.cols.forEach(c => {{
			
 
				+            colTotals[c] = data.rows.reduce((sum, r) => sum + (expData[r]?.[c] || 0), 0);
			
 
				+        }});
			
 
				+
			
 
				+        document.getElementById('affinity-header').innerHTML = `
			
 
				+            <tr>
			
 
				+                <th class="corner-cell" style="width:120px">进入↓ 承接→</th>
			
 
				+                ${{data.cols.map((c, ci) => `<th data-col="${{ci}}" title="${{c}}\\nexp: ${{colTotals[c].toLocaleString()}}">${{c.length > 6 ? c.substring(0,6) + '..' : c}}</th>`).join('')}}
			
 
				+            </tr>
			
 
				+        `;
			
 
				+
			
 
				+        document.getElementById('affinity-body').innerHTML = data.rows.map((r, ri) => {{
			
 
				+            const cells = data.cols.map((c, ci) => {{
			
 
				+                const val = metricData[r]?.[c] || 0;
			
 
				+                const exp = expData[r]?.[c] || 0;
			
 
				+                const bg = val > 0 ? getColor(val) : '#f8f9fa';
			
 
				+                const isDiagonal = (r === c);  // 对角线：同品类承接
			
 
				+                let display;
			
 
				+                if (metric === 'exp') {{
			
 
				+                    display = val > 0 ? (val >= 10000 ? Math.round(val/1000) + 'k' : val) : '-';
			
 
				+                }} else {{
			
 
				+                    display = val > 0 ? val.toFixed(2) : '-';
			
 
				+                }}
			
 
				+                // 计算横向和纵向占比
			
 
				+                const rowPct = rowTotals[r] > 0 ? (exp / rowTotals[r] * 100).toFixed(1) : '0.0';
			
 
				+                const colPct = colTotals[c] > 0 ? (exp / colTotals[c] * 100).toFixed(1) : '0.0';
			
 
				+                const tooltip = `进入: ${{r}}\\n承接: ${{c}}\\n${{metric}}: ${{val}}\\nexp: ${{exp.toLocaleString()}}\\n横向占比: ${{rowPct}}%\\n纵向占比: ${{colPct}}%${{isDiagonal ? '\\n★ 同品类承接' : ''}}`;
			
 
				+                const border = isDiagonal ? 'border:2px solid #1565C0;' : '';
			
 
				+                return `<td data-row="${{ri}}" data-col="${{ci}}" style="background:${{bg}};${{border}}" title="${{tooltip}}" onmouseenter="highlightCell(${{ri}},${{ci}})" onmouseleave="unhighlightCell()">${{display}}</td>`;
			
 
				+            }}).join('');
			
 
				+            return `<tr><td class="row-header" data-row="${{ri}}" title="${{r}}\\nexp: ${{rowTotals[r].toLocaleString()}}">${{r.length > 10 ? r.substring(0,10) + '..' : r}}</td>${{cells}}</tr>`;
			
 
				+        }}).join('');
			
 
				+    }}
			
 
				+
			
 
				+    // Highlight row/col headers on cell hover
			
 
    // Mark the column header and the row header that belong to the hovered cell.
    // row and col are the numeric indices stored in the data-row / data-col attributes.
    function highlightCell(row, col) {{
        // Add the "highlight" class to every element matched by selector whose
        // data-<key> attribute parses to the wanted index.
        const mark = (selector, key, wanted) => {{
            for (const el of document.querySelectorAll(selector)) {{
                if (parseInt(el.dataset[key]) === wanted) {{
                    el.classList.add('highlight');
                }}
            }}
        }};
        mark('#affinity-header th[data-col]', 'col', col);
        mark('#affinity-body .row-header', 'row', row);
    }}
			
 
				+
			
 
    // Remove the "highlight" class from every element in the document that has it.
    function unhighlightCell() {{
        for (const el of document.querySelectorAll('.highlight')) {{
            el.classList.remove('highlight');
        }}
    }}
			
 
				+
			
 
				+    // Ranking
			
 
				+    function initRanking() {{
			
 
				+        const date = document.getElementById('ranking-date').value;
			
 
				+        const crowd = document.getElementById('ranking-crowd').value;
			
 
				+        const topN = parseInt(document.getElementById('ranking-topn').value);
			
 
				+
			
 
				+        if (!rankingData[date] || !rankingData[date][crowd]) {{
			
 
				+            document.getElementById('high-ranking').innerHTML = '<tbody><tr><td>无数据</td></tr></tbody>';
			
 
				+            document.getElementById('low-ranking').innerHTML = '<tbody><tr><td>无数据</td></tr></tbody>';
			
 
				+            return;
			
 
				+        }}
			
 
				+
			
 
				+        const data = rankingData[date][crowd];
			
 
				+
			
 
				+        function renderTable(items, tableId) {{
			
 
				+            const sliced = items.slice(0, topN);
			
 
				+            const html = `
			
 
				+                <thead><tr><th class="rn">#</th><th>品类组合</th><th class="vov">vov</th><th class="exp">曝光</th></tr></thead>
			
 
				+                <tbody>
			
 
				+                    ${{sliced.map((item, i) => `
			
 
				+                        <tr>
			
 
				+                            <td class="rn">${{i + 1}}</td>
			
 
				+                            <td>${{item.pair}}</td>
			
 
				+                            <td class="vov">${{item.vov.toFixed(4)}}</td>
			
 
				+                            <td class="exp">${{item.exp.toLocaleString()}}</td>
			
 
				+                        </tr>
			
 
				+                    `).join('')}}
			
 
				+                </tbody>
			
 
				+            `;
			
 
				+            document.getElementById(tableId).innerHTML = html;
			
 
				+        }}
			
 
				+
			
 
				+        // 更新标题
			
 
				+        const dateLabel = date === '全部' ? '' : ` [${{date}}]`;
			
 
				+        const crowdLabel = crowd === '整体' ? '' : ` (${{crowd}})`;
			
 
				+        document.querySelector('.ranking-box.high h4').textContent = `Top ${{topN}} 高裂变品类组合${{crowdLabel}}${{dateLabel}}`;
			
 
				+        document.querySelector('.ranking-box.low h4').textContent = `Top ${{topN}} 低裂变品类组合${{crowdLabel}}${{dateLabel}}`;
			
 
				+
			
 
				+        renderTable(data.high, 'high-ranking');
			
 
				+        renderTable(data.low, 'low-ranking');
			
 
				+    }}
			
 
				+
			
 
				+    // Matrix date switching
			
 
    // Move the matrix date selector by delta positions within dateList and
    // re-render; does nothing when the target position is out of range.
    function switchMatrixDate(delta) {{
        const dateSelect = document.getElementById('matrix-date');
        const target = dateList.indexOf(dateSelect.value) + delta;
        if (target < 0 || target >= dateList.length) {{
            return;
        }}
        dateSelect.value = dateList[target];
        updateMatrix();
    }}
			
 
				+
			
 
    // Start or stop auto-play on the matrix tab. While playing, the date selector
    // advances through dateList every 1.5 seconds and stops after the last entry.
    function toggleMatrixPlay() {{
        const btn = document.getElementById('matrix-play-btn');

        // Cancel the timer and put the button back into its idle state.
        const stop = () => {{
            clearInterval(matrixPlayInterval);
            matrixPlayInterval = null;
            btn.classList.remove('playing');
            btn.textContent = '▶ 播放';
        }};

        if (matrixPlayInterval) {{
            stop();
            return;
        }}

        btn.classList.add('playing');
        btn.textContent = '⏸ 停止';
        let nextIdx = 1;  // start at the first real date (index 0 is skipped as the "全部" entry)
        const step = () => {{
            if (nextIdx >= dateList.length) {{
                stop();
                return;
            }}
            document.getElementById('matrix-date').value = dateList[nextIdx];
            updateMatrix();
            nextIdx += 1;
        }};
        // Show the first frame immediately, then continue on the timer.
        step();
        matrixPlayInterval = setInterval(step, 1500);
    }}
			
 
				+
			
 
				+    // Ranking date switching
			
 
    // Move the ranking date selector by delta positions within dateList and
    // re-render; does nothing when the target position is out of range.
    function switchRankingDate(delta) {{
        const dateSelect = document.getElementById('ranking-date');
        const target = dateList.indexOf(dateSelect.value) + delta;
        const inRange = target >= 0 && target < dateList.length;
        if (!inRange) {{
            return;
        }}
        dateSelect.value = dateList[target];
        initRanking();
    }}
			
 
				+
			
 
    // Start or stop auto-play on the ranking tab. While playing, the date selector
    // advances through dateList every 1.5 seconds and stops after the last entry.
    function toggleRankingPlay() {{
        const btn = document.getElementById('ranking-play-btn');

        // Cancel the timer and put the button back into its idle state.
        const stop = () => {{
            clearInterval(rankingPlayInterval);
            rankingPlayInterval = null;
            btn.classList.remove('playing');
            btn.textContent = '▶ 播放';
        }};

        if (rankingPlayInterval) {{
            stop();
            return;
        }}

        btn.classList.add('playing');
        btn.textContent = '⏸ 停止';
        let nextIdx = 1;  // playback begins at index 1 of dateList
        const step = () => {{
            if (nextIdx >= dateList.length) {{
                stop();
                return;
            }}
            document.getElementById('ranking-date').value = dateList[nextIdx];
            initRanking();
            nextIdx += 1;
        }};
        // Show the first frame immediately, then continue on the timer.
        step();
        rankingPlayInterval = setInterval(step, 1500);
    }}
			
 
				+
			
 
				+    // Initialize
			
 
				+    initConsistency();
			
 
				+    updateMatrix();
			
 
				+    initRanking();
			
 
				+    </script>
			
 
				+</body>
			
 
				+</html>
			
 
				+"""
			
 
				+
			
 
				+html_file = output_dir / f"{latest_file.stem}_品类相关性分析.html"
			
 
				+with open(html_file, 'w', encoding='utf-8') as f:
			
 
				+    f.write(html_content)
			
 
				+
			
 
				+print(f"\nHTML 报告已生成: {html_file}")