#1 合并

Fusionado
yangxiaohui fusionó 23 commits de yangxiaohui/feat/exposure-base-d-chain en yangxiaohui/master hace 3 semanas
Se han modificado 100 ficheros con 21209 adiciones y 76 borrados
  1. 1 0
      .gitignore
  2. 1 1
      default.json
  3. 165 74
      fetch_daily.py
  4. 1943 0
      lib/feishu.py
  5. 125 0
      req/new.md
  6. 124 0
      req/低.md
  7. 421 0
      table_gen/ loghubods.ods_user_active_log_info_day.sql
  8. 722 0
      table_gen/de.sql
  9. 324 0
      table_gen/exposure_return_Bn.sql
  10. 378 0
      table_gen/exposure_return_Bn_v2.sql
  11. 654 0
      table_gen/exposure_return_Bn_v3.sql
  12. 460 0
      table_gen/exposure_return_Bn_v4.sql
  13. 477 0
      table_gen/exposure_return_recursive.sql
  14. 967 0
      table_gen/loghubods.dwd_recsys_alg_exposure_agg_wide_20260209.json
  15. 678 0
      table_gen/loghubods.dwd_recsys_alg_exposure_agg_wide_20260209.sql
  16. 773 0
      table_gen/loghubods.dwd_recsys_alg_exposure_base_20250108.sql
  17. 1014 0
      table_gen/loghubods.dwd_recsys_alg_exposure_base_20260206.sql
  18. 1425 0
      table_gen/loghubods.dwd_recsys_alg_exposure_base_20260209.sql
  19. 202 0
      table_gen/loghubods.dwd_recsys_alg_exposure_base_view_20250402.sql
  20. 194 0
      table_gen/test_1.sql
  21. 300 0
      table_gen/test_2.sql
  22. 601 0
      table_gen/test_3.sql
  23. 632 0
      table_gen/test_4.sql
  24. 118 0
      table_gen/tmp.md
  25. 347 0
      tables/loghubods/loghubods.dwd_recsys_alg_exposure_base_20250108.md
  26. 68 0
      tables/loghubods/user_share_log.txt
  27. 50 0
      tables/videoods/dim_user.txt
  28. 55 0
      tables/videoods/wx_video.txt
  29. 6 0
      tasks/00_AB效果/01_推荐AB天级效果.json
  30. 85 0
      tasks/00_AB效果/01_推荐AB天级效果.sql
  31. 6 0
      tasks/00_AB效果/01_推荐AB实时效果.json
  32. 89 0
      tasks/00_AB效果/01_推荐AB实时效果.sql
  33. 6 0
      tasks/00_AB效果/01_推荐AB实时效果_before.json
  34. 86 0
      tasks/00_AB效果/01_推荐AB实时效果_before.sql
  35. 6 0
      tasks/00_AB效果/01_推荐AB实时效果_分小时.json
  36. 145 0
      tasks/00_AB效果/01_推荐AB实时效果_分小时.sql
  37. 6 0
      tasks/00_AB效果/02_推荐AB天级效果_对比对照组.json
  38. 139 0
      tasks/00_AB效果/02_推荐AB天级效果_对比对照组.sql
  39. 139 0
      tasks/00_AB效果/02_推荐AB天级效果_对比对照组_ab.sql
  40. 6 0
      tasks/00_AB效果/02_推荐AB天级效果_对比对照组_分ab.json
  41. 140 0
      tasks/00_AB效果/02_推荐AB天级效果_对比对照组_分ab.sql
  42. 6 0
      tasks/00_AB效果/02_推荐AB天级效果_对比对照组_分小时.json
  43. 6 0
      tasks/00_AB效果/03_推荐AB天级效果_对比对照组_分小时.json
  44. 145 0
      tasks/00_AB效果/03_推荐AB天级效果_对比对照组_分小时.sql
  45. 6 0
      tasks/00_AB效果/04_推荐AB天级效果_对比对照组_分seq.json
  46. 145 0
      tasks/00_AB效果/04_推荐AB天级效果_对比对照组_分seq.sql
  47. 6 0
      tasks/00_AB效果/05_推荐AB天级效果_对比对照组_含多跳.json
  48. 190 0
      tasks/00_AB效果/05_推荐AB天级效果_对比对照组_含多跳.sql
  49. 195 0
      tasks/00_AB效果/05_推荐AB天级效果_对比对照组_含多跳_v2.sql
  50. 7 0
      tasks/00_尾号实验/base.json
  51. 123 0
      tasks/00_尾号实验/base.sql
  52. 7 0
      tasks/00_尾号实验/base_v1.json
  53. 122 0
      tasks/00_尾号实验/base_v1.sql
  54. 7 0
      tasks/00_尾号实验/base_v2.json
  55. 128 0
      tasks/00_尾号实验/base_v2.sql
  56. 21 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/00_uv情况.sql
  57. 64 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_rootshareid来源分布.sql
  58. 63 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布.sql
  59. 56 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布_180d.sql
  60. 26 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/02_click_depth分布.sql
  61. 27 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/03_click_top视频.sql
  62. 27 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/04_click_top视频_by_depth.sql
  63. 28 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/05_click_top_from_mid.sql
  64. 29 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/06_活跃表概况.sql
  65. 23 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/07_click_来源分布.sql
  66. 25 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/08_click_level_channel分布.sql
  67. 49 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/09_click_内外部_首层分布.sql
  68. 54 0
      tasks/00_表的洞察/loghubods.user_share_log/00_洞察/10_click_top_from_mid_排除外部首层.sql
  69. 90 0
      tasks/00_表的洞察/loghubods.user_share_log/export_neo4j.py
  70. 1 1
      tasks/承接/rosn分析/02_实验组xTop10一级品类_vs对照组.sql
  71. 256 0
      tasks/承接/rosn分析/05_实验组xTop20视频_vs对照组_vor.config
  72. 391 0
      tasks/承接/rosn校准/plot_calibration.py
  73. BIN
      tasks/承接/头部品类与承接品类分析/.DS_Store
  74. 495 0
      tasks/承接/线上实验/08_预测覆盖率效果分析.py
  75. 177 0
      tasks/承接/线上实验/分桶诊断分析.py
  76. 177 0
      tasks/承接/线上实验/分桶诊断分析_full.py
  77. 174 0
      tasks/承接/线上实验/分桶诊断分析_full_v2.py
  78. 8 0
      tasks/报表/01_推荐分发报表.json
  79. 678 0
      tasks/报表/01_推荐分发报表.sql
  80. 293 0
      tasks/指标分析/02_实验组xTop20视频_vs对照组_误差分析_v2_hh.sql
  81. 6 0
      tasks/指标分析/02_实验组xTop20视频_vs对照组_误差分析_v3.json
  82. 339 0
      tasks/指标分析/02_实验组xTop20视频_vs对照组_误差分析_v3.sql
  83. 586 0
      tmp/低vov高曝光分析/step10_可视化.py
  84. 100 0
      tmp/低vov高曝光分析/step1_分析.py
  85. 115 0
      tmp/低vov高曝光分析/step2_分析.py
  86. 122 0
      tmp/低vov高曝光分析/step3_分析.py
  87. 91 0
      tmp/低vov高曝光分析/step3b_分析.py
  88. 93 0
      tmp/低vov高曝光分析/step3c_分析.py
  89. 82 0
      tmp/低vov高曝光分析/step3d_分析.py
  90. 80 0
      tmp/低vov高曝光分析/step3e_copc分析.py
  91. 89 0
      tmp/低vov高曝光分析/step3f_copc详细.py
  92. 134 0
      tmp/低vov高曝光分析/step4_建议.md
  93. 94 0
      tmp/低vov高曝光分析/step5_分析.py
  94. 93 0
      tmp/低vov高曝光分析/step6_历史抽样.py
  95. 130 0
      tmp/低vov高曝光分析/step7_分析.py
  96. 153 0
      tmp/低vov高曝光分析/step8_分析.py
  97. 167 0
      tmp/低vov高曝光分析/step9_天级趋势.py
  98. 91 0
      tmp/低vov高曝光分析/v2_step1_数据驱动定义.py
  99. 106 0
      tmp/低vov高曝光分析/v2_step2_分析.py
  100. 135 0
      tmp/低vov高曝光分析/v2_step3_对比分析.py

+ 1 - 0
.gitignore

@@ -1,3 +1,4 @@
 __pycache__/
 *.pyc
 output/
+.DS_Store

+ 1 - 1
default.json

@@ -1,6 +1,6 @@
 {
   "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
-  "sheet_id": null,
+  "sheet_id": "oYkbVB",
   "sort": "dt:desc",
   "cols": null
 }

+ 165 - 74
fetch_daily.py

@@ -9,6 +9,7 @@
     python fetch_daily.py tasks/xxx/query.sql --days 30          # 获取最近30天
     python fetch_daily.py tasks/xxx/query.sql --start 20260101 --end 20260107
     python fetch_daily.py tasks/xxx/query.sql --date 20260105    # 单天
+    python fetch_daily.py tasks/xxx/query.sql --date 20260105 --hh 08  # 单天单小时
     python fetch_daily.py tasks/xxx/query.sql --force            # 强制重新获取
     python fetch_daily.py tasks/xxx/query.sql --workers 10       # 设置天级并发数
     python fetch_daily.py tasks/xxx/query.sql --parallel 50      # 单天多线程下载(默认50,大数据量推荐)
@@ -35,16 +36,25 @@ success_count = 0
 fail_count = 0
 
 
-def get_existing_dates(daily_dir):
-    """获取已下载的日期列表"""
+def get_existing_dates(daily_dir, hh=None):
+    """获取已下载的日期列表(可选指定小时)"""
     existing = set()
     if not daily_dir.exists():
         return existing
     for f in daily_dir.glob("*.csv"):
         try:
-            dt = f.stem
-            if len(dt) == 8 and dt.isdigit():
-                existing.add(dt)
+            stem = f.stem
+            if hh is not None:
+                # 带小时格式:20250101_08
+                if len(stem) == 11 and stem[8] == '_':
+                    dt = stem[:8]
+                    file_hh = stem[9:11]
+                    if dt.isdigit() and file_hh == hh:
+                        existing.add(dt)
+            else:
+                # 仅日期格式:20250101
+                if len(stem) == 8 and stem.isdigit():
+                    existing.add(stem)
         except:
             pass
     return existing
@@ -151,6 +161,8 @@ def load_feishu_config(sql_file):
         "sheet_id": None,
         "sort": "dt:desc",
         "cols": None,
+        "filter": None,
+        "limit": None,
     }
 
     root_dir = Path(__file__).parent
@@ -249,7 +261,7 @@ def column_index_to_letter(col_idx):
     return result
 
 
-def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc", cols_spec=None):
+def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc", cols_spec=None, filter_spec=None, limit=None):
     """上传 CSV 文件到飞书表格(通过模板行继承样式)
 
     第1行: 表头
@@ -262,6 +274,8 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
         sheet_id: 工作表 ID(None 时自动获取第一个)
         sort_spec: 排序规格,如 "dt:desc,name:asc"
         cols_spec: 列映射规格,如 "dt:日期,name,value:数值"
+        filter_spec: 过滤条件,dict {"字段": "值"} 或 str "字段=值,字段=值"
+        limit: 上传行数上限
     """
     from feishu import Client, LARK_HOST, APP_ID, APP_SECRET, request
 
@@ -289,6 +303,36 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
         if applied:
             print(f"排序: {', '.join(reversed(applied))}")
 
+    # 过滤(排序之后)
+    if filter_spec:
+        # 支持 dict(来自 JSON 配置)或 str(来自命令行 "字段=值,字段!=值")
+        if isinstance(filter_spec, str):
+            filters = []
+            for part in filter_spec.split(","):
+                if "!=" in part:
+                    k, v = part.split("!=", 1)
+                    filters.append((k.strip(), v.strip(), "!="))
+                elif "=" in part:
+                    k, v = part.split("=", 1)
+                    filters.append((k.strip(), v.strip(), "=="))
+        elif isinstance(filter_spec, dict):
+            filters = [(k, v, "==") for k, v in filter_spec.items()]
+
+        before_count = len(data_rows)
+        for field, value, op in filters:
+            if field in header:
+                idx = header.index(field)
+                if op == "!=":
+                    data_rows = [row for row in data_rows if idx < len(row) and row[idx] != str(value)]
+                else:
+                    data_rows = [row for row in data_rows if idx < len(row) and row[idx] == str(value)]
+        print(f"过滤: {filters} → {before_count} → {len(data_rows)} 行")
+
+    # limit(过滤之后)
+    if limit and len(data_rows) > limit:
+        print(f"限制行数: {len(data_rows)} → {limit}")
+        data_rows = data_rows[:limit]
+
     # 列映射(排序之后)
     header, data_rows = apply_cols_mapping(header, data_rows, cols_spec)
 
@@ -310,18 +354,49 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
     current_cols = sheet_props['column_count'] if sheet_props else 26
     header_end_col = column_index_to_letter(current_cols)
 
+    # 扩展列数(CSV 列数超过当前 sheet 列数时)
+    num_csv_cols = len(header)
+    if num_csv_cols > current_cols:
+        add_cols = num_csv_cols - current_cols
+        expand_headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': f'Bearer {access_token}'
+        }
+        expand_payload = {
+            "dimension": {
+                "sheetId": sheet_id,
+                "majorDimension": "COLUMNS",
+                "length": add_cols
+            }
+        }
+        try:
+            request("POST", f"{LARK_HOST}/open-apis/sheets/v2/spreadsheets/{sheet_token}/dimension_range",
+                    expand_headers, expand_payload)
+            print(f"扩展列数: {current_cols} -> {num_csv_cols} (+{add_cols}列)")
+            current_cols = num_csv_cols
+            header_end_col = column_index_to_letter(current_cols)
+        except Exception as e:
+            print(f"  扩展列数失败: {e}")
+
     # 读取飞书表头(获取所有列)
     feishu_header = client.read_range_values(access_token, sheet_token, f"{sheet_id}!A1:{header_end_col}1")
     feishu_cols = []
     if feishu_header and feishu_header[0]:
         feishu_cols = [c for c in feishu_header[0] if c]  # 过滤 None 和空字符串
 
+    # 富文本列转纯文本(飞书表头可能含带链接的 list 结构)
+    def _col_to_str(col):
+        if isinstance(col, list):
+            return "".join(item.get("text", "") for item in col if isinstance(item, dict))
+        return col
+
     if feishu_cols:
-        print(f"飞书表头: {feishu_cols}")
+        feishu_cols_str = [_col_to_str(c) for c in feishu_cols]
+        print(f"飞书表头: {feishu_cols_str}")
         print(f"CSV表头: {header}")
 
         # 校验字段一致性(警告但继续,以飞书表头为准)
-        feishu_set = set(feishu_cols)
+        feishu_set = set(feishu_cols_str)
         csv_set = set(header)
 
         missing_in_csv = feishu_set - csv_set
@@ -332,12 +407,12 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
         if missing_in_feishu:
             print(f"警告: 飞书缺少字段(将忽略): {missing_in_feishu}")
 
-        # 按飞书表头顺序重排数据
+        # 按飞书表头顺序重排数据(用纯文本版本做匹配)
         csv_col_index = {name: i for i, name in enumerate(header)}
         new_converted_rows = []
         for row in converted_rows:
             new_row = []
-            for col_name in feishu_cols:
+            for col_name in feishu_cols_str:
                 if col_name in csv_col_index:
                     new_row.append(row[csv_col_index[col_name]])
                 else:
@@ -348,17 +423,30 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
         header = feishu_cols
         print(f"已按飞书表头顺序重排数据")
     else:
-        # 飞书表头为空,用 CSV 表头写入
+        # 飞书表头为空,用 CSV 表头写入(飞书单次最多写100列,需分批)
         print(f"飞书表头为空,使用 CSV 表头写入")
-        header_range = f"{sheet_id}!A1:{column_index_to_letter(len(header))}1"
-        client.batch_update_values(access_token, sheet_token, {
-            "valueRanges": [{"range": header_range, "values": [header]}]
-        })
+        col_batch = 100
+        for start in range(0, len(header), col_batch):
+            end = min(start + col_batch, len(header))
+            start_col = column_index_to_letter(start + 1)
+            end_col = column_index_to_letter(end)
+            batch_range = f"{sheet_id}!{start_col}1:{end_col}1"
+            client.batch_update_values(access_token, sheet_token, {
+                "valueRanges": [{"range": batch_range, "values": [header[start:end]]}]
+            })
 
     total_rows = len(converted_rows)
     num_cols = len(header)
     end_col = column_index_to_letter(num_cols)
 
+    # 飞书单 sheet 上限 5,000,000 cells,预留表头+模板行
+    CELL_LIMIT = 5_000_000
+    max_data_rows = (CELL_LIMIT // num_cols) - 2
+    if total_rows > max_data_rows:
+        print(f"⚠ 飞书 cell 上限 {CELL_LIMIT:,}({num_cols}列 × {max_data_rows}行),截断 {total_rows} → {max_data_rows} 行")
+        converted_rows = converted_rows[:max_data_rows]
+        total_rows = max_data_rows
+
     print(f"上传到飞书: {total_rows} 行数据")
 
     batch_size = 500
@@ -391,73 +479,62 @@ def upload_to_feishu(csv_file, sheet_token, sheet_id=None, sort_spec="dt:desc",
 
     # 第2步:扩展表格容量(insert 不会自动扩展)
     # 删除后当前只有2行(表头+模板),需要扩展到 2 + total_rows 行
-    needed_rows = 2 + total_rows
     add_url = f"{LARK_HOST}/open-apis/sheets/v2/spreadsheets/{sheet_token}/dimension_range"
-    add_payload = {
-        "dimension": {
-            "sheetId": sheet_id,
-            "majorDimension": "ROWS",
-            "length": total_rows  # 添加数据行数
-        }
-    }
-    try:
-        request("POST", add_url, headers, add_payload)
-        print(f"扩展容量: +{total_rows} 行")
-    except Exception as e:
-        print(f"  扩展容量失败: {e}")
-
-    # 第3步:分批插入空行(继承第2行样式)并写入数据
-    print(f"插入并写入 {total_rows} 行...")
-    insert_url = f"{LARK_HOST}/open-apis/sheets/v2/spreadsheets/{sheet_token}/insert_dimension_range"
-
-    # 反向处理批次(从最后一批开始),因为每次都在第3行前插入
-    batches = [converted_rows[i:i + batch_size] for i in range(0, total_rows, batch_size)]
-    processed = 0
-
-    for batch in reversed(batches):
-        batch_count = len(batch)
-
-        # 在第3行前插入空行(继承第2行样式)
-        insert_payload = {
+    expand_batch = 5000
+    remaining = total_rows
+    expanded = 0
+    while remaining > 0:
+        chunk = min(remaining, expand_batch)
+        add_payload = {
             "dimension": {
                 "sheetId": sheet_id,
                 "majorDimension": "ROWS",
-                "startIndex": 2,  # 0-indexed, 第3行位置
-                "endIndex": 2 + batch_count
-            },
-            "inheritStyle": "BEFORE"
+                "length": chunk
+            }
         }
         try:
-            request("POST", insert_url, headers, insert_payload)
+            request("POST", add_url, headers, add_payload)
+            expanded += chunk
+            remaining -= chunk
         except Exception as e:
-            print(f"  插入行失败: {e}")
+            print(f"  扩展容量失败(已扩展{expanded}): {e}")
             break
+    if expanded > 0:
+        print(f"扩展容量: +{expanded} 行")
 
-        # 写入数据到插入的行(第3行开始)
-        range_str = f"{sheet_id}!A3:{end_col}{2 + batch_count}"
+    # 第3步:分批写入数据到扩展的空行(不再 insert,避免 expand+insert 双重加行超 cell 上限)
+    print(f"写入 {total_rows} 行...")
+    batches = [converted_rows[i:i + batch_size] for i in range(0, total_rows, batch_size)]
+    processed = 0
+
+    for i, batch in enumerate(batches):
+        batch_count = len(batch)
+        start_row = 3 + i * batch_size  # 从第3行开始,顺序写入
+
+        # 写入数据(飞书单次最多100列,需按列分批)
+        col_batch = 100
+        value_ranges = []
+        for col_start in range(0, num_cols, col_batch):
+            col_end = min(col_start + col_batch, num_cols)
+            sc = column_index_to_letter(col_start + 1)
+            ec = column_index_to_letter(col_end)
+            col_range = f"{sheet_id}!{sc}{start_row}:{ec}{start_row + batch_count - 1}"
+            col_values = [row[col_start:col_end] for row in batch]
+            value_ranges.append({"range": col_range, "values": col_values})
         client.batch_update_values(access_token, sheet_token, {
-            "valueRanges": [{"range": range_str, "values": batch}]
+            "valueRanges": value_ranges
         })
 
         processed += batch_count
         print(f"  处理: {processed}/{total_rows}")
 
-    # 第4步:删除末尾多余的空行(扩展容量时添加的)
-    final_row_count = 2 + total_rows  # 表头 + 模板 + 数据
-    current_row_count = 2 + total_rows * 2  # 扩展 + 插入
-    if current_row_count > final_row_count:
-        print(f"清理多余空行...")
+    # 第5步:删除模板行(第2行),仅当初始存在模板行时
+    if current_rows >= 2:
+        print(f"删除模板行...")
         try:
-            client.delete_rows(access_token, sheet_token, sheet_id, final_row_count + 1, current_row_count)
+            client.delete_rows(access_token, sheet_token, sheet_id, 2, 2)
         except Exception as e:
-            print(f"  清理失败: {e}")
-
-    # 第5步:删除模板行(第2行)
-    print(f"删除模板行...")
-    try:
-        client.delete_rows(access_token, sheet_token, sheet_id, 2, 2)
-    except Exception as e:
-        print(f"  删除模板行失败: {e}")
+            print(f"  删除模板行失败: {e}")
 
     print(f"飞书上传完成: {sheet_token}")
 
@@ -474,14 +551,18 @@ def get_date_range(start_str, end_str):
     return dates
 
 
-def fetch_single_day(dt, sql_template, daily_dir, parallel_threads=0, config="default"):
-    """获取单天数据"""
+def fetch_single_day(dt, sql_template, daily_dir, parallel_threads=0, config="default", hh=None):
+    """获取单天数据(可选指定小时)"""
     global success_count, fail_count
 
     try:
         client = ODPSClient(config=config)
         sql = sql_template.replace("${dt}", dt)
-        output_file = daily_dir / f"{dt}.csv"
+        if hh is not None:
+            sql = sql.replace("${hh}", hh)
+            output_file = daily_dir / f"{dt}_{hh}.csv"
+        else:
+            output_file = daily_dir / f"{dt}.csv"
 
         # 下载到文件
         if parallel_threads > 0:
@@ -520,6 +601,7 @@ def main():
     parser.add_argument("--start", type=str, help="开始日期 YYYYMMDD")
     parser.add_argument("--end", type=str, help="结束日期 YYYYMMDD")
     parser.add_argument("--date", type=str, help="单天日期 YYYYMMDD")
+    parser.add_argument("--hh", type=str, default=None, help="小时 HH (00-23),需配合 --date 使用")
     parser.add_argument("--force", action="store_true", help="强制重新获取")
     parser.add_argument("--workers", type=int, default=5, help="天级并发数 (默认5)")
     parser.add_argument("--parallel", type=int, default=50, help="单天多线程下载 (默认50, 大数据量推荐)")
@@ -529,6 +611,8 @@ def main():
     parser.add_argument("--sheet-id", type=str, default=None, help="飞书工作表ID")
     parser.add_argument("--sort", type=str, default=None, help="排序: 字段:asc/desc")
     parser.add_argument("--cols", type=str, default=None, help="列映射: 原名:新名,...")
+    parser.add_argument("--filter", type=str, default=None, help="过滤: 字段=值,字段=值")
+    parser.add_argument("--limit", type=int, default=None, help="上传行数上限")
     parser.add_argument("--config", type=str, default="default", help="ODPS配置: default 或 piaoquan_api")
     args = parser.parse_args()
 
@@ -551,6 +635,10 @@ def main():
         args.sort = feishu_config["sort"]
     if args.cols is None:
         args.cols = feishu_config["cols"]
+    if args.filter is None:
+        args.filter = feishu_config["filter"]
+    if args.limit is None:
+        args.limit = feishu_config["limit"]
 
     # 打印飞书配置
     if args.feishu:
@@ -572,7 +660,7 @@ def main():
             merged_file = merge_csv_files(daily_dir)
             # 如果指定了飞书上传
             if args.feishu and merged_file:
-                upload_to_feishu(merged_file, args.feishu, args.sheet_id, args.sort, args.cols)
+                upload_to_feishu(merged_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit)
         else:
             print("没有可合并的数据")
         return
@@ -591,8 +679,11 @@ def main():
     print(f"目标日期: {target_dates[0]} ~ {target_dates[-1]} ({len(target_dates)}天)")
 
     # 检查已有数据
-    existing_dates = get_existing_dates(daily_dir)
-    print(f"已有数据: {len(existing_dates)}天")
+    existing_dates = get_existing_dates(daily_dir, args.hh)
+    if args.hh:
+        print(f"已有数据: {len(existing_dates)}天 (hh={args.hh})")
+    else:
+        print(f"已有数据: {len(existing_dates)}天")
 
     # 确定需要获取的日期
     if args.force:
@@ -633,7 +724,7 @@ def main():
             print(f"数据目录: {output_file}")
             # 如果指定了飞书上传
             if args.feishu and output_file.exists():
-                upload_to_feishu(output_file, args.feishu, args.sheet_id, args.sort, args.cols)
+                upload_to_feishu(output_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit)
         except Exception as e:
             print(f"✗ 执行失败: {e}")
         return
@@ -648,7 +739,7 @@ def main():
 
     with ThreadPoolExecutor(max_workers=workers) as executor:
         futures = {
-            executor.submit(fetch_single_day, dt, sql_template, daily_dir, args.parallel, args.config): dt
+            executor.submit(fetch_single_day, dt, sql_template, daily_dir, args.parallel, args.config, args.hh): dt
             for dt in missing_dates
         }
 
@@ -673,7 +764,7 @@ def main():
     if args.feishu:
         merged_file = merge_csv_files(daily_dir)
         if merged_file:
-            upload_to_feishu(merged_file, args.feishu, args.sheet_id, args.sort, args.cols)
+            upload_to_feishu(merged_file, args.feishu, args.sheet_id, args.sort, args.cols, args.filter, args.limit)
 
 
 if __name__ == "__main__":

+ 1943 - 0
lib/feishu.py

@@ -0,0 +1,1943 @@
+# -*- coding: UTF-8 -*-
+import json
+import base64
+import hashlib
+import os
+from curl_cffi import requests as mj_requests
+import requests
+import os
+from dotenv import load_dotenv, find_dotenv
+
+load_dotenv(find_dotenv())
+
+# load from env
+APP_ID = 'cli_a22acf2916b8500e'
+APP_SECRET = 'tE0xAB2gZTMlBGdPczCGLcmpRlZQm5CQ'
+LARK_HOST = 'https://open.feishu.cn'
+APP_HOST = 'https://open.feishu.cn'
+EMAIL = 'semsevens@email.com'
+
+class LarkException(Exception):
+    def __init__(self, code=0, msg=None):
+        self.code = code
+        self.msg = msg
+
+    def __str__(self) -> str:
+        return "{}:{}".format(self.code, self.msg)
+
+    __repr__ = __str__
+
+def request(method, url, headers, payload={}):
+    response = requests.request(method, url, headers=headers, json=payload)
+    # logging.info("URL: " + url)
+    # logging.info("X-Tt-Logid: " + response.headers['X-Tt-Logid'])
+    # logging.info("headers:\n"+json.dumps(headers,indent=2, ensure_ascii=False))
+    # logging.info("payload:\n"+json.dumps(payload,indent=2, ensure_ascii=False))
+    resp = {}
+    if response.text[0] == '{':
+        resp = response.json()
+        # logging.info("response:\n"+json.dumps(resp,indent=2, ensure_ascii=False))
+    else:
+        pass
+        # logging.info("response:\n"+response.text)
+    code = resp.get("code", -1)
+    if code == -1:
+        code = resp.get("StatusCode", -1)
+    if code == -1 and response.status_code != 200:
+        response.raise_for_status()
+    if code != 0:
+        raise LarkException(code=code, msg=resp.get("msg", ""))
+    return resp
+def get_image_data_from_url(img_url, use_cache=True):
+    # 计算URL的MD5哈希值
+    url_hash = hashlib.md5(img_url.encode()).hexdigest()
+    cache_dir = 'image_cache'
+    cache_file = os.path.join(cache_dir, f'{url_hash}.json')
+
+    if use_cache:
+        # 检查缓存目录是否存在,如果不存在则创建
+        if not os.path.exists(cache_dir):
+            os.makedirs(cache_dir)
+
+        # 检查缓存文件是否存在
+        if os.path.exists(cache_file):
+            with open(cache_file, 'r') as f:
+                cached_data = json.load(f)
+            return cached_data['image_data']
+
+    # 如果缓存不存在,从URL获取图片
+    if 'midjourney' in img_url:
+        proxies = {
+            'http': 'http://127.0.0.1:7890',
+            'https': 'http://127.0.0.1:7890',
+        }
+        # response = mj_requests.get(img_url, impersonate="chrome100", proxies=proxies)
+        response = mj_requests.get(img_url.replace("https://", "http://"), impersonate="chrome100")
+    else:
+        # proxies = {
+        #     'http': 'http://t10952018781111:1ap37oc3@d844.kdltps.com:15818',
+        #     'https': 'http://t10952018781111:1ap37oc3@d844.kdltps.com:15818',
+        # }
+        # proxies = {
+        #     'http': None,
+        #     'https': None,
+        # }
+        # response = requests.get(img_url.replace("https://", "http://"), proxies=proxies)
+        response = requests.get(img_url)
+        # response = requests.get(img_url, proxies=proxies)
+    if response.status_code == 200:
+        image_content = response.content
+        missing_padding = 4 - len(image_content) % 4
+        if missing_padding:
+            image_content += b'=' * missing_padding
+        image_data = base64.b64encode(image_content).decode('utf-8')
+
+        # 将图片数据保存到缓存
+        with open(cache_file, 'w') as f:
+            json.dump({'image_data': image_data}, f)
+
+        return image_data
+    else:
+        # import traceback
+        # traceback.print_exc()
+        raise Exception(f"无法从URL获取图片: {img_url}")
+from PIL import Image
+import io
+import os
+def get_image_size(img_url):
+    img_data = get_image_data_from_url(img_url)
+    img = Image.open(io.BytesIO(base64.b64decode(img_data)))
+    width, height = img.size
+    return width, height
+
+if __name__ == "__main__":
+    img_url = "https://sns-webpic.xhscdn.com/1040g2sg31c4vs26n12a05ph3cdp3cutm5prqo90"
+    img_data = get_image_data_from_url(img_url)
+
+    save_path = "/Users/nieqi/Downloads/save.json"
+    with open(save_path, 'w') as f:
+        f.write(img_data)
+
+def column_id(col):
+    '''column int to string id'''
+    ans = ""
+    i = col
+    while i > 0:
+        m = int((i-1) % 26)
+        i = int((i-1) / 26)
+        ans = chr(m+65) + ans
+    return ans
+
+def do_compress_image(image_data, image_type):
+     # 压缩图片
+    from PIL import Image
+    import io
+    import base64
+    Image.MAX_IMAGE_PIXELS = None  # 禁用图片大小限制
+
+    # 将base64转为图片对象
+    image = Image.open(io.BytesIO(base64.b64decode(image_data)))
+
+    # 计算压缩后的尺寸,保持宽高比
+    max_size = 1600
+    ratio = min(max_size/image.width, max_size/image.height)
+    if ratio < 1:
+        new_size = (int(image.width * ratio), int(image.height * ratio))
+        image = image.resize(new_size, Image.Resampling.LANCZOS)
+
+    # 在保存之前转换RGBA为RGB
+    if image.mode == 'RGBA':
+        # 创建白色背景
+        background = Image.new('RGB', image.size, (255, 255, 255))
+        # 将RGBA图片合成到白色背景上
+        background.paste(image, mask=image.split()[3])  # 使用alpha通道作为mask
+        image = background
+
+    buffer = io.BytesIO()
+
+    # 将 'JPG' 转换为 'JPEG'
+    if image_type and image_type.upper() == 'JPG':
+        image_type = 'JPEG'
+    image_type = 'JPEG'
+    # image.save(buffer, format=image_type.upper(), quality=95, optimize=True)
+    image.save(buffer, format=image_type.upper(), quality=85, optimize=True)
+    image_data = base64.b64encode(buffer.getvalue()).decode()
+    return image_data
+
+class Client(object):
+    def __init__(self, lark_host):
+        self._host = lark_host
+
+    def get_tenant_access_token(self, app_id, app_secret):
+        url = self._host+"/open-apis/auth/v3/app_access_token/internal/"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8'
+        }
+        payload = {
+            'app_id': app_id,
+            'app_secret': app_secret
+        }
+        resp = request("POST", url, headers, payload)
+        return resp['tenant_access_token']
+
+    def get_user_access_token(self, tenant_access_token, code):
+        url = self._host+"/open-apis/authen/v1/access_token"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8'
+        }
+        payload = {
+            "grant_type": "authorization_code",
+            "code": code,
+            "app_access_token": tenant_access_token
+        }
+        resp = request("POST", url, headers, payload)
+        return resp['data']['access_token']
+
+    def get_root_folder_token(self, access_token):
+        url = self._host+"/open-apis/drive/explorer/v2/root_folder/meta"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': 'Bearer '+access_token
+        }
+        resp = request("GET", url, headers)
+        return resp['data']['token']
+
+    def create_spreadsheet(self, access_token, foldertoken, title):
+        url =self._host+"/open-apis/sheets/v3/spreadsheets"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': 'Bearer '+access_token
+        }
+        payload={
+            "title": title,
+            "folder_token": foldertoken
+        }
+        resp = request("POST", url, headers, payload)
+        return resp['data']['spreadsheet']['spreadsheet_token'], resp['data']['spreadsheet']['url']
+
+    def get_sheetid(self, access_token, doctoken, sheet_index=0):
+        url = self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/metainfo"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': 'Bearer '+access_token
+        }
+        resp = request("GET", url, headers)
+        return resp['data']['sheets'][sheet_index]["sheetId"]
+
+    def batch_update_values(self, access_token, doctoken, data):
+        url =self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/values_batch_update"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': 'Bearer '+access_token
+        }
+        payload=data
+        resp = request("POST", url, headers, payload)
+        return resp['data']['spreadsheetToken']
+
+    def batch_update_styles(self, access_token, doctoken, data):
+        url =self._host+"/open-apis/sheets/v2/spreadsheets/"+doctoken+"/styles_batch_update"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': 'Bearer '+access_token
+        }
+        payload=data
+        resp = request("PUT", url, headers, payload)
+        return resp['data']['spreadsheetToken']
+
+    def add_permissions_member(self, access_token, doctoken, doctype, member_type, member_id, perm):
+        url = self._host+"/open-apis/drive/v1/permissions/"+doctoken+"/members?type="+doctype+"&need_notification=false"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': 'Bearer '+access_token
+        }
+        payload = {
+            "member_type": member_type,
+            "member_id": member_id,
+            "perm": perm
+        }
+        request("POST", url, headers, payload)
+
    def write_image_to_cell(self, access_token, doctoken, sheetid, img_url, row, col, image_type, compress_image=True):
        """Download the image at *img_url* and write it into a single cell.

        Args:
            access_token: tenant access token.
            doctoken: spreadsheet token.
            sheetid: target sheet id.
            img_url: source image URL.
            row: 1-based row number of the target cell.
            col: 1-based column number of the target cell.
            image_type: file extension used when naming the uploaded image.
            compress_image: when True the image is recompressed first.

        Returns:
            (revision, updateRange) from the API, or (None, None) on any
            download or upload failure (errors are printed, not raised).
        """
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_image"
        headers = {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': f'Bearer {access_token}'
        }
        try:
            image_data = get_image_data_from_url(img_url)
        except Exception as e:
            print(img_url)
            print(e)
            return None, None
        if compress_image:
            image_data = do_compress_image(image_data, image_type)

        image_name = img_url.split('/')[-1].replace(f'.{image_type}', '')  # derive a file name from the URL
        if compress_image:
            # presumably do_compress_image re-encodes as JPEG — TODO confirm
            image_type = 'JPEG'
        cell_start = column_id(col)+str(row)
        range = f'{sheetid}!{cell_start}:{cell_start}'  # NOTE(review): shadows the builtin `range`
        payload = {
            "range": range,
            "image": image_data,
            "name": f"{image_name}.{image_type}"
        }
        try:
            resp = request("POST", url, headers, payload)
        except Exception as e:
            print(img_url)
            print(image_name)
            print(image_type)
            print(e)
            return None, None
        return resp['data']['revision'], resp['data']['updateRange']
+
+    def merge_cells(self, access_token, doctoken, sheetid, start_row, end_row, start_col, end_col):
+        print(f"merge  start_row = {start_row} end_row = {end_row} start_col = {start_col} end_col = {end_col}")
+        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/merge_cells"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': f'Bearer {access_token}'
+        }
+
+        start_col_id = column_id(start_col)
+        end_col_id = column_id(end_col)
+
+        payload = {
+            "range": f"{sheetid}!{start_col_id}{start_row}:{end_col_id}{end_row}",
+            "mergeType": "MERGE_ALL",
+        }
+        try:
+            resp = request("POST", url, headers, payload)
+        except Exception as e:
+            print(e)
+            return None
+        return None
+
    def write_images_to_cell(self, access_token, doctoken, sheetid, img_url_list, row, col, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """
        Stitch several images into one grid image and write it into a cell.

        Args:
            img_url_list: list of image URLs to download and combine
            row: 1-based row number of the target cell
            col: 1-based column number of the target cell
            compress_image: when True the combined image is saved as JPEG
                (quality 85), otherwise as PNG
            grid_width: number of grid columns; auto-computed when None
            grid_height: number of grid rows; auto-computed when None
            border_width: border width between tiles, in pixels
            border_color: border color as an RGB tuple

        Returns:
            (revision, updateRange) from the API, or (None, None) when no
            image could be downloaded or the upload fails.
        """
        from PIL import Image, ImageDraw
        import io
        import base64
        import math

        # Download every image; failures are printed and skipped.
        images = []
        for img_url in img_url_list:
            try:
                image_type = get_image_type(img_url)
                if not image_type:
                    continue

                # presumably returns base64-encoded bytes — TODO confirm against get_image_data_from_url
                image_data = get_image_data_from_url(img_url)
                image = Image.open(io.BytesIO(base64.b64decode(image_data)))
                images.append(image)
            except Exception as e:
                print(f"下载图片失败: {img_url}")
                print(e)
                continue

        if not images:
            return None, None

        # Work out the grid dimensions.
        img_count = len(images)
        if grid_width is None and grid_height is None:
            # Neither given: pick the grid closest to a square.
            grid_width = math.ceil(math.sqrt(img_count))
            grid_height = math.ceil(img_count / grid_width)
        elif grid_width is None:
            # Only the row count given: derive the column count.
            grid_width = math.ceil(img_count / grid_height)
        elif grid_height is None:
            # Only the column count given: derive the row count.
            grid_height = math.ceil(img_count / grid_width)

        # Grow the grid until it can hold every image.
        while grid_width * grid_height < img_count:
            if grid_width <= grid_height:
                grid_width += 1
            else:
                grid_height += 1

        # Resize all images to a common tile size, keeping aspect ratio.
        if images:
            # Target tile size: the average width/height across the images.
            avg_width = sum(img.width for img in images) // len(images)
            avg_height = sum(img.height for img in images) // len(images)
            target_size = (avg_width, avg_height)
            
            # Scale each image to fit inside the target tile.
            resized_images = []
            for img in images:
                # Compare aspect ratios to pick the binding dimension.
                img_ratio = img.width / img.height
                target_ratio = target_size[0] / target_size[1]
                
                if img_ratio > target_ratio:
                    # Wider than the tile: bind to the tile width.
                    new_width = target_size[0]
                    new_height = int(target_size[0] / img_ratio)
                else:
                    # Taller than the tile: bind to the tile height.
                    new_height = target_size[1]
                    new_width = int(target_size[1] * img_ratio)
                
                # Resize while preserving the aspect ratio.
                resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                resized_images.append(resized_img)
            
            # Create the canvas with borders between and around the tiles.
            canvas_width = grid_width * avg_width + (grid_width + 1) * border_width
            canvas_height = grid_height * avg_height + (grid_height + 1) * border_width
            canvas = Image.new('RGB', (canvas_width, canvas_height), border_color)
            
            # Paste the tiles in row-major order.
            for i, img in enumerate(resized_images):
                row_idx = i // grid_width
                col_idx = i % grid_width
                
                # Top-left corner of this grid cell (borders included).
                cell_x = col_idx * avg_width + (col_idx + 1) * border_width
                cell_y = row_idx * avg_height + (row_idx + 1) * border_width
                
                # Center the image within its grid cell.
                center_x = cell_x + (avg_width - img.width) // 2
                center_y = cell_y + (avg_height - img.height) // 2
                
                canvas.paste(img, (center_x, center_y))
            
            # Encode the combined image as base64.
            output = io.BytesIO()
            if compress_image:
                canvas.save(output, format='JPEG', quality=85)
                image_type = 'JPEG'
            else:
                canvas.save(output, format='PNG')
                image_type = 'PNG'
            
            output.seek(0)
            image_data = base64.b64encode(output.getvalue()).decode()
            
            # Upload via the values_image endpoint.
            url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_image"
            headers = {
                'Content-Type': 'application/json; charset=utf-8',
                'Authorization': f'Bearer {access_token}'
            }
            
            cell_start = column_id(col) + str(row)
            range_val = f'{sheetid}!{cell_start}:{cell_start}'
            payload = {
                "range": range_val,
                "image": image_data,
                "name": f"combined_image.{image_type}"
            }
            
            try:
                resp = request("POST", url, headers, payload)
                return resp['data']['revision'], resp['data']['updateRange']
            except Exception as e:
                print(f"写入拼接图片失败: {e}")
                return None, None
        
        return None, None
+
+    def read_range_values(self, access_token, doctoken, range_val):
+        """
+        读取指定范围的数据
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token
+            range_val: 范围,格式如 "Sheet1!A1:C10"
+            
+        Returns:
+            读取到的数据列表
+        """
+        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values/{range_val}"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': f'Bearer {access_token}'
+        }
+        
+        try:
+            resp = request("GET", url, headers)
+            return resp['data']['valueRange']['values']
+        except Exception as e:
+            print(f"读取数据失败: {e}")
+            return []
+
+    def prepend_data(self, access_token, doctoken, range_val, values):
+        """
+        在指定位置前面插入数据
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token
+            range_val: 插入范围,格式如 "Sheet1!A1:C1"
+            values: 要插入的数据
+            
+        Returns:
+            操作结果
+        """
+        url = f"{self._host}/open-apis/sheets/v3/spreadsheets/{doctoken}/sheets/{range_val.split('!')[0]}/prepend"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': f'Bearer {access_token}'
+        }
+        
+        # 从range_val中提取行数
+        range_part = range_val.split('!')[1]  # 如 "A1:Z1"
+        start_cell = range_part.split(':')[0]  # 如 "A1"
+        
+        payload = {
+            "values": values
+        }
+        
+        try:
+            resp = request("POST", url, headers, payload)
+            return resp
+        except Exception as e:
+            print(f"插入数据失败: {e}")
+            return None
+
+    def insert_data_at_row(self, access_token, doctoken, sheetid, row, values):
+        """
+        在指定行插入数据(使用批量更新方式)
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token  
+            sheetid: 工作表ID
+            row: 目标行号
+            values: 要插入的数据
+            
+        Returns:
+            操作结果
+        """
+        # 使用批量更新的方式插入数据
+        cols = len(values[0]) if values else 1
+        end_col = column_id(cols)
+        range_val = f"{sheetid}!A{row}:{end_col}{row}"
+        
+        body = {
+            "valueRanges": [
+                {
+                    "range": range_val,
+                    "values": values
+                }
+            ]
+        }
+        
+        try:
+            result = self.batch_update_values(access_token, doctoken, body)
+            return result
+        except Exception as e:
+            print(f"插入数据到第{row}行失败: {e}")
+            return None
+
    def insert_rows_before(self, access_token, doctoken, sheetid, row_index, count=1):
        """
        Insert blank rows before a given row (Feishu insert_dimension_range).

        Args:
            access_token: tenant access token
            doctoken: spreadsheet token
            sheetid: sheet id
            row_index: 1-based row before which to insert
            count: number of rows to insert (default 1)

        Returns:
            The API response on success, or the result of append_empty_rows
            when the position is past the end of the sheet or the insert
            request fails.
        """
        # Look up the sheet first so we know how many rows currently exist.
        sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
        if not sheet_props:
            print("无法获取工作表信息,尝试直接插入")
            current_row_count = 1000  # fallback guess when metadata is unavailable
        else:
            current_row_count = sheet_props['row_count']
            print(f"当前工作表行数: {current_row_count}")
        
        # Past the end of the sheet: fall back to appending rows instead.
        if row_index > current_row_count:
            print(f"插入位置({row_index})超过当前行数({current_row_count}),使用追加模式")
            # Append blank rows at the bottom instead of inserting.
            return self.append_empty_rows(access_token, doctoken, sheetid, count)
        
        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/insert_dimension_range"
        headers = {
            'Content-Type': 'application/json; charset=utf-8',
            'Authorization': f'Bearer {access_token}'
        }
        
        # Convert to 0-based: row_index=3 (third row) maps to startIndex=2.
        start_index = row_index - 1  # 0-based start
        end_index = start_index + count  # exclusive end
        
        # Keep endIndex within the sheet's current row count.
        if end_index > current_row_count:
            print(f"警告:计算的endIndex({end_index})超过当前行数({current_row_count}),调整为追加模式")
            return self.append_empty_rows(access_token, doctoken, sheetid, count)
        
        # Style inheritance: when inserting at row 2, inherit from the data
        # row below; otherwise inherit from the row above.
        inherit_style = "AFTER" if row_index == 2 else "BEFORE"
        
        payload = {
            "dimension": {
                "sheetId": sheetid,
                "majorDimension": "ROWS",
                "startIndex": start_index,  # 0-based, inclusive
                "endIndex": end_index  # exclusive
            },
            "inheritStyle": inherit_style
        }
        
        try:
            resp = request("POST", url, headers, payload)
            print(f"在第{row_index}行前成功插入{count}行(startIndex={start_index}, endIndex={end_index}, inheritStyle={inherit_style})")
            return resp
        except Exception as e:
            print(f"在第{row_index}行前插入{count}行失败: {e}")
            # Last resort: append at the bottom when the insert call fails.
            print("尝试使用追加模式...")
            return self.append_empty_rows(access_token, doctoken, sheetid, count)
+
    def insert_row_with_images(self, access_token, doctoken, sheetid, row, values, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """
        Overwrite a row with data, then upload images for image-URL cells.

        Args:
            access_token: tenant access token
            doctoken: spreadsheet token
            sheetid: sheet id
            row: 1-based target row (overwritten in place, not shifted)
            values: 2-D list; only values[0] is scanned for image cells
            compress_image: recompress images before upload
            grid_width: columns of the stitched image grid
            grid_height: rows of the stitched image grid
            border_width: border width between stitched tiles
            border_color: RGB border color

        Returns:
            The batch-update result, or None when writing the text failed.
        """
        # 1. Write the text values first (overwrites the target row).
        result = self.insert_data_at_row(access_token, doctoken, sheetid, row, values)
        
        if not result:
            return None
            
        # 2. Upload images for any cell that looks like an image URL.
        if values and len(values) > 0:
            row_data = values[0]
            for col_index, cell in enumerate(row_data, start=1):
                if is_image_list_cell_url(cell):
                    # Cell holds a list of image URLs — stitch and upload.
                    try:
                        # NOTE(review): eval() on cell content executes arbitrary
                        # code if the data is untrusted; consider ast.literal_eval.
                        img_urls = eval(cell)
                        self.write_images_to_cell(access_token, doctoken, sheetid, img_urls, row, col_index, compress_image, grid_width, grid_height, border_width, border_color)
                    except Exception as e:
                        print(f"写入图片列表失败 (第{row}行第{col_index}列): {e}")
                elif is_image_cell(cell):
                    # Cell holds a single image URL.
                    image_type = get_image_type(cell)
                    if image_type:
                        try:
                            self.write_image_to_cell(access_token, doctoken, sheetid, cell, row, col_index, image_type, compress_image)
                        except Exception as e:
                            print(f"写入单张图片失败 (第{row}行第{col_index}列): {e}")
        
        return result
+
+    def update_specific_fields(self, access_token, doctoken, sheetid, row, field_updates, headers=None):
+        """
+        只更新指定字段,其他字段保持不变
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token  
+            sheetid: 工作表ID
+            row: 目标行号(从1开始)
+            field_updates: 字段更新字典,格式如 {"列名": "新值", "列B": "新值B"} 
+                          或者 {列索引: "新值", 2: "新值B"}(从1开始计数)
+            headers: 表头列表,用于列名到列索引的映射。如果为None,则field_updates的key必须是列索引
+            
+        Returns:
+            操作结果
+        """
+        try:
+            # 如果提供了headers且field_updates的key是列名,则转换为列索引
+            if headers and field_updates:
+                column_updates = {}
+                for field_name, value in field_updates.items():
+                    if isinstance(field_name, str):  # 如果是列名
+                        try:
+                            col_index = headers.index(field_name) + 1  # 转为1基索引
+                            column_updates[col_index] = value
+                        except ValueError:
+                            print(f"警告:找不到列名 '{field_name}',跳过更新")
+                            continue
+                    else:  # 如果已经是列索引
+                        column_updates[field_name] = value
+            else:
+                column_updates = field_updates
+            
+            # 构建批量更新请求
+            value_ranges = []
+            for col_index, value in column_updates.items():
+                col_letter = column_id(col_index)
+                range_val = f"{sheetid}!{col_letter}{row}:{col_letter}{row}"
+                value_ranges.append({
+                    "range": range_val,
+                    "values": [[value]]
+                })
+            
+            body = {
+                "valueRanges": value_ranges
+            }
+            
+            result = self.batch_update_values(access_token, doctoken, body)
+            
+            if result:
+                updated_fields = list(column_updates.keys())
+                print(f"成功更新第{row}行的字段: {updated_fields}")
+            
+            return result
+        except Exception as e:
+            print(f"更新第{row}行指定字段失败: {e}")
+            return None
+
    def update_row_with_specific_fields_and_images(self, access_token, doctoken, sheetid, row, field_updates, headers=None, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """
        Update selected fields of a row, then upload images for image cells.

        Args:
            access_token: tenant access token
            doctoken: spreadsheet token
            sheetid: sheet id
            row: 1-based target row
            field_updates: mapping of column name (needs *headers*) or
                1-based column index to the new value
            headers: header row used to map column names to indexes
            compress_image: recompress images before upload
            grid_width: columns of the stitched image grid
            grid_height: rows of the stitched image grid
            border_width: border width between stitched tiles
            border_color: RGB border color

        Returns:
            The text-update result, or None when the text update failed.
        """
        # 1. Write the text values first.
        result = self.update_specific_fields(access_token, doctoken, sheetid, row, field_updates, headers)
        
        if not result:
            return None
        
        # 2. Re-derive the name->index mapping (duplicates the logic inside
        #    update_specific_fields) so image cells can be located.
        column_updates = {}
        if headers and field_updates:
            for field_name, value in field_updates.items():
                if isinstance(field_name, str):  # key is a column name
                    try:
                        col_index = headers.index(field_name) + 1
                        column_updates[col_index] = value
                    except ValueError:
                        continue
                else:  # key is already a column index
                    column_updates[field_name] = value
        else:
            column_updates = field_updates
        
        for col_index, cell in column_updates.items():
            if is_image_list_cell_url(cell):
                # Cell holds a list of image URLs — stitch and upload.
                try:
                    # NOTE(review): eval() executes arbitrary code on untrusted data.
                    img_urls = eval(cell)
                    self.write_images_to_cell(access_token, doctoken, sheetid, img_urls, row, col_index, compress_image, grid_width, grid_height, border_width, border_color)
                except Exception as e:
                    print(f"写入图片列表失败 (第{row}行第{col_index}列): {e}")
            elif is_image_cell(cell):
                # Cell holds a single image URL.
                image_type = get_image_type(cell)
                if image_type:
                    try:
                        self.write_image_to_cell(access_token, doctoken, sheetid, cell, row, col_index, image_type, compress_image)
                    except Exception as e:
                        print(f"写入单张图片失败 (第{row}行第{col_index}列): {e}")
        
        return result
+
    def insert_row_with_data_at_position(self, access_token, doctoken, sheetid, row_position, values, compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
        """
        Insert a brand-new row at a position and fill it with data and images.

        Unlike insert_row_with_images (which overwrites), this first inserts
        a blank row — shifting existing rows down — and then writes into it.

        Args:
            row_position: 1-based position; the new row is inserted before it
            values: 2-D list; only values[0] is scanned for image cells
            (remaining parameters as in insert_row_with_images)

        Returns:
            The data-write result, or None when any step failed.
        """
        # The current row count decides whether the insert became an append.
        sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
        current_row_count = sheet_props['row_count'] if sheet_props else 1
        
        # 1. Insert one blank row at the requested position.
        insert_result = self.insert_rows_before(access_token, doctoken, sheetid, row_position, 1)
        
        if not insert_result:
            print(f"插入空行失败,无法在第{row_position}行插入数据")
            return None
        
        # When the position was past the end, insert_rows_before appended a
        # row instead, so the data actually lands at current_row_count + 1.
        actual_row_position = row_position
        if row_position > current_row_count:
            actual_row_position = current_row_count + 1
            print(f"追加模式:实际数据插入位置调整为第{actual_row_position}行")
            
        # 2. Write the text values into the freshly inserted row.
        result = self.insert_data_at_row(access_token, doctoken, sheetid, actual_row_position, values)
        
        if not result:
            print(f"插入数据失败")
            return None
            
        # 3. Upload images for any image-URL cells in the row.
        if values and len(values) > 0:
            row_data = values[0]
            for col_index, cell in enumerate(row_data, start=1):
                if is_image_list_cell_url(cell):
                    # Cell holds a list of image URLs — stitch and upload.
                    try:
                        # NOTE(review): eval() executes arbitrary code on untrusted data.
                        img_urls = eval(cell)
                        self.write_images_to_cell(access_token, doctoken, sheetid, img_urls, actual_row_position, col_index, compress_image, grid_width, grid_height, border_width, border_color)
                    except Exception as e:
                        print(f"写入图片列表失败 (第{actual_row_position}行第{col_index}列): {e}")
                elif is_image_cell(cell):
                    # Cell holds a single image URL.
                    image_type = get_image_type(cell)
                    if image_type:
                        try:
                            self.write_image_to_cell(access_token, doctoken, sheetid, cell, actual_row_position, col_index, image_type, compress_image)
                        except Exception as e:
                            print(f"写入单张图片失败 (第{actual_row_position}行第{col_index}列): {e}")
        
        return result
+
+    def get_sheet_info(self, access_token, doctoken, sheetid):
+        """
+        获取工作表的基础信息
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token
+            sheetid: 工作表ID
+            
+        Returns:
+            工作表信息,包含行数、列数等
+        """
+        url = f"{self._host}/open-apis/sheets/v3/spreadsheets/{doctoken}/sheets/{sheetid}"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': f'Bearer {access_token}'
+        }
+        
+        try:
+            resp = request("GET", url, headers)
+            return resp['data']['sheet']
+        except Exception as e:
+            print(f"获取工作表信息失败: {e}")
+            return None
+
+    def get_sheet_properties(self, access_token, doctoken, sheetid):
+        """
+        获取工作表属性,包括行数和列数
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token
+            sheetid: 工作表ID
+            
+        Returns:
+            dict: 包含 row_count, column_count 等信息
+        """
+        sheet_info = self.get_sheet_info(access_token, doctoken, sheetid)
+        if sheet_info:
+            grid_properties = sheet_info.get('grid_properties', {})
+            return {
+                'row_count': grid_properties.get('row_count', 0),
+                'column_count': grid_properties.get('column_count', 0),
+                'title': sheet_info.get('title', ''),
+                'sheet_id': sheet_info.get('sheet_id', ''),
+                'sheet_type': sheet_info.get('sheet_type', '')
+            }
+        return None
+
+    def append_data(self, access_token, doctoken, range_val, values):
+        """
+        在指定位置后面追加数据
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token
+            range_val: 追加范围,格式如 "Sheet1!A1:C1"
+            values: 要追加的数据
+            
+        Returns:
+            操作结果
+        """
+        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/values_append"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': f'Bearer {access_token}'
+        }
+        
+        payload = {
+            "valueRange": {
+                "range": range_val,
+                "values": values
+            }
+        }
+        
+        try:
+            resp = request("POST", url, headers, payload)
+            return resp
+        except Exception as e:
+            print(f"追加数据失败: {e}")
+            return None
+
+    def delete_rows(self, access_token, doctoken, sheetid, start_row, end_row):
+        """
+        删除指定范围的行
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token
+            sheetid: 工作表ID
+            start_row: 开始行号(从1开始)
+            end_row: 结束行号(从1开始,包含)
+            
+        Returns:
+            操作结果
+        """
+        url = f"{self._host}/open-apis/sheets/v2/spreadsheets/{doctoken}/dimension_range"
+        headers = {
+            'Content-Type': 'application/json; charset=utf-8',
+            'Authorization': f'Bearer {access_token}'
+        }
+        
+        payload = {
+            "dimension": {
+                "sheetId": sheetid,
+                "majorDimension": "ROWS",
+                "startIndex": start_row,  # 从1开始计数,包含
+                "endIndex": end_row       # 从1开始计数,包含
+            }
+        }
+        
+        try:
+            resp = request("DELETE", url, headers, payload)
+            return resp
+        except Exception as e:
+            print(f"删除第{start_row}-{end_row}行失败: {e}")
+            return None
+
+    def delete_single_row(self, access_token, doctoken, sheetid, row):
+        """
+        删除单行
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token
+            sheetid: 工作表ID
+            row: 行号(从1开始)
+            
+        Returns:
+            操作结果
+        """
+        return self.delete_rows(access_token, doctoken, sheetid, row, row)
+
+    def append_empty_rows(self, access_token, doctoken, sheetid, count=1):
+        """
+        在工作表末尾追加空行
+        
+        Args:
+            access_token: 访问令牌
+            doctoken: 表格token
+            sheetid: 工作表ID
+            count: 追加行数(默认1行)
+            
+        Returns:
+            操作结果
+        """
+        # 获取当前工作表信息
+        sheet_props = self.get_sheet_properties(access_token, doctoken, sheetid)
+        if not sheet_props:
+            print("无法获取工作表信息,追加失败")
+            return None
+        
+        current_row_count = sheet_props['row_count']
+        current_col_count = sheet_props['column_count']
+        
+        print(f"在工作表末尾追加{count}行,当前行数: {current_row_count}")
+        
+        # 构造空数据行
+        empty_values = [[''] * max(current_col_count, 1) for _ in range(count)]
+        
+        # 使用append_data在末尾追加
+        range_val = f"{sheetid}!A{current_row_count + 1}:{column_id(max(current_col_count, 1))}{current_row_count + count}"
+        
+        try:
+            result = self.append_data(access_token, doctoken, range_val, empty_values)
+            if result:
+                print(f"成功在末尾追加{count}行空行")
+            return result
+        except Exception as e:
+            print(f"追加空行失败: {e}")
+            return None
+
+# -*- coding: UTF-8 -*-
+import json
+import logging
+from datetime import datetime
+import re
+import os
+import requests
+from urllib.parse import urlparse
+
# Configure root logging once at import time: timestamped INFO-level output.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

import os  # NOTE(review): duplicate — os is already imported above
logging.info(os.getcwd())  # log the working directory at import time
+
def column_id(col):
    """Convert a 1-based column number to its spreadsheet letter id.

    Bijective base-26: 1 -> 'A', 26 -> 'Z', 27 -> 'AA'. Returns '' for
    col <= 0.
    """
    letters = ""
    n = col
    while n > 0:
        n, rem = divmod(n - 1, 26)
        letters = chr(rem + 65) + letters
    return letters
+
def get_image_type(url):
    """Best-effort guess of an image's type from its URL.

    First tries the extension of the URL path; if that is not a known
    image extension, falls back to substring sniffing over the whole URL
    (later markers win), defaulting to 'jpeg'. Returns the lowercase type
    string, or None on unexpected errors.
    """
    try:
        suffix = urlparse(url).path.split('.')[-1].lower()
        if suffix in ('jpg', 'jpeg', 'png', 'gif'):
            return suffix
        # Fallback: sniff the whole URL; the last matching marker wins.
        guess = 'jpeg'
        for marker in ('jpg', 'jpeg', 'png', 'gif', 'webp'):
            if marker in url:
                guess = marker
        return guess
    except Exception as e:
        print(f"获取图片类型时出错: {str(e)}")
        return None
+
def is_image_cell(cell):
    """Heuristically decide whether a cell value is a single image URL.

    Strings containing any CJK character are rejected outright; otherwise
    the value must be a string starting with 'http' that either matches a
    known image extension pattern or comes from a known image CDN.
    """
    # Reject anything containing Chinese characters.
    if isinstance(cell, str) and any('\u4e00' <= ch <= '\u9fff' for ch in cell):
        return False
    if not (isinstance(cell, str) and cell.startswith('http')):
        return False
    return bool(
        re.match(r'https?://.+\.(jpg|jpeg|png|gif|webp).*', cell, re.I) or
        re.match(r'http?://.+\.(jpg|jpeg|png|gif|webp).*', cell, re.I) or
        ('xhscdn.com' in cell and 'format/jpg' in cell) or
        ('rescdn.yishihui.com' in cell and 'jpg' in cell) or
        'sns-webpic-qc.xhscdn.com' in cell or
        'ci.xiaohongshu.com' in cell
    )
+
def is_image_list_cell_url(cell):
    """Return True when *cell* is a string holding a Python-style list whose
    elements are all image URLs (as judged by is_image_cell).

    NOTE(review): uses eval() on cell content — arbitrary code execution if
    the sheet data is untrusted; consider ast.literal_eval.
    """
    if not (isinstance(cell, str) and cell.strip() and cell[0] == '[' and cell[-1] == ']'):
        return False
    try:
        parsed = eval(cell)
    except:
        return False
    if type(parsed) == list:
        return all(is_image_cell(item) for item in parsed)
    return False
+
def write_images(client, access_token, token, sheetid, data, start_row=1, start_col=1, skip_col=[], compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
    """Scan *data* and write any image-URL cells into the sheet as images.

    Rows before start_row and columns before start_col (or listed in
    skip_col) are skipped. List-of-URL cells are stitched via
    write_images_to_cell; single-URL cells go through write_image_to_cell.
    (NOTE: skip_col default is a shared mutable, but it is never mutated.)
    """
    for row_no, row_vals in enumerate(data, start=1):
        if row_no < start_row:
            print(f"跳过行: {row_no}")
            continue
        for col_no, value in enumerate(row_vals, start=1):
            if col_no < start_col or col_no in skip_col:
                continue
            if is_image_list_cell_url(value):
                client.write_images_to_cell(access_token, token, sheetid, eval(value), row_no, col_no, compress_image, grid_width, grid_height, border_width, border_color)
            elif is_image_cell(value):
                img_type = get_image_type(value)
                if img_type:
                    client.write_image_to_cell(access_token, token, sheetid, value, row_no, col_no, img_type, compress_image)
+
def merge_cells(client, access_token, token, sheetid, data):
    """Merge vertical runs of equal values, column by column.

    For each column, consecutive rows holding the same value (runs of
    length >= 2) are merged into one cell via client.merge_cells, using
    1-based row/column numbers.

    Fix: the original never flushed the last run of a column, so a
    trailing run of equal values was left unmerged.

    Args:
        client: API client exposing merge_cells(access_token, token,
            sheetid, start_row, end_row, start_col, end_col).
        access_token: tenant access token.
        token: spreadsheet token.
        sheetid: sheet id.
        data: rectangular 2-D list of cell values.
    """
    row_cnt = len(data)
    col_cnt = len(data[0])

    for col in range(col_cnt):
        run_start = 0      # 0-based row where the current run began
        run_value = None
        for row in range(row_cnt):
            value = data[row][col]
            if value != run_value:
                # Close the previous run if it spans more than one row.
                if row - run_start > 1:
                    client.merge_cells(access_token, token, sheetid, run_start + 1, row, col + 1, col + 1)
                run_start = row
                run_value = value
        # Flush the final run of this column.
        if row_cnt - run_start > 1:
            client.merge_cells(access_token, token, sheetid, run_start + 1, row_cnt, col + 1, col + 1)
+
+
def pack_data(data, sheetid, start_row=1, start_col=1):
    """Build a values_batch_update request body from a 2-D data array.

    The range string always extends to the full width/height of *data*,
    while rows before start_row and columns before start_col are dropped
    from the emitted values (mirroring the original behavior).
    """
    total_rows = len(data)
    total_cols = len(data[0])
    cell_range = f"{sheetid}!{column_id(start_col)}{start_row}:{column_id(total_cols)}{total_rows}"
    print(cell_range)
    values = [list(row_vals[start_col - 1:]) for row_vals in data[start_row - 1:]]
    return {
        "valueRanges": [
            {
                "range": cell_range,
                "values": values
            },
        ]
    }
+
def write_data_to_sheet(data, sheet_token='IoTOsjZ4khIqlOtTxnec8oTbn7c', sheetid=None, skip_text=False, skip_images=False, start_row=1, start_col=1, skip_col=(), compress_image=True, grid_width=None, grid_height=None, border_width=3, border_color=(200, 200, 200)):
    '''Write a 2-D table of text and/or images to a Feishu spreadsheet.

    Text values are written first with a batch update, then any image URLs
    found in the same cells are embedded as pictures.

    Args:
        data: 2-D list of cell values (row-major), header included.
        sheet_token: spreadsheet token.
        sheetid: sheet id; when None the spreadsheet's first sheet is used.
        skip_text: skip the batch text write.
        skip_images: skip the image-embedding pass.
        start_row: 1-based first row to write.
        start_col: 1-based first column to write.
        skip_col: 1-based column indices to skip during the image pass
            (default changed from a mutable ``[]`` to an immutable ``()``).
        compress_image: forwarded to the image writers.
        grid_width: columns of the stitched image grid (None = auto).
        grid_height: rows of the stitched image grid (None = auto).
        border_width: grid border width in pixels.
        border_color: grid border colour as an RGB tuple.
    '''
    # Initialise the API client and obtain a tenant access token.
    client = Client(LARK_HOST)
    access_token = client.get_tenant_access_token(APP_ID, APP_SECRET)

    # Default to the spreadsheet's first sheet.
    if sheetid is None:
        sheetid = client.get_sheetid(access_token, sheet_token)
    print(f"Sheet ID: {sheetid}")

    # Build and write the text values.
    body = pack_data(data,
                     sheetid, start_row=start_row, start_col=start_col)
    if not skip_text:
        client.batch_update_values(access_token, sheet_token, body)

    # merge_cells(client, access_token, sheet_token, sheetid, data)

    # Embed any image URLs found in the data.
    if not skip_images:
        write_images(client, access_token, sheet_token, sheetid, data, start_row=start_row, start_col=start_col, skip_col=skip_col, compress_image=compress_image, grid_width=grid_width, grid_height=grid_height, border_width=border_width, border_color=border_color)
+
+
def get_test_data():
    """Return a small fixture table (header row plus two data rows) used for
    manually exercising the sheet-writing helpers."""
    header = ["标题1", "标题2", "标题3", "图片"]
    row_a = [1, 2, 2, "http://sns-webpic.xhscdn.com/1040g2sg316vc6tdrk4705o8h0c2095f1else4i8?imageView2/2/w/0/format/jpg/v3"]
    row_b = [4, "https://cdn.midjourney.com/f78df4d5-9b8b-4ec7-ae34-5cc04d176f87/0_0.png", 6, "dd"]
    return [header, row_a, row_b]
+
+from typing import List, Dict
+import pandas as pd
+import json
def to_feishu(
    res_list: List[Dict],
    sheet_id: str = 'Qn9MAs',
    sheet_token: str = 'Rbsysi6FChzCp7tfv19crkWNnEb',
    start_row: int = 1,
    start_col: int = 1,
    grid_width: int = None,
    grid_height: int = None,
    border_width: int = 3,
    border_color: tuple = (200, 200, 200),
) -> None:
    """
    Export a list of dict records to a Feishu sheet.

    Args:
        res_list: list of record dicts.
        sheet_id: sheet id.
        sheet_token: spreadsheet token.
        start_row: 1-based first row to write.
        start_col: 1-based first column to write.
        grid_width: columns of the stitched image grid; None = auto.
        grid_height: rows of the stitched image grid; None = auto.
        border_width: border width in pixels.
        border_color: border colour as an RGB tuple.
    """
    from tqdm import tqdm

    def truncate_by_bytes(text, max_bytes=450000):
        """Truncate text to at most max_bytes of UTF-8 without splitting a
        multi-byte character."""
        if not text:
            return ""
        text_str = str(text)
        encoded = text_str.encode('utf-8')
        if len(encoded) <= max_bytes:
            return text_str
        # Back off byte by byte until the prefix decodes cleanly.
        truncated = encoded[:max_bytes]
        while len(truncated) > 0:
            try:
                return truncated.decode('utf-8') + "...[已截断]"
            except UnicodeDecodeError:
                truncated = truncated[:-1]
        return ""

    converted_rows = []
    for row in tqdm(res_list):
        if not row:
            continue
        # Build a new dict instead of mutating the caller's row in place
        # (matches to_feishu_incremental's behaviour).
        converted = {}
        for k, v in row.items():
            if isinstance(v, list):
                # Fix: coerce the first element to str before startswith(),
                # so a list whose head is an int/None no longer raises
                # AttributeError; this matches to_feishu_incremental.
                if len(v) > 0 and v[0] and str(v[0]).startswith('http'):
                    converted[k] = truncate_by_bytes(str(v))
                else:
                    json_str = json.dumps(v, ensure_ascii=False, separators=(',', ':'))
                    converted[k] = truncate_by_bytes(json_str)
            elif isinstance(v, dict):
                json_str = json.dumps(v, ensure_ascii=False, indent=2)
                converted[k] = truncate_by_bytes(json_str)
            else:
                converted[k] = truncate_by_bytes(v)
        converted_rows.append(converted)
    df = pd.DataFrame(converted_rows)
    df.fillna('', inplace=True)
    header = df.columns.tolist()
    data_rows = df.values.tolist()
    data_with_header = [header] + data_rows

    write_data_to_sheet(
        data_with_header,
        sheet_token=sheet_token,
        sheetid=sheet_id,
        start_col=start_col,
        start_row=start_row,
        grid_width=grid_width,
        grid_height=grid_height,
        border_width=border_width,
        border_color=border_color,
    )
+
def to_feishu_incremental(
    res_list: List[Dict],
    sort_field: str = '内容ID',
    sheet_id: str = 'Qn9MAs',
    sheet_token: str = 'Rbsysi6FChzCp7tfv19crkWNnEb',
    unique_field: str = None,  # field used for de-duplication; defaults to sort_field
    duplicate_strategy: str = 'skip',  # duplicate handling: 'skip', 'delete' (delete then insert), 'update'
    update_fields: List[str] = None,  # fields to update when duplicate_strategy='update'; None = all fields
    cleanup_duplicates: bool = True,  # purge duplicate rows already present in the sheet first
    keep_first: bool = True,  # when purging duplicates, keep the first (True) or last (False) occurrence
    sort_ascending: bool = False,  # sort order: True = ascending (small→large), False = descending
    grid_width: int = None,
    grid_height: int = None,
    border_width: int = 3,
    border_color: tuple = (200, 200, 200),
) -> None:
    """
    Incrementally insert rows into a Feishu sheet one by one, locating each
    insert position by the given sort field.

    Args:
        res_list: list of record dicts.
        sort_field: field name used for ordering, e.g. '内容ID'.
        sheet_id: sheet id.
        sheet_token: spreadsheet token.
        unique_field: field used for de-duplication; defaults to sort_field.
        duplicate_strategy: duplicate handling strategy
            - 'skip': skip duplicate records (default)
            - 'delete': delete the existing row, then insert the new one
            - 'update': update selected fields of the existing row
        update_fields: fields to update when duplicate_strategy='update'
            - None: update every field (except unique_field)
            - ['f1', 'f2']: update only the listed fields
        cleanup_duplicates: purge duplicate rows already present in the sheet first.
        keep_first: when purging, keep the first (True) or last (False) occurrence.
        sort_ascending: True = ascending (small→large), False = descending; default False.
        grid_width: columns of the stitched image grid; None = auto.
        grid_height: rows of the stitched image grid; None = auto.
        border_width: border width in pixels.
        border_color: border colour as an RGB tuple.
    """
    from tqdm import tqdm
    import pandas as pd
    import json
    from typing import List
    
    def truncate_by_bytes(text, max_bytes=450000):
        """Truncate text to at most max_bytes of UTF-8 without splitting a
        multi-byte character."""
        if not text:
            return ""
        text_str = str(text)
        encoded = text_str.encode('utf-8')
        if len(encoded) <= max_bytes:
            return text_str
        # Safe truncation: back off until the byte prefix decodes cleanly.
        truncated = encoded[:max_bytes]
        while len(truncated) > 0:
            try:
                return truncated.decode('utf-8') + "...[已截断]"
            except UnicodeDecodeError:
                truncated = truncated[:-1]
        return ""
    
    # Initialise the API client.
    client = Client(LARK_HOST)
    access_token = client.get_tenant_access_token(APP_ID, APP_SECRET)
    
    # The de-duplication field defaults to the sort field.
    if unique_field is None:
        unique_field = sort_field
    
    # 1. Fetch basic worksheet properties.
    print("正在获取工作表信息...")
    sheet_props = client.get_sheet_properties(access_token, sheet_token, sheet_id)
    
    if not sheet_props:
        print("获取工作表信息失败,使用默认范围")
        max_col = 'ZZ'
        max_row = 1000
    else:
        print(f"工作表信息: 行数={sheet_props['row_count']}, 列数={sheet_props['column_count']}")
        max_col = column_id(sheet_props['column_count']) if sheet_props['column_count'] > 0 else 'ZZ'
        max_row = sheet_props['row_count'] if sheet_props['row_count'] > 0 else 1000
    
    # 2. Read the header row (using an exact range).
    print("正在读取表头...")
    header_range = f"{sheet_id}!A1:{max_col}1"  # the header is always read starting at column A
    header_data = client.read_range_values(access_token, sheet_token, header_range)
    
    if not header_data or not header_data[0] or all(not cell.strip() for cell in header_data[0] if cell):
        print("表格为空,需要根据数据创建表头")
        # Derive the header from the field names of the first record.
        if not res_list or not res_list[0]:
            print("错误:无法从空数据中创建表头")
            return
        
        # Extract the field names.
        headers = list(res_list[0].keys())
        print(f"创建表头: {headers}")
        
        # Write the header (plain insert is fine — headers contain no images).
        header_range = f"{sheet_id}!A1:{column_id(len(headers))}1"
        client.insert_data_at_row(access_token, sheet_token, sheet_id, 1, [headers])
        
        # Header created; data insertion starts from row 2.
        print("表头创建完成,开始插入数据...")
    else:
        # Parse the existing header.
        headers = [cell.strip() for cell in header_data[0] if cell is not None]
        headers = [h for h in headers if h]  # drop empty fields
        print(f"读取到现有表头: {headers}")
    
    # Make sure the sort field and the de-duplication field exist.
    if sort_field not in headers:
        print(f"警告: 排序字段 '{sort_field}' 未在表头中找到。可用字段: {headers}")
        # Sort field missing: just append everything at the end.
        # Use the sheet's row count, or start at row 2 if the header was just created.
        start_row = len(headers) + 1 if 'headers' in locals() else (max_row + 1 if sheet_props else 2)
        to_feishu(res_list, sheet_id, sheet_token, start_row, 1, grid_width, grid_height, border_width, border_color)
        return
    
    if unique_field not in headers:
        print(f"警告: 去重字段 '{unique_field}' 未在表头中找到,将使用排序字段 '{sort_field}' 进行去重")
        unique_field = sort_field
    
    sort_field_index = headers.index(sort_field)
    sort_field_col = column_id(sort_field_index + 1)  # convert to a column letter: A, B, C...
    
    unique_field_index = headers.index(unique_field)
    unique_field_col = column_id(unique_field_index + 1)  # convert to a column letter: A, B, C...
    
    # 3. Read the sort-field and unique-field columns.
    print(f"正在读取排序字段 '{sort_field}' 和去重字段 '{unique_field}' 列数据...")
    
    # Read the sort-field column.
    sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
    all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
    
    # Read the unique-field column (only when it differs from the sort field).
    if unique_field != sort_field:
        unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
        all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
    else:
        all_unique_data = all_sort_data
    
    # First remove blank rows (rows where both the sort and unique fields are empty).
    print("检查并清理空白行...")
    empty_rows_to_delete = []
    
    if all_unique_data and all_sort_data:
        for i in range(min(len(all_unique_data), len(all_sort_data))):
            unique_row = all_unique_data[i] if i < len(all_unique_data) else None
            sort_row = all_sort_data[i] if i < len(all_sort_data) else None
            
            # Unique-field value of this row.
            unique_value = ""
            if unique_row and len(unique_row) > 0 and unique_row[0]:
                unique_value = str(unique_row[0]).strip()
            
            # Sort-field value of this row.
            sort_value = ""
            if sort_row and len(sort_row) > 0 and sort_row[0]:
                sort_value = str(sort_row[0]).strip()
            
            # Both empty -> treat as a blank row.
            if not unique_value and not sort_value:
                row_number = i + 2  # +2: data starts at sheet row 2 and row numbers are 1-based
                empty_rows_to_delete.append(row_number)
                print(f"标记删除空白行: 第{row_number}行")
    
    # Delete the blank rows.
    if empty_rows_to_delete:
        print(f"开始删除 {len(empty_rows_to_delete)} 个空白行...")
        # Delete bottom-up so earlier deletions don't shift pending row numbers.
        empty_rows_to_delete.sort(reverse=True)
        
        for row_to_delete in empty_rows_to_delete:
            delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, row_to_delete)
            if delete_result:
                print(f"成功删除空白行: 第{row_to_delete}行")
            else:
                print(f"删除空白行失败: 第{row_to_delete}行")
        
        # Re-read the columns — the data has changed after the deletions.
        print("重新读取数据(清理空白行后)...")
        # Re-read the sort-field column.
        sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
        all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
        
        # Re-read the unique-field column.
        if unique_field != sort_field:
            unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
            all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
        else:
            all_unique_data = all_sort_data
    
    # Build the de-duplication bookkeeping over the existing data.
    duplicate_rows_to_delete = []
    
    if cleanup_duplicates and all_unique_data:
        # First analyse the duplicates.
        seen_unique_values = {}  # unique value -> row number where it was first seen
        actual_data_rows = []  # row numbers that actually hold data
        
        print(f"开始分析重复数据,总共读取了 {len(all_unique_data)} 行数据")
        
        # Collect every valid data row and its actual sheet row number
        # (a row is valid only if both the sort and unique fields have values).
        for i in range(min(len(all_unique_data), len(all_sort_data) if all_sort_data else 0)):
            unique_row = all_unique_data[i] if i < len(all_unique_data) else None
            sort_row = all_sort_data[i] if i < len(all_sort_data) else None
            
            # Unique-field value.
            unique_value = ""
            if unique_row and len(unique_row) > 0 and unique_row[0]:
                unique_value = str(unique_row[0]).strip()
            
            # Sort-field value.
            sort_value = ""
            if sort_row and len(sort_row) > 0 and sort_row[0]:
                sort_value = str(sort_row[0]).strip()
            
            # Valid only when both fields are non-empty.
            if unique_value and sort_value:
                actual_row_number = i + 2  # +2: data starts at sheet row 2 and row numbers are 1-based
                actual_data_rows.append((actual_row_number, unique_value, sort_value))
        
        print(f"找到 {len(actual_data_rows)} 行有效数据")
        
        # Analyse duplicates.
        for actual_row_number, unique_value, sort_value in actual_data_rows:
            if unique_value in seen_unique_values:
                # Duplicate found.
                if keep_first:
                    # Keep the first occurrence; delete this one.
                    duplicate_rows_to_delete.append(actual_row_number)
                    print(f"标记删除重复行: 第{actual_row_number}行 ({unique_field}={unique_value}, {sort_field}={sort_value})")
                else:
                    # Keep the last occurrence; delete the earlier one.
                    previous_row = seen_unique_values[unique_value]
                    duplicate_rows_to_delete.append(previous_row)
                    print(f"标记删除重复行: 第{previous_row}行 ({unique_field}={unique_value}, {sort_field}={sort_value})")
                    seen_unique_values[unique_value] = actual_row_number
            else:
                # First time this unique value is seen.
                seen_unique_values[unique_value] = actual_row_number
        
        # Execute the cleanup: delete the duplicate rows.
        if duplicate_rows_to_delete:
            print(f"开始清理 {len(duplicate_rows_to_delete)} 行重复数据...")
            # Delete bottom-up so earlier deletions don't shift pending row numbers.
            duplicate_rows_to_delete.sort(reverse=True)
            
            for row_to_delete in duplicate_rows_to_delete:
                delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, row_to_delete)
                if delete_result:
                    print(f"成功删除重复行: 第{row_to_delete}行")
                else:
                    print(f"删除重复行失败: 第{row_to_delete}行")
            
            # Re-read the columns — the data has changed after the deletions.
            print("重新读取排序和去重字段数据...")
            # Re-read the sort-field column.
            sort_data_range = f"{sheet_id}!{sort_field_col}2:{sort_field_col}{max_row}"
            all_sort_data = client.read_range_values(access_token, sheet_token, sort_data_range)
            
            # Re-read the unique-field column.
            if unique_field != sort_field:
                unique_data_range = f"{sheet_id}!{unique_field_col}2:{unique_field_col}{max_row}"
                all_unique_data = client.read_range_values(access_token, sheet_token, unique_data_range)
            else:
                all_unique_data = all_sort_data
    
    # Build the final de-duplication set from the cleaned data
    # (rows must have both the sort and unique fields populated).
    existing_unique_values = set()
    existing_unique_rows = {}  # for the 'update' strategy: {unique_value: row_number}
    if all_unique_data and all_sort_data:
        for i in range(min(len(all_unique_data), len(all_sort_data))):
            unique_row = all_unique_data[i] if i < len(all_unique_data) else None
            sort_row = all_sort_data[i] if i < len(all_sort_data) else None
            
            # Unique-field value.
            unique_value = ""
            if unique_row and len(unique_row) > 0 and unique_row[0]:
                unique_value = str(unique_row[0]).strip()
            
            # Sort-field value.
            sort_value = ""
            if sort_row and len(sort_row) > 0 and sort_row[0]:
                sort_value = str(sort_row[0]).strip()
            
            # Add to the de-duplication set only when both fields have values.
            if unique_value and sort_value:
                actual_row_number = i + 2  # +2: data starts at sheet row 2 and row numbers are 1-based
                existing_unique_values.add(unique_value)
                existing_unique_rows[unique_value] = actual_row_number
    
    print(f"现有去重值数量: {len(existing_unique_values)}")
    print(existing_unique_values)
    
    # Sort-field values used to locate insert positions (based on the cleaned data).
    sort_data = []
    if all_sort_data:
        # Check both fields to guarantee row integrity.
        for i in range(min(len(all_sort_data), len(all_unique_data) if all_unique_data else 0)):
            sort_row = all_sort_data[i] if i < len(all_sort_data) else None
            unique_row = all_unique_data[i] if i < len(all_unique_data) else None
            
            # Sort-field value.
            sort_value = ""
            if sort_row and len(sort_row) > 0 and sort_row[0]:
                sort_value = str(sort_row[0]).strip()
            
            # Unique-field value.
            unique_value = ""
            if unique_row and len(unique_row) > 0 and unique_row[0]:
                unique_value = str(unique_row[0]).strip()
            
            # Keep the row only when both fields have values.
            if sort_value and unique_value:
                sort_data.append([sort_value])
    
    if not sort_data:
        print("未读取到排序字段数据,所有新数据将从第二行开始插入")
    
    # Normalise the incoming records.
    processed_data = []
    for row in tqdm(res_list, desc="处理数据"):
        if not row:
            continue
        processed_row = {}
        for k, v in row.items():
            if isinstance(v, list):
                if len(v) > 0 and v[0] and str(v[0]).startswith('http'):
                    processed_row[k] = truncate_by_bytes(str(v))
                else:
                    json_str = json.dumps(v, ensure_ascii=False, indent=1)
                    processed_row[k] = truncate_by_bytes(json_str)
            elif isinstance(v, dict):
                json_str = json.dumps(v, ensure_ascii=False, indent=1)
                processed_row[k] = truncate_by_bytes(json_str)
            else:
                processed_row[k] = truncate_by_bytes(v)
        processed_data.append(processed_row)
    
    # Convert to a DataFrame for easier manipulation.
    df_new = pd.DataFrame(processed_data)
    df_new.fillna('', inplace=True)
    
    # Make sure every header column exists in the new data.
    for header in headers:
        if header not in df_new.columns:
            df_new[header] = ''
    
    # Re-order the columns to match the header.
    df_new = df_new.reindex(columns=headers, fill_value='')
    
    # Pre-processing: filter duplicates and decide the insertion order.
    print(f"预处理新数据:过滤重复并排序...")
    print(f"传入数据总量: {len(df_new)} 行")
    print(f"现有去重集合大小: {len(existing_unique_values)}")
    
    valid_rows = []
    update_rows = []  # rows to update: [{row_number, values, unique_value}, ...]
    skipped_count = 0
    new_data_duplicates = 0  # duplicates inside the new data itself
    updated_count = 0  # number of updated rows
    
    for idx, new_row in df_new.iterrows():
        new_row_values = new_row.tolist()
        new_sort_value = str(new_row_values[sort_field_index])
        new_unique_value = str(new_row_values[unique_field_index])
        
        # Does the record collide with existing sheet data?
        if new_unique_value in existing_unique_values:
            if duplicate_strategy == 'update':
                # Update strategy: remember the row to be updated.
                target_row = existing_unique_rows[new_unique_value]
                update_rows.append({
                    'row_number': target_row,
                    'values': new_row_values,
                    'unique_value': new_unique_value
                })
                print(f"标记更新现有数据: 第{target_row}行 {unique_field}={new_unique_value}")
                updated_count += 1
                continue
            elif duplicate_strategy == 'delete':
                # Delete strategy: remove the existing row, then insert the new one.
                target_row = existing_unique_rows[new_unique_value]
                delete_result = client.delete_single_row(access_token, sheet_token, sheet_id, target_row)
                if delete_result:
                    print(f"成功删除重复行: 第{target_row}行 {unique_field}={new_unique_value}")
                    # Drop from the dedup set so the new record can be inserted.
                    existing_unique_values.remove(new_unique_value)
                    # Shift the cached row numbers (rows below move up after a delete).
                    for key, row_num in existing_unique_rows.items():
                        if row_num > target_row:
                            existing_unique_rows[key] = row_num - 1
                    del existing_unique_rows[new_unique_value]
                else:
                    print(f"删除重复行失败: 第{target_row}行 {unique_field}={new_unique_value}")
                    skipped_count += 1
                    continue
            else:  # 'skip' strategy
                print(f"跳过与现有数据重复: {unique_field}={new_unique_value}")
                skipped_count += 1
                continue
        
        # Check for duplicates inside the new data itself.
        already_processed = any(row['unique_value'] == new_unique_value for row in valid_rows)
        if already_processed:
            print(f"跳过新数据内部重复: {unique_field}={new_unique_value}")
            new_data_duplicates += 1
            continue
        
        # Queue for insertion.
        valid_rows.append({
            'values': new_row_values,
            'sort_value': new_sort_value,
            'unique_value': new_unique_value
        })
    
    print(f"预处理完成:有效数据 {len(valid_rows)} 行,需要更新 {len(update_rows)} 行,跳过与现有重复 {skipped_count} 行,跳过新数据内部重复 {new_data_duplicates} 行")
    
    # Apply the pending updates.
    if update_rows:
        print(f"开始执行更新操作,共 {len(update_rows)} 行...")
        for update_data in tqdm(update_rows, desc="更新数据"):
            row_number = update_data['row_number']
            new_values = update_data['values']
            unique_value = update_data['unique_value']
            
            # Build the field-update dict.
            if update_fields is None:
                # Update every field except unique_field (keep the key field intact).
                field_updates = {}
                for i, header in enumerate(headers):
                    if header != unique_field:  # never overwrite the dedup field
                        field_updates[header] = new_values[i]
                print(f"更新第{row_number}行所有字段(除了{unique_field}): {unique_value}")
            else:
                # Update only the requested fields.
                field_updates = {}
                for field_name in update_fields:
                    if field_name in headers:
                        field_index = headers.index(field_name)
                        field_updates[field_name] = new_values[field_index]
                    else:
                        print(f"警告:字段 '{field_name}' 不存在于表头中,跳过")
                print(f"更新第{row_number}行指定字段 {list(field_updates.keys())}: {unique_value}")
            
            # Execute the update.
            if field_updates:
                result = client.update_row_with_specific_fields_and_images(
                    access_token, sheet_token, sheet_id, row_number, 
                    field_updates, headers, True, grid_width, grid_height, border_width, border_color
                )
                if result:
                    print(f"✅ 成功更新第{row_number}行")
                else:
                    print(f"❌ 更新第{row_number}行失败")
    
    if not valid_rows:
        if update_rows:
            print("所有数据均为更新操作,无新数据需要插入")
        else:
            print("没有新数据需要插入")
        return
    
    # Sort the new rows by the sort field (direction controlled by sort_ascending).
    if sort_ascending:
        # Ascending: smaller values are inserted first (reverse=False).
        valid_rows.sort(key=lambda x: x['sort_value'], reverse=False)
        print(f"新数据排序完成,将按升序插入")
    else:
        # Descending: larger values are inserted first (reverse=True).
        valid_rows.sort(key=lambda x: x['sort_value'], reverse=True)
        print(f"新数据排序完成,将按降序插入")
    
    # Insert the sorted rows one by one.
    # NOTE(review): the position comparisons below operate on strings, so the
    # ordering is lexicographic, not numeric — confirm this matches the sort
    # field's data (e.g. zero-padded IDs).
    for i, row_data in tqdm(enumerate(valid_rows), total=len(valid_rows), desc="插入数据"):
        new_row_values = row_data['values']
        new_sort_value = row_data['sort_value']
        new_unique_value = row_data['unique_value']
        
        # Locate the insert position (direction controlled by sort_ascending).
        insert_row = len(sort_data) + 2  # default: append at the end
        
        print(f"查找插入位置,新值: {new_sort_value}")
        
        # Find the correct slot between two adjacent existing values.
        if sort_ascending:
            # Ascending (small → large): find prev_value < new_value < current_value.
            for j in range(len(sort_data)):
                current_value = str(sort_data[j][0]) if sort_data[j] and len(sort_data[j]) > 0 else ""
                prev_value = str(sort_data[j-1][0]) if j > 0 and sort_data[j-1] and len(sort_data[j-1]) > 0 else None
                
                # Should the new row go at position j?
                if prev_value is None:
                    # First position: does the new row belong at the very top?
                    if new_sort_value < current_value:
                        insert_row = j + 2  # +2 for the header offset
                        print(f"  插入到最前面第{insert_row}行: 新值{new_sort_value} < 第一个值{current_value}")
                        break
                else:
                    # Does the new value fall between two adjacent values?
                    if new_sort_value >= prev_value and new_sort_value < current_value:
                        insert_row = j + 2  # +2 for the header offset
                        print(f"  插入到第{insert_row}行: {prev_value} <= {new_sort_value} < {current_value}")
                        break
                    elif new_sort_value == current_value:
                        # Equal values: insert right after the matching row.
                        insert_row = j + 3  # +2 (header offset) +1 (after this row)
                        print(f"  插入到第{insert_row}行: 新值{new_sort_value} = 现有值{current_value},插入其后")
                        break
            
            # No slot found: the new value is the largest, append at the end.
            if insert_row == len(sort_data) + 2:
                last_value = str(sort_data[-1][0]) if sort_data and sort_data[-1] and len(sort_data[-1]) > 0 else "无"
                print(f"  插入到末尾第{insert_row}行: 新值{new_sort_value} > 最后一个值{last_value}")
        else:
            # Descending (large → small): find prev_value > new_value > current_value.
            for j in range(len(sort_data)):
                current_value = str(sort_data[j][0]) if sort_data[j] and len(sort_data[j]) > 0 else ""
                prev_value = str(sort_data[j-1][0]) if j > 0 and sort_data[j-1] and len(sort_data[j-1]) > 0 else None
                
                # Should the new row go at position j?
                if prev_value is None:
                    # First position: does the new row belong at the very top?
                    if new_sort_value > current_value:
                        insert_row = j + 2  # +2 for the header offset
                        print(f"  插入到最前面第{insert_row}行: 新值{new_sort_value} > 第一个值{current_value}")
                        break
                else:
                    # Does the new value fall between two adjacent values?
                    if new_sort_value <= prev_value and new_sort_value > current_value:
                        insert_row = j + 2  # +2 for the header offset
                        print(f"  插入到第{insert_row}行: {prev_value} >= {new_sort_value} > {current_value}")
                        break
                    elif new_sort_value == current_value:
                        # Equal values: insert right after the matching row.
                        insert_row = j + 3  # +2 (header offset) +1 (after this row)
                        print(f"  插入到第{insert_row}行: 新值{new_sort_value} = 现有值{current_value},插入其后")
                        break
            
            # No slot found: the new value is the smallest, append at the end.
            if insert_row == len(sort_data) + 2:
                last_value = str(sort_data[-1][0]) if sort_data and sort_data[-1] and len(sort_data[-1]) > 0 else "无"
                print(f"  插入到末尾第{insert_row}行: 新值{new_sort_value} < 最后一个值{last_value}")
        
        print(f"[{i+1}/{len(valid_rows)}] 最终插入位置: 第 {insert_row} 行: {sort_field}={new_sort_value}")
        
        # Insert the row (a genuinely new sheet row) at the chosen position.
        result = client.insert_row_with_data_at_position(access_token, sheet_token, sheet_id, insert_row, [new_row_values], True, grid_width, grid_height, border_width, border_color)
        
        if result:
            print(f"成功插入数据和图片到第 {insert_row} 行")
            # Keep sort_data in sync: insert the new value at the matching index.
            sort_data_index = insert_row - 2  # convert to a sort_data index (-2 for the header offset)
            sort_data.insert(sort_data_index, [new_sort_value])
            # And extend the dedup set.
            existing_unique_values.add(new_unique_value)
        else:
            print(f"插入数据到第 {insert_row} 行失败")
+
+
+
if __name__ == "__main__":
    # Ad-hoc manual test entry point; the calls below are kept commented out
    # for reference.
    # data = get_test_data()
    # sheet_token = 'IoTOsjZ4khIqlOtTxnec8oTbn7c'
    # sheetid = 'K9c4LG'
    # write_data_to_sheet(data, sheetid=sheetid)

    # is_image_cell_result = is_image_cell('["http://sns-webpic-qc.xhscdn.com/202501021415/1a6e88908930afce92b09206d5a482f8/1040g2sg31b74rf6k7g5g5oo7i8vkgev59lkjet0!nd_whlt34_webp_wm_1","http://sns-webpic-qc.xhscdn.com/202501021415/1a6e88908930afce92b09206d5a482f8/1040g2sg31b74rf6k7g5g5oo7i8vkgev59lkjet0!nd_whlt34_webp_wm_1"]')
    # print(is_image_cell_result)
    
    # Usage notes for to_feishu_incremental, kept as a bare string expression
    # so it is never executed (the examples reference placeholder tokens).
    """
    示例:使用 to_feishu_incremental 增量插入数据
    
    # 测试数据
    test_data = [
        {
            '内容ID': '1001', 
            '标题': '测试标题1', 
            '内容': '测试内容1',
            '图片': '["http://example.com/image1.jpg", "http://example.com/image2.jpg"]'
        },
        {
            '内容ID': '1003', 
            '标题': '测试标题2', 
            '内容': '测试内容2',
            '图片': 'http://example.com/image3.jpg'
        }
    ]
    
         # 调用增量插入函数
     to_feishu_incremental(
         res_list=test_data,
         sort_field='内容ID',  # 按此字段排序
         sheet_id='your_sheet_id', 
         sheet_token='your_sheet_token',
         unique_field='内容ID',  # 去重字段,默认使用sort_field
         duplicate_strategy='update',  # 重复处理策略:'skip'跳过, 'delete'删除后插入, 'update'更新指定字段
         update_fields=['标题', '内容', '图片'],  # 当strategy='update'时,只更新这些字段
         cleanup_duplicates=True,  # 先清理现有表格中的重复数据
         keep_first=True,  # 清理时保留第一个重复项
         sort_ascending=False,  # 排序顺序:False为降序(大→小),True为升序(小→大)
         grid_width=2,  # 图片拼接列数
         grid_height=2,  # 图片拼接行数
     )
    
    # 排序方向示例:
    
    # 示例1:按时间戳降序排序(最新的在前面)- 适合新闻、动态等时间敏感内容
    to_feishu_incremental(
        res_list=news_data,
        sort_field='发布时间',
        sort_ascending=False,  # 降序,最新时间在前面
        # ... 其他参数
    )
    
    # 示例2:按ID升序排序(从小到大)- 适合有明确编号顺序的内容
    to_feishu_incremental(
        res_list=product_data,
        sort_field='产品ID',
        sort_ascending=True,  # 升序,小ID在前面
        # ... 其他参数
    )
    
    # 示例3:按优先级降序排序(高优先级在前面)- 适合任务、问题等需要优先级管理的内容
    to_feishu_incremental(
        res_list=task_data,
        sort_field='优先级',
        sort_ascending=False,  # 降序,高优先级在前面
        # ... 其他参数
    )
    
    功能说明:
    1. **智能表头处理**:
       - 如果表格为空,自动从数据中提取字段名创建表头
       - 如果表格已有数据,读取现有表头结构
    2. **空白行清理**:
       - 自动检测并删除排序字段和去重字段都为空的空白行
       - 确保数据的连续性和逻辑一致性
    3. **重复数据清理**:
       - cleanup_duplicates=True: 先清理现有表格中的重复数据
       - keep_first: 保留第一个或最后一个重复项
    4. **智能去重检查**:
       - 基于 unique_field 字段检查数据是否已存在
       - 预处理阶段过滤重复数据,避免插入过程中的状态变化问题
    5. **排序插入**:根据指定的 sort_field 字段和 sort_ascending 参数查找插入位置
       - sort_ascending=False(默认):降序排序,较大的值插入到较前面的位置
       - sort_ascending=True:升序排序,较小的值插入到较前面的位置
    6. **逐行数据插入**:按排序顺序逐行插入数据,保持表格整体有序
    7. **完整图片支持**:自动处理图片写入,支持单张图片和图片列表
    8. **图片拼接功能**:支持多图拼接,可设置拼接的行列数和边框样式
    
    适用场景:
    - ✅ 空表格:自动创建表头并插入数据
    - ✅ 已有重复数据的表格:先清理重复,再智能插入
    - ✅ 增量数据更新:逐条插入,保持排序,自动去重
    - ✅ 重复运行安全:不会插入重复数据
    - ✅ 数据清理:一键清理现有重复数据
    - ✅ 灵活排序:支持升序和降序两种排序方式
    """

+ 125 - 0
req/new.md

@@ -0,0 +1,125 @@
+
+现在有一个推荐场景的问题,
+我的推荐预估的是 str、ros,vor 是统计出来的,最终的排序公式是 str * ros * vor = vov
+str用的是 fm 模型,ros用的是 xgb模型,vor 用的是 24 小时的统计量;
+现在有一个问题,最近看到一些头部的 item,vov 低,但是给的曝光量很多,现在要你分析具体的原因;
+我会给你一个 sql:
+exp 是曝光的意思;具体 sql 如下,请你基于以下 sql 来分析具体低vov高曝光的原因是什么?这个问题的影响面有哪些?怎么解决:
+
+模型:
+1. fmRov = str
+2. fmRovOrigin = str采样还原前
+3. NorXGBScore = ros
+4. vor = vor
+真实
+  1. Str = str-plus-noself
+  2. Ros = ros-minus-noself
+  3. Rov = return-n-uv-noself / exp
+  4. Vov = new-exposure-cnt / exp
+
+
+WITH tab_base AS 
+(
+    SELECT  *
+            ,((0.059 * fmRovOrigin) / (1 - (1 - 0.059) * fmRovOrigin)) AS online_fmrov
+            ,((0.036 * fmRovOrigin) / (1 - (1 - 0.036) * fmRovOrigin)) AS real_fmrov
+    FROM    (
+                SELECT  dt
+                        ,hh
+                        ,vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,CAST(score AS DOUBLE) AS score
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRov') AS DOUBLE) AS fmRov
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRovOrigin') AS DOUBLE) AS fmRovOrigin
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE) AS NorXGBScore
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.vor') AS DOUBLE) AS vor
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS hasReturnRovScore
+                FROM    (
+                            SELECT  dt
+                                    ,hh
+                                    ,vid
+                                    ,is_share
+                                    ,share_cnt
+                                    ,is_return_1
+                                    ,is_return_n
+                                    ,is_return_noself
+                                    ,return_1_uv
+                                    ,return_n_uv
+                                    ,return_n_uv_noself
+                                    ,new_exposure_cnt
+                                    ,score
+                                    ,REPLACE(GET_JSON_OBJECT(extend_alg,'$.scoresMap'),"\\","") AS scoresmap
+                            FROM    loghubods.dwd_recsys_alg_sample_all_20250212
+                            WHERE   dt BETWEEN '${start_dt}' AND '${end_dt}'
+                            AND     hh BETWEEN '${start_hh}' AND '${end_hh}'
+                            AND     apptype = '${apptype}'
+                            --AND     vid IN ('62421458','55931081','62955809','58807530') 
+                            AND     vid IN ('62967014','63159658','62151288')
+                            AND     extend_alg IS NOT NULL
+                            AND     GET_JSON_OBJECT(extend_alg,'$.scoresMap') IS NOT NULL
+                            AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页")
+                            AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                            AND     abcode NOT IN ("ab100")
+                        ) 
+                WHERE   GET_JSON_OBJECT(scoresmap,'$.fmRov') IS NOT NULL
+                AND     GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') IS NOT NULL
+                AND     GET_JSON_OBJECT(scoresmap,'$.vor') IS NOT NULL
+                AND     GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') IS NOT NULL
+            ) 
+)
+,tab_pre AS 
+(
+    SELECT  dt --,hh
+            ,vid
+            ,COUNT(1) AS cnt
+            ,AVG(score) AS score
+            ,AVG(fmRov) AS fmRov
+            ,AVG(online_fmrov) AS online_fmrov
+            ,AVG(real_fmrov) AS real_fmrov
+            ,AVG(1.22 * pow(NorXGBScore,1.15)) AS NorXGBScore
+            ,AVG(vor) AS vor
+            ,AVG(hasReturnRovScore) AS hasReturnRovScore
+            ,STDDEV(score) AS std_score
+            ,STDDEV(fmRov) AS std_fmRov
+            ,STDDEV(NorXGBScore) AS std_NorXGBScore
+            ,STDDEV(vor) AS std_vor
+            ,STDDEV(hasReturnRovScore) AS std_hasReturnRovScore
+    FROM    tab_base
+    GROUP BY dt -- ,hh
+             ,vid
+)
+,tab_post AS 
+(
+    SELECT  dt --,hh
+            ,vid
+            ,COUNT(1) AS exp
+            ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+            ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+            ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+            ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+            ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+            ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+            ,round(COALESCE(SUM(is_return_noself) / COUNT(1),0),6) AS str_plus_noself
+            ,round(COALESCE(SUM(return_n_uv_noself) / SUM(is_return_noself),0),6) AS ros_minus_noself
+    FROM    tab_base
+    GROUP BY dt -- ,hh
+             ,vid
+)
+SELECT  t1.*
+        ,t2.*
+FROM    tab_post t1
+LEFT JOIN tab_pre t2
+ON      t1.dt = t2.dt --AND     t1.hh = t2.hh
+AND     t1.vid = t2.vid
+ORDER BY t1.dt,t1.vid
+;

+ 124 - 0
req/低.md

@@ -0,0 +1,124 @@
+
+现在有一个推荐场景的问题,
+我的推荐预估的是 str、ros,vor 是统计出来的,最终的排序公式是 str * ros * vor = vov
+str用的是 fm 模型,ros用的是 xgb模型,vor 用的是 24 小时的统计量;
+现在有一个问题,最近看到一些头部的 item,vov 低,但是给的曝光量很多,现在要你分析具体的原因;
+我会给你一个 sql:
+模型:
+1. fmRov = str
+2. fmRovOrigin = str采样还原前
+3. NorXGBScore = ros
+4. vor = vor
+真实
+  1. Str = str-plus-noself
+  2. Ros = ros-minus-noself
+  3. Rov = return-n-uv-noself / exp
+  4. Vov = new-exposure-cnt / exp
+
+exp 是曝光的意思;具体 sql 如下,请你基于以下 sql 来分析具体低vov高曝光的原因是什么?这个问题的影响面有哪些?怎么解决:
+
+WITH tab_base AS 
+(
+    SELECT  *
+            ,((0.059 * fmRovOrigin) / (1 - (1 - 0.059) * fmRovOrigin)) AS online_fmrov
+            ,((0.036 * fmRovOrigin) / (1 - (1 - 0.036) * fmRovOrigin)) AS real_fmrov
+    FROM    (
+                SELECT  dt
+                        ,hh
+                        ,vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,CAST(score AS DOUBLE) AS score
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRov') AS DOUBLE) AS fmRov
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRovOrigin') AS DOUBLE) AS fmRovOrigin
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE) AS NorXGBScore
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.vor') AS DOUBLE) AS vor
+                        ,CAST(GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS hasReturnRovScore
+                FROM    (
+                            SELECT  dt
+                                    ,hh
+                                    ,vid
+                                    ,is_share
+                                    ,share_cnt
+                                    ,is_return_1
+                                    ,is_return_n
+                                    ,is_return_noself
+                                    ,return_1_uv
+                                    ,return_n_uv
+                                    ,return_n_uv_noself
+                                    ,new_exposure_cnt
+                                    ,score
+                                    ,REPLACE(GET_JSON_OBJECT(extend_alg,'$.scoresMap'),"\\","") AS scoresmap
+                            FROM    loghubods.dwd_recsys_alg_sample_all_20250212
+                            WHERE   dt BETWEEN '${start_dt}' AND '${end_dt}'
+                            AND     hh BETWEEN '${start_hh}' AND '${end_hh}'
+                            AND     apptype = '${apptype}'
+                            --AND     vid IN ('62421458','55931081','62955809','58807530') 
+                            AND     vid IN ('62967014','63159658','62151288')
+                            AND     extend_alg IS NOT NULL
+                            AND     GET_JSON_OBJECT(extend_alg,'$.scoresMap') IS NOT NULL
+                            AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页")
+                            AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                            AND     abcode NOT IN ("ab100")
+                        ) 
+                WHERE   GET_JSON_OBJECT(scoresmap,'$.fmRov') IS NOT NULL
+                AND     GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') IS NOT NULL
+                AND     GET_JSON_OBJECT(scoresmap,'$.vor') IS NOT NULL
+                AND     GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') IS NOT NULL
+            ) 
+)
+,tab_pre AS 
+(
+    SELECT  dt --,hh
+            ,vid
+            ,COUNT(1) AS cnt
+            ,AVG(score) AS score
+            ,AVG(fmRov) AS fmRov
+            ,AVG(online_fmrov) AS online_fmrov
+            ,AVG(real_fmrov) AS real_fmrov
+            ,AVG(1.22 * pow(NorXGBScore,1.15)) AS NorXGBScore
+            ,AVG(vor) AS vor
+            ,AVG(hasReturnRovScore) AS hasReturnRovScore
+            ,STDDEV(score) AS std_score
+            ,STDDEV(fmRov) AS std_fmRov
+            ,STDDEV(NorXGBScore) AS std_NorXGBScore
+            ,STDDEV(vor) AS std_vor
+            ,STDDEV(hasReturnRovScore) AS std_hasReturnRovScore
+    FROM    tab_base
+    GROUP BY dt -- ,hh
+             ,vid
+)
+,tab_post AS 
+(
+    SELECT  dt --,hh
+            ,vid
+            ,COUNT(1) AS exp
+            ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+            ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+            ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+            ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+            ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+            ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+            ,round(COALESCE(SUM(is_return_noself) / COUNT(1),0),6) AS str_plus_noself
+            ,round(COALESCE(SUM(return_n_uv_noself) / SUM(is_return_noself),0),6) AS ros_minus_noself
+    FROM    tab_base
+    GROUP BY dt -- ,hh
+             ,vid
+)
+SELECT  t1.*
+        ,t2.*
+FROM    tab_post t1
+LEFT JOIN tab_pre t2
+ON      t1.dt = t2.dt --AND     t1.hh = t2.hh
+AND     t1.vid = t2.vid
+ORDER BY t1.dt,t1.vid
+;

+ 421 - 0
table_gen/ loghubods.ods_user_active_log_info_day.sql

@@ -0,0 +1,421 @@
+--odps sql 
+
+
+--********************************************************************--
+--author:李晓阳
+--create time:2025-07-30 11:36:56
+--desc mid区分渠道承接和日常的明细数据表 (优化上线)
+--********************************************************************--
+CREATE TABLE IF NOT EXISTS loghubods.ods_user_active_log_info_day
+(
+    apptype             STRING COMMENT '应用类型'
+    ,loginuid           STRING COMMENT '用户id'
+    ,opengid            STRING COMMENT '群id'
+    ,type               STRING COMMENT '群类型'
+    ,sencetype          STRING COMMENT '群类型_编码'
+    ,hotsencetype       STRING COMMENT '热启动群类型_编码'
+    ,machinecode        STRING COMMENT '设备唯一标识'
+    ,subsessionid       STRING COMMENT '子会话ID'
+    ,sessionid          STRING COMMENT '会话ID'
+    ,rootsourceid       STRING COMMENT '根来源ID'
+    ,rootsessionid      STRING COMMENT '根会话ID'
+    ,layer              BIGINT COMMENT '用户分享层级'
+    ,channel            STRING COMMENT '渠道名称'
+    ,channel_shortname  STRING COMMENT '渠道简称'
+    ,push_type          STRING COMMENT '推送类型(承接/日常/未知)'
+    ,sub_channel        STRING COMMENT '子渠道标识'
+    ,channel_type       STRING COMMENT '渠道类型分类'
+    ,channel_type_frist STRING COMMENT '渠道类型前缀'
+    ,channel_id         BIGINT COMMENT '渠道类型ID'
+    ,level_type         STRING COMMENT '外部首层、非外部首层'
+    ,user_share_depth   BIGINT COMMENT '分享层级'
+    ,browse_time STRING COMMENT '访问时间'
+
+)
+PARTITIONED BY 
+(
+    dt                  STRING COMMENT '数据日期'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('columnar.nested.type' = 'true','comment' = 'mid区分渠道承接和日常的明细数据表')
+;
+--ALTER TABLE loghubods.ods_user_active_log_info_day ADD COLUMNS (channel_type_okr string )
+--ALTER TABLE loghubods.ods_user_active_log_info_day ADD COLUMNS (browse_time string COMMENT '访问时间');
+--ALTER TABLE loghubods.ods_user_active_log_info_day ADD COLUMNS (real_user_share_depth string COMMENT '真实层级');
+--ALTER TABLE loghubods.ods_user_active_log_info_day ADD COLUMNS (clienttimestamp string );
+INSERT OVERWRITE TABLE loghubods.ods_user_active_log_info_day PARTITION (dt = '${day}')
+WITH -- 1. 基础用户活跃日志CTE:解析JSON字段,过滤公共条件
+base_useractive AS 
+(
+    SELECT  dt
+            ,apptype
+            ,machinecode
+            ,subsessionid
+            ,sessionid
+            ,businesstype
+            ,GET_JSON_OBJECT(extparams,'$.rootSourceId') AS root_source_id
+            ,GET_JSON_OBJECT(extparams,'$.rootSessionId') AS root_session_id
+            ,GET_JSON_OBJECT(extparams,'$.userShareDepth') AS user_share_depth
+            ,extparams
+            ,loginuid
+            ,opengid
+            ,CASE   WHEN opengid = '-1' THEN '单聊'
+                    WHEN opengid = '-2' THEN '获取失败'
+                    WHEN LENGTH(opengid) > 4 THEN '群聊'
+            END AS type -- ,pagesource
+            ,senceType AS sencetype
+            ,GET_JSON_OBJECT(extParams,'$.hotSenceType') AS hotsencetype
+            ,from_unixtime(cast(clienttimestamp as BIGINT )/1000) browse_time
+            ,clienttimestamp
+    FROM    loghubods.useractive_log
+    WHERE   dt = '${day}'
+    AND     businesstype = 'path'
+) -- 2. 公众号买号分组表
+,gzh_buy_group AS 
+(
+    SELECT  root_source_id AS gzh_buy_root
+    FROM    loghubods.changwen_rootsourceid_group
+    WHERE   dt = MAX_PT('loghubods.changwen_rootsourceid_group')
+    AND     group_name = '公众号买号'
+) -- 3. 服务号信息表(用于服务号代运营场景)
+,service_account AS 
+(
+    SELECT  gzh.gh_id
+            ,account.channel
+            ,CASE   WHEN gzh.type = 0 THEN '公众号'
+                    ELSE '服务号'
+            END AS account_type
+    FROM    loghubods.content_platform_account account
+    LEFT JOIN loghubods.content_platform_gzh_account gzh
+    ON      account.id = gzh.create_account_id
+    WHERE   gzh.type != 0 -- 仅保留服务号
+    GROUP BY gzh.gh_id
+             ,account.channel
+             ,CASE   WHEN gzh.type = 0 THEN '公众号'
+                     ELSE '服务号'
+             END
+) -- 4. 长文数据与分组关联表
+,long_articles_info AS 
+(
+    SELECT  a.rootsourceid
+            ,a.ghid
+            ,c.group_source_name
+            ,a.push_type
+    FROM    loghubods.long_articles_root_source_id a
+    LEFT JOIN loghubods.publish_account b
+    ON      a.ghid = b.gh_id
+    LEFT JOIN loghubods.wx_statistics_group_source_account c
+    ON      b.id = c.account_id
+    WHERE   a.dt = MAX_PT('loghubods.long_articles_root_source_id')
+    GROUP BY a.rootsourceid
+             ,a.ghid
+             ,c.group_source_name
+             ,a.push_type
+) -- 5. 腾讯投放记录表
+,ad_put_flow_tencent AS 
+(
+    SELECT  root_source_id
+            ,put_carrier_id
+            ,put_type_one
+            ,put_type_two
+            ,channel
+    FROM    loghubods.ad_put_flow_record_tencent_day
+    WHERE   dt = '${day}'
+    GROUP BY root_source_id
+             ,put_carrier_id
+             ,put_type_one
+             ,put_type_two
+             ,channel
+) -- 主查询:通过CASE WHEN和LEFT JOIN实现所有渠道类型的统一处理
+SELECT  b.apptype
+        ,b.loginuid
+        ,b.opengid
+        ,b.type
+        ,b.sencetype
+        ,b.hotsencetype
+        ,b.machinecode
+        ,b.subsessionid
+        ,b.sessionid
+        ,b.root_source_id AS rootsourceid
+        ,b.root_session_id AS rootsessionid -- 计算层级layer
+        ,CASE   WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' AND t9.channel REGEXP 'xycsd|csaq|shy|jxjx|gzcr|xyjj|jxatm|xjcy|yqyx|hbwq|jxxm|gzmy|cdjh|gzjr|gzxts|twhc|qdjdz|sjzyd|gzyhc|djh|gzlx|yywl|szjn|gzdd1|cqqd|cqslh|hzjy|hzjh|sclh|xyhc|snss' --AND t9.layer_type ='特殊首层' 
+                THEN IF(CAST(b.user_share_depth AS INT) <= 1,1,2)
+                ELSE IF(CAST(b.user_share_depth AS INT) = 0,1,2)
+        END AS layer -- 计算渠道名称、简称
+        ,CASE
+                 -- 公众号即时回复
+                   WHEN b.root_source_id REGEXP 'dyyjs_'
+                    AND SUBSTR(b.root_source_id,7,4) = 'null'
+                    AND t3.ghid_list = 'gh_580589404a08' THEN '浩讯'
+                WHEN b.root_source_id REGEXP 'dyyjs_'
+                    AND SUBSTR(b.root_source_id,7,4) = 'null'
+                    AND t3.ghid_list = 'gh_505269831752' THEN '恒创'
+                WHEN b.root_source_id REGEXP 'dyyjs_' THEN t4.company -- 企微外部合作
+                WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' THEN SPLIT_PART(b.root_source_id,'_',2) -- 服务号代运营
+                WHEN b.root_source_id REGEXP 'fwhhzdyy_|fwhdyy_' THEN COALESCE(s.channel,'') -- 公众号买号/长文
+                WHEN b.root_source_id REGEXP 'longArticles_' THEN COALESCE(l.group_source_name,'') -- 其他渠道
+                ELSE ''
+        END AS channel -- 渠道简称(逻辑与channel一致,仅部分场景有简写)
+        ,CASE   WHEN b.root_source_id REGEXP 'dyyjs_'
+                    AND SUBSTR(b.root_source_id,7,4) = 'null'
+                    AND t3.ghid_list = 'gh_580589404a08' THEN 'hx'
+                WHEN b.root_source_id REGEXP 'dyyjs_'
+                    AND SUBSTR(b.root_source_id,7,4) = 'null'
+                    AND t3.ghid_list = 'gh_505269831752' THEN 'hc'
+                WHEN b.root_source_id REGEXP 'dyyjs_' THEN t4.company_code
+                WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' THEN SPLIT_PART(b.root_source_id,'_',2)
+                ELSE ''
+        END AS channel_shortname -- 推送类型
+        ,CASE
+                 -- 公众号/服务号投流、企微投流:从长文数据或投放记录获取
+                WHEN b.root_source_id REGEXP 'touliu_tencentgzh_|touliu_tencentGzhArticle_|GzhTouLiu_Articles_gh|fwhtouliu_|touliu_tencentqw_|WeCom_|daitou_tencentgzh|DaiTou_gh' THEN COALESCE(t5.push_type,'未知') -- 公众号即时回复、小程序投流:固定为承接
+                WHEN b.root_source_id REGEXP 'dyyjs_|touliu_tencent_' THEN '承接' -- 其他渠道:固定为日常
+                ELSE '日常'
+        END AS push_type -- 子渠道
+        ,CASE   WHEN b.root_source_id REGEXP 'dyyjs_' THEN t3.ghid_list
+                WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' THEN t6.put_carrier_id
+                WHEN b.root_source_id REGEXP 'fwhhzdyy_|fwhdyy_' THEN COALESCE(s.gh_id,'')
+                WHEN b.root_source_id REGEXP 'longArticles_' THEN COALESCE(l.ghid,'')
+                WHEN b.root_source_id REGEXP 'touliu_tencentgzh_|touliu_tencentGzhArticle_|GzhTouLiu_Articles_gh' AND b.root_source_id NOT REGEXP 'fwhtouliu_' THEN COALESCE(t5.ghid,'')
+                WHEN b.root_source_id REGEXP 'fwhtouliu_' THEN COALESCE(t5.ghid,'')
+                ELSE ''
+        END AS sub_channel -- 渠道类型(统一映射)
+        ,CASE   WHEN b.root_source_id REGEXP 'longArticles_' AND g.gzh_buy_root IS NOT NULL THEN '公众号买号'
+                WHEN b.root_source_id REGEXP 'longArticles_'
+                    AND g.gzh_buy_root IS NULL
+                    AND t8.rootsourceid IS NULL THEN '公众号代运营-Daily-系统'
+                WHEN b.root_source_id REGEXP 'dyyjs_' THEN '公众号合作-即转-稳定'
+                WHEN b.root_source_id REGEXP 'fwhtouliu_' THEN '服务号投流'
+                WHEN b.root_source_id REGEXP 'touliu_tencent_' THEN '小程序投流-稳定'
+                WHEN b.root_source_id REGEXP 'touliu_tencentgzh_|touliu_tencentGzhArticle_|GzhTouLiu_Articles_gh' AND b.root_source_id NOT REGEXP 'fwhtouliu_' THEN '公众号投流-稳定'
+                WHEN b.root_source_id REGEXP 'touliu_tencentqw_|WeCom_' AND t7.rootsourceid IS NULL THEN '企微投放'
+                WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' AND t6.root_source_id IS NOT NULL THEN '群/企微合作-稳定' --WHEN b.root_source_id REGEXP 'daitou_tencentgzh|DaiTou_gh' THEN '90公众号代投'
+                WHEN b.root_source_id REGEXP 'qwsq_' AND t7.rootsourceid IS NOT NULL THEN '企微投放-存量拉群'
+                WHEN b.root_source_id REGEXP 'fwhhzdyy_|fwhdyy_' THEN '服务号合作-Daily-自选' --WHEN b.root_source_id REGEXP 'fwhmh_' THEN '92服务号买号'
+                WHEN b.root_source_id REGEXP 'longArticles_'
+                    AND g.gzh_buy_root IS NULL
+                    AND t8.rootsourceid IS NOT NULL THEN '公众号合作-Daily-自选'
+                WHEN b.root_source_id REGEXP 'touliu_tencentqw_|WeCom_|qwmf_' AND t7.rootsourceid IS NOT NULL THEN '微信群买粉'
+                WHEN b.root_source_id REGEXP 'daitou_tencentgzh|DaiTou_gh' THEN '公众号完全代投放'
+                WHEN b.root_source_id REGEXP 'fwhmh_' THEN '服务号买号'
+                ELSE '内部'
+        END AS channel_type
+        ,CASE   WHEN b.root_source_id REGEXP 'longArticles_' AND g.gzh_buy_root IS NOT NULL THEN 'longArticles_'
+                WHEN b.root_source_id REGEXP 'longArticles_'
+                    AND g.gzh_buy_root IS NULL
+                    AND t8.rootsourceid IS NULL THEN 'longArticles_'
+                WHEN b.root_source_id REGEXP 'dyyjs_' THEN 'dyyjs_'
+                WHEN b.root_source_id REGEXP 'fwhtouliu_' THEN 'fwhtouliu_'
+                WHEN b.root_source_id REGEXP 'touliu_tencent_' THEN 'touliu_tencent_'
+                WHEN b.root_source_id REGEXP 'touliu_tencentgzh_|touliu_tencentGzhArticle_|GzhTouLiu_Articles_gh' AND b.root_source_id NOT REGEXP 'fwhtouliu_' THEN 'touliu_tencentgzh_'
+                WHEN b.root_source_id REGEXP 'touliu_tencentqw_|WeCom_' AND t7.rootsourceid IS NULL THEN 'touliu_tencentqw_'
+                WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' AND t6.root_source_id IS NOT NULL THEN 'touliu_tencentwbqw_' --WHEN b.root_source_id REGEXP 'daitou_tencentgzh|DaiTou_gh' THEN '90公众号代投'
+                WHEN b.root_source_id REGEXP 'qwsq_' AND t7.rootsourceid IS NOT NULL THEN 'qwsq_'
+                WHEN b.root_source_id REGEXP 'fwhhzdyy_|fwhdyy_' THEN 'fwhhzdyy_' --WHEN b.root_source_id REGEXP 'fwhmh_' THEN '92服务号买号'
+                WHEN b.root_source_id REGEXP 'longArticles_'
+                    AND g.gzh_buy_root IS NULL
+                    AND t8.rootsourceid IS NOT NULL THEN 'longArticles_'
+                WHEN b.root_source_id REGEXP 'touliu_tencentqw_|WeCom_|qwmf_' AND t7.rootsourceid IS NOT NULL THEN 'WeCom_'
+                WHEN b.root_source_id REGEXP 'daitou_tencentgzh|DaiTou_gh' THEN 'DaiTou_gh'
+                WHEN b.root_source_id REGEXP 'fwhmh_' THEN 'fwhmh_'
+                ELSE '内部'
+        END AS channel_type_frist -- 渠道ID(与channel_type对应)
+        ,CASE   WHEN b.root_source_id REGEXP 'longArticles_' AND g.gzh_buy_root IS NOT NULL THEN 3
+                WHEN b.root_source_id REGEXP 'longArticles_' AND g.gzh_buy_root IS NULL THEN 1
+                WHEN b.root_source_id REGEXP 'dyyjs_' THEN 2
+                WHEN b.root_source_id REGEXP 'fwhtouliu_' THEN 4
+                WHEN b.root_source_id REGEXP 'touliu_tencent_' THEN 5
+                WHEN b.root_source_id REGEXP 'touliu_tencentgzh_|touliu_tencentGzhArticle_|GzhTouLiu_Articles_gh' AND b.root_source_id NOT REGEXP 'fwhtouliu_' THEN 6
+                WHEN b.root_source_id REGEXP 'touliu_tencentqw_|WeCom_' AND t7.rootsourceid IS NULL THEN 7
+                WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' AND t6.root_source_id IS NOT NULL THEN 8 --WHEN b.root_source_id REGEXP 'daitou_tencentgzh|DaiTou_gh' THEN 90
+                WHEN b.root_source_id REGEXP 'qwsq_' AND t7.rootsourceid IS NOT NULL THEN 90
+                WHEN b.root_source_id REGEXP 'fwhhzdyy_|fwhdyy_' THEN 91 --WHEN b.root_source_id REGEXP 'fwhmh_' THEN 92
+                WHEN b.root_source_id REGEXP 'longArticles_'
+                    AND g.gzh_buy_root IS NULL
+                    AND t8.rootsourceid IS NOT NULL THEN 92
+                WHEN b.root_source_id REGEXP 'touliu_tencentqw_|WeCom_|qwmf_' AND t7.rootsourceid IS NOT NULL THEN 93
+                WHEN b.root_source_id REGEXP 'daitou_tencentgzh|DaiTou_gh' THEN 94
+                WHEN b.root_source_id REGEXP 'fwhmh_' THEN 95
+                ELSE 9999999
+        END AS channel_id
+        ,CASE   WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' --AND t9.layer_type ='特殊首层'
+                    AND t9.channel REGEXP 'xycsd|csaq|shy|jxjx|gzcr|xyjj|jxatm|xjcy|yqyx|hbwq|jxxm|gzmy|cdjh|gzjr|gzxts|twhc|qdjdz|sjzyd|gzyhc|djh|gzlx|yywl|szjn|gzdd1|cqqd|cqslh|hzjy|hzjh|sclh|xyhc|snss'
+                    AND b.user_share_depth <= 1 THEN '外部首层'
+                WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' --AND t9.layer_type ='非特殊首层'
+                    AND t9.channel NOT REGEXP 'xycsd|csaq|shy|jxjx|gzcr|xyjj|jxatm|xjcy|yqyx|hbwq|jxxm|gzmy|cdjh|gzjr|gzxts|twhc|qdjdz|sjzyd|gzyhc|djh|gzlx|yywl|szjn|gzdd1|cqqd|cqslh|hzjy|hzjh|sclh|xyhc|snss'
+                    AND b.user_share_depth = 0 THEN '外部首层'
+                WHEN b.root_source_id NOT REGEXP 'touliu_tencentwbqw_|dyyqw_' AND b.user_share_depth = 0 THEN '外部首层'
+                ELSE '非外部首层'
+        END AS level_type
+        ,CASE   WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' AND t9.channel REGEXP 'xycsd|csaq|shy|jxjx|gzcr|xyjj|jxatm|xjcy|yqyx|hbwq|jxxm|gzmy|cdjh|gzjr|gzxts|twhc|qdjdz|sjzyd|gzyhc|djh|gzlx|yywl|szjn|gzdd1|cqqd|cqslh|hzjy|hzjh|sclh|xyhc|snss' --AND t9.layer_type ='特殊首层' 
+                THEN IF(CAST(b.user_share_depth AS INT) <= 1,0,b.user_share_depth)
+                ELSE b.user_share_depth
+        END user_share_depth
+        ,CASE   WHEN b.root_source_id REGEXP 'longArticles_' AND g.gzh_buy_root IS NOT NULL THEN '3公众号买号'
+                WHEN b.root_source_id REGEXP 'longArticles_'
+                    AND g.gzh_buy_root IS NULL
+                    AND t8.rootsourceid IS NULL THEN '1公众号代运营-Daily'
+                WHEN b.root_source_id REGEXP 'dyyjs_' THEN '2公众号代运营-即转'
+                WHEN b.root_source_id REGEXP 'fwhtouliu_' THEN '4服务号投流'
+                WHEN b.root_source_id REGEXP 'touliu_tencent_' THEN '5小程序投流'
+                WHEN b.root_source_id REGEXP 'touliu_tencentgzh_|touliu_tencentGzhArticle_|GzhTouLiu_Articles_gh' AND b.root_source_id NOT REGEXP 'fwhtouliu_' THEN '6公众号投流'
+                WHEN b.root_source_id REGEXP 'touliu_tencentqw_|WeCom_' AND t7.rootsourceid IS NULL THEN '7企微'
+                WHEN b.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_' AND t6.root_source_id IS NOT NULL THEN '8企微外部合作' --WHEN b.root_source_id REGEXP 'daitou_tencentgzh|DaiTou_gh' THEN '90公众号代投'
+                WHEN b.root_source_id REGEXP 'qwsq_' AND t7.rootsourceid IS NOT NULL THEN '90企微投放-人群包-存量拉群'
+                WHEN b.root_source_id REGEXP 'fwhhzdyy_|fwhdyy_' THEN '91服务号代运营-Daily' --WHEN b.root_source_id REGEXP 'fwhmh_' THEN '92服务号买号'
+                WHEN b.root_source_id REGEXP 'longArticles_'
+                    AND g.gzh_buy_root IS NULL
+                    AND t8.rootsourceid IS NOT NULL THEN '92公众号代运营-Daily-合作'
+                WHEN b.root_source_id REGEXP 'touliu_tencentqw_|WeCom_|qwmf_' AND t7.rootsourceid IS NOT NULL THEN '93企微买群粉'
+                WHEN b.root_source_id REGEXP 'daitou_tencentgzh|DaiTou_gh' THEN '94公众号代投'
+                WHEN b.root_source_id REGEXP 'fwhmh_' THEN '95服务号买号'
+                ELSE '内部'
+        END AS channel_type_okr
+        ,b.browse_time
+        ,b.user_share_depth
+        ,b.clienttimestamp
+FROM    base_useractive b -- 关联公众号买号分组表
+LEFT JOIN gzh_buy_group g
+ON      b.root_source_id = g.gzh_buy_root -- 关联公众号即时回复所需的ghid信息
+LEFT JOIN   (
+                SELECT  root_source_id
+                        ,CONCAT_WS(',',COLLECT_SET(put_carrier_id)) AS ghid_list
+                FROM    ad_put_flow_tencent
+                WHERE   root_source_id REGEXP 'dyyjs_'
+                GROUP BY root_source_id
+            ) t3
+ON      b.root_source_id = t3.root_source_id -- 关联公众号即时回复所需的公司信息
+LEFT JOIN loghubods.dim_company_price_info t4
+ON      b.root_source_id = t4.root_source_id
+AND     t4.dt = '${day}' -- 关联投流相关的推送类型信息
+LEFT JOIN   (
+                SELECT  rootsourceid AS root_source_id
+                        ,CASE   WHEN MAX(push_type) = '1' THEN '承接'
+                                WHEN MAX(push_type) = '2' THEN '日常'
+                                WHEN MAX(push_type) REGEXP '菜单|自动回复|即时欢迎语' THEN '承接'
+                                WHEN MAX(push_type) REGEXP '日常推送|社群' THEN '日常'
+                                WHEN MAX(push_type) REGEXP '对外信息展示' THEN '承接'
+                                WHEN MAX(push_type) REGEXP '朋友圈' THEN '日常'
+                                ELSE '未知'
+                        END AS push_type
+                        ,ghid
+                FROM    (
+                            SELECT  rootsourceid
+                                    ,CAST(push_type AS STRING) AS push_type
+                                    ,ghid
+                            FROM    loghubods.long_articles_root_source_id
+                            WHERE   dt = MAX_PT("loghubods.long_articles_root_source_id")
+                            UNION
+                            SELECT  root_source_id AS rootsourceid
+                                    ,put_type_two AS push_type
+                                    ,put_carrier_id
+                            FROM    loghubods.ad_put_flow_record_tencent_day a
+                            WHERE   a.dt = '${day}'
+                        ) 
+                GROUP BY rootsourceid
+                         ,ghid
+            ) t5
+ON      b.root_source_id = t5.root_source_id -- 关联企微外部合作的put_carrier_id
+LEFT JOIN   (
+                SELECT  root_source_id
+                        ,put_carrier_id
+                FROM    ad_put_flow_tencent
+                WHERE   put_type_one = '企微'
+                AND     root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_'
+                GROUP BY root_source_id
+                         ,put_carrier_id
+            ) t6
+ON      b.root_source_id = t6.root_source_id -- 关联服务号信息
+LEFT JOIN   (
+                SELECT  rootsourceid
+                        ,s.gh_id
+                        ,s.channel
+                FROM    loghubods.long_articles_root_source_id a
+                LEFT JOIN service_account s
+                ON      a.ghid = s.gh_id
+                WHERE   a.dt = MAX_PT('loghubods.long_articles_root_source_id')
+                UNION
+                SELECT  root_source_id AS rootsourceid
+                        ,a.put_carrier_id
+                        ,d.channel
+                FROM    loghubods.ad_put_flow_record_tencent_day a
+                LEFT JOIN   (
+                                SELECT  account.channel
+                                        ,gzh.gh_id
+                                        ,gzh.`name`
+                                        ,(CASE    gzh.type
+                                                WHEN 0 THEN '公众号'
+                                                ELSE '服务号'
+                                        END) AS type
+                                FROM    loghubods.content_platform_account account
+                                LEFT JOIN loghubods.content_platform_gzh_account gzh
+                                ON      account.id = gzh.create_account_id
+                            ) d
+                ON      a.put_carrier_id = d.gh_id
+                AND     d.type = '服务号'
+                WHERE   a.dt = '${day}'
+                AND     root_source_id REGEXP 'fwhhzdyy_|fwhdyy_'
+            ) s
+ON      b.root_source_id = s.rootsourceid
+LEFT JOIN   (
+                SELECT  root_source_id AS rootsourceid
+                        ,put_type_two AS push_type --,b.name AS accountname
+                        ,COALESCE(b.name,a.remark) AS accountname
+                FROM    loghubods.ad_put_flow_record_tencent_day a
+                LEFT JOIN   (
+                                SELECT  user_id
+                                        ,name
+                                FROM    loghubods.reply_staff
+                                WHERE   dt = MAX_PT('loghubods.reply_staff')
+                                AND     is_delete = 0
+                            ) b
+                ON      a.put_carrier_id = b.user_id
+                WHERE   dt = '${day}'
+                AND     put_type_one = '企微'
+                AND     root_source_id REGEXP 'touliu_tencentqw_|WeCom_|qwmf_|qwsq_'
+                AND     (
+                            COALESCE(b.name,a.remark) IN ('热点视频推荐','情感专家-月亮姐姐')
+                            OR      root_source_id REGEXP 'qwmf_|qwsq_'
+                )
+                GROUP BY root_source_id
+                         ,put_type_two
+                         ,COALESCE(b.name,a.remark)
+            ) t7
+ON      b.root_source_id = t7.rootsourceid -- 关联长文信息
+LEFT JOIN long_articles_info l
+ON      b.root_source_id = l.rootsourceid
+LEFT JOIN   (
+                SELECT  rootsourceid
+                        ,channel
+                        ,channel_name
+                        ,ghid
+                        ,gzh_name
+                        ,type
+                FROM    loghubods.rootsourceid_ghid_channel_mapping
+                WHERE   type = '公众号'
+                GROUP BY rootsourceid
+                         ,channel
+                         ,channel_name
+                         ,ghid
+                         ,gzh_name
+                         ,type
+            ) t8
+ON      b.root_source_id = t8.rootsourceid
+LEFT JOIN   (
+                SELECT  t1.root_source_id AS rootsourceid
+                        ,t1.put_type_two AS push_type
+                        ,t1.channel
+                        ,t2.layer_type
+                FROM    loghubods.ad_put_flow_record_tencent_day t1
+                LEFT JOIN loghubods.content_platform_account t2
+                ON      t1.channel = t2.channel
+                WHERE   t1.dt = MAX_PT('loghubods.ad_put_flow_record_tencent_day')
+                AND     t1.put_type_one = '企微'
+                AND     t1.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_|dyycd_'
+                GROUP BY t1.root_source_id
+                         ,t1.put_type_two
+                         ,t1.channel
+                         ,t2.layer_type
+            ) t9
+ON      b.root_source_id = t9.rootsourceid
+;

+ 722 - 0
table_gen/de.sql

@@ -0,0 +1,722 @@
+-- =====================================================================
+-- User activation / exposure / return (pull-back) analysis table
+-- (recommend_distribution_data_total)
+-- Dimensions: user-activation tier × entry hour × entry scene
+--             × entry content category × recommended content category × content id
+-- Granularity: GROUP BY CUBE over every dimension combination, with 'SUM' roll-up rows
+-- =====================================================================
+
+-- Commented-out drop, kept for manual recreation of the table:
+-- DROP TABLE IF EXISTS loghubods.recommend_distribution_data_total;
+CREATE TABLE IF NOT EXISTS loghubods.recommend_distribution_data_total (
+    `用户拉活量分层`             STRING    COMMENT '用户拉活量分层(R0&新用户/R1-50等,汇总为SUM)',
+    `进入小时`                   STRING    COMMENT '进入小时(1-4/5-8等,汇总为SUM)',
+    `进入场景`                   STRING    COMMENT '进入场景(业务场景描述,汇总为SUM)',
+    `进入内容品类`               STRING    COMMENT '进入内容品类(merge_leve2维度,汇总为SUM)',
+    `推荐内容品类`               STRING    COMMENT '推荐内容品类(merge_leve2维度,汇总为SUM)',
+    `内容id`                     STRING    COMMENT '内容id(videoid,其他为兜底,汇总为SUM)',
+
+    -- ===== Basic traffic metrics =====
+    `访问人数`                   BIGINT    COMMENT '去重访问人数(machinecode维度)',
+    `曝光人数`                   BIGINT    COMMENT '去重曝光人数(mid维度)',
+    `曝光人数比访问人数`         DOUBLE    COMMENT '曝光人数/访问人数 比值',
+    `曝光次数`                   BIGINT    COMMENT '曝光总次数(mid计数)',
+    `曝光次数人均`               DOUBLE    COMMENT '曝光次数/去重曝光人数 人均曝光次数',
+    `视频个数`                   BIGINT    COMMENT '去重曝光视频个数(vid维度)',
+    `单视频曝光次数`             DOUBLE    COMMENT '曝光次数/去重视频个数 单视频平均曝光次数',
+
+    -- ===== Full-funnel return =====
+    `ALL拉回n层`                 BIGINT    COMMENT '全量拉回n层人数(多来源拉回人数求和)',
+    `ALL拉回n层比曝光次数`       DOUBLE    COMMENT '全量拉回n层人数/曝光次数 比值,保留6位小数',
+
+    -- ===== Native return & rovn =====
+    `return_n_uv_noself`         BIGINT    COMMENT '原生n层拉回人数(无自返)',
+    `rovn`                       DOUBLE    COMMENT '原生n层拉回人数/曝光次数 比值,保留6位小数',
+    `rovn_pred`                  DOUBLE    COMMENT 'rovn预估值(str_pred*rosn_pred平均值),保留6位小数',
+    `rovn_copc`                  DOUBLE    COMMENT 'rovn实际/rovn预估 比值,保留4位小数',
+
+    -- ===== STR metrics =====
+    `str_real`                   DOUBLE    COMMENT 'str实际值(is_return_noself求和/曝光次数),保留6位小数',
+    `str_pred`                   DOUBLE    COMMENT 'str预估值(str_pred求和/曝光次数),保留6位小数',
+    `str_copc`                   DOUBLE    COMMENT 'str实际/str预估 比值,保留4位小数',
+    `str_MAE`                    DOUBLE    COMMENT 'str预估与实际的平均绝对误差,保留6位小数',
+    `STR_VAR`                    DOUBLE    COMMENT 'str预估与实际差值的方差,保留6位小数',
+    `strauc`                     DOUBLE    COMMENT 'str AUC,保留6位小数',
+    `str多维加权copc`            DOUBLE    COMMENT 'str多维加权copc,保留6位小数',
+
+    -- ===== ROSN metrics =====
+    `rosn_real`                  DOUBLE    COMMENT 'rosn实际值(原生n层拉回人数/is_return_noself求和),保留6位小数',
+    `rosn_pred`                  DOUBLE    COMMENT 'rosn预估值(rosn_pred求和/曝光次数),保留6位小数',
+    `rosn_copc`                  DOUBLE    COMMENT 'rosn实际/rosn预估 比值,保留4位小数',
+    `rosn_实际预估diff绝对距离`  DOUBLE    COMMENT '拉回时rosn预估与实际的平均绝对距离,保留6位小数',
+    `rosn_实际预估高低估分布比例` DOUBLE   COMMENT '拉回时rosn预估与实际差值的方差,保留6位小数',
+
+    -- ===== B-chain (share → click) =====
+    `Buv1层拉回人数`             BIGINT    COMMENT 'B端uv1层拉回人数(is_return_noself求和)',
+    `rov0`                       DOUBLE    COMMENT 'B端uv1层拉回人数/曝光次数 比值,保留6位小数',
+    `rov0预估`                   BIGINT    COMMENT 'rov0预估值(固定为0)',
+    `rov0copc`                   BIGINT    COMMENT 'rov0copc值(固定为0)',
+    `ros0`                       DOUBLE    COMMENT 'ros0实际值(B端uv1层拉回人数/分享次数),保留6位小数',
+    `ros0预估`                   BIGINT    COMMENT 'ros0预估值(固定为0)',
+    `ros0copc`                   BIGINT    COMMENT 'ros0copc值(固定为0)',
+    `Bnuvn层拉回人数`            BIGINT    COMMENT 'B端nuvn层拉回人数(原生n层拉回人数一致)',
+    `rorn_b`                     DOUBLE    COMMENT 'B端n层拉回率(Bnuvn/uv1层拉回人数),保留6位小数',
+    `ror1单层_b`                 DOUBLE    COMMENT 'B端1层拉回率(b1/uv1层拉回人数),保留6位小数',
+    `ror2单层_b`                 DOUBLE    COMMENT 'B端2层拉回率(b2/b1),保留6位小数',
+
+    -- ===== B-chain T+1 (cross-day) =====
+    `Bnuvn层拉回人数_t1`         DOUBLE    COMMENT 'T1维度B端nuvn层拉回人数,保留6位小数',
+    `rorn_b_t1`                  DOUBLE    COMMENT 'T1维度B端n层拉回率,保留6位小数',
+    `ror1单层_b_t1`              DOUBLE    COMMENT 'T1维度B端1层拉回率,保留6位小数',
+    `ror2单层_b_t1`              DOUBLE    COMMENT 'T1维度B端2层拉回率,保留6位小数',
+
+    -- ===== C-chain (re-share → click) =====
+    `cn`                         BIGINT    COMMENT 'C端拉回n层人数',
+    `rorn_c`                     DOUBLE    COMMENT 'C端n层拉回率(cn/原生n层拉回人数)',
+    `ror1_c`                     DOUBLE    COMMENT 'C端1层拉回率(c/原生n层拉回人数)',
+    `vor单层_c`                  DOUBLE    COMMENT 'C端单层视频访问率(cviews/原生n层拉回人数)',
+    `vor单层预估`                BIGINT    COMMENT 'C端单层视频访问率预估(固定为0)',
+    `vor单层copc`                BIGINT    COMMENT 'C端单层视频访问率copc(固定为0)',
+
+    -- ===== D-chain (subsequent in-session exposure propagation) =====
+    `a`                          BIGINT    COMMENT 'D端去重访问人数(machinecode维度)',
+    `dn`                         BIGINT    COMMENT 'D端拉回n层人数',
+    `rorn_d`                     DOUBLE    COMMENT 'D端n层拉回率(dn/D端去重访问人数),保留6位小数',
+    `ror1_d`                     DOUBLE    COMMENT 'D端1层拉回率(d1/D端去重访问人数),保留6位小数',
+    `vor_d`                      DOUBLE    COMMENT 'D端视频访问率(g.mid计数/D端去重访问人数)'
+)
+COMMENT '用户拉活-曝光-拉回分析表'
+PARTITIONED BY (
+    dt STRING COMMENT '统计日期,格式yyyyMMdd'
+);
+
+
+-- =====================================================================
+-- CTEs: base sample extraction + feature engineering
+-- =====================================================================
+
+WITH
+-- [1] Raw samples: pull rows from the recsys algorithm sample table and
+--     extract scoresMap. The REPLACE strips backslashes so the escaped
+--     JSON string inside scoresMap can be parsed with GET_JSON_OBJECT below.
+t_raw AS (
+    SELECT  *
+            ,REPLACE(GET_JSON_OBJECT(extend_alg, '$.scoresMap'), "\\", "") AS scoresmap
+            ,CASE
+                WHEN page IN ("回流后沉浸页&内页feed", "详情后沉浸页", "首页feed", "详情页") THEN "推荐"
+                WHEN page IN ("回流页", "其他")                                               THEN "非推荐"
+                ELSE "其他"
+             END AS page_type
+    FROM    loghubods.dwd_recsys_alg_sample_all_20250212
+    WHERE   dt = '${bizdate}'
+    -- AND     apptype IN ("0","4")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): the predicate below is redundant — abcode is already
+    -- restricted to ab0..ab9 by the IN list above, so "ab100" can never pass.
+    AND     abcode NOT IN ("ab100")
+    AND     extend_alg IS NOT NULL
+    AND     GET_JSON_OBJECT(extend_alg, '$.scoresMap') IS NOT NULL
+)
+
+-- [2] Filter: keep recommendation pages only (page_type mapped above)
+,t_filtered AS (
+    SELECT  *
+    FROM    t_raw
+    WHERE   page_type = "推荐"
+)
+
+-- [3] Feature extraction: dimension mapping + model-score parsing.
+--     NOTE(review): the CASE output is aliased AS abcode, so downstream
+--     consumers see the human-readable experiment-group label instead of
+--     the raw bucket id.
+,t_base AS (
+    SELECT  dt
+            ,apptype
+            ,CASE
+                WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准"
+                WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "ab4-5"
+                ELSE "其他"
+             END AS abcode
+            ,page_type AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+            ,scoresmap
+            ,subsessionid
+            -- Model scores parsed from scoresmap (JSON):
+            ,CAST(GET_JSON_OBJECT(scoresmap, '$.fmRov') AS DOUBLE)                            AS str_pred
+            -- 1.22 * NorXGBScore^1.15: presumably empirical calibration
+            -- constants for the rosn prediction — TODO confirm with model owner
+            ,1.22 * POW(CAST(GET_JSON_OBJECT(scoresmap, '$.NorXGBScore') AS DOUBLE), 1.15)    AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(scoresmap, '$.hasReturnRovScore') AS DOUBLE)                 AS rosn_stat
+            ,GET_JSON_OBJECT(v1_feature, '$.title')                                            AS vid_title
+    FROM    t_filtered
+)
+
+
+-- =====================================================================
+-- 主查询: INSERT OVERWRITE → 多维度 CUBE 聚合
+-- =====================================================================
+
+-- SELECT * FROM loghubods.recommend_distribution_data_total WHERE dt = 20260204 ORDER BY 访问人数 DESC
+
+INSERT OVERWRITE TABLE loghubods.recommend_distribution_data_total PARTITION (dt = '${bizdate}')
+SELECT
+        -- ==================== 维度列 ====================
+
+        -- 用户拉活量分层
+        CASE WHEN grouping(
+                    COALESCE(
+                        CASE
+                            WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                            WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                            WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                            ELSE e.type
+                        END
+                    , '-')
+                 ) = 1
+             THEN 'SUM'
+             ELSE NVL(
+                    COALESCE(
+                        CASE
+                            WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                            WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                            WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                            ELSE e.type
+                        END
+                    , '-')
+                 , 'SUM')
+        END                                                                     AS 用户拉活量分层
+
+        -- 进入小时
+        ,CASE WHEN grouping(
+                    COALESCE(
+                        CASE
+                            WHEN in_hour >= 1  AND in_hour <= 4  THEN '1-4'
+                            WHEN in_hour >= 5  AND in_hour <= 8  THEN '5-8'
+                            WHEN in_hour >= 9  AND in_hour <= 12 THEN '9-12'
+                            WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16'
+                            WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20'
+                            WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24'
+                            ELSE '-'
+                        END
+                    , '-')
+                 ) = 1
+              THEN 'SUM'
+              ELSE NVL(
+                    COALESCE(
+                        CASE
+                            WHEN in_hour >= 1  AND in_hour <= 4  THEN '1-4'
+                            WHEN in_hour >= 5  AND in_hour <= 8  THEN '5-8'
+                            WHEN in_hour >= 9  AND in_hour <= 12 THEN '9-12'
+                            WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16'
+                            WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20'
+                            WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24'
+                            ELSE '-'
+                        END
+                    , '-')
+                 , 'SUM')
+        END                                                                     AS 进入小时
+
+        -- 进入场景
+        ,CASE WHEN grouping(CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END) = 1
+              THEN 'SUM'
+              ELSE NVL(CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END, 'SUM')
+        END                                                                     AS 进入场景
+
+        -- 进入内容品类
+        ,CASE WHEN grouping(CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END) = 1
+              THEN 'SUM'
+              ELSE NVL(CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END, 'SUM')
+        END                                                                     AS 进入内容品类
+
+        -- 推荐内容品类
+        ,CASE WHEN grouping(CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END) = 1
+              THEN 'SUM'
+              ELSE NVL(CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END, 'SUM')
+        END                                                                     AS 推荐内容品类
+
+        -- 内容id
+        ,CASE WHEN grouping(COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-')) = 1
+              THEN 'SUM'
+              ELSE NVL(COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-'), 'SUM')
+        END                                                                     AS 内容id
+
+        -- ==================== 基础流量指标 ====================
+        ,COUNT(DISTINCT a.machinecode)                                          AS 访问人数
+        ,COUNT(DISTINCT j.mid)                                                  AS 曝光人数
+        ,COUNT(DISTINCT j.mid) / COUNT(DISTINCT a.machinecode)                  AS 曝光人数比访问人数
+        ,COUNT(j.mid)                                                           AS 曝光次数
+        ,COUNT(j.mid) / COUNT(DISTINCT j.mid)                                   AS 曝光次数人均
+        ,COUNT(DISTINCT j.vid)                                                  AS 视频个数
+        ,COUNT(j.mid) / COUNT(DISTINCT j.vid)                                   AS 单视频曝光次数
+
+        -- ==================== 全链路拉回 ====================
+        ,SUM(return_n_uv_noself) + SUM(cc.cn) + SUM(dd.dn)                     AS ALL拉回n层
+        ,ROUND(COALESCE(
+            (SUM(return_n_uv_noself) + SUM(cc.cn) + SUM(dd.dn)) / COUNT(j.mid)
+        , 0), 6)                                                                AS ALL拉回n层比曝光次数
+
+        -- ==================== 原生拉回 & rovn ====================
+        ,SUM(return_n_uv_noself)                                                AS return_n_uv_noself
+        ,ROUND(SUM(return_n_uv_noself) / COUNT(j.mid), 6)                      AS rovn
+        ,ROUND(AVG(str_pred * rosn_pred), 6)                                    AS rovn_pred
+        ,ROUND(
+            (SUM(return_n_uv_noself) / COUNT(j.mid))
+            / NULLIF(AVG(str_pred * rosn_pred), 0)
+        , 4)                                                                    AS rovn_copc
+
+        -- ==================== STR 指标 ====================
+        ,ROUND(COALESCE(SUM(is_return_noself) / COUNT(j.mid), 0), 6)           AS str_real
+        ,ROUND(COALESCE(SUM(str_pred)         / COUNT(j.mid), 0), 6)           AS str_pred
+        ,ROUND(
+            (SUM(is_return_noself) / COUNT(j.mid))
+            / NULLIF(SUM(str_pred) / COUNT(j.mid), 0)
+        , 4)                                                                    AS str_copc
+        ,ROUND(AVG(ABS(str_pred - is_return_noself)), 6)                       AS str_MAE
+        ,ROUND(VARIANCE(str_pred - is_return_noself), 6)                       AS STR_VAR
+        ,0                                                                      AS strauc
+        ,0                                                                      AS str多维加权copc
+
+        -- ==================== ROSN 指标 ====================
+        ,ROUND(COALESCE(
+            SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0)
+        , 0), 6)                                                                AS rosn_real
+        ,ROUND(COALESCE(SUM(rosn_pred) / COUNT(j.mid), 0), 6)                 AS rosn_pred
+        ,ROUND(
+            (SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0))
+            / NULLIF(SUM(rosn_pred) / COUNT(1), 0)
+        , 4)                                                                    AS rosn_copc
+        ,ROUND(AVG(
+            CASE WHEN is_return_noself = 1
+                 THEN ABS(rosn_pred - return_n_uv_noself)
+            END
+        ), 6)                                                                   AS rosn_实际预估diff绝对距离
+        ,ROUND(VARIANCE(
+            CASE WHEN is_return_noself = 1
+                 THEN rosn_pred - return_n_uv_noself
+            END
+        ), 6)                                                                   AS rosn_实际预估高低估分布比例
+
+        -- ==================== B链: 分享→点击 ====================
+        ,SUM(is_return_noself)                                                  AS Buv1层拉回人数
+        ,ROUND(SUM(is_return_noself) / COUNT(j.mid), 6)                        AS rov0
+        ,0                                                                      AS rov0预估
+        ,0                                                                      AS rov0copc
+        ,ROUND(COALESCE(
+            SUM(is_return_noself) / NULLIF(SUM(share_cnt), 0)
+        , 0), 6)                                                                AS ros0
+        ,0                                                                      AS ros0预估
+        ,0                                                                      AS ros0copc
+        ,SUM(return_n_uv_noself)                                                AS Bnuvn层拉回人数
+        ,ROUND(COALESCE(SUM(return_n_uv_noself) / SUM(is_return_noself), 0), 6) AS rorn_b
+        ,ROUND(COALESCE(SUM(k.b1)               / SUM(is_return_noself), 0), 6) AS ror1单层_b
+        ,ROUND(COALESCE(SUM(k.b2)               / SUM(k.b1),             0), 6) AS ror2单层_b
+
+        -- ==================== B链 T+1 ====================
+        ,ROUND(COALESCE(SUM(k1.bn), 0), 6)                                     AS Bnuvn层拉回人数_t1
+        ,ROUND(COALESCE(SUM(k1.bn) / SUM(k1.b),  0), 6)                       AS rorn_b_t1
+        ,ROUND(COALESCE(SUM(k1.b1) / SUM(k1.b),  0), 6)                       AS ror1单层_b_t1
+        ,ROUND(COALESCE(SUM(k1.b2) / SUM(k1.b1), 0), 6)                       AS ror2单层_b_t1
+
+        -- ==================== C链: 二次分享 ====================
+        ,SUM(cc.cn)                                                             AS cn
+        ,SUM(cc.cn)     / SUM(return_n_uv_noself)                              AS rorn_c
+        ,SUM(cc.c)      / SUM(return_n_uv_noself)                              AS ror1_c
+        ,SUM(cc.cviews) / SUM(return_n_uv_noself)                              AS vor单层_c
+        ,0                                                                      AS vor单层预估
+        ,0                                                                      AS vor单层copc
+
+        -- ==================== D链: session内后续曝光传播 ====================
+        ,COUNT(DISTINCT b.machinecode)                                          AS a
+        ,SUM(dd.dn)                                                             AS dn
+        ,ROUND(COALESCE(SUM(dd.dn) / COUNT(DISTINCT b.machinecode), 0), 6)    AS rorn_d
+        ,ROUND(COALESCE(SUM(dd.d1) / COUNT(DISTINCT b.machinecode), 0), 6)    AS ror1_d
+        ,COUNT(g.mid) / COUNT(DISTINCT b.machinecode)                           AS vor_d
+
+
+-- =====================================================================
+-- FROM + JOINs
+-- =====================================================================
+
+FROM    (
+            -- [a] 用户活跃日志 + 进入小时
+            SELECT  *
+                    ,TO_CHAR(FROM_UNIXTIME(a.clienttimestamp / 1000), 'HH') + 1 AS in_hour
+            FROM    loghubods.useractive_log a
+            WHERE   a.dt = '${bizdate}'
+        ) a
+
+-- [b] 分享点击日志 (topic=click)
+LEFT JOIN (
+            SELECT  DISTINCT
+                    machinecode
+                    ,sessionid
+                    ,subsessionid
+                    ,pagesource
+                    ,topic
+                    ,shareid
+                    ,clickobjectid
+                    ,shareobjectid
+                    ,clientip
+                    ,CASE
+                        WHEN topic = 'click' THEN clickobjectid
+                        WHEN topic = 'share' THEN shareobjectid
+                     END AS videoid
+                    ,sharetitleid
+                    ,sharetitle
+                    ,shareimageurl
+                    ,rootsharemid
+                    ,clienttimestamp
+                    ,FROM_UNIXTIME(clienttimestamp / 1000)                                         AS click_time
+                    ,GET_JSON_OBJECT(videocdm.Ip2RegionAnalyse(clientip), "$.province")            AS click_province
+                    ,GET_JSON_OBJECT(videocdm.Ip2RegionAnalyse(clientip), "$.city")                AS click_city
+                    ,TO_CHAR(FROM_UNIXTIME(clienttimestamp / 1000), 'HH') + 1                     AS click_hour
+            FROM    loghubods.user_share_log
+            WHERE   dt = '${bizdate}'
+            AND     topic = 'click'
+        ) b
+ON      a.machinecode  = b.machinecode
+AND     a.subsessionid = b.subsessionid
+
+-- [c] 视频元信息
+LEFT JOIN videoods.wx_video c
+ON      b.videoid = c.id
+
+-- [d] 点击视频品类标签
+LEFT JOIN loghubods.video_merge_tag d
+ON      b.videoid = d.videoid
+
+-- [e] 用户拉活量分层 (R0/R1-50/R_180_330/...)
+LEFT JOIN (
+            SELECT  DISTINCT type, openid
+            FROM    loghubods.mid_share_return_people_1year
+            WHERE   dt = '${bizdate-1}'
+            AND     type IS NOT NULL
+            AND     type != 'S_ALL'
+            AND     type NOT REGEXP 'R50'
+        ) e
+ON      SUBSTRING_INDEX(a.machinecode, 'weixin_openid_', -1) = e.openid
+
+-- [f] TOP5 进入场景
+LEFT JOIN (
+            SELECT  a.scene_id
+                    ,a.scene_desc
+                    ,COUNT(DISTINCT b.machinecode) AS cnt
+            FROM    loghubods.wechat_miniprogram_scene_simple a
+            LEFT JOIN loghubods.useractive_log b
+            ON      a.scene_id = b.sencetype
+            AND     b.dt = '${bizdate}'
+            GROUP BY a.scene_id, a.scene_desc
+            ORDER BY cnt DESC
+            LIMIT   5
+        ) f
+ON      a.sencetype = f.scene_id
+
+-- [g] 推荐页视频播放
+LEFT JOIN (
+            SELECT  DISTINCT mid, subsessionid, videoid
+            FROM    loghubods.video_action_log_rp
+            WHERE   dt = '${bizdate}'
+            AND     businesstype = 'videoView'
+            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+        ) g
+ON      b.machinecode  = g.mid
+AND     b.subsessionid = g.subsessionid
+
+-- [h] 推荐视频品类标签
+LEFT JOIN loghubods.video_merge_tag h
+ON      g.videoid = h.videoid
+
+-- [i] 热门视频 (按品类 TOP1, 曝光>10w)
+LEFT JOIN (
+            SELECT  merge_leve2, videoid
+            FROM    (
+                        SELECT  b.merge_leve2
+                                ,a.videoid
+                                ,COUNT(1)                                                        AS view
+                                ,ROW_NUMBER() OVER (PARTITION BY b.merge_leve2 ORDER BY COUNT(1) DESC) AS rank
+                        FROM    loghubods.video_action_log_rp a
+                        LEFT JOIN loghubods.video_merge_tag b
+                        ON      a.videoid = b.videoid
+                        WHERE   dt = '${bizdate}'
+                        AND     businesstype = 'videoView'
+                        AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                        GROUP BY b.merge_leve2, a.videoid
+                        HAVING  view > 100000
+                    )
+            WHERE   rank <= 1
+        ) i
+ON      g.videoid = i.videoid
+
+-- [j] 曝光样本 (t_base CTE)
+LEFT JOIN t_base j
+ON      g.mid          = j.mid
+AND     g.subsessionid = j.subsessionid
+AND     g.videoid      = j.vid
+
+-- [k] B链: 当天分享→点击裂变 (sharedepth 1~4)
+LEFT JOIN (
+            SELECT  a.machinecode                                                               AS mid
+                    ,a.subsessionid
+                    ,a.videoid                                                                  AS vid
+                    ,COUNT(DISTINCT CASE WHEN a.machinecode <> b.machinecode                              THEN b.machinecode END) AS bn
+                    ,COUNT(DISTINCT CASE WHEN b.sharedepth = 1 AND a.machinecode <> b.machinecode         THEN b.machinecode END) AS b
+                    ,COUNT(DISTINCT CASE WHEN b.sharedepth = 2 AND a.machinecode <> b.machinecode         THEN b.machinecode END) AS b1
+                    ,COUNT(DISTINCT CASE WHEN b.sharedepth = 3 AND a.machinecode <> b.machinecode         THEN b.machinecode END) AS b2
+                    ,COUNT(DISTINCT CASE WHEN b.sharedepth = 4 AND a.machinecode <> b.machinecode         THEN b.machinecode END) AS b3
+            FROM    (
+                        SELECT  DISTINCT machinecode, shareobjectid AS videoid, recomTraceId,
+                                subsessionid, sharedepth, shareid
+                        FROM    loghubods.user_share_log
+                        WHERE   dt = '${bizdate}'
+                        AND     topic = 'share'
+                        AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                    ) a
+            LEFT JOIN (
+                        SELECT  DISTINCT machinecode, clickobjectid, recomTraceId,
+                                subsessionid, sharedepth, rootshareid
+                        FROM    loghubods.user_share_log
+                        WHERE   dt = '${bizdate}'
+                        AND     topic = 'click'
+                    ) b
+            ON      a.shareid = b.rootshareid
+            GROUP BY a.machinecode, a.subsessionid, a.videoid
+        ) k
+ON      g.mid          = k.mid
+AND     g.subsessionid = k.subsessionid
+AND     g.videoid      = k.vid
+
+-- [k1] B链 T+1: 跨天分享→点击裂变
+LEFT JOIN (
+            SELECT  a.machinecode                                                               AS mid
+                    ,a.subsessionid
+                    ,a.videoid                                                                  AS vid
+                    ,COUNT(DISTINCT CASE WHEN a.machinecode <> b.machinecode                              THEN b.machinecode END) AS bn
+                    ,COUNT(DISTINCT CASE WHEN b.sharedepth = 1 AND a.machinecode <> b.machinecode         THEN b.machinecode END) AS b
+                    ,COUNT(DISTINCT CASE WHEN b.sharedepth = 2 AND a.machinecode <> b.machinecode         THEN b.machinecode END) AS b1
+                    ,COUNT(DISTINCT CASE WHEN b.sharedepth = 3 AND a.machinecode <> b.machinecode         THEN b.machinecode END) AS b2
+                    ,COUNT(DISTINCT CASE WHEN b.sharedepth = 4 AND a.machinecode <> b.machinecode         THEN b.machinecode END) AS b3
+            FROM    (
+                        SELECT  DISTINCT machinecode, shareobjectid AS videoid, recomTraceId,
+                                subsessionid, sharedepth, shareid
+                        FROM    loghubods.user_share_log
+                        WHERE   dt = '${bizdate}'
+                        AND     topic = 'share'
+                        AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                    ) a
+            LEFT JOIN (
+                        SELECT  DISTINCT machinecode, clickobjectid, recomTraceId,
+                                subsessionid, sharedepth, rootshareid
+                        FROM    loghubods.user_share_log
+                        WHERE   dt >= '${bizdate}'
+                        AND     dt <= '${bizdate+1}'       -- T+1 跨天
+                        AND     topic = 'click'
+                    ) b
+            ON      a.shareid = b.rootshareid
+            GROUP BY a.machinecode, a.subsessionid, a.videoid
+        ) k1
+ON      g.mid          = k1.mid
+AND     g.subsessionid = k1.subsessionid
+AND     g.videoid      = k1.vid
+
+-- [l] TOP10 推荐品类 (按播放量)
+LEFT JOIN (
+            SELECT  b.merge_leve2
+                    ,COUNT(1) AS view
+            FROM    loghubods.video_action_log_rp a
+            LEFT JOIN loghubods.video_merge_tag b
+            ON      a.videoid = b.videoid
+            WHERE   dt = '${bizdate}'
+            AND     businesstype = 'videoView'
+            AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+            AND     b.merge_leve2 IS NOT NULL
+            GROUP BY b.merge_leve2
+            ORDER BY view DESC
+            LIMIT   10
+        ) l
+ON      h.merge_leve2 = l.merge_leve2
+
+-- [m] TOP1 点击品类 (按点击人数)
+LEFT JOIN (
+            SELECT  b.merge_leve2
+                    ,COUNT(DISTINCT machinecode) AS click
+            FROM    loghubods.user_share_log a
+            LEFT JOIN loghubods.video_merge_tag b
+            ON      a.clickobjectid = b.videoid
+            WHERE   dt = '${bizdate}'
+            AND     topic = 'click'
+            AND     b.merge_leve2 IS NOT NULL
+            GROUP BY b.merge_leve2
+            ORDER BY click DESC
+            LIMIT   1
+        ) m
+ON      d.merge_leve2 = m.merge_leve2
+
+-- [cc] C链: 二次分享 (分享→点击→再分享→再点击 + 视频播放)
+LEFT JOIN (
+            SELECT  a.machinecode                                                               AS mid
+                    ,a.subsessionid
+                    ,a.videoid                                                                  AS vid
+                    ,COUNT(DISTINCT CASE WHEN b1.machinecode <> b2.machinecode THEN b2.machinecode END)  AS cn
+                    ,COUNT(DISTINCT
+                        CASE WHEN b2.sharedepth = 1 AND b1.machinecode <> b2.machinecode
+                             THEN b2.machinecode
+                        END
+                    )                                                                           AS c
+                    ,COUNT(c.mid)                                                               AS cviews
+            FROM    (
+                        -- 一次分享
+                        SELECT  DISTINCT machinecode, shareobjectid AS videoid, recomTraceId,
+                                subsessionid, sharedepth, shareid
+                        FROM    loghubods.user_share_log
+                        WHERE   dt = '${bizdate}'
+                        AND     topic = 'share'
+                        AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                    ) a
+            LEFT JOIN (
+                        -- 一次点击
+                        SELECT  DISTINCT machinecode, clickobjectid, recomTraceId,
+                                subsessionid, sharedepth, rootshareid
+                        FROM    loghubods.user_share_log
+                        WHERE   dt = '${bizdate}'
+                        AND     topic = 'click'
+                    ) b
+            ON      a.shareid = b.rootshareid
+            LEFT JOIN (
+                        -- 二次分享 (点击者再分享)
+                        SELECT  DISTINCT machinecode, shareobjectid, recomTraceId,
+                                subsessionid, sharedepth, shareid
+                        FROM    loghubods.user_share_log
+                        WHERE   dt = '${bizdate}'
+                        AND     topic = 'share'
+                        AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                    ) b1
+            ON      b.machinecode  = b1.machinecode
+            AND     b.subsessionid = b1.subsessionid
+            LEFT JOIN (
+                        -- 二次点击
+                        SELECT  DISTINCT machinecode, clickobjectid, recomTraceId,
+                                subsessionid, sharedepth, shareid, rootshareid
+                        FROM    loghubods.user_share_log
+                        WHERE   dt = '${bizdate}'
+                        AND     topic = 'click'
+                    ) b2
+            ON      b1.shareid = b2.rootshareid
+            LEFT JOIN (
+                        -- 二次点击者的视频播放
+                        SELECT  DISTINCT mid, subsessionid, videoid
+                        FROM    loghubods.video_action_log_rp
+                        WHERE   dt = '${bizdate}'
+                        AND     businesstype = 'videoView'
+                        AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                    ) c
+            ON      b2.machinecode = c.mid
+            AND     b2.subsessionid = c.subsessionid
+            GROUP BY a.machinecode, a.subsessionid, a.videoid
+        ) cc
+ON      g.mid          = cc.mid
+AND     g.subsessionid = cc.subsessionid
+AND     g.videoid      = cc.vid
+
+-- [dd] D链: session内后续曝光传播 (窗口函数累计后续拉回)
+LEFT JOIN (
+            SELECT  *
+                    ,COALESCE(
+                        SUM(回流)  OVER (PARTITION BY mid, subsessionid ORDER BY rn ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
+                    , 0) AS dn
+                    ,COALESCE(
+                        SUM(回流1) OVER (PARTITION BY mid, subsessionid ORDER BY rn ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING)
+                    , 0) AS d1
+            FROM    (
+                        SELECT  a.mid                                                           AS mid
+                                ,a.subsessionid
+                                ,a.videoid                                                      AS vid
+                                ,COUNT(DISTINCT b.shareid)                                      AS 分享次数
+                                ,COUNT(DISTINCT
+                                    CASE WHEN c.machinecode <> b.machinecode
+                                         THEN c.machinecode
+                                    END
+                                )                                                               AS 回流
+                                ,COUNT(DISTINCT
+                                    CASE WHEN c.machinecode <> b.machinecode AND c.sharedepth = 1
+                                         THEN c.machinecode
+                                    END
+                                )                                                               AS 回流1
+                                ,ROW_NUMBER() OVER (PARTITION BY a.subsessionid ORDER BY a.logtimestamp DESC) AS rn
+                        FROM    (
+                                    SELECT  DISTINCT mid, subsessionid, videoid, logtimestamp
+                                    FROM    loghubods.video_action_log_rp
+                                    WHERE   dt = '${bizdate}'
+                                    AND     businesstype = 'videoView'
+                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                ) a
+                        LEFT JOIN (
+                                    SELECT  DISTINCT machinecode, shareobjectid AS videoid, recomTraceId,
+                                            subsessionid, sharedepth, shareid, clienttimestamp
+                                    FROM    loghubods.user_share_log
+                                    WHERE   dt = '${bizdate}'
+                                    AND     topic = 'share'
+                                    AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+                                ) b
+                        ON      a.mid          = b.machinecode
+                        AND     a.subsessionid = b.subsessionid
+                        AND     a.videoid      = b.videoid
+                        LEFT JOIN (
+                                    SELECT  DISTINCT machinecode, clickobjectid, recomTraceId,
+                                            subsessionid, sharedepth, rootshareid
+                                    FROM    loghubods.user_share_log
+                                    WHERE   dt = '${bizdate}'
+                                    AND     topic = 'click'
+                                ) c
+                        ON      b.shareid = c.rootshareid
+                        GROUP BY a.mid, a.subsessionid, a.videoid, a.logtimestamp
+                    )
+        ) dd
+ON      g.mid          = dd.mid
+AND     g.subsessionid = dd.subsessionid
+AND     g.videoid      = dd.vid
+
+
+-- =====================================================================
+-- GROUP BY CUBE: 全维度组合聚合
+-- =====================================================================
+
+GROUP BY CUBE(
+            -- 用户拉活量分层
+            COALESCE(
+                CASE
+                    WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                    WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                    WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                    ELSE e.type
+                END
+            , '-')
+
+            -- 进入小时
+            ,COALESCE(
+                CASE
+                    WHEN in_hour >= 1  AND in_hour <= 4  THEN '1-4'
+                    WHEN in_hour >= 5  AND in_hour <= 8  THEN '5-8'
+                    WHEN in_hour >= 9  AND in_hour <= 12 THEN '9-12'
+                    WHEN in_hour >= 13 AND in_hour <= 16 THEN '13-16'
+                    WHEN in_hour >= 17 AND in_hour <= 20 THEN '17-20'
+                    WHEN in_hour >= 21 AND in_hour <= 24 THEN '21-24'
+                    ELSE '-'
+                END
+            , '-')
+
+            -- 进入场景
+            ,CASE WHEN f.scene_id IS NOT NULL THEN f.scene_desc ELSE '其他' END
+
+            -- 进入内容品类
+            ,CASE WHEN m.merge_leve2 IS NOT NULL THEN d.merge_leve2 ELSE '其他' END
+
+            -- 推荐内容品类
+            ,CASE WHEN l.merge_leve2 IS NOT NULL THEN h.merge_leve2 ELSE '其他' END
+
+            -- 内容id
+            ,COALESCE(CASE WHEN i.merge_leve2 IS NOT NULL THEN i.videoid ELSE '其他' END, '-')
+        )
+
+ORDER BY 访问人数 DESC
+;

+ 324 - 0
table_gen/exposure_return_Bn.sql

@@ -0,0 +1,324 @@
+--*********************
+-- Exposure return-flow value calculation (B + C multi-hop version)
+-- Formulas:
+--   B_i      = same-video return users (computed via rootshareid)
+--   C_1(i)   = Σ B_j, over exposures j inside the sessions of users returned by E_i (1 hop)
+--   C_2(i)   = Σ B_k, over exposures k inside the return-user sessions reached via C_1's exposures (2 hops)
+--   C_3(i)   = likewise (3 hops)
+--   V_total  = B + C_1 + C_2 + C_3
+--
+-- Compared with the recursive version:
+--   1. Drops D (in-session accumulation) to avoid exponential blow-up
+--   2. Each C level is computed independently, not from the previous level's output
+--   3. CTE count reduced from ~18 to ~12
+-- NOTE(review): the hop joins below do not exclude already-visited subsessions,
+-- so cycles between sessions can inflate C_1/C_2/C_3; the v2 query fixes this
+-- with a BFS frontier + anti-join.
+--*********************
+
+WITH
+--========================================
+-- 1. Base data preparation
+--========================================
+
+-- 1.1 Return-flow data (users coming back through a share link)
+-- Time range: 24 hours starting at ${dt}${hh}
+t_return AS (
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        -- rn dedupes identical click rows (keep latest); dthh_id numbers clicks within (hour, shareid, vid)
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$"
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.2 Share data
+-- Time range: 24 hours starting at ${dt}${hh}
+,t_share_from_sharelog AS (
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.3 Exposure data
+-- Time range: the single hour ${dt}${hh} (the exposure origin)
+,t_exposure AS (
+    SELECT  dthh_id
+            ,dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,ts
+            ,id
+            ,dt
+            ,hh
+    FROM    loghubods.dwd_recsys_alg_exposure_base_view_20250402
+    WHERE   CONCAT(dt,hh) = '${dt}${hh}'
+)
+
+-- 1.4 Detail-page exposures (used to match non-standard shares)
+,t_exposure_detail AS (
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
+)
+
+--========================================
+-- 2. Match exposures to shares (single JOIN + priority selection)
+--========================================
+
+-- 2.1 Regular shares matched to exposures (all priority levels in one pass)
+,t_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                -- Priority: subsession+pagesource+ts > session+pagesource+ts > ...
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                    WHEN s.subsessionid = e.subsessionid THEN 5
+                    WHEN s.sessionid = e.sessionid THEN 6
+                END AS match_priority
+                -- Keep the single best exposure per share: best priority first, then the latest exposure
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                            WHEN s.subsessionid = e.subsessionid THEN 5
+                            WHEN s.sessionid = e.sessionid THEN 6
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- 2.2 Non-standard shares (detail pages) matched to detail-page exposures in one pass
+,t_no_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                -- Same priority scheme as 2.1, minus the pagesource-equality tiers
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid THEN 3
+                    WHEN s.sessionid = e.sessionid THEN 4
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid THEN 3
+                            WHEN s.sessionid = e.sessionid THEN 4
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure_detail e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+--========================================
+-- 3. Union all share-exposure matches
+--========================================
+,t_share_exposure AS (
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_normal_share_exposure
+    UNION ALL
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_no_normal_share_exposure
+)
+
+--========================================
+-- 4. Shares joined to returns + B computation
+--========================================
+
+-- 4.1 Share-to-return join (exposure_id → return_subsessionid)
+-- Returns are tied back to the root share (rootshareid) on the same video and apptype.
+,t_share_return AS (
+    SELECT  se.exposure_id
+            ,se.shareid
+            ,se.vid
+            ,se.apptype
+            ,se.subsessionid
+            ,r.subsessionid AS return_subsessionid
+            ,r.mid AS return_mid
+    FROM    t_share_exposure se
+    JOIN    t_return r
+    ON      se.shareid = r.rootshareid
+    AND     se.vid = r.vid
+    AND     se.apptype = r.apptype
+)
+
+-- 4.2 Per-exposure B value and bn_subsessions
+-- B = count of distinct returning devices (return_mid) attributed to the exposure.
+,t_exposure_bn AS (
+    SELECT  e.id AS exposure_id
+            ,e.subsessionid
+            ,e.ts
+            ,e.vid
+            ,e.uid
+            ,e.mid
+            ,COALESCE(bn.B, 0) AS B
+            ,bn.bn_subsessions
+    FROM    t_exposure e
+    LEFT JOIN (
+        SELECT  exposure_id
+                ,COUNT(DISTINCT return_mid) AS B
+                ,COLLECT_SET(return_subsessionid) AS bn_subsessions
+        FROM    t_share_return
+        GROUP BY exposure_id
+    ) bn
+    ON      e.id = bn.exposure_id
+)
+
+--========================================
+-- 5. Multi-hop computation (direct JOINs, no recursion)
+--========================================
+
+-- C_1: 1 hop (exposure → return sessions → sum of B over exposures in those sessions)
+,t_c1 AS (
+    SELECT  sr.exposure_id
+            ,SUM(eb.B) AS C_1
+    FROM    t_share_return sr
+    JOIN    t_exposure_bn eb
+    ON      sr.return_subsessionid = eb.subsessionid
+    GROUP BY sr.exposure_id
+)
+
+-- C_2: 2 hops
+,t_c2 AS (
+    SELECT  sr1.exposure_id
+            ,SUM(eb2.B) AS C_2
+    FROM    t_share_return sr1
+    JOIN    t_exposure_bn eb1
+    ON      sr1.return_subsessionid = eb1.subsessionid
+    JOIN    t_share_return sr2
+    ON      eb1.exposure_id = sr2.exposure_id
+    JOIN    t_exposure_bn eb2
+    ON      sr2.return_subsessionid = eb2.subsessionid
+    GROUP BY sr1.exposure_id
+)
+
+-- C_3: 3 hops
+,t_c3 AS (
+    SELECT  sr1.exposure_id
+            ,SUM(eb3.B) AS C_3
+    FROM    t_share_return sr1
+    JOIN    t_exposure_bn eb1
+    ON      sr1.return_subsessionid = eb1.subsessionid
+    JOIN    t_share_return sr2
+    ON      eb1.exposure_id = sr2.exposure_id
+    JOIN    t_exposure_bn eb2
+    ON      sr2.return_subsessionid = eb2.subsessionid
+    JOIN    t_share_return sr3
+    ON      eb2.exposure_id = sr3.exposure_id
+    JOIN    t_exposure_bn eb3
+    ON      sr3.return_subsessionid = eb3.subsessionid
+    GROUP BY sr1.exposure_id
+)
+
+--========================================
+-- 6. Final output
+-- One row per exposure: B, C_1..C_3 and their total, plus the video title.
+--========================================
+SELECT  e.id AS exposure_id
+        ,e.vid
+        ,v.title AS video_title
+        ,e.uid
+        ,e.mid
+        ,e.ts
+        ,CASE WHEN se.exposure_id IS NOT NULL THEN 1 ELSE 0 END AS is_share
+        ,COALESCE(bn.B, 0) AS B
+        ,COALESCE(c1.C_1, 0) AS C_1
+        ,COALESCE(c2.C_2, 0) AS C_2
+        ,COALESCE(c3.C_3, 0) AS C_3
+        ,COALESCE(bn.B, 0) + COALESCE(c1.C_1, 0) + COALESCE(c2.C_2, 0) + COALESCE(c3.C_3, 0) AS V_total
+FROM    t_exposure e
+LEFT JOIN (SELECT DISTINCT exposure_id FROM t_share_exposure) se
+ON      e.id = se.exposure_id
+LEFT JOIN t_exposure_bn bn
+ON      e.id = bn.exposure_id
+LEFT JOIN t_c1 c1
+ON      e.id = c1.exposure_id
+LEFT JOIN t_c2 c2
+ON      e.id = c2.exposure_id
+LEFT JOIN t_c3 c3
+ON      e.id = c3.exposure_id
+LEFT JOIN videoods.wx_video v
+ON      e.vid = CAST(v.id AS STRING)
+;

+ 378 - 0
table_gen/exposure_return_Bn_v2.sql

@@ -0,0 +1,378 @@
+--*********************
+-- Exposure return-flow value calculation (B + C multi-hop version, BFS de-cycled)
+-- Formulas:
+--   B_i      = same-video return users (computed via rootshareid)
+--   C_1(i)   = Σ B_j, j ∈ frontier_1 (directly reached subsessions)
+--   C_2(i)   = Σ B_k, k ∈ frontier_2 (excluding subsessions already visited at hop 1)
+--   C_3(i)   = likewise (excluding the hop 1 + hop 2 visited sets)
+--   V_total  = B + C_1 + C_2 + C_3
+--
+-- Improvement over v1: BFS frontier + anti-join de-cycling
+--   Each subsession is counted only at its shortest hop distance, never twice,
+--   which fixes the C_1=C_2=C_3 cycle-inflation problem.
+--*********************
+
+WITH
+--========================================
+-- 1. Base data preparation
+--========================================
+
+-- 1.1 Return-flow data (users coming back through a share link)
+-- Time range: 24 hours starting at ${dt}${hh}
+t_return AS (
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        -- rn dedupes identical click rows (keep latest); dthh_id numbers clicks within (hour, shareid, vid)
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$"
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.2 Share data
+-- Time range: 24 hours starting at ${dt}${hh}
+,t_share_from_sharelog AS (
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.3 Exposure data
+-- Time range: the single hour ${dt}${hh} (the exposure origin)
+,t_exposure AS (
+    SELECT  dthh_id
+            ,dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,ts
+            ,id
+            ,dt
+            ,hh
+    FROM    loghubods.dwd_recsys_alg_exposure_base_view_20250402
+    WHERE   CONCAT(dt,hh) = '${dt}${hh}'
+)
+
+-- 1.4 Detail-page exposures (used to match non-standard shares)
+,t_exposure_detail AS (
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
+)
+
+--========================================
+-- 2. Match exposures to shares (single JOIN + priority selection)
+--========================================
+
+-- 2.1 Regular shares matched to exposures (all priority levels in one pass)
+,t_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                -- Priority: subsession+pagesource+ts > session+pagesource+ts > ...
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                    WHEN s.subsessionid = e.subsessionid THEN 5
+                    WHEN s.sessionid = e.sessionid THEN 6
+                END AS match_priority
+                -- Keep the single best exposure per share: best priority first, then the latest exposure
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                            WHEN s.subsessionid = e.subsessionid THEN 5
+                            WHEN s.sessionid = e.sessionid THEN 6
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- 2.2 Non-standard shares (detail pages) matched to detail-page exposures in one pass
+,t_no_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                -- Same priority scheme as 2.1, minus the pagesource-equality tiers
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid THEN 3
+                    WHEN s.sessionid = e.sessionid THEN 4
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid THEN 3
+                            WHEN s.sessionid = e.sessionid THEN 4
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure_detail e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+--========================================
+-- 3. Union all share-exposure matches
+--========================================
+,t_share_exposure AS (
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_normal_share_exposure
+    UNION ALL
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_no_normal_share_exposure
+)
+
+--========================================
+-- 4. Shares joined to returns + B computation
+--========================================
+
+-- 4.1 Share-to-return join (exposure_id → return_subsessionid)
+-- Returns are tied back to the root share (rootshareid) on the same video and apptype.
+,t_share_return AS (
+    SELECT  se.exposure_id
+            ,se.shareid
+            ,se.vid
+            ,se.apptype
+            ,se.subsessionid
+            ,r.subsessionid AS return_subsessionid
+            ,r.mid AS return_mid
+    FROM    t_share_exposure se
+    JOIN    t_return r
+    ON      se.shareid = r.rootshareid
+    AND     se.vid = r.vid
+    AND     se.apptype = r.apptype
+)
+
+-- 4.2 Per-exposure B value and bn_subsessions
+-- B = count of distinct returning devices (return_mid) attributed to the exposure.
+,t_exposure_bn AS (
+    SELECT  e.id AS exposure_id
+            ,e.subsessionid
+            ,e.ts
+            ,e.vid
+            ,e.uid
+            ,e.mid
+            ,COALESCE(bn.B, 0) AS B
+            ,bn.bn_subsessions
+    FROM    t_exposure e
+    LEFT JOIN (
+        SELECT  exposure_id
+                ,COUNT(DISTINCT return_mid) AS B
+                ,COLLECT_SET(return_subsessionid) AS bn_subsessions
+        FROM    t_share_return
+        GROUP BY exposure_id
+    ) bn
+    ON      e.id = bn.exposure_id
+)
+
+--========================================
+-- 5. Multi-hop computation (BFS frontier + anti-join de-cycling)
+--   Each subsession is counted only at its shortest hop distance
+--========================================
+
+-- Frontier 1: subsessions directly reached by the source exposure (deduped)
+,t_frontier_1 AS (
+    SELECT DISTINCT exposure_id AS source_id, return_subsessionid AS reached_sub
+    FROM   t_share_return
+)
+
+-- C_1: sum of B over exposures in the hop-1 subsessions
+,t_c1 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_1
+    FROM    t_frontier_1 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+
+-- Return MIDs involved in C_1 (deduped, comma-joined for output)
+,t_c1_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,CONCAT_WS(',', COLLECT_SET(sr.return_mid)) AS C_1_mids
+    FROM    t_frontier_1 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- Frontier 2: one hop beyond hop 1, excluding subsessions already visited at hop 1
+-- (anti-join via LEFT JOIN ... WHERE v1.source_id IS NULL)
+,t_frontier_2 AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_frontier_1 f1
+    JOIN    t_exposure_bn eb1 ON f1.reached_sub = eb1.subsessionid
+    JOIN    t_share_return sr2 ON eb1.exposure_id = sr2.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+
+-- C_2: sum of B over exposures in the hop-2 subsessions
+,t_c2 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_2
+    FROM    t_frontier_2 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+
+-- Return MIDs involved in C_2 (deduped, comma-joined for output)
+,t_c2_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,CONCAT_WS(',', COLLECT_SET(sr.return_mid)) AS C_2_mids
+    FROM    t_frontier_2 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- Frontier 3: one hop beyond hop 2, excluding the hop 1 + hop 2 visited sets
+,t_frontier_3 AS (
+    SELECT DISTINCT f2.source_id, sr3.return_subsessionid AS reached_sub
+    FROM    t_frontier_2 f2
+    JOIN    t_exposure_bn eb2 ON f2.reached_sub = eb2.subsessionid
+    JOIN    t_share_return sr3 ON eb2.exposure_id = sr3.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f2.source_id = v1.source_id
+        AND sr3.return_subsessionid = v1.reached_sub
+    LEFT JOIN t_frontier_2 v2
+        ON  f2.source_id = v2.source_id
+        AND sr3.return_subsessionid = v2.reached_sub
+    WHERE   v1.source_id IS NULL AND v2.source_id IS NULL
+)
+
+-- C_3: sum of B over exposures in the hop-3 subsessions
+,t_c3 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_3
+    FROM    t_frontier_3 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+
+-- Return MIDs involved in C_3 (deduped, comma-joined for output)
+,t_c3_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,CONCAT_WS(',', COLLECT_SET(sr.return_mid)) AS C_3_mids
+    FROM    t_frontier_3 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+--========================================
+-- 6. Final output
+-- One row per exposure: B, C_1..C_3 (with contributing MIDs), total, and video title.
+--========================================
+SELECT  e.id AS exposure_id
+        ,e.vid
+        ,v.title AS video_title
+        ,e.uid
+        ,e.mid
+        ,e.ts
+        ,CASE WHEN se.exposure_id IS NOT NULL THEN 1 ELSE 0 END AS is_share
+        ,COALESCE(bn.B, 0) AS B
+        ,COALESCE(c1.C_1, 0) AS C_1
+        ,c1m.C_1_mids
+        ,COALESCE(c2.C_2, 0) AS C_2
+        ,c2m.C_2_mids
+        ,COALESCE(c3.C_3, 0) AS C_3
+        ,c3m.C_3_mids
+        ,COALESCE(bn.B, 0) + COALESCE(c1.C_1, 0) + COALESCE(c2.C_2, 0) + COALESCE(c3.C_3, 0) AS V_total
+FROM    t_exposure e
+LEFT JOIN (SELECT DISTINCT exposure_id FROM t_share_exposure) se
+ON      e.id = se.exposure_id
+LEFT JOIN t_exposure_bn bn
+ON      e.id = bn.exposure_id
+LEFT JOIN t_c1 c1
+ON      e.id = c1.exposure_id
+LEFT JOIN t_c1_mids c1m
+ON      e.id = c1m.exposure_id
+LEFT JOIN t_c2 c2
+ON      e.id = c2.exposure_id
+LEFT JOIN t_c2_mids c2m
+ON      e.id = c2m.exposure_id
+LEFT JOIN t_c3 c3
+ON      e.id = c3.exposure_id
+LEFT JOIN t_c3_mids c3m
+ON      e.id = c3m.exposure_id
+LEFT JOIN videoods.wx_video v
+ON      e.vid = CAST(v.id AS STRING)
+;

+ 654 - 0
table_gen/exposure_return_Bn_v3.sql

@@ -0,0 +1,654 @@
+--*********************
+-- Exposure return-flow value computation (B + multi-hop C, BFS with cycle removal, 24h + 48h)
+-- Emits B/C values and the contributing MID lists for both the 24h and 48h windows
+--*********************
+
+WITH
+--========================================
+-- 1. Base data preparation
+--========================================
+
+-- 1.1 Return-flow (click) events, 24h window.
+-- One deduplicated row per click on a shared video; `id` is a synthetic
+-- key built as dthh:shareid:vid:dthh_id.
+t_return AS (
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        -- rn = 1 keeps the latest record per full event key (dedup of repeated logs)
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        -- dthh_id: ordinal of the click within (hour, shareid, vid); only used to build `id`
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow
+                -- scan partitions from the base hour up to +24h
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$"
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.1b Return-flow (click) events, 48h window.
+-- Identical to t_return except the partition scan extends to +48h.
+,t_return_48h AS (
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 48),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$"
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.2 Share events, 24h window.
+-- One deduplicated row per share action (rn = 1 keeps the latest log per key).
+,t_share_from_sharelog AS (
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.2b Share events, 48h window.
+-- Identical to t_share_from_sharelog except the partition scan extends to +48h.
+,t_share_from_sharelog_48h AS (
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 48),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.3 Exposure data (shared by the 24h and 48h paths).
+-- Only the single base hour partition '${dt}${hh}' — exposures are the
+-- anchor events whose downstream value is being measured.
+,t_exposure AS (
+    SELECT  dthh_id
+            ,dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,ts
+            ,id
+            ,dt
+            ,hh
+    FROM    loghubods.dwd_recsys_alg_exposure_base_view_20250402
+    WHERE   CONCAT(dt,hh) = '${dt}${hh}'
+)
+
+-- 1.4 Detail-page exposures (shared): subset of t_exposure whose pagesource
+-- is a user-videos-detail or detail-recommend page.
+,t_exposure_detail AS (
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
+)
+
+--========================================
+-- 2. Match shares back to their originating exposure (24h)
+--========================================
+-- Shares from regular pages (everything except share-recommend pages),
+-- matched to candidate exposures on (apptype, mid, vid) plus a session or
+-- subsession match. The CASE ranks match quality — same subsession + same
+-- page + exposure-before-share is best (1), bare session match is worst (6)
+-- — and rn = 1 keeps the single best-ranked, latest exposure per share.
+-- LEFT JOIN: a share with no matching exposure is kept with exposure_id NULL.
+,t_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                    WHEN s.subsessionid = e.subsessionid THEN 5
+                    WHEN s.sessionid = e.sessionid THEN 6
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                            WHEN s.subsessionid = e.subsessionid THEN 5
+                            WHEN s.sessionid = e.sessionid THEN 6
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- Shares from share-recommend pages: matched against t_exposure_detail only,
+-- and without the pagesource-equality tiers (4-level priority instead of 6),
+-- since the share page differs from the exposure page by construction here.
+,t_no_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid THEN 3
+                    WHEN s.sessionid = e.sessionid THEN 4
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid THEN 3
+                            WHEN s.sessionid = e.sessionid THEN 4
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure_detail e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- Union of both matching paths: one row per share with its chosen exposure_id
+-- (NULL when no exposure matched). The two branches partition shares by
+-- pagesource, so UNION ALL cannot double-count.
+,t_share_exposure AS (
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_normal_share_exposure
+    UNION ALL
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_no_normal_share_exposure
+)
+
+--========================================
+-- 2b. Match shares back to their originating exposure (48h)
+--========================================
+-- 48h mirror of t_normal_share_exposure: same 6-tier match ranking, but the
+-- shares come from the 48h share window (exposures are still the base hour).
+,t_normal_share_exposure_48h AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                    WHEN s.subsessionid = e.subsessionid THEN 5
+                    WHEN s.sessionid = e.sessionid THEN 6
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                            WHEN s.subsessionid = e.subsessionid THEN 5
+                            WHEN s.sessionid = e.sessionid THEN 6
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog_48h s
+        LEFT JOIN t_exposure e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- 48h mirror of t_no_normal_share_exposure (share-recommend pages,
+-- matched against detail-page exposures with the 4-tier ranking).
+,t_no_normal_share_exposure_48h AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid THEN 3
+                    WHEN s.sessionid = e.sessionid THEN 4
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid THEN 3
+                            WHEN s.sessionid = e.sessionid THEN 4
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog_48h s
+        LEFT JOIN t_exposure_detail e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- Union of both 48h matching paths: one row per 48h share with its exposure_id.
+,t_share_exposure_48h AS (
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_normal_share_exposure_48h
+    UNION ALL
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_no_normal_share_exposure_48h
+)
+
+--========================================
+-- 3. Join shares to return clicks + compute B (24h)
+--========================================
+-- Attribute each return click to the exposure behind the share.
+-- The join is on rootshareid, so clicks anywhere in the downstream share
+-- chain rooted at this share are credited to the originating exposure.
+,t_share_return AS (
+    SELECT  se.exposure_id
+            ,se.shareid
+            ,se.vid
+            ,se.apptype
+            ,se.subsessionid
+            ,r.subsessionid AS return_subsessionid
+            ,r.mid AS return_mid
+    FROM    t_share_exposure se
+    JOIN    t_return r
+    ON      se.shareid = r.rootshareid
+    AND     se.vid = r.vid
+    AND     se.apptype = r.apptype
+)
+
+-- B per exposure (24h): count of DISTINCT returning MIDs, 0 when none.
+-- B_mids keeps the raw MID array for cross-level dedup in the final output.
+,t_exposure_bn AS (
+    SELECT  e.id AS exposure_id
+            ,e.subsessionid
+            ,e.ts
+            ,e.vid
+            ,e.uid
+            ,e.mid
+            ,COALESCE(bn.B, 0) AS B
+            ,bn.B_mids
+    FROM    t_exposure e
+    LEFT JOIN (
+        SELECT  exposure_id
+                ,COUNT(DISTINCT return_mid) AS B
+                ,COLLECT_SET(return_mid) AS B_mids
+        FROM    t_share_return
+        GROUP BY exposure_id
+    ) bn
+    ON      e.id = bn.exposure_id
+)
+
+--========================================
+-- 3b. Join shares to return clicks + compute B (48h)
+--========================================
+-- 48h mirror of t_share_return (48h shares joined to 48h return clicks
+-- on rootshareid + vid + apptype).
+,t_share_return_48h AS (
+    SELECT  se.exposure_id
+            ,se.shareid
+            ,se.vid
+            ,se.apptype
+            ,se.subsessionid
+            ,r.subsessionid AS return_subsessionid
+            ,r.mid AS return_mid
+    FROM    t_share_exposure_48h se
+    JOIN    t_return_48h r
+    ON      se.shareid = r.rootshareid
+    AND     se.vid = r.vid
+    AND     se.apptype = r.apptype
+)
+
+-- 48h mirror of t_exposure_bn: distinct returning MIDs per exposure (0 when
+-- none) plus the MID array. Carries fewer passthrough columns than the 24h
+-- version — only exposure_id and subsessionid are needed by the 48h hops.
+,t_exposure_bn_48h AS (
+    SELECT  e.id AS exposure_id
+            ,e.subsessionid
+            ,COALESCE(bn.B, 0) AS B
+            ,bn.B_mids
+    FROM    t_exposure e
+    LEFT JOIN (
+        SELECT  exposure_id
+                ,COUNT(DISTINCT return_mid) AS B
+                ,COLLECT_SET(return_mid) AS B_mids
+        FROM    t_share_return_48h
+        GROUP BY exposure_id
+    ) bn
+    ON      e.id = bn.exposure_id
+)
+
+--========================================
+-- 4. Multi-hop computation, 24h (BFS frontier with subsession-level cycle removal)
+--    Cross-level MID dedup is done in the final output via ARRAY_EXCEPT
+--========================================
+
+-- hop-1 frontier: subsessions reached directly by this exposure's returns
+,t_frontier_1 AS (
+    SELECT DISTINCT exposure_id AS source_id, return_subsessionid AS reached_sub
+    FROM   t_share_return
+)
+-- C_1: sum of B over exposures observed inside the hop-1 subsessions
+,t_c1 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_1
+    FROM    t_frontier_1 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+-- MIDs contributing to C_1, collected per source exposure
+,t_c1_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_1_mids
+    FROM    t_frontier_1 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- hop-2 frontier: subsessions reached from hop-1 exposures' returns,
+-- excluding any subsession already visited at hop 1 (anti-join on v1)
+-- so a subsession is never counted twice for the same source exposure.
+,t_frontier_2 AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_frontier_1 f1
+    JOIN    t_exposure_bn eb1 ON f1.reached_sub = eb1.subsessionid
+    JOIN    t_share_return sr2 ON eb1.exposure_id = sr2.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+-- C_2: sum of B over exposures in the hop-2 subsessions
+,t_c2 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_2
+    FROM    t_frontier_2 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+-- MIDs contributing to C_2, collected per source exposure
+,t_c2_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_2_mids
+    FROM    t_frontier_2 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- hop-3 frontier: subsessions reached from hop-2, excluding subsessions
+-- already visited at hop 1 or hop 2 (two anti-joins) to break cycles.
+,t_frontier_3 AS (
+    SELECT DISTINCT f2.source_id, sr3.return_subsessionid AS reached_sub
+    FROM    t_frontier_2 f2
+    JOIN    t_exposure_bn eb2 ON f2.reached_sub = eb2.subsessionid
+    JOIN    t_share_return sr3 ON eb2.exposure_id = sr3.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f2.source_id = v1.source_id
+        AND sr3.return_subsessionid = v1.reached_sub
+    LEFT JOIN t_frontier_2 v2
+        ON  f2.source_id = v2.source_id
+        AND sr3.return_subsessionid = v2.reached_sub
+    WHERE   v1.source_id IS NULL AND v2.source_id IS NULL
+)
+-- C_3: sum of B over exposures in the hop-3 subsessions
+,t_c3 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_3
+    FROM    t_frontier_3 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+-- MIDs contributing to C_3, collected per source exposure
+,t_c3_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_3_mids
+    FROM    t_frontier_3 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+--========================================
+-- 4b. Multi-hop computation, 48h (BFS frontier with subsession-level cycle removal)
+--========================================
+
+-- 48h hop-1 frontier / C_1 / C_1 MIDs (mirrors the 24h hop-1 block)
+,t_frontier_1_48h AS (
+    SELECT DISTINCT exposure_id AS source_id, return_subsessionid AS reached_sub
+    FROM   t_share_return_48h
+)
+,t_c1_48h AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_1
+    FROM    t_frontier_1_48h f
+    JOIN    t_exposure_bn_48h eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+,t_c1_mids_48h AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_1_mids
+    FROM    t_frontier_1_48h f
+    JOIN    t_exposure_bn_48h eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return_48h sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- 48h hop-2: frontier excludes subsessions already visited at hop 1
+-- (anti-join on v1), then C_2 and its MID list (mirrors the 24h hop-2 block)
+,t_frontier_2_48h AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_frontier_1_48h f1
+    JOIN    t_exposure_bn_48h eb1 ON f1.reached_sub = eb1.subsessionid
+    JOIN    t_share_return_48h sr2 ON eb1.exposure_id = sr2.exposure_id
+    LEFT JOIN t_frontier_1_48h v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+,t_c2_48h AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_2
+    FROM    t_frontier_2_48h f
+    JOIN    t_exposure_bn_48h eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+,t_c2_mids_48h AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_2_mids
+    FROM    t_frontier_2_48h f
+    JOIN    t_exposure_bn_48h eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return_48h sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- 48h hop-3: frontier excludes subsessions visited at hop 1 or hop 2
+-- (two anti-joins), then C_3 and its MID list (mirrors the 24h hop-3 block)
+,t_frontier_3_48h AS (
+    SELECT DISTINCT f2.source_id, sr3.return_subsessionid AS reached_sub
+    FROM    t_frontier_2_48h f2
+    JOIN    t_exposure_bn_48h eb2 ON f2.reached_sub = eb2.subsessionid
+    JOIN    t_share_return_48h sr3 ON eb2.exposure_id = sr3.exposure_id
+    LEFT JOIN t_frontier_1_48h v1
+        ON  f2.source_id = v1.source_id
+        AND sr3.return_subsessionid = v1.reached_sub
+    LEFT JOIN t_frontier_2_48h v2
+        ON  f2.source_id = v2.source_id
+        AND sr3.return_subsessionid = v2.reached_sub
+    WHERE   v1.source_id IS NULL AND v2.source_id IS NULL
+)
+,t_c3_48h AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_3
+    FROM    t_frontier_3_48h f
+    JOIN    t_exposure_bn_48h eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+,t_c3_mids_48h AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_3_mids
+    FROM    t_frontier_3_48h f
+    JOIN    t_exposure_bn_48h eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return_48h sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+--========================================
+-- 5. Final output
+--========================================
+-- One row per exposure in the base hour, with 24h and 48h B/C metrics —
+-- both raw per-hop counts and cross-hop-deduplicated counts (ARRAY_EXCEPT
+-- removes MIDs already credited at an earlier hop).
+SELECT  e.id AS exposure_id
+        ,e.vid
+        ,v.title AS video_title
+        ,e.uid
+        ,e.mid
+        ,e.ts
+        -- 24h, non-deduplicated
+        ,CASE WHEN se.exposure_id IS NOT NULL THEN 1 ELSE 0 END AS is_share
+        ,COALESCE(bn.B, 0) AS B
+        ,CONCAT_WS(',', bn.B_mids) AS B_mids
+        ,COALESCE(c1.C_1, 0) AS C_1
+        ,CONCAT_WS(',', c1m.C_1_mids) AS C_1_mids
+        ,COALESCE(c2.C_2, 0) AS C_2
+        ,CONCAT_WS(',', c2m.C_2_mids) AS C_2_mids
+        ,COALESCE(c3.C_3, 0) AS C_3
+        ,CONCAT_WS(',', c3m.C_3_mids) AS C_3_mids
+        ,COALESCE(bn.B, 0) + COALESCE(c1.C_1, 0) + COALESCE(c2.C_2, 0) + COALESCE(c3.C_3, 0) AS V_total
+        -- 24h, deduplicated (ARRAY_EXCEPT drops MIDs already counted at an earlier level)
+        -- FIX: wrap SIZE(...) in COALESCE so exposures with no hop-1 returns report 0
+        -- instead of NULL, consistent with every other count column in this query.
+        ,COALESCE(SIZE(c1m.C_1_mids), 0) AS C_1_distinct
+        -- NOTE(review): ARRAY('') is the empty-array sentinel; a return_mid equal to
+        -- the empty string would be wrongly excluded — confirm MIDs are never ''.
+        ,IF(c1m.C_1_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(c1m.C_1_mids, COALESCE(bn.B_mids, ARRAY('')))), 0) AS C_1_dedup
+        ,IF(c2m.C_2_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(c2m.C_2_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(c1m.C_1_mids, ARRAY('')))), 0) AS C_2_dedup
+        ,IF(c3m.C_3_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(ARRAY_EXCEPT(c3m.C_3_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(c1m.C_1_mids, ARRAY(''))), COALESCE(c2m.C_2_mids, ARRAY('')))), 0) AS C_3_dedup
+        ,COALESCE(bn.B, 0)
+            + IF(c1m.C_1_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(c1m.C_1_mids, COALESCE(bn.B_mids, ARRAY('')))), 0)
+            + IF(c2m.C_2_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(c2m.C_2_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(c1m.C_1_mids, ARRAY('')))), 0)
+            + IF(c3m.C_3_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(ARRAY_EXCEPT(c3m.C_3_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(c1m.C_1_mids, ARRAY(''))), COALESCE(c2m.C_2_mids, ARRAY('')))), 0)
+            AS V_total_dedup
+        -- 48h, non-deduplicated
+        ,CASE WHEN se48.exposure_id IS NOT NULL THEN 1 ELSE 0 END AS is_share_48h
+        ,COALESCE(bn48.B, 0) AS B_48h
+        ,CONCAT_WS(',', bn48.B_mids) AS B_mids_48h
+        ,COALESCE(c1_48.C_1, 0) AS C_1_48h
+        ,CONCAT_WS(',', c1m48.C_1_mids) AS C_1_mids_48h
+        ,COALESCE(c2_48.C_2, 0) AS C_2_48h
+        ,CONCAT_WS(',', c2m48.C_2_mids) AS C_2_mids_48h
+        ,COALESCE(c3_48.C_3, 0) AS C_3_48h
+        ,CONCAT_WS(',', c3m48.C_3_mids) AS C_3_mids_48h
+        ,COALESCE(bn48.B, 0) + COALESCE(c1_48.C_1, 0) + COALESCE(c2_48.C_2, 0) + COALESCE(c3_48.C_3, 0) AS V_total_48h
+        -- 48h, deduplicated
+        ,IF(c1m48.C_1_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(c1m48.C_1_mids, COALESCE(bn48.B_mids, ARRAY('')))), 0) AS C_1_dedup_48h
+        ,IF(c2m48.C_2_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(c2m48.C_2_mids, COALESCE(bn48.B_mids, ARRAY(''))), COALESCE(c1m48.C_1_mids, ARRAY('')))), 0) AS C_2_dedup_48h
+        ,IF(c3m48.C_3_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(ARRAY_EXCEPT(c3m48.C_3_mids, COALESCE(bn48.B_mids, ARRAY(''))), COALESCE(c1m48.C_1_mids, ARRAY(''))), COALESCE(c2m48.C_2_mids, ARRAY('')))), 0) AS C_3_dedup_48h
+        ,COALESCE(bn48.B, 0)
+            + IF(c1m48.C_1_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(c1m48.C_1_mids, COALESCE(bn48.B_mids, ARRAY('')))), 0)
+            + IF(c2m48.C_2_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(c2m48.C_2_mids, COALESCE(bn48.B_mids, ARRAY(''))), COALESCE(c1m48.C_1_mids, ARRAY('')))), 0)
+            + IF(c3m48.C_3_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(ARRAY_EXCEPT(c3m48.C_3_mids, COALESCE(bn48.B_mids, ARRAY(''))), COALESCE(c1m48.C_1_mids, ARRAY(''))), COALESCE(c2m48.C_2_mids, ARRAY('')))), 0)
+            AS V_total_dedup_48h
+FROM    t_exposure e
+-- 24h joins
+LEFT JOIN (SELECT DISTINCT exposure_id FROM t_share_exposure) se
+ON      e.id = se.exposure_id
+LEFT JOIN t_exposure_bn bn
+ON      e.id = bn.exposure_id
+LEFT JOIN t_c1 c1
+ON      e.id = c1.exposure_id
+LEFT JOIN t_c1_mids c1m
+ON      e.id = c1m.exposure_id
+LEFT JOIN t_c2 c2
+ON      e.id = c2.exposure_id
+LEFT JOIN t_c2_mids c2m
+ON      e.id = c2m.exposure_id
+LEFT JOIN t_c3 c3
+ON      e.id = c3.exposure_id
+LEFT JOIN t_c3_mids c3m
+ON      e.id = c3m.exposure_id
+-- 48h joins
+LEFT JOIN (SELECT DISTINCT exposure_id FROM t_share_exposure_48h) se48
+ON      e.id = se48.exposure_id
+LEFT JOIN t_exposure_bn_48h bn48
+ON      e.id = bn48.exposure_id
+LEFT JOIN t_c1_48h c1_48
+ON      e.id = c1_48.exposure_id
+LEFT JOIN t_c1_mids_48h c1m48
+ON      e.id = c1m48.exposure_id
+LEFT JOIN t_c2_48h c2_48
+ON      e.id = c2_48.exposure_id
+LEFT JOIN t_c2_mids_48h c2m48
+ON      e.id = c2m48.exposure_id
+LEFT JOIN t_c3_48h c3_48
+ON      e.id = c3_48.exposure_id
+LEFT JOIN t_c3_mids_48h c3m48
+ON      e.id = c3m48.exposure_id
+-- video info
+LEFT JOIN videoods.wx_video v
+ON      e.vid = CAST(v.id AS STRING)
+;

+ 460 - 0
table_gen/exposure_return_Bn_v4.sql

@@ -0,0 +1,460 @@
+--*********************
+-- Exposure return-flow value computation (B + multi-hop C chain + D chain,
+-- BFS cycle removal, 24h window).
+-- D chain: propagated value from subsequent exposures within the same
+-- subsession (the D-chain joins below match on subsessionid).
+--*********************
+
+WITH
+--========================================
+-- 1. Base data preparation
+--========================================
+
+-- 1.1 Return-flow (click-back) data, 24h window starting at ${dt}${hh}
+t_return AS (
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$"
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.2 Share data, 24h window
+,t_share_from_sharelog AS (
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.3 Exposure data (shared by all chains); single hour ${dt}${hh}
+,t_exposure AS (
+    SELECT  dthh_id
+            ,dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,ts
+            ,id
+            ,dt
+            ,hh
+    FROM    loghubods.dwd_recsys_alg_exposure_base_view_20250402
+    WHERE   CONCAT(dt,hh) = '${dt}${hh}'
+)
+
+-- 1.4 Detail-page exposures (shared; used for the non-standard share match)
+,t_exposure_detail AS (
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
+)
+
+--========================================
+-- 2. Match exposures to shares, 24h
+--========================================
+-- Standard shares: single JOIN, then pick the best match by priority
+-- (subsession+pagesource+ts > session+pagesource+ts > ... > session only).
+,t_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                    WHEN s.subsessionid = e.subsessionid THEN 5
+                    WHEN s.sessionid = e.sessionid THEN 6
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                            WHEN s.subsessionid = e.subsessionid THEN 5
+                            WHEN s.sessionid = e.sessionid THEN 6
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- Non-standard shares (detail pages): same scheme but matched against
+-- detail-page exposures only, and without the pagesource-equality tiers.
+,t_no_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid THEN 3
+                    WHEN s.sessionid = e.sessionid THEN 4
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid THEN 3
+                            WHEN s.sessionid = e.sessionid THEN 4
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure_detail e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- Union of both share->exposure match variants.
+,t_share_exposure AS (
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_normal_share_exposure
+    UNION ALL
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_no_normal_share_exposure
+)
+
+--========================================
+-- 3. Match shares to returns + compute B (24h)
+--========================================
+-- Shares joined to returns via rootshareid, i.e. the whole fission tree
+-- counts toward the originating exposure, same video / same apptype only.
+,t_share_return AS (
+    SELECT  se.exposure_id
+            ,se.shareid
+            ,se.vid
+            ,se.apptype
+            ,se.subsessionid
+            ,r.subsessionid AS return_subsessionid
+            ,r.mid AS return_mid
+    FROM    t_share_exposure se
+    JOIN    t_return r
+    ON      se.shareid = r.rootshareid
+    AND     se.vid = r.vid
+    AND     se.apptype = r.apptype
+)
+
+-- Per-exposure B (distinct returning mids) plus the raw mid set B_mids.
+,t_exposure_bn AS (
+    SELECT  e.id AS exposure_id
+            ,e.subsessionid
+            ,e.ts
+            ,e.vid
+            ,e.uid
+            ,e.mid
+            ,COALESCE(bn.B, 0) AS B
+            ,bn.B_mids
+    FROM    t_exposure e
+    LEFT JOIN (
+        SELECT  exposure_id
+                ,COUNT(DISTINCT return_mid) AS B
+                ,COLLECT_SET(return_mid) AS B_mids
+        FROM    t_share_return
+        GROUP BY exposure_id
+    ) bn
+    ON      e.id = bn.exposure_id
+)
+
+--========================================
+-- 4. Multi-hop C chain, 24h (BFS frontier + subsession-level cycle removal)
+--    Cross-layer MID dedup is done in the final output via ARRAY_EXCEPT.
+--========================================
+
+-- Hop-1 frontier: subsessions reached by this exposure's returning users.
+,t_frontier_1 AS (
+    SELECT DISTINCT exposure_id AS source_id, return_subsessionid AS reached_sub
+    FROM   t_share_return
+)
+,t_c1 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_1
+    FROM    t_frontier_1 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+,t_c1_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_1_mids
+    FROM    t_frontier_1 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- Hop-2 frontier; the anti-join against t_frontier_1 removes subsessions
+-- already visited at hop 1 (cycle removal).
+,t_frontier_2 AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_frontier_1 f1
+    JOIN    t_exposure_bn eb1 ON f1.reached_sub = eb1.subsessionid
+    JOIN    t_share_return sr2 ON eb1.exposure_id = sr2.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+,t_c2 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_2
+    FROM    t_frontier_2 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+,t_c2_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_2_mids
+    FROM    t_frontier_2 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- Hop-3 frontier; anti-joins against both earlier frontiers for cycle removal.
+,t_frontier_3 AS (
+    SELECT DISTINCT f2.source_id, sr3.return_subsessionid AS reached_sub
+    FROM    t_frontier_2 f2
+    JOIN    t_exposure_bn eb2 ON f2.reached_sub = eb2.subsessionid
+    JOIN    t_share_return sr3 ON eb2.exposure_id = sr3.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f2.source_id = v1.source_id
+        AND sr3.return_subsessionid = v1.reached_sub
+    LEFT JOIN t_frontier_2 v2
+        ON  f2.source_id = v2.source_id
+        AND sr3.return_subsessionid = v2.reached_sub
+    WHERE   v1.source_id IS NULL AND v2.source_id IS NULL
+)
+,t_c3 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS C_3
+    FROM    t_frontier_3 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+,t_c3_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_3_mids
+    FROM    t_frontier_3 f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+--========================================
+-- 5. D chain (propagation through later exposures in the same subsession)
+--========================================
+
+-- 5a. t_d1 — sum of B over later exposures in the same subsession
+,t_d1 AS (
+    SELECT  e1.exposure_id
+            ,SUM(e2.B) AS D_1
+    FROM    t_exposure_bn e1
+    JOIN    t_exposure_bn e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     e2.ts > e1.ts
+    GROUP BY e1.exposure_id
+)
+
+-- 5b. t_d1_mids — return mids of those later exposures
+,t_d1_mids AS (
+    SELECT  e1.exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_1_mids
+    FROM    t_exposure_bn e1
+    JOIN    t_exposure_bn e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     e2.ts > e1.ts
+    JOIN    t_share_return sr
+    ON      e2.exposure_id = sr.exposure_id
+    GROUP BY e1.exposure_id
+)
+
+-- 5c. t_d1_frontier — subsessions that the d1 returns land in
+,t_d1_frontier AS (
+    SELECT DISTINCT e1.exposure_id AS source_id
+           ,sr.return_subsessionid AS reached_sub
+    FROM    t_exposure_bn e1
+    JOIN    t_exposure_bn e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     e2.ts > e1.ts
+    JOIN    t_share_return sr
+    ON      e2.exposure_id = sr.exposure_id
+)
+
+-- 5d. t_d2 / t_d2_mids — BFS hop 2
+,t_d2 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS D_2
+    FROM    t_d1_frontier f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+,t_d2_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_2_mids
+    FROM    t_d1_frontier f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+-- 5e. t_d2_frontier / t_d3 / t_d3_mids — BFS hop 3 (with cycle removal)
+,t_d2_frontier AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_d1_frontier f1
+    JOIN    t_exposure_bn eb1 ON f1.reached_sub = eb1.subsessionid
+    JOIN    t_share_return sr2 ON eb1.exposure_id = sr2.exposure_id
+    LEFT JOIN t_d1_frontier v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+,t_d3 AS (
+    SELECT  f.source_id AS exposure_id, SUM(eb.B) AS D_3
+    FROM    t_d2_frontier f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    GROUP BY f.source_id
+)
+,t_d3_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_3_mids
+    FROM    t_d2_frontier f
+    JOIN    t_exposure_bn eb ON f.reached_sub = eb.subsessionid
+    JOIN    t_share_return sr ON eb.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+
+--========================================
+-- 6. Final output
+--========================================
+SELECT  e.id AS exposure_id
+        ,e.vid
+        ,v.title AS video_title
+        ,e.uid
+        ,e.mid
+        ,e.ts
+        -- 24h, non-deduplicated
+        ,CASE WHEN se.exposure_id IS NOT NULL THEN 1 ELSE 0 END AS is_share
+        ,COALESCE(bn.B, 0) AS B
+        ,CONCAT_WS(',', bn.B_mids) AS B_mids
+        ,COALESCE(c1.C_1, 0) AS C_1
+        ,CONCAT_WS(',', c1m.C_1_mids) AS C_1_mids
+        ,COALESCE(c2.C_2, 0) AS C_2
+        ,CONCAT_WS(',', c2m.C_2_mids) AS C_2_mids
+        ,COALESCE(c3.C_3, 0) AS C_3
+        ,CONCAT_WS(',', c3m.C_3_mids) AS C_3_mids
+        ,COALESCE(bn.B, 0) + COALESCE(c1.C_1, 0) + COALESCE(c2.C_2, 0) + COALESCE(c3.C_3, 0) AS V_total
+        -- 24h, deduplicated (ARRAY_EXCEPT drops MIDs already counted in earlier layers)
+        -- NOTE(review): SIZE(NULL) yields NULL here when the exposure has no
+        -- hop-1 rows; every other metric is guarded with IF/COALESCE — confirm
+        -- whether a NULL C_1_distinct is intended.
+        ,SIZE(c1m.C_1_mids) AS C_1_distinct
+        ,IF(c1m.C_1_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(c1m.C_1_mids, COALESCE(bn.B_mids, ARRAY('')))), 0) AS C_1_dedup
+        ,IF(c2m.C_2_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(c2m.C_2_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(c1m.C_1_mids, ARRAY('')))), 0) AS C_2_dedup
+        ,IF(c3m.C_3_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(ARRAY_EXCEPT(c3m.C_3_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(c1m.C_1_mids, ARRAY(''))), COALESCE(c2m.C_2_mids, ARRAY('')))), 0) AS C_3_dedup
+        ,COALESCE(bn.B, 0)
+            + IF(c1m.C_1_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(c1m.C_1_mids, COALESCE(bn.B_mids, ARRAY('')))), 0)
+            + IF(c2m.C_2_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(c2m.C_2_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(c1m.C_1_mids, ARRAY('')))), 0)
+            + IF(c3m.C_3_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(ARRAY_EXCEPT(c3m.C_3_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(c1m.C_1_mids, ARRAY(''))), COALESCE(c2m.C_2_mids, ARRAY('')))), 0)
+            AS V_total_dedup
+        -- D chain (propagation via later same-subsession exposures), non-deduplicated
+        ,COALESCE(d1.D_1, 0) AS D_1
+        ,CONCAT_WS(',', d1m.D_1_mids) AS D_1_mids
+        ,COALESCE(d2.D_2, 0) AS D_2
+        ,CONCAT_WS(',', d2m.D_2_mids) AS D_2_mids
+        ,COALESCE(d3.D_3, 0) AS D_3
+        ,CONCAT_WS(',', d3m.D_3_mids) AS D_3_mids
+        ,COALESCE(d1.D_1, 0) + COALESCE(d2.D_2, 0) + COALESCE(d3.D_3, 0) AS D_total
+        -- D chain deduplicated (ARRAY_EXCEPT across layers)
+        -- NOTE(review): dedup here excludes B_mids and earlier D layers but
+        -- NOT the C-chain mids — confirm D and C are meant to be counted as
+        -- independent chains.
+        ,IF(d1m.D_1_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(d1m.D_1_mids, COALESCE(bn.B_mids, ARRAY('')))), 0) AS D_1_dedup
+        ,IF(d2m.D_2_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(d2m.D_2_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(d1m.D_1_mids, ARRAY('')))), 0) AS D_2_dedup
+        ,IF(d3m.D_3_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(ARRAY_EXCEPT(d3m.D_3_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(d1m.D_1_mids, ARRAY(''))), COALESCE(d2m.D_2_mids, ARRAY('')))), 0) AS D_3_dedup
+        ,IF(d1m.D_1_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(d1m.D_1_mids, COALESCE(bn.B_mids, ARRAY('')))), 0)
+            + IF(d2m.D_2_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(d2m.D_2_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(d1m.D_1_mids, ARRAY('')))), 0)
+            + IF(d3m.D_3_mids IS NOT NULL, SIZE(ARRAY_EXCEPT(ARRAY_EXCEPT(ARRAY_EXCEPT(d3m.D_3_mids, COALESCE(bn.B_mids, ARRAY(''))), COALESCE(d1m.D_1_mids, ARRAY(''))), COALESCE(d2m.D_2_mids, ARRAY('')))), 0)
+            AS D_total_dedup
+FROM    t_exposure e
+-- 24h joins
+LEFT JOIN (SELECT DISTINCT exposure_id FROM t_share_exposure) se
+ON      e.id = se.exposure_id
+LEFT JOIN t_exposure_bn bn
+ON      e.id = bn.exposure_id
+LEFT JOIN t_c1 c1
+ON      e.id = c1.exposure_id
+LEFT JOIN t_c1_mids c1m
+ON      e.id = c1m.exposure_id
+LEFT JOIN t_c2 c2
+ON      e.id = c2.exposure_id
+LEFT JOIN t_c2_mids c2m
+ON      e.id = c2m.exposure_id
+LEFT JOIN t_c3 c3
+ON      e.id = c3.exposure_id
+LEFT JOIN t_c3_mids c3m
+ON      e.id = c3m.exposure_id
+-- D chain joins
+LEFT JOIN t_d1 d1 ON e.id = d1.exposure_id
+LEFT JOIN t_d1_mids d1m ON e.id = d1m.exposure_id
+LEFT JOIN t_d2 d2 ON e.id = d2.exposure_id
+LEFT JOIN t_d2_mids d2m ON e.id = d2m.exposure_id
+LEFT JOIN t_d3 d3 ON e.id = d3.exposure_id
+LEFT JOIN t_d3_mids d3m ON e.id = d3m.exposure_id
+-- video info
+LEFT JOIN videoods.wx_video v
+ON      e.vid = CAST(v.id AS STRING)
+;

+ 477 - 0
table_gen/exposure_return_recursive.sql

@@ -0,0 +1,477 @@
+--*********************
+-- Exposure return-flow value, recursive formulation (optimized)
+-- Math:
+--   B_i     = same-video return users (tracked via rootshareid)
+--   D_i^(0) = Σ B_j,  for j > i within the same subsession (initialization)
+--   C_i^(0) = 0
+--   D_i^(k) = Σ (D_j^(k-1) + C_j^(k-1)),  j > i within the same subsession
+--   C_i^(k) = Σ (D_j^(k) + C_j^(k-1)),    j ∈ S(i)
+--   where S(i) = set of first exposures of the users returned via Bn(E_i)
+--   Final value V(E_i) = B_i + D_i^(k) + C_i^(k)
+--
+-- Optimizations:
+--   1. Drop seq; order exposures directly by ts
+--   2. Merge the cascaded matching into a single JOIN + priority pick
+--   3. Recursion unrolled explicitly: layers for k=1, k=2 and k=3 are all
+--      computed below (the original note about stopping at k=1 is outdated)
+--*********************
+
+WITH
+--========================================
+-- 1. Base data preparation
+--========================================
+
+-- 1.1 Return-flow data (users coming back through a share link)
+-- Time range: 24 hours starting at ${dt}${hh}
+t_return AS (
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$"
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.2 Share data
+-- Time range: 24 hours starting at ${dt}${hh}
+,t_share_from_sharelog AS (
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN '${dt}${hh}' AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) + 3600 * 24),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            ) t
+    WHERE   rn = 1
+)
+
+-- 1.3 Exposure data
+-- Time range: the single hour ${dt}${hh} (exposure starting point)
+-- Optimization: seq dropped; ordering is decided directly by ts
+,t_exposure AS (
+    SELECT  dthh_id
+            ,dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,ts
+            ,id
+            ,dt
+            ,hh
+    FROM    loghubods.dwd_recsys_alg_exposure_base_view_20250402
+    WHERE   CONCAT(dt,hh) = '${dt}${hh}'
+)
+
+-- 1.4 Detail-page exposures (used for the non-standard share match)
+,t_exposure_detail AS (
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
+)
+
+--========================================
+-- 2. Match exposures to shares (single JOIN + priority pick)
+-- Optimization: 10 cascaded matching rounds merged into 1
+--========================================
+
+-- 2.1 Standard shares matched to exposures (all priorities in one pass)
+,t_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                -- priority: subsession+pagesource+ts > session+pagesource+ts > ...
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                    WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                    WHEN s.subsessionid = e.subsessionid THEN 5
+                    WHEN s.sessionid = e.sessionid THEN 6
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid AND s.pagesource = e.pagesource THEN 3
+                            WHEN s.sessionid = e.sessionid AND s.pagesource = e.pagesource THEN 4
+                            WHEN s.subsessionid = e.subsessionid THEN 5
+                            WHEN s.sessionid = e.sessionid THEN 6
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+-- 2.2 Non-standard shares (detail pages) matched in one pass
+,t_no_normal_share_exposure AS (
+    SELECT  *
+    FROM    (
+        SELECT  s.dthh
+                ,s.apptype
+                ,s.mid
+                ,s.vid
+                ,s.sessionid
+                ,s.subsessionid
+                ,s.pagesource
+                ,s.shareid
+                ,s.ts
+                ,e.id AS exposure_id
+                ,e.ts AS exposure_ts
+                ,CASE
+                    WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                    WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                    WHEN s.subsessionid = e.subsessionid THEN 3
+                    WHEN s.sessionid = e.sessionid THEN 4
+                END AS match_priority
+                ,ROW_NUMBER() OVER (
+                    PARTITION BY s.dthh,s.apptype,s.mid,s.vid,s.sessionid,s.subsessionid,s.pagesource,s.shareid
+                    ORDER BY
+                        CASE
+                            WHEN s.subsessionid = e.subsessionid AND s.ts >= e.ts THEN 1
+                            WHEN s.sessionid = e.sessionid AND s.ts >= e.ts THEN 2
+                            WHEN s.subsessionid = e.subsessionid THEN 3
+                            WHEN s.sessionid = e.sessionid THEN 4
+                        END
+                        ,e.ts DESC
+                ) AS rn
+        FROM    t_share_from_sharelog s
+        LEFT JOIN t_exposure_detail e
+        ON      s.apptype = e.apptype
+        AND     s.mid = e.mid
+        AND     s.vid = e.vid
+        AND     (s.subsessionid = e.subsessionid OR s.sessionid = e.sessionid)
+        WHERE   s.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+    ) t
+    WHERE   rn = 1
+)
+
+--========================================
+-- 3. Union all share-exposure matches
+--========================================
+,t_share_exposure AS (
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_normal_share_exposure
+    UNION ALL
+    SELECT  dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid, ts, exposure_id, exposure_ts
+    FROM    t_no_normal_share_exposure
+)
+
+--========================================
+-- 4. Compute B_i (same-video returns, via rootshareid)
+--========================================
+
+-- 4.1 Shares matched to returns (rootshareid tracks the fission tree)
+,t_share_return AS (
+    SELECT  se.exposure_id
+            ,se.shareid
+            ,se.vid
+            ,se.apptype
+            ,se.subsessionid
+            -- returning user's subsession (used to build S(i))
+            ,r.subsessionid AS return_subsessionid
+            ,r.mid AS return_mid
+    FROM    t_share_exposure se
+    JOIN    t_return r
+    ON      se.shareid = r.rootshareid  -- rootshareid captures all fission
+    AND     se.vid = r.vid              -- same video
+    AND     se.apptype = r.apptype
+)
+
+-- 4.2 Per-exposure B_i plus the set of returned subsessions
+,t_exposure_bn AS (
+    SELECT  e.id AS exposure_id
+            ,e.subsessionid
+            ,e.ts
+            ,e.vid
+            ,e.uid
+            ,e.mid
+            -- B_i: same-video return count (fission included)
+            ,COALESCE(bn.B, 0) AS B
+            -- subsessions of the users returned via Bn (input to S(i))
+            ,bn.bn_subsessions
+    FROM    t_exposure e
+    LEFT JOIN (
+        SELECT  exposure_id
+                ,COUNT(DISTINCT return_mid) AS B
+                ,COLLECT_SET(return_subsessionid) AS bn_subsessions
+        FROM    t_share_return
+        GROUP BY exposure_id
+    ) bn
+    ON      e.id = bn.exposure_id
+)
+
+--========================================
+-- 5. Build S(i): first exposures of the users returned via Bn
+--========================================
+
+-- First exposure of each subsession (smallest ts)
+,t_subsession_first_exposure AS (
+    SELECT  subsessionid
+            ,first_exposure_id
+    FROM    (
+        SELECT  subsessionid
+                ,id AS first_exposure_id
+                ,ROW_NUMBER() OVER (PARTITION BY subsessionid ORDER BY ts) AS rn
+        FROM    t_exposure
+    ) t
+    WHERE   rn = 1
+)
+
+,t_bn_first_exposure AS (
+    -- S(i): set of first exposures of the users returned via E_i's Bn
+    -- NOTE(review): unlike the BFS variant there is no visited-set here, so a
+    -- return landing back in an already-counted subsession can re-enter the
+    -- recursion — confirm cycles are acceptable for k <= 3.
+    SELECT  e.exposure_id AS source_exposure_id
+            ,f.first_exposure_id AS target_exposure_id
+    FROM    t_exposure_bn e
+    LATERAL VIEW EXPLODE(e.bn_subsessions) t AS bn_subsess
+    JOIN    t_subsession_first_exposure f
+    ON      t.bn_subsess = f.subsessionid
+    WHERE   e.bn_subsessions IS NOT NULL
+)
+
+--========================================
+-- 6. Initialization k=0
+--    D_i^(0) = Σ B_j, j > i within the same subsession (ordered by ts)
+--    C_i^(0) = 0
+--========================================
+,t_layer_0 AS (
+    SELECT  e1.exposure_id
+            ,e1.subsessionid
+            ,e1.ts
+            ,e1.vid
+            ,e1.uid
+            ,e1.mid
+            ,e1.B
+            ,e1.bn_subsessions
+            -- D^(0) = Σ B_j, j > i within the same subsession
+            ,COALESCE(d0.D_0, 0) AS D_0
+            -- C^(0) = 0
+            ,0 AS C_0
+    FROM    t_exposure_bn e1
+    LEFT JOIN (
+        SELECT  a.exposure_id
+                ,SUM(b.B) AS D_0
+        FROM    t_exposure_bn a
+        JOIN    t_exposure_bn b
+        ON      a.subsessionid = b.subsessionid
+        AND     b.ts > a.ts  -- j > i, decided by ts
+        GROUP BY a.exposure_id
+    ) d0
+    ON      e1.exposure_id = d0.exposure_id
+)
+
+--========================================
+-- 7. Recursion step k=1
+--    D_i^(1) = Σ (D_j^(0) + C_j^(0)), j > i within the same subsession
+--    C_i^(1) = Σ (D_j^(1) + C_j^(0)), j ∈ S(i)
+--========================================
+,t_D_1 AS (
+    SELECT  a.exposure_id
+            ,COALESCE(SUM(b.D_0 + b.C_0), 0) AS D_1
+    FROM    t_layer_0 a
+    JOIN    t_layer_0 b
+    ON      a.subsessionid = b.subsessionid
+    AND     b.ts > a.ts  -- j > i, decided by ts
+    GROUP BY a.exposure_id
+)
+
+,t_layer_1 AS (
+    SELECT  l0.exposure_id
+            ,l0.subsessionid
+            ,l0.ts
+            ,l0.vid
+            ,l0.uid
+            ,l0.mid
+            ,l0.B
+            ,l0.bn_subsessions
+            ,l0.D_0
+            ,l0.C_0
+            ,COALESCE(d1.D_1, 0) AS D_1
+            ,COALESCE(c1.C_1, 0) AS C_1
+    FROM    t_layer_0 l0
+    LEFT JOIN t_D_1 d1
+    ON      l0.exposure_id = d1.exposure_id
+    LEFT JOIN (
+        SELECT  s.source_exposure_id AS exposure_id
+                ,SUM(COALESCE(d.D_1, 0) + l.C_0) AS C_1
+        FROM    t_bn_first_exposure s
+        JOIN    t_layer_0 l
+        ON      s.target_exposure_id = l.exposure_id
+        LEFT JOIN t_D_1 d
+        ON      l.exposure_id = d.exposure_id
+        GROUP BY s.source_exposure_id
+    ) c1
+    ON      l0.exposure_id = c1.exposure_id
+)
+
+--========================================
+-- 8. Recursion step k=2
+--    D_i^(2) = Σ (D_j^(1) + C_j^(1)), j > i within the same subsession
+--    C_i^(2) = Σ (D_j^(2) + C_j^(1)), j ∈ S(i)
+--========================================
+,t_D_2 AS (
+    SELECT  a.exposure_id
+            ,COALESCE(SUM(b.D_1 + b.C_1), 0) AS D_2
+    FROM    t_layer_1 a
+    JOIN    t_layer_1 b
+    ON      a.subsessionid = b.subsessionid
+    AND     b.ts > a.ts
+    GROUP BY a.exposure_id
+)
+
+,t_layer_2 AS (
+    SELECT  l1.exposure_id
+            ,l1.subsessionid
+            ,l1.ts
+            ,l1.vid
+            ,l1.uid
+            ,l1.mid
+            ,l1.B
+            ,l1.bn_subsessions
+            ,l1.D_0, l1.C_0
+            ,l1.D_1, l1.C_1
+            ,COALESCE(d2.D_2, 0) AS D_2
+            ,COALESCE(c2.C_2, 0) AS C_2
+    FROM    t_layer_1 l1
+    LEFT JOIN t_D_2 d2
+    ON      l1.exposure_id = d2.exposure_id
+    LEFT JOIN (
+        SELECT  s.source_exposure_id AS exposure_id
+                ,SUM(COALESCE(d.D_2, 0) + l.C_1) AS C_2
+        FROM    t_bn_first_exposure s
+        JOIN    t_layer_1 l
+        ON      s.target_exposure_id = l.exposure_id
+        LEFT JOIN t_D_2 d
+        ON      l.exposure_id = d.exposure_id
+        GROUP BY s.source_exposure_id
+    ) c2
+    ON      l1.exposure_id = c2.exposure_id
+)
+
+--========================================
+-- 9. Recursion step k=3
+--    D_i^(3) = Σ (D_j^(2) + C_j^(2)), j > i within the same subsession
+--    C_i^(3) = Σ (D_j^(3) + C_j^(2)), j ∈ S(i)
+--========================================
+,t_D_3 AS (
+    SELECT  a.exposure_id
+            ,COALESCE(SUM(b.D_2 + b.C_2), 0) AS D_3
+    FROM    t_layer_2 a
+    JOIN    t_layer_2 b
+    ON      a.subsessionid = b.subsessionid
+    AND     b.ts > a.ts
+    GROUP BY a.exposure_id
+)
+
+,t_layer_3 AS (
+    SELECT  l2.exposure_id
+            ,l2.subsessionid
+            ,l2.ts
+            ,l2.vid
+            ,l2.uid
+            ,l2.mid
+            ,l2.B
+            ,l2.bn_subsessions
+            ,l2.D_0, l2.C_0
+            ,l2.D_1, l2.C_1
+            ,l2.D_2, l2.C_2
+            ,COALESCE(d3.D_3, 0) AS D_3
+            ,COALESCE(c3.C_3, 0) AS C_3
+    FROM    t_layer_2 l2
+    LEFT JOIN t_D_3 d3
+    ON      l2.exposure_id = d3.exposure_id
+    LEFT JOIN (
+        SELECT  s.source_exposure_id AS exposure_id
+                ,SUM(COALESCE(d.D_3, 0) + l.C_2) AS C_3
+        FROM    t_bn_first_exposure s
+        JOIN    t_layer_2 l
+        ON      s.target_exposure_id = l.exposure_id
+        LEFT JOIN t_D_3 d
+        ON      l.exposure_id = d.exposure_id
+        GROUP BY s.source_exposure_id
+    ) c3
+    ON      l2.exposure_id = c3.exposure_id
+)
+
+--========================================
+-- 10. Final output (k=3)
+--========================================
+SELECT  t.exposure_id
+        ,t.vid
+        ,v.title AS video_title
+        ,t.uid
+        ,t.mid
+        ,t.ts
+        -- base value
+        ,t.B
+        -- round 0 (initialization)
+        ,t.D_0
+        ,t.C_0
+        -- round 1
+        ,t.D_1
+        ,t.C_1
+        -- round 2
+        ,t.D_2
+        ,t.C_2
+        -- round 3
+        ,t.D_3
+        ,t.C_3
+        -- final value = B + D^(k) + C^(k)
+        ,t.B + t.D_3 + t.C_3 AS V_total
+FROM    t_layer_3 t
+LEFT JOIN videoods.wx_video v
+ON      t.vid = CAST(v.id AS STRING)
+;

+ 967 - 0
table_gen/loghubods.dwd_recsys_alg_exposure_agg_wide_20260209.json

@@ -0,0 +1,967 @@
+[
+  {
+    "field": "dt",
+    "type": "STRING",
+    "comment": "日期"
+  },
+  {
+    "field": "user_type",
+    "type": "STRING",
+    "comment": "用户拉活量分层(R0&新用户/R1-50/R_180_330等,汇总为SUM)"
+  },
+  {
+    "field": "hh_bucket",
+    "type": "STRING",
+    "comment": "小时段(00-03/04-07/.../20-23,汇总为SUM)"
+  },
+  {
+    "field": "head_merge_leve2",
+    "type": "STRING",
+    "comment": "进入内容品类(headvideoid品类,汇总为SUM)"
+  },
+  {
+    "field": "vid_merge_leve2",
+    "type": "STRING",
+    "comment": "推荐内容品类(vid品类,TOP10曝光+其他,汇总为SUM)"
+  },
+  {
+    "field": "vid_id",
+    "type": "STRING",
+    "comment": "内容id(品类曝光TOP1+其他,汇总为SUM)"
+  },
+  {
+    "field": "exposure_cnt",
+    "type": "BIGINT",
+    "comment": "曝光次数"
+  },
+  {
+    "field": "exposure_uv",
+    "type": "BIGINT",
+    "comment": "曝光人数(mid去重)"
+  },
+  {
+    "field": "vid_cnt",
+    "type": "BIGINT",
+    "comment": "视频个数(vid去重)"
+  },
+  {
+    "field": "exposure_per_user",
+    "type": "DOUBLE",
+    "comment": "人均曝光次数 = 曝光次数/曝光人数"
+  },
+  {
+    "field": "share_exposure_cnt",
+    "type": "BIGINT",
+    "comment": "产生分享的曝光数"
+  },
+  {
+    "field": "share_cnt",
+    "type": "BIGINT",
+    "comment": "分享总次数"
+  },
+  {
+    "field": "return_exposure_cnt",
+    "type": "BIGINT",
+    "comment": "产生回流的曝光数(含自身) = SUM(is_return_n)"
+  },
+  {
+    "field": "return_exposure_cnt_noself",
+    "type": "BIGINT",
+    "comment": "产生回流的曝光数(非自身) = SUM(is_return_noself)"
+  },
+  {
+    "field": "return_uv",
+    "type": "BIGINT",
+    "comment": "回流人数(含自身) = SUM(return_n_uv)"
+  },
+  {
+    "field": "return_uv_noself",
+    "type": "BIGINT",
+    "comment": "回流人数(非自身) = SUM(return_n_uv_noself)"
+  },
+  {
+    "field": "share_rate",
+    "type": "DOUBLE",
+    "comment": "分享率 = share_exposure_cnt/exposure_cnt"
+  },
+  {
+    "field": "return_rate",
+    "type": "DOUBLE",
+    "comment": "回流率(含自身) = return_exposure_cnt/exposure_cnt"
+  },
+  {
+    "field": "return_rate_noself",
+    "type": "DOUBLE",
+    "comment": "回流率(非自身) = return_exposure_cnt_noself/exposure_cnt"
+  },
+  {
+    "field": "share_return_rate",
+    "type": "DOUBLE",
+    "comment": "分享→回流转化率(非自身) = return_exposure_cnt_noself/share_exposure_cnt"
+  },
+  {
+    "field": "str_real",
+    "type": "DOUBLE",
+    "comment": "= return_rate_noself, 模型label"
+  },
+  {
+    "field": "str_pred",
+    "type": "DOUBLE",
+    "comment": "STR预估 = SUM(str_pred)/exposure_cnt"
+  },
+  {
+    "field": "str_copc",
+    "type": "DOUBLE",
+    "comment": "STR copc = str_real/str_pred"
+  },
+  {
+    "field": "str_mae",
+    "type": "DOUBLE",
+    "comment": "STR MAE = AVG(|str_pred - str_real|)"
+  },
+  {
+    "field": "str_var",
+    "type": "DOUBLE",
+    "comment": "STR VAR = VARIANCE(str_pred - str_real)"
+  },
+  {
+    "field": "rosn_real",
+    "type": "DOUBLE",
+    "comment": "= return_uv_noself/return_exposure_cnt_noself, 模型label"
+  },
+  {
+    "field": "rosn_pred",
+    "type": "DOUBLE",
+    "comment": "ROSN预估 = SUM(rosn_pred WHERE is_return_noself=1)/SUM(is_return_noself)"
+  },
+  {
+    "field": "rosn_copc",
+    "type": "DOUBLE",
+    "comment": "ROSN copc = rosn_real/rosn_pred"
+  },
+  {
+    "field": "rosn_pred_origin",
+    "type": "DOUBLE",
+    "comment": "ROSN原始预估均值 = AVG(rosn_pred_origin)"
+  },
+  {
+    "field": "rosn_mae",
+    "type": "DOUBLE",
+    "comment": "ROSN MAE = AVG(|rosn_pred - rosn_real|) WHERE is_return_noself=1"
+  },
+  {
+    "field": "rosn_var",
+    "type": "DOUBLE",
+    "comment": "ROSN VAR = VARIANCE(rosn_pred - rosn_real) WHERE is_return_noself=1"
+  },
+  {
+    "field": "rovn_real",
+    "type": "DOUBLE",
+    "comment": "= return_uv_noself/exposure_cnt, 模型label"
+  },
+  {
+    "field": "rovn_pred",
+    "type": "DOUBLE",
+    "comment": "rovn预估 = AVG(str_pred*rosn_pred)"
+  },
+  {
+    "field": "rovn_copc",
+    "type": "DOUBLE",
+    "comment": "rovn copc = rovn_real/rovn_pred"
+  },
+  {
+    "field": "rovn_mae",
+    "type": "DOUBLE",
+    "comment": "rovn MAE = AVG(|rovn_pred - rovn_real|)"
+  },
+  {
+    "field": "rovn_var",
+    "type": "DOUBLE",
+    "comment": "rovn VAR = VARIANCE(rovn_pred - rovn_real)"
+  },
+  {
+    "field": "sortscore_avg",
+    "type": "DOUBLE",
+    "comment": "sortscore均值"
+  },
+  {
+    "field": "bn_uv",
+    "type": "BIGINT",
+    "comment": "B链全量: 回流去重人数"
+  },
+  {
+    "field": "bn_pv",
+    "type": "BIGINT",
+    "comment": "B链全量: 回流点击次数"
+  },
+  {
+    "field": "bn_exp",
+    "type": "BIGINT",
+    "comment": "B链全量: 回流session曝光数"
+  },
+  {
+    "field": "bn_ror",
+    "type": "DOUBLE",
+    "comment": "bn_uv/exposure_uv"
+  },
+  {
+    "field": "bn_rov",
+    "type": "DOUBLE",
+    "comment": "bn_uv/exposure_cnt"
+  },
+  {
+    "field": "b1_uv",
+    "type": "BIGINT",
+    "comment": "B链depth=1: 回流去重人数"
+  },
+  {
+    "field": "b1_pv",
+    "type": "BIGINT",
+    "comment": "B链depth=1: 回流点击次数"
+  },
+  {
+    "field": "b1_exp",
+    "type": "BIGINT",
+    "comment": "B链depth=1: 回流session曝光数"
+  },
+  {
+    "field": "b1_ror",
+    "type": "DOUBLE",
+    "comment": "b1_uv/exposure_uv"
+  },
+  {
+    "field": "b1_rov",
+    "type": "DOUBLE",
+    "comment": "b1_uv/exposure_cnt"
+  },
+  {
+    "field": "b2_uv",
+    "type": "BIGINT",
+    "comment": "B链depth=2: 回流去重人数"
+  },
+  {
+    "field": "b2_pv",
+    "type": "BIGINT",
+    "comment": "B链depth=2: 回流点击次数"
+  },
+  {
+    "field": "b2_exp",
+    "type": "BIGINT",
+    "comment": "B链depth=2: 回流session曝光数"
+  },
+  {
+    "field": "b2_ror",
+    "type": "DOUBLE",
+    "comment": "b2_uv/b1_uv"
+  },
+  {
+    "field": "b2_rov",
+    "type": "DOUBLE",
+    "comment": "b2_uv/b1_exp"
+  },
+  {
+    "field": "b3_uv",
+    "type": "BIGINT",
+    "comment": "B链depth=3: 回流去重人数"
+  },
+  {
+    "field": "b3_pv",
+    "type": "BIGINT",
+    "comment": "B链depth=3: 回流点击次数"
+  },
+  {
+    "field": "b3_exp",
+    "type": "BIGINT",
+    "comment": "B链depth=3: 回流session曝光数"
+  },
+  {
+    "field": "b3_ror",
+    "type": "DOUBLE",
+    "comment": "b3_uv/b2_uv"
+  },
+  {
+    "field": "b3_rov",
+    "type": "DOUBLE",
+    "comment": "b3_uv/b2_exp"
+  },
+  {
+    "field": "cn_1_uv",
+    "type": "BIGINT",
+    "comment": "C链hop1: 回流去重人数"
+  },
+  {
+    "field": "cn_1_pv",
+    "type": "BIGINT",
+    "comment": "C链hop1: 回流点击次数"
+  },
+  {
+    "field": "cn_1_exp",
+    "type": "BIGINT",
+    "comment": "C链hop1: 回流session曝光数"
+  },
+  {
+    "field": "cn_1_ror",
+    "type": "DOUBLE",
+    "comment": "cn_1_uv/bn_uv"
+  },
+  {
+    "field": "cn_1_rov",
+    "type": "DOUBLE",
+    "comment": "cn_1_uv/bn_exp"
+  },
+  {
+    "field": "cn_2_uv",
+    "type": "BIGINT",
+    "comment": "C链hop2: 回流去重人数"
+  },
+  {
+    "field": "cn_2_pv",
+    "type": "BIGINT",
+    "comment": "C链hop2: 回流点击次数"
+  },
+  {
+    "field": "cn_2_exp",
+    "type": "BIGINT",
+    "comment": "C链hop2: 回流session曝光数"
+  },
+  {
+    "field": "cn_2_ror",
+    "type": "DOUBLE",
+    "comment": "cn_2_uv/cn_1_uv"
+  },
+  {
+    "field": "cn_2_rov",
+    "type": "DOUBLE",
+    "comment": "cn_2_uv/cn_1_exp"
+  },
+  {
+    "field": "cn_3_uv",
+    "type": "BIGINT",
+    "comment": "C链hop3: 回流去重人数"
+  },
+  {
+    "field": "cn_3_pv",
+    "type": "BIGINT",
+    "comment": "C链hop3: 回流点击次数"
+  },
+  {
+    "field": "cn_3_exp",
+    "type": "BIGINT",
+    "comment": "C链hop3: 回流session曝光数"
+  },
+  {
+    "field": "cn_3_ror",
+    "type": "DOUBLE",
+    "comment": "cn_3_uv/cn_2_uv"
+  },
+  {
+    "field": "cn_3_rov",
+    "type": "DOUBLE",
+    "comment": "cn_3_uv/cn_2_exp"
+  },
+  {
+    "field": "cn_total_uv",
+    "type": "BIGINT",
+    "comment": "C链合计UV = cn_1_uv + cn_2_uv + cn_3_uv"
+  },
+  {
+    "field": "cn_total_pv",
+    "type": "BIGINT",
+    "comment": "C链合计PV = cn_1_pv + cn_2_pv + cn_3_pv"
+  },
+  {
+    "field": "cn_total_exp",
+    "type": "BIGINT",
+    "comment": "C链合计EXP = cn_1_exp + cn_2_exp + cn_3_exp"
+  },
+  {
+    "field": "cn_total_ror",
+    "type": "DOUBLE",
+    "comment": "cn_total_uv/bn_uv"
+  },
+  {
+    "field": "cn_total_rov",
+    "type": "DOUBLE",
+    "comment": "cn_total_uv/bn_exp"
+  },
+  {
+    "field": "c1_1_uv",
+    "type": "BIGINT",
+    "comment": "C链d1-hop1: 回流去重人数"
+  },
+  {
+    "field": "c1_1_pv",
+    "type": "BIGINT",
+    "comment": "C链d1-hop1: 回流点击次数"
+  },
+  {
+    "field": "c1_1_exp",
+    "type": "BIGINT",
+    "comment": "C链d1-hop1: 回流session曝光数"
+  },
+  {
+    "field": "c1_1_ror",
+    "type": "DOUBLE",
+    "comment": "c1_1_uv/bn_uv"
+  },
+  {
+    "field": "c1_1_rov",
+    "type": "DOUBLE",
+    "comment": "c1_1_uv/bn_exp"
+  },
+  {
+    "field": "c2_1_uv",
+    "type": "BIGINT",
+    "comment": "C链d2-hop1: 回流去重人数"
+  },
+  {
+    "field": "c2_1_pv",
+    "type": "BIGINT",
+    "comment": "C链d2-hop1: 回流点击次数"
+  },
+  {
+    "field": "c2_1_exp",
+    "type": "BIGINT",
+    "comment": "C链d2-hop1: 回流session曝光数"
+  },
+  {
+    "field": "c2_1_ror",
+    "type": "DOUBLE",
+    "comment": "c2_1_uv/c1_1_uv"
+  },
+  {
+    "field": "c2_1_rov",
+    "type": "DOUBLE",
+    "comment": "c2_1_uv/c1_1_exp"
+  },
+  {
+    "field": "c3_1_uv",
+    "type": "BIGINT",
+    "comment": "C链d3-hop1: 回流去重人数"
+  },
+  {
+    "field": "c3_1_pv",
+    "type": "BIGINT",
+    "comment": "C链d3-hop1: 回流点击次数"
+  },
+  {
+    "field": "c3_1_exp",
+    "type": "BIGINT",
+    "comment": "C链d3-hop1: 回流session曝光数"
+  },
+  {
+    "field": "c3_1_ror",
+    "type": "DOUBLE",
+    "comment": "c3_1_uv/c2_1_uv"
+  },
+  {
+    "field": "c3_1_rov",
+    "type": "DOUBLE",
+    "comment": "c3_1_uv/c2_1_exp"
+  },
+  {
+    "field": "c1_2_uv",
+    "type": "BIGINT",
+    "comment": "C链d1-hop2: 回流去重人数"
+  },
+  {
+    "field": "c1_2_pv",
+    "type": "BIGINT",
+    "comment": "C链d1-hop2: 回流点击次数"
+  },
+  {
+    "field": "c1_2_exp",
+    "type": "BIGINT",
+    "comment": "C链d1-hop2: 回流session曝光数"
+  },
+  {
+    "field": "c1_2_ror",
+    "type": "DOUBLE",
+    "comment": "c1_2_uv/cn_1_uv"
+  },
+  {
+    "field": "c1_2_rov",
+    "type": "DOUBLE",
+    "comment": "c1_2_uv/cn_1_exp"
+  },
+  {
+    "field": "c2_2_uv",
+    "type": "BIGINT",
+    "comment": "C链d2-hop2: 回流去重人数"
+  },
+  {
+    "field": "c2_2_pv",
+    "type": "BIGINT",
+    "comment": "C链d2-hop2: 回流点击次数"
+  },
+  {
+    "field": "c2_2_exp",
+    "type": "BIGINT",
+    "comment": "C链d2-hop2: 回流session曝光数"
+  },
+  {
+    "field": "c2_2_ror",
+    "type": "DOUBLE",
+    "comment": "c2_2_uv/c1_2_uv"
+  },
+  {
+    "field": "c2_2_rov",
+    "type": "DOUBLE",
+    "comment": "c2_2_uv/c1_2_exp"
+  },
+  {
+    "field": "c3_2_uv",
+    "type": "BIGINT",
+    "comment": "C链d3-hop2: 回流去重人数"
+  },
+  {
+    "field": "c3_2_pv",
+    "type": "BIGINT",
+    "comment": "C链d3-hop2: 回流点击次数"
+  },
+  {
+    "field": "c3_2_exp",
+    "type": "BIGINT",
+    "comment": "C链d3-hop2: 回流session曝光数"
+  },
+  {
+    "field": "c3_2_ror",
+    "type": "DOUBLE",
+    "comment": "c3_2_uv/c2_2_uv"
+  },
+  {
+    "field": "c3_2_rov",
+    "type": "DOUBLE",
+    "comment": "c3_2_uv/c2_2_exp"
+  },
+  {
+    "field": "c1_3_uv",
+    "type": "BIGINT",
+    "comment": "C链d1-hop3: 回流去重人数"
+  },
+  {
+    "field": "c1_3_pv",
+    "type": "BIGINT",
+    "comment": "C链d1-hop3: 回流点击次数"
+  },
+  {
+    "field": "c1_3_exp",
+    "type": "BIGINT",
+    "comment": "C链d1-hop3: 回流session曝光数"
+  },
+  {
+    "field": "c1_3_ror",
+    "type": "DOUBLE",
+    "comment": "c1_3_uv/cn_2_uv"
+  },
+  {
+    "field": "c1_3_rov",
+    "type": "DOUBLE",
+    "comment": "c1_3_uv/cn_2_exp"
+  },
+  {
+    "field": "c2_3_uv",
+    "type": "BIGINT",
+    "comment": "C链d2-hop3: 回流去重人数"
+  },
+  {
+    "field": "c2_3_pv",
+    "type": "BIGINT",
+    "comment": "C链d2-hop3: 回流点击次数"
+  },
+  {
+    "field": "c2_3_exp",
+    "type": "BIGINT",
+    "comment": "C链d2-hop3: 回流session曝光数"
+  },
+  {
+    "field": "c2_3_ror",
+    "type": "DOUBLE",
+    "comment": "c2_3_uv/c1_3_uv"
+  },
+  {
+    "field": "c2_3_rov",
+    "type": "DOUBLE",
+    "comment": "c2_3_uv/c1_3_exp"
+  },
+  {
+    "field": "c3_3_uv",
+    "type": "BIGINT",
+    "comment": "C链d3-hop3: 回流去重人数"
+  },
+  {
+    "field": "c3_3_pv",
+    "type": "BIGINT",
+    "comment": "C链d3-hop3: 回流点击次数"
+  },
+  {
+    "field": "c3_3_exp",
+    "type": "BIGINT",
+    "comment": "C链d3-hop3: 回流session曝光数"
+  },
+  {
+    "field": "c3_3_ror",
+    "type": "DOUBLE",
+    "comment": "c3_3_uv/c2_3_uv"
+  },
+  {
+    "field": "c3_3_rov",
+    "type": "DOUBLE",
+    "comment": "c3_3_uv/c2_3_exp"
+  },
+  {
+    "field": "d0",
+    "type": "BIGINT",
+    "comment": "D链初始成本: session内后续曝光数"
+  },
+  {
+    "field": "dn_1_uv",
+    "type": "BIGINT",
+    "comment": "D链hop1: 回流去重人数"
+  },
+  {
+    "field": "dn_1_pv",
+    "type": "BIGINT",
+    "comment": "D链hop1: 回流点击次数"
+  },
+  {
+    "field": "dn_1_exp",
+    "type": "BIGINT",
+    "comment": "D链hop1: 回流session曝光数"
+  },
+  {
+    "field": "dn_1_ror",
+    "type": "DOUBLE",
+    "comment": "dn_1_uv/exposure_uv"
+  },
+  {
+    "field": "dn_1_rov",
+    "type": "DOUBLE",
+    "comment": "dn_1_uv/d0"
+  },
+  {
+    "field": "dn_2_uv",
+    "type": "BIGINT",
+    "comment": "D链hop2: 回流去重人数"
+  },
+  {
+    "field": "dn_2_pv",
+    "type": "BIGINT",
+    "comment": "D链hop2: 回流点击次数"
+  },
+  {
+    "field": "dn_2_exp",
+    "type": "BIGINT",
+    "comment": "D链hop2: 回流session曝光数"
+  },
+  {
+    "field": "dn_2_ror",
+    "type": "DOUBLE",
+    "comment": "dn_2_uv/dn_1_uv"
+  },
+  {
+    "field": "dn_2_rov",
+    "type": "DOUBLE",
+    "comment": "dn_2_uv/dn_1_exp"
+  },
+  {
+    "field": "dn_3_uv",
+    "type": "BIGINT",
+    "comment": "D链hop3: 回流去重人数"
+  },
+  {
+    "field": "dn_3_pv",
+    "type": "BIGINT",
+    "comment": "D链hop3: 回流点击次数"
+  },
+  {
+    "field": "dn_3_exp",
+    "type": "BIGINT",
+    "comment": "D链hop3: 回流session曝光数"
+  },
+  {
+    "field": "dn_3_ror",
+    "type": "DOUBLE",
+    "comment": "dn_3_uv/dn_2_uv"
+  },
+  {
+    "field": "dn_3_rov",
+    "type": "DOUBLE",
+    "comment": "dn_3_uv/dn_2_exp"
+  },
+  {
+    "field": "dn_total_uv",
+    "type": "BIGINT",
+    "comment": "D链合计UV = dn_1_uv + dn_2_uv + dn_3_uv"
+  },
+  {
+    "field": "dn_total_pv",
+    "type": "BIGINT",
+    "comment": "D链合计PV = dn_1_pv + dn_2_pv + dn_3_pv"
+  },
+  {
+    "field": "dn_total_exp",
+    "type": "BIGINT",
+    "comment": "D链合计EXP = dn_1_exp + dn_2_exp + dn_3_exp"
+  },
+  {
+    "field": "dn_total_ror",
+    "type": "DOUBLE",
+    "comment": "dn_total_uv/exposure_uv"
+  },
+  {
+    "field": "dn_total_rov",
+    "type": "DOUBLE",
+    "comment": "dn_total_uv/d0"
+  },
+  {
+    "field": "d1_1_uv",
+    "type": "BIGINT",
+    "comment": "D链d1-hop1: 回流去重人数"
+  },
+  {
+    "field": "d1_1_pv",
+    "type": "BIGINT",
+    "comment": "D链d1-hop1: 回流点击次数"
+  },
+  {
+    "field": "d1_1_exp",
+    "type": "BIGINT",
+    "comment": "D链d1-hop1: 回流session曝光数"
+  },
+  {
+    "field": "d1_1_ror",
+    "type": "DOUBLE",
+    "comment": "d1_1_uv/exposure_uv"
+  },
+  {
+    "field": "d1_1_rov",
+    "type": "DOUBLE",
+    "comment": "d1_1_uv/d0"
+  },
+  {
+    "field": "d2_1_uv",
+    "type": "BIGINT",
+    "comment": "D链d2-hop1: 回流去重人数"
+  },
+  {
+    "field": "d2_1_pv",
+    "type": "BIGINT",
+    "comment": "D链d2-hop1: 回流点击次数"
+  },
+  {
+    "field": "d2_1_exp",
+    "type": "BIGINT",
+    "comment": "D链d2-hop1: 回流session曝光数"
+  },
+  {
+    "field": "d2_1_ror",
+    "type": "DOUBLE",
+    "comment": "d2_1_uv/d1_1_uv"
+  },
+  {
+    "field": "d2_1_rov",
+    "type": "DOUBLE",
+    "comment": "d2_1_uv/d1_1_exp"
+  },
+  {
+    "field": "d3_1_uv",
+    "type": "BIGINT",
+    "comment": "D链d3-hop1: 回流去重人数"
+  },
+  {
+    "field": "d3_1_pv",
+    "type": "BIGINT",
+    "comment": "D链d3-hop1: 回流点击次数"
+  },
+  {
+    "field": "d3_1_exp",
+    "type": "BIGINT",
+    "comment": "D链d3-hop1: 回流session曝光数"
+  },
+  {
+    "field": "d3_1_ror",
+    "type": "DOUBLE",
+    "comment": "d3_1_uv/d2_1_uv"
+  },
+  {
+    "field": "d3_1_rov",
+    "type": "DOUBLE",
+    "comment": "d3_1_uv/d2_1_exp"
+  },
+  {
+    "field": "d1_2_uv",
+    "type": "BIGINT",
+    "comment": "D链d1-hop2: 回流去重人数"
+  },
+  {
+    "field": "d1_2_pv",
+    "type": "BIGINT",
+    "comment": "D链d1-hop2: 回流点击次数"
+  },
+  {
+    "field": "d1_2_exp",
+    "type": "BIGINT",
+    "comment": "D链d1-hop2: 回流session曝光数"
+  },
+  {
+    "field": "d1_2_ror",
+    "type": "DOUBLE",
+    "comment": "d1_2_uv/dn_1_uv"
+  },
+  {
+    "field": "d1_2_rov",
+    "type": "DOUBLE",
+    "comment": "d1_2_uv/dn_1_exp"
+  },
+  {
+    "field": "d2_2_uv",
+    "type": "BIGINT",
+    "comment": "D链d2-hop2: 回流去重人数"
+  },
+  {
+    "field": "d2_2_pv",
+    "type": "BIGINT",
+    "comment": "D链d2-hop2: 回流点击次数"
+  },
+  {
+    "field": "d2_2_exp",
+    "type": "BIGINT",
+    "comment": "D链d2-hop2: 回流session曝光数"
+  },
+  {
+    "field": "d2_2_ror",
+    "type": "DOUBLE",
+    "comment": "d2_2_uv/d1_2_uv"
+  },
+  {
+    "field": "d2_2_rov",
+    "type": "DOUBLE",
+    "comment": "d2_2_uv/d1_2_exp"
+  },
+  {
+    "field": "d3_2_uv",
+    "type": "BIGINT",
+    "comment": "D链d3-hop2: 回流去重人数"
+  },
+  {
+    "field": "d3_2_pv",
+    "type": "BIGINT",
+    "comment": "D链d3-hop2: 回流点击次数"
+  },
+  {
+    "field": "d3_2_exp",
+    "type": "BIGINT",
+    "comment": "D链d3-hop2: 回流session曝光数"
+  },
+  {
+    "field": "d3_2_ror",
+    "type": "DOUBLE",
+    "comment": "d3_2_uv/d2_2_uv"
+  },
+  {
+    "field": "d3_2_rov",
+    "type": "DOUBLE",
+    "comment": "d3_2_uv/d2_2_exp"
+  },
+  {
+    "field": "d1_3_uv",
+    "type": "BIGINT",
+    "comment": "D链d1-hop3: 回流去重人数"
+  },
+  {
+    "field": "d1_3_pv",
+    "type": "BIGINT",
+    "comment": "D链d1-hop3: 回流点击次数"
+  },
+  {
+    "field": "d1_3_exp",
+    "type": "BIGINT",
+    "comment": "D链d1-hop3: 回流session曝光数"
+  },
+  {
+    "field": "d1_3_ror",
+    "type": "DOUBLE",
+    "comment": "d1_3_uv/dn_2_uv"
+  },
+  {
+    "field": "d1_3_rov",
+    "type": "DOUBLE",
+    "comment": "d1_3_uv/dn_2_exp"
+  },
+  {
+    "field": "d2_3_uv",
+    "type": "BIGINT",
+    "comment": "D链d2-hop3: 回流去重人数"
+  },
+  {
+    "field": "d2_3_pv",
+    "type": "BIGINT",
+    "comment": "D链d2-hop3: 回流点击次数"
+  },
+  {
+    "field": "d2_3_exp",
+    "type": "BIGINT",
+    "comment": "D链d2-hop3: 回流session曝光数"
+  },
+  {
+    "field": "d2_3_ror",
+    "type": "DOUBLE",
+    "comment": "d2_3_uv/d1_3_uv"
+  },
+  {
+    "field": "d2_3_rov",
+    "type": "DOUBLE",
+    "comment": "d2_3_uv/d1_3_exp"
+  },
+  {
+    "field": "d3_3_uv",
+    "type": "BIGINT",
+    "comment": "D链d3-hop3: 回流去重人数"
+  },
+  {
+    "field": "d3_3_pv",
+    "type": "BIGINT",
+    "comment": "D链d3-hop3: 回流点击次数"
+  },
+  {
+    "field": "d3_3_exp",
+    "type": "BIGINT",
+    "comment": "D链d3-hop3: 回流session曝光数"
+  },
+  {
+    "field": "d3_3_ror",
+    "type": "DOUBLE",
+    "comment": "d3_3_uv/d2_3_uv"
+  },
+  {
+    "field": "d3_3_rov",
+    "type": "DOUBLE",
+    "comment": "d3_3_uv/d2_3_exp"
+  },
+  {
+    "field": "all_uv",
+    "type": "BIGINT",
+    "comment": "全链路拉回UV = bn_uv + cn_total_uv + dn_total_uv"
+  },
+  {
+    "field": "all_pv",
+    "type": "BIGINT",
+    "comment": "全链路拉回PV = bn_pv + cn_total_pv + dn_total_pv"
+  },
+  {
+    "field": "all_exp",
+    "type": "BIGINT",
+    "comment": "全链路拉回EXP = bn_exp + cn_total_exp + dn_total_exp"
+  },
+  {
+    "field": "all_ror",
+    "type": "DOUBLE",
+    "comment": "all_uv/exposure_uv"
+  },
+  {
+    "field": "all_rov",
+    "type": "DOUBLE",
+    "comment": "all_uv/exposure_cnt"
+  }
+]

+ 678 - 0
table_gen/loghubods.dwd_recsys_alg_exposure_agg_wide_20260209.sql

@@ -0,0 +1,678 @@
+-- =====================================================================
+-- 曝光回流链路 CUBE 聚合表 (宽表版, 含用户/品类维度 + 模型预估 + 全链路漏斗)
+-- 维度: user_type × hh_bucket × head_merge_leve2 × vid_merge_leve2 × vid_id (CUBE)
+-- 依赖: base_20260209 → (JOIN user_type + video_merge_tag + t_score) → CUBE 聚合
+-- 参考: de.sql + dwd_recsys_alg_exposure_agg_20260209
+-- =====================================================================
+--
+-- 指标分区:
+--   基础流量       exposure_cnt / exposure_uv / vid_cnt / exposure_per_user
+--   分享&回流漏斗  share_exposure_cnt → share_cnt → return_exposure_cnt → return_uv
+--                  + 4 个 rate (share_rate / return_rate / return_rate_noself / share_return_rate)
+--   模型预估       STR (曝光→非自身回流概率) / ROSN (条件回流UV) / ROVN (STR×ROSN)
+--                  每组: _real(label) + _pred(预估) + _copc + _mae + _var
+--                  _real 与漏斗字段等价: str_real=return_rate_noself, rovn_real=return_uv_noself/exposure_cnt
+--   B/C/D 链       每级: _uv + _pv + _exp + _ror + _rov
+--   全链路         all_uv/pv/exp = B + C + D, all_ror/rov
+--
+-- rov/ror 分母推导 (逐级递推):
+--   depth 维度: depth=1 的成本=该hop入口; depth=N+1 的成本=depth=N 的输出
+--   hop 维度:   hop1 的成本=链路入口; hop N+1 的成本=hop N 全量depth的输出
+--
+--   B链:  bn/b1 → rov=uv/COUNT(1), ror=uv/COUNT(DISTINCT mid)
+--         b2    → rov=uv/b1_exp,    ror=uv/b1_uv
+--         b3    → rov=uv/b2_exp,    ror=uv/b2_uv
+--   C链全量: cn_1 → rov=uv/bn_exp,    ror=uv/bn_uv
+--            cn_2 → rov=uv/cn_1_exp,  ror=uv/cn_1_uv
+--            cn_3 → rov=uv/cn_2_exp,  ror=uv/cn_2_uv
+--   C链depth拆分: cX_Y → rov=uv/上级exp, ror=uv/上级uv (X=depth, Y=hop)
+--            hop1: c1_1→bn, c2_1→c1_1, c3_1→c2_1
+--            hop2: c1_2→cn_1, c2_2→c1_2, c3_2→c2_2
+--            hop3: c1_3→cn_2, c2_3→c1_3, c3_3→c2_3
+--   D链全量: dn_1 → rov=uv/d0,         ror=uv/COUNT(DISTINCT mid)
+--            dn_2 → rov=uv/dn_1_exp,   ror=uv/dn_1_uv
+--            dn_3 → rov=uv/dn_2_exp,   ror=uv/dn_2_uv
+--   D链depth拆分: dX_Y (同 C 链模式)
+--            hop1: d1_1→d0/mid, d2_1→d1_1, d3_1→d2_1
+--            hop2: d1_2→dn_1, d2_2→d1_2, d3_2→d2_2
+--            hop3: d1_3→dn_2, d2_3→d1_3, d3_3→d2_3
+--   全链路: all → rov=uv/COUNT(1), ror=uv/COUNT(DISTINCT mid)
+-- =====================================================================
+
+-- DROP TABLE IF EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209;
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 (
+    -- ==================== 维度列 ====================
+    dt                         STRING    COMMENT '日期'
+    ,user_type                  STRING    COMMENT '用户拉活量分层(R0&新用户/R1-50/R_180_330等,汇总为SUM)'
+    ,hh_bucket                STRING    COMMENT '小时段(00-03/04-07/.../20-23,汇总为SUM)'
+    ,head_merge_leve2         STRING    COMMENT '进入内容品类(headvideoid品类,汇总为SUM)'
+    ,vid_merge_leve2          STRING    COMMENT '推荐内容品类(vid品类,TOP10曝光+其他,汇总为SUM)'
+    ,vid_id                   STRING    COMMENT '内容id(品类曝光TOP1+其他,汇总为SUM)'
+
+    -- ==================== 基础流量 ====================
+    ,exposure_cnt             BIGINT    COMMENT '曝光次数'
+    ,exposure_uv              BIGINT    COMMENT '曝光人数(mid去重)'
+    ,vid_cnt                  BIGINT    COMMENT '视频个数(vid去重)'
+    ,exposure_per_user        DOUBLE    COMMENT '人均曝光次数 = 曝光次数/曝光人数'
+
+    -- ==================== 分享 & 回流漏斗 ====================
+    ,share_exposure_cnt       BIGINT    COMMENT '产生分享的曝光数'
+    ,share_cnt                BIGINT    COMMENT '分享总次数'
+    ,return_exposure_cnt      BIGINT    COMMENT '产生回流的曝光数(含自身) = SUM(is_return_n)'
+    ,return_exposure_cnt_noself BIGINT  COMMENT '产生回流的曝光数(非自身) = SUM(is_return_noself)'
+    ,return_uv                BIGINT    COMMENT '回流人数(含自身) = SUM(return_n_uv)'
+    ,return_uv_noself         BIGINT    COMMENT '回流人数(非自身) = SUM(return_n_uv_noself)'
+    ,share_rate               DOUBLE    COMMENT '分享率 = share_exposure_cnt/exposure_cnt'
+    ,return_rate              DOUBLE    COMMENT '回流率(含自身) = return_exposure_cnt/exposure_cnt'
+    ,return_rate_noself       DOUBLE    COMMENT '回流率(非自身) = return_exposure_cnt_noself/exposure_cnt'
+    ,share_return_rate        DOUBLE    COMMENT '分享→回流转化率(非自身) = return_exposure_cnt_noself/share_exposure_cnt'
+
+    -- ==================== 模型预估: STR (曝光→非自身回流概率) ====================
+    ,str_real                 DOUBLE    COMMENT '= return_rate_noself, 模型label'
+    ,str_pred                 DOUBLE    COMMENT 'STR预估 = SUM(str_pred)/exposure_cnt'
+    ,str_copc                 DOUBLE    COMMENT 'STR copc = str_real/str_pred'
+    ,str_mae                  DOUBLE    COMMENT 'STR MAE = AVG(|str_pred - str_real|)'
+    ,str_var                  DOUBLE    COMMENT 'STR VAR = VARIANCE(str_pred - str_real)'
+
+    -- ==================== 模型预估: ROSN (条件回流UV, 非自身) ====================
+    ,rosn_real                DOUBLE    COMMENT '= return_uv_noself/return_exposure_cnt_noself, 模型label'
+    ,rosn_pred                DOUBLE    COMMENT 'ROSN预估 = SUM(rosn_pred WHERE is_return_noself=1)/SUM(is_return_noself)'
+    ,rosn_copc                DOUBLE    COMMENT 'ROSN copc = rosn_real/rosn_pred'
+    ,rosn_pred_origin         DOUBLE    COMMENT 'ROSN原始预估均值 = AVG(rosn_pred_origin)'
+    ,rosn_mae                 DOUBLE    COMMENT 'ROSN MAE = AVG(|rosn_pred - rosn_real|) WHERE is_return_noself=1'
+    ,rosn_var                 DOUBLE    COMMENT 'ROSN VAR = VARIANCE(rosn_pred - rosn_real) WHERE is_return_noself=1'
+
+    -- ==================== 模型预估: ROVN (STR×ROSN) ====================
+    ,rovn_real                DOUBLE    COMMENT '= return_uv_noself/exposure_cnt, 模型label'
+    ,rovn_pred                DOUBLE    COMMENT 'rovn预估 = AVG(str_pred*rosn_pred)'
+    ,rovn_copc                DOUBLE    COMMENT 'rovn copc = rovn_real/rovn_pred'
+    ,rovn_mae                 DOUBLE    COMMENT 'rovn MAE = AVG(|rovn_pred - rovn_real|)'
+    ,rovn_var                 DOUBLE    COMMENT 'rovn VAR = VARIANCE(rovn_pred - rovn_real)'
+    ,sortscore_avg            DOUBLE    COMMENT 'sortscore均值'
+
+    -- ==================== B链 (分享→点击) ====================
+    ,bn_uv                    BIGINT    COMMENT 'B链全量: 回流去重人数'
+    ,bn_pv                    BIGINT    COMMENT 'B链全量: 回流点击次数'
+    ,bn_exp                   BIGINT    COMMENT 'B链全量: 回流session曝光数'
+    ,bn_ror                   DOUBLE    COMMENT 'bn_uv/exposure_uv'
+    ,bn_rov                   DOUBLE    COMMENT 'bn_uv/exposure_cnt'
+    ,b1_uv                    BIGINT    COMMENT 'B链depth=1: 回流去重人数'
+    ,b1_pv                    BIGINT    COMMENT 'B链depth=1: 回流点击次数'
+    ,b1_exp                   BIGINT    COMMENT 'B链depth=1: 回流session曝光数'
+    ,b1_ror                   DOUBLE    COMMENT 'b1_uv/exposure_uv'
+    ,b1_rov                   DOUBLE    COMMENT 'b1_uv/exposure_cnt'
+    ,b2_uv                    BIGINT    COMMENT 'B链depth=2: 回流去重人数'
+    ,b2_pv                    BIGINT    COMMENT 'B链depth=2: 回流点击次数'
+    ,b2_exp                   BIGINT    COMMENT 'B链depth=2: 回流session曝光数'
+    ,b2_ror                   DOUBLE    COMMENT 'b2_uv/b1_uv'
+    ,b2_rov                   DOUBLE    COMMENT 'b2_uv/b1_exp'
+    ,b3_uv                    BIGINT    COMMENT 'B链depth=3: 回流去重人数'
+    ,b3_pv                    BIGINT    COMMENT 'B链depth=3: 回流点击次数'
+    ,b3_exp                   BIGINT    COMMENT 'B链depth=3: 回流session曝光数'
+    ,b3_ror                   DOUBLE    COMMENT 'b3_uv/b2_uv'
+    ,b3_rov                   DOUBLE    COMMENT 'b3_uv/b2_exp'
+
+    -- ==================== C链 (全量depth, 按hop) ====================
+    ,cn_1_uv                  BIGINT    COMMENT 'C链hop1: 回流去重人数'
+    ,cn_1_pv                  BIGINT    COMMENT 'C链hop1: 回流点击次数'
+    ,cn_1_exp                 BIGINT    COMMENT 'C链hop1: 回流session曝光数'
+    ,cn_1_ror                 DOUBLE    COMMENT 'cn_1_uv/bn_uv'
+    ,cn_1_rov                 DOUBLE    COMMENT 'cn_1_uv/bn_exp'
+    ,cn_2_uv                  BIGINT    COMMENT 'C链hop2: 回流去重人数'
+    ,cn_2_pv                  BIGINT    COMMENT 'C链hop2: 回流点击次数'
+    ,cn_2_exp                 BIGINT    COMMENT 'C链hop2: 回流session曝光数'
+    ,cn_2_ror                 DOUBLE    COMMENT 'cn_2_uv/cn_1_uv'
+    ,cn_2_rov                 DOUBLE    COMMENT 'cn_2_uv/cn_1_exp'
+    ,cn_3_uv                  BIGINT    COMMENT 'C链hop3: 回流去重人数'
+    ,cn_3_pv                  BIGINT    COMMENT 'C链hop3: 回流点击次数'
+    ,cn_3_exp                 BIGINT    COMMENT 'C链hop3: 回流session曝光数'
+    ,cn_3_ror                 DOUBLE    COMMENT 'cn_3_uv/cn_2_uv'
+    ,cn_3_rov                 DOUBLE    COMMENT 'cn_3_uv/cn_2_exp'
+    ,cn_total_uv              BIGINT    COMMENT 'C链合计UV = cn_1_uv + cn_2_uv + cn_3_uv'
+    ,cn_total_pv              BIGINT    COMMENT 'C链合计PV = cn_1_pv + cn_2_pv + cn_3_pv'
+    ,cn_total_exp             BIGINT    COMMENT 'C链合计EXP = cn_1_exp + cn_2_exp + cn_3_exp'
+    ,cn_total_ror             DOUBLE    COMMENT 'cn_total_uv/bn_uv'
+    ,cn_total_rov             DOUBLE    COMMENT 'cn_total_uv/bn_exp'
+    -- C链 depth拆分 hop1
+    ,c1_1_uv                  BIGINT    COMMENT 'C链d1-hop1: 回流去重人数'
+    ,c1_1_pv                  BIGINT    COMMENT 'C链d1-hop1: 回流点击次数'
+    ,c1_1_exp                 BIGINT    COMMENT 'C链d1-hop1: 回流session曝光数'
+    ,c1_1_ror                 DOUBLE    COMMENT 'c1_1_uv/bn_uv'
+    ,c1_1_rov                 DOUBLE    COMMENT 'c1_1_uv/bn_exp'
+    ,c2_1_uv                  BIGINT    COMMENT 'C链d2-hop1: 回流去重人数'
+    ,c2_1_pv                  BIGINT    COMMENT 'C链d2-hop1: 回流点击次数'
+    ,c2_1_exp                 BIGINT    COMMENT 'C链d2-hop1: 回流session曝光数'
+    ,c2_1_ror                 DOUBLE    COMMENT 'c2_1_uv/c1_1_uv'
+    ,c2_1_rov                 DOUBLE    COMMENT 'c2_1_uv/c1_1_exp'
+    ,c3_1_uv                  BIGINT    COMMENT 'C链d3-hop1: 回流去重人数'
+    ,c3_1_pv                  BIGINT    COMMENT 'C链d3-hop1: 回流点击次数'
+    ,c3_1_exp                 BIGINT    COMMENT 'C链d3-hop1: 回流session曝光数'
+    ,c3_1_ror                 DOUBLE    COMMENT 'c3_1_uv/c2_1_uv'
+    ,c3_1_rov                 DOUBLE    COMMENT 'c3_1_uv/c2_1_exp'
+    -- C链 depth拆分 hop2
+    ,c1_2_uv                  BIGINT    COMMENT 'C链d1-hop2: 回流去重人数'
+    ,c1_2_pv                  BIGINT    COMMENT 'C链d1-hop2: 回流点击次数'
+    ,c1_2_exp                 BIGINT    COMMENT 'C链d1-hop2: 回流session曝光数'
+    ,c1_2_ror                 DOUBLE    COMMENT 'c1_2_uv/cn_1_uv'
+    ,c1_2_rov                 DOUBLE    COMMENT 'c1_2_uv/cn_1_exp'
+    ,c2_2_uv                  BIGINT    COMMENT 'C链d2-hop2: 回流去重人数'
+    ,c2_2_pv                  BIGINT    COMMENT 'C链d2-hop2: 回流点击次数'
+    ,c2_2_exp                 BIGINT    COMMENT 'C链d2-hop2: 回流session曝光数'
+    ,c2_2_ror                 DOUBLE    COMMENT 'c2_2_uv/c1_2_uv'
+    ,c2_2_rov                 DOUBLE    COMMENT 'c2_2_uv/c1_2_exp'
+    ,c3_2_uv                  BIGINT    COMMENT 'C链d3-hop2: 回流去重人数'
+    ,c3_2_pv                  BIGINT    COMMENT 'C链d3-hop2: 回流点击次数'
+    ,c3_2_exp                 BIGINT    COMMENT 'C链d3-hop2: 回流session曝光数'
+    ,c3_2_ror                 DOUBLE    COMMENT 'c3_2_uv/c2_2_uv'
+    ,c3_2_rov                 DOUBLE    COMMENT 'c3_2_uv/c2_2_exp'
+    -- C链 depth拆分 hop3
+    ,c1_3_uv                  BIGINT    COMMENT 'C链d1-hop3: 回流去重人数'
+    ,c1_3_pv                  BIGINT    COMMENT 'C链d1-hop3: 回流点击次数'
+    ,c1_3_exp                 BIGINT    COMMENT 'C链d1-hop3: 回流session曝光数'
+    ,c1_3_ror                 DOUBLE    COMMENT 'c1_3_uv/cn_2_uv'
+    ,c1_3_rov                 DOUBLE    COMMENT 'c1_3_uv/cn_2_exp'
+    ,c2_3_uv                  BIGINT    COMMENT 'C链d2-hop3: 回流去重人数'
+    ,c2_3_pv                  BIGINT    COMMENT 'C链d2-hop3: 回流点击次数'
+    ,c2_3_exp                 BIGINT    COMMENT 'C链d2-hop3: 回流session曝光数'
+    ,c2_3_ror                 DOUBLE    COMMENT 'c2_3_uv/c1_3_uv'
+    ,c2_3_rov                 DOUBLE    COMMENT 'c2_3_uv/c1_3_exp'
+    ,c3_3_uv                  BIGINT    COMMENT 'C链d3-hop3: 回流去重人数'
+    ,c3_3_pv                  BIGINT    COMMENT 'C链d3-hop3: 回流点击次数'
+    ,c3_3_exp                 BIGINT    COMMENT 'C链d3-hop3: 回流session曝光数'
+    ,c3_3_ror                 DOUBLE    COMMENT 'c3_3_uv/c2_3_uv'
+    ,c3_3_rov                 DOUBLE    COMMENT 'c3_3_uv/c2_3_exp'
+
+    -- ==================== D链 (session内后续曝光传播) ====================
+    ,d0                       BIGINT    COMMENT 'D链初始成本: session内后续曝光数'
+    ,dn_1_uv                  BIGINT    COMMENT 'D链hop1: 回流去重人数'
+    ,dn_1_pv                  BIGINT    COMMENT 'D链hop1: 回流点击次数'
+    ,dn_1_exp                 BIGINT    COMMENT 'D链hop1: 回流session曝光数'
+    ,dn_1_ror                 DOUBLE    COMMENT 'dn_1_uv/exposure_uv'
+    ,dn_1_rov                 DOUBLE    COMMENT 'dn_1_uv/d0'
+    ,dn_2_uv                  BIGINT    COMMENT 'D链hop2: 回流去重人数'
+    ,dn_2_pv                  BIGINT    COMMENT 'D链hop2: 回流点击次数'
+    ,dn_2_exp                 BIGINT    COMMENT 'D链hop2: 回流session曝光数'
+    ,dn_2_ror                 DOUBLE    COMMENT 'dn_2_uv/dn_1_uv'
+    ,dn_2_rov                 DOUBLE    COMMENT 'dn_2_uv/dn_1_exp'
+    ,dn_3_uv                  BIGINT    COMMENT 'D链hop3: 回流去重人数'
+    ,dn_3_pv                  BIGINT    COMMENT 'D链hop3: 回流点击次数'
+    ,dn_3_exp                 BIGINT    COMMENT 'D链hop3: 回流session曝光数'
+    ,dn_3_ror                 DOUBLE    COMMENT 'dn_3_uv/dn_2_uv'
+    ,dn_3_rov                 DOUBLE    COMMENT 'dn_3_uv/dn_2_exp'
+    ,dn_total_uv              BIGINT    COMMENT 'D链合计UV = dn_1_uv + dn_2_uv + dn_3_uv'
+    ,dn_total_pv              BIGINT    COMMENT 'D链合计PV = dn_1_pv + dn_2_pv + dn_3_pv'
+    ,dn_total_exp             BIGINT    COMMENT 'D链合计EXP = dn_1_exp + dn_2_exp + dn_3_exp'
+    ,dn_total_ror             DOUBLE    COMMENT 'dn_total_uv/exposure_uv'
+    ,dn_total_rov             DOUBLE    COMMENT 'dn_total_uv/d0'
+    -- D链 depth拆分 hop1
+    ,d1_1_uv                  BIGINT    COMMENT 'D链d1-hop1: 回流去重人数'
+    ,d1_1_pv                  BIGINT    COMMENT 'D链d1-hop1: 回流点击次数'
+    ,d1_1_exp                 BIGINT    COMMENT 'D链d1-hop1: 回流session曝光数'
+    ,d1_1_ror                 DOUBLE    COMMENT 'd1_1_uv/exposure_uv'
+    ,d1_1_rov                 DOUBLE    COMMENT 'd1_1_uv/d0'
+    ,d2_1_uv                  BIGINT    COMMENT 'D链d2-hop1: 回流去重人数'
+    ,d2_1_pv                  BIGINT    COMMENT 'D链d2-hop1: 回流点击次数'
+    ,d2_1_exp                 BIGINT    COMMENT 'D链d2-hop1: 回流session曝光数'
+    ,d2_1_ror                 DOUBLE    COMMENT 'd2_1_uv/d1_1_uv'
+    ,d2_1_rov                 DOUBLE    COMMENT 'd2_1_uv/d1_1_exp'
+    ,d3_1_uv                  BIGINT    COMMENT 'D链d3-hop1: 回流去重人数'
+    ,d3_1_pv                  BIGINT    COMMENT 'D链d3-hop1: 回流点击次数'
+    ,d3_1_exp                 BIGINT    COMMENT 'D链d3-hop1: 回流session曝光数'
+    ,d3_1_ror                 DOUBLE    COMMENT 'd3_1_uv/d2_1_uv'
+    ,d3_1_rov                 DOUBLE    COMMENT 'd3_1_uv/d2_1_exp'
+    -- D链 depth拆分 hop2
+    ,d1_2_uv                  BIGINT    COMMENT 'D链d1-hop2: 回流去重人数'
+    ,d1_2_pv                  BIGINT    COMMENT 'D链d1-hop2: 回流点击次数'
+    ,d1_2_exp                 BIGINT    COMMENT 'D链d1-hop2: 回流session曝光数'
+    ,d1_2_ror                 DOUBLE    COMMENT 'd1_2_uv/dn_1_uv'
+    ,d1_2_rov                 DOUBLE    COMMENT 'd1_2_uv/dn_1_exp'
+    ,d2_2_uv                  BIGINT    COMMENT 'D链d2-hop2: 回流去重人数'
+    ,d2_2_pv                  BIGINT    COMMENT 'D链d2-hop2: 回流点击次数'
+    ,d2_2_exp                 BIGINT    COMMENT 'D链d2-hop2: 回流session曝光数'
+    ,d2_2_ror                 DOUBLE    COMMENT 'd2_2_uv/d1_2_uv'
+    ,d2_2_rov                 DOUBLE    COMMENT 'd2_2_uv/d1_2_exp'
+    ,d3_2_uv                  BIGINT    COMMENT 'D链d3-hop2: 回流去重人数'
+    ,d3_2_pv                  BIGINT    COMMENT 'D链d3-hop2: 回流点击次数'
+    ,d3_2_exp                 BIGINT    COMMENT 'D链d3-hop2: 回流session曝光数'
+    ,d3_2_ror                 DOUBLE    COMMENT 'd3_2_uv/d2_2_uv'
+    ,d3_2_rov                 DOUBLE    COMMENT 'd3_2_uv/d2_2_exp'
+    -- D链 depth拆分 hop3
+    ,d1_3_uv                  BIGINT    COMMENT 'D链d1-hop3: 回流去重人数'
+    ,d1_3_pv                  BIGINT    COMMENT 'D链d1-hop3: 回流点击次数'
+    ,d1_3_exp                 BIGINT    COMMENT 'D链d1-hop3: 回流session曝光数'
+    ,d1_3_ror                 DOUBLE    COMMENT 'd1_3_uv/dn_2_uv'
+    ,d1_3_rov                 DOUBLE    COMMENT 'd1_3_uv/dn_2_exp'
+    ,d2_3_uv                  BIGINT    COMMENT 'D链d2-hop3: 回流去重人数'
+    ,d2_3_pv                  BIGINT    COMMENT 'D链d2-hop3: 回流点击次数'
+    ,d2_3_exp                 BIGINT    COMMENT 'D链d2-hop3: 回流session曝光数'
+    ,d2_3_ror                 DOUBLE    COMMENT 'd2_3_uv/d1_3_uv'
+    ,d2_3_rov                 DOUBLE    COMMENT 'd2_3_uv/d1_3_exp'
+    ,d3_3_uv                  BIGINT    COMMENT 'D链d3-hop3: 回流去重人数'
+    ,d3_3_pv                  BIGINT    COMMENT 'D链d3-hop3: 回流点击次数'
+    ,d3_3_exp                 BIGINT    COMMENT 'D链d3-hop3: 回流session曝光数'
+    ,d3_3_ror                 DOUBLE    COMMENT 'd3_3_uv/d2_3_uv'
+    ,d3_3_rov                 DOUBLE    COMMENT 'd3_3_uv/d2_3_exp'
+
+    -- ==================== 全链路 ====================
+    ,all_uv                   BIGINT    COMMENT '全链路拉回UV = bn_uv + cn_total_uv + dn_total_uv'
+    ,all_pv                   BIGINT    COMMENT '全链路拉回PV = bn_pv + cn_total_pv + dn_total_pv'
+    ,all_exp                  BIGINT    COMMENT '全链路拉回EXP = bn_exp + cn_total_exp + dn_total_exp'
+    ,all_ror                  DOUBLE    COMMENT 'all_uv/exposure_uv'
+    ,all_rov                  DOUBLE    COMMENT 'all_uv/exposure_cnt'
+)
+COMMENT '曝光回流链路CUBE聚合-宽表版 (5维度: 用户分层/小时段/进入品类TOP1/推荐品类TOP10/内容idTOP1)'
+;
+
+
+-- =====================================================================
+-- INSERT: 从 wide 宽表聚合, CUBE 全维度组合
+-- =====================================================================
+
+-- SELECT * FROM loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 WHERE dt = '${dt}' ORDER BY exposure_cnt DESC LIMIT 100;
+
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_agg_wide_20260209
+
+WITH
+-- User pull-back-volume tier: one deduplicated (tier, openid) pair per user,
+-- read from yesterday's partition; excludes the catch-all 'S_ALL' tier and
+-- any tier whose name contains 'R50'.
+t_user_type AS (
+    SELECT  type
+            ,openid
+    FROM    loghubods.mid_share_return_people_1year
+    WHERE   dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
+    AND     type IS NOT NULL
+    AND     type != 'S_ALL'
+    AND     type NOT REGEXP 'R50'
+    GROUP BY type, openid
+)
+-- Model prediction scores, deduplicated to one row per
+-- (apptype, videoid, recommendtraceid).
+-- FIX: the original ROW_NUMBER had no ORDER BY, so the surviving row was
+-- chosen arbitrarily (non-deterministic across runs); MaxCompute also
+-- requires an ORDER BY inside ROW_NUMBER's OVER clause. Ordering by
+-- sortscore DESC keeps the highest-ranked scoring row deterministically.
+,t_score AS (
+    SELECT  apptype
+            ,videoid
+            ,recommendtraceid
+            ,scoresmap
+            ,sortscore
+    FROM    (
+                SELECT  apptype
+                        ,videoid
+                        ,recommendtraceid
+                        ,scoresmap
+                        ,sortscore
+                        ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid ORDER BY sortscore DESC) AS rn
+                FROM    loghubods.statistics_log_hour
+                WHERE   dt LIKE '${dt}%'
+                AND     scoresmap IS NOT NULL
+            )
+    WHERE   rn = 1
+)
+-- TOP1 entry-content category ("venue"): the single merge_leve2 category of
+-- the head video with the highest count of non-self returns on the day.
+,t_top_head_cate AS (
+    SELECT  merge_leve2
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,SUM(CAST(is_return_noself AS BIGINT)) AS ret_cnt
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.headvideoid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2
+            )
+    ORDER BY ret_cnt DESC
+    LIMIT   1
+)
+-- TOP10 recommended-content categories ("goods"): the ten merge_leve2
+-- categories of the recommended vid with the most exposures on the day.
+,t_top_vid_cate AS (
+    SELECT  merge_leve2
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,COUNT(1) AS exp_cnt
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2
+            )
+    ORDER BY exp_cnt DESC
+    LIMIT   10
+)
+-- TOP1 content id per category ("goods"): for each merge_leve2 category keep
+-- the single most-exposed vid, restricted to vids with > 100k exposures.
+-- NOTE(review): HAVING filters on the SELECT alias `exp_cnt`; this relies on
+-- the engine resolving output aliases inside HAVING -- confirm the dialect
+-- supports it (MaxCompute does in its newer SQL mode).
+-- NOTE(review): if video_merge_tag maps one vid to several merge_leve2
+-- values, the same vid can appear here for multiple categories; the later
+-- join in t_wide is on vid only, which would then fan out rows -- verify.
+,t_top_vid AS (
+    SELECT  merge_leve2, vid
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,base.vid
+                        ,COUNT(1)                                                               AS exp_cnt
+                        ,ROW_NUMBER() OVER (PARTITION BY vt.merge_leve2 ORDER BY COUNT(1) DESC) AS rk
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2, base.vid
+                HAVING  exp_cnt > 100000
+            )
+    WHERE   rk <= 1
+)
+-- Wide row set: one row per exposure from the base table, enriched with the
+-- user tier, 4-hour bucket, model scores parsed from scoresmap JSON, and
+-- flags marking membership in the TOP category / TOP vid sets above.
+,t_wide AS (
+    SELECT  base.*
+            -- Collapse raw return-volume tiers into reporting buckets.
+            -- NOTE(review): the R_50_100/R_100_180/R_180_330 group is labeled
+            -- 'R_180_330' although it also covers R_50_100 and R_100_180 --
+            -- confirm this label is intentional (cf. the merged 'R1-50').
+            ,CASE WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                  WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                  WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                  ELSE e.type
+             END AS user_type
+            -- Dimensions collapse to '其他' ("other") unless in the TOP sets.
+            ,CASE WHEN th.merge_leve2 IS NOT NULL THEN vt_head.merge_leve2 ELSE '其他' END AS head_merge_leve2
+            ,CASE WHEN tv.merge_leve2 IS NOT NULL THEN vt_vid.merge_leve2  ELSE '其他' END AS vid_merge_leve2
+            ,CASE WHEN ti.vid IS NOT NULL          THEN base.vid           ELSE '其他' END AS vid_id
+            -- Model scores pulled out of the scoresmap JSON blob.
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            -- NOTE(review): 1.22 and 1.15 look like empirical calibration
+            -- constants applied to NorXGBScore -- confirm their provenance.
+            ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
+            ,e1.sortscore
+            -- Bucket the hour-of-day column into six 4-hour windows.
+            ,CASE
+                WHEN CAST(hh AS INT) BETWEEN 0  AND 3  THEN '00-03'
+                WHEN CAST(hh AS INT) BETWEEN 4  AND 7  THEN '04-07'
+                WHEN CAST(hh AS INT) BETWEEN 8  AND 11 THEN '08-11'
+                WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
+                WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
+                WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
+                ELSE '-'
+             END AS hh_bucket
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    -- mid is assumed to embed the openid after a 'weixin_openid_' prefix;
+    -- SUBSTRING_INDEX(...,-1) takes everything after the last occurrence.
+    LEFT JOIN t_user_type e
+    ON      SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
+    LEFT JOIN loghubods.video_merge_tag vt_head
+    ON      base.headvideoid = vt_head.videoid
+    LEFT JOIN loghubods.video_merge_tag vt_vid
+    ON      base.vid = vt_vid.videoid
+    LEFT JOIN t_score e1
+    ON      base.apptype = e1.apptype
+    AND     base.vid = e1.videoid
+    AND     base.recomtraceid = e1.recommendtraceid
+    LEFT JOIN t_top_head_cate th
+    ON      vt_head.merge_leve2 = th.merge_leve2
+    LEFT JOIN t_top_vid_cate tv
+    ON      vt_vid.merge_leve2 = tv.merge_leve2
+    -- NOTE(review): joined on vid only, while t_top_vid is keyed by
+    -- (merge_leve2, vid); if one vid is TOP1 in several categories this
+    -- join duplicates base rows -- verify video_merge_tag cardinality.
+    LEFT JOIN t_top_vid ti
+    ON      base.vid = ti.vid
+    WHERE   base.dt = '${dt}'
+)
+
+SELECT
+        -- ==================== 维度列 ====================
+        '${dt}'                                                                                     AS dt
+        ,CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END              AS user_type
+        ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END              AS hh_bucket
+        ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
+        ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END   AS vid_merge_leve2
+        ,CASE WHEN GROUPING(vid_id) = 1 THEN 'SUM' ELSE NVL(vid_id, 'SUM') END                     AS vid_id
+
+        -- ==================== 基础流量 ====================
+        ,COUNT(1)                                                                       AS exposure_cnt
+        ,COUNT(DISTINCT mid)                                                            AS exposure_uv
+        ,COUNT(DISTINCT vid)                                                            AS vid_cnt
+        ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4)                                      AS exposure_per_user
+
+        -- ==================== 分享 & 回流漏斗 ====================
+        ,SUM(CAST(is_share AS BIGINT))                                                  AS share_exposure_cnt
+        ,SUM(CAST(share_cnt AS BIGINT))                                                 AS share_cnt
+        ,SUM(CAST(is_return_n AS BIGINT))                                               AS return_exposure_cnt
+        ,SUM(CAST(is_return_noself AS BIGINT))                                          AS return_exposure_cnt_noself
+        ,SUM(CAST(return_n_uv AS BIGINT))                                               AS return_uv
+        ,SUM(CAST(return_n_uv_noself AS BIGINT))                                        AS return_uv_noself
+        ,ROUND(COALESCE(SUM(CAST(is_share AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                            AS share_rate
+        ,ROUND(COALESCE(SUM(CAST(is_return_n AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                         AS return_rate
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                    AS return_rate_noself
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(CAST(SUM(CAST(is_share AS BIGINT)) AS DOUBLE), 0), 0), 6)               AS share_return_rate
+
+        -- ==================== 模型预估: STR (曝光→非自身回流概率) ====================
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS str_real
+        ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6)                          AS str_pred
+        ,ROUND(
+            (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
+        , 4)                                                                                   AS str_copc
+        ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6)                      AS str_mae
+        ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6)                      AS str_var
+
+        -- ==================== 模型预估: ROSN (条件回流UV, 非自身) ====================
+        ,ROUND(COALESCE(
+            SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
+        , 0), 6)                                                                               AS rosn_real
+        ,ROUND(COALESCE(SUM(CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred END) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6)  AS rosn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
+            / NULLIF(SUM(CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred END) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
+        , 4)                                                                                   AS rosn_copc
+        ,ROUND(AVG(rosn_pred_origin), 6)                                                      AS rosn_pred_origin
+        ,ROUND(AVG(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
+            END
+        ), 6)                                                                                  AS rosn_mae
+        ,ROUND(VARIANCE(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
+            END
+        ), 6)                                                                                  AS rosn_var
+
+        -- ==================== 模型预估: ROVN (STR×ROSN) ====================
+        ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS rovn_real
+        ,ROUND(AVG(str_pred * rosn_pred), 6)                                                    AS rovn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(AVG(str_pred * rosn_pred), 0)
+        , 4)                                                                                     AS rovn_copc
+        ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6)            AS rovn_mae
+        ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6)        AS rovn_var
+        ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6)                                               AS sortscore_avg
+
+        -- ==================== B链 ====================
+        ,SUM(CAST(bn_uv AS BIGINT))                                                     AS bn_uv
+        ,SUM(CAST(bn_pv AS BIGINT))                                                     AS bn_pv
+        ,SUM(CAST(bn_exp AS BIGINT))                                                    AS bn_exp
+        ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)               AS bn_ror
+        ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                          AS bn_rov
+        ,SUM(CAST(b1_uv AS BIGINT))                                                     AS b1_uv
+        ,SUM(CAST(b1_pv AS BIGINT))                                                     AS b1_pv
+        ,SUM(CAST(b1_exp AS BIGINT))                                                    AS b1_exp
+        ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)               AS b1_ror
+        ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                          AS b1_rov
+        ,SUM(CAST(b2_uv AS BIGINT))                                                     AS b2_uv
+        ,SUM(CAST(b2_pv AS BIGINT))                                                     AS b2_pv
+        ,SUM(CAST(b2_exp AS BIGINT))                                                    AS b2_exp
+        ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS b2_ror
+        ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)       AS b2_rov
+        ,SUM(CAST(b3_uv AS BIGINT))                                                     AS b3_uv
+        ,SUM(CAST(b3_pv AS BIGINT))                                                     AS b3_pv
+        ,SUM(CAST(b3_exp AS BIGINT))                                                    AS b3_exp
+        ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS b3_ror
+        ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)       AS b3_rov
+
+        -- ==================== C链 (全量depth, 按hop) ====================
+        ,SUM(CAST(cn_1_uv AS BIGINT))                                                   AS cn_1_uv
+        ,SUM(CAST(cn_1_pv AS BIGINT))                                                   AS cn_1_pv
+        ,SUM(CAST(cn_1_exp AS BIGINT))                                                  AS cn_1_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)      AS cn_1_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)     AS cn_1_rov
+        ,SUM(CAST(cn_2_uv AS BIGINT))                                                   AS cn_2_uv
+        ,SUM(CAST(cn_2_pv AS BIGINT))                                                   AS cn_2_pv
+        ,SUM(CAST(cn_2_exp AS BIGINT))                                                  AS cn_2_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_2_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_2_rov
+        ,SUM(CAST(cn_3_uv AS BIGINT))                                                   AS cn_3_uv
+        ,SUM(CAST(cn_3_pv AS BIGINT))                                                   AS cn_3_pv
+        ,SUM(CAST(cn_3_exp AS BIGINT))                                                  AS cn_3_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_3_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_3_rov
+        ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))    AS cn_total_uv
+        ,SUM(CAST(cn_1_pv AS BIGINT)) + SUM(CAST(cn_2_pv AS BIGINT)) + SUM(CAST(cn_3_pv AS BIGINT))    AS cn_total_pv
+        ,SUM(CAST(cn_1_exp AS BIGINT)) + SUM(CAST(cn_2_exp AS BIGINT)) + SUM(CAST(cn_3_exp AS BIGINT))  AS cn_total_exp
+        ,ROUND(COALESCE(
+            (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS cn_total_ror
+        ,ROUND(COALESCE(
+            (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS cn_total_rov
+
+        -- ==================== C链 (按depth拆分) ====================
+        -- hop1
+        ,SUM(CAST(c1_1_uv AS BIGINT))                                                     AS c1_1_uv
+        ,SUM(CAST(c1_1_pv AS BIGINT))                                                     AS c1_1_pv
+        ,SUM(CAST(c1_1_exp AS BIGINT))                                                    AS c1_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)      AS c1_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)     AS c1_1_rov
+        ,SUM(CAST(c2_1_uv AS BIGINT))                                                     AS c2_1_uv
+        ,SUM(CAST(c2_1_pv AS BIGINT))                                                     AS c2_1_pv
+        ,SUM(CAST(c2_1_exp AS BIGINT))                                                    AS c2_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_1_rov
+        ,SUM(CAST(c3_1_uv AS BIGINT))                                                     AS c3_1_uv
+        ,SUM(CAST(c3_1_pv AS BIGINT))                                                     AS c3_1_pv
+        ,SUM(CAST(c3_1_exp AS BIGINT))                                                    AS c3_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_1_rov
+        -- hop2
+        ,SUM(CAST(c1_2_uv AS BIGINT))                                                     AS c1_2_uv
+        ,SUM(CAST(c1_2_pv AS BIGINT))                                                     AS c1_2_pv
+        ,SUM(CAST(c1_2_exp AS BIGINT))                                                    AS c1_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c1_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c1_2_rov
+        ,SUM(CAST(c2_2_uv AS BIGINT))                                                     AS c2_2_uv
+        ,SUM(CAST(c2_2_pv AS BIGINT))                                                     AS c2_2_pv
+        ,SUM(CAST(c2_2_exp AS BIGINT))                                                    AS c2_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_2_rov
+        ,SUM(CAST(c3_2_uv AS BIGINT))                                                     AS c3_2_uv
+        ,SUM(CAST(c3_2_pv AS BIGINT))                                                     AS c3_2_pv
+        ,SUM(CAST(c3_2_exp AS BIGINT))                                                    AS c3_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_2_rov
+        -- hop3
+        ,SUM(CAST(c1_3_uv AS BIGINT))                                                     AS c1_3_uv
+        ,SUM(CAST(c1_3_pv AS BIGINT))                                                     AS c1_3_pv
+        ,SUM(CAST(c1_3_exp AS BIGINT))                                                    AS c1_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c1_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c1_3_rov
+        ,SUM(CAST(c2_3_uv AS BIGINT))                                                     AS c2_3_uv
+        ,SUM(CAST(c2_3_pv AS BIGINT))                                                     AS c2_3_pv
+        ,SUM(CAST(c2_3_exp AS BIGINT))                                                    AS c2_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_3_rov
+        ,SUM(CAST(c3_3_uv AS BIGINT))                                                     AS c3_3_uv
+        ,SUM(CAST(c3_3_pv AS BIGINT))                                                     AS c3_3_pv
+        ,SUM(CAST(c3_3_exp AS BIGINT))                                                    AS c3_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_3_rov
+
+        -- ==================== D链 (全量depth, 按hop) ====================
+        ,SUM(CAST(d0 AS BIGINT))                                                        AS d0
+        ,SUM(CAST(dn_1_uv AS BIGINT))                                                   AS dn_1_uv
+        ,SUM(CAST(dn_1_pv AS BIGINT))                                                   AS dn_1_pv
+        ,SUM(CAST(dn_1_exp AS BIGINT))                                                  AS dn_1_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)            AS dn_1_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS dn_1_rov
+        ,SUM(CAST(dn_2_uv AS BIGINT))                                                   AS dn_2_uv
+        ,SUM(CAST(dn_2_pv AS BIGINT))                                                   AS dn_2_pv
+        ,SUM(CAST(dn_2_exp AS BIGINT))                                                  AS dn_2_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_2_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_2_rov
+        ,SUM(CAST(dn_3_uv AS BIGINT))                                                   AS dn_3_uv
+        ,SUM(CAST(dn_3_pv AS BIGINT))                                                   AS dn_3_pv
+        ,SUM(CAST(dn_3_exp AS BIGINT))                                                  AS dn_3_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_3_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_3_rov
+        ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))    AS dn_total_uv
+        ,SUM(CAST(dn_1_pv AS BIGINT)) + SUM(CAST(dn_2_pv AS BIGINT)) + SUM(CAST(dn_3_pv AS BIGINT))    AS dn_total_pv
+        ,SUM(CAST(dn_1_exp AS BIGINT)) + SUM(CAST(dn_2_exp AS BIGINT)) + SUM(CAST(dn_3_exp AS BIGINT))  AS dn_total_exp
+        ,ROUND(COALESCE(
+            (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
+            / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS dn_total_ror
+        ,ROUND(COALESCE(
+            (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS dn_total_rov
+
+        -- ==================== D链 (按depth拆分) ====================
+        -- hop1
+        ,SUM(CAST(d1_1_uv AS BIGINT))                                                     AS d1_1_uv
+        ,SUM(CAST(d1_1_pv AS BIGINT))                                                     AS d1_1_pv
+        ,SUM(CAST(d1_1_exp AS BIGINT))                                                    AS d1_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)            AS d1_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS d1_1_rov
+        ,SUM(CAST(d2_1_uv AS BIGINT))                                                     AS d2_1_uv
+        ,SUM(CAST(d2_1_pv AS BIGINT))                                                     AS d2_1_pv
+        ,SUM(CAST(d2_1_exp AS BIGINT))                                                    AS d2_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_1_rov
+        ,SUM(CAST(d3_1_uv AS BIGINT))                                                     AS d3_1_uv
+        ,SUM(CAST(d3_1_pv AS BIGINT))                                                     AS d3_1_pv
+        ,SUM(CAST(d3_1_exp AS BIGINT))                                                    AS d3_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_1_rov
+        -- hop2
+        ,SUM(CAST(d1_2_uv AS BIGINT))                                                     AS d1_2_uv
+        ,SUM(CAST(d1_2_pv AS BIGINT))                                                     AS d1_2_pv
+        ,SUM(CAST(d1_2_exp AS BIGINT))                                                    AS d1_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d1_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d1_2_rov
+        ,SUM(CAST(d2_2_uv AS BIGINT))                                                     AS d2_2_uv
+        ,SUM(CAST(d2_2_pv AS BIGINT))                                                     AS d2_2_pv
+        ,SUM(CAST(d2_2_exp AS BIGINT))                                                    AS d2_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_2_rov
+        ,SUM(CAST(d3_2_uv AS BIGINT))                                                     AS d3_2_uv
+        ,SUM(CAST(d3_2_pv AS BIGINT))                                                     AS d3_2_pv
+        ,SUM(CAST(d3_2_exp AS BIGINT))                                                    AS d3_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_2_rov
+        -- hop3
+        ,SUM(CAST(d1_3_uv AS BIGINT))                                                     AS d1_3_uv
+        ,SUM(CAST(d1_3_pv AS BIGINT))                                                     AS d1_3_pv
+        ,SUM(CAST(d1_3_exp AS BIGINT))                                                    AS d1_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d1_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d1_3_rov
+        ,SUM(CAST(d2_3_uv AS BIGINT))                                                     AS d2_3_uv
+        ,SUM(CAST(d2_3_pv AS BIGINT))                                                     AS d2_3_pv
+        ,SUM(CAST(d2_3_exp AS BIGINT))                                                    AS d2_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_3_rov
+        ,SUM(CAST(d3_3_uv AS BIGINT))                                                     AS d3_3_uv
+        ,SUM(CAST(d3_3_pv AS BIGINT))                                                     AS d3_3_pv
+        ,SUM(CAST(d3_3_exp AS BIGINT))                                                    AS d3_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_3_rov
+
+        -- ==================== 全链路 ====================
+        ,SUM(CAST(bn_uv AS BIGINT))
+            + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+            + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+                                                                                        AS all_uv
+        ,SUM(CAST(bn_pv AS BIGINT))
+            + SUM(CAST(cn_1_pv AS BIGINT)) + SUM(CAST(cn_2_pv AS BIGINT)) + SUM(CAST(cn_3_pv AS BIGINT))
+            + SUM(CAST(dn_1_pv AS BIGINT)) + SUM(CAST(dn_2_pv AS BIGINT)) + SUM(CAST(dn_3_pv AS BIGINT))
+                                                                                        AS all_pv
+        ,SUM(CAST(bn_exp AS BIGINT))
+            + SUM(CAST(cn_1_exp AS BIGINT)) + SUM(CAST(cn_2_exp AS BIGINT)) + SUM(CAST(cn_3_exp AS BIGINT))
+            + SUM(CAST(dn_1_exp AS BIGINT)) + SUM(CAST(dn_2_exp AS BIGINT)) + SUM(CAST(dn_3_exp AS BIGINT))
+                                                                                        AS all_exp
+        ,ROUND(COALESCE(
+            (   SUM(CAST(bn_uv AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_ror
+        ,ROUND(COALESCE(
+            (   SUM(CAST(bn_uv AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_rov
+
+
+-- =====================================================================
+-- FROM + GROUP BY CUBE
+-- =====================================================================
+
+FROM    t_wide
+
+GROUP BY CUBE(
+            user_type
+            ,hh_bucket
+            ,head_merge_leve2
+            ,vid_merge_leve2
+            ,vid_id
+        )
+
+ORDER BY exposure_cnt DESC
+;

+ 773 - 0
table_gen/loghubods.dwd_recsys_alg_exposure_base_20250108.sql

@@ -0,0 +1,773 @@
+--@exclude_input=loghubods.video_action_log_flow_new
+--@exclude_input=loghubods.user_share_log_flow
+--*********************
+-- alg_recsys_rank_labelmatch_20250108: recsys exposure base table (DDL; hourly partitions dt/hh, ALIORC)
+--*********************
+--drop table loghubods.dwd_recsys_alg_exposure_base_20250108;
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_20250108
+(
+    apptype                    STRING
+    ,uid                       STRING
+    ,mid                       STRING
+    ,vid                       STRING
+    ,sessionid                 STRING
+    ,subsessionid              STRING
+    ,pagesource                STRING
+    ,page                      STRING
+    ,recommendlogvo            STRING COMMENT '推荐算法的返回结果日志存在这个字段中'
+    ,abcode                    STRING COMMENT '推荐算法的ab分组:ab0'
+    ,recommendpagetype         STRING COMMENT '用于区分pagesource相同时某些场景的。三种回流头部;两种下滑-沉浸页下滑和feed下滑。 -pages/user-videos-share-recommend-detail 是沉浸页。'
+    ,recomtraceid              STRING COMMENT '在后端调取推荐服务之前生成。前端降级会空;后端也可能为空。'
+    ,headvideoid               STRING
+    ,rootsourceid              STRING COMMENT '区分touliu等流量,咨询产品。'
+    ,hotsencetype              STRING
+    ,flowpool                  STRING COMMENT '非流量池,是空字符串。没有null值。'
+    ,level                     STRING COMMENT '非流量池,是null。'
+    ,clientip                  STRING
+    ,machineinfo_brand         STRING
+    ,machineinfo_model         STRING
+    ,machineinfo_system        STRING
+    ,machineinfo_wechatversion STRING
+    ,machineinfo_sdkversion    STRING
+    ,province                  STRING
+    ,city                      STRING
+    ,ts                        STRING
+    ,is_share                  STRING
+    ,share_cnt                 STRING
+    ,is_return_1               STRING
+    ,return_1_pv               STRING
+    ,return_1_uv               STRING
+    ,return_1_mids             STRING
+    ,is_return_n               STRING
+    ,return_n_pv               STRING
+    ,return_n_uv               STRING
+    ,return_n_mids             STRING
+    ,is_return_noself          STRING
+    ,return_1_uv_noself        STRING
+    ,return_1_mids_noself      STRING
+    ,is_return_n_noself        STRING
+    ,return_n_uv_noself        STRING
+    ,return_n_mids_noself      STRING
+    ,new_exposure_cnt          STRING
+    ,extend                    STRING
+)
+PARTITIONED BY 
+(
+    dt                         STRING COMMENT '日期:20240105'
+    ,hh                        STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = '推荐算法-labelmatch表-20250108更新最新版')
+LIFECYCLE 3650
+;
+
+SET hive.exec.dynamic.partition = true -- enable dynamic partitioning for the PARTITION (dt,hh) insert below
+;
+
+SET hive.exec.dynamic.partition.mode = nonstrict -- allow all partition columns to be resolved dynamically
+;
+
+SET odps.stage.mapper.split.size = 1024 -- enlarge mapper split size to reduce task count on the big log scans
+;
+
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_base_20250108 PARTITION (dt,hh)
+WITH t_return AS -- return-visit clicks in the (now-25h, now-1h] window, deduped, with a synthetic id
+(
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid -- NOTE: subsessionid of the return visit; it is reset on every return click, so it can link a return back to its exposures.
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow -- return-visit events; each subsessionid should occur once, but dirty data exists, hence the dedup.
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12') -- apptype 12 uses pagesource h5-share / h5-detail; filtered out for now, not handled.
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$" -- dirty data exists (pagesource ending in vlog-gzh or /mine/mine-info$); all of it is filtered out.
+            ) 
+    WHERE   rn = 1
+)
+,t_share_from_sharelog AS -- share events in the same 25h..1h window, deduped on the full grouping key
+(
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12') -- same exclusion as t_return: apptype 12 not handled
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            ) 
+    WHERE   rn = 1
+)
+,t_exposure AS -- exposures in the 25h..1h window from the base view; extparams parsed to JSON (falls back to "{}" when invalid)
+(
+    SELECT  dthh_id
+            ,dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,rootsessionid_new
+            ,pagesource
+            ,recommendlogvo
+            ,abcode
+            ,recommendpagetype
+            ,recomtraceid
+            ,headvideoid
+            ,rootsourceid
+            ,hotsencetype
+            ,animationscenetype
+            ,JSON_PARSE(IF(JSON_VALID(extparams),extparams,"{}")) AS extParams
+            ,flowpool
+            ,level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,province
+            ,city
+            ,versioncode
+            ,ts
+            ,rn
+            ,id
+            ,dt
+            ,hh
+    FROM    loghubods.dwd_recsys_alg_exposure_base_view_20250402
+    WHERE   CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+)
+,t_exposure_recommend AS -- exposures restricted to category / recommend / user-videos-detail pages
+(
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+)
+,t_return_exposure_1 AS -- match exposures to returns for viewh24 (pass 1: mid + headvideoid + subsessionid; latest return by ts wins)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.id ORDER BY t2.ts DESC ) AS rn
+                FROM    t_exposure_recommend t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.headvideoid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_return_exposure_2 AS -- pass 2 on pass-1 misses: relax to mid + headvideoid + sessionid
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_1
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.headvideoid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_return_exposure_3 AS -- pass 3 on pass-2 misses: relax to mid + subsessionid (drops headvideoid)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_2
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.subsessionid = t2.subsessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_return_exposure_4 AS -- pass 4 on pass-3 misses: loosest key, mid + sessionid
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_3
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.sessionid = t2.sessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_return_exposure AS -- each return enriched with the count of exposures attributed to it across passes 1-4
+(
+    SELECT  a.*
+            ,b.exposure_cnt AS new_exposure_cnt
+    FROM    t_return a
+    LEFT JOIN   (
+                    SELECT  return_id
+                            ,COUNT(1) AS exposure_cnt
+                    FROM    (
+                                SELECT  *
+                                FROM    t_return_exposure_1
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_2
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_3
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_4
+                                WHERE   return_id IS NOT NULL
+                            ) 
+                    GROUP BY return_id
+                ) b
+    ON      a.id = b.return_id
+)
+,t_normal_share_exposure_1 AS -- attribute regular shares to exposures (pass 1: subsessionid + pagesource, exposure must precede the share)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    t_share_from_sharelog t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts
+                WHERE   t1.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_2 AS -- pass 2 on pass-1 misses: sessionid + pagesource, exposure still before share
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_1
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_3 AS -- pass 3: subsessionid + pagesource, ts ordering dropped
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_2
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.pagesource = t2.pagesource
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_4 AS -- pass 4: sessionid + pagesource, no ts ordering
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_3
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.pagesource = t2.pagesource
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_5 AS -- pass 5: subsessionid only (pagesource dropped)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_4
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_6 AS -- pass 6 (final): sessionid only; unmatched rows survive with NULL exposure_id
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_5
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_exposure_detail AS -- exposures on detail / detail-recommend pages, used by the non-regular share attribution below
+(
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
+)
+,t_no_normal_share_exposure_1 AS -- attribute non-regular shares (detail-user-videos-share-recommend pages) to detail exposures (pass 1: subsessionid, exposure before share)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    t_share_from_sharelog t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.ts >= t2.ts
+                WHERE   t1.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+            ) 
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_2 AS -- pass 2 on pass-1 misses: sessionid, exposure before share
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_1
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.ts >= t2.ts
+            ) 
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_3 AS -- pass 3: subsessionid, ts ordering dropped
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_2
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_4 AS -- pass 4 (final): sessionid; unmatched rows survive with NULL exposure_id
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_3
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_share_exposure AS -- union of all attribution passes; every share appears exactly once
+(
+    SELECT  *
+    FROM    t_normal_share_exposure_1
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_2
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_3
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_4
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_5
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_6 -- final pass intentionally unfiltered: keeps shares that never matched (exposure_id NULL)
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_1
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_2
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_3
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_4 -- final non-regular pass, also intentionally unfiltered
+)
+,t_share_with_label AS -- attach first-hop (return_1) and all-hop (return_n) return metrics to each attributed share
+(
+    SELECT  a.dthh
+            ,a.apptype -- join key
+            ,a.mid
+            ,a.vid -- join key
+            ,a.sessionid
+            ,a.subsessionid
+            ,a.pagesource
+            ,a.shareid -- join key
+            ,a.ts
+            ,a.exposure_id
+            ,COALESCE(b.return_1_pv,0) AS return_1_pv
+            ,COALESCE(b.return_1_uv,0) AS return_1_uv
+            ,b.return_1_mids AS return_1_mids -- may be NULL; decide later whether to pre-process.
+            ,COALESCE(c.return_n_pv,0) AS return_n_pv
+            ,COALESCE(c.return_n_uv,0) AS return_n_uv
+            ,c.return_n_mids AS return_n_mids -- may be NULL; decide later whether to pre-process.
+            ,COALESCE(c.new_exposure_cnt,0) AS new_exposure_cnt
+    FROM    t_share_exposure a
+    LEFT JOIN   (
+                    SELECT  shareid
+                            ,vid
+                            ,apptype
+                            ,COUNT(1) AS return_1_pv
+                            ,COUNT(DISTINCT mid) AS return_1_uv
+                            ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_1_mids
+                    FROM    t_return
+                    GROUP BY shareid
+                             ,vid
+                             ,apptype
+                ) b
+    ON      a.shareid = b.shareid
+    AND     a.vid = b.vid
+    AND     a.apptype = b.apptype
+    LEFT JOIN   (
+                    SELECT  rootshareid
+                            ,vid
+                            ,apptype
+                            ,COUNT(1) AS return_n_pv
+                            ,COUNT(DISTINCT mid) AS return_n_uv
+                            ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_n_mids
+                            ,SUM(new_exposure_cnt) AS new_exposure_cnt
+                    FROM    t_return_exposure
+                    GROUP BY rootshareid
+                             ,vid
+                             ,apptype
+                ) c
+    ON      a.shareid = c.rootshareid
+    AND     a.vid = c.vid
+    AND     a.apptype = c.apptype
+)
+-- Roll the share-level labels up to one row per exposure_id.
+,t_share_with_label_group AS 
+(
+    SELECT  exposure_id
+            ,COUNT(1) AS share_cnt
+            ,SUM(return_1_pv) AS return_1_pv
+            -- DEDUPLICATION4LIST is presumably a project UDF that dedupes a comma list; verify its definition
+            ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))),",")),0) AS return_1_uv
+            ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))) AS return_1_mids -- may be NULL
+            ,SUM(return_n_pv) AS return_n_pv
+            ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))),",")),0) AS return_n_uv
+            ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))) AS return_n_mids -- may be NULL
+            ,SUM(new_exposure_cnt) AS new_exposure_cnt
+    FROM    t_share_with_label
+    GROUP BY exposure_id
+)
+-- Latest root_source_id -> group_name mapping, one row per root_source_id.
+,t_root_source_id_group_name AS 
+(
+    SELECT  *
+    FROM    (
+                SELECT  root_source_id
+                        ,group_name
+                        -- fix: ROW_NUMBER without ORDER BY picked an arbitrary row per
+                        -- root_source_id, so the surviving group_name was nondeterministic
+                        -- across runs; order by group_name to make the pick stable.
+                        ,ROW_NUMBER() OVER (PARTITION BY root_source_id ORDER BY group_name ) AS rn
+                FROM    loghubods.changwen_rootsourceid_group_hour
+                WHERE   dt = MAX_PT('loghubods.changwen_rootsourceid_group_hour')
+            ) 
+    WHERE   rn = 1
+)
+-- Final wide row: every exposure labelled with its share and return statistics,
+-- plus a JSON 'extend' blob; partitions (dt, hh) are derived from dthh.
+,t_exposure_share_return AS 
+(
+    SELECT  apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,CASE   WHEN pagesource REGEXP 'pages/user-videos-share-recommend$' THEN '回流后沉浸页&内页feed'
+                    WHEN pagesource REGEXP 'pages/detail-recommend$' THEN '详情后沉浸页'
+                    WHEN pagesource REGEXP 'pages/user-videos-share$' THEN '回流页'
+                    WHEN pagesource REGEXP 'pages/user-videos-detail$' THEN '详情页'
+                    WHEN pagesource REGEXP 'pages/category$' THEN '首页feed'
+                    ELSE '其他'
+            END AS pagesource_new
+            ,recommendlogvo -- the recommender's response log is stored in this field
+            ,abcode -- AB group of the recommendation algorithm
+            ,recommendpagetype -- three return-header kinds; two scroll kinds: immersive scroll and feed scroll
+            ,recomtraceid
+            ,headvideoid
+            ,rootsourceid
+            ,hotsencetype
+            ,flowpool -- e.g. 14#68#3#1735262438476#2
+            ,level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,province
+            ,city
+            ,ts
+            ,IF(COALESCE(share_cnt,0) > 0,1,0) AS is_share
+            ,COALESCE(share_cnt,0) AS share_cnt
+            ,IF(COALESCE(return_1_uv,0) > 0,1,0) AS is_return_1
+            ,COALESCE(return_1_pv,0) AS return_1_pv
+            ,COALESCE(return_1_uv,0) AS return_1_uv
+            ,return_1_mids -- may be NULL
+            ,IF(COALESCE(return_n_pv,0) > 0,1,0) AS is_return_n
+            ,COALESCE(return_n_pv,0) AS return_n_pv
+            ,COALESCE(return_n_uv,0) AS return_n_uv
+            ,return_n_mids -- may be NULL
+            -- fix: dropped the redundant nested COALESCE (the inner one already defaults to 0)
+            ,IF(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0) > 0,1,0) AS is_return_noself
+            ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0) AS return_1_uv_noself
+            ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid),",") AS return_1_mids_noself
+            ,IF(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0) > 0,1,0) AS is_return_n_noself
+            ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0) AS return_n_uv_noself
+            ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid),",") AS return_n_mids_noself
+            -- fix: single-argument COALESCE was a no-op; default NULL to 0 like every other count column
+            ,COALESCE(new_exposure_cnt,0) AS new_exposure_cnt
+            ,JSON_FORMAT(
+                        JSON_OBJECT("animationSceneType",animationSceneType,"extParams",extParams,"rootsessionid",rootsessionid_new,"versioncode",versioncode,"group_name",tc.group_name)
+            ) AS extend
+            ,SUBSTR(dthh,1,8) AS dt
+            ,SUBSTR(dthh,9,2) AS hh
+    FROM    t_exposure ta
+    LEFT JOIN t_share_with_label_group tb
+    ON      ta.id = tb.exposure_id
+    LEFT JOIN t_root_source_id_group_name tc
+    ON      ta.rootsourceid = tc.root_source_id
+)SELECT  *
+FROM    t_exposure_share_return
+;

+ 1014 - 0
table_gen/loghubods.dwd_recsys_alg_exposure_base_20260206.sql

@@ -0,0 +1,1014 @@
+--@exclude_input=loghubods.video_action_log_flow_new
+--@exclude_input=loghubods.user_share_log_flow
+--*********************
+-- alg_recsys_rank_labelmatch_20260206
+-- Based on the 20250108 table; adds the multi-hop B/C return columns
+-- and the D-chain (session-propagated) columns.
+--*********************
+-- drop table if exists loghubods.dwd_recsys_alg_exposure_base_20260206;
+-- NOTE: all metric columns are STRING to stay schema-compatible with the
+-- predecessor table; downstream consumers cast as needed.
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_20260206
+(
+    apptype                    STRING
+    ,uid                       STRING
+    ,mid                       STRING
+    ,vid                       STRING
+    ,sessionid                 STRING
+    ,subsessionid              STRING
+    ,pagesource                STRING
+    ,page                      STRING
+    ,recommendlogvo            STRING COMMENT '推荐算法的返回结果日志存在这个字段中'
+    ,abcode                    STRING COMMENT '推荐算法的ab分组:ab0'
+    ,recommendpagetype         STRING COMMENT '用于区分pagesource相同时某些场景的。三种回流头部;两种下滑-沉浸页下滑和feed下滑。 -pages/user-videos-share-recommend-detail 是沉浸页。'
+    ,recomtraceid              STRING COMMENT '在后端调取推荐服务之前生成。前端降级会空;后端也可能为空。'
+    ,headvideoid               STRING
+    ,rootsourceid              STRING COMMENT '区分touliu等流量,咨询产品。'
+    ,hotsencetype              STRING
+    ,flowpool                  STRING COMMENT '非流量池,是空字符串。没有null值。'
+    ,level                     STRING COMMENT '非流量池,是null。'
+    ,clientip                  STRING
+    ,machineinfo_brand         STRING
+    ,machineinfo_model         STRING
+    ,machineinfo_system        STRING
+    ,machineinfo_wechatversion STRING
+    ,machineinfo_sdkversion    STRING
+    ,province                  STRING
+    ,city                      STRING
+    ,ts                        STRING
+    ,is_share                  STRING
+    ,share_cnt                 STRING
+    ,is_return_1               STRING
+    ,return_1_pv               STRING
+    ,return_1_uv               STRING
+    ,return_1_mids             STRING
+    ,is_return_n               STRING
+    ,return_n_pv               STRING
+    ,return_n_uv               STRING
+    ,return_n_mids             STRING
+    ,is_return_noself          STRING
+    ,return_1_uv_noself        STRING
+    ,return_1_mids_noself      STRING
+    ,is_return_n_noself        STRING
+    ,return_n_uv_noself        STRING
+    ,return_n_mids_noself      STRING
+    ,new_exposure_cnt          STRING
+    ,b                         STRING COMMENT '直接回流去重人数(B)'
+    ,c_1                       STRING COMMENT '1跳回流SUM(B)'
+    ,c_2                       STRING COMMENT '2跳回流SUM(B)'
+    ,c_3                       STRING COMMENT '3跳回流SUM(B)'
+    ,d_1                       STRING COMMENT 'D链1跳: 同subsession后续曝光的B之和'
+    ,d_2                       STRING COMMENT 'D链2跳: d1回流用户session内曝光的B之和'
+    ,d_3                       STRING COMMENT 'D链3跳: d2回流用户session内曝光的B之和(去环)'
+    ,b_mids                    STRING COMMENT 'B对应的回流mid列表'
+    ,c_1_mids                  STRING COMMENT 'C_1对应的回流mid列表'
+    ,c_2_mids                  STRING COMMENT 'C_2对应的回流mid列表'
+    ,c_3_mids                  STRING COMMENT 'C_3对应的回流mid列表'
+    ,d_1_mids                  STRING COMMENT 'D链1跳对应的回流mid列表'
+    ,d_2_mids                  STRING COMMENT 'D链2跳对应的回流mid列表'
+    ,d_3_mids                  STRING COMMENT 'D链3跳对应的回流mid列表'
+    ,extend                    STRING
+)
+PARTITIONED BY
+(
+    dt                         STRING COMMENT '日期:20240105'
+    ,hh                        STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = '推荐算法-labelmatch表-20260206更新-含多跳B/C/D')
+LIFECYCLE 3650
+;
+
+-- Allow the INSERT below to write (dt, hh) partitions dynamically
+-- from the values produced by the SELECT.
+SET hive.exec.dynamic.partition = true
+;
+
+SET hive.exec.dynamic.partition.mode = nonstrict
+;
+
+-- Larger mapper split size (MB) to reduce the number of map tasks.
+SET odps.stage.mapper.split.size = 1024
+;
+
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_base_20260206 PARTITION (dt,hh)
+WITH t_return AS
+(
+    -- Return (click-back) events over the trailing [now-25h, now-1h] window,
+    -- deduped; id uniquely tags one return within (dthh, shareid, vid).
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid -- NOTE: this is the subsessionid of the return; it is reset on every return click, so it can be used to find the exposures belonging to that return.
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow -- return events; in theory one row per subsessionid, but dirty data exists, so dedupe.
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12') -- apptype 12's pagesource is h5-share / h5-detail; filtered out for now, not handled.
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$" -- dirty data (e.g. vlog-gzh, endings like /mine/mine-info$) is filtered out here.
+            )
+    WHERE   rn = 1
+)
+,t_share_from_sharelog AS
+(
+    -- Share events over the same trailing window, deduped on the full
+    -- (dthh, apptype, mid, vid, sessionid, subsessionid, pagesource, shareid) key.
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            )
+    WHERE   rn = 1
+)
+,t_exposure AS
+(
+    -- Exposure base rows from the 20250402 view, over the same trailing window.
+    SELECT  dthh_id
+            ,dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,rootsessionid_new
+            ,pagesource
+            ,recommendlogvo
+            ,abcode
+            ,recommendpagetype
+            ,recomtraceid
+            ,headvideoid
+            ,rootsourceid
+            ,hotsencetype
+            ,animationscenetype
+            -- keep extparams as parsed JSON; fall back to an empty object when invalid
+            ,JSON_PARSE(IF(JSON_VALID(extparams),extparams,"{}")) AS extParams
+            ,flowpool
+            ,level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,province
+            ,city
+            ,versioncode
+            ,ts
+            ,rn
+            ,id
+            ,dt
+            ,hh
+    FROM    loghubods.dwd_recsys_alg_exposure_base_view_20250402
+    WHERE   CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+)
+,t_exposure_recommend AS
+(
+    -- Exposures on recommend-style surfaces only: the home feed (category),
+    -- immersive pages (…recommend) and the video detail page. Equivalent to
+    -- the single alternation pattern 'category$|recommend$|-pages/user-videos-detail$'.
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP 'category$'
+    OR      pagesource REGEXP 'recommend$'
+    OR      pagesource REGEXP '-pages/user-videos-detail$'
+)
+,t_return_exposure_1 AS -- join exposures to returns (used to compute viewh24); pass 1: match on (mid, headvideoid=vid, subsessionid)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.id ORDER BY t2.ts DESC ) AS rn
+                FROM    t_exposure_recommend t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.headvideoid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            )
+    WHERE   rn = 1
+)
+,t_return_exposure_2 AS -- pass 2: for exposures unmatched in pass 1, relax to sessionid
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_1
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.headvideoid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            )
+    WHERE   rn = 1
+)
+,t_return_exposure_3 AS -- pass 3: still unmatched, drop the headvideoid condition, match on subsessionid
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_2
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.subsessionid = t2.subsessionid
+            )
+    WHERE   rn = 1
+)
+,t_return_exposure_4 AS -- pass 4 (loosest): match on sessionid only
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_3
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.sessionid = t2.sessionid
+            )
+    WHERE   rn = 1
+)
+,t_return_exposure AS
+(
+    -- Each return annotated with the number of exposures it triggered
+    -- (new_exposure_cnt), collected across the four match passes above.
+    SELECT  a.*
+            ,b.exposure_cnt AS new_exposure_cnt
+    FROM    t_return a
+    LEFT JOIN   (
+                    SELECT  return_id
+                            ,COUNT(1) AS exposure_cnt
+                    FROM    (
+                                SELECT  *
+                                FROM    t_return_exposure_1
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_2
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_3
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_4
+                                WHERE   return_id IS NOT NULL
+                            )
+                    GROUP BY return_id
+                ) b
+    ON      a.id = b.return_id
+)
+,t_normal_share_exposure_1 AS -- start matching regular shares to their exposures; pass 1: subsessionid + pagesource, exposure not later than the share
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    t_share_from_sharelog t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts
+                WHERE   t1.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_2 AS -- pass 2: unmatched shares, relax subsessionid to sessionid (still ts-bounded)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_1
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_3 AS -- pass 3: subsessionid + pagesource, ts constraint dropped
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_2
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.pagesource = t2.pagesource
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_4 AS -- pass 4: sessionid + pagesource, no ts
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_3
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.pagesource = t2.pagesource
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_5 AS -- pass 5: subsessionid only (pagesource dropped)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_4
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_6 AS -- pass 6 (loosest): sessionid only
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_5
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            )
+    WHERE   rn = 1
+)
+,t_exposure_detail AS
+(
+    -- Exposures on the detail page or the post-detail immersive feed.
+    -- Equivalent to the single alternation pattern
+    -- "-pages/user-videos-detail$|pages/detail-recommend$".
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP "-pages/user-videos-detail$"
+    OR      pagesource REGEXP "pages/detail-recommend$"
+)
+,t_no_normal_share_exposure_1 AS -- start matching non-regular shares (detail-immersive pagesource) to detail exposures; pass 1: subsessionid, ts-bounded
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    t_share_from_sharelog t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.ts >= t2.ts
+                WHERE   t1.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+            )
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_2 AS -- pass 2: relax to sessionid (still ts-bounded)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_1
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.ts >= t2.ts
+            )
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_3 AS -- pass 3: subsessionid, ts constraint dropped
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_2
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            )
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_4 AS -- pass 4 (loosest): sessionid only
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_3
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            )
+    WHERE   rn = 1
+)
+,t_share_exposure AS
+(
+    -- Union of all share->exposure match passes. NOTE(review): the final pass of
+    -- each cascade (normal_6, no_normal_4) is unioned WITHOUT the exposure_id
+    -- filter, so unmatched shares (exposure_id NULL) survive -- this matches the
+    -- sibling 20260209 script and looks intentional; confirm.
+    SELECT  *
+    FROM    t_normal_share_exposure_1
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_2
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_3
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_4
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_5
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_6
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_1
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_2
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_3
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_4
+)
+--========================================
+-- Multi-hop B/C computation (BFS frontier + anti-join de-cycling, 24h window only)
+--========================================
+,t_share_return AS (
+    -- One row per (share-exposure, return event) over the whole share tree
+    -- (joined on rootshareid); the return's subsessionid is the edge to the
+    -- next hop's exposures.
+    SELECT  se.exposure_id
+            ,se.shareid
+            ,se.vid
+            ,se.apptype
+            ,se.subsessionid
+            ,r.subsessionid AS return_subsessionid
+            ,r.mid AS return_mid
+    FROM    t_share_exposure se
+    JOIN    t_return r
+    ON      se.shareid = r.rootshareid
+    AND     se.vid = r.vid
+    AND     se.apptype = r.apptype
+)
+,t_exposure_bn AS (
+    -- B per exposure: count and set of distinct directly-returning mids.
+    SELECT  exposure_id
+            ,COUNT(DISTINCT return_mid) AS B
+            ,COLLECT_SET(return_mid) AS B_mids
+    FROM    t_share_return
+    GROUP BY exposure_id
+)
+-- BFS frontier 1: subsessionids reached directly by this exposure's returns
+,t_frontier_1 AS (
+    SELECT DISTINCT exposure_id AS source_id, return_subsessionid AS reached_sub
+    FROM   t_share_return
+)
+-- C_1: sum of B over the exposures that occurred inside frontier_1 subsessions
+,t_c1 AS (
+    SELECT  f.source_id AS exposure_id, SUM(bn.B) AS C_1
+    FROM    t_frontier_1 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    GROUP BY f.source_id
+)
+,t_c1_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_1_mids
+    FROM    t_frontier_1 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    JOIN    t_share_return sr ON bn.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+-- BFS frontier 2: frontier_1 延伸, anti-join 排除 frontier_1
+,t_frontier_2 AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_frontier_1 f1
+    JOIN    t_exposure e1 ON f1.reached_sub = e1.subsessionid
+    JOIN    t_exposure_bn bn1 ON e1.id = bn1.exposure_id
+    JOIN    t_share_return sr2 ON bn1.exposure_id = sr2.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+,t_c2 AS (
+    SELECT  f.source_id AS exposure_id, SUM(bn.B) AS C_2
+    FROM    t_frontier_2 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    GROUP BY f.source_id
+)
+,t_c2_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_2_mids
+    FROM    t_frontier_2 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    JOIN    t_share_return sr ON bn.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+-- BFS frontier 3: frontier_2 延伸, anti-join 排除 frontier_1 + frontier_2
+,t_frontier_3 AS (
+    SELECT DISTINCT f2.source_id, sr3.return_subsessionid AS reached_sub
+    FROM    t_frontier_2 f2
+    JOIN    t_exposure e2 ON f2.reached_sub = e2.subsessionid
+    JOIN    t_exposure_bn bn2 ON e2.id = bn2.exposure_id
+    JOIN    t_share_return sr3 ON bn2.exposure_id = sr3.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f2.source_id = v1.source_id
+        AND sr3.return_subsessionid = v1.reached_sub
+    LEFT JOIN t_frontier_2 v2
+        ON  f2.source_id = v2.source_id
+        AND sr3.return_subsessionid = v2.reached_sub
+    WHERE   v1.source_id IS NULL AND v2.source_id IS NULL
+)
+,t_c3 AS (
+    SELECT  f.source_id AS exposure_id, SUM(bn.B) AS C_3
+    FROM    t_frontier_3 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    GROUP BY f.source_id
+)
+,t_c3_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS C_3_mids
+    FROM    t_frontier_3 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    JOIN    t_share_return sr ON bn.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+--========================================
+-- D chain: propagation via later exposures in the same session (BFS, cycle removal)
+--========================================
+-- D_1: for each exposure, sum of B over LATER exposures (strictly greater ts)
+-- within the same subsession. ts is stored as STRING, hence the BIGINT casts.
+,t_d1 AS (
+    SELECT  e1.id AS exposure_id
+            ,SUM(bn2.B) AS D_1
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    t_exposure_bn bn2
+    ON      e2.id = bn2.exposure_id
+    GROUP BY e1.id
+)
+-- mids behind D_1, recovered via t_share_return on the later exposures.
+,t_d1_mids AS (
+    SELECT  e1.id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_1_mids
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    t_share_return sr
+    ON      e2.id = sr.exposure_id
+    GROUP BY e1.id
+)
+-- D frontier 1: subsessionids reached by returns from later same-session exposures.
+,t_d1_frontier AS (
+    SELECT DISTINCT e1.id AS source_id
+           ,sr.return_subsessionid AS reached_sub
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    t_share_return sr
+    ON      e2.id = sr.exposure_id
+)
+-- D_2: sum of B over exposures inside D-frontier-1 sessions.
+,t_d2 AS (
+    SELECT  f.source_id AS exposure_id, SUM(bn.B) AS D_2
+    FROM    t_d1_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    GROUP BY f.source_id
+)
+,t_d2_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_2_mids
+    FROM    t_d1_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    JOIN    t_share_return sr ON bn.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+-- D frontier 2: extend D frontier 1; anti-join removes already-visited
+-- subsessionids (session-level cycle removal, same scheme as the C chain).
+,t_d2_frontier AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_d1_frontier f1
+    JOIN    t_exposure e1 ON f1.reached_sub = e1.subsessionid
+    JOIN    t_exposure_bn bn1 ON e1.id = bn1.exposure_id
+    JOIN    t_share_return sr2 ON bn1.exposure_id = sr2.exposure_id
+    LEFT JOIN t_d1_frontier v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+-- D_3: sum of B over exposures inside D-frontier-2 sessions.
+,t_d3 AS (
+    SELECT  f.source_id AS exposure_id, SUM(bn.B) AS D_3
+    FROM    t_d2_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    GROUP BY f.source_id
+)
+,t_d3_mids AS (
+    SELECT  f.source_id AS exposure_id
+            ,COLLECT_SET(sr.return_mid) AS D_3_mids
+    FROM    t_d2_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_exposure_bn bn ON e.id = bn.exposure_id
+    JOIN    t_share_return sr ON bn.exposure_id = sr.exposure_id
+    GROUP BY f.source_id
+)
+--========================================
+-- Original CTEs continue below
+--========================================
+-- Each shared exposure labelled with its 1-degree and n-degree return metrics.
+,t_share_with_label AS
+(
+    SELECT  a.dthh
+            ,a.apptype -- join key
+            ,a.mid
+            ,a.vid -- join key
+            ,a.sessionid
+            ,a.subsessionid
+            ,a.pagesource
+            ,a.shareid -- join key
+            ,a.ts
+            ,a.exposure_id
+            ,COALESCE(b.return_1_pv,0) AS return_1_pv
+            ,COALESCE(b.return_1_uv,0) AS return_1_uv
+            ,b.return_1_mids AS return_1_mids -- may be NULL; decide later whether to pre-clean
+            ,COALESCE(c.return_n_pv,0) AS return_n_pv
+            ,COALESCE(c.return_n_uv,0) AS return_n_uv
+            ,c.return_n_mids AS return_n_mids -- may be NULL; decide later whether to pre-clean
+            ,COALESCE(c.new_exposure_cnt,0) AS new_exposure_cnt
+    FROM    t_share_exposure a
+    -- b: 1-degree returns, keyed by the share's own shareid.
+    LEFT JOIN   (
+                    SELECT  shareid
+                            ,vid
+                            ,apptype
+                            ,COUNT(1) AS return_1_pv
+                            ,COUNT(DISTINCT mid) AS return_1_uv
+                            ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_1_mids
+                    FROM    t_return
+                    GROUP BY shareid
+                             ,vid
+                             ,apptype
+                ) b
+    ON      a.shareid = b.shareid
+    AND     a.vid = b.vid
+    AND     a.apptype = b.apptype
+    -- c: n-degree returns, keyed by rootshareid (whole share tree rooted here).
+    LEFT JOIN   (
+                    SELECT  rootshareid
+                            ,vid
+                            ,apptype
+                            ,COUNT(1) AS return_n_pv
+                            ,COUNT(DISTINCT mid) AS return_n_uv
+                            ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_n_mids
+                            ,SUM(new_exposure_cnt) AS new_exposure_cnt
+                    FROM    t_return_exposure
+                    GROUP BY rootshareid
+                             ,vid
+                             ,apptype
+                ) c
+    ON      a.shareid = c.rootshareid
+    AND     a.vid = c.vid
+    AND     a.apptype = c.apptype
+)
+-- Roll share labels up to the exposure level; mids lists are merged and
+-- deduped via DEDUPLICATION4LIST (project UDF) over the comma-joined lists.
+,t_share_with_label_group AS
+(
+    SELECT  exposure_id
+            ,COUNT(1) AS share_cnt
+            ,SUM(return_1_pv) AS return_1_pv
+            ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))),",")),0) AS return_1_uv
+            ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))) AS return_1_mids -- may be NULL
+            ,SUM(return_n_pv) AS return_n_pv
+            ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))),",")),0) AS return_n_uv
+            ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))) AS return_n_mids -- may be NULL
+            ,SUM(new_exposure_cnt) AS new_exposure_cnt
+    FROM    t_share_with_label
+    GROUP BY exposure_id
+)
+-- Latest-partition mapping root_source_id -> group_name, one row per root_source_id.
+-- BUGFIX: ROW_NUMBER had no ORDER BY, so the surviving row per root_source_id was
+-- arbitrary and could differ between runs; ordering by group_name makes the pick
+-- deterministic and the output reproducible.
+,t_root_source_id_group_name AS
+(
+    SELECT  *
+    FROM    (
+                SELECT  root_source_id
+                        ,group_name
+                        ,ROW_NUMBER() OVER (PARTITION BY root_source_id ORDER BY group_name ) AS rn
+                FROM    loghubods.changwen_rootsourceid_group_hour
+                WHERE   dt = MAX_PT('loghubods.changwen_rootsourceid_group_hour')
+            )
+    WHERE   rn = 1
+)
+-- Final assembly: one output row per exposure, LEFT JOINing share/return labels,
+-- group-name mapping, and all B/C/D chain metrics onto t_exposure.
+-- BUGFIX: new_exposure_cnt used single-argument COALESCE(new_exposure_cnt) — a
+-- no-op that left NULL for exposures with no shares, inconsistent with every
+-- other metric in this SELECT; now defaulted to 0 like the rest.
+,t_exposure_share_return AS
+(
+    SELECT  apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,CASE   WHEN pagesource REGEXP 'pages/user-videos-share-recommend$' THEN '回流后沉浸页&内页feed'
+                    WHEN pagesource REGEXP 'pages/detail-recommend$' THEN '详情后沉浸页'
+                    WHEN pagesource REGEXP 'pages/user-videos-share$' THEN '回流页'
+                    WHEN pagesource REGEXP 'pages/user-videos-detail$' THEN '详情页'
+                    WHEN pagesource REGEXP 'pages/category$' THEN '首页feed'
+                    ELSE '其他'
+            END AS pagesource_new
+            ,recommendlogvo -- recommendation service response log is stored in this field
+            ,abcode -- AB group of the recommendation algorithm
+            ,recommendpagetype -- three return-header types; two scroll types: immersive scroll and feed scroll
+            ,recomtraceid
+            ,headvideoid
+            ,rootsourceid
+            ,hotsencetype
+            ,flowpool -- e.g. 14#68#3#1735262438476#2
+            ,level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,province
+            ,city
+            ,ts
+            ,IF(COALESCE(share_cnt,0) > 0,1,0) AS is_share
+            ,COALESCE(share_cnt,0) AS share_cnt
+            ,IF(COALESCE(return_1_uv,0) > 0,1,0) AS is_return_1
+            ,COALESCE(return_1_pv,0) AS return_1_pv
+            ,COALESCE(return_1_uv,0) AS return_1_uv
+            ,return_1_mids -- may be NULL
+            ,IF(COALESCE(return_n_pv,0) > 0,1,0) AS is_return_n
+            ,COALESCE(return_n_pv,0) AS return_n_pv
+            ,COALESCE(return_n_uv,0) AS return_n_uv
+            ,return_n_mids -- may be NULL
+            -- "noself" variants drop the sharer's own mid from the return lists.
+            ,IF(COALESCE(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0),0) > 0,1,0) AS is_return_noself
+            ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0) AS return_1_uv_noself
+            ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid),",") AS return_1_mids_noself
+            ,IF(COALESCE(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0),0) > 0,1,0) AS is_return_n_noself
+            ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0) AS return_n_uv_noself
+            ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid),",") AS return_n_mids_noself
+            ,COALESCE(new_exposure_cnt,0) AS new_exposure_cnt
+            ,COALESCE(bn_hop.B, 0) AS b
+            ,COALESCE(c1_hop.C_1, 0) AS c_1
+            ,COALESCE(c2_hop.C_2, 0) AS c_2
+            ,COALESCE(c3_hop.C_3, 0) AS c_3
+            ,COALESCE(d1_hop.D_1, 0) AS d_1
+            ,COALESCE(d2_hop.D_2, 0) AS d_2
+            ,COALESCE(d3_hop.D_3, 0) AS d_3
+            ,CONCAT_WS(',', bn_hop.B_mids) AS b_mids
+            ,CONCAT_WS(',', c1m_hop.C_1_mids) AS c_1_mids
+            ,CONCAT_WS(',', c2m_hop.C_2_mids) AS c_2_mids
+            ,CONCAT_WS(',', c3m_hop.C_3_mids) AS c_3_mids
+            ,CONCAT_WS(',', d1m_hop.D_1_mids) AS d_1_mids
+            ,CONCAT_WS(',', d2m_hop.D_2_mids) AS d_2_mids
+            ,CONCAT_WS(',', d3m_hop.D_3_mids) AS d_3_mids
+            ,JSON_FORMAT(
+                        JSON_OBJECT("animationSceneType",animationSceneType,"extParams",extParams,"rootsessionid",rootsessionid_new,"versioncode",versioncode,"group_name",tc.group_name)
+            ) AS extend
+            ,SUBSTR(dthh,1,8) AS dt
+            ,SUBSTR(dthh,9,2) AS hh
+    FROM    t_exposure ta
+    LEFT JOIN t_share_with_label_group tb
+    ON      ta.id = tb.exposure_id
+    LEFT JOIN t_root_source_id_group_name tc
+    ON      ta.rootsourceid = tc.root_source_id
+    LEFT JOIN t_exposure_bn bn_hop
+    ON      ta.id = bn_hop.exposure_id
+    LEFT JOIN t_c1 c1_hop
+    ON      ta.id = c1_hop.exposure_id
+    LEFT JOIN t_c1_mids c1m_hop
+    ON      ta.id = c1m_hop.exposure_id
+    LEFT JOIN t_c2 c2_hop
+    ON      ta.id = c2_hop.exposure_id
+    LEFT JOIN t_c2_mids c2m_hop
+    ON      ta.id = c2m_hop.exposure_id
+    LEFT JOIN t_c3 c3_hop
+    ON      ta.id = c3_hop.exposure_id
+    LEFT JOIN t_c3_mids c3m_hop
+    ON      ta.id = c3m_hop.exposure_id
+    LEFT JOIN t_d1 d1_hop
+    ON      ta.id = d1_hop.exposure_id
+    LEFT JOIN t_d1_mids d1m_hop
+    ON      ta.id = d1m_hop.exposure_id
+    LEFT JOIN t_d2 d2_hop
+    ON      ta.id = d2_hop.exposure_id
+    LEFT JOIN t_d2_mids d2m_hop
+    ON      ta.id = d2m_hop.exposure_id
+    LEFT JOIN t_d3 d3_hop
+    ON      ta.id = d3_hop.exposure_id
+    LEFT JOIN t_d3_mids d3m_hop
+    ON      ta.id = d3m_hop.exposure_id
+)SELECT  *
+FROM    t_exposure_share_return
+;

+ 1425 - 0
table_gen/loghubods.dwd_recsys_alg_exposure_base_20260209.sql

@@ -0,0 +1,1425 @@
+--@exclude_input=loghubods.video_action_log_flow_new
+--@exclude_input=loghubods.user_share_log_flow
+-- =====================================================================
+-- 曝光回流基础表 (行级, 每行 = 一次曝光)
+-- 版本: 20260209 (基于 20260206 重构 B/C/D 链: +sharedepth 维度 + exp/pv/uv/mids)
+-- 版本历史: 20250108 → 20260206(+B/C/D) → 20260209(+sharedepth+exp/pv/uv/mids)
+-- =====================================================================
+--
+-- 数据源 (3 张流水表):
+--   video_action_log_flow_new  → 曝光事件 (businesstype=videoView)
+--   user_share_log_flow        → 分享事件 (topic=share) + 回流点击 (topic=click)
+--   changwen_rootsourceid_group_hour → rootsourceid 分组名映射
+--
+-- 数据流:
+--   曝光去重 ──→ 分享关联曝光 ──→ 回流关联分享 ──→ B链(直达回流)
+--      │              │                                  │
+--      │              └── 回流关联曝光 ─── 1度/n度回流     ├─→ C链(二次传播, BFS 3hop)
+--      │                                                 └─→ D链(session内后续曝光传播, BFS 3hop)
+--      └── 最终 LEFT JOIN 组装 → 输出行级明细
+--
+-- CTE 管线:
+--   t_return               回流点击去重 (user_share_log_flow topic=click, ROW_NUMBER dedup)
+--   t_share_from_sharelog   分享行为去重 (user_share_log_flow topic=share)
+--   t_exposure_raw/t_exposure  曝光去重 (video_action_log_flow_new, 分 share/非share 两路 UNION ALL)
+--   t_return_exposure_1~4  回流关联曝光: 4 轮渐进放宽 JOIN 条件
+--                          1) subsessionid + headvideoid
+--                          2) sessionid + headvideoid
+--                          3) subsessionid (不限vid)
+--                          4) sessionid (不限vid)
+--   t_normal_share_exposure_1~6  常规分享关联曝光: 6 轮渐进放宽
+--                          1) subsessionid + pagesource + vid, ts>=
+--                          2) sessionid + pagesource + vid, ts>=
+--                          3) subsessionid + pagesource + vid (无ts)
+--                          4) sessionid + pagesource + vid (无ts)
+--                          5) subsessionid + vid
+--                          6) sessionid + vid
+--   t_no_normal_share_exposure_1~4  非常规(detail页)分享关联曝光: 4 轮
+--   t_share_return          bridge: 分享曝光 × 回流点击 (rootshareid + vid + apptype)
+--   B 链  t_exposure_bn (pv/uv/mids) + t_b_exp (exp), 按 sharedepth 拆分
+--   C 链  BFS 3hop: frontier_N → t_c_hopN (pv/uv/mids) + t_c_hopN_exp (exp)
+--   D 链  BFS 3hop: t_d0(成本) → t_d_hopN + t_d_hopN_exp, frontier anti-join 去环
+--   t_share_with_label/_group  分享标签聚合 (1度/n度 pv/uv/mids)
+--   t_exposure_share_return    最终 SELECT: LEFT JOIN 组装所有字段
+--
+-- 关键设计决策:
+--   去环策略: 仅 session 级 anti-join (frontier_N LEFT JOIN 排除已访问 subsessionid)
+--            不做用户级去环 (会丢失 A→B→A→C 中的 C)
+--   sharedepth: 来自 user_share_log_flow 的 click topic, CAST(sharedepth AS BIGINT)
+--   COLLECT_SET + CASE WHEN: 条件不满足时会加入 NULL, SIZE 需要 COALESCE 兜底
+-- =====================================================================
+-- drop table if exists loghubods.dwd_recsys_alg_exposure_base_20260209;
+-- NOTE(review): every metric column is declared STRING, including counts —
+-- presumably to match the 20250108 predecessor's schema; confirm before relying
+-- on numeric comparisons downstream.
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_20260209
+(
+    apptype                    STRING
+    ,uid                       STRING
+    ,mid                       STRING
+    ,vid                       STRING
+    ,sessionid                 STRING
+    ,subsessionid              STRING
+    ,pagesource                STRING
+    ,page                      STRING
+    ,recommendlogvo            STRING COMMENT '推荐算法的返回结果日志存在这个字段中'
+    ,abcode                    STRING COMMENT '推荐算法的ab分组:ab0'
+    ,recommendpagetype         STRING COMMENT '用于区分pagesource相同时某些场景的。三种回流头部;两种下滑-沉浸页下滑和feed下滑。 -pages/user-videos-share-recommend-detail 是沉浸页。'
+    ,recomtraceid              STRING COMMENT '在后端调取推荐服务之前生成。前端降级会空;后端也可能为空。'
+    ,headvideoid               STRING
+    ,rootsourceid              STRING COMMENT '区分touliu等流量,咨询产品。'
+    ,hotsencetype              STRING
+    ,flowpool                  STRING COMMENT '非流量池,是空字符串。没有null值。'
+    ,level                     STRING COMMENT '非流量池,是null。'
+    ,clientip                  STRING
+    ,machineinfo_brand         STRING
+    ,machineinfo_model         STRING
+    ,machineinfo_system        STRING
+    ,machineinfo_wechatversion STRING
+    ,machineinfo_sdkversion    STRING
+    ,province                  STRING
+    ,city                      STRING
+    ,ts                        STRING
+    ,is_share                  STRING
+    ,share_cnt                 STRING
+    ,is_return_1               STRING
+    ,return_1_pv               STRING
+    ,return_1_uv               STRING
+    ,is_return_n               STRING
+    ,return_n_pv               STRING
+    ,return_n_uv               STRING
+    ,is_return_noself          STRING
+    ,return_1_uv_noself        STRING
+    ,is_return_n_noself        STRING
+    ,return_n_uv_noself        STRING
+    ,new_exposure_cnt          STRING
+    -- ========== B chain (4 depths x 3 metrics = 12 columns) ==========
+    ,bn_exp                    STRING COMMENT 'B链全量: 回流用户session曝光数'
+    ,bn_pv                     STRING COMMENT 'B链全量: 回流点击次数'
+    ,bn_uv                     STRING COMMENT 'B链全量: 回流去重人数'
+    ,b1_exp                    STRING COMMENT 'B链depth=1: 回流用户session曝光数'
+    ,b1_pv                     STRING COMMENT 'B链depth=1: 回流点击次数'
+    ,b1_uv                     STRING COMMENT 'B链depth=1: 回流去重人数'
+    ,b2_exp                    STRING COMMENT 'B链depth=2: 回流用户session曝光数'
+    ,b2_pv                     STRING COMMENT 'B链depth=2: 回流点击次数'
+    ,b2_uv                     STRING COMMENT 'B链depth=2: 回流去重人数'
+    ,b3_exp                    STRING COMMENT 'B链depth=3: 回流用户session曝光数'
+    ,b3_pv                     STRING COMMENT 'B链depth=3: 回流点击次数'
+    ,b3_uv                     STRING COMMENT 'B链depth=3: 回流去重人数'
+    -- ========== C chain (4 depths x 3 hops x 3 metrics = 36 columns) ==========
+    ,cn_1_exp                  STRING COMMENT 'C链全量hop1: 回流用户session曝光数'
+    ,cn_1_pv                   STRING COMMENT 'C链全量hop1: 回流点击次数'
+    ,cn_1_uv                   STRING COMMENT 'C链全量hop1: 回流去重人数'
+    ,c1_1_exp                  STRING COMMENT 'C链depth=1 hop1: 回流用户session曝光数'
+    ,c1_1_pv                   STRING COMMENT 'C链depth=1 hop1: 回流点击次数'
+    ,c1_1_uv                   STRING COMMENT 'C链depth=1 hop1: 回流去重人数'
+    ,c2_1_exp                  STRING COMMENT 'C链depth=2 hop1: 回流用户session曝光数'
+    ,c2_1_pv                   STRING COMMENT 'C链depth=2 hop1: 回流点击次数'
+    ,c2_1_uv                   STRING COMMENT 'C链depth=2 hop1: 回流去重人数'
+    ,c3_1_exp                  STRING COMMENT 'C链depth=3 hop1: 回流用户session曝光数'
+    ,c3_1_pv                   STRING COMMENT 'C链depth=3 hop1: 回流点击次数'
+    ,c3_1_uv                   STRING COMMENT 'C链depth=3 hop1: 回流去重人数'
+    ,cn_2_exp                  STRING COMMENT 'C链全量hop2: 回流用户session曝光数'
+    ,cn_2_pv                   STRING COMMENT 'C链全量hop2: 回流点击次数'
+    ,cn_2_uv                   STRING COMMENT 'C链全量hop2: 回流去重人数'
+    ,c1_2_exp                  STRING COMMENT 'C链depth=1 hop2: 回流用户session曝光数'
+    ,c1_2_pv                   STRING COMMENT 'C链depth=1 hop2: 回流点击次数'
+    ,c1_2_uv                   STRING COMMENT 'C链depth=1 hop2: 回流去重人数'
+    ,c2_2_exp                  STRING COMMENT 'C链depth=2 hop2: 回流用户session曝光数'
+    ,c2_2_pv                   STRING COMMENT 'C链depth=2 hop2: 回流点击次数'
+    ,c2_2_uv                   STRING COMMENT 'C链depth=2 hop2: 回流去重人数'
+    ,c3_2_exp                  STRING COMMENT 'C链depth=3 hop2: 回流用户session曝光数'
+    ,c3_2_pv                   STRING COMMENT 'C链depth=3 hop2: 回流点击次数'
+    ,c3_2_uv                   STRING COMMENT 'C链depth=3 hop2: 回流去重人数'
+    ,cn_3_exp                  STRING COMMENT 'C链全量hop3: 回流用户session曝光数'
+    ,cn_3_pv                   STRING COMMENT 'C链全量hop3: 回流点击次数'
+    ,cn_3_uv                   STRING COMMENT 'C链全量hop3: 回流去重人数'
+    ,c1_3_exp                  STRING COMMENT 'C链depth=1 hop3: 回流用户session曝光数'
+    ,c1_3_pv                   STRING COMMENT 'C链depth=1 hop3: 回流点击次数'
+    ,c1_3_uv                   STRING COMMENT 'C链depth=1 hop3: 回流去重人数'
+    ,c2_3_exp                  STRING COMMENT 'C链depth=2 hop3: 回流用户session曝光数'
+    ,c2_3_pv                   STRING COMMENT 'C链depth=2 hop3: 回流点击次数'
+    ,c2_3_uv                   STRING COMMENT 'C链depth=2 hop3: 回流去重人数'
+    ,c3_3_exp                  STRING COMMENT 'C链depth=3 hop3: 回流用户session曝光数'
+    ,c3_3_pv                   STRING COMMENT 'C链depth=3 hop3: 回流点击次数'
+    ,c3_3_uv                   STRING COMMENT 'C链depth=3 hop3: 回流去重人数'
+    -- ========== D chain (d0 + 4 depths x 3 hops x 3 metrics = 37 columns) ==========
+    ,d0                        STRING COMMENT 'D链初始成本: session内后续曝光数'
+    ,dn_1_exp                  STRING COMMENT 'D链全量hop1: 回流用户session曝光数'
+    ,dn_1_pv                   STRING COMMENT 'D链全量hop1: 回流点击次数'
+    ,dn_1_uv                   STRING COMMENT 'D链全量hop1: 回流去重人数'
+    ,d1_1_exp                  STRING COMMENT 'D链depth=1 hop1: 回流用户session曝光数'
+    ,d1_1_pv                   STRING COMMENT 'D链depth=1 hop1: 回流点击次数'
+    ,d1_1_uv                   STRING COMMENT 'D链depth=1 hop1: 回流去重人数'
+    ,d2_1_exp                  STRING COMMENT 'D链depth=2 hop1: 回流用户session曝光数'
+    ,d2_1_pv                   STRING COMMENT 'D链depth=2 hop1: 回流点击次数'
+    ,d2_1_uv                   STRING COMMENT 'D链depth=2 hop1: 回流去重人数'
+    ,d3_1_exp                  STRING COMMENT 'D链depth=3 hop1: 回流用户session曝光数'
+    ,d3_1_pv                   STRING COMMENT 'D链depth=3 hop1: 回流点击次数'
+    ,d3_1_uv                   STRING COMMENT 'D链depth=3 hop1: 回流去重人数'
+    ,dn_2_exp                  STRING COMMENT 'D链全量hop2: 回流用户session曝光数'
+    ,dn_2_pv                   STRING COMMENT 'D链全量hop2: 回流点击次数'
+    ,dn_2_uv                   STRING COMMENT 'D链全量hop2: 回流去重人数'
+    ,d1_2_exp                  STRING COMMENT 'D链depth=1 hop2: 回流用户session曝光数'
+    ,d1_2_pv                   STRING COMMENT 'D链depth=1 hop2: 回流点击次数'
+    ,d1_2_uv                   STRING COMMENT 'D链depth=1 hop2: 回流去重人数'
+    ,d2_2_exp                  STRING COMMENT 'D链depth=2 hop2: 回流用户session曝光数'
+    ,d2_2_pv                   STRING COMMENT 'D链depth=2 hop2: 回流点击次数'
+    ,d2_2_uv                   STRING COMMENT 'D链depth=2 hop2: 回流去重人数'
+    ,d3_2_exp                  STRING COMMENT 'D链depth=3 hop2: 回流用户session曝光数'
+    ,d3_2_pv                   STRING COMMENT 'D链depth=3 hop2: 回流点击次数'
+    ,d3_2_uv                   STRING COMMENT 'D链depth=3 hop2: 回流去重人数'
+    ,dn_3_exp                  STRING COMMENT 'D链全量hop3: 回流用户session曝光数'
+    ,dn_3_pv                   STRING COMMENT 'D链全量hop3: 回流点击次数'
+    ,dn_3_uv                   STRING COMMENT 'D链全量hop3: 回流去重人数'
+    ,d1_3_exp                  STRING COMMENT 'D链depth=1 hop3: 回流用户session曝光数'
+    ,d1_3_pv                   STRING COMMENT 'D链depth=1 hop3: 回流点击次数'
+    ,d1_3_uv                   STRING COMMENT 'D链depth=1 hop3: 回流去重人数'
+    ,d2_3_exp                  STRING COMMENT 'D链depth=2 hop3: 回流用户session曝光数'
+    ,d2_3_pv                   STRING COMMENT 'D链depth=2 hop3: 回流点击次数'
+    ,d2_3_uv                   STRING COMMENT 'D链depth=2 hop3: 回流去重人数'
+    ,d3_3_exp                  STRING COMMENT 'D链depth=3 hop3: 回流用户session曝光数'
+    ,d3_3_pv                   STRING COMMENT 'D链depth=3 hop3: 回流点击次数'
+    ,d3_3_uv                   STRING COMMENT 'D链depth=3 hop3: 回流去重人数'
+    ,extend                    STRING
+    -- ========== mids list columns (variable length, grouped at the end) ==========
+    ,return_1_mids             STRING
+    ,return_n_mids             STRING
+    ,return_1_mids_noself      STRING
+    ,return_n_mids_noself      STRING
+    ,bn_mids                   STRING COMMENT 'B链全量: 回流mid列表'
+    ,b1_mids                   STRING COMMENT 'B链depth=1: 回流mid列表'
+    ,b2_mids                   STRING COMMENT 'B链depth=2: 回流mid列表'
+    ,b3_mids                   STRING COMMENT 'B链depth=3: 回流mid列表'
+    ,cn_1_mids                 STRING COMMENT 'C链全量hop1: 回流mid列表'
+    ,c1_1_mids                 STRING COMMENT 'C链depth=1 hop1: 回流mid列表'
+    ,c2_1_mids                 STRING COMMENT 'C链depth=2 hop1: 回流mid列表'
+    ,c3_1_mids                 STRING COMMENT 'C链depth=3 hop1: 回流mid列表'
+    ,cn_2_mids                 STRING COMMENT 'C链全量hop2: 回流mid列表'
+    ,c1_2_mids                 STRING COMMENT 'C链depth=1 hop2: 回流mid列表'
+    ,c2_2_mids                 STRING COMMENT 'C链depth=2 hop2: 回流mid列表'
+    ,c3_2_mids                 STRING COMMENT 'C链depth=3 hop2: 回流mid列表'
+    ,cn_3_mids                 STRING COMMENT 'C链全量hop3: 回流mid列表'
+    ,c1_3_mids                 STRING COMMENT 'C链depth=1 hop3: 回流mid列表'
+    ,c2_3_mids                 STRING COMMENT 'C链depth=2 hop3: 回流mid列表'
+    ,c3_3_mids                 STRING COMMENT 'C链depth=3 hop3: 回流mid列表'
+    ,dn_1_mids                 STRING COMMENT 'D链全量hop1: 回流mid列表'
+    ,d1_1_mids                 STRING COMMENT 'D链depth=1 hop1: 回流mid列表'
+    ,d2_1_mids                 STRING COMMENT 'D链depth=2 hop1: 回流mid列表'
+    ,d3_1_mids                 STRING COMMENT 'D链depth=3 hop1: 回流mid列表'
+    ,dn_2_mids                 STRING COMMENT 'D链全量hop2: 回流mid列表'
+    ,d1_2_mids                 STRING COMMENT 'D链depth=1 hop2: 回流mid列表'
+    ,d2_2_mids                 STRING COMMENT 'D链depth=2 hop2: 回流mid列表'
+    ,d3_2_mids                 STRING COMMENT 'D链depth=3 hop2: 回流mid列表'
+    ,dn_3_mids                 STRING COMMENT 'D链全量hop3: 回流mid列表'
+    ,d1_3_mids                 STRING COMMENT 'D链depth=1 hop3: 回流mid列表'
+    ,d2_3_mids                 STRING COMMENT 'D链depth=2 hop3: 回流mid列表'
+    ,d3_3_mids                 STRING COMMENT 'D链depth=3 hop3: 回流mid列表'
+)
+PARTITIONED BY
+(
+    dt                         STRING COMMENT '日期:20240105'
+    ,hh                        STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = '推荐算法-labelmatch表-20260209更新-含多跳B/C/D-sharedepth维度')
+LIFECYCLE 3650
+;
+
+-- Enable dynamic-partition INSERT (dt,hh come from the SELECT output).
+SET hive.exec.dynamic.partition = true
+;
+
+SET hive.exec.dynamic.partition.mode = nonstrict
+;
+
+-- Larger mapper split size to reduce instance count over the wide flow tables.
+SET odps.stage.mapper.split.size = 1024
+;
+
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_base_20260209 PARTITION (dt,hh)
+-- Return clicks over the trailing 25h..1h window, deduped; each row gets a
+-- synthetic id = dthh:shareid:vid:seq for downstream joins.
+WITH t_return AS
+(
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid -- NOTE: this is the return visit's subsessionid; it resets on each return click, so it locates the return-side exposures.
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,CAST(sharedepth AS BIGINT) AS sharedepth
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id
+                FROM    loghubods.user_share_log_flow -- return events: each subsessionid should appear once, but dirty data exists, so dedupe.
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12') -- apptype 12 uses h5-share/h5-detail pagesource; filtered out for now, not handled.
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$" -- dirty data exists (e.g. vlog-gzh, paths ending /mine/mine-info$); filter all of those out.
+            )
+    WHERE   rn = 1
+)
+-- Share events over the same window, deduped on the full business key.
+,t_share_from_sharelog AS
+(
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12')
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            )
+    WHERE   rn = 1
+)
+-- Raw exposure events (businesstype=videoView) with extparams fields extracted;
+-- rn dedupes on the business key, dthh_id numbers rows within each subsession.
+,t_exposure_raw AS
+(
+    SELECT  ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),subsessionid ORDER BY clienttimestamp DESC ) AS dthh_id
+            ,CONCAT(year,month,day,hour) AS dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,videoid AS vid
+            ,sessionid
+            ,subsessionid
+            ,rootsessionid_new
+            ,pagesource
+            ,recommendlogvo
+            ,COALESCE(GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003'),"unknown") AS abcode
+            ,GET_JSON_OBJECT(extparams,'$.recommendPageType') AS recommendpagetype
+            ,GET_JSON_OBJECT(extparams,'$.recomTraceId') AS recomtraceid
+            -- The head_videoid key appears with two capitalizations upstream; try both.
+            ,CASE   WHEN GET_JSON_OBJECT(extParams,'$.head_videoid') IS NOT NULL THEN GET_JSON_OBJECT(extParams,'$.head_videoid')
+                    ELSE GET_JSON_OBJECT(extParams,'$.head_videoId')
+            END AS headvideoid
+            ,GET_JSON_OBJECT(extParams,'$.rootSourceId') AS rootsourceid
+            ,COALESCE(hotsencetype,sencetype,"other") AS hotsencetype
+            ,GET_JSON_OBJECT(extParams,'$.animationSceneType') AS animationscenetype
+            ,JSON_PARSE(IF(JSON_VALID(extparams),extparams,"{}")) AS extParams
+            ,flowpool
+            ,SPLIT(flowpool,'#')[2] AS level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,ANALYSISIP(clientip,"region") AS province
+            ,ANALYSISIP(clientip,"city") AS city
+            ,versioncode
+            ,CAST(logtimestamp / 1000 AS BIGINT) AS ts
+            ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,uid,mid,videoid,sessionid,subsessionid,pagesource ORDER BY logtimestamp ) AS rn
+    FROM    loghubods.video_action_log_flow_new
+    WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+    AND     businesstype IN ('videoView')
+    AND     apptype IS NOT NULL
+    AND     apptype NOT IN ('12')
+    AND     mid IS NOT NULL
+    AND     videoid IS NOT NULL
+)
+-- t_exposure: exposures with the synthetic key id = dthh:subsessionid:dthh_id, split and
+-- re-unioned by whether pagesource ends with "-pages/user-videos-share".
+,t_exposure AS
+(
+    SELECT  dthh_id ,dthh ,apptype ,uid ,mid ,vid ,sessionid ,subsessionid ,rootsessionid_new ,pagesource
+            ,recommendlogvo ,abcode ,recommendpagetype ,recomtraceid ,headvideoid ,rootsourceid ,hotsencetype
+            ,animationscenetype ,extParams ,flowpool ,level ,clientip ,machineinfo_brand ,machineinfo_model
+            ,machineinfo_system ,machineinfo_wechatversion ,machineinfo_sdkversion ,province ,city ,versioncode
+            ,ts ,rn ,CONCAT(dthh,":",subsessionid,":",dthh_id) AS id
+    FROM    t_exposure_raw
+    -- non share-page exposures: deduped via rn = 1
+    WHERE   pagesource NOT REGEXP "-pages/user-videos-share$"
+    AND     rn = 1
+    UNION ALL
+    SELECT  dthh_id ,dthh ,apptype ,uid ,mid ,vid ,sessionid ,subsessionid ,rootsessionid_new ,pagesource
+            ,recommendlogvo ,abcode ,recommendpagetype ,recomtraceid ,headvideoid ,rootsourceid ,hotsencetype
+            ,animationscenetype ,extParams ,flowpool ,level ,clientip ,machineinfo_brand ,machineinfo_model
+            ,machineinfo_system ,machineinfo_wechatversion ,machineinfo_sdkversion ,province ,city ,versioncode
+            ,ts ,rn ,CONCAT(dthh,":",subsessionid,":",dthh_id) AS id
+    FROM    t_exposure_raw
+    -- NOTE(review): this branch has no "rn = 1", so share-page exposures are NOT deduped;
+    -- also rows with NULL pagesource fail both REGEXP branches and are dropped entirely -- confirm intended
+    WHERE   pagesource REGEXP "-pages/user-videos-share$"
+)
+-- t_exposure_recommend: exposures restricted to category/recommend/user-videos-detail pages
+,t_exposure_recommend AS
+(
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+)
+-- 4-stage fallback cascade matching each recommend-page exposure to a return row
+-- (used to compute viewh24). Each stage only retries rows left unmatched by the
+-- previous stage (return_id IS NULL) and keeps the latest return by t2.ts (rn = 1).
+-- Join keys relax stage by stage:
+--   1: mid + headvideoid + subsessionid
+--   2: mid + headvideoid + sessionid
+--   3: mid + subsessionid            (headvideoid dropped)
+--   4: mid + sessionid
+,t_return_exposure_1 AS -- stage 1: exposure joined to return, used to compute viewh24
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.id ORDER BY t2.ts DESC ) AS rn
+                FROM    t_exposure_recommend t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.headvideoid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            )
+    WHERE   rn = 1
+)
+,t_return_exposure_2 AS -- stage 2: retry stage-1 misses on sessionid instead of subsessionid
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_1
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.headvideoid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            )
+    WHERE   rn = 1
+)
+,t_return_exposure_3 AS -- stage 3: retry stage-2 misses without the headvideoid key
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_2
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.subsessionid = t2.subsessionid
+            )
+    WHERE   rn = 1
+)
+,t_return_exposure_4 AS -- stage 4: last resort, mid + sessionid only
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_3
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.sessionid = t2.sessionid
+            )
+    WHERE   rn = 1
+)
+-- t_return_exposure: every return row plus new_exposure_cnt = number of exposures that
+-- were matched to that return across the four cascade stages (NULL when none matched).
+,t_return_exposure AS
+(
+    SELECT  a.*
+            ,b.exposure_cnt AS new_exposure_cnt
+    FROM    t_return a
+    LEFT JOIN   (
+                    -- matched (exposure, return) pairs from all stages; each exposure
+                    -- appears in at most one stage, so COUNT(1) is the exposure count per return
+                    SELECT  return_id
+                            ,COUNT(1) AS exposure_cnt
+                    FROM    (
+                                SELECT  *
+                                FROM    t_return_exposure_1
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_2
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_3
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_4
+                                WHERE   return_id IS NOT NULL
+                            )
+                    GROUP BY return_id
+                ) b
+    ON      a.id = b.return_id
+)
+-- 6-stage fallback cascade linking each regular share (pagesource NOT ending in
+-- "pages/detail-user-videos-share-recommend") to the exposure that preceded it.
+-- Each stage retries only rows the previous stage left unmatched (exposure_id IS NULL)
+-- and keeps the latest qualifying exposure by t2.ts (rn = 1). Join keys per stage:
+--   1: apptype+mid+vid + subsessionid + pagesource + ts ordering
+--   2: apptype+mid+vid + sessionid    + pagesource + ts ordering
+--   3: same keys as stage 1   4: same keys as stage 2   (see NOTE below)
+--   5: apptype+mid+vid + subsessionid (no pagesource, no ts ordering)
+--   6: apptype+mid+vid + sessionid    (no pagesource, no ts ordering)
+-- NOTE(review): stage 3's ON clause is token-identical to stage 1's and stage 4's to
+-- stage 2's, so rows unmatched in stages 1/2 can never match in stages 3/4 -- these two
+-- passes are dead as written. They were possibly meant to drop the pagesource equality
+-- (mirroring how stages 5/6 drop both pagesource and ts) -- confirm intent.
+,t_normal_share_exposure_1 AS -- stage 1: begin linking regular shares to exposures
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    t_share_from_sharelog t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts
+                WHERE   t1.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$"
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_2 AS -- stage 2: retry on sessionid
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_1
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_3 AS -- stage 3 (see NOTE above: duplicates stage 1's keys)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_2
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_4 AS -- stage 4 (see NOTE above: duplicates stage 2's keys)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_3
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_5 AS -- stage 5: subsessionid only (no pagesource / ts constraint)
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_4
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            )
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_6 AS -- stage 6: last resort, sessionid only
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_5
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            )
+    WHERE   rn = 1
+)
+-- t_exposure_detail: exposures on detail pages only; join target for the
+-- non-regular share cascade below
+,t_exposure_detail AS
+(
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
+)
+-- 4-stage fallback cascade linking non-regular shares (pagesource ending in
+-- "pages/detail-user-videos-share-recommend") to detail-page exposures. Same
+-- retry-the-misses pattern as the regular cascade; keys relax per stage:
+--   1: apptype+mid+vid + subsessionid + ts ordering
+--   2: apptype+mid+vid + sessionid    + ts ordering
+--   3: apptype+mid+vid + subsessionid (no ts)
+--   4: apptype+mid+vid + sessionid    (no ts)
+,t_no_normal_share_exposure_1 AS -- stage 1: begin linking non-regular shares to exposures
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    t_share_from_sharelog t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.ts >= t2.ts
+                WHERE   t1.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+            )
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_2 AS -- stage 2: retry on sessionid
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_1
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+                AND     t1.ts >= t2.ts
+            )
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_3 AS -- stage 3: subsessionid, ts constraint dropped
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_2
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            )
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_4 AS -- stage 4: last resort, sessionid only
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_3
+                            WHERE   exposure_id IS NULL
+                        ) t1
+                LEFT JOIN t_exposure_detail t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            )
+    WHERE   rn = 1
+)
+-- t_share_exposure: union of all cascade stages. Intermediate stages contribute only
+-- matched rows (exposure_id IS NOT NULL); the final stage of each cascade
+-- (t_normal_share_exposure_6 and t_no_normal_share_exposure_4) is taken WITHOUT the
+-- filter, so shares that never matched any exposure survive with NULL exposure_id.
+-- NOTE(review): verify the missing filter on those two branches is intentional.
+,t_share_exposure AS
+(
+    SELECT  *
+    FROM    t_normal_share_exposure_1
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_2
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_3
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_4
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_5
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_6
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_1
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_2
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_3
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_4
+)
+--========================================
+-- Multi-hop B/C/D computation (BFS frontier + anti-join de-cycling, 24h window only)
+--========================================
+-- t_share_return: (share-exposure, return) pairs joined on rootshareid + vid + apptype.
+-- One exposure fans out to one row per return triggered by its share; sharedepth comes
+-- from the return side. Shares with NULL exposure_id drop out via the inner join keys.
+,t_share_return AS (
+    SELECT  se.exposure_id
+            ,se.shareid
+            ,se.vid
+            ,se.apptype
+            ,se.subsessionid
+            ,r.subsessionid AS return_subsessionid
+            ,r.mid AS return_mid
+            ,r.sharedepth
+    FROM    t_share_exposure se
+    JOIN    t_return r
+    ON      se.shareid = r.rootshareid
+    AND     se.vid = r.vid
+    AND     se.apptype = r.apptype
+)
+--========================================
+-- B chain: pv/uv/mids (split by sharedepth)
+--========================================
+-- Per exposure: total return pv, the set of returning mids, and the same pair broken
+-- out for sharedepth 1/2/3. COUNT(expr) counts non-NULL values only, so each bN_pv
+-- counts just the rows at that depth; COLLECT_SET is assumed to skip the NULLs the
+-- CASE produces for other depths -- verify on the target engine.
+,t_exposure_bn AS (
+    SELECT  exposure_id
+            ,COUNT(return_mid) AS bn_pv
+            ,COLLECT_SET(return_mid) AS bn_mids
+            ,COUNT(CASE WHEN sharedepth = 1 THEN return_mid END) AS b1_pv
+            ,COLLECT_SET(CASE WHEN sharedepth = 1 THEN return_mid END) AS b1_mids
+            ,COUNT(CASE WHEN sharedepth = 2 THEN return_mid END) AS b2_pv
+            ,COLLECT_SET(CASE WHEN sharedepth = 2 THEN return_mid END) AS b2_mids
+            ,COUNT(CASE WHEN sharedepth = 3 THEN return_mid END) AS b3_pv
+            ,COLLECT_SET(CASE WHEN sharedepth = 3 THEN return_mid END) AS b3_mids
+    FROM    t_share_return
+    GROUP BY exposure_id
+)
+--========================================
+-- B chain: exp (exposure count inside returning users' sessions, split by sharedepth)
+--========================================
+-- For each source exposure: count all exposures occurring in the subsessions its
+-- returns landed in. The inner DISTINCT dedups (exposure, return-subsession, depth)
+-- so one returned subsession is counted once per depth it appears at.
+,t_b_exp AS (
+    SELECT  sr.exposure_id
+            ,COUNT(e.id) AS bn_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN e.id END) AS b1_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN e.id END) AS b2_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN e.id END) AS b3_exp
+    FROM    (SELECT DISTINCT exposure_id, return_subsessionid, sharedepth
+             FROM t_share_return) sr
+    JOIN    t_exposure e ON sr.return_subsessionid = e.subsessionid
+    GROUP BY sr.exposure_id
+)
+-- BFS frontier 1: subsessionids reached directly by each exposure's returns
+,t_frontier_1 AS (
+    SELECT DISTINCT exposure_id AS source_id, return_subsessionid AS reached_sub
+    FROM   t_share_return
+)
+--========================================
+-- C chain hop 1: pv/uv/mids (split by sharedepth)
+--========================================
+-- Returns generated by exposures that happened inside frontier-1 subsessions,
+-- rolled up to the original source exposure.
+,t_c_hop1 AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(sr.return_mid) AS cn_1_pv
+            ,COLLECT_SET(sr.return_mid) AS cn_1_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS c1_1_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS c1_1_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS c2_1_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS c2_1_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS c3_1_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS c3_1_mids
+    FROM    t_frontier_1 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_share_return sr ON e.id = sr.exposure_id
+    GROUP BY f.source_id
+)
+--========================================
+-- C chain hop 1: exp (exposure count inside hop-1 returning users' sessions)
+--========================================
+,t_c_hop1_exp AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(e_ret.id) AS cn_1_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN e_ret.id END) AS c1_1_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN e_ret.id END) AS c2_1_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN e_ret.id END) AS c3_1_exp
+    FROM    t_frontier_1 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    (SELECT DISTINCT exposure_id, return_subsessionid, sharedepth FROM t_share_return) sr
+    ON      e.id = sr.exposure_id
+    JOIN    t_exposure e_ret ON sr.return_subsessionid = e_ret.subsessionid
+    GROUP BY f.source_id
+)
+-- BFS frontier 2: extend frontier 1 one hop; the anti-join (LEFT JOIN ... IS NULL)
+-- drops subsessions already reached in frontier 1 to avoid revisiting cycles
+,t_frontier_2 AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_frontier_1 f1
+    JOIN    t_exposure e1 ON f1.reached_sub = e1.subsessionid
+    JOIN    t_share_return sr2 ON e1.id = sr2.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+--========================================
+-- C chain hop 2: pv/uv/mids (split by sharedepth)
+--========================================
+,t_c_hop2 AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(sr.return_mid) AS cn_2_pv
+            ,COLLECT_SET(sr.return_mid) AS cn_2_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS c1_2_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS c1_2_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS c2_2_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS c2_2_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS c3_2_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS c3_2_mids
+    FROM    t_frontier_2 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_share_return sr ON e.id = sr.exposure_id
+    GROUP BY f.source_id
+)
+--========================================
+-- C chain hop 2: exp
+--========================================
+,t_c_hop2_exp AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(e_ret.id) AS cn_2_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN e_ret.id END) AS c1_2_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN e_ret.id END) AS c2_2_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN e_ret.id END) AS c3_2_exp
+    FROM    t_frontier_2 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    (SELECT DISTINCT exposure_id, return_subsessionid, sharedepth FROM t_share_return) sr
+    ON      e.id = sr.exposure_id
+    JOIN    t_exposure e_ret ON sr.return_subsessionid = e_ret.subsessionid
+    GROUP BY f.source_id
+)
+-- BFS frontier 3: extend frontier 2 one hop; anti-joins against both frontier 1
+-- and frontier 2 so no subsession is visited twice for the same source exposure
+,t_frontier_3 AS (
+    SELECT DISTINCT f2.source_id, sr3.return_subsessionid AS reached_sub
+    FROM    t_frontier_2 f2
+    JOIN    t_exposure e2 ON f2.reached_sub = e2.subsessionid
+    JOIN    t_share_return sr3 ON e2.id = sr3.exposure_id
+    LEFT JOIN t_frontier_1 v1
+        ON  f2.source_id = v1.source_id
+        AND sr3.return_subsessionid = v1.reached_sub
+    LEFT JOIN t_frontier_2 v2
+        ON  f2.source_id = v2.source_id
+        AND sr3.return_subsessionid = v2.reached_sub
+    WHERE   v1.source_id IS NULL AND v2.source_id IS NULL
+)
+--========================================
+-- C chain hop 3: pv/uv/mids (split by sharedepth)
+--========================================
+,t_c_hop3 AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(sr.return_mid) AS cn_3_pv
+            ,COLLECT_SET(sr.return_mid) AS cn_3_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS c1_3_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS c1_3_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS c2_3_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS c2_3_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS c3_3_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS c3_3_mids
+    FROM    t_frontier_3 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_share_return sr ON e.id = sr.exposure_id
+    GROUP BY f.source_id
+)
+--========================================
+-- C chain hop 3: exp
+--========================================
+,t_c_hop3_exp AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(e_ret.id) AS cn_3_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN e_ret.id END) AS c1_3_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN e_ret.id END) AS c2_3_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN e_ret.id END) AS c3_3_exp
+    FROM    t_frontier_3 f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    (SELECT DISTINCT exposure_id, return_subsessionid, sharedepth FROM t_share_return) sr
+    ON      e.id = sr.exposure_id
+    JOIN    t_exposure e_ret ON sr.return_subsessionid = e_ret.subsessionid
+    GROUP BY f.source_id
+)
+--========================================
+-- D chain: propagation through later exposures in the same session (BFS de-cycling)
+--========================================
+-- D0: count of exposures strictly later (by ts) in the same subsession -- the D chain's
+-- initial cost. Equal-ts peers are excluded; the CASTs are redundant (ts is already
+-- BIGINT from t_exposure_raw) but harmless.
+,t_d0 AS (
+    SELECT  e1.id AS exposure_id
+            ,COUNT(e2.id) AS d0
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    GROUP BY e1.id
+)
+-- D chain hop 1: pv/uv/mids -- returns triggered by later same-session exposures
+,t_d_hop1 AS (
+    SELECT  e1.id AS exposure_id
+            ,COUNT(sr.return_mid) AS dn_1_pv
+            ,COLLECT_SET(sr.return_mid) AS dn_1_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS d1_1_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS d1_1_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS d2_1_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS d2_1_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS d3_1_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS d3_1_mids
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    t_share_return sr ON e2.id = sr.exposure_id
+    GROUP BY e1.id
+)
+-- D chain hop 1: exp -- exposures inside those hop-1 returns' subsessions
+,t_d_hop1_exp AS (
+    SELECT  e1.id AS exposure_id
+            ,COUNT(e_ret.id) AS dn_1_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN e_ret.id END) AS d1_1_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN e_ret.id END) AS d2_1_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN e_ret.id END) AS d3_1_exp
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    (SELECT DISTINCT exposure_id, return_subsessionid, sharedepth FROM t_share_return) sr
+    ON      e2.id = sr.exposure_id
+    JOIN    t_exposure e_ret ON sr.return_subsessionid = e_ret.subsessionid
+    GROUP BY e1.id
+)
+-- D 链 frontier: hop1 延伸
+,t_d1_frontier AS (
+    SELECT DISTINCT e1.id AS source_id
+           ,sr.return_subsessionid AS reached_sub
+    FROM    t_exposure e1
+    JOIN    t_exposure e2
+    ON      e1.subsessionid = e2.subsessionid
+    AND     CAST(e2.ts AS BIGINT) > CAST(e1.ts AS BIGINT)
+    JOIN    t_share_return sr
+    ON      e2.id = sr.exposure_id
+)
+-- D chain hop2: return pv and mid sets per source exposure, split by share depth.
+-- pv counts one row per share-return record; uv is derived downstream via
+-- SIZE(COLLECT_SET(...)) so duplicate mids only inflate pv, not uv.
+,t_d_hop2 AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(sr.return_mid) AS dn_2_pv
+            ,COLLECT_SET(sr.return_mid) AS dn_2_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS d1_2_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS d1_2_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS d2_2_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS d2_2_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS d3_2_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS d3_2_mids
+    FROM    t_d1_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_share_return sr ON e.id = sr.exposure_id
+    GROUP BY f.source_id
+)
+-- D chain hop2: downstream exposures reached from the hop1 frontier, by share depth
+,t_d_hop2_exp AS (
+    SELECT  fr.source_id AS exposure_id
+            ,COUNT(ret.id) AS dn_2_exp
+            ,COUNT(CASE WHEN lnk.sharedepth = 1 THEN ret.id END) AS d1_2_exp
+            ,COUNT(CASE WHEN lnk.sharedepth = 2 THEN ret.id END) AS d2_2_exp
+            ,COUNT(CASE WHEN lnk.sharedepth = 3 THEN ret.id END) AS d3_2_exp
+    FROM    t_d1_frontier fr
+    JOIN    t_exposure hop_e ON fr.reached_sub = hop_e.subsessionid
+    JOIN    (SELECT DISTINCT exposure_id, return_subsessionid, sharedepth FROM t_share_return) lnk
+    ON      hop_e.id = lnk.exposure_id
+    JOIN    t_exposure ret ON lnk.return_subsessionid = ret.subsessionid
+    GROUP BY fr.source_id
+)
+-- D chain frontier 2: sub-sessions reached at hop2; the anti-join removes subs
+-- already reached at hop1 (per source) so hop3 does not re-count them.
+-- NOTE(review): only hop1-reached subs are excluded, not the source exposure's
+-- own subsession — confirm a cycle back to the origin is impossible/acceptable.
+,t_d2_frontier AS (
+    SELECT DISTINCT f1.source_id, sr2.return_subsessionid AS reached_sub
+    FROM    t_d1_frontier f1
+    JOIN    t_exposure e1 ON f1.reached_sub = e1.subsessionid
+    JOIN    t_share_return sr2 ON e1.id = sr2.exposure_id
+    LEFT JOIN t_d1_frontier v1
+        ON  f1.source_id = v1.source_id
+        AND sr2.return_subsessionid = v1.reached_sub
+    WHERE   v1.source_id IS NULL
+)
+-- D chain hop3: return pv and mid sets per source exposure, split by share depth
+-- (same shape as t_d_hop2, driven by the hop2 frontier instead of hop1).
+,t_d_hop3 AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(sr.return_mid) AS dn_3_pv
+            ,COLLECT_SET(sr.return_mid) AS dn_3_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS d1_3_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 1 THEN sr.return_mid END) AS d1_3_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS d2_3_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 2 THEN sr.return_mid END) AS d2_3_mids
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS d3_3_pv
+            ,COLLECT_SET(CASE WHEN sr.sharedepth = 3 THEN sr.return_mid END) AS d3_3_mids
+    FROM    t_d2_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    t_share_return sr ON e.id = sr.exposure_id
+    GROUP BY f.source_id
+)
+-- D chain hop3: downstream exposures reached from the hop2 frontier, by share depth
+-- (same shape as t_d_hop2_exp; the DISTINCT subquery collapses duplicate
+-- share-return records before the exposure fan-out).
+,t_d_hop3_exp AS (
+    SELECT  f.source_id AS exposure_id
+            ,COUNT(e_ret.id) AS dn_3_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 1 THEN e_ret.id END) AS d1_3_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 2 THEN e_ret.id END) AS d2_3_exp
+            ,COUNT(CASE WHEN sr.sharedepth = 3 THEN e_ret.id END) AS d3_3_exp
+    FROM    t_d2_frontier f
+    JOIN    t_exposure e ON f.reached_sub = e.subsessionid
+    JOIN    (SELECT DISTINCT exposure_id, return_subsessionid, sharedepth FROM t_share_return) sr
+    ON      e.id = sr.exposure_id
+    JOIN    t_exposure e_ret ON sr.return_subsessionid = e_ret.subsessionid
+    GROUP BY f.source_id
+)
+--========================================
+-- original CTEs continue below
+--========================================
+-- Per-share row with first-hop (b) and all-hop (c) return labels attached.
+,t_share_with_label AS
+(
+    SELECT  a.dthh
+            ,a.apptype -- join key
+            ,a.mid
+            ,a.vid -- join key
+            ,a.sessionid
+            ,a.subsessionid
+            ,a.pagesource
+            ,a.shareid -- join key
+            ,a.ts
+            ,a.exposure_id
+            ,COALESCE(b.return_1_pv,0) AS return_1_pv
+            ,COALESCE(b.return_1_uv,0) AS return_1_uv
+            ,b.return_1_mids AS return_1_mids -- may be NULL; decide later whether to pre-process
+            ,COALESCE(c.return_n_pv,0) AS return_n_pv
+            ,COALESCE(c.return_n_uv,0) AS return_n_uv
+            ,c.return_n_mids AS return_n_mids -- may be NULL; decide later whether to pre-process
+            ,COALESCE(c.new_exposure_cnt,0) AS new_exposure_cnt
+    FROM    t_share_exposure a
+    LEFT JOIN   (
+                    SELECT  shareid
+                            ,vid
+                            ,apptype
+                            ,COUNT(1) AS return_1_pv
+                            ,COUNT(DISTINCT mid) AS return_1_uv
+                            ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_1_mids
+                    FROM    t_return
+                    GROUP BY shareid
+                             ,vid
+                             ,apptype
+                ) b
+    ON      a.shareid = b.shareid
+    AND     a.vid = b.vid
+    AND     a.apptype = b.apptype
+    LEFT JOIN   (
+                    SELECT  rootshareid
+                            ,vid
+                            ,apptype
+                            ,COUNT(1) AS return_n_pv
+                            ,COUNT(DISTINCT mid) AS return_n_uv
+                            ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_n_mids
+                            ,SUM(new_exposure_cnt) AS new_exposure_cnt
+                    FROM    t_return_exposure
+                    GROUP BY rootshareid
+                             ,vid
+                             ,apptype
+                ) c
+    ON      a.shareid = c.rootshareid
+    AND     a.vid = c.vid
+    AND     a.apptype = c.apptype
+)
+-- Roll shares up to one row per exposure: share count plus deduplicated
+-- return pv/uv/mid-lists.
+-- NOTE(review): DEDUPLICATION4LIST is a project UDF — presumably dedups a
+-- comma-joined mid list and returns NULL for empty input; verify its contract.
+,t_share_with_label_group AS
+(
+    SELECT  exposure_id
+            ,COUNT(1) AS share_cnt
+            ,SUM(return_1_pv) AS return_1_pv
+            ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))),",")),0) AS return_1_uv
+            ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))) AS return_1_mids -- may be NULL
+            ,SUM(return_n_pv) AS return_n_pv
+            ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))),",")),0) AS return_n_uv
+            ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))) AS return_n_mids -- may be NULL
+            ,SUM(new_exposure_cnt) AS new_exposure_cnt
+    FROM    t_share_with_label
+    GROUP BY exposure_id
+)
+-- Latest-partition mapping root_source_id -> group_name, one row per id.
+-- Fix: the original ROW_NUMBER() had no ORDER BY, so which group_name survived
+-- rn = 1 was nondeterministic across runs; ordering by group_name makes the
+-- pick stable without changing the output schema.
+,t_root_source_id_group_name AS
+(
+    SELECT  *
+    FROM    (
+                SELECT  root_source_id
+                        ,group_name
+                        ,ROW_NUMBER() OVER (PARTITION BY root_source_id ORDER BY group_name) AS rn
+                FROM    loghubods.changwen_rootsourceid_group_hour
+                WHERE   dt = MAX_PT('loghubods.changwen_rootsourceid_group_hour')
+            )
+    WHERE   rn = 1
+)
+-- Final wide row per exposure: base exposure attributes, share/return labels,
+-- and B/C/D chain metrics, all LEFT JOINed on the exposure id (absent metrics
+-- default to 0 via COALESCE; mid lists are comma-joined at the end).
+,t_exposure_share_return AS
+(
+    SELECT  apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,CASE   WHEN pagesource REGEXP 'pages/user-videos-share-recommend$' THEN '回流后沉浸页&内页feed'
+                    WHEN pagesource REGEXP 'pages/detail-recommend$' THEN '详情后沉浸页'
+                    WHEN pagesource REGEXP 'pages/user-videos-share$' THEN '回流页'
+                    WHEN pagesource REGEXP 'pages/user-videos-detail$' THEN '详情页'
+                    WHEN pagesource REGEXP 'pages/category$' THEN '首页feed'
+                    ELSE '其他'
+            END AS pagesource_new
+            ,recommendlogvo -- raw response log of the recommendation algorithm
+            ,abcode -- AB group of the recommendation algorithm
+            ,recommendpagetype -- 3 return-header types; 2 scroll types (immersive-page scroll and feed scroll)
+            ,recomtraceid
+            ,headvideoid
+            ,rootsourceid
+            ,hotsencetype
+            ,flowpool -- e.g. 14#68#3#1735262438476#2
+            ,level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,province
+            ,city
+            ,ts
+            ,IF(COALESCE(share_cnt,0) > 0,1,0) AS is_share
+            ,COALESCE(share_cnt,0) AS share_cnt
+            ,IF(COALESCE(return_1_uv,0) > 0,1,0) AS is_return_1
+            ,COALESCE(return_1_pv,0) AS return_1_pv
+            ,COALESCE(return_1_uv,0) AS return_1_uv
+            ,IF(COALESCE(return_n_pv,0) > 0,1,0) AS is_return_n
+            ,COALESCE(return_n_pv,0) AS return_n_pv
+            ,COALESCE(return_n_uv,0) AS return_n_uv
+            ,IF(COALESCE(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0),0) > 0,1,0) AS is_return_noself
+            ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0) AS return_1_uv_noself
+            ,IF(COALESCE(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0),0) > 0,1,0) AS is_return_n_noself
+            ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0) AS return_n_uv_noself
+            -- fix: single-argument COALESCE(new_exposure_cnt) was a no-op and left
+            -- NULLs for exposures without shares; default to 0 like the sibling counters
+            ,COALESCE(new_exposure_cnt,0) AS new_exposure_cnt
+            -- ========== B chain ==========
+            ,COALESCE(b_exp.bn_exp, 0) AS bn_exp
+            ,COALESCE(bn_hop.bn_pv, 0) AS bn_pv
+            ,COALESCE(SIZE(bn_hop.bn_mids), 0) AS bn_uv
+            ,COALESCE(b_exp.b1_exp, 0) AS b1_exp
+            ,COALESCE(bn_hop.b1_pv, 0) AS b1_pv
+            ,COALESCE(SIZE(bn_hop.b1_mids), 0) AS b1_uv
+            ,COALESCE(b_exp.b2_exp, 0) AS b2_exp
+            ,COALESCE(bn_hop.b2_pv, 0) AS b2_pv
+            ,COALESCE(SIZE(bn_hop.b2_mids), 0) AS b2_uv
+            ,COALESCE(b_exp.b3_exp, 0) AS b3_exp
+            ,COALESCE(bn_hop.b3_pv, 0) AS b3_pv
+            ,COALESCE(SIZE(bn_hop.b3_mids), 0) AS b3_uv
+            -- ========== C chain hop1 ==========
+            ,COALESCE(c_hop1_exp.cn_1_exp, 0) AS cn_1_exp
+            ,COALESCE(c_hop1.cn_1_pv, 0) AS cn_1_pv
+            ,COALESCE(SIZE(c_hop1.cn_1_mids), 0) AS cn_1_uv
+            ,COALESCE(c_hop1_exp.c1_1_exp, 0) AS c1_1_exp
+            ,COALESCE(c_hop1.c1_1_pv, 0) AS c1_1_pv
+            ,COALESCE(SIZE(c_hop1.c1_1_mids), 0) AS c1_1_uv
+            ,COALESCE(c_hop1_exp.c2_1_exp, 0) AS c2_1_exp
+            ,COALESCE(c_hop1.c2_1_pv, 0) AS c2_1_pv
+            ,COALESCE(SIZE(c_hop1.c2_1_mids), 0) AS c2_1_uv
+            ,COALESCE(c_hop1_exp.c3_1_exp, 0) AS c3_1_exp
+            ,COALESCE(c_hop1.c3_1_pv, 0) AS c3_1_pv
+            ,COALESCE(SIZE(c_hop1.c3_1_mids), 0) AS c3_1_uv
+            -- ========== C chain hop2 ==========
+            ,COALESCE(c_hop2_exp.cn_2_exp, 0) AS cn_2_exp
+            ,COALESCE(c_hop2.cn_2_pv, 0) AS cn_2_pv
+            ,COALESCE(SIZE(c_hop2.cn_2_mids), 0) AS cn_2_uv
+            ,COALESCE(c_hop2_exp.c1_2_exp, 0) AS c1_2_exp
+            ,COALESCE(c_hop2.c1_2_pv, 0) AS c1_2_pv
+            ,COALESCE(SIZE(c_hop2.c1_2_mids), 0) AS c1_2_uv
+            ,COALESCE(c_hop2_exp.c2_2_exp, 0) AS c2_2_exp
+            ,COALESCE(c_hop2.c2_2_pv, 0) AS c2_2_pv
+            ,COALESCE(SIZE(c_hop2.c2_2_mids), 0) AS c2_2_uv
+            ,COALESCE(c_hop2_exp.c3_2_exp, 0) AS c3_2_exp
+            ,COALESCE(c_hop2.c3_2_pv, 0) AS c3_2_pv
+            ,COALESCE(SIZE(c_hop2.c3_2_mids), 0) AS c3_2_uv
+            -- ========== C chain hop3 ==========
+            ,COALESCE(c_hop3_exp.cn_3_exp, 0) AS cn_3_exp
+            ,COALESCE(c_hop3.cn_3_pv, 0) AS cn_3_pv
+            ,COALESCE(SIZE(c_hop3.cn_3_mids), 0) AS cn_3_uv
+            ,COALESCE(c_hop3_exp.c1_3_exp, 0) AS c1_3_exp
+            ,COALESCE(c_hop3.c1_3_pv, 0) AS c1_3_pv
+            ,COALESCE(SIZE(c_hop3.c1_3_mids), 0) AS c1_3_uv
+            ,COALESCE(c_hop3_exp.c2_3_exp, 0) AS c2_3_exp
+            ,COALESCE(c_hop3.c2_3_pv, 0) AS c2_3_pv
+            ,COALESCE(SIZE(c_hop3.c2_3_mids), 0) AS c2_3_uv
+            ,COALESCE(c_hop3_exp.c3_3_exp, 0) AS c3_3_exp
+            ,COALESCE(c_hop3.c3_3_pv, 0) AS c3_3_pv
+            ,COALESCE(SIZE(c_hop3.c3_3_mids), 0) AS c3_3_uv
+            -- ========== D chain ==========
+            ,COALESCE(d0_hop.d0, 0) AS d0
+            -- D hop1
+            ,COALESCE(d_hop1_exp.dn_1_exp, 0) AS dn_1_exp
+            ,COALESCE(d_hop1.dn_1_pv, 0) AS dn_1_pv
+            ,COALESCE(SIZE(d_hop1.dn_1_mids), 0) AS dn_1_uv
+            ,COALESCE(d_hop1_exp.d1_1_exp, 0) AS d1_1_exp
+            ,COALESCE(d_hop1.d1_1_pv, 0) AS d1_1_pv
+            ,COALESCE(SIZE(d_hop1.d1_1_mids), 0) AS d1_1_uv
+            ,COALESCE(d_hop1_exp.d2_1_exp, 0) AS d2_1_exp
+            ,COALESCE(d_hop1.d2_1_pv, 0) AS d2_1_pv
+            ,COALESCE(SIZE(d_hop1.d2_1_mids), 0) AS d2_1_uv
+            ,COALESCE(d_hop1_exp.d3_1_exp, 0) AS d3_1_exp
+            ,COALESCE(d_hop1.d3_1_pv, 0) AS d3_1_pv
+            ,COALESCE(SIZE(d_hop1.d3_1_mids), 0) AS d3_1_uv
+            -- D hop2
+            ,COALESCE(d_hop2_exp.dn_2_exp, 0) AS dn_2_exp
+            ,COALESCE(d_hop2.dn_2_pv, 0) AS dn_2_pv
+            ,COALESCE(SIZE(d_hop2.dn_2_mids), 0) AS dn_2_uv
+            ,COALESCE(d_hop2_exp.d1_2_exp, 0) AS d1_2_exp
+            ,COALESCE(d_hop2.d1_2_pv, 0) AS d1_2_pv
+            ,COALESCE(SIZE(d_hop2.d1_2_mids), 0) AS d1_2_uv
+            ,COALESCE(d_hop2_exp.d2_2_exp, 0) AS d2_2_exp
+            ,COALESCE(d_hop2.d2_2_pv, 0) AS d2_2_pv
+            ,COALESCE(SIZE(d_hop2.d2_2_mids), 0) AS d2_2_uv
+            ,COALESCE(d_hop2_exp.d3_2_exp, 0) AS d3_2_exp
+            ,COALESCE(d_hop2.d3_2_pv, 0) AS d3_2_pv
+            ,COALESCE(SIZE(d_hop2.d3_2_mids), 0) AS d3_2_uv
+            -- D hop3
+            ,COALESCE(d_hop3_exp.dn_3_exp, 0) AS dn_3_exp
+            ,COALESCE(d_hop3.dn_3_pv, 0) AS dn_3_pv
+            ,COALESCE(SIZE(d_hop3.dn_3_mids), 0) AS dn_3_uv
+            ,COALESCE(d_hop3_exp.d1_3_exp, 0) AS d1_3_exp
+            ,COALESCE(d_hop3.d1_3_pv, 0) AS d1_3_pv
+            ,COALESCE(SIZE(d_hop3.d1_3_mids), 0) AS d1_3_uv
+            ,COALESCE(d_hop3_exp.d2_3_exp, 0) AS d2_3_exp
+            ,COALESCE(d_hop3.d2_3_pv, 0) AS d2_3_pv
+            ,COALESCE(SIZE(d_hop3.d2_3_mids), 0) AS d2_3_uv
+            ,COALESCE(d_hop3_exp.d3_3_exp, 0) AS d3_3_exp
+            ,COALESCE(d_hop3.d3_3_pv, 0) AS d3_3_pv
+            ,COALESCE(SIZE(d_hop3.d3_3_mids), 0) AS d3_3_uv
+            ,JSON_FORMAT(
+                        JSON_OBJECT("animationSceneType",animationSceneType,"extParams",extParams,"rootsessionid",rootsessionid_new,"versioncode",versioncode,"group_name",tc.group_name)
+            ) AS extend
+            -- ========== mid-list fields (variable length, kept at the end) ==========
+            ,return_1_mids
+            ,return_n_mids
+            ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid),",") AS return_1_mids_noself
+            ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid),",") AS return_n_mids_noself
+            ,CONCAT_WS(',', bn_hop.bn_mids) AS bn_mids
+            ,CONCAT_WS(',', bn_hop.b1_mids) AS b1_mids
+            ,CONCAT_WS(',', bn_hop.b2_mids) AS b2_mids
+            ,CONCAT_WS(',', bn_hop.b3_mids) AS b3_mids
+            ,CONCAT_WS(',', c_hop1.cn_1_mids) AS cn_1_mids
+            ,CONCAT_WS(',', c_hop1.c1_1_mids) AS c1_1_mids
+            ,CONCAT_WS(',', c_hop1.c2_1_mids) AS c2_1_mids
+            ,CONCAT_WS(',', c_hop1.c3_1_mids) AS c3_1_mids
+            ,CONCAT_WS(',', c_hop2.cn_2_mids) AS cn_2_mids
+            ,CONCAT_WS(',', c_hop2.c1_2_mids) AS c1_2_mids
+            ,CONCAT_WS(',', c_hop2.c2_2_mids) AS c2_2_mids
+            ,CONCAT_WS(',', c_hop2.c3_2_mids) AS c3_2_mids
+            ,CONCAT_WS(',', c_hop3.cn_3_mids) AS cn_3_mids
+            ,CONCAT_WS(',', c_hop3.c1_3_mids) AS c1_3_mids
+            ,CONCAT_WS(',', c_hop3.c2_3_mids) AS c2_3_mids
+            ,CONCAT_WS(',', c_hop3.c3_3_mids) AS c3_3_mids
+            ,CONCAT_WS(',', d_hop1.dn_1_mids) AS dn_1_mids
+            ,CONCAT_WS(',', d_hop1.d1_1_mids) AS d1_1_mids
+            ,CONCAT_WS(',', d_hop1.d2_1_mids) AS d2_1_mids
+            ,CONCAT_WS(',', d_hop1.d3_1_mids) AS d3_1_mids
+            ,CONCAT_WS(',', d_hop2.dn_2_mids) AS dn_2_mids
+            ,CONCAT_WS(',', d_hop2.d1_2_mids) AS d1_2_mids
+            ,CONCAT_WS(',', d_hop2.d2_2_mids) AS d2_2_mids
+            ,CONCAT_WS(',', d_hop2.d3_2_mids) AS d3_2_mids
+            ,CONCAT_WS(',', d_hop3.dn_3_mids) AS dn_3_mids
+            ,CONCAT_WS(',', d_hop3.d1_3_mids) AS d1_3_mids
+            ,CONCAT_WS(',', d_hop3.d2_3_mids) AS d2_3_mids
+            ,CONCAT_WS(',', d_hop3.d3_3_mids) AS d3_3_mids
+            ,SUBSTR(dthh,1,8) AS dt
+            ,SUBSTR(dthh,9,2) AS hh
+    FROM    t_exposure ta
+    LEFT JOIN t_share_with_label_group tb
+    ON      ta.id = tb.exposure_id
+    LEFT JOIN t_root_source_id_group_name tc
+    ON      ta.rootsourceid = tc.root_source_id
+    LEFT JOIN t_exposure_bn bn_hop
+    ON      ta.id = bn_hop.exposure_id
+    LEFT JOIN t_b_exp b_exp
+    ON      ta.id = b_exp.exposure_id
+    LEFT JOIN t_d0 d0_hop
+    ON      ta.id = d0_hop.exposure_id
+    LEFT JOIN t_c_hop1 c_hop1
+    ON      ta.id = c_hop1.exposure_id
+    LEFT JOIN t_c_hop1_exp c_hop1_exp
+    ON      ta.id = c_hop1_exp.exposure_id
+    LEFT JOIN t_c_hop2 c_hop2
+    ON      ta.id = c_hop2.exposure_id
+    LEFT JOIN t_c_hop2_exp c_hop2_exp
+    ON      ta.id = c_hop2_exp.exposure_id
+    LEFT JOIN t_c_hop3 c_hop3
+    ON      ta.id = c_hop3.exposure_id
+    LEFT JOIN t_c_hop3_exp c_hop3_exp
+    ON      ta.id = c_hop3_exp.exposure_id
+    LEFT JOIN t_d_hop1 d_hop1
+    ON      ta.id = d_hop1.exposure_id
+    LEFT JOIN t_d_hop1_exp d_hop1_exp
+    ON      ta.id = d_hop1_exp.exposure_id
+    LEFT JOIN t_d_hop2 d_hop2
+    ON      ta.id = d_hop2.exposure_id
+    LEFT JOIN t_d_hop2_exp d_hop2_exp
+    ON      ta.id = d_hop2_exp.exposure_id
+    LEFT JOIN t_d_hop3 d_hop3
+    ON      ta.id = d_hop3.exposure_id
+    LEFT JOIN t_d_hop3_exp d_hop3_exp
+    ON      ta.id = d_hop3_exp.exposure_id
+)SELECT  *
+FROM    t_exposure_share_return
+;

+ 202 - 0
table_gen/loghubods.dwd_recsys_alg_exposure_base_view_20250402.sql

@@ -0,0 +1,202 @@
+--@exclude_input=loghubods.video_action_log_flow_new
+-- =====================================================================
+-- 上游曝光去重视图表 (行级, 每行 = 一次去重后的曝光)
+-- 版本: 20250402 (最早版本, 纯曝光去重, 无分享/回流)
+-- LIFECYCLE 3 (仅保留 3 天, 作为下游 base 表的输入中间层)
+-- =====================================================================
+--
+-- 数据源: video_action_log_flow_new (businesstype=videoView)
+-- 过滤:   apptype NOT IN ('12'), mid/videoid IS NOT NULL
+--
+-- 去重逻辑:
+--   ROW_NUMBER OVER (PARTITION BY dthh,apptype,uid,mid,vid,sessionid,subsessionid,pagesource
+--                    ORDER BY logtimestamp) = 1
+--
+-- 输出拆分 (UNION ALL):
+--   非 share 页: pagesource NOT REGEXP "-pages/user-videos-share$", rn=1
+--   share 页:    pagesource REGEXP "-pages/user-videos-share$" (不过滤 rn, 保留全部)
+--
+-- id 生成: CONCAT(dthh, ":", subsessionid, ":", dthh_id)
+--   dthh_id = ROW_NUMBER OVER (PARTITION BY dthh,subsessionid ORDER BY clienttimestamp DESC)
+--
+-- 与 base_20260209 的关系:
+--   base_20260209.t_exposure CTE 复用了本表的去重逻辑
+--   本表仅输出曝光字段, base 表在此基础上 JOIN 分享/回流/B/C/D 链
+-- =====================================================================
+-- DDL for the deduplicated upstream exposure view table (one row per exposure).
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_view_20250402
+(
+        dthh_id string 
+        ,dthh string
+        ,apptype string
+        ,uid string
+        ,mid string
+        ,vid string
+        ,sessionid string
+        ,subsessionid string
+        ,rootsessionid_new string
+        ,pagesource string
+        ,recommendlogvo  string-- raw response log of the recommendation algorithm
+        ,abcode  string-- AB group of the recommendation algorithm
+        ,recommendpagetype string -- 3 return-header types; 2 scroll types (immersive-page scroll and feed scroll)
+        ,recomtraceid string
+        ,headvideoid string
+        ,rootsourceid string
+        ,hotsencetype string
+        ,animationSceneType string
+        ,extParams string  
+        ,flowpool string-- e.g. 14#68#3#1735262438476#2
+        ,level string
+        ,clientip string
+        ,machineinfo_brand string
+        ,machineinfo_model string 
+        ,machineinfo_system string 
+        ,machineinfo_wechatversion string
+        ,machineinfo_sdkversion string
+        ,province string
+        ,city string
+        ,versioncode string
+        , ts string
+        ,rn string
+        ,id  string
+)
+PARTITIONED BY 
+(
+    dt                         STRING COMMENT '日期:20240105'
+    ,hh                        STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('columnar.nested.type' = 'true','comment' = '推荐算法-labelmatch表-上游曝光表')
+LIFECYCLE 3
+;
+SET hive.exec.dynamic.partition = true
+;
+
+SET hive.exec.dynamic.partition.mode = nonstrict
+;
+
+
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_base_view_20250402 PARTITION (dt,hh)
+
+
+-- Deduplicate videoView exposure events for one hour. Two output branches:
+-- non-share pages keep only the first event per dedup key (rn = 1), share
+-- pages keep every event.
+WITH t_exposure_base AS 
+(
+    SELECT  ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),subsessionid ORDER BY clienttimestamp DESC ) AS dthh_id
+            ,CONCAT(year,month,day,hour) AS dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,videoid AS vid
+            ,sessionid
+            ,subsessionid
+            ,rootsessionid_new
+            ,pagesource
+            ,recommendlogvo -- raw response log of the recommendation algorithm
+            ,COALESCE(GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003'),"unknown") AS abcode -- AB group of the recommendation algorithm
+            ,GET_JSON_OBJECT(extparams,'$.recommendPageType') AS recommendpagetype -- 3 return-header types; 2 scroll types (immersive-page scroll and feed scroll)
+            ,GET_JSON_OBJECT(extparams,'$.recomTraceId') AS recomtraceid
+            ,CASE   WHEN GET_JSON_OBJECT(extParams,'$.head_videoid') IS NOT NULL THEN GET_JSON_OBJECT(extParams,'$.head_videoid')
+                    ELSE GET_JSON_OBJECT(extParams,'$.head_videoId')
+            END AS headvideoid
+            ,GET_JSON_OBJECT(extParams,'$.rootSourceId') AS rootsourceid
+            ,COALESCE(hotsencetype,sencetype,"other") AS hotsencetype
+            ,GET_JSON_OBJECT(extParams,'$.animationSceneType') AS animationSceneType
+            ,extParams AS extParams
+            ,flowpool -- e.g. 14#68#3#1735262438476#2
+            ,SPLIT(flowpool,'#')[2] AS level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,ANALYSISIP(clientip,"region") AS province
+            ,ANALYSISIP(clientip,"city") AS city
+            ,versioncode
+            ,CAST(logtimestamp / 1000 AS BIGINT) AS ts
+            ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,uid,mid,videoid,sessionid,subsessionid,pagesource ORDER BY logtimestamp ) AS rn
+            ,CONCAT(year,month,day) AS dt
+            ,hour AS hh
+    FROM    loghubods.video_action_log_flow_new
+    -- NOTE(review): both BETWEEN bounds are the same expression (-1 hour), so this
+    -- is effectively a single-hour window — presumably intentional; confirm.
+    WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+    AND     businesstype IN ('videoView')
+    AND     apptype IS NOT NULL
+    AND     apptype NOT IN ('12')
+    AND     mid IS NOT NULL
+    AND     videoid IS NOT NULL
+)
+-- Branch 1: non-share pages, deduplicated (rn = 1).
+SELECT  dthh_id
+        ,dthh
+        ,apptype
+        ,uid
+        ,mid
+        ,vid
+        ,sessionid
+        ,subsessionid
+        ,rootsessionid_new
+        ,pagesource
+        ,recommendlogvo
+        ,abcode
+        ,recommendpagetype
+        ,recomtraceid
+        ,headvideoid
+        ,rootsourceid
+        ,hotsencetype
+        ,animationscenetype
+        ,extparams
+        ,flowpool
+        ,level
+        ,clientip
+        ,machineinfo_brand
+        ,machineinfo_model
+        ,machineinfo_system
+        ,machineinfo_wechatversion
+        ,machineinfo_sdkversion
+        ,province
+        ,city
+        ,versioncode
+        ,ts
+        ,rn
+        ,CONCAT(dthh,":",subsessionid,":",dthh_id) AS id
+        ,dt
+        ,hh
+FROM    t_exposure_base
+WHERE   pagesource NOT REGEXP "-pages/user-videos-share$"
+AND     rn = 1
+UNION ALL
+-- Branch 2: share pages, all rows kept (no rn filter).
+SELECT  dthh_id
+        ,dthh
+        ,apptype
+        ,uid
+        ,mid
+        ,vid
+        ,sessionid
+        ,subsessionid
+        ,rootsessionid_new
+        ,pagesource
+        ,recommendlogvo
+        ,abcode
+        ,recommendpagetype
+        ,recomtraceid
+        ,headvideoid
+        ,rootsourceid
+        ,hotsencetype
+        ,animationscenetype
+        ,extparams
+        ,flowpool
+        ,level
+        ,clientip
+        ,machineinfo_brand
+        ,machineinfo_model
+        ,machineinfo_system
+        ,machineinfo_wechatversion
+        ,machineinfo_sdkversion
+        ,province
+        ,city
+        ,versioncode
+        ,ts
+        ,rn
+        ,CONCAT(dthh,":",subsessionid,":",dthh_id) AS id
+        ,dt
+        ,hh
+FROM    t_exposure_base
+WHERE   pagesource REGEXP "-pages/user-videos-share$"

+ 194 - 0
table_gen/test_1.sql

@@ -0,0 +1,194 @@
+WITH
+-- User tiering by share/return volume over the past year (previous day's
+-- partition); drops the all-users bucket and R50-family labels.
+t_user_type AS (
+    SELECT  DISTINCT type, openid
+    FROM    loghubods.mid_share_return_people_1year
+    WHERE   dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
+    AND     type IS NOT NULL
+    AND     type != 'S_ALL'
+    AND     type NOT REGEXP 'R50'
+)
+-- Model prediction scores, deduplicated to one row per
+-- (apptype, videoid, recommendtraceid).
+-- Fix: the original ROW_NUMBER() had no ORDER BY, so which record survived
+-- rn = 1 was nondeterministic; order by sortscore DESC to keep a stable,
+-- highest-ranked record.
+,t_score AS (
+    SELECT  apptype
+            ,videoid
+            ,recommendtraceid
+            ,scoresmap
+            ,sortscore
+    FROM    (
+                SELECT  apptype
+                        ,videoid
+                        ,recommendtraceid
+                        ,scoresmap
+                        ,sortscore
+                        ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid ORDER BY sortscore DESC) AS rn
+                FROM    loghubods.statistics_log_hour
+                WHERE   dt = '${dt}'
+                AND     scoresmap IS NOT NULL
+            )
+    WHERE   rn = 1
+)
+-- Wide table: exposure base rows enriched with user tier, head/target video
+-- tags, model scores and an hour bucket.
+-- NOTE(review): the video_merge_tag joins assume videoid is unique in that
+-- table — a duplicate would fan out base rows; confirm.
+,t_wide AS (
+    SELECT  base.*
+            ,CASE WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                  WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                  -- NOTE(review): R_50_100 and R_100_180 are folded into the
+                  -- 'R_180_330' label — looks intentional but verify.
+                  WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                  ELSE e.type
+             END AS user_type
+            ,vt_head.merge_leve2  AS head_merge_leve2
+            ,vt_vid.merge_leve2   AS vid_merge_leve2
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            -- calibration: scaled power transform of the raw XGB score
+            ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
+            ,e1.sortscore
+            ,CASE
+                WHEN CAST(hh AS INT) BETWEEN 0  AND 3  THEN '00-03'
+                WHEN CAST(hh AS INT) BETWEEN 4  AND 7  THEN '04-07'
+                WHEN CAST(hh AS INT) BETWEEN 8  AND 11 THEN '08-11'
+                WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
+                WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
+                WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
+                ELSE '-'
+             END AS hh_bucket
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    LEFT JOIN t_user_type e
+    -- strip the 'weixin_openid_' prefix from mid to match the openid key
+    ON      SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
+    LEFT JOIN loghubods.video_merge_tag vt_head
+    ON      base.headvideoid = vt_head.videoid
+    LEFT JOIN loghubods.video_merge_tag vt_vid
+    ON      base.vid = vt_vid.videoid
+    LEFT JOIN t_score e1
+    ON      base.apptype = e1.apptype
+    AND     base.vid = e1.videoid
+    AND     base.recomtraceid = e1.recommendtraceid
+    WHERE   base.dt = '${dt}'
+)
+
+SELECT
+        -- ==================== 维度列 ====================
+        CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END              AS user_type
+        ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END              AS hh_bucket
+        ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
+        ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END   AS vid_merge_leve2
+
+        -- ==================== 基础流量 ====================
+        ,COUNT(1)                                                                       AS exposure_cnt
+        ,COUNT(DISTINCT mid)                                                            AS exposure_uv
+        ,COUNT(DISTINCT vid)                                                            AS vid_cnt
+        ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4)                                      AS exposure_per_user
+
+        -- ==================== 分享 ====================
+        ,SUM(CAST(is_share AS BIGINT))                                                  AS share_exposure_cnt
+        ,SUM(CAST(share_cnt AS BIGINT))                                                 AS share_cnt
+
+        -- ==================== STR 指标 ====================
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS str_real
+        ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6)                          AS str_pred
+        ,ROUND(
+            (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
+        , 4)                                                                                   AS str_copc
+        ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6)                      AS str_mae
+        ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6)                      AS str_var
+
+        -- ==================== ROSN 指标 ====================
+        ,ROUND(COALESCE(
+            SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
+        , 0), 6)                                                                               AS rosn_real
+        ,ROUND(COALESCE(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6)  AS rosn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
+            / NULLIF(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
+        , 4)                                                                                   AS rosn_copc
+        ,ROUND(AVG(rosn_pred_origin), 6)                                                      AS rosn_pred_origin
+        ,ROUND(AVG(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
+            END
+        ), 6)                                                                                  AS rosn_mae
+        ,ROUND(VARIANCE(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
+            END
+        ), 6)                                                                                  AS rosn_var
+
+        -- ==================== ROVN 指标 ====================
+        ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS rovn
+        ,ROUND(AVG(str_pred * rosn_pred), 6)                                                    AS rovn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(AVG(str_pred * rosn_pred), 0)
+        , 4)                                                                                     AS rovn_copc
+        ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6)            AS rovn_mae
+        ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6)        AS rovn_var
+        ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6)                                               AS sortscore_avg
+
+        -- ==================== B链 ====================
+        ,SUM(CAST(bn_exp AS BIGINT))                                                    AS bn_exp
+        ,SUM(CAST(bn_pv AS BIGINT))                                                     AS bn_pv
+        ,SUM(CAST(bn_uv AS BIGINT))                                                     AS bn_uv
+        ,SUM(CAST(b1_exp AS BIGINT))                                                    AS b1_exp
+        ,SUM(CAST(b1_pv AS BIGINT))                                                     AS b1_pv
+        ,SUM(CAST(b1_uv AS BIGINT))                                                     AS b1_uv
+        ,SUM(CAST(b2_exp AS BIGINT))                                                    AS b2_exp
+        ,SUM(CAST(b2_pv AS BIGINT))                                                     AS b2_pv
+        ,SUM(CAST(b2_uv AS BIGINT))                                                     AS b2_uv
+        ,SUM(CAST(b3_exp AS BIGINT))                                                    AS b3_exp
+        ,SUM(CAST(b3_pv AS BIGINT))                                                     AS b3_pv
+        ,SUM(CAST(b3_uv AS BIGINT))                                                     AS b3_uv
+
+        -- ==================== C链 (全量depth, 按hop) ====================
+        ,SUM(CAST(cn_1_exp AS BIGINT))                                                  AS cn_1_exp
+        ,SUM(CAST(cn_1_pv AS BIGINT))                                                   AS cn_1_pv
+        ,SUM(CAST(cn_1_uv AS BIGINT))                                                   AS cn_1_uv
+        ,SUM(CAST(cn_2_exp AS BIGINT))                                                  AS cn_2_exp
+        ,SUM(CAST(cn_2_pv AS BIGINT))                                                   AS cn_2_pv
+        ,SUM(CAST(cn_2_uv AS BIGINT))                                                   AS cn_2_uv
+        ,SUM(CAST(cn_3_exp AS BIGINT))                                                  AS cn_3_exp
+        ,SUM(CAST(cn_3_pv AS BIGINT))                                                   AS cn_3_pv
+        ,SUM(CAST(cn_3_uv AS BIGINT))                                                   AS cn_3_uv
+        ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))  AS cn_total_uv
+
+        -- ==================== D链 (全量depth, 按hop) ====================
+        ,SUM(CAST(d0 AS BIGINT))                                                        AS d0
+        ,SUM(CAST(dn_1_exp AS BIGINT))                                                  AS dn_1_exp
+        ,SUM(CAST(dn_1_pv AS BIGINT))                                                   AS dn_1_pv
+        ,SUM(CAST(dn_1_uv AS BIGINT))                                                   AS dn_1_uv
+        ,SUM(CAST(dn_2_exp AS BIGINT))                                                  AS dn_2_exp
+        ,SUM(CAST(dn_2_pv AS BIGINT))                                                   AS dn_2_pv
+        ,SUM(CAST(dn_2_uv AS BIGINT))                                                   AS dn_2_uv
+        ,SUM(CAST(dn_3_exp AS BIGINT))                                                  AS dn_3_exp
+        ,SUM(CAST(dn_3_pv AS BIGINT))                                                   AS dn_3_pv
+        ,SUM(CAST(dn_3_uv AS BIGINT))                                                   AS dn_3_uv
+        ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))  AS dn_total_uv
+
+        -- ==================== 全链路 ====================
+        ,SUM(CAST(return_n_uv_noself AS BIGINT))
+            + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+            + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+                                                                                        AS all_return_n_uv
+        ,ROUND(COALESCE(
+            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(COUNT(1), 0)
+        , 0), 6)                                                                        AS all_rovn
+
+
+-- =====================================================================
+-- FROM + GROUP BY CUBE
+-- =====================================================================
+
+FROM    t_wide
+
+GROUP BY CUBE(
+            user_type
+            ,hh_bucket
+            ,head_merge_leve2
+            ,vid_merge_leve2
+        )
+
+ORDER BY exposure_cnt DESC
+limit 1000
+;

+ 300 - 0
table_gen/test_2.sql

@@ -0,0 +1,300 @@
+WITH
+-- User tier by pull-to-active (share-return) volume; reads yesterday's partition
+-- and drops the aggregate 'S_ALL' bucket and any 'R50'-pattern types
+t_user_type AS (
+    SELECT  DISTINCT type, openid
+    FROM    loghubods.mid_share_return_people_1year
+    WHERE   dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
+    AND     type IS NOT NULL
+    AND     type != 'S_ALL'
+    AND     type NOT REGEXP 'R50'
+)
+-- Model prediction scores, deduplicated to one row per (apptype, videoid, recommendtraceid)
+,t_score AS (
+    SELECT  apptype
+            ,videoid
+            ,recommendtraceid
+            ,scoresmap
+            ,sortscore
+    FROM    (
+                SELECT  apptype
+                        ,videoid
+                        ,recommendtraceid
+                        ,scoresmap
+                        ,sortscore
+                        -- NOTE(review): window has no ORDER BY, so which duplicate
+                        -- survives rn = 1 is engine-dependent — confirm intentional
+                        ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid) AS rn
+                FROM    loghubods.statistics_log_hour
+                WHERE   dt LIKE '${dt}%'
+                AND     scoresmap IS NOT NULL
+            )
+    WHERE   rn = 1
+)
+-- TOP1 entry-content category ("venue"): single category with the highest returning-user sum
+,t_top_head_cate AS (
+    SELECT  vt.merge_leve2
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    JOIN    loghubods.video_merge_tag vt ON base.headvideoid = vt.videoid
+    WHERE   base.dt = '${dt}'
+    AND     vt.merge_leve2 IS NOT NULL
+    GROUP BY vt.merge_leve2
+    ORDER BY SUM(CAST(is_return_noself AS BIGINT)) DESC
+    LIMIT   1
+)
+-- TOP10 recommended-content categories ("goods"): top 10 by exposure count
+,t_top_vid_cate AS (
+    SELECT  vt.merge_leve2
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+    WHERE   base.dt = '${dt}'
+    AND     vt.merge_leve2 IS NOT NULL
+    GROUP BY vt.merge_leve2
+    ORDER BY COUNT(1) DESC
+    LIMIT   10
+)
+-- TOP1 video id per category ("goods"): top-exposure vid in each category (exposures > 100k)
+,t_top_vid AS (
+    SELECT  merge_leve2, vid
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,base.vid
+                        ,COUNT(1)                                                               AS exp_cnt
+                        ,ROW_NUMBER() OVER (PARTITION BY vt.merge_leve2 ORDER BY COUNT(1) DESC) AS rk
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2, base.vid
+                HAVING  exp_cnt > 100000
+            )
+    WHERE   rk <= 1
+)
+-- Wide table: exposure base joined with user tier, category tags, model scores,
+-- and the TOP-category / TOP-vid filters; non-TOP values collapse to '其他' (other)
+,t_wide AS (
+    SELECT  base.*
+            ,CASE WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                  WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                  -- NOTE(review): this bucket spans R_50_100..R_180_330 but is
+                  -- labelled 'R_180_330' — presumably should read 'R50-330'; confirm
+                  WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                  ELSE e.type
+             END AS user_type
+            ,CASE WHEN th.merge_leve2 IS NOT NULL THEN vt_head.merge_leve2 ELSE '其他' END AS head_merge_leve2
+            ,CASE WHEN tv.merge_leve2 IS NOT NULL THEN vt_vid.merge_leve2  ELSE '其他' END AS vid_merge_leve2
+            ,CASE WHEN ti.vid IS NOT NULL          THEN base.vid           ELSE '其他' END AS vid_id
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            -- NOTE(review): 1.22 * score^1.15 looks like an empirical calibration — confirm source
+            ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
+            ,e1.sortscore
+            -- Bucket the exposure hour into six 4-hour windows
+            ,CASE
+                WHEN CAST(hh AS INT) BETWEEN 0  AND 3  THEN '00-03'
+                WHEN CAST(hh AS INT) BETWEEN 4  AND 7  THEN '04-07'
+                WHEN CAST(hh AS INT) BETWEEN 8  AND 11 THEN '08-11'
+                WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
+                WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
+                WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
+                ELSE '-'
+             END AS hh_bucket
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    LEFT JOIN t_user_type e
+    ON      SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
+    LEFT JOIN loghubods.video_merge_tag vt_head
+    ON      base.headvideoid = vt_head.videoid
+    LEFT JOIN loghubods.video_merge_tag vt_vid
+    ON      base.vid = vt_vid.videoid
+    LEFT JOIN t_score e1
+    ON      base.apptype = e1.apptype
+    AND     base.vid = e1.videoid
+    AND     base.recomtraceid = e1.recommendtraceid
+    LEFT JOIN t_top_head_cate th
+    ON      vt_head.merge_leve2 = th.merge_leve2
+    LEFT JOIN t_top_vid_cate tv
+    ON      vt_vid.merge_leve2 = tv.merge_leve2
+    LEFT JOIN t_top_vid ti
+    ON      base.vid = ti.vid
+    WHERE   base.dt = '${dt}'
+)
+
+SELECT
+        -- ==================== Dimension columns ====================
+        -- GROUPING(x) = 1 marks the CUBE roll-up rows; both those and NULL
+        -- dimension values are rendered as 'SUM'
+        '${dt}'                                                                                     AS dt
+        ,CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END              AS user_type
+        ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END              AS hh_bucket
+        ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
+        ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END   AS vid_merge_leve2
+        ,CASE WHEN GROUPING(vid_id) = 1 THEN 'SUM' ELSE NVL(vid_id, 'SUM') END                     AS vid_id
+
+        -- ==================== Base traffic ====================
+        ,COUNT(1)                                                                       AS exposure_cnt
+        ,COUNT(DISTINCT mid)                                                            AS exposure_uv
+        ,COUNT(DISTINCT vid)                                                            AS vid_cnt
+        -- NOTE(review): no NULLIF guard on this divisor, unlike the other ratios below
+        ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4)                                      AS exposure_per_user
+
+        -- ==================== Shares ====================
+        ,SUM(CAST(is_share AS BIGINT))                                                  AS share_exposure_cnt
+        ,SUM(CAST(share_cnt AS BIGINT))                                                 AS share_cnt
+
+        -- ==================== STR metrics (share-then-return rate per exposure) ====================
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS str_real
+        ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6)                          AS str_pred
+        -- COPC = calibration ratio: observed / predicted
+        ,ROUND(
+            (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
+        , 4)                                                                                   AS str_copc
+        ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6)                      AS str_mae
+        ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6)                      AS str_var
+
+        -- ==================== ROSN metrics (return UV per returning exposure) ====================
+        ,ROUND(COALESCE(
+            SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
+        , 0), 6)                                                                               AS rosn_real
+        ,ROUND(COALESCE(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6)  AS rosn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
+            / NULLIF(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
+        , 4)                                                                                   AS rosn_copc
+        ,ROUND(AVG(rosn_pred_origin), 6)                                                      AS rosn_pred_origin
+        -- MAE/VAR restricted to exposures that actually produced a return
+        ,ROUND(AVG(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
+            END
+        ), 6)                                                                                  AS rosn_mae
+        ,ROUND(VARIANCE(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
+            END
+        ), 6)                                                                                  AS rosn_var
+
+        -- ==================== ROVN metrics (return UV per exposure; pred = STR * ROSN) ====================
+        ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS rovn
+        ,ROUND(AVG(str_pred * rosn_pred), 6)                                                    AS rovn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(AVG(str_pred * rosn_pred), 0)
+        , 4)                                                                                     AS rovn_copc
+        ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6)            AS rovn_mae
+        ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6)        AS rovn_var
+        ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6)                                               AS sortscore_avg
+
+        -- ==================== B-chain ====================
+        ,SUM(CAST(bn_exp AS BIGINT))                                                    AS bn_exp
+        ,SUM(CAST(bn_pv AS BIGINT))                                                     AS bn_pv
+        ,SUM(CAST(bn_uv AS BIGINT))                                                     AS bn_uv
+        ,SUM(CAST(b1_exp AS BIGINT))                                                    AS b1_exp
+        ,SUM(CAST(b1_pv AS BIGINT))                                                     AS b1_pv
+        ,SUM(CAST(b1_uv AS BIGINT))                                                     AS b1_uv
+        ,SUM(CAST(b2_exp AS BIGINT))                                                    AS b2_exp
+        ,SUM(CAST(b2_pv AS BIGINT))                                                     AS b2_pv
+        ,SUM(CAST(b2_uv AS BIGINT))                                                     AS b2_uv
+        ,SUM(CAST(b3_exp AS BIGINT))                                                    AS b3_exp
+        ,SUM(CAST(b3_pv AS BIGINT))                                                     AS b3_pv
+        ,SUM(CAST(b3_uv AS BIGINT))                                                     AS b3_uv
+
+        -- ==================== C-chain (all depths, by hop) ====================
+        ,SUM(CAST(cn_1_exp AS BIGINT))                                                  AS cn_1_exp
+        ,SUM(CAST(cn_1_pv AS BIGINT))                                                   AS cn_1_pv
+        ,SUM(CAST(cn_1_uv AS BIGINT))                                                   AS cn_1_uv
+        ,SUM(CAST(cn_2_exp AS BIGINT))                                                  AS cn_2_exp
+        ,SUM(CAST(cn_2_pv AS BIGINT))                                                   AS cn_2_pv
+        ,SUM(CAST(cn_2_uv AS BIGINT))                                                   AS cn_2_uv
+        ,SUM(CAST(cn_3_exp AS BIGINT))                                                  AS cn_3_exp
+        ,SUM(CAST(cn_3_pv AS BIGINT))                                                   AS cn_3_pv
+        ,SUM(CAST(cn_3_uv AS BIGINT))                                                   AS cn_3_uv
+        ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))  AS cn_total_uv
+
+        -- ==================== C-chain (split by depth) ====================
+        ,SUM(CAST(c1_1_exp AS BIGINT))                                                    AS c1_1_exp
+        ,SUM(CAST(c1_1_pv AS BIGINT))                                                     AS c1_1_pv
+        ,SUM(CAST(c1_1_uv AS BIGINT))                                                     AS c1_1_uv
+        ,SUM(CAST(c1_2_exp AS BIGINT))                                                    AS c1_2_exp
+        ,SUM(CAST(c1_2_pv AS BIGINT))                                                     AS c1_2_pv
+        ,SUM(CAST(c1_2_uv AS BIGINT))                                                     AS c1_2_uv
+        ,SUM(CAST(c1_3_exp AS BIGINT))                                                    AS c1_3_exp
+        ,SUM(CAST(c1_3_pv AS BIGINT))                                                     AS c1_3_pv
+        ,SUM(CAST(c1_3_uv AS BIGINT))                                                     AS c1_3_uv
+        ,SUM(CAST(c2_1_exp AS BIGINT))                                                    AS c2_1_exp
+        ,SUM(CAST(c2_1_pv AS BIGINT))                                                     AS c2_1_pv
+        ,SUM(CAST(c2_1_uv AS BIGINT))                                                     AS c2_1_uv
+        ,SUM(CAST(c2_2_exp AS BIGINT))                                                    AS c2_2_exp
+        ,SUM(CAST(c2_2_pv AS BIGINT))                                                     AS c2_2_pv
+        ,SUM(CAST(c2_2_uv AS BIGINT))                                                     AS c2_2_uv
+        ,SUM(CAST(c2_3_exp AS BIGINT))                                                    AS c2_3_exp
+        ,SUM(CAST(c2_3_pv AS BIGINT))                                                     AS c2_3_pv
+        ,SUM(CAST(c2_3_uv AS BIGINT))                                                     AS c2_3_uv
+        ,SUM(CAST(c3_1_exp AS BIGINT))                                                    AS c3_1_exp
+        ,SUM(CAST(c3_1_pv AS BIGINT))                                                     AS c3_1_pv
+        ,SUM(CAST(c3_1_uv AS BIGINT))                                                     AS c3_1_uv
+        ,SUM(CAST(c3_2_exp AS BIGINT))                                                    AS c3_2_exp
+        ,SUM(CAST(c3_2_pv AS BIGINT))                                                     AS c3_2_pv
+        ,SUM(CAST(c3_2_uv AS BIGINT))                                                     AS c3_2_uv
+        ,SUM(CAST(c3_3_exp AS BIGINT))                                                    AS c3_3_exp
+        ,SUM(CAST(c3_3_pv AS BIGINT))                                                     AS c3_3_pv
+        ,SUM(CAST(c3_3_uv AS BIGINT))                                                     AS c3_3_uv
+
+        -- ==================== D-chain (all depths, by hop) ====================
+        ,SUM(CAST(d0 AS BIGINT))                                                        AS d0
+        ,SUM(CAST(dn_1_exp AS BIGINT))                                                  AS dn_1_exp
+        ,SUM(CAST(dn_1_pv AS BIGINT))                                                   AS dn_1_pv
+        ,SUM(CAST(dn_1_uv AS BIGINT))                                                   AS dn_1_uv
+        ,SUM(CAST(dn_2_exp AS BIGINT))                                                  AS dn_2_exp
+        ,SUM(CAST(dn_2_pv AS BIGINT))                                                   AS dn_2_pv
+        ,SUM(CAST(dn_2_uv AS BIGINT))                                                   AS dn_2_uv
+        ,SUM(CAST(dn_3_exp AS BIGINT))                                                  AS dn_3_exp
+        ,SUM(CAST(dn_3_pv AS BIGINT))                                                   AS dn_3_pv
+        ,SUM(CAST(dn_3_uv AS BIGINT))                                                   AS dn_3_uv
+        ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))  AS dn_total_uv
+
+        -- ==================== D-chain (split by depth) ====================
+        ,SUM(CAST(d1_1_exp AS BIGINT))                                                    AS d1_1_exp
+        ,SUM(CAST(d1_1_pv AS BIGINT))                                                     AS d1_1_pv
+        ,SUM(CAST(d1_1_uv AS BIGINT))                                                     AS d1_1_uv
+        ,SUM(CAST(d1_2_exp AS BIGINT))                                                    AS d1_2_exp
+        ,SUM(CAST(d1_2_pv AS BIGINT))                                                     AS d1_2_pv
+        ,SUM(CAST(d1_2_uv AS BIGINT))                                                     AS d1_2_uv
+        ,SUM(CAST(d1_3_exp AS BIGINT))                                                    AS d1_3_exp
+        ,SUM(CAST(d1_3_pv AS BIGINT))                                                     AS d1_3_pv
+        ,SUM(CAST(d1_3_uv AS BIGINT))                                                     AS d1_3_uv
+        ,SUM(CAST(d2_1_exp AS BIGINT))                                                    AS d2_1_exp
+        ,SUM(CAST(d2_1_pv AS BIGINT))                                                     AS d2_1_pv
+        ,SUM(CAST(d2_1_uv AS BIGINT))                                                     AS d2_1_uv
+        ,SUM(CAST(d2_2_exp AS BIGINT))                                                    AS d2_2_exp
+        ,SUM(CAST(d2_2_pv AS BIGINT))                                                     AS d2_2_pv
+        ,SUM(CAST(d2_2_uv AS BIGINT))                                                     AS d2_2_uv
+        ,SUM(CAST(d2_3_exp AS BIGINT))                                                    AS d2_3_exp
+        ,SUM(CAST(d2_3_pv AS BIGINT))                                                     AS d2_3_pv
+        ,SUM(CAST(d2_3_uv AS BIGINT))                                                     AS d2_3_uv
+        ,SUM(CAST(d3_1_exp AS BIGINT))                                                    AS d3_1_exp
+        ,SUM(CAST(d3_1_pv AS BIGINT))                                                     AS d3_1_pv
+        ,SUM(CAST(d3_1_uv AS BIGINT))                                                     AS d3_1_uv
+        ,SUM(CAST(d3_2_exp AS BIGINT))                                                    AS d3_2_exp
+        ,SUM(CAST(d3_2_pv AS BIGINT))                                                     AS d3_2_pv
+        ,SUM(CAST(d3_2_uv AS BIGINT))                                                     AS d3_2_uv
+        ,SUM(CAST(d3_3_exp AS BIGINT))                                                    AS d3_3_exp
+        ,SUM(CAST(d3_3_pv AS BIGINT))                                                     AS d3_3_pv
+        ,SUM(CAST(d3_3_uv AS BIGINT))                                                     AS d3_3_uv
+
+        -- ==================== Full chain (direct return + C-chain + D-chain UVs) ====================
+        ,SUM(CAST(return_n_uv_noself AS BIGINT))
+            + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+            + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+                                                                                        AS all_return_n_uv
+        ,ROUND(COALESCE(
+            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(COUNT(1), 0)
+        , 0), 6)                                                                        AS all_rovn
+
+
+-- =====================================================================
+-- FROM + GROUP BY CUBE
+-- =====================================================================
+
+FROM    t_wide
+
+GROUP BY CUBE(
+            user_type
+            ,hh_bucket
+            ,head_merge_leve2
+            ,vid_merge_leve2
+            ,vid_id
+        )
+
+-- NOTE(review): CUBE over 5 dimensions with ORDER BY and no LIMIT can emit a
+-- very large result set — confirm this is intended for this test query
+ORDER BY exposure_cnt DESC
+;

+ 601 - 0
table_gen/test_3.sql

@@ -0,0 +1,601 @@
+-- =====================================================================
+-- 曝光回流链路 CUBE 聚合表 (基于 wide 宽表, 增加用户/品类维度 + 模型预估指标)
+-- 维度: user_type × hh_bucket × head_merge_leve2 × vid_merge_leve2 × vid_id (CUBE)
+-- 参考: de.sql + dwd_recsys_alg_exposure_agg_20260209
+-- =====================================================================
+
+-- -- DROP TABLE IF EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209;
+-- CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 (
+--     -- ==================== 维度列 ====================
+--     dt                         STRING    COMMENT '日期'
+--     ,user_type                  STRING    COMMENT '用户拉活量分层(R0&新用户/R1-50/R_180_330等,汇总为SUM)'
+--     ,hh_bucket                STRING    COMMENT '小时段(00-03/04-07/.../20-23,汇总为SUM)'
+--     ,head_merge_leve2         STRING    COMMENT '进入内容品类(headvideoid品类,汇总为SUM)'
+--     ,vid_merge_leve2          STRING    COMMENT '推荐内容品类(vid品类,TOP10曝光+其他,汇总为SUM)'
+--     ,vid_id                   STRING    COMMENT '内容id(品类曝光TOP1+其他,汇总为SUM)'
+
+--     -- ==================== 基础流量 ====================
+--     ,exposure_cnt             BIGINT    COMMENT '曝光次数'
+--     ,exposure_uv              BIGINT    COMMENT '曝光人数(mid去重)'
+--     ,vid_cnt                  BIGINT    COMMENT '视频个数(vid去重)'
+--     ,exposure_per_user        DOUBLE    COMMENT '人均曝光次数 = 曝光次数/曝光人数'
+
+--     -- ==================== 分享 ====================
+--     ,share_exposure_cnt       BIGINT    COMMENT '产生分享的曝光数'
+--     ,share_cnt                BIGINT    COMMENT '分享总次数'
+
+--     -- ==================== STR 指标 ====================
+--     ,str_real                 DOUBLE    COMMENT 'STR实际 = is_return_noself/曝光次数'
+--     ,str_pred                 DOUBLE    COMMENT 'STR预估 = SUM(str_pred)/曝光次数'
+--     ,str_copc                 DOUBLE    COMMENT 'STR copc = str_real/str_pred'
+--     ,str_mae                  DOUBLE    COMMENT 'STR MAE = AVG(ABS(str_pred - is_return_noself))'
+--     ,str_var                  DOUBLE    COMMENT 'STR VAR = VARIANCE(str_pred - is_return_noself)'
+
+--     -- ==================== ROSN 指标 ====================
+--     ,rosn_real                DOUBLE    COMMENT 'ROSN实际 = return_n_uv_noself/is_return_noself'
+--     ,rosn_pred                DOUBLE    COMMENT 'ROSN预估 = SUM(rosn_pred)/is_return_noself'
+--     ,rosn_copc                DOUBLE    COMMENT 'ROSN copc = rosn_real/rosn_pred'
+--     ,rosn_pred_origin         DOUBLE    COMMENT 'ROSN原始预估均值 = AVG(rosn_pred_origin)'
+--     ,rosn_mae                 DOUBLE    COMMENT 'ROSN MAE = AVG(ABS(rosn_pred - return_n_uv_noself)) WHERE is_return_noself=1'
+--     ,rosn_var                 DOUBLE    COMMENT 'ROSN VAR = VARIANCE(rosn_pred - return_n_uv_noself) WHERE is_return_noself=1'
+
+--     -- ==================== ROVN 指标 ====================
+--     ,rovn                     DOUBLE    COMMENT 'rovn实际 = return_n_uv_noself/曝光次数'
+--     ,rovn_pred                DOUBLE    COMMENT 'rovn预估 = AVG(str_pred*rosn_pred)'
+--     ,rovn_copc                DOUBLE    COMMENT 'rovn copc = rovn/rovn_pred'
+--     ,rovn_mae                 DOUBLE    COMMENT 'rovn MAE = AVG(ABS(str_pred*rosn_pred - return_n_uv_noself/曝光次数))'
+--     ,rovn_var                 DOUBLE    COMMENT 'rovn VAR = VARIANCE(str_pred*rosn_pred - return_n_uv_noself/曝光次数)'
+--     ,sortscore_avg            DOUBLE    COMMENT 'sortscore均值'
+
+--     -- ==================== B链 (分享→点击) ====================
+--     ,bn_exp                   BIGINT    COMMENT 'B链全量: 回流session曝光数'
+--     ,bn_pv                    BIGINT    COMMENT 'B链全量: 回流点击次数'
+--     ,bn_uv                    BIGINT    COMMENT 'B链全量: 回流去重人数'
+--     ,b1_exp                   BIGINT    COMMENT 'B链depth=1: 回流session曝光数'
+--     ,b1_pv                    BIGINT    COMMENT 'B链depth=1: 回流点击次数'
+--     ,b1_uv                    BIGINT    COMMENT 'B链depth=1: 回流去重人数'
+--     ,b2_exp                   BIGINT    COMMENT 'B链depth=2: 回流session曝光数'
+--     ,b2_pv                    BIGINT    COMMENT 'B链depth=2: 回流点击次数'
+--     ,b2_uv                    BIGINT    COMMENT 'B链depth=2: 回流去重人数'
+--     ,b3_exp                   BIGINT    COMMENT 'B链depth=3: 回流session曝光数'
+--     ,b3_pv                    BIGINT    COMMENT 'B链depth=3: 回流点击次数'
+--     ,b3_uv                    BIGINT    COMMENT 'B链depth=3: 回流去重人数'
+--     ,bn_rov                   DOUBLE    COMMENT 'B链全量: rov = bn_uv/曝光次数'
+--     ,bn_ror                   DOUBLE    COMMENT 'B链全量: ror = bn_uv/曝光人数'
+--     ,b1_rov                   DOUBLE    COMMENT 'B链depth=1: rov = b1_uv/曝光次数'
+--     ,b1_ror                   DOUBLE    COMMENT 'B链depth=1: ror = b1_uv/曝光人数'
+--     ,b2_rov                   DOUBLE    COMMENT 'B链depth=2: rov = b2_uv/b1曝光数'
+--     ,b2_ror                   DOUBLE    COMMENT 'B链depth=2: ror = b2_uv/b1人数'
+--     ,b3_rov                   DOUBLE    COMMENT 'B链depth=3: rov = b3_uv/b2曝光数'
+--     ,b3_ror                   DOUBLE    COMMENT 'B链depth=3: ror = b3_uv/b2人数'
+
+--     -- ==================== C链 (全量depth, 按hop) ====================
+--     ,cn_1_exp                 BIGINT    COMMENT 'C链hop1: 回流session曝光数'
+--     ,cn_1_pv                  BIGINT    COMMENT 'C链hop1: 回流点击次数'
+--     ,cn_1_uv                  BIGINT    COMMENT 'C链hop1: 回流去重人数'
+--     ,cn_2_exp                 BIGINT    COMMENT 'C链hop2: 回流session曝光数'
+--     ,cn_2_pv                  BIGINT    COMMENT 'C链hop2: 回流点击次数'
+--     ,cn_2_uv                  BIGINT    COMMENT 'C链hop2: 回流去重人数'
+--     ,cn_3_exp                 BIGINT    COMMENT 'C链hop3: 回流session曝光数'
+--     ,cn_3_pv                  BIGINT    COMMENT 'C链hop3: 回流点击次数'
+--     ,cn_3_uv                  BIGINT    COMMENT 'C链hop3: 回流去重人数'
+--     ,cn_total_uv              BIGINT    COMMENT 'C链合计UV'
+--     ,cn_1_rov                 DOUBLE    COMMENT 'C链hop1: rov = cn_1_uv/bn曝光数'
+--     ,cn_1_ror                 DOUBLE    COMMENT 'C链hop1: ror = cn_1_uv/bn人数'
+--     ,cn_2_rov                 DOUBLE    COMMENT 'C链hop2: rov = cn_2_uv/cn_1曝光数'
+--     ,cn_2_ror                 DOUBLE    COMMENT 'C链hop2: ror = cn_2_uv/cn_1人数'
+--     ,cn_3_rov                 DOUBLE    COMMENT 'C链hop3: rov = cn_3_uv/cn_2曝光数'
+--     ,cn_3_ror                 DOUBLE    COMMENT 'C链hop3: ror = cn_3_uv/cn_2人数'
+--     ,cn_total_rov             DOUBLE    COMMENT 'C链合计: rov = cn_total_uv/bn曝光数'
+--     ,cn_total_ror             DOUBLE    COMMENT 'C链合计: ror = cn_total_uv/bn人数'
+--     ,c1_1_exp                 BIGINT    COMMENT 'C链depth1-hop1: 回流session曝光数'
+--     ,c1_1_pv                  BIGINT    COMMENT 'C链depth1-hop1: 回流点击次数'
+--     ,c1_1_uv                  BIGINT    COMMENT 'C链depth1-hop1: 回流去重人数'
+--     ,c1_2_exp                 BIGINT    COMMENT 'C链depth1-hop2: 回流session曝光数'
+--     ,c1_2_pv                  BIGINT    COMMENT 'C链depth1-hop2: 回流点击次数'
+--     ,c1_2_uv                  BIGINT    COMMENT 'C链depth1-hop2: 回流去重人数'
+--     ,c1_3_exp                 BIGINT    COMMENT 'C链depth1-hop3: 回流session曝光数'
+--     ,c1_3_pv                  BIGINT    COMMENT 'C链depth1-hop3: 回流点击次数'
+--     ,c1_3_uv                  BIGINT    COMMENT 'C链depth1-hop3: 回流去重人数'
+--     ,c2_1_exp                 BIGINT    COMMENT 'C链depth2-hop1: 回流session曝光数'
+--     ,c2_1_pv                  BIGINT    COMMENT 'C链depth2-hop1: 回流点击次数'
+--     ,c2_1_uv                  BIGINT    COMMENT 'C链depth2-hop1: 回流去重人数'
+--     ,c2_2_exp                 BIGINT    COMMENT 'C链depth2-hop2: 回流session曝光数'
+--     ,c2_2_pv                  BIGINT    COMMENT 'C链depth2-hop2: 回流点击次数'
+--     ,c2_2_uv                  BIGINT    COMMENT 'C链depth2-hop2: 回流去重人数'
+--     ,c2_3_exp                 BIGINT    COMMENT 'C链depth2-hop3: 回流session曝光数'
+--     ,c2_3_pv                  BIGINT    COMMENT 'C链depth2-hop3: 回流点击次数'
+--     ,c2_3_uv                  BIGINT    COMMENT 'C链depth2-hop3: 回流去重人数'
+--     ,c3_1_exp                 BIGINT    COMMENT 'C链depth3-hop1: 回流session曝光数'
+--     ,c3_1_pv                  BIGINT    COMMENT 'C链depth3-hop1: 回流点击次数'
+--     ,c3_1_uv                  BIGINT    COMMENT 'C链depth3-hop1: 回流去重人数'
+--     ,c3_2_exp                 BIGINT    COMMENT 'C链depth3-hop2: 回流session曝光数'
+--     ,c3_2_pv                  BIGINT    COMMENT 'C链depth3-hop2: 回流点击次数'
+--     ,c3_2_uv                  BIGINT    COMMENT 'C链depth3-hop2: 回流去重人数'
+--     ,c3_3_exp                 BIGINT    COMMENT 'C链depth3-hop3: 回流session曝光数'
+--     ,c3_3_pv                  BIGINT    COMMENT 'C链depth3-hop3: 回流点击次数'
+--     ,c3_3_uv                  BIGINT    COMMENT 'C链depth3-hop3: 回流去重人数'
+--     ,c1_1_rov                 DOUBLE    COMMENT 'C链d1-hop1: rov = c1_1_uv/bn曝光数'
+--     ,c1_1_ror                 DOUBLE    COMMENT 'C链d1-hop1: ror = c1_1_uv/bn人数'
+--     ,c2_1_rov                 DOUBLE    COMMENT 'C链d2-hop1: rov = c2_1_uv/c1_1曝光数'
+--     ,c2_1_ror                 DOUBLE    COMMENT 'C链d2-hop1: ror = c2_1_uv/c1_1人数'
+--     ,c3_1_rov                 DOUBLE    COMMENT 'C链d3-hop1: rov = c3_1_uv/c2_1曝光数'
+--     ,c3_1_ror                 DOUBLE    COMMENT 'C链d3-hop1: ror = c3_1_uv/c2_1人数'
+--     ,c1_2_rov                 DOUBLE    COMMENT 'C链d1-hop2: rov = c1_2_uv/cn_1曝光数'
+--     ,c1_2_ror                 DOUBLE    COMMENT 'C链d1-hop2: ror = c1_2_uv/cn_1人数'
+--     ,c2_2_rov                 DOUBLE    COMMENT 'C链d2-hop2: rov = c2_2_uv/c1_2曝光数'
+--     ,c2_2_ror                 DOUBLE    COMMENT 'C链d2-hop2: ror = c2_2_uv/c1_2人数'
+--     ,c3_2_rov                 DOUBLE    COMMENT 'C链d3-hop2: rov = c3_2_uv/c2_2曝光数'
+--     ,c3_2_ror                 DOUBLE    COMMENT 'C链d3-hop2: ror = c3_2_uv/c2_2人数'
+--     ,c1_3_rov                 DOUBLE    COMMENT 'C链d1-hop3: rov = c1_3_uv/cn_2曝光数'
+--     ,c1_3_ror                 DOUBLE    COMMENT 'C链d1-hop3: ror = c1_3_uv/cn_2人数'
+--     ,c2_3_rov                 DOUBLE    COMMENT 'C链d2-hop3: rov = c2_3_uv/c1_3曝光数'
+--     ,c2_3_ror                 DOUBLE    COMMENT 'C链d2-hop3: ror = c2_3_uv/c1_3人数'
+--     ,c3_3_rov                 DOUBLE    COMMENT 'C链d3-hop3: rov = c3_3_uv/c2_3曝光数'
+--     ,c3_3_ror                 DOUBLE    COMMENT 'C链d3-hop3: ror = c3_3_uv/c2_3人数'
+
+--     -- ==================== D链 (session内后续曝光传播) ====================
+--     ,d0                       BIGINT    COMMENT 'D链初始成本: session内后续曝光数'
+--     ,dn_1_exp                 BIGINT    COMMENT 'D链hop1: 回流session曝光数'
+--     ,dn_1_pv                  BIGINT    COMMENT 'D链hop1: 回流点击次数'
+--     ,dn_1_uv                  BIGINT    COMMENT 'D链hop1: 回流去重人数'
+--     ,dn_2_exp                 BIGINT    COMMENT 'D链hop2: 回流session曝光数'
+--     ,dn_2_pv                  BIGINT    COMMENT 'D链hop2: 回流点击次数'
+--     ,dn_2_uv                  BIGINT    COMMENT 'D链hop2: 回流去重人数'
+--     ,dn_3_exp                 BIGINT    COMMENT 'D链hop3: 回流session曝光数'
+--     ,dn_3_pv                  BIGINT    COMMENT 'D链hop3: 回流点击次数'
+--     ,dn_3_uv                  BIGINT    COMMENT 'D链hop3: 回流去重人数'
+--     ,dn_total_uv              BIGINT    COMMENT 'D链合计UV'
+--     ,dn_1_rov                 DOUBLE    COMMENT 'D链hop1: rov = dn_1_uv/d0初始曝光数'
+--     ,dn_1_ror                 DOUBLE    COMMENT 'D链hop1: ror = dn_1_uv/曝光人数'
+--     ,dn_2_rov                 DOUBLE    COMMENT 'D链hop2: rov = dn_2_uv/dn_1曝光数'
+--     ,dn_2_ror                 DOUBLE    COMMENT 'D链hop2: ror = dn_2_uv/dn_1人数'
+--     ,dn_3_rov                 DOUBLE    COMMENT 'D链hop3: rov = dn_3_uv/dn_2曝光数'
+--     ,dn_3_ror                 DOUBLE    COMMENT 'D链hop3: ror = dn_3_uv/dn_2人数'
+--     ,dn_total_rov             DOUBLE    COMMENT 'D链合计: rov = dn_total_uv/d0初始曝光数'
+--     ,dn_total_ror             DOUBLE    COMMENT 'D链合计: ror = dn_total_uv/曝光人数'
+--     ,d1_1_exp                 BIGINT    COMMENT 'D链depth1-hop1: 回流session曝光数'
+--     ,d1_1_pv                  BIGINT    COMMENT 'D链depth1-hop1: 回流点击次数'
+--     ,d1_1_uv                  BIGINT    COMMENT 'D链depth1-hop1: 回流去重人数'
+--     ,d1_2_exp                 BIGINT    COMMENT 'D链depth1-hop2: 回流session曝光数'
+--     ,d1_2_pv                  BIGINT    COMMENT 'D链depth1-hop2: 回流点击次数'
+--     ,d1_2_uv                  BIGINT    COMMENT 'D链depth1-hop2: 回流去重人数'
+--     ,d1_3_exp                 BIGINT    COMMENT 'D链depth1-hop3: 回流session曝光数'
+--     ,d1_3_pv                  BIGINT    COMMENT 'D链depth1-hop3: 回流点击次数'
+--     ,d1_3_uv                  BIGINT    COMMENT 'D链depth1-hop3: 回流去重人数'
+--     ,d2_1_exp                 BIGINT    COMMENT 'D链depth2-hop1: 回流session曝光数'
+--     ,d2_1_pv                  BIGINT    COMMENT 'D链depth2-hop1: 回流点击次数'
+--     ,d2_1_uv                  BIGINT    COMMENT 'D链depth2-hop1: 回流去重人数'
+--     ,d2_2_exp                 BIGINT    COMMENT 'D链depth2-hop2: 回流session曝光数'
+--     ,d2_2_pv                  BIGINT    COMMENT 'D链depth2-hop2: 回流点击次数'
+--     ,d2_2_uv                  BIGINT    COMMENT 'D链depth2-hop2: 回流去重人数'
+--     ,d2_3_exp                 BIGINT    COMMENT 'D链depth2-hop3: 回流session曝光数'
+--     ,d2_3_pv                  BIGINT    COMMENT 'D链depth2-hop3: 回流点击次数'
+--     ,d2_3_uv                  BIGINT    COMMENT 'D链depth2-hop3: 回流去重人数'
+--     ,d3_1_exp                 BIGINT    COMMENT 'D链depth3-hop1: 回流session曝光数'
+--     ,d3_1_pv                  BIGINT    COMMENT 'D链depth3-hop1: 回流点击次数'
+--     ,d3_1_uv                  BIGINT    COMMENT 'D链depth3-hop1: 回流去重人数'
+--     ,d3_2_exp                 BIGINT    COMMENT 'D链depth3-hop2: 回流session曝光数'
+--     ,d3_2_pv                  BIGINT    COMMENT 'D链depth3-hop2: 回流点击次数'
+--     ,d3_2_uv                  BIGINT    COMMENT 'D链depth3-hop2: 回流去重人数'
+--     ,d3_3_exp                 BIGINT    COMMENT 'D链depth3-hop3: 回流session曝光数'
+--     ,d3_3_pv                  BIGINT    COMMENT 'D链depth3-hop3: 回流点击次数'
+--     ,d3_3_uv                  BIGINT    COMMENT 'D链depth3-hop3: 回流去重人数'
+--     ,d1_1_rov                 DOUBLE    COMMENT 'D链d1-hop1: rov = d1_1_uv/d0初始曝光数'
+--     ,d1_1_ror                 DOUBLE    COMMENT 'D链d1-hop1: ror = d1_1_uv/曝光人数'
+--     ,d2_1_rov                 DOUBLE    COMMENT 'D链d2-hop1: rov = d2_1_uv/d1_1曝光数'
+--     ,d2_1_ror                 DOUBLE    COMMENT 'D链d2-hop1: ror = d2_1_uv/d1_1人数'
+--     ,d3_1_rov                 DOUBLE    COMMENT 'D链d3-hop1: rov = d3_1_uv/d2_1曝光数'
+--     ,d3_1_ror                 DOUBLE    COMMENT 'D链d3-hop1: ror = d3_1_uv/d2_1人数'
+--     ,d1_2_rov                 DOUBLE    COMMENT 'D链d1-hop2: rov = d1_2_uv/dn_1曝光数'
+--     ,d1_2_ror                 DOUBLE    COMMENT 'D链d1-hop2: ror = d1_2_uv/dn_1人数'
+--     ,d2_2_rov                 DOUBLE    COMMENT 'D链d2-hop2: rov = d2_2_uv/d1_2曝光数'
+--     ,d2_2_ror                 DOUBLE    COMMENT 'D链d2-hop2: ror = d2_2_uv/d1_2人数'
+--     ,d3_2_rov                 DOUBLE    COMMENT 'D链d3-hop2: rov = d3_2_uv/d2_2曝光数'
+--     ,d3_2_ror                 DOUBLE    COMMENT 'D链d3-hop2: ror = d3_2_uv/d2_2人数'
+--     ,d1_3_rov                 DOUBLE    COMMENT 'D链d1-hop3: rov = d1_3_uv/dn_2曝光数'
+--     ,d1_3_ror                 DOUBLE    COMMENT 'D链d1-hop3: ror = d1_3_uv/dn_2人数'
+--     ,d2_3_rov                 DOUBLE    COMMENT 'D链d2-hop3: rov = d2_3_uv/d1_3曝光数'
+--     ,d2_3_ror                 DOUBLE    COMMENT 'D链d2-hop3: ror = d2_3_uv/d1_3人数'
+--     ,d3_3_rov                 DOUBLE    COMMENT 'D链d3-hop3: rov = d3_3_uv/d2_3曝光数'
+--     ,d3_3_ror                 DOUBLE    COMMENT 'D链d3-hop3: ror = d3_3_uv/d2_3人数'
+
+--     -- ==================== 全链路 ====================
+--     ,all_return_n_uv          BIGINT    COMMENT '全链路拉回UV = B + C + D'
+--     ,all_rovn                 DOUBLE    COMMENT '全链路拉回率 = all_return_n_uv/曝光次数'
+--     ,all_rov                  DOUBLE    COMMENT '全链路: rov = all_return_n_uv/曝光次数'
+--     ,all_ror                  DOUBLE    COMMENT '全链路: ror = all_return_n_uv/曝光人数'
+-- )
+-- COMMENT '曝光回流链路CUBE聚合-宽表版 (5维度: 用户分层/小时段/进入品类TOP1/推荐品类TOP10/内容idTOP1)'
+-- ;
+
+WITH
+-- User pull-back-volume tier: one (type, openid) row per user, read from
+-- yesterday's partition relative to ${dt}.
+-- NOTE(review): `type NOT REGEXP 'R50'` matches the literal substring "R50";
+-- the tier codes visible elsewhere use underscores (e.g. 'R_50_100'), which
+-- this pattern would NOT match — confirm which codes it is meant to exclude.
+t_user_type AS (
+    SELECT  DISTINCT type, openid
+    FROM    loghubods.mid_share_return_people_1year
+    -- dt = ${dt} minus one day (DATEADD -1 'dd').
+    WHERE   dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
+    AND     type IS NOT NULL
+    -- 'S_ALL' is the aggregate bucket; drop it so tiers don't double count.
+    AND     type != 'S_ALL'
+    AND     type NOT REGEXP 'R50'
+)
+-- Model prediction scores: exactly one row per (apptype, videoid,
+-- recommendtraceid), deduplicated with ROW_NUMBER.
+-- FIX(review): the original window had PARTITION BY but no ORDER BY, so the
+-- surviving duplicate was nondeterministic run-to-run (and some engines
+-- reject ROW_NUMBER without ORDER BY). Order by sortscore DESC so the
+-- highest-ranked score row is kept deterministically when duplicates tie
+-- only on the partition key.
+,t_score AS (
+    SELECT  apptype
+            ,videoid
+            ,recommendtraceid
+            ,scoresmap
+            ,sortscore
+    FROM    (
+                SELECT  apptype
+                        ,videoid
+                        ,recommendtraceid
+                        ,scoresmap
+                        ,sortscore
+                        -- Deterministic dedup: prefer the highest sortscore row.
+                        ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid ORDER BY sortscore DESC) AS rn
+                FROM    loghubods.statistics_log_hour
+                -- dt LIKE '${dt}%' sweeps every hourly partition of the day.
+                WHERE   dt LIKE '${dt}%'
+                AND     scoresmap IS NOT NULL
+            )
+    WHERE   rn = 1
+)
+-- TOP1 entry-content category ("venue" side): the single merge_leve2 of
+-- headvideoid with the most returns on ${dt}.
+-- NOTE(review): the ranking metric is SUM(is_return_noself) — a count of
+-- returning exposures — while the original comment said "by returning-user
+-- count"; a COUNT(DISTINCT mid) ranking could pick a different category.
+-- Confirm which metric is intended.
+,t_top_head_cate AS (
+    SELECT  vt.merge_leve2
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    JOIN    loghubods.video_merge_tag vt ON base.headvideoid = vt.videoid
+    WHERE   base.dt = '${dt}'
+    AND     vt.merge_leve2 IS NOT NULL
+    GROUP BY vt.merge_leve2
+    ORDER BY SUM(CAST(is_return_noself AS BIGINT)) DESC
+    LIMIT   1
+)
+-- TOP10 recommended-content categories ("goods" side): the ten merge_leve2
+-- values of the exposed vid with the highest exposure counts on ${dt};
+-- everything else collapses to '其他' in t_wide.
+,t_top_vid_cate AS (
+    SELECT  vt.merge_leve2
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+    WHERE   base.dt = '${dt}'
+    AND     vt.merge_leve2 IS NOT NULL
+    GROUP BY vt.merge_leve2
+    ORDER BY COUNT(1) DESC
+    LIMIT   10
+)
+-- TOP1 vid per category ("goods" side): for each merge_leve2, keep the
+-- single vid with the most exposures on ${dt}, and only if that vid's
+-- exposure count exceeds 100k (matching the "曝光>10w" spec).
+,t_top_vid AS (
+    SELECT  merge_leve2, vid
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,base.vid
+                        ,COUNT(1)                                                               AS exp_cnt
+                        ,ROW_NUMBER() OVER (PARTITION BY vt.merge_leve2 ORDER BY COUNT(1) DESC) AS rk
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2, base.vid
+                -- HAVING on a SELECT alias is a Hive/MaxCompute extension.
+                HAVING  exp_cnt > 100000
+            )
+    WHERE   rk <= 1
+)
+-- Wide row set: one row per exposure from the base table, decorated with the
+-- five CUBE dimensions (user tier, hour bucket, entry category, recommended
+-- category, vid) plus model scores; feeds the CUBE aggregation below.
+-- NOTE(review): every LEFT JOIN on video_merge_tag assumes one tag row per
+-- videoid — duplicate tags would fan out exposure rows and inflate all
+-- downstream SUM/COUNT metrics. Confirm uniqueness of that table.
+,t_wide AS (
+    SELECT  base.*
+            -- User tier buckets; users absent from t_user_type (or 'R_0')
+            -- land in 'R0&新用户'.
+            -- NOTE(review): the third branch folds R_50_100/R_100_180/R_180_330
+            -- into the single label 'R_180_330' — consistent with the schema
+            -- comment above, but the label understates the covered range.
+            ,CASE WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                  WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                  WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                  ELSE e.type
+             END AS user_type
+            -- Collapse non-TOP categories / non-TOP vids to '其他' ("other").
+            ,CASE WHEN th.merge_leve2 IS NOT NULL THEN vt_head.merge_leve2 ELSE '其他' END AS head_merge_leve2
+            ,CASE WHEN tv.merge_leve2 IS NOT NULL THEN vt_vid.merge_leve2  ELSE '其他' END AS vid_merge_leve2
+            ,CASE WHEN ti.vid IS NOT NULL          THEN base.vid           ELSE '其他' END AS vid_id
+            -- Model scores parsed out of the scoresmap JSON blob.
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            -- NOTE(review): 1.22 * NorXGBScore^1.15 looks like an offline
+            -- calibration — confirm the source of these constants.
+            ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
+            ,e1.sortscore
+            -- 4-hour time-of-day buckets derived from hh.
+            ,CASE
+                WHEN CAST(hh AS INT) BETWEEN 0  AND 3  THEN '00-03'
+                WHEN CAST(hh AS INT) BETWEEN 4  AND 7  THEN '04-07'
+                WHEN CAST(hh AS INT) BETWEEN 8  AND 11 THEN '08-11'
+                WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
+                WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
+                WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
+                ELSE '-'
+             END AS hh_bucket
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    -- openid is taken as the suffix of mid after 'weixin_openid_'.
+    LEFT JOIN t_user_type e
+    ON      SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
+    LEFT JOIN loghubods.video_merge_tag vt_head
+    ON      base.headvideoid = vt_head.videoid
+    LEFT JOIN loghubods.video_merge_tag vt_vid
+    ON      base.vid = vt_vid.videoid
+    LEFT JOIN t_score e1
+    ON      base.apptype = e1.apptype
+    AND     base.vid = e1.videoid
+    AND     base.recomtraceid = e1.recommendtraceid
+    LEFT JOIN t_top_head_cate th
+    ON      vt_head.merge_leve2 = th.merge_leve2
+    LEFT JOIN t_top_vid_cate tv
+    ON      vt_vid.merge_leve2 = tv.merge_leve2
+    -- NOTE(review): joined on vid only; t_top_vid.merge_leve2 is not matched.
+    -- Safe while each vid carries a single tag, otherwise rows can duplicate.
+    LEFT JOIN t_top_vid ti
+    ON      base.vid = ti.vid
+    WHERE   base.dt = '${dt}'
+)
+
+SELECT
+        -- ==================== 维度列 ====================
+        '${dt}'                                                                                     AS dt
+        ,CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END              AS user_type
+        ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END              AS hh_bucket
+        ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
+        ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END   AS vid_merge_leve2
+        ,CASE WHEN GROUPING(vid_id) = 1 THEN 'SUM' ELSE NVL(vid_id, 'SUM') END                     AS vid_id
+
+        -- ==================== 基础流量 ====================
+        ,COUNT(1)                                                                       AS exposure_cnt
+        ,COUNT(DISTINCT mid)                                                            AS exposure_uv
+        ,COUNT(DISTINCT vid)                                                            AS vid_cnt
+        ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4)                                      AS exposure_per_user
+
+        -- ==================== 分享 ====================
+        ,SUM(CAST(is_share AS BIGINT))                                                  AS share_exposure_cnt
+        ,SUM(CAST(share_cnt AS BIGINT))                                                 AS share_cnt
+
+        -- ==================== STR 指标 ====================
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS str_real
+        ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6)                          AS str_pred
+        ,ROUND(
+            (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
+        , 4)                                                                                   AS str_copc
+        ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6)                      AS str_mae
+        ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6)                      AS str_var
+
+        -- ==================== ROSN 指标 ====================
+        ,ROUND(COALESCE(
+            SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
+        , 0), 6)                                                                               AS rosn_real
+        ,ROUND(COALESCE(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6)  AS rosn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
+            / NULLIF(SUM(rosn_pred) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
+        , 4)                                                                                   AS rosn_copc
+        ,ROUND(AVG(rosn_pred_origin), 6)                                                      AS rosn_pred_origin
+        ,ROUND(AVG(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
+            END
+        ), 6)                                                                                  AS rosn_mae
+        ,ROUND(VARIANCE(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
+            END
+        ), 6)                                                                                  AS rosn_var
+
+        -- ==================== ROVN 指标 ====================
+        ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS rovn
+        ,ROUND(AVG(str_pred * rosn_pred), 6)                                                    AS rovn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(AVG(str_pred * rosn_pred), 0)
+        , 4)                                                                                     AS rovn_copc
+        ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6)            AS rovn_mae
+        ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6)        AS rovn_var
+        ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6)                                               AS sortscore_avg
+
+        -- ==================== B链 ====================
+        ,SUM(CAST(bn_exp AS BIGINT))                                                    AS bn_exp
+        ,SUM(CAST(bn_pv AS BIGINT))                                                     AS bn_pv
+        ,SUM(CAST(bn_uv AS BIGINT))                                                     AS bn_uv
+        ,SUM(CAST(b1_exp AS BIGINT))                                                    AS b1_exp
+        ,SUM(CAST(b1_pv AS BIGINT))                                                     AS b1_pv
+        ,SUM(CAST(b1_uv AS BIGINT))                                                     AS b1_uv
+        ,SUM(CAST(b2_exp AS BIGINT))                                                    AS b2_exp
+        ,SUM(CAST(b2_pv AS BIGINT))                                                     AS b2_pv
+        ,SUM(CAST(b2_uv AS BIGINT))                                                     AS b2_uv
+        ,SUM(CAST(b3_exp AS BIGINT))                                                    AS b3_exp
+        ,SUM(CAST(b3_pv AS BIGINT))                                                     AS b3_pv
+        ,SUM(CAST(b3_uv AS BIGINT))                                                     AS b3_uv
+        ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                          AS bn_rov
+        ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)               AS bn_ror
+        ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                          AS b1_rov
+        ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)               AS b1_ror
+        ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)       AS b2_rov
+        ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS b2_ror
+        ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)       AS b3_rov
+        ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS b3_ror
+
+        -- ==================== C链 (全量depth, 按hop) ====================
+        ,SUM(CAST(cn_1_exp AS BIGINT))                                                  AS cn_1_exp
+        ,SUM(CAST(cn_1_pv AS BIGINT))                                                   AS cn_1_pv
+        ,SUM(CAST(cn_1_uv AS BIGINT))                                                   AS cn_1_uv
+        ,SUM(CAST(cn_2_exp AS BIGINT))                                                  AS cn_2_exp
+        ,SUM(CAST(cn_2_pv AS BIGINT))                                                   AS cn_2_pv
+        ,SUM(CAST(cn_2_uv AS BIGINT))                                                   AS cn_2_uv
+        ,SUM(CAST(cn_3_exp AS BIGINT))                                                  AS cn_3_exp
+        ,SUM(CAST(cn_3_pv AS BIGINT))                                                   AS cn_3_pv
+        ,SUM(CAST(cn_3_uv AS BIGINT))                                                   AS cn_3_uv
+        ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))  AS cn_total_uv
+        ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)     AS cn_1_rov
+        ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)      AS cn_1_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_2_rov
+        ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_2_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_3_rov
+        ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_3_ror
+        ,ROUND(COALESCE(
+            (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS cn_total_rov
+        ,ROUND(COALESCE(
+            (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS cn_total_ror
+
+        -- ==================== C链 (按depth拆分) ====================
+        ,SUM(CAST(c1_1_exp AS BIGINT))                                                    AS c1_1_exp
+        ,SUM(CAST(c1_1_pv AS BIGINT))                                                     AS c1_1_pv
+        ,SUM(CAST(c1_1_uv AS BIGINT))                                                     AS c1_1_uv
+        ,SUM(CAST(c1_2_exp AS BIGINT))                                                    AS c1_2_exp
+        ,SUM(CAST(c1_2_pv AS BIGINT))                                                     AS c1_2_pv
+        ,SUM(CAST(c1_2_uv AS BIGINT))                                                     AS c1_2_uv
+        ,SUM(CAST(c1_3_exp AS BIGINT))                                                    AS c1_3_exp
+        ,SUM(CAST(c1_3_pv AS BIGINT))                                                     AS c1_3_pv
+        ,SUM(CAST(c1_3_uv AS BIGINT))                                                     AS c1_3_uv
+        ,SUM(CAST(c2_1_exp AS BIGINT))                                                    AS c2_1_exp
+        ,SUM(CAST(c2_1_pv AS BIGINT))                                                     AS c2_1_pv
+        ,SUM(CAST(c2_1_uv AS BIGINT))                                                     AS c2_1_uv
+        ,SUM(CAST(c2_2_exp AS BIGINT))                                                    AS c2_2_exp
+        ,SUM(CAST(c2_2_pv AS BIGINT))                                                     AS c2_2_pv
+        ,SUM(CAST(c2_2_uv AS BIGINT))                                                     AS c2_2_uv
+        ,SUM(CAST(c2_3_exp AS BIGINT))                                                    AS c2_3_exp
+        ,SUM(CAST(c2_3_pv AS BIGINT))                                                     AS c2_3_pv
+        ,SUM(CAST(c2_3_uv AS BIGINT))                                                     AS c2_3_uv
+        ,SUM(CAST(c3_1_exp AS BIGINT))                                                    AS c3_1_exp
+        ,SUM(CAST(c3_1_pv AS BIGINT))                                                     AS c3_1_pv
+        ,SUM(CAST(c3_1_uv AS BIGINT))                                                     AS c3_1_uv
+        ,SUM(CAST(c3_2_exp AS BIGINT))                                                    AS c3_2_exp
+        ,SUM(CAST(c3_2_pv AS BIGINT))                                                     AS c3_2_pv
+        ,SUM(CAST(c3_2_uv AS BIGINT))                                                     AS c3_2_uv
+        ,SUM(CAST(c3_3_exp AS BIGINT))                                                    AS c3_3_exp
+        ,SUM(CAST(c3_3_pv AS BIGINT))                                                     AS c3_3_pv
+        ,SUM(CAST(c3_3_uv AS BIGINT))                                                     AS c3_3_uv
+        -- C链depth拆分 hop1 rov/ror
+        ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)     AS c1_1_rov
+        ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)      AS c1_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_1_rov
+        ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_1_rov
+        ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_1_ror
+        -- C链depth拆分 hop2 rov/ror
+        ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c1_2_rov
+        ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c1_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_2_rov
+        ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_2_rov
+        ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_2_ror
+        -- C链depth拆分 hop3 rov/ror
+        ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c1_3_rov
+        ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c1_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_3_rov
+        ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_3_rov
+        ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_3_ror
+
+        -- ==================== D链 (全量depth, 按hop) ====================
+        ,SUM(CAST(d0 AS BIGINT))                                                        AS d0
+        ,SUM(CAST(dn_1_exp AS BIGINT))                                                  AS dn_1_exp
+        ,SUM(CAST(dn_1_pv AS BIGINT))                                                   AS dn_1_pv
+        ,SUM(CAST(dn_1_uv AS BIGINT))                                                   AS dn_1_uv
+        ,SUM(CAST(dn_2_exp AS BIGINT))                                                  AS dn_2_exp
+        ,SUM(CAST(dn_2_pv AS BIGINT))                                                   AS dn_2_pv
+        ,SUM(CAST(dn_2_uv AS BIGINT))                                                   AS dn_2_uv
+        ,SUM(CAST(dn_3_exp AS BIGINT))                                                  AS dn_3_exp
+        ,SUM(CAST(dn_3_pv AS BIGINT))                                                   AS dn_3_pv
+        ,SUM(CAST(dn_3_uv AS BIGINT))                                                   AS dn_3_uv
+        ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))  AS dn_total_uv
+        ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS dn_1_rov
+        ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)            AS dn_1_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_2_rov
+        ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_2_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_3_rov
+        ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_3_ror
+        ,ROUND(COALESCE(
+            (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS dn_total_rov
+        ,ROUND(COALESCE(
+            (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
+            / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS dn_total_ror
+
+        -- ==================== D链 (按depth拆分) ====================
+        ,SUM(CAST(d1_1_exp AS BIGINT))                                                    AS d1_1_exp
+        ,SUM(CAST(d1_1_pv AS BIGINT))                                                     AS d1_1_pv
+        ,SUM(CAST(d1_1_uv AS BIGINT))                                                     AS d1_1_uv
+        ,SUM(CAST(d1_2_exp AS BIGINT))                                                    AS d1_2_exp
+        ,SUM(CAST(d1_2_pv AS BIGINT))                                                     AS d1_2_pv
+        ,SUM(CAST(d1_2_uv AS BIGINT))                                                     AS d1_2_uv
+        ,SUM(CAST(d1_3_exp AS BIGINT))                                                    AS d1_3_exp
+        ,SUM(CAST(d1_3_pv AS BIGINT))                                                     AS d1_3_pv
+        ,SUM(CAST(d1_3_uv AS BIGINT))                                                     AS d1_3_uv
+        ,SUM(CAST(d2_1_exp AS BIGINT))                                                    AS d2_1_exp
+        ,SUM(CAST(d2_1_pv AS BIGINT))                                                     AS d2_1_pv
+        ,SUM(CAST(d2_1_uv AS BIGINT))                                                     AS d2_1_uv
+        ,SUM(CAST(d2_2_exp AS BIGINT))                                                    AS d2_2_exp
+        ,SUM(CAST(d2_2_pv AS BIGINT))                                                     AS d2_2_pv
+        ,SUM(CAST(d2_2_uv AS BIGINT))                                                     AS d2_2_uv
+        ,SUM(CAST(d2_3_exp AS BIGINT))                                                    AS d2_3_exp
+        ,SUM(CAST(d2_3_pv AS BIGINT))                                                     AS d2_3_pv
+        ,SUM(CAST(d2_3_uv AS BIGINT))                                                     AS d2_3_uv
+        ,SUM(CAST(d3_1_exp AS BIGINT))                                                    AS d3_1_exp
+        ,SUM(CAST(d3_1_pv AS BIGINT))                                                     AS d3_1_pv
+        ,SUM(CAST(d3_1_uv AS BIGINT))                                                     AS d3_1_uv
+        ,SUM(CAST(d3_2_exp AS BIGINT))                                                    AS d3_2_exp
+        ,SUM(CAST(d3_2_pv AS BIGINT))                                                     AS d3_2_pv
+        ,SUM(CAST(d3_2_uv AS BIGINT))                                                     AS d3_2_uv
+        ,SUM(CAST(d3_3_exp AS BIGINT))                                                    AS d3_3_exp
+        ,SUM(CAST(d3_3_pv AS BIGINT))                                                     AS d3_3_pv
+        ,SUM(CAST(d3_3_uv AS BIGINT))                                                     AS d3_3_uv
+        -- D链depth拆分 hop1 rov/ror
+        ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS d1_1_rov
+        ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)            AS d1_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_1_rov
+        ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_1_rov
+        ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_1_ror
+        -- D链depth拆分 hop2 rov/ror
+        ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d1_2_rov
+        ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d1_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_2_rov
+        ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_2_rov
+        ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_2_ror
+        -- D链depth拆分 hop3 rov/ror
+        ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d1_3_rov
+        ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d1_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_3_rov
+        ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_3_rov
+        ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_3_ror
+
+        -- ==================== 全链路 ====================
+        ,SUM(CAST(return_n_uv_noself AS BIGINT))
+            + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+            + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+                                                                                        AS all_return_n_uv
+        ,ROUND(COALESCE(
+            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(COUNT(1), 0)
+        , 0), 6)                                                                        AS all_rovn
+        ,ROUND(COALESCE(
+            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_rov
+        ,ROUND(COALESCE(
+            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_ror
+
+
+-- =====================================================================
+-- FROM + GROUP BY CUBE
+-- =====================================================================
+
+FROM    t_wide
+
+GROUP BY CUBE(
+            user_type
+            ,hh_bucket
+            ,head_merge_leve2
+            ,vid_merge_leve2
+            ,vid_id
+        )
+
+ORDER BY exposure_cnt DESC
+;

+ 632 - 0
table_gen/test_4.sql

@@ -0,0 +1,632 @@
+-- =====================================================================
+-- 曝光回流链路 CUBE 聚合表 (基于 wide 宽表, 增加用户/品类维度 + 模型预估指标)
+-- 维度: user_type × hh_bucket × head_merge_leve2 × vid_merge_leve2 × vid_id (CUBE)
+-- 参考: de.sql + dwd_recsys_alg_exposure_agg_20260209
+-- =====================================================================
+
+-- -- DROP TABLE IF EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209;
+-- CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 (
+--     -- ==================== 维度列 ====================
+--     dt                         STRING    COMMENT '日期'
+--     ,user_type                  STRING    COMMENT '用户拉活量分层(R0&新用户/R1-50/R_180_330等,汇总为SUM)'
+--     ,hh_bucket                STRING    COMMENT '小时段(00-03/04-07/.../20-23,汇总为SUM)'
+--     ,head_merge_leve2         STRING    COMMENT '进入内容品类(headvideoid品类,汇总为SUM)'
+--     ,vid_merge_leve2          STRING    COMMENT '推荐内容品类(vid品类,TOP10曝光+其他,汇总为SUM)'
+--     ,vid_id                   STRING    COMMENT '内容id(品类曝光TOP1+其他,汇总为SUM)'
+
+--     -- ==================== 基础流量 ====================
+--     ,exposure_cnt             BIGINT    COMMENT '曝光次数'
+--     ,exposure_uv              BIGINT    COMMENT '曝光人数(mid去重)'
+--     ,vid_cnt                  BIGINT    COMMENT '视频个数(vid去重)'
+--     ,exposure_per_user        DOUBLE    COMMENT '人均曝光次数 = 曝光次数/曝光人数'
+
+--     -- ==================== 分享 & 回流漏斗 ====================
+--     ,share_exposure_cnt       BIGINT    COMMENT '产生分享的曝光数'
+--     ,share_cnt                BIGINT    COMMENT '分享总次数'
+--     ,return_exposure_cnt      BIGINT    COMMENT '产生回流的曝光数(含自身) = SUM(is_return_n)'
+--     ,return_exposure_cnt_noself BIGINT  COMMENT '产生回流的曝光数(非自身) = SUM(is_return_noself)'
+--     ,return_uv                BIGINT    COMMENT '回流人数(含自身) = SUM(return_n_uv)'
+--     ,return_uv_noself         BIGINT    COMMENT '回流人数(非自身) = SUM(return_n_uv_noself)'
+--     ,share_rate               DOUBLE    COMMENT '分享率 = share_exposure_cnt/exposure_cnt'
+--     ,return_rate              DOUBLE    COMMENT '回流率(含自身) = return_exposure_cnt/exposure_cnt'
+--     ,return_rate_noself       DOUBLE    COMMENT '回流率(非自身) = return_exposure_cnt_noself/exposure_cnt'
+--     ,share_return_rate        DOUBLE    COMMENT '分享→回流转化率(非自身) = return_exposure_cnt_noself/share_exposure_cnt'
+
+--     -- ==================== 模型预估: STR (曝光→非自身回流概率) ====================
+--     ,str_real                 DOUBLE    COMMENT '= return_rate_noself, 模型label'
+--     ,str_pred                 DOUBLE    COMMENT 'STR预估 = SUM(str_pred)/exposure_cnt'
+--     ,str_copc                 DOUBLE    COMMENT 'STR copc = str_real/str_pred'
+--     ,str_mae                  DOUBLE    COMMENT 'STR MAE = AVG(|str_pred - str_real|)'
+--     ,str_var                  DOUBLE    COMMENT 'STR VAR = VARIANCE(str_pred - str_real)'
+
+--     -- ==================== 模型预估: ROSN (条件回流UV, 非自身) ====================
+--     ,rosn_real                DOUBLE    COMMENT '= return_uv_noself/return_exposure_cnt_noself, 模型label'
+--     ,rosn_pred                DOUBLE    COMMENT 'ROSN预估 = SUM(rosn_pred WHERE is_return_noself=1)/SUM(is_return_noself)'
+--     ,rosn_copc                DOUBLE    COMMENT 'ROSN copc = rosn_real/rosn_pred'
+--     ,rosn_pred_origin         DOUBLE    COMMENT 'ROSN原始预估均值 = AVG(rosn_pred_origin)'
+--     ,rosn_mae                 DOUBLE    COMMENT 'ROSN MAE = AVG(|rosn_pred - rosn_real|) WHERE is_return_noself=1'
+--     ,rosn_var                 DOUBLE    COMMENT 'ROSN VAR = VARIANCE(rosn_pred - rosn_real) WHERE is_return_noself=1'
+
+--     -- ==================== 模型预估: ROVN (STR×ROSN) ====================
+--     ,rovn_real                DOUBLE    COMMENT '= return_uv_noself/exposure_cnt, 模型label'
+--     ,rovn_pred                DOUBLE    COMMENT 'rovn预估 = AVG(str_pred*rosn_pred)'
+--     ,rovn_copc                DOUBLE    COMMENT 'rovn copc = rovn_real/rovn_pred'
+--     ,rovn_mae                 DOUBLE    COMMENT 'rovn MAE = AVG(|rovn_pred - rovn_real|)'
+--     ,rovn_var                 DOUBLE    COMMENT 'rovn VAR = VARIANCE(rovn_pred - rovn_real)'
+--     ,sortscore_avg            DOUBLE    COMMENT 'sortscore均值'
+
+--     -- ==================== B链 (分享→点击) ====================
+--     ,bn_uv                    BIGINT    COMMENT 'B链全量: 回流去重人数'
+--     ,bn_pv                    BIGINT    COMMENT 'B链全量: 回流点击次数'
+--     ,bn_exp                   BIGINT    COMMENT 'B链全量: 回流session曝光数'
+--     ,bn_ror                   DOUBLE    COMMENT 'bn_uv/exposure_uv'
+--     ,bn_rov                   DOUBLE    COMMENT 'bn_uv/exposure_cnt'
+--     ,b1_uv                    BIGINT    COMMENT 'B链depth=1: 回流去重人数'
+--     ,b1_pv                    BIGINT    COMMENT 'B链depth=1: 回流点击次数'
+--     ,b1_exp                   BIGINT    COMMENT 'B链depth=1: 回流session曝光数'
+--     ,b1_ror                   DOUBLE    COMMENT 'b1_uv/exposure_uv'
+--     ,b1_rov                   DOUBLE    COMMENT 'b1_uv/exposure_cnt'
+--     ,b2_uv                    BIGINT    COMMENT 'B链depth=2: 回流去重人数'
+--     ,b2_pv                    BIGINT    COMMENT 'B链depth=2: 回流点击次数'
+--     ,b2_exp                   BIGINT    COMMENT 'B链depth=2: 回流session曝光数'
+--     ,b2_ror                   DOUBLE    COMMENT 'b2_uv/b1_uv'
+--     ,b2_rov                   DOUBLE    COMMENT 'b2_uv/b1_exp'
+--     ,b3_uv                    BIGINT    COMMENT 'B链depth=3: 回流去重人数'
+--     ,b3_pv                    BIGINT    COMMENT 'B链depth=3: 回流点击次数'
+--     ,b3_exp                   BIGINT    COMMENT 'B链depth=3: 回流session曝光数'
+--     ,b3_ror                   DOUBLE    COMMENT 'b3_uv/b2_uv'
+--     ,b3_rov                   DOUBLE    COMMENT 'b3_uv/b2_exp'
+
+--     -- ==================== C链 (全量depth, 按hop) ====================
+--     ,cn_1_uv                  BIGINT    COMMENT 'C链hop1: 回流去重人数'
+--     ,cn_1_pv                  BIGINT    COMMENT 'C链hop1: 回流点击次数'
+--     ,cn_1_exp                 BIGINT    COMMENT 'C链hop1: 回流session曝光数'
+--     ,cn_1_ror                 DOUBLE    COMMENT 'cn_1_uv/bn_uv'
+--     ,cn_1_rov                 DOUBLE    COMMENT 'cn_1_uv/bn_exp'
+--     ,cn_2_uv                  BIGINT    COMMENT 'C链hop2: 回流去重人数'
+--     ,cn_2_pv                  BIGINT    COMMENT 'C链hop2: 回流点击次数'
+--     ,cn_2_exp                 BIGINT    COMMENT 'C链hop2: 回流session曝光数'
+--     ,cn_2_ror                 DOUBLE    COMMENT 'cn_2_uv/cn_1_uv'
+--     ,cn_2_rov                 DOUBLE    COMMENT 'cn_2_uv/cn_1_exp'
+--     ,cn_3_uv                  BIGINT    COMMENT 'C链hop3: 回流去重人数'
+--     ,cn_3_pv                  BIGINT    COMMENT 'C链hop3: 回流点击次数'
+--     ,cn_3_exp                 BIGINT    COMMENT 'C链hop3: 回流session曝光数'
+--     ,cn_3_ror                 DOUBLE    COMMENT 'cn_3_uv/cn_2_uv'
+--     ,cn_3_rov                 DOUBLE    COMMENT 'cn_3_uv/cn_2_exp'
+--     ,cn_total_uv              BIGINT    COMMENT 'C链合计UV'
+--     ,cn_total_ror             DOUBLE    COMMENT 'cn_total_uv/bn_uv'
+--     ,cn_total_rov             DOUBLE    COMMENT 'cn_total_uv/bn_exp'
+--     -- C链 depth拆分 hop1
+--     ,c1_1_uv                  BIGINT    COMMENT 'C链d1-hop1: 回流去重人数'
+--     ,c1_1_pv                  BIGINT    COMMENT 'C链d1-hop1: 回流点击次数'
+--     ,c1_1_exp                 BIGINT    COMMENT 'C链d1-hop1: 回流session曝光数'
+--     ,c1_1_ror                 DOUBLE    COMMENT 'c1_1_uv/bn_uv'
+--     ,c1_1_rov                 DOUBLE    COMMENT 'c1_1_uv/bn_exp'
+--     ,c2_1_uv                  BIGINT    COMMENT 'C链d2-hop1: 回流去重人数'
+--     ,c2_1_pv                  BIGINT    COMMENT 'C链d2-hop1: 回流点击次数'
+--     ,c2_1_exp                 BIGINT    COMMENT 'C链d2-hop1: 回流session曝光数'
+--     ,c2_1_ror                 DOUBLE    COMMENT 'c2_1_uv/c1_1_uv'
+--     ,c2_1_rov                 DOUBLE    COMMENT 'c2_1_uv/c1_1_exp'
+--     ,c3_1_uv                  BIGINT    COMMENT 'C链d3-hop1: 回流去重人数'
+--     ,c3_1_pv                  BIGINT    COMMENT 'C链d3-hop1: 回流点击次数'
+--     ,c3_1_exp                 BIGINT    COMMENT 'C链d3-hop1: 回流session曝光数'
+--     ,c3_1_ror                 DOUBLE    COMMENT 'c3_1_uv/c2_1_uv'
+--     ,c3_1_rov                 DOUBLE    COMMENT 'c3_1_uv/c2_1_exp'
+--     -- C链 depth拆分 hop2
+--     ,c1_2_uv                  BIGINT    COMMENT 'C链d1-hop2: 回流去重人数'
+--     ,c1_2_pv                  BIGINT    COMMENT 'C链d1-hop2: 回流点击次数'
+--     ,c1_2_exp                 BIGINT    COMMENT 'C链d1-hop2: 回流session曝光数'
+--     ,c1_2_ror                 DOUBLE    COMMENT 'c1_2_uv/cn_1_uv'
+--     ,c1_2_rov                 DOUBLE    COMMENT 'c1_2_uv/cn_1_exp'
+--     ,c2_2_uv                  BIGINT    COMMENT 'C链d2-hop2: 回流去重人数'
+--     ,c2_2_pv                  BIGINT    COMMENT 'C链d2-hop2: 回流点击次数'
+--     ,c2_2_exp                 BIGINT    COMMENT 'C链d2-hop2: 回流session曝光数'
+--     ,c2_2_ror                 DOUBLE    COMMENT 'c2_2_uv/c1_2_uv'
+--     ,c2_2_rov                 DOUBLE    COMMENT 'c2_2_uv/c1_2_exp'
+--     ,c3_2_uv                  BIGINT    COMMENT 'C链d3-hop2: 回流去重人数'
+--     ,c3_2_pv                  BIGINT    COMMENT 'C链d3-hop2: 回流点击次数'
+--     ,c3_2_exp                 BIGINT    COMMENT 'C链d3-hop2: 回流session曝光数'
+--     ,c3_2_ror                 DOUBLE    COMMENT 'c3_2_uv/c2_2_uv'
+--     ,c3_2_rov                 DOUBLE    COMMENT 'c3_2_uv/c2_2_exp'
+--     -- C链 depth拆分 hop3
+--     ,c1_3_uv                  BIGINT    COMMENT 'C链d1-hop3: 回流去重人数'
+--     ,c1_3_pv                  BIGINT    COMMENT 'C链d1-hop3: 回流点击次数'
+--     ,c1_3_exp                 BIGINT    COMMENT 'C链d1-hop3: 回流session曝光数'
+--     ,c1_3_ror                 DOUBLE    COMMENT 'c1_3_uv/cn_2_uv'
+--     ,c1_3_rov                 DOUBLE    COMMENT 'c1_3_uv/cn_2_exp'
+--     ,c2_3_uv                  BIGINT    COMMENT 'C链d2-hop3: 回流去重人数'
+--     ,c2_3_pv                  BIGINT    COMMENT 'C链d2-hop3: 回流点击次数'
+--     ,c2_3_exp                 BIGINT    COMMENT 'C链d2-hop3: 回流session曝光数'
+--     ,c2_3_ror                 DOUBLE    COMMENT 'c2_3_uv/c1_3_uv'
+--     ,c2_3_rov                 DOUBLE    COMMENT 'c2_3_uv/c1_3_exp'
+--     ,c3_3_uv                  BIGINT    COMMENT 'C链d3-hop3: 回流去重人数'
+--     ,c3_3_pv                  BIGINT    COMMENT 'C链d3-hop3: 回流点击次数'
+--     ,c3_3_exp                 BIGINT    COMMENT 'C链d3-hop3: 回流session曝光数'
+--     ,c3_3_ror                 DOUBLE    COMMENT 'c3_3_uv/c2_3_uv'
+--     ,c3_3_rov                 DOUBLE    COMMENT 'c3_3_uv/c2_3_exp'
+
+--     -- ==================== D链 (session内后续曝光传播) ====================
+--     ,d0                       BIGINT    COMMENT 'D链初始成本: session内后续曝光数'
+--     ,dn_1_uv                  BIGINT    COMMENT 'D链hop1: 回流去重人数'
+--     ,dn_1_pv                  BIGINT    COMMENT 'D链hop1: 回流点击次数'
+--     ,dn_1_exp                 BIGINT    COMMENT 'D链hop1: 回流session曝光数'
+--     ,dn_1_ror                 DOUBLE    COMMENT 'dn_1_uv/exposure_uv'
+--     ,dn_1_rov                 DOUBLE    COMMENT 'dn_1_uv/d0'
+--     ,dn_2_uv                  BIGINT    COMMENT 'D链hop2: 回流去重人数'
+--     ,dn_2_pv                  BIGINT    COMMENT 'D链hop2: 回流点击次数'
+--     ,dn_2_exp                 BIGINT    COMMENT 'D链hop2: 回流session曝光数'
+--     ,dn_2_ror                 DOUBLE    COMMENT 'dn_2_uv/dn_1_uv'
+--     ,dn_2_rov                 DOUBLE    COMMENT 'dn_2_uv/dn_1_exp'
+--     ,dn_3_uv                  BIGINT    COMMENT 'D链hop3: 回流去重人数'
+--     ,dn_3_pv                  BIGINT    COMMENT 'D链hop3: 回流点击次数'
+--     ,dn_3_exp                 BIGINT    COMMENT 'D链hop3: 回流session曝光数'
+--     ,dn_3_ror                 DOUBLE    COMMENT 'dn_3_uv/dn_2_uv'
+--     ,dn_3_rov                 DOUBLE    COMMENT 'dn_3_uv/dn_2_exp'
+--     ,dn_total_uv              BIGINT    COMMENT 'D链合计UV'
+--     ,dn_total_ror             DOUBLE    COMMENT 'dn_total_uv/exposure_uv'
+--     ,dn_total_rov             DOUBLE    COMMENT 'dn_total_uv/d0'
+--     -- D链 depth拆分 hop1
+--     ,d1_1_uv                  BIGINT    COMMENT 'D链d1-hop1: 回流去重人数'
+--     ,d1_1_pv                  BIGINT    COMMENT 'D链d1-hop1: 回流点击次数'
+--     ,d1_1_exp                 BIGINT    COMMENT 'D链d1-hop1: 回流session曝光数'
+--     ,d1_1_ror                 DOUBLE    COMMENT 'd1_1_uv/exposure_uv'
+--     ,d1_1_rov                 DOUBLE    COMMENT 'd1_1_uv/d0'
+--     ,d2_1_uv                  BIGINT    COMMENT 'D链d2-hop1: 回流去重人数'
+--     ,d2_1_pv                  BIGINT    COMMENT 'D链d2-hop1: 回流点击次数'
+--     ,d2_1_exp                 BIGINT    COMMENT 'D链d2-hop1: 回流session曝光数'
+--     ,d2_1_ror                 DOUBLE    COMMENT 'd2_1_uv/d1_1_uv'
+--     ,d2_1_rov                 DOUBLE    COMMENT 'd2_1_uv/d1_1_exp'
+--     ,d3_1_uv                  BIGINT    COMMENT 'D链d3-hop1: 回流去重人数'
+--     ,d3_1_pv                  BIGINT    COMMENT 'D链d3-hop1: 回流点击次数'
+--     ,d3_1_exp                 BIGINT    COMMENT 'D链d3-hop1: 回流session曝光数'
+--     ,d3_1_ror                 DOUBLE    COMMENT 'd3_1_uv/d2_1_uv'
+--     ,d3_1_rov                 DOUBLE    COMMENT 'd3_1_uv/d2_1_exp'
+--     -- D链 depth拆分 hop2
+--     ,d1_2_uv                  BIGINT    COMMENT 'D链d1-hop2: 回流去重人数'
+--     ,d1_2_pv                  BIGINT    COMMENT 'D链d1-hop2: 回流点击次数'
+--     ,d1_2_exp                 BIGINT    COMMENT 'D链d1-hop2: 回流session曝光数'
+--     ,d1_2_ror                 DOUBLE    COMMENT 'd1_2_uv/dn_1_uv'
+--     ,d1_2_rov                 DOUBLE    COMMENT 'd1_2_uv/dn_1_exp'
+--     ,d2_2_uv                  BIGINT    COMMENT 'D链d2-hop2: 回流去重人数'
+--     ,d2_2_pv                  BIGINT    COMMENT 'D链d2-hop2: 回流点击次数'
+--     ,d2_2_exp                 BIGINT    COMMENT 'D链d2-hop2: 回流session曝光数'
+--     ,d2_2_ror                 DOUBLE    COMMENT 'd2_2_uv/d1_2_uv'
+--     ,d2_2_rov                 DOUBLE    COMMENT 'd2_2_uv/d1_2_exp'
+--     ,d3_2_uv                  BIGINT    COMMENT 'D链d3-hop2: 回流去重人数'
+--     ,d3_2_pv                  BIGINT    COMMENT 'D链d3-hop2: 回流点击次数'
+--     ,d3_2_exp                 BIGINT    COMMENT 'D链d3-hop2: 回流session曝光数'
+--     ,d3_2_ror                 DOUBLE    COMMENT 'd3_2_uv/d2_2_uv'
+--     ,d3_2_rov                 DOUBLE    COMMENT 'd3_2_uv/d2_2_exp'
+--     -- D链 depth拆分 hop3
+--     ,d1_3_uv                  BIGINT    COMMENT 'D链d1-hop3: 回流去重人数'
+--     ,d1_3_pv                  BIGINT    COMMENT 'D链d1-hop3: 回流点击次数'
+--     ,d1_3_exp                 BIGINT    COMMENT 'D链d1-hop3: 回流session曝光数'
+--     ,d1_3_ror                 DOUBLE    COMMENT 'd1_3_uv/dn_2_uv'
+--     ,d1_3_rov                 DOUBLE    COMMENT 'd1_3_uv/dn_2_exp'
+--     ,d2_3_uv                  BIGINT    COMMENT 'D链d2-hop3: 回流去重人数'
+--     ,d2_3_pv                  BIGINT    COMMENT 'D链d2-hop3: 回流点击次数'
+--     ,d2_3_exp                 BIGINT    COMMENT 'D链d2-hop3: 回流session曝光数'
+--     ,d2_3_ror                 DOUBLE    COMMENT 'd2_3_uv/d1_3_uv'
+--     ,d2_3_rov                 DOUBLE    COMMENT 'd2_3_uv/d1_3_exp'
+--     ,d3_3_uv                  BIGINT    COMMENT 'D链d3-hop3: 回流去重人数'
+--     ,d3_3_pv                  BIGINT    COMMENT 'D链d3-hop3: 回流点击次数'
+--     ,d3_3_exp                 BIGINT    COMMENT 'D链d3-hop3: 回流session曝光数'
+--     ,d3_3_ror                 DOUBLE    COMMENT 'd3_3_uv/d2_3_uv'
+--     ,d3_3_rov                 DOUBLE    COMMENT 'd3_3_uv/d2_3_exp'
+
+--     -- ==================== 全链路 ====================
+--     ,all_return_n_uv          BIGINT    COMMENT '全链路拉回UV = B + C + D'
+--     ,all_rovn                 DOUBLE    COMMENT 'all_return_n_uv/exposure_cnt'
+--     ,all_ror                  DOUBLE    COMMENT 'all_return_n_uv/exposure_uv'
+--     ,all_rov                  DOUBLE    COMMENT 'all_return_n_uv/exposure_cnt'
+-- )
+-- COMMENT '曝光回流链路CUBE聚合-宽表版 (5维度: 用户分层/小时段/进入品类TOP1/推荐品类TOP10/内容idTOP1)'
+-- ;
+
+
+-- -- =====================================================================
+-- -- INSERT: 从 wide 宽表聚合, CUBE 全维度组合
+-- -- =====================================================================
+
+-- -- SELECT * FROM loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 WHERE dt = '${dt}' ORDER BY exposure_cnt DESC LIMIT 100;
+
+-- INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_agg_wide_20260209
+
+WITH
+-- User activity-tier mapping: one row per (type, openid), read from yesterday's
+-- partition; drops the S_ALL rollup and any R50-family tiers.
+t_user_type AS (
+    SELECT  type
+            ,openid
+    FROM    loghubods.mid_share_return_people_1year
+    WHERE   dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
+    AND     type IS NOT NULL
+    AND     type != 'S_ALL'
+    AND     type NOT REGEXP 'R50'
+    GROUP BY type
+            ,openid
+)
+-- Model prediction scores: dedupe statistics_log_hour down to one row per
+-- (apptype, videoid, recommendtraceid). The original ROW_NUMBER had no ORDER BY,
+-- which makes the kept row arbitrary and non-reproducible between runs (and is a
+-- syntax error on engines that require ORDER BY in ranking windows). Ordering by
+-- sortscore DESC keeps the highest-ranked scoring row deterministically.
+,t_score AS (
+    SELECT  apptype
+            ,videoid
+            ,recommendtraceid
+            ,scoresmap
+            ,sortscore
+    FROM    (
+                SELECT  apptype
+                        ,videoid
+                        ,recommendtraceid
+                        ,scoresmap
+                        ,sortscore
+                        ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid ORDER BY sortscore DESC) AS rn
+                FROM    loghubods.statistics_log_hour
+                WHERE   dt LIKE '${dt}%'
+                AND     scoresmap IS NOT NULL
+            )
+    WHERE   rn = 1
+)
+-- TOP-1 entry-content category: the headvideoid category with the largest
+-- non-self return count for the day (ties broken arbitrarily, as before).
+,t_top_head_cate AS (
+    SELECT  merge_leve2
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,SUM(CAST(base.is_return_noself AS BIGINT)) AS return_cnt
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.headvideoid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2
+            )
+    ORDER BY return_cnt DESC
+    LIMIT   1
+)
+-- TOP-10 recommended-content categories: vid categories ranked by exposure
+-- count for the day (ties broken arbitrarily, as before).
+,t_top_vid_cate AS (
+    SELECT  merge_leve2
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,COUNT(1) AS exp_cnt
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2
+            )
+    ORDER BY exp_cnt DESC
+    LIMIT   10
+)
+-- TOP-1 vid per category: for each merge_leve2 category, keep the single vid
+-- with the highest exposure count, considering only vids with >100k exposures.
+-- NOTE: HAVING is evaluated BEFORE the ROW_NUMBER window, so the ranking only
+-- sees vids that already passed the >100k filter — do not move the filter to
+-- the outer query, that would change which rows get rk = 1.
+,t_top_vid AS (
+    SELECT  merge_leve2, vid
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,base.vid
+                        ,COUNT(1)                                                               AS exp_cnt
+                        ,ROW_NUMBER() OVER (PARTITION BY vt.merge_leve2 ORDER BY COUNT(1) DESC) AS rk
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2, base.vid
+                HAVING  exp_cnt > 100000
+            )
+    WHERE   rk <= 1
+)
+-- 宽表
+,t_wide AS (
+    SELECT  base.*
+            ,CASE WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                  WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                  WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                  ELSE e.type
+             END AS user_type
+            ,CASE WHEN th.merge_leve2 IS NOT NULL THEN vt_head.merge_leve2 ELSE '其他' END AS head_merge_leve2
+            ,CASE WHEN tv.merge_leve2 IS NOT NULL THEN vt_vid.merge_leve2  ELSE '其他' END AS vid_merge_leve2
+            ,CASE WHEN ti.vid IS NOT NULL          THEN base.vid           ELSE '其他' END AS vid_id
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
+            ,e1.sortscore
+            ,CASE
+                WHEN CAST(hh AS INT) BETWEEN 0  AND 3  THEN '00-03'
+                WHEN CAST(hh AS INT) BETWEEN 4  AND 7  THEN '04-07'
+                WHEN CAST(hh AS INT) BETWEEN 8  AND 11 THEN '08-11'
+                WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
+                WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
+                WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
+                ELSE '-'
+             END AS hh_bucket
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    LEFT JOIN t_user_type e
+    ON      SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
+    LEFT JOIN loghubods.video_merge_tag vt_head
+    ON      base.headvideoid = vt_head.videoid
+    LEFT JOIN loghubods.video_merge_tag vt_vid
+    ON      base.vid = vt_vid.videoid
+    LEFT JOIN t_score e1
+    ON      base.apptype = e1.apptype
+    AND     base.vid = e1.videoid
+    AND     base.recomtraceid = e1.recommendtraceid
+    LEFT JOIN t_top_head_cate th
+    ON      vt_head.merge_leve2 = th.merge_leve2
+    LEFT JOIN t_top_vid_cate tv
+    ON      vt_vid.merge_leve2 = tv.merge_leve2
+    LEFT JOIN t_top_vid ti
+    ON      base.vid = ti.vid
+    WHERE   base.dt = '${dt}'
+)
+
+SELECT
+        -- ==================== 维度列 ====================
+        '${dt}'                                                                                     AS dt
+        ,CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END              AS user_type
+        ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END              AS hh_bucket
+        ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
+        ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END   AS vid_merge_leve2
+        ,CASE WHEN GROUPING(vid_id) = 1 THEN 'SUM' ELSE NVL(vid_id, 'SUM') END                     AS vid_id
+
+        -- ==================== 基础流量 ====================
+        ,COUNT(1)                                                                       AS exposure_cnt
+        ,COUNT(DISTINCT mid)                                                            AS exposure_uv
+        ,COUNT(DISTINCT vid)                                                            AS vid_cnt
+        ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4)                                      AS exposure_per_user
+
+        -- ==================== 分享 & 回流漏斗 ====================
+        ,SUM(CAST(is_share AS BIGINT))                                                  AS share_exposure_cnt
+        ,SUM(CAST(share_cnt AS BIGINT))                                                 AS share_cnt
+        ,SUM(CAST(is_return_n AS BIGINT))                                               AS return_exposure_cnt
+        ,SUM(CAST(is_return_noself AS BIGINT))                                          AS return_exposure_cnt_noself
+        ,SUM(CAST(return_n_uv AS BIGINT))                                               AS return_uv
+        ,SUM(CAST(return_n_uv_noself AS BIGINT))                                        AS return_uv_noself
+        ,ROUND(COALESCE(SUM(CAST(is_share AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                            AS share_rate
+        ,ROUND(COALESCE(SUM(CAST(is_return_n AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                         AS return_rate
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                    AS return_rate_noself
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(CAST(SUM(CAST(is_share AS BIGINT)) AS DOUBLE), 0), 0), 6)               AS share_return_rate
+
+        -- ==================== 模型预估: STR (曝光→非自身回流概率) ====================
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS str_real
+        ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6)                          AS str_pred
+        ,ROUND(
+            (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
+        , 4)                                                                                   AS str_copc
+        ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6)                      AS str_mae
+        ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6)                      AS str_var
+
+        -- ==================== 模型预估: ROSN (条件回流UV, 非自身) ====================
+        ,ROUND(COALESCE(
+            SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
+        , 0), 6)                                                                               AS rosn_real
+        ,ROUND(COALESCE(SUM(CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred END) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6)  AS rosn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
+            / NULLIF(SUM(CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred END) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
+        , 4)                                                                                   AS rosn_copc
+        ,ROUND(AVG(rosn_pred_origin), 6)                                                      AS rosn_pred_origin
+        ,ROUND(AVG(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
+            END
+        ), 6)                                                                                  AS rosn_mae
+        ,ROUND(VARIANCE(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
+            END
+        ), 6)                                                                                  AS rosn_var
+
+        -- ==================== 模型预估: ROVN (STR×ROSN) ====================
+        ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS rovn_real
+        ,ROUND(AVG(str_pred * rosn_pred), 6)                                                    AS rovn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(AVG(str_pred * rosn_pred), 0)
+        , 4)                                                                                     AS rovn_copc
+        ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6)            AS rovn_mae
+        ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6)        AS rovn_var
+        ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6)                                               AS sortscore_avg
+
+        -- ==================== B链 ====================
+        ,SUM(CAST(bn_uv AS BIGINT))                                                     AS bn_uv
+        ,SUM(CAST(bn_pv AS BIGINT))                                                     AS bn_pv
+        ,SUM(CAST(bn_exp AS BIGINT))                                                    AS bn_exp
+        ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)               AS bn_ror
+        ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                          AS bn_rov
+        ,SUM(CAST(b1_uv AS BIGINT))                                                     AS b1_uv
+        ,SUM(CAST(b1_pv AS BIGINT))                                                     AS b1_pv
+        ,SUM(CAST(b1_exp AS BIGINT))                                                    AS b1_exp
+        ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)               AS b1_ror
+        ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                          AS b1_rov
+        ,SUM(CAST(b2_uv AS BIGINT))                                                     AS b2_uv
+        ,SUM(CAST(b2_pv AS BIGINT))                                                     AS b2_pv
+        ,SUM(CAST(b2_exp AS BIGINT))                                                    AS b2_exp
+        ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS b2_ror
+        ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)       AS b2_rov
+        ,SUM(CAST(b3_uv AS BIGINT))                                                     AS b3_uv
+        ,SUM(CAST(b3_pv AS BIGINT))                                                     AS b3_pv
+        ,SUM(CAST(b3_exp AS BIGINT))                                                    AS b3_exp
+        ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS b3_ror
+        ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)       AS b3_rov
+
+        -- ==================== C链 (全量depth, 按hop) ====================
+        ,SUM(CAST(cn_1_uv AS BIGINT))                                                   AS cn_1_uv
+        ,SUM(CAST(cn_1_pv AS BIGINT))                                                   AS cn_1_pv
+        ,SUM(CAST(cn_1_exp AS BIGINT))                                                  AS cn_1_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)      AS cn_1_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)     AS cn_1_rov
+        ,SUM(CAST(cn_2_uv AS BIGINT))                                                   AS cn_2_uv
+        ,SUM(CAST(cn_2_pv AS BIGINT))                                                   AS cn_2_pv
+        ,SUM(CAST(cn_2_exp AS BIGINT))                                                  AS cn_2_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_2_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_2_rov
+        ,SUM(CAST(cn_3_uv AS BIGINT))                                                   AS cn_3_uv
+        ,SUM(CAST(cn_3_pv AS BIGINT))                                                   AS cn_3_pv
+        ,SUM(CAST(cn_3_exp AS BIGINT))                                                  AS cn_3_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_3_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_3_rov
+        ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))  AS cn_total_uv
+        ,ROUND(COALESCE(
+            (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS cn_total_ror
+        ,ROUND(COALESCE(
+            (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS cn_total_rov
+
+        -- ==================== C链 (按depth拆分) ====================
+        -- hop1
+        ,SUM(CAST(c1_1_uv AS BIGINT))                                                     AS c1_1_uv
+        ,SUM(CAST(c1_1_pv AS BIGINT))                                                     AS c1_1_pv
+        ,SUM(CAST(c1_1_exp AS BIGINT))                                                    AS c1_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)      AS c1_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)     AS c1_1_rov
+        ,SUM(CAST(c2_1_uv AS BIGINT))                                                     AS c2_1_uv
+        ,SUM(CAST(c2_1_pv AS BIGINT))                                                     AS c2_1_pv
+        ,SUM(CAST(c2_1_exp AS BIGINT))                                                    AS c2_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_1_rov
+        ,SUM(CAST(c3_1_uv AS BIGINT))                                                     AS c3_1_uv
+        ,SUM(CAST(c3_1_pv AS BIGINT))                                                     AS c3_1_pv
+        ,SUM(CAST(c3_1_exp AS BIGINT))                                                    AS c3_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_1_rov
+        -- hop2
+        ,SUM(CAST(c1_2_uv AS BIGINT))                                                     AS c1_2_uv
+        ,SUM(CAST(c1_2_pv AS BIGINT))                                                     AS c1_2_pv
+        ,SUM(CAST(c1_2_exp AS BIGINT))                                                    AS c1_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c1_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c1_2_rov
+        ,SUM(CAST(c2_2_uv AS BIGINT))                                                     AS c2_2_uv
+        ,SUM(CAST(c2_2_pv AS BIGINT))                                                     AS c2_2_pv
+        ,SUM(CAST(c2_2_exp AS BIGINT))                                                    AS c2_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_2_rov
+        ,SUM(CAST(c3_2_uv AS BIGINT))                                                     AS c3_2_uv
+        ,SUM(CAST(c3_2_pv AS BIGINT))                                                     AS c3_2_pv
+        ,SUM(CAST(c3_2_exp AS BIGINT))                                                    AS c3_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_2_rov
+        -- hop3
+        ,SUM(CAST(c1_3_uv AS BIGINT))                                                     AS c1_3_uv
+        ,SUM(CAST(c1_3_pv AS BIGINT))                                                     AS c1_3_pv
+        ,SUM(CAST(c1_3_exp AS BIGINT))                                                    AS c1_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c1_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c1_3_rov
+        ,SUM(CAST(c2_3_uv AS BIGINT))                                                     AS c2_3_uv
+        ,SUM(CAST(c2_3_pv AS BIGINT))                                                     AS c2_3_pv
+        ,SUM(CAST(c2_3_exp AS BIGINT))                                                    AS c2_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_3_rov
+        ,SUM(CAST(c3_3_uv AS BIGINT))                                                     AS c3_3_uv
+        ,SUM(CAST(c3_3_pv AS BIGINT))                                                     AS c3_3_pv
+        ,SUM(CAST(c3_3_exp AS BIGINT))                                                    AS c3_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_3_rov
+
+        -- ==================== D链 (全量depth, 按hop) ====================
+        ,SUM(CAST(d0 AS BIGINT))                                                        AS d0
+        ,SUM(CAST(dn_1_uv AS BIGINT))                                                   AS dn_1_uv
+        ,SUM(CAST(dn_1_pv AS BIGINT))                                                   AS dn_1_pv
+        ,SUM(CAST(dn_1_exp AS BIGINT))                                                  AS dn_1_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)            AS dn_1_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS dn_1_rov
+        ,SUM(CAST(dn_2_uv AS BIGINT))                                                   AS dn_2_uv
+        ,SUM(CAST(dn_2_pv AS BIGINT))                                                   AS dn_2_pv
+        ,SUM(CAST(dn_2_exp AS BIGINT))                                                  AS dn_2_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_2_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_2_rov
+        ,SUM(CAST(dn_3_uv AS BIGINT))                                                   AS dn_3_uv
+        ,SUM(CAST(dn_3_pv AS BIGINT))                                                   AS dn_3_pv
+        ,SUM(CAST(dn_3_exp AS BIGINT))                                                  AS dn_3_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_3_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_3_rov
+        ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))  AS dn_total_uv
+        ,ROUND(COALESCE(
+            (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
+            / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS dn_total_ror
+        ,ROUND(COALESCE(
+            (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS dn_total_rov
+
+        -- ==================== D链 (按depth拆分) ====================
+        -- hop1
+        ,SUM(CAST(d1_1_uv AS BIGINT))                                                     AS d1_1_uv
+        ,SUM(CAST(d1_1_pv AS BIGINT))                                                     AS d1_1_pv
+        ,SUM(CAST(d1_1_exp AS BIGINT))                                                    AS d1_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)            AS d1_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS d1_1_rov
+        ,SUM(CAST(d2_1_uv AS BIGINT))                                                     AS d2_1_uv
+        ,SUM(CAST(d2_1_pv AS BIGINT))                                                     AS d2_1_pv
+        ,SUM(CAST(d2_1_exp AS BIGINT))                                                    AS d2_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_1_rov
+        ,SUM(CAST(d3_1_uv AS BIGINT))                                                     AS d3_1_uv
+        ,SUM(CAST(d3_1_pv AS BIGINT))                                                     AS d3_1_pv
+        ,SUM(CAST(d3_1_exp AS BIGINT))                                                    AS d3_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_1_rov
+        -- hop2
+        ,SUM(CAST(d1_2_uv AS BIGINT))                                                     AS d1_2_uv
+        ,SUM(CAST(d1_2_pv AS BIGINT))                                                     AS d1_2_pv
+        ,SUM(CAST(d1_2_exp AS BIGINT))                                                    AS d1_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d1_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d1_2_rov
+        ,SUM(CAST(d2_2_uv AS BIGINT))                                                     AS d2_2_uv
+        ,SUM(CAST(d2_2_pv AS BIGINT))                                                     AS d2_2_pv
+        ,SUM(CAST(d2_2_exp AS BIGINT))                                                    AS d2_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_2_rov
+        ,SUM(CAST(d3_2_uv AS BIGINT))                                                     AS d3_2_uv
+        ,SUM(CAST(d3_2_pv AS BIGINT))                                                     AS d3_2_pv
+        ,SUM(CAST(d3_2_exp AS BIGINT))                                                    AS d3_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_2_rov
+        -- hop3
+        ,SUM(CAST(d1_3_uv AS BIGINT))                                                     AS d1_3_uv
+        ,SUM(CAST(d1_3_pv AS BIGINT))                                                     AS d1_3_pv
+        ,SUM(CAST(d1_3_exp AS BIGINT))                                                    AS d1_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d1_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d1_3_rov
+        ,SUM(CAST(d2_3_uv AS BIGINT))                                                     AS d2_3_uv
+        ,SUM(CAST(d2_3_pv AS BIGINT))                                                     AS d2_3_pv
+        ,SUM(CAST(d2_3_exp AS BIGINT))                                                    AS d2_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_3_rov
+        ,SUM(CAST(d3_3_uv AS BIGINT))                                                     AS d3_3_uv
+        ,SUM(CAST(d3_3_pv AS BIGINT))                                                     AS d3_3_pv
+        ,SUM(CAST(d3_3_exp AS BIGINT))                                                    AS d3_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_3_rov
+
+        -- ==================== 全链路 ====================
+        ,SUM(CAST(return_n_uv_noself AS BIGINT))
+            + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+            + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+                                                                                        AS all_return_n_uv
+        ,ROUND(COALESCE(
+            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(COUNT(1), 0)
+        , 0), 6)                                                                        AS all_rovn
+        ,ROUND(COALESCE(
+            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_rov
+        ,ROUND(COALESCE(
+            (   SUM(CAST(return_n_uv_noself AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_ror
+
+
+-- =====================================================================
+-- FROM + GROUP BY CUBE
+-- =====================================================================
+
+FROM    t_wide
+
+GROUP BY CUBE(
+            user_type
+            ,hh_bucket
+            ,head_merge_leve2
+            ,vid_merge_leve2
+            ,vid_id
+        )
+
+ORDER BY exposure_cnt DESC
+;

+ 118 - 0
table_gen/tmp.md

@@ -0,0 +1,118 @@
+
+ 为 B/C/D/全链路的每个 _uv 级别添加:
+ - rov = 带回UV / 成本曝光数
+ - ror = 带回UV / 成本人数
+
+ 分母 = 该级别的输入来源(上一级的输出),hop 和 depth 都逐级递推。
+
+ 数据流 & 分母推导
+
+ 逐级传播逻辑
+
+ depth维度 (分享链深度, 逐级递推):
+   曝光 → share → depth=1点击(b1) → reshare → depth=2点击(b2) → reshare → depth=3点击(b3)
+
+ hop维度 (二次传播跳数, 逐级递推):
+   C链: B chain output → [C hop1] → C hop1 output → [C hop2] → C hop2 output → [C hop3]
+   D链同理: 链路入口 → [D hop1] → D hop1 output → [D hop2] → D hop2 output → [D hop3]
+
+ 完整分母映射
+ ┌─────────────────────┬─────────────────────┬─────────────────────┐
+ │        字段         │ rov 分母 (成本曝光) │ ror 分母 (成本人数) │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ B链                 │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ bn (全量)           │ COUNT(1)            │ COUNT(DISTINCT mid) │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ b1 (depth=1)        │ COUNT(1)            │ COUNT(DISTINCT mid) │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ b2 (depth=2)        │ SUM(b1_exp)         │ SUM(b1_uv)          │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ b3 (depth=3)        │ SUM(b2_exp)         │ SUM(b2_uv)          │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ C链全量depth        │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ cn_1 (hop1)         │ SUM(bn_exp)         │ SUM(bn_uv)          │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ cn_2 (hop2)         │ SUM(cn_1_exp)       │ SUM(cn_1_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ cn_3 (hop3)         │ SUM(cn_2_exp)       │ SUM(cn_2_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ cn (合计)           │ SUM(bn_exp)         │ SUM(bn_uv)          │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ C链depth拆分 (hop1) │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c1_1 (d1-hop1)      │ SUM(bn_exp)         │ SUM(bn_uv)          │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c2_1 (d2-hop1)      │ SUM(c1_1_exp)       │ SUM(c1_1_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c3_1 (d3-hop1)      │ SUM(c2_1_exp)       │ SUM(c2_1_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ C链depth拆分 (hop2) │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c1_2 (d1-hop2)      │ SUM(cn_1_exp)       │ SUM(cn_1_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c2_2 (d2-hop2)      │ SUM(c1_2_exp)       │ SUM(c1_2_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c3_2 (d3-hop2)      │ SUM(c2_2_exp)       │ SUM(c2_2_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ C链depth拆分 (hop3) │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c1_3 (d1-hop3)      │ SUM(cn_2_exp)       │ SUM(cn_2_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c2_3 (d2-hop3)      │ SUM(c1_3_exp)       │ SUM(c1_3_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ c3_3 (d3-hop3)      │ SUM(c2_3_exp)       │ SUM(c2_3_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ D链全量depth        │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ dn_1 (hop1)         │ SUM(d0)             │ COUNT(DISTINCT mid) │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ dn_2 (hop2)         │ SUM(dn_1_exp)       │ SUM(dn_1_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ dn_3 (hop3)         │ SUM(dn_2_exp)       │ SUM(dn_2_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ dn (合计)           │ SUM(d0)             │ COUNT(DISTINCT mid) │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ D链depth拆分 (hop1) │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d1_1 (d1-hop1)      │ SUM(d0)             │ COUNT(DISTINCT mid) │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d2_1 (d2-hop1)      │ SUM(d1_1_exp)       │ SUM(d1_1_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d3_1 (d3-hop1)      │ SUM(d2_1_exp)       │ SUM(d2_1_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ D链depth拆分 (hop2) │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d1_2 (d1-hop2)      │ SUM(dn_1_exp)       │ SUM(dn_1_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d2_2 (d2-hop2)      │ SUM(d1_2_exp)       │ SUM(d1_2_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d3_2 (d3-hop2)      │ SUM(d2_2_exp)       │ SUM(d2_2_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ D链depth拆分 (hop3) │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d1_3 (d1-hop3)      │ SUM(dn_2_exp)       │ SUM(dn_2_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d2_3 (d2-hop3)      │ SUM(d1_3_exp)       │ SUM(d1_3_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ d3_3 (d3-hop3)      │ SUM(d2_3_exp)       │ SUM(d2_3_uv)        │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ 全链路              │                     │                     │
+ ├─────────────────────┼─────────────────────┼─────────────────────┤
+ │ all                 │ COUNT(1)            │ COUNT(DISTINCT mid) │
+ └─────────────────────┴─────────────────────┴─────────────────────┘
+ 规则总结:
+ - depth 维度: depth=1 的成本=该hop的入口; depth=N+1 的成本=depth=N 的输出
+ - hop 维度: hop1 的成本=链路入口; hop N+1 的成本=hop N 全量depth的输出
+ - D链 hop1 没有独立的成本人数(d0 统计的是同一用户 session 内的曝光次数, 不构成独立的人数口径), 因此成本人数回退为 exposure_uv, 即 COUNT(DISTINCT mid)
+
+ 实施步骤
+
+ 1. 输出 CSV 到 table_gen/rov_ror_fields.csv (61 字段的完整清单)
+ 2. DDL: 在 CREATE TABLE 每个链路块末尾追加 DOUBLE 字段
+ 3. SELECT: 追加 ROUND(COALESCE(numerator / NULLIF(denominator, 0), 0), 6)
+ 4. 验证: grep 计数确保 DDL 和 SELECT 字段数一致
+
+ 文件
+
+ - loghubods.dwd_recsys_alg_exposure_agg_wide_20260209.sql

+ 347 - 0
tables/loghubods/loghubods.dwd_recsys_alg_exposure_base_20250108.md

@@ -0,0 +1,347 @@
+# dwd_recsys_alg_exposure_base_20250108 表逻辑说明
+
+## 一、表定位
+
+**曝光-分享-回流** 链路分析表,在曝光粒度上统计分享和回流指标。
+
+---
+
+## 二、整体流程(4 步)
+
+```
+数据源 → 分享关联曝光 → 分享关联回流 → 汇总输出
+```
+
+---
+
+## 三、每步详解
+
+### Step 1: 数据准备
+
+| CTE | 数据源 | 说明 |
+|-----|--------|------|
+| `t_return` | `user_share_log_flow` (topic=click) | 回流点击数据 |
+| `t_share_from_sharelog` | `user_share_log_flow` (topic=share) | 分享行为数据 |
+| `t_exposure` | `dwd_recsys_alg_exposure_base_view_20250402` | 曝光数据 |
+
+**贡献字段**:
+- 曝光维度:apptype, uid, mid, vid, sessionid, subsessionid
+- 场景信息:pagesource, recommendlogvo, abcode, recommendpagetype, recomtraceid, headvideoid, rootsourceid, hotsencetype
+- 流量池:flowpool, level
+- 设备信息:clientip, machineinfo_brand/model/system/wechatversion/sdkversion
+- 地理信息:province, city
+- 时间:ts
+
+---
+
+### Step 2: 分享 → 曝光关联
+
+**目的**:找到每次分享对应的曝光记录
+
+**关联方向**:分享 → 曝光(多对一)
+
+**关联 Key(6 级 Fallback)**:
+
+| 级别 | 关联条件 | 说明 |
+|------|---------|------|
+| 1 | apptype + mid + vid + **subsessionid** + pagesource + ts>= | 最严格 |
+| 2 | apptype + mid + vid + **sessionid** + pagesource + ts>= | 放宽会话 |
+| 3 | apptype + mid + vid + **subsessionid** + pagesource | 去掉时间 |
+| 4 | apptype + mid + vid + **sessionid** + pagesource | 去掉时间 |
+| 5 | apptype + mid + vid + **subsessionid** | 去掉 pagesource |
+| 6 | apptype + mid + vid + **sessionid** | 最宽松 |
+
+**必须 Key**:`apptype + mid + vid`
+
+**贡献字段**:
+- `is_share` = 该曝光是否产生分享
+- `share_cnt` = 该曝光产生的分享次数
+
+---
+
+### Step 3: 分享 ← 回流关联
+
+**目的**:统计每次分享带来的回流
+
+**关联方向**:回流 → 分享(多对一)
+
+**关联 Key**:
+
+| 回流类型 | 关联条件 | 说明 |
+|---------|---------|------|
+| 一级回流 | `shareid` + vid + apptype | 直接分享带来的回流 |
+| N级回流 | `rootshareid` + vid + apptype | 裂变链路所有回流 |
+
+**贡献字段**:
+
+| 字段 | 说明 |
+|------|------|
+| `is_return_1` | 是否有一级回流 |
+| `return_1_pv` | 一级回流 PV |
+| `return_1_uv` | 一级回流 UV |
+| `return_1_mids` | 一级回流用户列表 |
+| `is_return_n` | 是否有 N 级回流 |
+| `return_n_pv` | N 级回流 PV |
+| `return_n_uv` | N 级回流 UV |
+| `return_n_mids` | N 级回流用户列表 |
+| `is_return_noself` | 是否有非自己的一级回流 |
+| `return_1_uv_noself` | 排除自己的一级回流 UV |
+| `return_1_mids_noself` | 排除自己的一级回流用户列表 |
+| `is_return_n_noself` | 是否有非自己的 N 级回流 |
+| `return_n_uv_noself` | 排除自己的 N 级回流 UV |
+| `return_n_mids_noself` | 排除自己的 N 级回流用户列表 |
+| `new_exposure_cnt` | 回流带来的新曝光数 |
+
+**注**:`return_n` 包含 `return_1`(当 rootshareid = shareid 时,一级回流同时命中 N 级关联)
+
+---
+
+### Step 4: 汇总输出
+
+**关联 Key**:`exposure_id`
+
+```sql
+t_exposure
+LEFT JOIN t_share_with_label_group
+ON exposure_id
+```
+
+**派生字段**:
+
+| 字段 | 计算逻辑 |
+|------|---------|
+| `pagesource_new` | pagesource 分类映射(回流后沉浸页、详情页、首页feed等) |
+| `extend` | JSON 扩展字段(animationSceneType, extParams, group_name 等) |
+
+---
+
+## 四、关联关系总图
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                                                                         │
+│   t_exposure                t_share                    t_return         │
+│   (曝光)                    (分享)                     (回流)           │
+│       │                        │                          │             │
+│       │                        │                          │             │
+│       │◄───────────────────────┤                          │             │
+│       │  Step2: 分享→曝光       │◄─────────────────────────┤             │
+│       │  key: apptype+mid+vid  │  Step3: 回流→分享         │             │
+│       │       +subsession/     │  key: shareid+vid+apptype│             │
+│       │        session         │       rootshareid+vid    │             │
+│       │       +pagesource+ts   │       +apptype           │             │
+│       │                        │                          │             │
+│       ▼                        ▼                          ▼             │
+│  ┌─────────────────────────────────────────────────────────────────┐    │
+│  │                    最终输出字段                                  │    │
+│  ├─────────────────────────────────────────────────────────────────┤    │
+│  │  曝光字段: apptype,uid,mid,vid,sessionid,pagesource...          │    │
+│  │  分享字段: is_share, share_cnt                                  │    │
+│  │  回流字段: is_return_1, return_1_pv/uv/mids                     │    │
+│  │           is_return_n, return_n_pv/uv/mids                      │    │
+│  │           *_noself (排除分享者自己)                              │    │
+│  │           new_exposure_cnt                                      │    │
+│  │  派生字段: pagesource_new, extend                               │    │
+│  └─────────────────────────────────────────────────────────────────┘    │
+│                                                                         │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## 五、核心 Key 汇总
+
+| 步骤 | 关联 Key | 必须 Key |
+|------|---------|---------|
+| 分享→曝光 | apptype + mid + vid + subsessionid/sessionid + pagesource + ts | apptype + mid + vid |
+| 回流→分享(1级) | shareid + vid + apptype | shareid + vid + apptype |
+| 回流→分享(N级) | rootshareid + vid + apptype | rootshareid + vid + apptype |
+| 汇总→曝光 | exposure_id | exposure_id |
+
+---
+
+## 六、附:曝光→回流关联(new_exposure_cnt 计算)
+
+**目的**:统计每次回流带来多少新曝光
+
+**关联方向**:曝光 → 回流(多对一)
+
+**关联 Key(4 级 Fallback)**:
+
+| 级别 | 关联条件 |
+|------|---------|
+| 1 | mid + headvideoid + subsessionid |
+| 2 | mid + headvideoid + sessionid |
+| 3 | mid + subsessionid |
+| 4 | mid + sessionid |
+
+**必须 Key**:`mid`
+
+**贡献字段**:`new_exposure_cnt`(回流后用户浏览了多少新内容)
+
+---
+
+## 七、重要洞见
+
+### 1. new_exposure_cnt 包含整条裂变链路
+
+`new_exposure_cnt` **包括所有裂变用户的新曝光**,不只是直接回流用户。
+
+```
+A 分享视频 V(shareid_A = rootshareid)
+    │
+    ├─ B 回流,浏览了 5 个视频 → new_exposure_cnt = 5
+    │      │
+    │      └─ B 再分享视频 V
+    │              │
+    │              └─ C 回流,浏览了 3 个视频 → new_exposure_cnt = 3
+    │
+    └─ 最终汇总:SUM(new_exposure_cnt) = 5 + 3 = 8
+```
+
+**原因**:在 `t_share_with_label` 中按 `rootshareid` 分组后 `SUM(new_exposure_cnt)`。
+
+---
+
+### 2. return_n 只统计同一视频的裂变
+
+`return_n_uv` **不包括**用户往下滑分享其他视频带回的人。
+
+```sql
+-- 关联条件
+ON  a.shareid = c.rootshareid
+AND a.vid = c.vid              -- vid 必须匹配
+```
+
+**场景说明**:
+
+| 场景 | 是否计入 return_n |
+|------|------------------|
+| 直接点击分享链接回流看视频 V | ✓ |
+| 裂变用户继续分享**同一视频 V** 带回的人 | ✓ |
+| 裂变用户往下滑分享**其他视频 W** 带回的人 | ✗ |
+
+```
+A 分享视频 V
+    │
+    ├─ B 点击回流看视频 V ─────────────────► return_n ✓
+    │      │
+    │      ├─ B 往下滑分享视频 W
+    │      │      └─ D 点击回流 ──────────► return_n ✗(vid 不同)
+    │      │
+    │      └─ B 再分享视频 V
+    │              └─ C 点击回流 ──────────► return_n ✓
+    │
+    └─ A 的 return_n_uv = B + C(只统计视频 V 的裂变)
+```
+
+**结论**:`return_n` 只追踪**同一视频**的裂变链路,不跨视频统计。
+
+---
+
+## 八、字段说明
+
+### 曝光维度字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `apptype` | STRING | 应用类型 |
+| `uid` | STRING | 用户 ID |
+| `mid` | STRING | 设备 ID |
+| `vid` | STRING | 视频 ID |
+| `sessionid` | STRING | 会话 ID |
+| `subsessionid` | STRING | 子会话 ID(更细粒度的会话划分) |
+| `pagesource` | STRING | 页面来源 |
+| `page` | STRING | 页面标识 |
+
+### 推荐算法字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `recommendlogvo` | STRING | 推荐算法的返回结果日志 |
+| `abcode` | STRING | 推荐算法的 AB 分组(如 ab0) |
+| `recommendpagetype` | STRING | 区分 pagesource 相同时的场景(三种回流头部、沉浸页下滑、feed下滑) |
+| `recomtraceid` | STRING | 推荐服务追踪 ID(后端调取推荐服务前生成,前端降级或后端异常时可能为空) |
+| `headvideoid` | STRING | 头部视频 ID(用于回流场景关联) |
+| `rootsourceid` | STRING | 区分流量来源(如投流等) |
+| `hotsencetype` | STRING | 热点场景类型 |
+| `flowpool` | STRING | 流量池标识(非流量池为空字符串,无 null) |
+| `level` | STRING | 流量池层级(非流量池为 null) |
+
+### 设备信息字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `clientip` | STRING | 客户端 IP |
+| `machineinfo_brand` | STRING | 设备品牌 |
+| `machineinfo_model` | STRING | 设备型号 |
+| `machineinfo_system` | STRING | 操作系统 |
+| `machineinfo_wechatversion` | STRING | 微信版本 |
+| `machineinfo_sdkversion` | STRING | SDK 版本 |
+| `province` | STRING | 省份 |
+| `city` | STRING | 城市 |
+
+### 时间字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `ts` | STRING | 曝光时间戳 |
+| `dt` | STRING | 分区字段:日期(格式:20240105) |
+| `hh` | STRING | 分区字段:小时(格式:04) |
+
+### 分享指标字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `is_share` | STRING | 该曝光是否产生分享(1/0) |
+| `share_cnt` | STRING | 该曝光产生的分享次数 |
+
+### 一级回流字段(直接回流)
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `is_return_1` | STRING | 该曝光的分享是否带来一级回流(1/0) |
+| `return_1_pv` | STRING | 一级回流 PV(点击次数) |
+| `return_1_uv` | STRING | 一级回流 UV(回流人数) |
+| `return_1_mids` | STRING | 一级回流用户的 mid 列表 |
+| `is_return_noself` | STRING | 是否有非自己的一级回流(1/0) |
+| `return_1_uv_noself` | STRING | 排除分享者自己的一级回流 UV |
+| `return_1_mids_noself` | STRING | 排除分享者自己的一级回流 mid 列表 |
+
+### N级回流字段(裂变回流)
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `is_return_n` | STRING | 该曝光的分享是否带来 N 级回流(1/0) |
+| `return_n_pv` | STRING | N 级回流 PV(整条裂变链的点击次数) |
+| `return_n_uv` | STRING | N 级回流 UV(整条裂变链的回流人数) |
+| `return_n_mids` | STRING | N 级回流用户的 mid 列表 |
+| `is_return_n_noself` | STRING | 是否有非自己的 N 级回流(1/0) |
+| `return_n_uv_noself` | STRING | 排除分享者自己的 N 级回流 UV |
+| `return_n_mids_noself` | STRING | 排除分享者自己的 N 级回流 mid 列表 |
+
+### 新曝光指标
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `new_exposure_cnt` | STRING | 回流带来的新曝光数(整条裂变链所有用户浏览的内容总数) |
+
+### 扩展字段
+
+| 字段 | 类型 | 说明 |
+|------|------|------|
+| `extend` | STRING | JSON 扩展字段(包含 animationSceneType, extParams, group_name 等) |
+
+---
+
+## 九、关键字段关系
+
+```
+return_1 ⊂ return_n
+├── return_1:只统计直接点击分享链接的回流(by shareid)
+└── return_n:统计整条裂变链的回流(by rootshareid),包含 return_1
+
+*_noself 系列:从对应指标中排除分享者自己的回流
+├── return_1_uv_noself = return_1_uv - (分享者自己点击)
+└── return_n_uv_noself = return_n_uv - (分享者自己在裂变链中的点击)
+```

+ 68 - 0
tables/loghubods/user_share_log.txt

@@ -0,0 +1,68 @@
+表名: loghubods.user_share_log
+注释: *
+创建时间: 2019-07-06 18:03:41
+最后修改: 2026-01-22 00:11:48
+
+============================================================
+字段名                            类型              注释
+============================================================
+topic                          string          null
+machinecode                    string          null
+apptype                        string          null
+pagesource                     string          null
+shareid                        string          null
+shareobjectid                  string          null
+type                           string          null
+clienttimestamp                string          null
+parentshareid                  string          null
+versioncode                    string          null
+pagecategoryid                 string          null
+rootshareid                    string          null
+rootpagecategoryid             string          null
+sharedepth                     string          null
+rootpagesource                 string          null
+sessionid                      string          
+returnid                       string          
+rootlaunchshareid              string          
+subsessionid                   string          
+sharebuttontype                string          
+rootjumphomevideoid            string          
+rootpagetimestamp              string          
+abinfodata                     string          
+playid                         string          
+jumphomevideoid                string          
+eventid                        string          
+loginuid                       string          
+eventids                       string          实验组分组
+parenteventids                 string          
+parentrootpagesource           string          
+eventinfos                     string          
+clickobjectid                  string          
+clientip                       string          
+sharetitle                     string          
+shareimageurl                  string          
+rooteventinfos                 string          
+rootapptype                    string          
+wxconfigerr                    string          
+sharetitleid                   string          
+recomtraceid                   string          
+shareimgid                     string          
+isfeedcom                      string          
+rootsharemid                   string          
+rootsourceid                   string          
+rootsessionid                  string          
+opengid                        string          
+sequence                       string          
+usersharedepth                 string          
+progress                       string          
+groupshare                     string          
+sencetype                      string          
+hotsencetype                   string          
+ghid                           string          
+expstrategy                    string          
+creativeid                     string          
+dt                             string          
+
+分区字段:
+------------------------------------------------------------
+dt                             string          

+ 50 - 0
tables/videoods/dim_user.txt

@@ -0,0 +1,50 @@
+表名: videoods.dim_user
+注释: (无)
+创建时间: 2020-06-08 16:44:22
+最后修改: 2026-01-23 03:29:45
+
+============================================================
+字段名                            类型              注释
+============================================================
+uid                            bigint          
+mids                           string          设备唯一标识
+nick_name                      string          微信昵称
+longvideo_nick_name            string          小程序昵称
+gender                         string          性别
+user_type                      string          用户身份
+phone_number                   string          联系方式
+gmt_create                     datetime        创建时间
+gmt_create_timestamp           bigint          创建时间戳
+tags                           string          用户内容标签
+category_name                  string          场景
+isvip                          string          是否开通vip
+isreward                       string          是否开通赞赏
+isad                           string          是否开通广告
+isgood                         string          是否开通商品权限
+first_up_datetime              string          首次上传时间
+last_up_datetime               string          最后一次上传时间
+next_to_last_up_datetime       string          倒数第二次上传时间
+videos                         bigint          上传视频数
+today_videos                   bigint          今日上传视频数
+idols                          bigint          关注的人数
+fans                           bigint          粉丝数
+play_count                     bigint          累计播放人数
+play_count_total               bigint          累计播放次数
+total_reward                   double          赞赏总金额
+currentday_reward              double          当日赞赏金额
+reward_person                  bigint          赞赏人数
+total_reward_times             bigint          赞赏次数
+reward_videos                  bigint          赞赏视频数
+total_price                    bigint          付费总金额
+currentday_price               bigint          当日付费金额
+total_price_times              bigint          付费次数
+total_price_person             bigint          付费人数
+total_price_videos             bigint          付费视频数
+cgrain_user_type               string          粗粒度身份
+identity_tagname               string          用户身份标签
+operation_tags                 string          用户运营标签
+identity_tag_id                bigint          用户身份标签号
+identity_create_time           datetime        用户身份标签创建时间
+country                        string          国家
+province                       string          省份
+city                           string          市

+ 55 - 0
tables/videoods/wx_video.txt

@@ -0,0 +1,55 @@
+表名: videoods.wx_video
+注释: 视频表
+创建时间: 2019-07-02 19:43:22
+最后修改: 2026-01-23 00:30:45
+
+============================================================
+字段名                            类型              注释
+============================================================
+id                             bigint          主键编号,取值来源为redis分布式主键
+uid                            bigint          用户编号,用户信息表中的uid字段
+title                          string          标题
+video_path                     string          视频地址
+cover_img_path                 string          封面图片地址
+self_cover_img_path            string          自定义封面图片地址
+share_moment_img_path          string          分享到朋友圈的图片保存地址
+qrimg_path                     string          pc端生成二维码的保存路径
+width                          bigint          
+height                         bigint          
+cover_img_width                bigint          
+cover_img_height               bigint          
+play_count                     bigint          播放次数,去重
+play_count_total               bigint          被播放总次数,不去重
+share_count                    bigint          分享次数,去重
+share_count_total              bigint          被分享到朋友圈总次数,不去重
+reported_count                 bigint          被举报次数
+share_count_friend             bigint          微信分享给朋友的次数,不去重
+share_count_friend_total       bigint          被分享给微信好友的总次数,不去重
+favoriteds                     bigint          视频被收藏的次数
+total_time                     bigint          视频时长
+rotate                         string          
+bit_rate                       bigint          比率
+transcode_status               bigint          转码状态(1:发送转码失败,2:转码中,3:转码完成,4:转码失败)
+transcode_done_datetime        datetime        转码完成时间
+request_id                     string          
+job_id                         string          
+transed_video_path             string          
+gmt_create                     datetime        创建时间
+changed_by                     bigint          由谁修改
+gmt_modified                   datetime        最后修改时间
+gmt_create_timestamp           bigint          创建时间戳,用来排序和分页查询
+gmt_modified_timestamp         bigint          最后修改时间戳,用来排序和分页查询
+version                        bigint          数据版本号,用来做版本控制和乐观锁
+status                         bigint          数据状态,1有效,2 已删除,3 已屏蔽,4关注可见,5分享可见,6自己可见
+system                         string          发送视频时的操作系统
+file_extensions                string          视频后缀名
+examine_status                 bigint          审核状态(0:上传未审,1:上传已审)
+content_md5                    string          原视频的md5
+size                           bigint          原文件大小
+code_name                      string          原文件编码格式
+video_collection_id            bigint          用户的视频集编号
+recommend_status               bigint          推荐状态(0:未推荐,-6:待推荐,1:普通推荐,10:编辑推荐,-7:可搜索)
+tag_count                      bigint          标签个数
+stage_recommend_examine_status bigint          待推荐审核状态(0:待推荐未审,1:待推荐已审)
+sensitive_status               bigint          内容敏感状态(0:未检验,1:不敏感,2:敏感,3:敏感已审)
+is_foreogn_bucket              bigint          是否是存放在境外bucket,针对境外用户上传后转码前的地址

+ 6 - 0
tasks/00_AB效果/01_推荐AB天级效果.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "L9pxuw",
+  "sort": "dt:desc",
+  "cols": null
+}

+ 85 - 0
tasks/00_AB效果/01_推荐AB天级效果.sql

@@ -0,0 +1,85 @@
+WITH t_base AS 
+(
+    SELECT  dt
+            ,apptype 
+            -- ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab4","ab5","ab6","ab7","ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+            --         WHEN apptype IN ("0") AND abcode IN ("ab0","ab1","ab4","ab5","ab6","ab7","ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("0") AND abcode IN ("ab2","ab3") THEN "对照组"
+            --         ELSE "其他"
+            -- END AS abcode
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            -- ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab2","ab3","ab4","ab5","ab6","ab7") THEN "对照组"
+            --         ELSE "其他"
+            -- END AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    AND     abcode NOT IN ("ab100")
+)
+SELECT  dt
+        ,COALESCE(apptype,"sum") AS apptype
+        ,COALESCE(abcode,"sum") AS abcode
+        ,COALESCE(page,"sum") AS page
+        ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+        ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+        ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+        ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+        ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+        ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+        ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+        ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+        ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+        ,COUNT(DISTINCT mid) AS dau
+        ,COUNT(1) AS exp
+        ,COALESCE(SUM(is_share),0) AS is_share
+        ,COALESCE(SUM(share_cnt),0) AS share_cnt
+        ,COALESCE(SUM(is_return_1),0) AS is_return_1
+        ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+        ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+        ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+        -- ,MAX(CAST(COALESCE(share_cnt,'0') AS BIGINT)) AS max_share_cnt
+        -- ,MAX(CAST(COALESCE(return_1_uv,'0') AS BIGINT)) AS max_return_1_uv
+        -- ,MAX(CAST(COALESCE(return_n_uv,'0') AS BIGINT)) AS max_return_n_uv
+        -- ,MAX(CAST(COALESCE(return_n_uv_noself,'0') AS BIGINT)) AS max_return_n_uv_noself
+        -- ,COALESCE(SUM(is_return_noself),0) AS is_return_noself
+        -- ,COALESCE(SUM(return_1_uv),0) AS return_1_uv
+        -- ,COUNT(DISTINCT vid) AS exp_vid_cnt
+        -- ,COUNT(DISTINCT CASE    WHEN is_share = '1' THEN vid ELSE NULL END) AS share_vid_cnt
+        -- ,COUNT(DISTINCT CASE    WHEN is_return_n = '1' THEN vid ELSE NULL END) AS return_vid_cnt
+FROM    t_base
+where page in ("推荐")
+GROUP BY dt
+         ,apptype
+         ,abcode
+         ,page
+-- GROUPING SETS ((dt,apptype,abcode)
+--               ,(dt,apptype,abcode,page))
+ORDER BY dt DESC,apptype,page,abcode
+;

+ 6 - 0
tasks/00_AB效果/01_推荐AB实时效果.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "zxVjf5",
+  "sort": "dt:desc",
+  "cols": null
+}

+ 89 - 0
tasks/00_AB效果/01_推荐AB实时效果.sql

@@ -0,0 +1,89 @@
+WITH t_base AS 
+(
+    SELECT  dt
+            ,hh
+            ,apptype 
+            -- ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab4","ab5","ab6","ab7","ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+            --         WHEN apptype IN ("0") AND abcode IN ("ab0","ab1","ab4","ab5","ab6","ab7","ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("0") AND abcode IN ("ab2","ab3") THEN "对照组"
+            --         ELSE "其他"
+            -- END AS abcode
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            -- ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab2","ab3","ab4","ab5","ab6","ab7") THEN "对照组"
+            --         ELSE "其他"
+            -- END AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+--     AND     hh BETWEEN "16" AND "24"
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    AND     abcode NOT IN ("ab100")
+)
+SELECT  dt
+        ,COALESCE(apptype,"sum") AS apptype
+        ,COALESCE(abcode,"sum") AS abcode
+        ,COALESCE(page,"sum") AS page
+        ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+        ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+        ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+        ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+        ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+        ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+        ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+        ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+        ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+        ,COUNT(DISTINCT mid) AS dau
+        ,COUNT(1) AS exp
+        ,COALESCE(SUM(is_share),0) AS is_share
+        ,COALESCE(SUM(share_cnt),0) AS share_cnt
+        ,COALESCE(SUM(is_return_1),0) AS is_return_1
+        ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+        ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+        ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+        -- ,MAX(CAST(COALESCE(share_cnt,'0') AS BIGINT)) AS max_share_cnt
+        -- ,MAX(CAST(COALESCE(return_1_uv,'0') AS BIGINT)) AS max_return_1_uv
+        -- ,MAX(CAST(COALESCE(return_n_uv,'0') AS BIGINT)) AS max_return_n_uv
+        -- ,MAX(CAST(COALESCE(return_n_uv_noself,'0') AS BIGINT)) AS max_return_n_uv_noself
+        -- ,COALESCE(SUM(is_return_noself),0) AS is_return_noself
+        -- ,COALESCE(SUM(return_1_uv),0) AS return_1_uv
+        -- ,COUNT(DISTINCT vid) AS exp_vid_cnt
+        -- ,COUNT(DISTINCT CASE    WHEN is_share = '1' THEN vid ELSE NULL END) AS share_vid_cnt
+        -- ,COUNT(DISTINCT CASE    WHEN is_return_n = '1' THEN vid ELSE NULL END) AS return_vid_cnt
+        ,min(hh) as start_hh
+        ,max(hh) as end_hh
+FROM    t_base
+where page in ("推荐")
+GROUP BY dt
+         ,apptype
+         ,abcode
+         ,page
+-- GROUPING SETS ((dt,apptype,abcode)
+--               ,(dt,apptype,abcode,page))
+ORDER BY dt DESC,apptype,page,abcode
+;

+ 6 - 0
tasks/00_AB效果/01_推荐AB实时效果_before.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "T9fvno",
+  "sort": "dt:desc",
+  "cols": null
+}

+ 86 - 0
tasks/00_AB效果/01_推荐AB实时效果_before.sql

@@ -0,0 +1,86 @@
+WITH t_base AS 
+(
+    SELECT  dt
+            ,apptype 
+            -- ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab4","ab5","ab6","ab7","ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+            --         WHEN apptype IN ("0") AND abcode IN ("ab0","ab1","ab4","ab5","ab6","ab7","ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("0") AND abcode IN ("ab2","ab3") THEN "对照组"
+            --         ELSE "其他"
+            -- END AS abcode
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            -- ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准"
+            --         WHEN apptype IN ("4") AND abcode IN ("ab2","ab3","ab4","ab5","ab6","ab7") THEN "对照组"
+            --         ELSE "其他"
+            -- END AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+    AND     hh BETWEEN "00" AND "13"
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    AND     abcode NOT IN ("ab100")
+)
+SELECT  dt
+        ,COALESCE(apptype,"sum") AS apptype
+        ,COALESCE(abcode,"sum") AS abcode
+        ,COALESCE(page,"sum") AS page
+        ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+        ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+        ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+        ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+        ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+        ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+        ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+        ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+        ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+        ,COUNT(DISTINCT mid) AS dau
+        ,COUNT(1) AS exp
+        ,COALESCE(SUM(is_share),0) AS is_share
+        ,COALESCE(SUM(share_cnt),0) AS share_cnt
+        ,COALESCE(SUM(is_return_1),0) AS is_return_1
+        ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+        ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+        ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+        -- ,MAX(CAST(COALESCE(share_cnt,'0') AS BIGINT)) AS max_share_cnt
+        -- ,MAX(CAST(COALESCE(return_1_uv,'0') AS BIGINT)) AS max_return_1_uv
+        -- ,MAX(CAST(COALESCE(return_n_uv,'0') AS BIGINT)) AS max_return_n_uv
+        -- ,MAX(CAST(COALESCE(return_n_uv_noself,'0') AS BIGINT)) AS max_return_n_uv_noself
+        -- ,COALESCE(SUM(is_return_noself),0) AS is_return_noself
+        -- ,COALESCE(SUM(return_1_uv),0) AS return_1_uv
+        -- ,COUNT(DISTINCT vid) AS exp_vid_cnt
+        -- ,COUNT(DISTINCT CASE    WHEN is_share = '1' THEN vid ELSE NULL END) AS share_vid_cnt
+        -- ,COUNT(DISTINCT CASE    WHEN is_return_n = '1' THEN vid ELSE NULL END) AS return_vid_cnt
+FROM    t_base
+where page in ("推荐")
+GROUP BY dt
+         ,apptype
+         ,abcode
+         ,page
+-- GROUPING SETS ((dt,apptype,abcode)
+--               ,(dt,apptype,abcode,page))
+ORDER BY dt DESC,apptype,page,abcode
+;

+ 6 - 0
tasks/00_AB效果/01_推荐AB实时效果_分小时.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "JWT28U",
+  "sort": "dt:desc,hh:asc",
+  "cols": null
+}

+ 145 - 0
tasks/00_AB效果/01_推荐AB实时效果_分小时.sql

@@ -0,0 +1,145 @@
+-- 推荐AB实时效果 - 分小时对比对照组
+-- 新增维度:hh(小时),各指标相对对照组的变化率(lift)
+-- NOTE(review): hourly AB-test report — buckets exposures by experiment group,
+-- aggregates per (dt, hh, apptype, abcode, page), then joins each group to the
+-- control group's row for the same hour and emits relative lifts.
+WITH t_base AS
+(
+    SELECT  dt
+            ,hh
+            ,apptype
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): dead predicate — the IN whitelist above already excludes "ab100".
+    AND     abcode NOT IN ("ab100")
+),
+-- 计算各组基础指标(按小时)
+-- NOTE(review): unlike the daily report (which wraps ratios in round(COALESCE(...)))
+-- these raw ratios hit a zero denominator in quiet hours (e.g. SUM(is_share)=0),
+-- yielding NULL or a divide-by-zero error depending on engine settings —
+-- TODO confirm downstream (Feishu sheet export) tolerates NULLs here.
+t_metrics AS (
+    SELECT  dt
+            ,hh
+            ,apptype
+            ,abcode
+            ,page
+            ,COUNT(1) / COUNT(DISTINCT mid) AS exp_per_dau
+            ,SUM(is_share) / COUNT(1) AS str_one
+            ,SUM(return_n_uv) / SUM(is_share) AS ros_one
+            ,SUM(share_cnt) / COUNT(1) AS str
+            ,SUM(return_n_uv) / SUM(share_cnt) AS ros
+            ,SUM(is_return_1) / COUNT(1) AS str_plus
+            ,SUM(return_n_uv) / SUM(is_return_1) AS ros_minus
+            ,SUM(return_n_uv) / COUNT(1) AS rovn
+            ,SUM(new_exposure_cnt) / COUNT(1) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_base
+    WHERE   page IN ("推荐")
+    GROUP BY dt
+             ,hh
+             ,apptype
+             ,abcode
+             ,page
+),
+-- 获取对照组数据(按小时)
+t_control AS (
+    SELECT  dt
+            ,hh
+            ,apptype
+            ,page
+            ,exp_per_dau AS ctrl_exp_per_dau
+            ,str_one AS ctrl_str_one
+            ,ros_one AS ctrl_ros_one
+            ,str AS ctrl_str
+            ,ros AS ctrl_ros
+            ,str_plus AS ctrl_str_plus
+            ,ros_minus AS ctrl_ros_minus
+            ,rovn AS ctrl_rovn
+            ,vovh24 AS ctrl_vovh24
+            ,dau AS ctrl_dau
+            ,exp AS ctrl_exp
+            ,is_share AS ctrl_is_share
+            ,share_cnt AS ctrl_share_cnt
+            ,is_return_1 AS ctrl_is_return_1
+            ,return_n_uv AS ctrl_return_n_uv
+            ,viewh24 AS ctrl_viewh24
+            ,return_n_uv_noself AS ctrl_return_n_uv_noself
+    FROM    t_metrics
+    WHERE   abcode = "对照组"
+)
+-- 关联对照组,计算变化率
+SELECT  m.dt
+        ,m.hh
+        ,m.apptype
+        ,m.abcode
+        ,m.page
+        -- 原始指标
+        ,m.exp_per_dau
+        ,m.str_one
+        ,m.ros_one
+        ,m.str
+        ,m.ros
+        ,m.str_plus
+        ,m.ros_minus
+        ,m.rovn
+        ,m.vovh24
+        ,m.dau
+        ,m.exp
+        ,m.is_share
+        ,m.share_cnt
+        ,m.is_return_1
+        ,m.return_n_uv
+        ,m.viewh24
+        ,m.return_n_uv_noself
+        -- 相对对照组变化率
+        -- NOTE(review): every lift divides by the control value; a zero control
+        -- metric makes the lift NULL/undefined — consider NULLIF(c.ctrl_x, 0).
+        -- The LEFT JOIN also leaves all lifts NULL for hours with no control row.
+        ,(m.exp_per_dau - c.ctrl_exp_per_dau) / c.ctrl_exp_per_dau AS exp_per_dau_lift
+        ,(m.str_one - c.ctrl_str_one) / c.ctrl_str_one AS str_one_lift
+        ,(m.ros_one - c.ctrl_ros_one) / c.ctrl_ros_one AS ros_one_lift
+        ,(m.str - c.ctrl_str) / c.ctrl_str AS str_lift
+        ,(m.ros - c.ctrl_ros) / c.ctrl_ros AS ros_lift
+        ,(m.str_plus - c.ctrl_str_plus) / c.ctrl_str_plus AS str_plus_lift
+        ,(m.ros_minus - c.ctrl_ros_minus) / c.ctrl_ros_minus AS ros_minus_lift
+        ,(m.rovn - c.ctrl_rovn) / c.ctrl_rovn AS rovn_lift
+        ,(m.vovh24 - c.ctrl_vovh24) / c.ctrl_vovh24 AS vovh24_lift
+        ,(m.dau - c.ctrl_dau) / c.ctrl_dau AS dau_lift
+        ,(m.exp - c.ctrl_exp) / c.ctrl_exp AS exp_lift
+        ,(m.is_share - c.ctrl_is_share) / c.ctrl_is_share AS is_share_lift
+        ,(m.share_cnt - c.ctrl_share_cnt) / c.ctrl_share_cnt AS share_cnt_lift
+        ,(m.is_return_1 - c.ctrl_is_return_1) / c.ctrl_is_return_1 AS is_return_1_lift
+        ,(m.return_n_uv - c.ctrl_return_n_uv) / c.ctrl_return_n_uv AS return_n_uv_lift
+        ,(m.viewh24 - c.ctrl_viewh24) / c.ctrl_viewh24 AS viewh24_lift
+        ,(m.return_n_uv_noself - c.ctrl_return_n_uv_noself) / c.ctrl_return_n_uv_noself AS return_n_uv_noself_lift
+FROM    t_metrics m
+LEFT JOIN t_control c
+ON      m.dt = c.dt
+AND     m.hh = c.hh
+AND     m.apptype = c.apptype
+AND     m.page = c.page
+ORDER BY m.dt DESC, m.hh, m.apptype, m.page, m.abcode
+;

+ 6 - 0
tasks/00_AB效果/02_推荐AB天级效果_对比对照组.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "JWT28U",
+  "sort": "dt:desc",
+  "cols": null
+}

+ 139 - 0
tasks/00_AB效果/02_推荐AB天级效果_对比对照组.sql

@@ -0,0 +1,139 @@
+-- 推荐AB天级效果 - 含对照组对比
+-- 新增列:各指标相对对照组的变化率(lift)
+WITH t_base AS
+(
+    SELECT  dt
+            ,apptype
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    AND     abcode NOT IN ("ab100")
+),
+-- 计算各组基础指标
+t_metrics AS (
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,page
+            ,COUNT(1) / COUNT(DISTINCT mid) AS exp_per_dau
+            ,SUM(is_share) / COUNT(1) AS str_one
+            ,SUM(return_n_uv) / SUM(is_share) AS ros_one
+            ,SUM(share_cnt) / COUNT(1) AS str
+            ,SUM(return_n_uv) / SUM(share_cnt) AS ros
+            ,SUM(is_return_1) / COUNT(1) AS str_plus
+            ,SUM(return_n_uv) / SUM(is_return_1) AS ros_minus
+            ,SUM(return_n_uv) / COUNT(1) AS rovn
+            ,SUM(new_exposure_cnt) / COUNT(1) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_base
+    WHERE   page IN ("推荐")
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,page
+),
+-- 获取对照组数据
+t_control AS (
+    SELECT  dt
+            ,apptype
+            ,page
+            ,exp_per_dau AS ctrl_exp_per_dau
+            ,str_one AS ctrl_str_one
+            ,ros_one AS ctrl_ros_one
+            ,str AS ctrl_str
+            ,ros AS ctrl_ros
+            ,str_plus AS ctrl_str_plus
+            ,ros_minus AS ctrl_ros_minus
+            ,rovn AS ctrl_rovn
+            ,vovh24 AS ctrl_vovh24
+            ,dau AS ctrl_dau
+            ,exp AS ctrl_exp
+            ,is_share AS ctrl_is_share
+            ,share_cnt AS ctrl_share_cnt
+            ,is_return_1 AS ctrl_is_return_1
+            ,return_n_uv AS ctrl_return_n_uv
+            ,viewh24 AS ctrl_viewh24
+            ,return_n_uv_noself AS ctrl_return_n_uv_noself
+    FROM    t_metrics
+    WHERE   abcode = "对照组"
+)
+-- 关联对照组,计算变化率
+SELECT  m.dt
+        ,m.apptype
+        ,m.abcode
+        ,m.page
+        -- 原始指标
+        ,m.exp_per_dau
+        ,m.str_one
+        ,m.ros_one
+        ,m.str
+        ,m.ros
+        ,m.str_plus
+        ,m.ros_minus
+        ,m.rovn
+        ,m.vovh24
+        ,m.dau
+        ,m.exp
+        ,m.is_share
+        ,m.share_cnt
+        ,m.is_return_1
+        ,m.return_n_uv
+        ,m.viewh24
+        ,m.return_n_uv_noself
+        -- 相对对照组变化率
+        ,(m.exp_per_dau - c.ctrl_exp_per_dau) / c.ctrl_exp_per_dau AS exp_per_dau_lift
+        ,(m.str_one - c.ctrl_str_one) / c.ctrl_str_one AS str_one_lift
+        ,(m.ros_one - c.ctrl_ros_one) / c.ctrl_ros_one AS ros_one_lift
+        ,(m.str - c.ctrl_str) / c.ctrl_str AS str_lift
+        ,(m.ros - c.ctrl_ros) / c.ctrl_ros AS ros_lift
+        ,(m.str_plus - c.ctrl_str_plus) / c.ctrl_str_plus AS str_plus_lift
+        ,(m.ros_minus - c.ctrl_ros_minus) / c.ctrl_ros_minus AS ros_minus_lift
+        ,(m.rovn - c.ctrl_rovn) / c.ctrl_rovn AS rovn_lift
+        ,(m.vovh24 - c.ctrl_vovh24) / c.ctrl_vovh24 AS vovh24_lift
+        ,(m.dau - c.ctrl_dau) / c.ctrl_dau AS dau_lift
+        ,(m.exp - c.ctrl_exp) / c.ctrl_exp AS exp_lift
+        ,(m.is_share - c.ctrl_is_share) / c.ctrl_is_share AS is_share_lift
+        ,(m.share_cnt - c.ctrl_share_cnt) / c.ctrl_share_cnt AS share_cnt_lift
+        ,(m.is_return_1 - c.ctrl_is_return_1) / c.ctrl_is_return_1 AS is_return_1_lift
+        ,(m.return_n_uv - c.ctrl_return_n_uv) / c.ctrl_return_n_uv AS return_n_uv_lift
+        ,(m.viewh24 - c.ctrl_viewh24) / c.ctrl_viewh24 AS viewh24_lift
+        ,(m.return_n_uv_noself - c.ctrl_return_n_uv_noself) / c.ctrl_return_n_uv_noself AS return_n_uv_noself_lift
+FROM    t_metrics m
+LEFT JOIN t_control c
+ON      m.dt = c.dt
+AND     m.apptype = c.apptype
+AND     m.page = c.page
+ORDER BY m.dt DESC, m.apptype, m.page, m.abcode
+;

+ 139 - 0
tasks/00_AB效果/02_推荐AB天级效果_对比对照组_ab.sql

@@ -0,0 +1,139 @@
+-- 推荐AB天级效果 - 含对照组对比
+-- 新增列:各指标相对对照组的变化率(lift)
+-- NOTE(review): this file is byte-identical to 02_推荐AB天级效果_对比对照组.sql —
+-- confirm whether the "_ab" copy is intentional; a duplicate will drift over time.
+WITH t_base AS
+(
+    SELECT  dt
+            ,apptype
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): dead predicate — the IN whitelist above already excludes "ab100".
+    AND     abcode NOT IN ("ab100")
+),
+-- 计算各组基础指标
+-- NOTE(review): raw ratios can hit a zero denominator (e.g. SUM(is_share)=0) —
+-- NULL or error depending on engine settings; TODO confirm downstream handling.
+t_metrics AS (
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,page
+            ,COUNT(1) / COUNT(DISTINCT mid) AS exp_per_dau
+            ,SUM(is_share) / COUNT(1) AS str_one
+            ,SUM(return_n_uv) / SUM(is_share) AS ros_one
+            ,SUM(share_cnt) / COUNT(1) AS str
+            ,SUM(return_n_uv) / SUM(share_cnt) AS ros
+            ,SUM(is_return_1) / COUNT(1) AS str_plus
+            ,SUM(return_n_uv) / SUM(is_return_1) AS ros_minus
+            ,SUM(return_n_uv) / COUNT(1) AS rovn
+            ,SUM(new_exposure_cnt) / COUNT(1) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_base
+    WHERE   page IN ("推荐")
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,page
+),
+-- 获取对照组数据
+t_control AS (
+    SELECT  dt
+            ,apptype
+            ,page
+            ,exp_per_dau AS ctrl_exp_per_dau
+            ,str_one AS ctrl_str_one
+            ,ros_one AS ctrl_ros_one
+            ,str AS ctrl_str
+            ,ros AS ctrl_ros
+            ,str_plus AS ctrl_str_plus
+            ,ros_minus AS ctrl_ros_minus
+            ,rovn AS ctrl_rovn
+            ,vovh24 AS ctrl_vovh24
+            ,dau AS ctrl_dau
+            ,exp AS ctrl_exp
+            ,is_share AS ctrl_is_share
+            ,share_cnt AS ctrl_share_cnt
+            ,is_return_1 AS ctrl_is_return_1
+            ,return_n_uv AS ctrl_return_n_uv
+            ,viewh24 AS ctrl_viewh24
+            ,return_n_uv_noself AS ctrl_return_n_uv_noself
+    FROM    t_metrics
+    WHERE   abcode = "对照组"
+)
+-- 关联对照组,计算变化率
+SELECT  m.dt
+        ,m.apptype
+        ,m.abcode
+        ,m.page
+        -- 原始指标
+        ,m.exp_per_dau
+        ,m.str_one
+        ,m.ros_one
+        ,m.str
+        ,m.ros
+        ,m.str_plus
+        ,m.ros_minus
+        ,m.rovn
+        ,m.vovh24
+        ,m.dau
+        ,m.exp
+        ,m.is_share
+        ,m.share_cnt
+        ,m.is_return_1
+        ,m.return_n_uv
+        ,m.viewh24
+        ,m.return_n_uv_noself
+        -- 相对对照组变化率
+        -- NOTE(review): lifts divide by the control value; zero control metrics
+        -- make the lift NULL/undefined — consider NULLIF(c.ctrl_x, 0).
+        ,(m.exp_per_dau - c.ctrl_exp_per_dau) / c.ctrl_exp_per_dau AS exp_per_dau_lift
+        ,(m.str_one - c.ctrl_str_one) / c.ctrl_str_one AS str_one_lift
+        ,(m.ros_one - c.ctrl_ros_one) / c.ctrl_ros_one AS ros_one_lift
+        ,(m.str - c.ctrl_str) / c.ctrl_str AS str_lift
+        ,(m.ros - c.ctrl_ros) / c.ctrl_ros AS ros_lift
+        ,(m.str_plus - c.ctrl_str_plus) / c.ctrl_str_plus AS str_plus_lift
+        ,(m.ros_minus - c.ctrl_ros_minus) / c.ctrl_ros_minus AS ros_minus_lift
+        ,(m.rovn - c.ctrl_rovn) / c.ctrl_rovn AS rovn_lift
+        ,(m.vovh24 - c.ctrl_vovh24) / c.ctrl_vovh24 AS vovh24_lift
+        ,(m.dau - c.ctrl_dau) / c.ctrl_dau AS dau_lift
+        ,(m.exp - c.ctrl_exp) / c.ctrl_exp AS exp_lift
+        ,(m.is_share - c.ctrl_is_share) / c.ctrl_is_share AS is_share_lift
+        ,(m.share_cnt - c.ctrl_share_cnt) / c.ctrl_share_cnt AS share_cnt_lift
+        ,(m.is_return_1 - c.ctrl_is_return_1) / c.ctrl_is_return_1 AS is_return_1_lift
+        ,(m.return_n_uv - c.ctrl_return_n_uv) / c.ctrl_return_n_uv AS return_n_uv_lift
+        ,(m.viewh24 - c.ctrl_viewh24) / c.ctrl_viewh24 AS viewh24_lift
+        ,(m.return_n_uv_noself - c.ctrl_return_n_uv_noself) / c.ctrl_return_n_uv_noself AS return_n_uv_noself_lift
+FROM    t_metrics m
+LEFT JOIN t_control c
+ON      m.dt = c.dt
+AND     m.apptype = c.apptype
+AND     m.page = c.page
+ORDER BY m.dt DESC, m.apptype, m.page, m.abcode
+;

+ 6 - 0
tasks/00_AB效果/02_推荐AB天级效果_对比对照组_分ab.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "wblMdQ",
+  "sort": "dt:desc",
+  "cols": null
+}

+ 140 - 0
tasks/00_AB效果/02_推荐AB天级效果_对比对照组_分ab.sql

@@ -0,0 +1,140 @@
+-- 推荐AB天级效果 - 含对照组对比
+-- 新增列:各指标相对对照组的变化率(lift)
+-- NOTE(review): per-abcode variant — each raw abcode keeps its own row
+-- ("abX-<label>") instead of being pooled into a named group.
+WITH t_base AS
+(
+    SELECT  dt
+            ,apptype
+            ,concat(abcode, "-", CASE
+                    WHEN apptype IN ("4") AND abcode IN ("ab0", "ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6") THEN "实验组-str校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8") THEN "实验组-str校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4") THEN "实验组-str校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2", "ab3") THEN "对照组"
+                    -- NOTE(review): the ELSE branch labels the unmatched codes
+                    -- (ab5, ab7, ab9) as "对照组" too, producing e.g. "ab5-对照组",
+                    -- yet only "ab2-对照组" feeds t_control below — confirm this
+                    -- labeling is intentional and not a copy-paste slip.
+                    ELSE "对照组"
+            END) AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): dead predicate — the IN whitelist above already excludes "ab100".
+    AND     abcode NOT IN ("ab100")
+),
+-- 计算各组基础指标
+-- NOTE(review): raw ratios can hit a zero denominator (e.g. SUM(is_share)=0) —
+-- NULL or error depending on engine settings; TODO confirm downstream handling.
+t_metrics AS (
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,page
+            ,COUNT(1) / COUNT(DISTINCT mid) AS exp_per_dau
+            ,SUM(is_share) / COUNT(1) AS str_one
+            ,SUM(return_n_uv) / SUM(is_share) AS ros_one
+            ,SUM(share_cnt) / COUNT(1) AS str
+            ,SUM(return_n_uv) / SUM(share_cnt) AS ros
+            ,SUM(is_return_1) / COUNT(1) AS str_plus
+            ,SUM(return_n_uv) / SUM(is_return_1) AS ros_minus
+            ,SUM(return_n_uv) / COUNT(1) AS rovn
+            ,SUM(new_exposure_cnt) / COUNT(1) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_base
+    WHERE   page IN ("推荐")
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,page
+),
+-- 获取对照组数据
+t_control AS (
+    SELECT  dt
+            ,apptype
+            ,page
+            ,exp_per_dau AS ctrl_exp_per_dau
+            ,str_one AS ctrl_str_one
+            ,ros_one AS ctrl_ros_one
+            ,str AS ctrl_str
+            ,ros AS ctrl_ros
+            ,str_plus AS ctrl_str_plus
+            ,ros_minus AS ctrl_ros_minus
+            ,rovn AS ctrl_rovn
+            ,vovh24 AS ctrl_vovh24
+            ,dau AS ctrl_dau
+            ,exp AS ctrl_exp
+            ,is_share AS ctrl_is_share
+            ,share_cnt AS ctrl_share_cnt
+            ,is_return_1 AS ctrl_is_return_1
+            ,return_n_uv AS ctrl_return_n_uv
+            ,viewh24 AS ctrl_viewh24
+            ,return_n_uv_noself AS ctrl_return_n_uv_noself
+    FROM    t_metrics
+    WHERE   abcode = "ab2-对照组"
+)
+-- 关联对照组,计算变化率
+SELECT  m.dt
+        ,m.apptype
+        ,m.abcode
+        ,m.page
+        -- 原始指标
+        ,m.exp_per_dau
+        ,m.str_one
+        ,m.ros_one
+        ,m.str
+        ,m.ros
+        ,m.str_plus
+        ,m.ros_minus
+        ,m.rovn
+        ,m.vovh24
+        ,m.dau
+        ,m.exp
+        ,m.is_share
+        ,m.share_cnt
+        ,m.is_return_1
+        ,m.return_n_uv
+        ,m.viewh24
+        ,m.return_n_uv_noself
+        -- 相对对照组变化率
+        -- NOTE(review): lifts divide by the control ("ab2-对照组") value; zero
+        -- control metrics make the lift NULL/undefined — consider NULLIF(c.ctrl_x, 0).
+        ,(m.exp_per_dau - c.ctrl_exp_per_dau) / c.ctrl_exp_per_dau AS exp_per_dau_lift
+        ,(m.str_one - c.ctrl_str_one) / c.ctrl_str_one AS str_one_lift
+        ,(m.ros_one - c.ctrl_ros_one) / c.ctrl_ros_one AS ros_one_lift
+        ,(m.str - c.ctrl_str) / c.ctrl_str AS str_lift
+        ,(m.ros - c.ctrl_ros) / c.ctrl_ros AS ros_lift
+        ,(m.str_plus - c.ctrl_str_plus) / c.ctrl_str_plus AS str_plus_lift
+        ,(m.ros_minus - c.ctrl_ros_minus) / c.ctrl_ros_minus AS ros_minus_lift
+        ,(m.rovn - c.ctrl_rovn) / c.ctrl_rovn AS rovn_lift
+        ,(m.vovh24 - c.ctrl_vovh24) / c.ctrl_vovh24 AS vovh24_lift
+        ,(m.dau - c.ctrl_dau) / c.ctrl_dau AS dau_lift
+        ,(m.exp - c.ctrl_exp) / c.ctrl_exp AS exp_lift
+        ,(m.is_share - c.ctrl_is_share) / c.ctrl_is_share AS is_share_lift
+        ,(m.share_cnt - c.ctrl_share_cnt) / c.ctrl_share_cnt AS share_cnt_lift
+        ,(m.is_return_1 - c.ctrl_is_return_1) / c.ctrl_is_return_1 AS is_return_1_lift
+        ,(m.return_n_uv - c.ctrl_return_n_uv) / c.ctrl_return_n_uv AS return_n_uv_lift
+        ,(m.viewh24 - c.ctrl_viewh24) / c.ctrl_viewh24 AS viewh24_lift
+        ,(m.return_n_uv_noself - c.ctrl_return_n_uv_noself) / c.ctrl_return_n_uv_noself AS return_n_uv_noself_lift
+FROM    t_metrics m
+LEFT JOIN t_control c
+ON      m.dt = c.dt
+AND     m.apptype = c.apptype
+AND     m.page = c.page
+ORDER BY m.dt DESC, m.apptype, m.page, m.abcode
+;

+ 6 - 0
tasks/00_AB效果/02_推荐AB天级效果_对比对照组_分小时.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "fiw6Fz",
+  "sort": "dt:desc",
+  "cols": null
+}

+ 6 - 0
tasks/00_AB效果/03_推荐AB天级效果_对比对照组_分小时.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "fiw6Fz",
+  "sort": "dt:desc,hh:desc",
+  "cols": null
+}

+ 145 - 0
tasks/00_AB效果/03_推荐AB天级效果_对比对照组_分小时.sql

@@ -0,0 +1,145 @@
+-- 推荐AB天级效果 - 含对照组对比(分小时)
+-- 新增维度:hh(小时),各指标相对对照组的变化率(lift)
+-- NOTE(review): byte-identical in logic to 01_推荐AB实时效果_分小时.sql —
+-- confirm whether both hourly reports are needed; duplicates will drift.
+WITH t_base AS
+(
+    SELECT  dt
+            ,hh
+            ,apptype
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): dead predicate — the IN whitelist above already excludes "ab100".
+    AND     abcode NOT IN ("ab100")
+),
+-- 计算各组基础指标(按小时)
+-- NOTE(review): raw ratios can hit a zero denominator in quiet hours
+-- (e.g. SUM(is_share)=0) — NULL or error depending on engine settings.
+t_metrics AS (
+    SELECT  dt
+            ,hh
+            ,apptype
+            ,abcode
+            ,page
+            ,COUNT(1) / COUNT(DISTINCT mid) AS exp_per_dau
+            ,SUM(is_share) / COUNT(1) AS str_one
+            ,SUM(return_n_uv) / SUM(is_share) AS ros_one
+            ,SUM(share_cnt) / COUNT(1) AS str
+            ,SUM(return_n_uv) / SUM(share_cnt) AS ros
+            ,SUM(is_return_1) / COUNT(1) AS str_plus
+            ,SUM(return_n_uv) / SUM(is_return_1) AS ros_minus
+            ,SUM(return_n_uv) / COUNT(1) AS rovn
+            ,SUM(new_exposure_cnt) / COUNT(1) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_base
+    WHERE   page IN ("推荐")
+    GROUP BY dt
+             ,hh
+             ,apptype
+             ,abcode
+             ,page
+),
+-- 获取对照组数据(按小时)
+t_control AS (
+    SELECT  dt
+            ,hh
+            ,apptype
+            ,page
+            ,exp_per_dau AS ctrl_exp_per_dau
+            ,str_one AS ctrl_str_one
+            ,ros_one AS ctrl_ros_one
+            ,str AS ctrl_str
+            ,ros AS ctrl_ros
+            ,str_plus AS ctrl_str_plus
+            ,ros_minus AS ctrl_ros_minus
+            ,rovn AS ctrl_rovn
+            ,vovh24 AS ctrl_vovh24
+            ,dau AS ctrl_dau
+            ,exp AS ctrl_exp
+            ,is_share AS ctrl_is_share
+            ,share_cnt AS ctrl_share_cnt
+            ,is_return_1 AS ctrl_is_return_1
+            ,return_n_uv AS ctrl_return_n_uv
+            ,viewh24 AS ctrl_viewh24
+            ,return_n_uv_noself AS ctrl_return_n_uv_noself
+    FROM    t_metrics
+    WHERE   abcode = "对照组"
+)
+-- 关联对照组,计算变化率
+SELECT  m.dt
+        ,m.hh
+        ,m.apptype
+        ,m.abcode
+        ,m.page
+        -- 原始指标
+        ,m.exp_per_dau
+        ,m.str_one
+        ,m.ros_one
+        ,m.str
+        ,m.ros
+        ,m.str_plus
+        ,m.ros_minus
+        ,m.rovn
+        ,m.vovh24
+        ,m.dau
+        ,m.exp
+        ,m.is_share
+        ,m.share_cnt
+        ,m.is_return_1
+        ,m.return_n_uv
+        ,m.viewh24
+        ,m.return_n_uv_noself
+        -- 相对对照组变化率
+        -- NOTE(review): lifts divide by the control value; zero control metrics
+        -- make the lift NULL/undefined — consider NULLIF(c.ctrl_x, 0). LEFT JOIN
+        -- leaves all lifts NULL for hours with no control row.
+        ,(m.exp_per_dau - c.ctrl_exp_per_dau) / c.ctrl_exp_per_dau AS exp_per_dau_lift
+        ,(m.str_one - c.ctrl_str_one) / c.ctrl_str_one AS str_one_lift
+        ,(m.ros_one - c.ctrl_ros_one) / c.ctrl_ros_one AS ros_one_lift
+        ,(m.str - c.ctrl_str) / c.ctrl_str AS str_lift
+        ,(m.ros - c.ctrl_ros) / c.ctrl_ros AS ros_lift
+        ,(m.str_plus - c.ctrl_str_plus) / c.ctrl_str_plus AS str_plus_lift
+        ,(m.ros_minus - c.ctrl_ros_minus) / c.ctrl_ros_minus AS ros_minus_lift
+        ,(m.rovn - c.ctrl_rovn) / c.ctrl_rovn AS rovn_lift
+        ,(m.vovh24 - c.ctrl_vovh24) / c.ctrl_vovh24 AS vovh24_lift
+        ,(m.dau - c.ctrl_dau) / c.ctrl_dau AS dau_lift
+        ,(m.exp - c.ctrl_exp) / c.ctrl_exp AS exp_lift
+        ,(m.is_share - c.ctrl_is_share) / c.ctrl_is_share AS is_share_lift
+        ,(m.share_cnt - c.ctrl_share_cnt) / c.ctrl_share_cnt AS share_cnt_lift
+        ,(m.is_return_1 - c.ctrl_is_return_1) / c.ctrl_is_return_1 AS is_return_1_lift
+        ,(m.return_n_uv - c.ctrl_return_n_uv) / c.ctrl_return_n_uv AS return_n_uv_lift
+        ,(m.viewh24 - c.ctrl_viewh24) / c.ctrl_viewh24 AS viewh24_lift
+        ,(m.return_n_uv_noself - c.ctrl_return_n_uv_noself) / c.ctrl_return_n_uv_noself AS return_n_uv_noself_lift
+FROM    t_metrics m
+LEFT JOIN t_control c
+ON      m.dt = c.dt
+AND     m.hh = c.hh
+AND     m.apptype = c.apptype
+AND     m.page = c.page
+ORDER BY m.dt DESC, m.hh, m.apptype, m.page, m.abcode
+;

+ 6 - 0
tasks/00_AB效果/04_推荐AB天级效果_对比对照组_分seq.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "IXIx4D",
+  "sort": "dt:desc,exp:desc",
+  "cols": null
+}

+ 145 - 0
tasks/00_AB效果/04_推荐AB天级效果_对比对照组_分seq.sql

@@ -0,0 +1,145 @@
+-- 推荐AB天级效果 - 含对照组对比(分sequence)
+-- 新增维度:seq(曝光序号),各指标相对对照组的变化率(lift)
+-- NOTE(review): per-exposure-position variant — groups by the sequence number
+-- parsed from the extend JSON, then compares each group to the control per seq.
+WITH t_base AS
+(
+    SELECT  dt
+            -- NOTE(review): sequence is read from extend's extParams.sequence,
+            -- capped at 20 via LEAST, and falls back to "unknown" when the JSON
+            -- path is missing or non-numeric (CAST yields NULL → COALESCE).
+            ,COALESCE(CAST(LEAST(CAST(GET_JSON_OBJECT(extend, '$.extParams.sequence') AS BIGINT), 20) AS STRING), "unknown") AS seq
+            ,apptype
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): dead predicate — the IN whitelist above already excludes "ab100".
+    AND     abcode NOT IN ("ab100")
+),
+-- 计算各组基础指标(按sequence)
+-- NOTE(review): raw ratios can hit a zero denominator for sparse seq buckets
+-- (e.g. SUM(is_share)=0) — NULL or error depending on engine settings.
+t_metrics AS (
+    SELECT  dt
+            ,seq
+            ,apptype
+            ,abcode
+            ,page
+            ,COUNT(1) / COUNT(DISTINCT mid) AS exp_per_dau
+            ,SUM(is_share) / COUNT(1) AS str_one
+            ,SUM(return_n_uv) / SUM(is_share) AS ros_one
+            ,SUM(share_cnt) / COUNT(1) AS str
+            ,SUM(return_n_uv) / SUM(share_cnt) AS ros
+            ,SUM(is_return_1) / COUNT(1) AS str_plus
+            ,SUM(return_n_uv) / SUM(is_return_1) AS ros_minus
+            ,SUM(return_n_uv) / COUNT(1) AS rovn
+            ,SUM(new_exposure_cnt) / COUNT(1) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_base
+    WHERE   page IN ("推荐")
+    GROUP BY dt
+             ,seq
+             ,apptype
+             ,abcode
+             ,page
+),
+-- 获取对照组数据(按sequence)
+t_control AS (
+    SELECT  dt
+            ,seq
+            ,apptype
+            ,page
+            ,exp_per_dau AS ctrl_exp_per_dau
+            ,str_one AS ctrl_str_one
+            ,ros_one AS ctrl_ros_one
+            ,str AS ctrl_str
+            ,ros AS ctrl_ros
+            ,str_plus AS ctrl_str_plus
+            ,ros_minus AS ctrl_ros_minus
+            ,rovn AS ctrl_rovn
+            ,vovh24 AS ctrl_vovh24
+            ,dau AS ctrl_dau
+            ,exp AS ctrl_exp
+            ,is_share AS ctrl_is_share
+            ,share_cnt AS ctrl_share_cnt
+            ,is_return_1 AS ctrl_is_return_1
+            ,return_n_uv AS ctrl_return_n_uv
+            ,viewh24 AS ctrl_viewh24
+            ,return_n_uv_noself AS ctrl_return_n_uv_noself
+    FROM    t_metrics
+    WHERE   abcode = "对照组"
+)
+-- 关联对照组,计算变化率
+SELECT  m.dt
+        ,m.seq
+        ,m.apptype
+        ,m.abcode
+        ,m.page
+        -- 原始指标
+        ,m.exp_per_dau
+        ,m.str_one
+        ,m.ros_one
+        ,m.str
+        ,m.ros
+        ,m.str_plus
+        ,m.ros_minus
+        ,m.rovn
+        ,m.vovh24
+        ,m.dau
+        ,m.exp
+        ,m.is_share
+        ,m.share_cnt
+        ,m.is_return_1
+        ,m.return_n_uv
+        ,m.viewh24
+        ,m.return_n_uv_noself
+        -- 相对对照组变化率
+        -- NOTE(review): lifts divide by the control value; zero control metrics
+        -- make the lift NULL/undefined — consider NULLIF(c.ctrl_x, 0). Note the
+        -- ORDER BY sorts seq as a STRING ("10" < "2"), not numerically.
+        ,(m.exp_per_dau - c.ctrl_exp_per_dau) / c.ctrl_exp_per_dau AS exp_per_dau_lift
+        ,(m.str_one - c.ctrl_str_one) / c.ctrl_str_one AS str_one_lift
+        ,(m.ros_one - c.ctrl_ros_one) / c.ctrl_ros_one AS ros_one_lift
+        ,(m.str - c.ctrl_str) / c.ctrl_str AS str_lift
+        ,(m.ros - c.ctrl_ros) / c.ctrl_ros AS ros_lift
+        ,(m.str_plus - c.ctrl_str_plus) / c.ctrl_str_plus AS str_plus_lift
+        ,(m.ros_minus - c.ctrl_ros_minus) / c.ctrl_ros_minus AS ros_minus_lift
+        ,(m.rovn - c.ctrl_rovn) / c.ctrl_rovn AS rovn_lift
+        ,(m.vovh24 - c.ctrl_vovh24) / c.ctrl_vovh24 AS vovh24_lift
+        ,(m.dau - c.ctrl_dau) / c.ctrl_dau AS dau_lift
+        ,(m.exp - c.ctrl_exp) / c.ctrl_exp AS exp_lift
+        ,(m.is_share - c.ctrl_is_share) / c.ctrl_is_share AS is_share_lift
+        ,(m.share_cnt - c.ctrl_share_cnt) / c.ctrl_share_cnt AS share_cnt_lift
+        ,(m.is_return_1 - c.ctrl_is_return_1) / c.ctrl_is_return_1 AS is_return_1_lift
+        ,(m.return_n_uv - c.ctrl_return_n_uv) / c.ctrl_return_n_uv AS return_n_uv_lift
+        ,(m.viewh24 - c.ctrl_viewh24) / c.ctrl_viewh24 AS viewh24_lift
+        ,(m.return_n_uv_noself - c.ctrl_return_n_uv_noself) / c.ctrl_return_n_uv_noself AS return_n_uv_noself_lift
+FROM    t_metrics m
+LEFT JOIN t_control c
+ON      m.dt = c.dt
+AND     m.seq = c.seq
+AND     m.apptype = c.apptype
+AND     m.page = c.page
+ORDER BY m.dt DESC, m.seq, m.apptype, m.page, m.abcode
+;

+ 6 - 0
tasks/00_AB效果/05_推荐AB天级效果_对比对照组_含多跳.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "6vAr7l",
+  "sort": "dt:desc",
+  "cols": null
+}

+ 190 - 0
tasks/00_AB效果/05_推荐AB天级效果_对比对照组_含多跳.sql

@@ -0,0 +1,190 @@
+-- Daily recommendation A/B effect report: each experiment group vs. the control group,
+-- plus multi-hop B/C/D metrics.
+-- Based on the new table dwd_recsys_alg_exposure_base_20260206; adds b, c_1~c_3, d_1~d_3,
+-- total_bc, total_d, total_bcd per-exposure rate metrics.
+-- NOTE(review): every *_lift column divides by a ctrl_* value; if a control metric is 0
+-- the division yields NULL (MaxCompute semantics) -- confirm that is acceptable downstream.
+WITH t_base AS
+(
+    SELECT  dt
+            ,apptype
+            -- Map raw ab buckets to experiment-group labels. The Chinese labels are
+            -- runtime values consumed downstream (e.g. the "对照组"/control filter below).
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            -- Collapse raw pages into recommendation ("推荐") vs non-recommendation traffic.
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+            -- Multi-hop counters may be NULL/string-typed in the source; cast then default to 0.
+            ,COALESCE(CAST(b AS BIGINT), 0) AS b
+            ,COALESCE(CAST(c_1 AS BIGINT), 0) AS c_1
+            ,COALESCE(CAST(c_2 AS BIGINT), 0) AS c_2
+            ,COALESCE(CAST(c_3 AS BIGINT), 0) AS c_3
+            ,COALESCE(CAST(d_1 AS BIGINT), 0) AS d_1
+            ,COALESCE(CAST(d_2 AS BIGINT), 0) AS d_2
+            ,COALESCE(CAST(d_3 AS BIGINT), 0) AS d_3
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260206
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): "ab100" is already excluded by the IN list above; this predicate is redundant.
+    AND     abcode NOT IN ("ab100")
+),
+-- Per-group base metrics over recommendation-page exposures only.
+t_metrics AS (
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,page
+            ,COUNT(1) / COUNT(DISTINCT mid) AS exp_per_dau
+            ,SUM(is_share) / COUNT(1) AS str_one
+            ,SUM(return_n_uv) / SUM(is_share) AS ros_one
+            ,SUM(share_cnt) / COUNT(1) AS str
+            ,SUM(return_n_uv) / SUM(share_cnt) AS ros
+            ,SUM(is_return_1) / COUNT(1) AS str_plus
+            ,SUM(return_n_uv) / SUM(is_return_1) AS ros_minus
+            ,SUM(return_n_uv) / COUNT(1) AS rovn
+            ,SUM(new_exposure_cnt) / COUNT(1) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            -- Multi-hop per-exposure rates (b = direct, c_* / d_* = deeper hops).
+            ,SUM(b) / COUNT(1) AS b
+            ,SUM(c_1) / COUNT(1) AS c_1
+            ,SUM(c_2) / COUNT(1) AS c_2
+            ,SUM(c_3) / COUNT(1) AS c_3
+            ,SUM(b + c_1 + c_2 + c_3) / COUNT(1) AS total_bc
+            ,SUM(d_1) / COUNT(1) AS d_1
+            ,SUM(d_2) / COUNT(1) AS d_2
+            ,SUM(d_3) / COUNT(1) AS d_3
+            ,SUM(d_1 + d_2 + d_3) / COUNT(1) AS total_d
+            ,SUM(b + c_1 + c_2 + c_3 + d_1 + d_2 + d_3) / COUNT(1) AS total_bcd
+            ,SUM(b + c_1 + c_2 + c_3 + d_1) / COUNT(1) AS total_bcd1
+    FROM    t_base
+    WHERE   page IN ("推荐")
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,page
+),
+-- Control-group rows only, columns renamed ctrl_* for the self-join below.
+t_control AS (
+    SELECT  dt
+            ,apptype
+            ,page
+            ,exp_per_dau AS ctrl_exp_per_dau
+            ,str_one AS ctrl_str_one
+            ,ros_one AS ctrl_ros_one
+            ,str AS ctrl_str
+            ,ros AS ctrl_ros
+            ,str_plus AS ctrl_str_plus
+            ,ros_minus AS ctrl_ros_minus
+            ,rovn AS ctrl_rovn
+            ,vovh24 AS ctrl_vovh24
+            ,dau AS ctrl_dau
+            ,exp AS ctrl_exp
+            ,is_share AS ctrl_is_share
+            ,share_cnt AS ctrl_share_cnt
+            ,is_return_1 AS ctrl_is_return_1
+            ,return_n_uv AS ctrl_return_n_uv
+            ,viewh24 AS ctrl_viewh24
+            ,return_n_uv_noself AS ctrl_return_n_uv_noself
+            ,b AS ctrl_b
+            ,c_1 AS ctrl_c_1
+            ,c_2 AS ctrl_c_2
+            ,c_3 AS ctrl_c_3
+            ,total_bc AS ctrl_total_bc
+            ,d_1 AS ctrl_d_1
+            ,d_2 AS ctrl_d_2
+            ,d_3 AS ctrl_d_3
+            ,total_d AS ctrl_total_d
+            ,total_bcd AS ctrl_total_bcd
+            ,total_bcd1 AS ctrl_total_bcd1
+    FROM    t_metrics
+    WHERE   abcode = "对照组"
+)
+-- Join every group against the control group and compute lift = (metric - ctrl) / ctrl.
+SELECT  m.dt
+        ,m.apptype
+        ,m.abcode
+        ,m.page
+        -- raw metrics
+        ,m.exp_per_dau
+        ,m.str_one
+        ,m.ros_one
+        ,m.str
+        ,m.ros
+        ,m.str_plus
+        ,m.ros_minus
+        ,m.rovn
+        ,m.vovh24
+        ,m.dau
+        ,m.exp
+        ,m.is_share
+        ,m.share_cnt
+        ,m.is_return_1
+        ,m.return_n_uv
+        ,m.viewh24
+        ,m.return_n_uv_noself
+        ,m.b
+        ,m.c_1
+        ,m.c_2
+        ,m.c_3
+        ,m.total_bc
+        ,m.d_1
+        ,m.d_2
+        ,m.d_3
+        ,m.total_d
+        ,m.total_bcd
+        ,m.total_bcd1
+        -- lift relative to the control group
+        ,(m.exp_per_dau - c.ctrl_exp_per_dau) / c.ctrl_exp_per_dau AS exp_per_dau_lift
+        ,(m.str_one - c.ctrl_str_one) / c.ctrl_str_one AS str_one_lift
+        ,(m.ros_one - c.ctrl_ros_one) / c.ctrl_ros_one AS ros_one_lift
+        ,(m.str - c.ctrl_str) / c.ctrl_str AS str_lift
+        ,(m.ros - c.ctrl_ros) / c.ctrl_ros AS ros_lift
+        ,(m.str_plus - c.ctrl_str_plus) / c.ctrl_str_plus AS str_plus_lift
+        ,(m.ros_minus - c.ctrl_ros_minus) / c.ctrl_ros_minus AS ros_minus_lift
+        ,(m.rovn - c.ctrl_rovn) / c.ctrl_rovn AS rovn_lift
+        ,(m.vovh24 - c.ctrl_vovh24) / c.ctrl_vovh24 AS vovh24_lift
+        ,(m.dau - c.ctrl_dau) / c.ctrl_dau AS dau_lift
+        ,(m.exp - c.ctrl_exp) / c.ctrl_exp AS exp_lift
+        ,(m.is_share - c.ctrl_is_share) / c.ctrl_is_share AS is_share_lift
+        ,(m.share_cnt - c.ctrl_share_cnt) / c.ctrl_share_cnt AS share_cnt_lift
+        ,(m.is_return_1 - c.ctrl_is_return_1) / c.ctrl_is_return_1 AS is_return_1_lift
+        ,(m.return_n_uv - c.ctrl_return_n_uv) / c.ctrl_return_n_uv AS return_n_uv_lift
+        ,(m.viewh24 - c.ctrl_viewh24) / c.ctrl_viewh24 AS viewh24_lift
+        ,(m.return_n_uv_noself - c.ctrl_return_n_uv_noself) / c.ctrl_return_n_uv_noself AS return_n_uv_noself_lift
+        ,(m.b - c.ctrl_b) / c.ctrl_b AS b_lift
+        ,(m.c_1 - c.ctrl_c_1) / c.ctrl_c_1 AS c_1_lift
+        ,(m.c_2 - c.ctrl_c_2) / c.ctrl_c_2 AS c_2_lift
+        ,(m.c_3 - c.ctrl_c_3) / c.ctrl_c_3 AS c_3_lift
+        ,(m.total_bc - c.ctrl_total_bc) / c.ctrl_total_bc AS total_bc_lift
+        ,(m.d_1 - c.ctrl_d_1) / c.ctrl_d_1 AS d_1_lift
+        ,(m.d_2 - c.ctrl_d_2) / c.ctrl_d_2 AS d_2_lift
+        ,(m.d_3 - c.ctrl_d_3) / c.ctrl_d_3 AS d_3_lift
+        ,(m.total_d - c.ctrl_total_d) / c.ctrl_total_d AS total_d_lift
+        ,(m.total_bcd - c.ctrl_total_bcd) / c.ctrl_total_bcd AS total_bcd_lift
+        ,(m.total_bcd1 - c.ctrl_total_bcd1) / c.ctrl_total_bcd1 AS total_bcd1_lift
+FROM    t_metrics m
+LEFT JOIN t_control c
+ON      m.dt = c.dt
+AND     m.apptype = c.apptype
+AND     m.page = c.page
+ORDER BY m.dt DESC, m.apptype, m.page, m.abcode
+;

+ 195 - 0
tasks/00_AB效果/05_推荐AB天级效果_对比对照组_含多跳_v2.sql

@@ -0,0 +1,195 @@
+-- Daily recommendation A/B effect report (v2): per-bucket variant of the multi-hop report.
+-- Unlike v1, each ab bucket maps to its own label (the odd buckets get an "aa" suffix,
+-- presumably an A/A sanity split -- TODO confirm), and only ab2 is treated as the control.
+-- Based on dwd_recsys_alg_exposure_base_20260206; adds b, c_1~c_3, d_1~d_3,
+-- total_bc, total_d, total_bcd per-exposure rate metrics.
+-- NOTE(review): "对照组aa" (ab3) also gets lift columns computed against "对照组" (ab2).
+WITH t_base AS
+(
+    SELECT  dt
+            ,apptype
+            -- One label per bucket; labels are runtime values (control filter matches "对照组").
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab1") THEN "实验组-先验地域降权aa"
+                    WHEN apptype IN ("4") AND abcode IN ("ab0") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab7") THEN "实验组-str+校准&ros-统计量aa"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab9") THEN "实验组-str+校准&ros损失函数优化aa"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8") THEN "实验组-str+校准&ros损失函数优化"
+                    WHEN apptype IN ("4") AND abcode IN ("ab5") THEN "实验组-str+校准&ros天级更新aa"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4") THEN "实验组-str+校准&ros天级更新"
+                    WHEN apptype IN ("4") AND abcode IN ("ab3") THEN "对照组aa"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2") THEN "对照组"
+                    ELSE "其他"
+            END AS abcode
+            -- Collapse raw pages into recommendation ("推荐") vs non-recommendation traffic.
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+            -- Multi-hop counters may be NULL/string-typed in the source; cast then default to 0.
+            ,COALESCE(CAST(b AS BIGINT), 0) AS b
+            ,COALESCE(CAST(c_1 AS BIGINT), 0) AS c_1
+            ,COALESCE(CAST(c_2 AS BIGINT), 0) AS c_2
+            ,COALESCE(CAST(c_3 AS BIGINT), 0) AS c_3
+            ,COALESCE(CAST(d_1 AS BIGINT), 0) AS d_1
+            ,COALESCE(CAST(d_2 AS BIGINT), 0) AS d_2
+            ,COALESCE(CAST(d_3 AS BIGINT), 0) AS d_3
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260206
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("4")
+    AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): "ab100" is already excluded by the IN list above; this predicate is redundant.
+    AND     abcode NOT IN ("ab100")
+),
+-- Per-group base metrics over recommendation-page exposures only.
+t_metrics AS (
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,page
+            ,COUNT(1) / COUNT(DISTINCT mid) AS exp_per_dau
+            ,SUM(is_share) / COUNT(1) AS str_one
+            ,SUM(return_n_uv) / SUM(is_share) AS ros_one
+            ,SUM(share_cnt) / COUNT(1) AS str
+            ,SUM(return_n_uv) / SUM(share_cnt) AS ros
+            ,SUM(is_return_1) / COUNT(1) AS str_plus
+            ,SUM(return_n_uv) / SUM(is_return_1) AS ros_minus
+            ,SUM(return_n_uv) / COUNT(1) AS rovn
+            ,SUM(new_exposure_cnt) / COUNT(1) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            -- Multi-hop per-exposure rates (b = direct, c_* / d_* = deeper hops).
+            ,SUM(b) / COUNT(1) AS b
+            ,SUM(c_1) / COUNT(1) AS c_1
+            ,SUM(c_2) / COUNT(1) AS c_2
+            ,SUM(c_3) / COUNT(1) AS c_3
+            ,SUM(b + c_1 + c_2 + c_3) / COUNT(1) AS total_bc
+            ,SUM(d_1) / COUNT(1) AS d_1
+            ,SUM(d_2) / COUNT(1) AS d_2
+            ,SUM(d_3) / COUNT(1) AS d_3
+            ,SUM(d_1 + d_2 + d_3) / COUNT(1) AS total_d
+            ,SUM(b + c_1 + c_2 + c_3 + d_1 + d_2 + d_3) / COUNT(1) AS total_bcd
+            ,SUM(b + c_1 + c_2 + c_3 + d_1) / COUNT(1) AS total_bcd1
+    FROM    t_base
+    WHERE   page IN ("推荐")
+    GROUP BY dt
+             ,apptype
+             ,abcode
+             ,page
+),
+-- Control-group rows only, columns renamed ctrl_* for the self-join below.
+t_control AS (
+    SELECT  dt
+            ,apptype
+            ,page
+            ,exp_per_dau AS ctrl_exp_per_dau
+            ,str_one AS ctrl_str_one
+            ,ros_one AS ctrl_ros_one
+            ,str AS ctrl_str
+            ,ros AS ctrl_ros
+            ,str_plus AS ctrl_str_plus
+            ,ros_minus AS ctrl_ros_minus
+            ,rovn AS ctrl_rovn
+            ,vovh24 AS ctrl_vovh24
+            ,dau AS ctrl_dau
+            ,exp AS ctrl_exp
+            ,is_share AS ctrl_is_share
+            ,share_cnt AS ctrl_share_cnt
+            ,is_return_1 AS ctrl_is_return_1
+            ,return_n_uv AS ctrl_return_n_uv
+            ,viewh24 AS ctrl_viewh24
+            ,return_n_uv_noself AS ctrl_return_n_uv_noself
+            ,b AS ctrl_b
+            ,c_1 AS ctrl_c_1
+            ,c_2 AS ctrl_c_2
+            ,c_3 AS ctrl_c_3
+            ,total_bc AS ctrl_total_bc
+            ,d_1 AS ctrl_d_1
+            ,d_2 AS ctrl_d_2
+            ,d_3 AS ctrl_d_3
+            ,total_d AS ctrl_total_d
+            ,total_bcd AS ctrl_total_bcd
+            ,total_bcd1 AS ctrl_total_bcd1
+    FROM    t_metrics
+    WHERE   abcode = "对照组"
+)
+-- Join every group against the control group and compute lift = (metric - ctrl) / ctrl.
+SELECT  m.dt
+        ,m.apptype
+        ,m.abcode
+        ,m.page
+        -- raw metrics
+        ,m.exp_per_dau
+        ,m.str_one
+        ,m.ros_one
+        ,m.str
+        ,m.ros
+        ,m.str_plus
+        ,m.ros_minus
+        ,m.rovn
+        ,m.vovh24
+        ,m.dau
+        ,m.exp
+        ,m.is_share
+        ,m.share_cnt
+        ,m.is_return_1
+        ,m.return_n_uv
+        ,m.viewh24
+        ,m.return_n_uv_noself
+        ,m.b
+        ,m.c_1
+        ,m.c_2
+        ,m.c_3
+        ,m.total_bc
+        ,m.d_1
+        ,m.d_2
+        ,m.d_3
+        ,m.total_d
+        ,m.total_bcd
+        ,m.total_bcd1
+        -- lift relative to the control group
+        ,(m.exp_per_dau - c.ctrl_exp_per_dau) / c.ctrl_exp_per_dau AS exp_per_dau_lift
+        ,(m.str_one - c.ctrl_str_one) / c.ctrl_str_one AS str_one_lift
+        ,(m.ros_one - c.ctrl_ros_one) / c.ctrl_ros_one AS ros_one_lift
+        ,(m.str - c.ctrl_str) / c.ctrl_str AS str_lift
+        ,(m.ros - c.ctrl_ros) / c.ctrl_ros AS ros_lift
+        ,(m.str_plus - c.ctrl_str_plus) / c.ctrl_str_plus AS str_plus_lift
+        ,(m.ros_minus - c.ctrl_ros_minus) / c.ctrl_ros_minus AS ros_minus_lift
+        ,(m.rovn - c.ctrl_rovn) / c.ctrl_rovn AS rovn_lift
+        ,(m.vovh24 - c.ctrl_vovh24) / c.ctrl_vovh24 AS vovh24_lift
+        ,(m.dau - c.ctrl_dau) / c.ctrl_dau AS dau_lift
+        ,(m.exp - c.ctrl_exp) / c.ctrl_exp AS exp_lift
+        ,(m.is_share - c.ctrl_is_share) / c.ctrl_is_share AS is_share_lift
+        ,(m.share_cnt - c.ctrl_share_cnt) / c.ctrl_share_cnt AS share_cnt_lift
+        ,(m.is_return_1 - c.ctrl_is_return_1) / c.ctrl_is_return_1 AS is_return_1_lift
+        ,(m.return_n_uv - c.ctrl_return_n_uv) / c.ctrl_return_n_uv AS return_n_uv_lift
+        ,(m.viewh24 - c.ctrl_viewh24) / c.ctrl_viewh24 AS viewh24_lift
+        ,(m.return_n_uv_noself - c.ctrl_return_n_uv_noself) / c.ctrl_return_n_uv_noself AS return_n_uv_noself_lift
+        ,(m.b - c.ctrl_b) / c.ctrl_b AS b_lift
+        ,(m.c_1 - c.ctrl_c_1) / c.ctrl_c_1 AS c_1_lift
+        ,(m.c_2 - c.ctrl_c_2) / c.ctrl_c_2 AS c_2_lift
+        ,(m.c_3 - c.ctrl_c_3) / c.ctrl_c_3 AS c_3_lift
+        ,(m.total_bc - c.ctrl_total_bc) / c.ctrl_total_bc AS total_bc_lift
+        ,(m.d_1 - c.ctrl_d_1) / c.ctrl_d_1 AS d_1_lift
+        ,(m.d_2 - c.ctrl_d_2) / c.ctrl_d_2 AS d_2_lift
+        ,(m.d_3 - c.ctrl_d_3) / c.ctrl_d_3 AS d_3_lift
+        ,(m.total_d - c.ctrl_total_d) / c.ctrl_total_d AS total_d_lift
+        ,(m.total_bcd - c.ctrl_total_bcd) / c.ctrl_total_bcd AS total_bcd_lift
+        ,(m.total_bcd1 - c.ctrl_total_bcd1) / c.ctrl_total_bcd1 AS total_bcd1_lift
+FROM    t_metrics m
+LEFT JOIN t_control c
+ON      m.dt = c.dt
+AND     m.apptype = c.apptype
+AND     m.page = c.page
+ORDER BY m.dt DESC, m.apptype, m.page, m.abcode
+;

+ 7 - 0
tasks/00_尾号实验/base.json

@@ -0,0 +1,7 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "pvHZPg",
+  "sort": "dt:desc",
+  "cols": null,
+  "filter": "abcode!=other"
+}

+ 123 - 0
tasks/00_尾号实验/base.sql

@@ -0,0 +1,123 @@
+-- Tail-digit experiment report: buckets users by the LAST character of rootsessionid
+-- (extracted via SUBSTR(.., LENGTH(..), 1)), maps it to a group label via t_abmap,
+-- and reports daily share/return metrics per group plus an independent DAU (dau2)
+-- computed from the user-active log.
+WITH t_abmap AS
+(
+    -- Static suffix -> group-label mapping (labels are runtime values, kept as-is).
+    SELECT "1" AS suffix, "实验组:str校准 & ros天级更新" AS abcode
+    UNION ALL SELECT "2", "实验组:str校准 & ros统计量"
+    UNION ALL SELECT "3", "实验组:ros损失函数优化"
+    UNION ALL SELECT "a", "对照组"
+    UNION ALL SELECT "b", "对照组"
+    UNION ALL SELECT "c", "对照组"
+    UNION ALL SELECT "d", "对照组"
+)
+,t_base AS
+(
+    SELECT  sub.*
+            -- Unmapped suffixes fall into the "other" bucket.
+            ,COALESCE(m.abcode,"other") AS abcode
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        -- Last character of the rootsessionid embedded in the extend JSON.
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        -- Collapse raw pages into recommendation vs non-recommendation traffic.
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,mid
+                        ,vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+                WHERE   dt="${dt}"
+                AND     apptype IN ("4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                -- NOTE(review): "ab100" is already excluded by the IN list above; redundant.
+                AND     abcode NOT IN ("ab100")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"
+    AND     sub.suffix = m.suffix
+)
+-- Independent DAU per group from the user-active log (cross-check against exposure-based dau).
+,t_dau2 AS
+(
+    SELECT  SUBSTR(sub.dt,1,8) AS dt
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode
+            ,COUNT(DISTINCT sub.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        -- Note the different JSON key casing here: rootSessionId in extparams.
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+                FROM    loghubods.useractive_log
+                WHERE   dt="${dt}"
+                -- Alternative source kept for reference (5-minute granularity table):
+                -- FROM    loghubods.useractive_log_per5min
+                -- WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                AND     apptype IN ("4")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+)
+SELECT  a.dt
+        ,a.apptype
+        ,a.abcode
+        ,a.exp_per_dau
+        ,a.str_one
+        ,a.ros_one
+        ,a.str
+        ,a.ros
+        ,a.str_plus
+        ,a.ros_minus
+        ,a.rovn
+        ,a.vovh24
+        ,a.dau
+        ,a.exp
+        ,a.is_share
+        ,a.share_cnt
+        ,a.is_return_1
+        ,a.return_n_uv
+        ,a.viewh24
+        ,a.return_n_uv_noself
+        ,b.dau2
+FROM    (
+            -- Per-group metrics over recommendation-page exposures; ratios are
+            -- COALESCEd to 0 (NULL from 0-denominator division) then rounded.
+            SELECT  dt
+                    ,apptype
+                    ,abcode
+                    ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+                    ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+                    ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+                    ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+                    ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+                    ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+                    ,COUNT(DISTINCT mid) AS dau
+                    ,COUNT(1) AS exp
+                    ,COALESCE(SUM(is_share),0) AS is_share
+                    ,COALESCE(SUM(share_cnt),0) AS share_cnt
+                    ,COALESCE(SUM(is_return_1),0) AS is_return_1
+                    ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+                    ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+                    ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            FROM    t_base
+            WHERE   page = "推荐"
+            GROUP BY dt
+                     ,apptype
+                     ,abcode
+        ) a
+-- NOTE(review): join assumes the exposure table's dt is already yyyymmdd so it matches
+-- SUBSTR(dt,1,8) from t_dau2 -- confirm against the table's partition format.
+LEFT JOIN t_dau2 b
+ON      a.dt = b.dt
+AND     a.apptype = b.apptype
+AND     a.abcode = b.abcode
+ORDER BY a.dt DESC,a.apptype,a.abcode
+;

+ 7 - 0
tasks/00_尾号实验/base_v1.json

@@ -0,0 +1,7 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "K5XBxy",
+  "sort": "dt:desc",
+  "cols": null,
+  "filter": "abcode!=other"
+}

+ 122 - 0
tasks/00_尾号实验/base_v1.sql

@@ -0,0 +1,122 @@
+-- Tail-digit experiment report (v1): same structure as base.sql but with two extra
+-- experiment suffixes ("4", "5"), only "c"/"d" as control, and DAU sourced from the
+-- 5-minute-granularity active log instead of the daily one.
+WITH t_abmap AS
+(
+    -- Static suffix -> group-label mapping (labels are runtime values, kept as-is).
+    SELECT "1" AS suffix, "实验组:str校准 & ros天级更新" AS abcode
+    UNION ALL SELECT "2", "实验组:str校准 & ros统计量"
+    UNION ALL SELECT "3", "实验组:ros损失函数优化"
+    UNION ALL SELECT "4", "实验组:c1_rovn & 去掉vor实验"
+    UNION ALL SELECT "5", "实验组:b0_str & 去掉vor实验"
+    UNION ALL SELECT "c", "对照组"
+    UNION ALL SELECT "d", "对照组"
+)
+,t_base AS
+(
+    SELECT  sub.*
+            -- Unmapped suffixes fall into the "other" bucket.
+            ,COALESCE(m.abcode,"other") AS abcode
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        -- Last character of the rootsessionid embedded in the extend JSON.
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        -- Collapse raw pages into recommendation vs non-recommendation traffic.
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,mid
+                        ,vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+                WHERE   dt ="${dt}"
+                -- Optional hour filter kept for ad-hoc reruns:
+                -- and hh between "21" and "24"
+                AND     apptype IN ("4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                -- NOTE(review): "ab100" is already excluded by the IN list above; redundant.
+                AND     abcode NOT IN ("ab100")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"
+    AND     sub.suffix = m.suffix
+)
+-- Independent DAU per group from the 5-minute active log.
+,t_dau2 AS
+(
+    SELECT  SUBSTR(sub.dt,1,8) AS dt
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode
+            ,COUNT(DISTINCT sub.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        -- Note the different JSON key casing here: rootSessionId in extparams.
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+                FROM    loghubods.useractive_log_per5min
+                -- "239999" exceeds any real hhmmss, so under string comparison this is an
+                -- inclusive whole-day bound; base_v2 uses "235500" instead -- TODO confirm intent.
+                WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","239999")
+                AND     apptype IN ("4")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+)
+SELECT  a.dt
+        ,a.apptype
+        ,a.abcode
+        ,a.exp_per_dau
+        ,a.str_one
+        ,a.ros_one
+        ,a.str
+        ,a.ros
+        ,a.str_plus
+        ,a.ros_minus
+        ,a.rovn
+        ,a.vovh24
+        ,a.dau
+        ,a.exp
+        ,a.is_share
+        ,a.share_cnt
+        ,a.is_return_1
+        ,a.return_n_uv
+        ,a.viewh24
+        ,a.return_n_uv_noself
+        ,b.dau2
+FROM    (
+            -- Per-group metrics over recommendation-page exposures; ratios are
+            -- COALESCEd to 0 (NULL from 0-denominator division) then rounded.
+            SELECT  dt
+                    ,apptype
+                    ,abcode
+                    ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+                    ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+                    ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+                    ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+                    ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+                    ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+                    ,COUNT(DISTINCT mid) AS dau
+                    ,COUNT(1) AS exp
+                    ,COALESCE(SUM(is_share),0) AS is_share
+                    ,COALESCE(SUM(share_cnt),0) AS share_cnt
+                    ,COALESCE(SUM(is_return_1),0) AS is_return_1
+                    ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+                    ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+                    ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            FROM    t_base
+            WHERE   page = "推荐"
+            GROUP BY dt
+                     ,apptype
+                     ,abcode
+        ) a
+LEFT JOIN t_dau2 b
+ON      a.dt = b.dt
+AND     a.apptype = b.apptype
+AND     a.abcode = b.abcode
+ORDER BY a.dt DESC,a.apptype,a.abcode
+;

+ 7 - 0
tasks/00_尾号实验/base_v2.json

@@ -0,0 +1,7 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "pvHZPg",
+  "sort": "dt:desc",
+  "cols": null,
+  "filter": "abcode!=other,abcode!=6,abcode!=e,abcode!=f"
+}

+ 128 - 0
tasks/00_尾号实验/base_v2.sql

@@ -0,0 +1,128 @@
+-- Tail-digit experiment report (v2): adds the "6"/"e"/"f" suffixes as their own labeled
+-- buckets (filtered out downstream via the JSON config's filter field) and restores
+-- "a"/"b" to the control group; DAU sourced from the 5-minute active log.
+WITH t_abmap AS
+(
+    -- Static suffix -> group-label mapping (labels are runtime values, kept as-is).
+    SELECT "1" AS suffix, "实验组:str校准 & ros天级更新" AS abcode
+    UNION ALL SELECT "2", "实验组:str校准 & ros统计量"
+    UNION ALL SELECT "3", "实验组:ros损失函数优化"
+    UNION ALL SELECT "5", "实验组:b0_str & 去掉vor实验"
+    UNION ALL SELECT "4", "实验组:c1_rovn & 去掉vor实验"
+    UNION ALL SELECT "6", "6"
+    UNION ALL SELECT "e", "e"
+    UNION ALL SELECT "f", "f"
+    UNION ALL SELECT "a", "对照组"
+    UNION ALL SELECT "b", "对照组"
+    UNION ALL SELECT "c", "对照组"
+    UNION ALL SELECT "d", "对照组"
+)
+,t_base AS
+(
+    SELECT  sub.*
+            -- Unmapped suffixes fall into the "other" bucket.
+            ,COALESCE(m.abcode,"other") AS abcode
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        -- Last character of the rootsessionid embedded in the extend JSON.
+                        ,SUBSTR(GET_JSON_OBJECT(extend,'$.rootsessionid'),LENGTH(GET_JSON_OBJECT(extend,'$.rootsessionid')),1) AS suffix
+                        -- Collapse raw pages into recommendation vs non-recommendation traffic.
+                        ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                                WHEN page IN ("回流页","其他") THEN "非推荐"
+                                ELSE "其他"
+                        END AS page
+                        ,mid
+                        ,vid
+                        ,is_share
+                        ,share_cnt
+                        ,is_return_1
+                        ,is_return_n
+                        ,is_return_noself
+                        ,return_1_uv
+                        ,return_n_uv
+                        ,return_n_uv_noself
+                        ,new_exposure_cnt
+                        ,flowpool
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+                WHERE   dt="${dt}"
+                AND     apptype IN ("4")
+                AND     page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页","回流页","其他")
+                AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+                -- NOTE(review): "ab100" is already excluded by the IN list above; redundant.
+                AND     abcode NOT IN ("ab100")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"
+    AND     sub.suffix = m.suffix
+)
+-- Independent DAU per group from the 5-minute active log.
+,t_dau2 AS
+(
+    SELECT  SUBSTR(sub.dt,1,8) AS dt
+            ,sub.apptype
+            ,COALESCE(m.abcode,"other") AS abcode
+            ,COUNT(DISTINCT sub.machinecode) AS dau2
+    FROM    (
+                SELECT  dt
+                        ,apptype
+                        ,machinecode
+                        -- Note the different JSON key casing here: rootSessionId in extparams.
+                        ,SUBSTR(GET_JSON_OBJECT(extparams,'$.rootSessionId'),LENGTH(GET_JSON_OBJECT(extparams,'$.rootSessionId')),1) AS suffix
+                -- Daily-table alternative kept for reference:
+                -- FROM    loghubods.useractive_log
+                -- WHERE   dt="${dt}"
+                FROM    loghubods.useractive_log_per5min
+                -- Upper bound "235500" = last 5-minute slot of the day (string comparison).
+                WHERE   dt BETWEEN CONCAT("${dt}","000000") AND CONCAT("${dt}","235500")
+                AND     apptype IN ("4")
+            ) sub
+    LEFT JOIN t_abmap m
+    ON      sub.apptype = "4"
+    AND     sub.suffix = m.suffix
+    GROUP BY SUBSTR(sub.dt,1,8)
+             ,sub.apptype
+             ,COALESCE(m.abcode,"other")
+)
+SELECT  a.dt
+        ,a.apptype
+        ,a.abcode
+        ,a.exp_per_dau
+        ,a.str_one
+        ,a.ros_one
+        ,a.str
+        ,a.ros
+        ,a.str_plus
+        ,a.ros_minus
+        ,a.rovn
+        ,a.vovh24
+        ,a.dau
+        ,a.exp
+        ,a.is_share
+        ,a.share_cnt
+        ,a.is_return_1
+        ,a.return_n_uv
+        ,a.viewh24
+        ,a.return_n_uv_noself
+        ,b.dau2
+FROM    (
+            -- Per-group metrics over recommendation-page exposures; ratios are
+            -- COALESCEd to 0 (NULL from 0-denominator division) then rounded.
+            SELECT  dt
+                    ,apptype
+                    ,abcode
+                    ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+                    ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+                    ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+                    ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+                    ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+                    ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+                    ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+                    ,COUNT(DISTINCT mid) AS dau
+                    ,COUNT(1) AS exp
+                    ,COALESCE(SUM(is_share),0) AS is_share
+                    ,COALESCE(SUM(share_cnt),0) AS share_cnt
+                    ,COALESCE(SUM(is_return_1),0) AS is_return_1
+                    ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+                    ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+                    ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+            FROM    t_base
+            WHERE   page = "推荐"
+            GROUP BY dt
+                     ,apptype
+                     ,abcode
+        ) a
+LEFT JOIN t_dau2 b
+ON      a.dt = b.dt
+AND     a.apptype = b.apptype
+AND     a.abcode = b.abcode
+ORDER BY a.dt DESC,a.apptype,a.abcode
+;

+ 21 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/00_uv情况.sql

@@ -0,0 +1,21 @@
+
+-- UV/event counts of user_share_log per topic plus a grand total per day.
+-- GROUPING SETS: (dt, topic) gives per-topic rows; (dt) gives the daily total,
+-- whose NULL topic is relabeled "合计" (= grand total) via COALESCE.
+SELECT dt, COALESCE(topic, '合计') as topic,
+       count(distinct machinecode) as uv, sum(1) as cnt
+FROM loghubods.user_share_log
+WHERE dt = '${dt}'
+GROUP BY dt, topic
+GROUPING SETS ((dt, topic), (dt))
+
+UNION ALL
+
+-- Split 'click' rows by whether the clicker is the original sharer:
+-- the sharer's machinecode is the first '-'-separated segment of shareid.
+SELECT dt,
+       CASE WHEN split(shareid, '-')[0] = machinecode
+            THEN 'click_自点' ELSE 'click_他点' END as topic,
+       count(distinct machinecode) as uv, sum(1) as cnt
+FROM loghubods.user_share_log
+WHERE dt = '${dt}' AND topic = 'click'
+GROUP BY dt,
+       CASE WHEN split(shareid, '-')[0] = machinecode
+            THEN 'click_自点' ELSE 'click_他点' END

+ 64 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_rootshareid来源分布.sql

@@ -0,0 +1,64 @@
+-- Source-time distribution of rootshareid among today's clicks
+-- Trace today's return clicks back to the ORIGIN share via rootshareid, grouped by "shared N days ago"
+-- Difference vs. the shareid version: rootshareid points to the top of the propagation chain,
+-- i.e. this shows the time span of the ORIGIN share
+-- Share-side ratios: denominator = all shares made on that day N days ago
+-- Return-side ratios: denominator = today's total returns (SUM ... OVER() window function)
+-- vid: share rows use shareobjectid, click rows use clickobjectid
+-- Usage: python fetch_daily.py "tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_rootshareid来源分布.sql" --date 20260210
+
+WITH clicks AS (
+    -- Today's click rows (kept at row level, no aggregation)
+    SELECT rootshareid, machinecode, clickobjectid
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}'
+      AND topic = 'click'
+      AND rootshareid IS NOT NULL AND rootshareid <> ''
+),
+share_info AS (
+    -- 90-day lookback: first share date + sharer + shared video, per shareid
+    SELECT
+        shareid,
+        MIN(dt)                  AS share_date,
+        MAX(machinecode)         AS share_mid,
+        MAX(shareobjectid)       AS share_vid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -90, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY shareid
+),
+daily_total AS (
+    -- 90-day lookback: each day's total share cnt / uv / vid (denominators for share-side ratios)
+    SELECT
+        dt                             AS share_date,
+        COUNT(DISTINCT shareid)        AS day_share_cnt,
+        COUNT(DISTINCT machinecode)    AS day_share_uv,
+        COUNT(DISTINCT shareobjectid)  AS day_share_vid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -90, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY dt
+)
+SELECT
+    '${dt}'                                                                                          AS dt,
+    DATEDIFF(TO_DATE('${dt}', 'yyyyMMdd'), TO_DATE(si.share_date, 'yyyyMMdd'), 'dd')                 AS `来自N天前的分享`,
+    COUNT(DISTINCT c.rootshareid)                                                                    AS `分享cnt`,
+    ROUND(COUNT(DISTINCT c.rootshareid) * 100.0 / tot.day_share_cnt, 2)                              AS `分享cnt占比`,
+    COUNT(DISTINCT si.share_mid)                                                                     AS `分享uv`,
+    ROUND(COUNT(DISTINCT si.share_mid) * 100.0 / tot.day_share_uv, 2)                                AS `分享uv占比`,
+    COUNT(DISTINCT si.share_vid)                                                                     AS `分享vid`,
+    ROUND(COUNT(DISTINCT si.share_vid) * 100.0 / tot.day_share_vid, 2)                               AS `分享vid占比`,
+    COUNT(*)                                                                                         AS `回流cnt`,
+    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 2)                                                AS `回流cnt占比`,
+    COUNT(DISTINCT c.machinecode)                                                                    AS `回流uv`,
+    ROUND(COUNT(DISTINCT c.machinecode) * 100.0 / SUM(COUNT(DISTINCT c.machinecode)) OVER(), 2)      AS `回流uv占比`,
+    COUNT(DISTINCT c.clickobjectid)                                                                  AS `回流vid`,
+    ROUND(COUNT(DISTINCT c.clickobjectid) * 100.0 / SUM(COUNT(DISTINCT c.clickobjectid)) OVER(), 2) AS `回流vid占比`
+FROM clicks c
+LEFT JOIN share_info si ON c.rootshareid = si.shareid
+LEFT JOIN daily_total tot ON si.share_date = tot.share_date
+GROUP BY si.share_date, tot.day_share_cnt, tot.day_share_uv, tot.day_share_vid
+ORDER BY `来自N天前的分享`

+ 63 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布.sql

@@ -0,0 +1,63 @@
+-- Source-time distribution of shareid among today's clicks
+-- Trace today's return clicks back to the earlier share, grouped by "shared N days ago"
+-- Share-side ratios: denominator = all shares made on that day N days ago
+-- Return-side ratios: denominator = today's total returns (SUM ... OVER() window function)
+-- vid: share rows use shareobjectid, click rows use clickobjectid
+-- Usage: python fetch_daily.py "tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布.sql" --date 20260210
+
+WITH clicks AS (
+    -- Today's click rows (kept at row level, no aggregation)
+    SELECT shareid, machinecode, clickobjectid
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}'
+      AND topic = 'click'
+      AND shareid IS NOT NULL AND shareid <> ''
+),
+share_info AS (
+    -- 90-day lookback: first share date + sharer + shared video, per shareid
+    SELECT
+        shareid,
+        MIN(dt)              AS share_date,
+        MAX(machinecode)     AS share_mid,
+        MAX(shareobjectid)   AS share_vid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -90, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY shareid
+),
+daily_total AS (
+    -- 90-day lookback: each day's total share cnt / uv / vid (denominators for share-side ratios)
+    SELECT
+        dt                             AS share_date,
+        COUNT(DISTINCT shareid)        AS day_share_cnt,
+        COUNT(DISTINCT machinecode)    AS day_share_uv,
+        COUNT(DISTINCT shareobjectid)  AS day_share_vid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -90, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY dt
+)
+SELECT
+    '${dt}'                                                                                          AS dt,
+    DATEDIFF(TO_DATE('${dt}', 'yyyyMMdd'), TO_DATE(si.share_date, 'yyyyMMdd'), 'dd')                 AS `来自N天前的分享`,
+    COUNT(DISTINCT c.shareid)                                                                        AS `分享cnt`,
+    ROUND(COUNT(DISTINCT c.shareid) * 100.0 / tot.day_share_cnt, 2)                                  AS `分享cnt占比`,
+    COUNT(DISTINCT si.share_mid)                                                                     AS `分享uv`,
+    ROUND(COUNT(DISTINCT si.share_mid) * 100.0 / tot.day_share_uv, 2)                                AS `分享uv占比`,
+    COUNT(DISTINCT si.share_vid)                                                                     AS `分享vid`,
+    ROUND(COUNT(DISTINCT si.share_vid) * 100.0 / tot.day_share_vid, 2)                               AS `分享vid占比`,
+    COUNT(*)                                                                                         AS `回流cnt`,
+    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 2)                                                AS `回流cnt占比`,
+    COUNT(DISTINCT c.machinecode)                                                                    AS `回流uv`,
+    ROUND(COUNT(DISTINCT c.machinecode) * 100.0 / SUM(COUNT(DISTINCT c.machinecode)) OVER(), 2)      AS `回流uv占比`,
+    COUNT(DISTINCT c.clickobjectid)                                                                  AS `回流vid`,
+    ROUND(COUNT(DISTINCT c.clickobjectid) * 100.0 / SUM(COUNT(DISTINCT c.clickobjectid)) OVER(), 2) AS `回流vid占比`
+FROM clicks c
+LEFT JOIN share_info si ON c.shareid = si.shareid
+LEFT JOIN daily_total tot ON si.share_date = tot.share_date
+GROUP BY si.share_date, tot.day_share_cnt, tot.day_share_uv, tot.day_share_vid
+ORDER BY `来自N天前的分享`

+ 56 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布_180d.sql

@@ -0,0 +1,56 @@
+-- Source-time distribution of shareid among today's clicks (180-day lookback variant)
+-- Trace today's return clicks back to the earlier share, grouped by "shared N days ago"
+-- Share-side ratios: denominator = all shares made on that day N days ago
+-- Return-side ratios: denominator = today's total returns (SUM ... OVER() window function)
+-- Usage: python fetch_daily.py "tasks/00_表的洞察/loghubods.user_share_log/00_洞察/01_click_shareid来源分布_180d.sql" --date 20260210
+
+WITH clicks AS (
+    -- Today's click rows (kept at row level, no aggregation)
+    SELECT shareid, machinecode
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}'
+      AND topic = 'click'
+      AND shareid IS NOT NULL AND shareid <> ''
+),
+share_info AS (
+    -- 180-day lookback: first share date + sharer, per shareid
+    SELECT
+        shareid,
+        MIN(dt)              AS share_date,
+        MAX(machinecode)     AS share_mid
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -180, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY shareid
+),
+daily_total AS (
+    -- 180-day lookback: each day's total share cnt and uv (denominators for share-side ratios)
+    SELECT
+        dt                             AS share_date,
+        COUNT(DISTINCT shareid)        AS day_share_cnt,
+        COUNT(DISTINCT machinecode)    AS day_share_uv
+    FROM loghubods.user_share_log
+    WHERE dt >= TO_CHAR(DATEADD(TO_DATE('${dt}', 'yyyyMMdd'), -180, 'dd'), 'yyyyMMdd')
+      AND dt <= '${dt}'
+      AND topic = 'share'
+      AND shareid IS NOT NULL AND shareid <> ''
+    GROUP BY dt
+)
+SELECT
+    '${dt}'                                                                                          AS dt,
+    DATEDIFF(TO_DATE('${dt}', 'yyyyMMdd'), TO_DATE(si.share_date, 'yyyyMMdd'), 'dd')                 AS `来自N天前的分享`,
+    COUNT(DISTINCT c.shareid)                                                                        AS `分享cnt`,
+    ROUND(COUNT(DISTINCT c.shareid) * 100.0 / tot.day_share_cnt, 2)                                  AS `分享cnt占比`,
+    COUNT(DISTINCT si.share_mid)                                                                     AS `分享uv`,
+    ROUND(COUNT(DISTINCT si.share_mid) * 100.0 / tot.day_share_uv, 2)                                AS `分享uv占比`,
+    COUNT(*)                                                                                         AS `回流cnt`,
+    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER(), 2)                                                AS `回流cnt占比`,
+    COUNT(DISTINCT c.machinecode)                                                                    AS `回流uv`,
+    ROUND(COUNT(DISTINCT c.machinecode) * 100.0 / SUM(COUNT(DISTINCT c.machinecode)) OVER(), 2)      AS `回流uv占比`
+FROM clicks c
+LEFT JOIN share_info si ON c.shareid = si.shareid
+LEFT JOIN daily_total tot ON si.share_date = tot.share_date
+GROUP BY si.share_date, tot.day_share_cnt, tot.day_share_uv
+ORDER BY `来自N天前的分享`

+ 26 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/02_click_depth分布.sql

@@ -0,0 +1,26 @@
+-- Distribution of clicks bucketed by each user's max share depth
+-- Per user: take the day's maximum usersharedepth; buckets 0~50 shown individually, >50 collapsed into bucket 51
+-- vid = distinct clickobjectid within the bucket
+-- Usage: python fetch_daily.py .../02_click_depth分布.sql --date 20260210
+
+SELECT dt, depth_bucket,
+       uv,  ROUND(uv  * 100.0 / SUM(uv)  OVER(PARTITION BY dt), 2) as uv_pct,
+       cnt, ROUND(cnt * 100.0 / SUM(cnt) OVER(PARTITION BY dt), 2) as cnt_pct,
+       vid, ROUND(vid * 100.0 / SUM(vid) OVER(PARTITION BY dt), 2) as vid_pct,
+       actual_max
+FROM (
+    SELECT t.dt,
+           CASE WHEN max_depth <= 50 THEN max_depth ELSE 51 END as depth_bucket,
+           COUNT(DISTINCT machinecode) as uv,
+           COUNT(1) as cnt,
+           COUNT(DISTINCT clickobjectid) as vid,
+           MAX(max_depth) as actual_max
+    FROM (
+        -- Window (not GROUP BY) so every click row keeps its user's day-level max depth
+        SELECT dt, machinecode, clickobjectid,
+               MAX(CAST(usersharedepth AS BIGINT)) OVER(PARTITION BY dt, machinecode) as max_depth
+        FROM loghubods.user_share_log
+        WHERE dt = '${dt}' AND topic = 'click'
+    ) t
+    GROUP BY t.dt, CASE WHEN max_depth <= 50 THEN max_depth ELSE 51 END
+) agg
+ORDER BY depth_bucket

+ 27 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/03_click_top视频.sql

@@ -0,0 +1,27 @@
+-- Top-100 videos by click (ordered by uv desc), per-video aggregation + title and publish time
+-- NOTE(review): gmt_create_timestamp (milliseconds) is used as the publish time — confirm it matches the real publish time
+-- Usage: python fetch_daily.py .../03_click_top视频.sql --date 20260210
+
+SELECT t.dt, t.clickobjectid,
+       v.title,
+       TO_CHAR(FROM_UNIXTIME(v.publish_ts / 1000), 'yyyy-MM-dd HH:mm:ss') as publish_time,
+       DATEDIFF(TO_DATE(t.dt, 'yyyyMMdd'), TO_DATE(TO_CHAR(FROM_UNIXTIME(v.publish_ts / 1000), 'yyyyMMdd'), 'yyyyMMdd'), 'dd') as days_since_pub,
+       t.uv,  ROUND(t.uv  * 100.0 / SUM(t.uv)  OVER(PARTITION BY t.dt), 2) as uv_pct,
+       t.cnt, ROUND(t.cnt * 100.0 / SUM(t.cnt) OVER(PARTITION BY t.dt), 2) as cnt_pct,
+       t.max_depth, t.avg_depth
+FROM (
+    SELECT dt, clickobjectid,
+           COUNT(DISTINCT machinecode) as uv,
+           COUNT(1) as cnt,
+           MAX(CAST(usersharedepth AS BIGINT)) as max_depth,
+           ROUND(AVG(CAST(usersharedepth AS BIGINT)), 2) as avg_depth,
+           ROW_NUMBER() OVER(PARTITION BY dt ORDER BY COUNT(DISTINCT machinecode) DESC) as rn
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}' AND topic = 'click'
+    GROUP BY dt, clickobjectid
+) t
+LEFT JOIN (
+    SELECT id as vid, title, gmt_create_timestamp as publish_ts
+    FROM videoods.wx_video
+) v ON t.clickobjectid = v.vid
+WHERE t.rn <= 100
+ORDER BY t.uv DESC

+ 27 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/04_click_top视频_by_depth.sql

@@ -0,0 +1,27 @@
+-- Top-100 videos by click (ordered by max_depth desc), per-video aggregation + title and publish time
+-- Same as 03_click_top视频.sql except ranking/ordering is by MAX(usersharedepth) instead of uv
+-- Usage: python fetch_daily.py .../04_click_top视频_by_depth.sql --date 20260210
+
+SELECT t.dt, t.clickobjectid,
+       v.title,
+       TO_CHAR(FROM_UNIXTIME(v.publish_ts / 1000), 'yyyy-MM-dd HH:mm:ss') as publish_time,
+       DATEDIFF(TO_DATE(t.dt, 'yyyyMMdd'), TO_DATE(TO_CHAR(FROM_UNIXTIME(v.publish_ts / 1000), 'yyyyMMdd'), 'yyyyMMdd'), 'dd') as days_since_pub,
+       t.uv,  ROUND(t.uv  * 100.0 / SUM(t.uv)  OVER(PARTITION BY t.dt), 2) as uv_pct,
+       t.cnt, ROUND(t.cnt * 100.0 / SUM(t.cnt) OVER(PARTITION BY t.dt), 2) as cnt_pct,
+       t.max_depth, t.avg_depth
+FROM (
+    SELECT dt, clickobjectid,
+           COUNT(DISTINCT machinecode) as uv,
+           COUNT(1) as cnt,
+           MAX(CAST(usersharedepth AS BIGINT)) as max_depth,
+           ROUND(AVG(CAST(usersharedepth AS BIGINT)), 2) as avg_depth,
+           ROW_NUMBER() OVER(PARTITION BY dt ORDER BY MAX(CAST(usersharedepth AS BIGINT)) DESC) as rn
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}' AND topic = 'click'
+    GROUP BY dt, clickobjectid
+) t
+LEFT JOIN (
+    SELECT id as vid, title, gmt_create_timestamp as publish_ts
+    FROM videoods.wx_video
+) v ON t.clickobjectid = v.vid
+WHERE t.rn <= 100
+ORDER BY t.max_depth DESC

+ 28 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/05_click_top_from_mid.sql

@@ -0,0 +1,28 @@
+-- Top-1000 sharers (from_mid) by number of distinct target_mid users brought back
+-- from_mid = split(shareid, '-')[0], target_mid = machinecode (self-clicks excluded)
+-- Usage: python fetch_daily.py .../05_click_top_from_mid.sql --date 20260210
+
+SELECT dt, from_mid,
+       target_uv,
+       ROUND(target_uv * 100.0 / SUM(target_uv) OVER(PARTITION BY dt), 2) as target_uv_pct,
+       cnt,
+       vid,
+       max_depth, avg_depth
+FROM (
+    SELECT dt,
+           split(shareid, '-')[0] as from_mid,
+           COUNT(DISTINCT machinecode) as target_uv,
+           COUNT(1) as cnt,
+           COUNT(DISTINCT clickobjectid) as vid,
+           MAX(CAST(usersharedepth AS BIGINT)) as max_depth,
+           ROUND(AVG(CAST(usersharedepth AS BIGINT)), 2) as avg_depth,
+           ROW_NUMBER() OVER(PARTITION BY dt ORDER BY COUNT(DISTINCT machinecode) DESC) as rn
+    FROM loghubods.user_share_log
+    WHERE dt = '${dt}' AND topic = 'click'
+      AND split(shareid, '-')[0] <> machinecode  -- exclude self-clicks
+      -- keep internal traffic only (exclude returns brought back via external channels);
+      -- parentheses are required: AND binds tighter than OR, so without them
+      -- NULL-rootsourceid rows would bypass the self-click filter above
+      AND (rootsourceid = '' OR rootsourceid IS NULL)
+    GROUP BY dt, split(shareid, '-')[0]
+) t
+WHERE rn <= 1000
+ORDER BY target_uv DESC

+ 29 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/06_活跃表概况.sql

@@ -0,0 +1,29 @@
+
+-- Basic overview of the active-user table ods_user_active_log_info_day
+-- UV & CNT grouped by push_type / channel_type, compared against share_log click UV
+
+-- 1. Grouped by push_type (plus a grand-total row via GROUPING SETS)
+SELECT dt, 'push_type' AS dim, COALESCE(push_type, '合计') AS val,
+       COUNT(DISTINCT machinecode) AS uv, COUNT(1) AS cnt
+FROM loghubods.ods_user_active_log_info_day
+WHERE dt = '${dt}'
+GROUP BY dt, push_type
+GROUPING SETS ((dt, push_type), (dt))
+
+UNION ALL
+
+-- 2. Grouped by channel_type
+SELECT dt, 'channel_type' AS dim, channel_type AS val,
+       COUNT(DISTINCT machinecode) AS uv, COUNT(1) AS cnt
+FROM loghubods.ods_user_active_log_info_day
+WHERE dt = '${dt}'
+GROUP BY dt, channel_type
+
+UNION ALL
+
+-- 3. share_log click UV for comparison
+SELECT dt, 'share_log' AS dim, 'click_uv' AS val,
+       COUNT(DISTINCT machinecode) AS uv, COUNT(1) AS cnt
+FROM loghubods.user_share_log
+WHERE dt = '${dt}' AND topic = 'click'
+GROUP BY dt

+ 23 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/07_click_来源分布.sql

@@ -0,0 +1,23 @@
+
+-- Source distribution of share_log click returns
+-- LEFT JOIN the active-user table, grouped by push_type + channel_type ('未匹配' = no match)
+-- NOTE(review): join key includes subsessionid — if the active table can hold several rows
+-- per (dt, machinecode, subsessionid), click rows fan out and cnt is inflated; confirm uniqueness
+
+SELECT  s.dt
+       ,COALESCE(a.push_type, '未匹配') AS push_type
+       ,COALESCE(a.channel_type, '未匹配') AS channel_type
+       ,COUNT(1) AS cnt
+       ,COUNT(DISTINCT s.machinecode) AS uv
+       ,COUNT(DISTINCT s.clickobjectid) AS vid_cnt
+       ,MAX(CAST(s.usersharedepth AS BIGINT)) AS max_depth
+       ,AVG(CAST(s.usersharedepth AS BIGINT)) AS avg_depth
+FROM loghubods.user_share_log s
+LEFT JOIN loghubods.ods_user_active_log_info_day a
+  ON  s.dt = a.dt
+  AND s.machinecode = a.machinecode
+  AND s.subsessionid = a.subsessionid
+WHERE s.dt = '${dt}'
+  AND s.topic = 'click'
+GROUP BY s.dt
+        ,COALESCE(a.push_type, '未匹配')
+        ,COALESCE(a.channel_type, '未匹配')
+ORDER BY cnt DESC

+ 25 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/08_click_level_channel分布.sql

@@ -0,0 +1,25 @@
+
+-- share_log click LEFT JOIN the active-user table
+-- Grouped by level_type / channel_type / channel_type_frist (column name spelled this way upstream)
+-- to look at cnt/uv proportions; '未匹配' = no match in the active table
+
+SELECT  s.dt
+       ,COALESCE(a.level_type, '未匹配') AS level_type
+       ,COALESCE(a.channel_type, '未匹配') AS channel_type
+       ,COALESCE(a.channel_type_frist, '未匹配') AS channel_type_frist
+       ,COUNT(1) AS cnt
+       ,ROUND(COUNT(1) * 100.0 / SUM(COUNT(1)) OVER(PARTITION BY s.dt), 2) AS cnt_pct
+       ,COUNT(DISTINCT s.machinecode) AS uv
+       ,ROUND(COUNT(DISTINCT s.machinecode) * 100.0
+              / SUM(COUNT(DISTINCT s.machinecode)) OVER(PARTITION BY s.dt), 2) AS uv_pct
+FROM loghubods.user_share_log s
+LEFT JOIN loghubods.ods_user_active_log_info_day a
+  ON  s.dt = a.dt
+  AND s.machinecode = a.machinecode
+  AND s.subsessionid = a.subsessionid
+WHERE s.dt = '${dt}'
+  AND s.topic = 'click'
+GROUP BY s.dt
+        ,COALESCE(a.level_type, '未匹配')
+        ,COALESCE(a.channel_type, '未匹配')
+        ,COALESCE(a.channel_type_frist, '未匹配')
+ORDER BY cnt DESC

+ 49 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/09_click_内外部_首层分布.sql

@@ -0,0 +1,49 @@
+
+-- Clicks split by internal/external + external-first-layer vs. external-non-first-layer
+-- Internal test: rootsourceid = '' OR rootsourceid IS NULL
+-- External-first-layer test: reuses the level_type logic of ods_user_active_log_info_day
+
+WITH t9 AS (
+    -- Ad-put root_source_id -> channel mapping for the 企微 put type
+    -- NOTE(review): t2 (content_platform_account) is joined but never referenced — confirm the join is intentional
+    SELECT  t1.root_source_id AS rootsourceid
+           ,t1.channel
+    FROM    loghubods.ad_put_flow_record_tencent_day t1
+    LEFT JOIN loghubods.content_platform_account t2
+    ON      t1.channel = t2.channel
+    WHERE   t1.dt = MAX_PT('loghubods.ad_put_flow_record_tencent_day')
+    AND     t1.put_type_one = '企微'
+    AND     t1.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_|dyycd_'
+    GROUP BY t1.root_source_id, t1.channel
+)
+
+SELECT dt,
+       COALESCE(source_type, '合计') AS source_type,
+       COALESCE(level_type, '合计') AS level_type,
+       COUNT(DISTINCT machinecode) AS uv, COUNT(1) AS cnt
+FROM (
+    SELECT s.dt, s.machinecode,
+           CASE WHEN s.rootsourceid = '' OR s.rootsourceid IS NULL
+                THEN '内部' ELSE '外部' END AS source_type,
+           -- First-layer classification: depth threshold depends on whether the channel is in the allow-list
+           CASE
+               WHEN s.rootsourceid = '' OR s.rootsourceid IS NULL
+                   THEN '内部'
+               WHEN s.rootsourceid REGEXP 'touliu_tencentwbqw_|dyyqw_'
+                   AND t9.channel REGEXP 'xycsd|csaq|shy|jxjx|gzcr|xyjj|jxatm|xjcy|yqyx|hbwq|jxxm|gzmy|cdjh|gzjr|gzxts|twhc|qdjdz|sjzyd|gzyhc|djh|gzlx|yywl|szjn|gzdd1|cqqd|cqslh|hzjy|hzjh|sclh|xyhc|snss'
+                   AND CAST(s.usersharedepth AS BIGINT) <= 1
+                   THEN '外部首层'
+               WHEN s.rootsourceid REGEXP 'touliu_tencentwbqw_|dyyqw_'
+                   AND (t9.channel IS NULL OR t9.channel NOT REGEXP 'xycsd|csaq|shy|jxjx|gzcr|xyjj|jxatm|xjcy|yqyx|hbwq|jxxm|gzmy|cdjh|gzjr|gzxts|twhc|qdjdz|sjzyd|gzyhc|djh|gzlx|yywl|szjn|gzdd1|cqqd|cqslh|hzjy|hzjh|sclh|xyhc|snss')
+                   AND CAST(s.usersharedepth AS BIGINT) = 0
+                   THEN '外部首层'
+               WHEN s.rootsourceid NOT REGEXP 'touliu_tencentwbqw_|dyyqw_'
+                   AND CAST(s.usersharedepth AS BIGINT) = 0
+                   THEN '外部首层'
+               ELSE '外部非首层'
+           END AS level_type
+    FROM loghubods.user_share_log s
+    LEFT JOIN t9
+    ON   s.rootsourceid = t9.rootsourceid
+    WHERE s.dt = '${dt}' AND s.topic = 'click'
+) t
+GROUP BY dt, source_type, level_type
+GROUPING SETS ((dt, source_type, level_type), (dt, source_type), (dt, level_type), (dt))
+ORDER BY source_type, level_type

+ 54 - 0
tasks/00_表的洞察/loghubods.user_share_log/00_洞察/10_click_top_from_mid_排除外部首层.sql

@@ -0,0 +1,54 @@
+
+-- Top-1000 sharers (from_mid) after excluding external-first-layer traffic,
+-- ordered by number of distinct target_mid users brought back
+-- Kept: internal + external non-first-layer
+-- Usage: python fetch_daily.py .../10_click_top_from_mid_排除外部首层.sql --date 20260209
+
+WITH t9 AS (
+    -- Ad-put root_source_id -> channel mapping for the 企微 put type
+    -- NOTE(review): t2 (content_platform_account) is joined but never referenced — confirm the join is intentional
+    SELECT  t1.root_source_id AS rootsourceid
+           ,t1.channel
+    FROM    loghubods.ad_put_flow_record_tencent_day t1
+    LEFT JOIN loghubods.content_platform_account t2
+    ON      t1.channel = t2.channel
+    WHERE   t1.dt = MAX_PT('loghubods.ad_put_flow_record_tencent_day')
+    AND     t1.put_type_one = '企微'
+    AND     t1.root_source_id REGEXP 'touliu_tencentwbqw_|dyyqw_|dyycd_'
+    GROUP BY t1.root_source_id, t1.channel
+)
+
+SELECT dt, from_mid,
+       target_uv,
+       ROUND(target_uv * 100.0 / SUM(target_uv) OVER(PARTITION BY dt), 2) as target_uv_pct,
+       cnt,
+       vid,
+       max_depth, avg_depth
+FROM (
+    SELECT s.dt,
+           split(s.shareid, '-')[0] as from_mid,
+           COUNT(DISTINCT s.machinecode) as target_uv,
+           COUNT(1) as cnt,
+           COUNT(DISTINCT s.clickobjectid) as vid,
+           MAX(CAST(s.usersharedepth AS BIGINT)) as max_depth,
+           ROUND(AVG(CAST(s.usersharedepth AS BIGINT)), 2) as avg_depth,
+           ROW_NUMBER() OVER(PARTITION BY s.dt ORDER BY COUNT(DISTINCT s.machinecode) DESC) as rn
+    FROM loghubods.user_share_log s
+    LEFT JOIN t9
+    ON   s.rootsourceid = t9.rootsourceid
+    WHERE s.dt = '${dt}' AND s.topic = 'click'
+      AND split(s.shareid, '-')[0] <> s.machinecode  -- exclude self-clicks
+      -- exclude external first layer (negation of the '外部首层' cases in 09_click_内外部_首层分布.sql)
+      AND NOT (
+          (s.rootsourceid REGEXP 'touliu_tencentwbqw_|dyyqw_'
+           AND t9.channel REGEXP 'xycsd|csaq|shy|jxjx|gzcr|xyjj|jxatm|xjcy|yqyx|hbwq|jxxm|gzmy|cdjh|gzjr|gzxts|twhc|qdjdz|sjzyd|gzyhc|djh|gzlx|yywl|szjn|gzdd1|cqqd|cqslh|hzjy|hzjh|sclh|xyhc|snss'
+           AND CAST(s.usersharedepth AS BIGINT) <= 1)
+          OR
+          (s.rootsourceid REGEXP 'touliu_tencentwbqw_|dyyqw_'
+           AND (t9.channel IS NULL OR t9.channel NOT REGEXP 'xycsd|csaq|shy|jxjx|gzcr|xyjj|jxatm|xjcy|yqyx|hbwq|jxxm|gzmy|cdjh|gzjr|gzxts|twhc|qdjdz|sjzyd|gzyhc|djh|gzlx|yywl|szjn|gzdd1|cqqd|cqslh|hzjy|hzjh|sclh|xyhc|snss')
+           AND CAST(s.usersharedepth AS BIGINT) = 0)
+          OR
+          (s.rootsourceid NOT REGEXP 'touliu_tencentwbqw_|dyyqw_'
+           AND CAST(s.usersharedepth AS BIGINT) = 0)
+      )
+    GROUP BY s.dt, split(s.shareid, '-')[0]
+) t
+WHERE rn <= 1000
+ORDER BY target_uv DESC

+ 90 - 0
tasks/00_表的洞察/loghubods.user_share_log/export_neo4j.py

@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+将分享数据转换为 Neo4j 导入格式
+
+图模型: (User) -[:SHARED {vid, ts}]-> (User)
+
+用法:
+    python export_neo4j.py output/05_图数据/20260111.csv
+    python export_neo4j.py output/05_图数据/*.csv  # 多文件
+"""
+
+import sys
+import csv
+from pathlib import Path
+
+def export_neo4j(input_files, output_dir):
+    """Convert share-edge CSVs into neo4j-admin bulk-import files.
+
+    Each input CSV must contain the columns from_mid, vid, target_mid, ts.
+    Writes users.csv (User nodes), shared.csv (SHARED relationships) and
+    import_cmd.sh (the neo4j-admin import command) into output_dir,
+    creating the directory if needed.
+    """
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    users = set()          # distinct mids seen on either end of an edge
+    relationships = []     # (from_mid, target_mid, vid, ts) tuples in file order
+
+    # Read every input file
+    for input_file in input_files:
+        print(f"读取: {input_file}")
+        with open(input_file, 'r', encoding='utf-8') as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                from_mid = row['from_mid']
+                vid = row['vid']
+                target_mid = row['target_mid']
+                ts = row['ts']
+
+                users.add(from_mid)
+                users.add(target_mid)
+                relationships.append((from_mid, target_mid, vid, ts))
+
+    print(f"用户数: {len(users)}, 分享关系数: {len(relationships)}")
+
+    # 1. Export user nodes (neo4j-admin header format: mid:ID, :LABEL)
+    users_file = output_dir / 'users.csv'
+    with open(users_file, 'w', newline='', encoding='utf-8') as f:
+        writer = csv.writer(f)
+        writer.writerow(['mid:ID', ':LABEL'])
+        for mid in users:
+            writer.writerow([mid, 'User'])
+    print(f"写入: {users_file}")
+
+    # 2. Export SHARED relationships (neo4j-admin header format: :START_ID, :END_ID, ..., :TYPE)
+    rels_file = output_dir / 'shared.csv'
+    with open(rels_file, 'w', newline='', encoding='utf-8') as f:
+        writer = csv.writer(f)
+        writer.writerow([':START_ID', ':END_ID', 'vid', 'ts:long', ':TYPE'])
+        for from_mid, target_mid, vid, ts in relationships:
+            writer.writerow([from_mid, target_mid, vid, ts, 'SHARED'])
+    print(f"写入: {rels_file}")
+
+    # 3. Generate the import command (printed and saved as a shell script)
+    cmd = f"""# Neo4j 导入命令 (停止数据库后执行):
+neo4j-admin database import full \\
+    --nodes={users_file.absolute()} \\
+    --relationships={rels_file.absolute()} \\
+    --overwrite-destination \\
+    neo4j
+"""
+    print(cmd)
+
+    cmd_file = output_dir / 'import_cmd.sh'
+    with open(cmd_file, 'w') as f:
+        f.write(cmd.strip())
+    print(f"写入: {cmd_file}")
+
+if __name__ == '__main__':
+    import glob
+
+    if len(sys.argv) < 2:
+        print("用法: python export_neo4j.py <csv文件或通配符>")
+        sys.exit(1)
+
+    # Expand glob patterns ourselves so quoted wildcards also work
+    input_files = []
+    for pattern in sys.argv[1:]:
+        input_files.extend(glob.glob(pattern))
+
+    if not input_files:
+        print(f"找不到文件: {sys.argv[1:]}")
+        sys.exit(1)
+
+    # Output goes to a 'neo4j' subdirectory next to the first input file
+    output_dir = Path(input_files[0]).parent / 'neo4j'
+    export_neo4j(input_files, output_dir)

+ 1 - 1
tasks/承接/rosn分析/02_实验组xTop10一级品类_vs对照组.sql

@@ -10,7 +10,7 @@ WITH t_raw AS
                     ELSE "其他"
             END AS page_type
     FROM    loghubods.dwd_recsys_alg_sample_all_20250212
-    WHERE   dt = '${dt}'
+    WHERE   dt LIKE '${dt}%'
     AND     apptype IN ("0","4")
     AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
     AND     abcode NOT IN ("ab100")

+ 256 - 0
tasks/承接/rosn分析/05_实验组xTop20视频_vs对照组_vor.config

@@ -0,0 +1,256 @@
+-- 预处理:解析 scoresmap + page 分类
+-- v5: 新增 vor 统计量 + score_pred/score_stat/score_real
+-- 排序公式: str * ros * vor
+WITH t_raw AS
+(
+    SELECT  *
+            ,REPLACE(GET_JSON_OBJECT(extend_alg,'$.scoresMap'),"\\","") AS scoresmap
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page_type
+    FROM    loghubods.dwd_recsys_alg_sample_all_20250212
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("0","4")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    AND     abcode NOT IN ("ab100")
+    AND     extend_alg IS NOT NULL
+    AND     GET_JSON_OBJECT(extend_alg,'$.scoresMap') IS NOT NULL
+)
+-- 过滤:只保留推荐页面
+,t_filtered AS
+(
+    SELECT  *
+    FROM    t_raw
+    WHERE   page_type = "推荐"
+)
+-- 特征提取与维度映射
+,t_base AS
+(
+    SELECT  dt
+            ,apptype
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "ab4-5"
+                    ELSE "其他"
+            END AS abcode
+            ,page_type AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+            ,scoresmap
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            ,1.22 * pow(CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE), 1.15) AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_stat
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.vor') AS DOUBLE) AS vor_stat
+            ,GET_JSON_OBJECT(v1_feature,'$.title') AS vid_title
+    FROM    t_filtered
+)
+,t_valid AS
+(
+    SELECT  *
+    FROM    t_base
+    WHERE   str_pred IS NOT NULL
+    AND     rosn_pred IS NOT NULL
+)
+-- 计算每个 abcode 下曝光量 top20 的 vid
+,t_vid_rank AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,vid
+            ,COUNT(1) AS vid_exp_cnt
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode ORDER BY COUNT(1) DESC) AS vid_rank
+    FROM    t_valid
+    GROUP BY dt, apptype, abcode, vid
+)
+,t_top5_vid AS
+(
+    SELECT  dt, apptype, abcode, vid, vid_rank
+    FROM    t_vid_rank
+    WHERE   vid_rank <= 20
+)
+-- 标记 top20 vid
+,t_with_top5 AS
+(
+    SELECT  a.*
+            ,CASE WHEN b.vid IS NOT NULL THEN a.vid ELSE NULL END AS top5_vid
+            ,CASE WHEN b.vid IS NOT NULL THEN a.vid_title ELSE NULL END AS top5_vid_title
+            ,b.vid_rank AS top5_vid_rank
+    FROM    t_valid a
+    LEFT JOIN t_top5_vid b
+    ON      a.dt = b.dt
+    AND     a.apptype = b.apptype
+    AND     a.abcode = b.abcode
+    AND     a.vid = b.vid
+)
+-- 先聚合
+,t_agg AS
+(
+    SELECT  dt
+            ,COALESCE(apptype, 'sum') AS apptype
+            ,COALESCE(abcode, 'sum') AS abcode
+            ,COALESCE(CAST(top5_vid AS STRING), 'all') AS vid
+            ,CASE WHEN GROUPING(top5_vid) = 1 THEN NULL ELSE MAX(top5_vid_title) END AS vid_title
+            ,CASE WHEN GROUPING(top5_vid) = 1 THEN NULL ELSE MAX(top5_vid_rank) END AS vid_rank
+            -- COPC
+            ,round((SUM(is_return_noself) / COUNT(1)) / NULLIF(SUM(str_pred) / COUNT(1), 0), 4) AS str_copc
+            ,round((SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0)) / NULLIF(SUM(rosn_pred) / COUNT(1), 0), 4) AS rosn_copc
+            ,round((SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0)) / NULLIF(SUM(rosn_stat) / COUNT(1), 0), 4) AS rosn_stat_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_pred), 0), 4) AS rovn_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_stat), 0), 4) AS rovn_stat_copc
+            -- 模型预测与真实值
+            ,round(COALESCE(SUM(is_return_noself) / COUNT(1),0),6) AS str_real
+            ,round(COALESCE(SUM(str_pred) / COUNT(1),0),6) AS str_pred
+            ,round(COALESCE(SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0),0),6) AS rosn_real
+            ,round(COALESCE(SUM(rosn_pred) / COUNT(1),0),6) AS rosn_pred
+            ,round(COALESCE(SUM(rosn_stat) / COUNT(1),0),6) AS rosn_stat
+            ,round(COALESCE(SUM(vor_stat) / COUNT(1),0),6) AS vor_stat
+            ,round(SUM(return_n_uv_noself) / COUNT(1), 6) AS rovn_real
+            ,round(AVG(str_pred * rosn_pred), 6) AS rovn_pred
+            ,round(AVG(str_pred * rosn_stat), 6) AS rovn_stat
+            -- score: str * ros * vor
+            ,round(AVG(str_pred * rosn_pred), 6) AS score_pred
+            ,round(AVG(str_pred * rosn_stat * vor_stat), 6) AS score_stat
+            ,round(SUM(return_n_uv_noself) / COUNT(1), 6) AS score_real
+            -- 误差
+            ,round(AVG(ABS(rosn_pred - return_n_uv_noself)),6) AS rosn_pred_mae
+            ,round(AVG(ABS(rosn_stat - return_n_uv_noself)),6) AS rosn_stat_mae
+            -- 业务指标
+            ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+            ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+            ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+            ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+            ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+            ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+            ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_with_top5
+    GROUP BY dt, apptype, abcode, top5_vid
+    GROUPING SETS (
+        (dt, apptype, abcode),
+        (dt, apptype, abcode, top5_vid)
+    )
+    HAVING  top5_vid IS NOT NULL OR GROUPING(top5_vid) = 1
+)
+-- 新增:获取对照组基准值并计算变化率
+,t_with_baseline AS
+(
+    SELECT  *
+            -- 计算曝光占比
+            ,round(exp * 1.0 / MAX(CASE WHEN vid = 'all' THEN exp END) OVER (PARTITION BY dt, apptype, abcode), 4) AS exp_pct
+            -- 对照组基准值(业务指标)
+            ,MAX(CASE WHEN abcode = '对照组' THEN exp_per_dau END) OVER (PARTITION BY dt, apptype, vid) AS exp_per_dau_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_one END) OVER (PARTITION BY dt, apptype, vid) AS str_one_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros_one END) OVER (PARTITION BY dt, apptype, vid) AS ros_one_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str END) OVER (PARTITION BY dt, apptype, vid) AS str_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros END) OVER (PARTITION BY dt, apptype, vid) AS ros_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_plus END) OVER (PARTITION BY dt, apptype, vid) AS str_plus_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros_minus END) OVER (PARTITION BY dt, apptype, vid) AS ros_minus_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn END) OVER (PARTITION BY dt, apptype, vid) AS rovn_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN vovh24 END) OVER (PARTITION BY dt, apptype, vid) AS vovh24_base
+            -- 对照组基准值(COPC 指标)
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_copc END) OVER (PARTITION BY dt, apptype, vid) AS str_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_copc END) OVER (PARTITION BY dt, apptype, vid) AS rosn_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_stat_copc END) OVER (PARTITION BY dt, apptype, vid) AS rosn_stat_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_copc END) OVER (PARTITION BY dt, apptype, vid) AS rovn_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_stat_copc END) OVER (PARTITION BY dt, apptype, vid) AS rovn_stat_copc_base
+            -- 对照组基准值(真实值)
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_real END) OVER (PARTITION BY dt, apptype, vid) AS str_real_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_real END) OVER (PARTITION BY dt, apptype, vid) AS rosn_real_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_real END) OVER (PARTITION BY dt, apptype, vid) AS rovn_real_base
+            -- 对照组基准值(vor 和 score)
+            ,MAX(CASE WHEN abcode = '对照组' THEN vor_stat END) OVER (PARTITION BY dt, apptype, vid) AS vor_stat_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN score_pred END) OVER (PARTITION BY dt, apptype, vid) AS score_pred_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN score_stat END) OVER (PARTITION BY dt, apptype, vid) AS score_stat_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN score_real END) OVER (PARTITION BY dt, apptype, vid) AS score_real_base
+            -- 对照组基准值(计数指标)
+            ,MAX(CASE WHEN abcode = '对照组' THEN dau END) OVER (PARTITION BY dt, apptype, vid) AS dau_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN exp END) OVER (PARTITION BY dt, apptype, vid) AS exp_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN is_share END) OVER (PARTITION BY dt, apptype, vid) AS is_share_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN share_cnt END) OVER (PARTITION BY dt, apptype, vid) AS share_cnt_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN is_return_1 END) OVER (PARTITION BY dt, apptype, vid) AS is_return_1_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN return_n_uv END) OVER (PARTITION BY dt, apptype, vid) AS return_n_uv_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN viewh24 END) OVER (PARTITION BY dt, apptype, vid) AS viewh24_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN return_n_uv_noself END) OVER (PARTITION BY dt, apptype, vid) AS return_n_uv_noself_base
+    FROM    t_agg
+)
+-- 最终输出:原有字段 + 变化率
+SELECT  dt
+        ,apptype
+        ,abcode
+        ,vid
+        ,vid_title
+        ,vid_rank
+        ,exp_pct
+        ,round((dau - dau_base) / NULLIF(dau_base, 0), 4) AS dau_chg
+        ,round((exp - exp_base) / NULLIF(exp_base, 0), 4) AS exp_chg
+        -- COPC
+        ,str_copc, rosn_copc, rosn_stat_copc, rovn_copc, rovn_stat_copc
+        -- 模型预测与真实值
+        ,str_real, str_pred, rosn_real, rosn_pred, rosn_stat, vor_stat
+        ,rovn_real, rovn_pred, rovn_stat
+        -- score: str * ros * vor
+        ,score_pred, score_stat, score_real
+        ,rosn_pred_mae, rosn_stat_mae
+        -- 业务指标
+        ,exp_per_dau, str_one, ros_one, str, ros, str_plus, ros_minus, rovn, vovh24
+        -- 计数
+        ,dau, exp, is_share, share_cnt, is_return_1, return_n_uv, viewh24, return_n_uv_noself
+        -- ========== 变化率字段 ==========
+        -- 业务指标变化率
+        ,round((exp_per_dau - exp_per_dau_base) / NULLIF(exp_per_dau_base, 0), 4) AS exp_per_dau_chg
+        ,round((str_one - str_one_base) / NULLIF(str_one_base, 0), 4) AS str_one_chg
+        ,round((ros_one - ros_one_base) / NULLIF(ros_one_base, 0), 4) AS ros_one_chg
+        ,round((str - str_base) / NULLIF(str_base, 0), 4) AS str_chg
+        ,round((ros - ros_base) / NULLIF(ros_base, 0), 4) AS ros_chg
+        ,round((str_plus - str_plus_base) / NULLIF(str_plus_base, 0), 4) AS str_plus_chg
+        ,round((ros_minus - ros_minus_base) / NULLIF(ros_minus_base, 0), 4) AS ros_minus_chg
+        ,round((rovn - rovn_base) / NULLIF(rovn_base, 0), 4) AS rovn_chg
+        ,round((vovh24 - vovh24_base) / NULLIF(vovh24_base, 0), 4) AS vovh24_chg
+        -- COPC 变化率
+        ,round((str_copc - str_copc_base) / NULLIF(str_copc_base, 0), 4) AS str_copc_chg
+        ,round((rosn_copc - rosn_copc_base) / NULLIF(rosn_copc_base, 0), 4) AS rosn_copc_chg
+        ,round((rosn_stat_copc - rosn_stat_copc_base) / NULLIF(rosn_stat_copc_base, 0), 4) AS rosn_stat_copc_chg
+        ,round((rovn_copc - rovn_copc_base) / NULLIF(rovn_copc_base, 0), 4) AS rovn_copc_chg
+        ,round((rovn_stat_copc - rovn_stat_copc_base) / NULLIF(rovn_stat_copc_base, 0), 4) AS rovn_stat_copc_chg
+        -- 真实值变化率
+        ,round((str_real - str_real_base) / NULLIF(str_real_base, 0), 4) AS str_real_chg
+        ,round((rosn_real - rosn_real_base) / NULLIF(rosn_real_base, 0), 4) AS rosn_real_chg
+        ,round((rovn_real - rovn_real_base) / NULLIF(rovn_real_base, 0), 4) AS rovn_real_chg
+        -- vor 和 score 变化率
+        ,round((vor_stat - vor_stat_base) / NULLIF(vor_stat_base, 0), 4) AS vor_stat_chg
+        ,round((score_pred - score_pred_base) / NULLIF(score_pred_base, 0), 4) AS score_pred_chg
+        ,round((score_stat - score_stat_base) / NULLIF(score_stat_base, 0), 4) AS score_stat_chg
+        ,round((score_real - score_real_base) / NULLIF(score_real_base, 0), 4) AS score_real_chg
+        -- 计数指标变化率
+        ,round((is_share - is_share_base) / NULLIF(is_share_base, 0), 4) AS is_share_chg
+        ,round((share_cnt - share_cnt_base) / NULLIF(share_cnt_base, 0), 4) AS share_cnt_chg
+        ,round((is_return_1 - is_return_1_base) / NULLIF(is_return_1_base, 0), 4) AS is_return_1_chg
+        ,round((return_n_uv - return_n_uv_base) / NULLIF(return_n_uv_base, 0), 4) AS return_n_uv_chg
+        ,round((viewh24 - viewh24_base) / NULLIF(viewh24_base, 0), 4) AS viewh24_chg
+        ,round((return_n_uv_noself - return_n_uv_noself_base) / NULLIF(return_n_uv_noself_base, 0), 4) AS return_n_uv_noself_chg
+FROM    t_with_baseline
+ORDER BY dt DESC, apptype, abcode, exp DESC
+;

+ 391 - 0
tasks/承接/rosn校准/plot_calibration.py

@@ -0,0 +1,391 @@
+"""
+校准曲线可视化:读取 output/02_分组校准数据 下的 CSV,生成交互式 HTML。
+
+用法:
+    python tasks/承接/rosn校准/plot_calibration.py
+"""
+
+import os
+import json
+import glob
+import pandas as pd
+from collections import defaultdict
+
# Input directories: grouped calibration CSVs and label-bucket calibration CSVs,
# both resolved relative to this script's location.
DATA_DIRS = [
    os.path.join(os.path.dirname(__file__), "output", "02_分组校准数据"),
    os.path.join(os.path.dirname(__file__), "output", "03_label分桶校准数据"),
]
# Destination path of the generated interactive HTML report.
OUT_HTML = os.path.join(os.path.dirname(__file__), "output", "calibration.html")
+
+
def load_data():
    """Read every CSV under the configured data directories into one DataFrame.

    Raises FileNotFoundError when no CSV exists; the `dt` column is normalized
    to string so it can be used as a selector key downstream.
    """
    csv_paths = []
    for directory in DATA_DIRS:
        csv_paths.extend(sorted(glob.glob(os.path.join(directory, "*.csv"))))
    if not csv_paths:
        raise FileNotFoundError(f"No CSV files found in {DATA_DIRS}")
    frames = [pd.read_csv(path) for path in csv_paths]
    merged = pd.concat(frames, ignore_index=True)
    merged["dt"] = merged["dt"].astype(str)
    return merged
+
+
def build_charts_data(df, calib_scale=1.22, calib_power=1.15):
    """Flatten the calibration CSV rows into a nested dict for the chart JS.

    Structure:
        ALL_DATA[apptype][bucket_type][group_key][dt] = [
            {bucket_id, predict, pred_校准, stat, label, cnt,
             range_begin, range_end}, ...
        ]

    Args:
        df: concatenated calibration data; must contain columns apptype,
            bucket_type, group_key, dt, bucket_id, predict, label, cnt,
            range_begin, range_end. "stat" is optional / may be NaN per row.
        calib_scale, calib_power: parameters of the power-law calibration
            pred_校准 = calib_scale * predict ** calib_power.  Defaults match
            the previously hard-coded constants (1.22, 1.15), so existing
            callers are unaffected.

    Returns:
        Nested defaultdicts keyed as above; each leaf list is sorted by
        bucket_id.
    """
    all_data = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list))))
    for _, row in df.iterrows():
        at = str(row["apptype"])
        bt = row["bucket_type"]
        gk = row["group_key"]
        dt = row["dt"]
        predict = float(row["predict"])
        all_data[at][bt][gk][dt].append({
            "bucket_id": int(row["bucket_id"]),
            "predict": predict,
            # power-law calibration curve, see calib_scale/calib_power above
            "pred_校准": round(calib_scale * predict ** calib_power, 6),
            # "stat" may be absent or NaN in some exports -> stored as None
            "stat": float(row["stat"]) if pd.notna(row.get("stat")) else None,
            "label": float(row["label"]),
            "cnt": int(row["cnt"]),
            "range_begin": float(row["range_begin"]),
            "range_end": float(row["range_end"]),
        })
    # keep each (apptype, bucket_type, group_key, dt) series ordered by bucket
    for at in all_data:
        for bt in all_data[at]:
            for gk in all_data[at][bt]:
                for dt in all_data[at][bt][gk]:
                    all_data[at][bt][gk][dt].sort(key=lambda r: r["bucket_id"])
    return all_data
+
+
def render_html(all_data, all_dts):
    """Render the single-chart ECharts HTML report (dynamic colors, 5 filters).

    Builds one self-contained page: the data is embedded as inline JSON and
    all filtering / re-rendering happens client-side in the page's JS.

    Args:
        all_data: nested mapping apptype -> bucket_type -> group_key -> dt ->
            list of bucket rows (as produced by build_charts_data).
        all_dts: all dt strings; rendered newest-first in the <select>.

    Side effects: writes the page to OUT_HTML and prints its path.
    """

    # Collect the distinct values of every filter dimension.
    all_apptypes = sorted(all_data.keys())
    all_bucket_types = sorted({bt for at in all_data for bt in all_data[at]})
    all_group_keys = sorted({gk for at in all_data for bt in all_data[at] for gk in all_data[at][bt]})

    # Human-readable apptype labels ("4" gets the product name).
    apptype_labels = {}
    for at in all_apptypes:
        apptype_labels[at] = "视频号" if at == "4" else f"apptype={at}"

    # ---- Serialize the data for embedding as inline JSON ----
    # Plain-dict copy of the defaultdict tree so json.dumps sees normal dicts.
    js_all_data = {}
    for at in all_data:
        js_all_data[at] = {}
        for bt in all_data[at]:
            js_all_data[at][bt] = {}
            for gk in all_data[at][bt]:
                js_all_data[at][bt][gk] = {}
                for dt, rows in all_data[at][bt][gk].items():
                    js_all_data[at][bt][gk][dt] = rows

    # ---- Build the filter-control HTML fragments ----
    dt_options = ''.join(
        f'<option value="{dt}">{dt}</option>' for dt in sorted(all_dts, reverse=True)
    )
    # Only the control group ("对照组") is checked by default.
    gk_checkboxes = ''.join(
        '<label class="cb-item">'
        f'<input type="checkbox" value="{gk}" {"checked" if gk == "对照组" else ""} onchange="renderChart()">'
        f'{gk}</label>'
        for gk in all_group_keys
    )
    bt_checkboxes = ''.join(
        f'<label class="cb-item"><input type="checkbox" value="{bt}" checked onchange="renderChart()">{bt}</label>'
        for bt in all_bucket_types
    )
    at_checkboxes = ''.join(
        '<label class="cb-item">'
        f'<input type="checkbox" value="{at}" checked onchange="renderChart()">'
        f'{apptype_labels[at]}</label>'
        for at in all_apptypes
    )

    # NOTE(review): the JS below takes the x-axis bucket ids from the first
    # selected combo that has rows — this assumes every combo shares the same
    # bucketing for a given dt; confirm if bucket counts can differ.
    html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>校准曲线</title>
<script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
<style>
    body {{ font-family: -apple-system, sans-serif; margin: 20px; background: #fafafa; }}
    .filter-row {{ display: flex; align-items: center; gap: 12px; margin-bottom: 12px; }}
    .filter-row > label {{ font-weight: 600; min-width: 56px; }}
    select {{ padding: 4px 8px; border-radius: 4px; border: 1px solid #ccc; }}
    .checkbox-group {{ display: flex; flex-wrap: wrap; gap: 6px 14px; }}
    .cb-item {{ display: inline-flex; align-items: center; gap: 4px; cursor: pointer; font-size: 13px; }}
    .cb-line {{ display: inline-block; width: 18px; height: 0; margin-bottom: 1px; }}
    #chart-container {{ background: #fff; border-radius: 8px; padding: 16px; box-shadow: 0 1px 3px rgba(0,0,0,.1); margin-top: 16px; }}
    #mae-info {{ background: #fff; border-radius: 8px; padding: 16px; box-shadow: 0 1px 3px rgba(0,0,0,.1); margin-top: 12px; font-size: 13px; line-height: 1.6; }}
    #mae-info table {{ font-size: 12px; font-variant-numeric: tabular-nums; }}
    #mae-info th {{ font-weight: 600; white-space: nowrap; }}
    #mae-info td {{ white-space: nowrap; }}
</style>
</head>
<body>
<h2>校准曲线</h2>

<div class="filter-row">
    <label>dt:</label>
    <select id="sel_dt" onchange="renderChart()">{dt_options}</select>
</div>
<div class="filter-row">
    <label>分组:</label>
    <span id="gk_checks" class="checkbox-group">{gk_checkboxes}</span>
</div>
<div class="filter-row">
    <label>指标:</label>
    <span id="metric_checks" class="checkbox-group">
        <label class="cb-item"><input type="checkbox" value="predict" checked onchange="renderChart()"><span class="cb-line" style="border-top:2px solid #333"></span>predict</label>
        <label class="cb-item"><input type="checkbox" value="pred_校准" checked onchange="renderChart()"><span class="cb-line" style="border-top:2px dashed #333"></span>pred_校准</label>
        <label class="cb-item"><input type="checkbox" value="stat" checked onchange="renderChart()"><span class="cb-line" style="border-top:2px dashed #333"></span>stat</label>
        <label class="cb-item"><input type="checkbox" value="label" checked onchange="renderChart()"><span class="cb-line" style="border-top:2px dotted #333"></span>label</label>
    </span>
</div>
<div class="filter-row">
    <label>分桶:</label>
    <span id="bt_checks" class="checkbox-group">{bt_checkboxes}</span>
</div>
<div class="filter-row">
    <label>apptype:</label>
    <span id="at_checks" class="checkbox-group">{at_checkboxes}</span>
</div>

<div id="chart-container">
    <div id="main-chart" style="width:100%;height:600px;"></div>
</div>
<div id="mae-info"></div>

<script>
var ALL_DATA = {json.dumps(js_all_data, ensure_ascii=False)};
var APPTYPE_LABELS = {json.dumps(apptype_labels, ensure_ascii=False)};

var PALETTE = [
    '#3b82f6', '#f97316', '#22c55e', '#ef4444', '#a855f7',
    '#06b6d4', '#f59e0b', '#ec4899', '#64748b', '#84cc16',
    '#0ea5e9', '#d946ef', '#14b8a6', '#f43f5e', '#8b5cf6',
    '#e11d48', '#059669', '#7c3aed', '#ea580c', '#0284c7',
];

var LINE_STYLES = {{
    'predict': 'solid',
    'pred_校准': [10, 3],
    'stat':    [6, 3],
    'label':   [2, 2]
}};

var chart = echarts.init(document.getElementById('main-chart'));

function getCheckedValues(id) {{
    var vals = [];
    document.getElementById(id).querySelectorAll('input[type=checkbox]').forEach(function(cb) {{
        if (cb.checked) vals.push(cb.value);
    }});
    return vals;
}}

function renderChart() {{
    var dt = document.getElementById('sel_dt').value;
    var selGroups = getCheckedValues('gk_checks');
    var selMetrics = getCheckedValues('metric_checks');
    var selBucketTypes = getCheckedValues('bt_checks');
    var selApptypes = getCheckedValues('at_checks');

    // 1. 构建 combo 列表 (apptype × bucket_type × group_key)
    var combos = [];
    selApptypes.forEach(function(at) {{
        selBucketTypes.forEach(function(bt) {{
            selGroups.forEach(function(gk) {{
                if (ALL_DATA[at] && ALL_DATA[at][bt] && ALL_DATA[at][bt][gk]) {{
                    combos.push({{ at: at, bt: bt, gk: gk }});
                }}
            }});
        }});
    }});

    // 2. 动态配色: 每个 combo 分配一个颜色
    var series = [];
    var bucketIds = null;

    combos.forEach(function(combo, idx) {{
        var color = PALETTE[idx % PALETTE.length];
        var rows = (ALL_DATA[combo.at][combo.bt][combo.gk][dt]) || [];
        if (!bucketIds && rows.length > 0) {{
            bucketIds = rows.map(function(r) {{ return r.bucket_id; }});
        }}
        var atLabel = APPTYPE_LABELS[combo.at] || combo.at;
        var comboLabel = atLabel + '·' + combo.bt + '·' + combo.gk;

        // 3. 每个 combo × selMetrics → 一条 series
        selMetrics.forEach(function(metric) {{
            series.push({{
                name: comboLabel + '·' + metric,
                type: 'line',
                data: rows.map(function(r) {{ return r[metric]; }}),
                symbol: 'none',
                lineStyle: {{
                    type: LINE_STYLES[metric],
                    color: color,
                    width: metric === 'label' ? 2 : 1.5
                }},
                itemStyle: {{ color: color }}
            }});
        }});
    }});

    chart.setOption({{
        tooltip: {{
            trigger: 'axis',
            formatter: function(params) {{
                if (!params.length) return '';
                var dataIdx = params[0].dataIndex;
                var s = 'bucket: ' + (bucketIds ? bucketIds[dataIdx] : dataIdx) + '<br/>';
                // 按 combo 分组显示,从 ALL_DATA 取原始行
                var groups = {{}};
                params.forEach(function(p) {{
                    var parts = p.seriesName.split('·');
                    var metric = parts.pop();
                    var comboKey = parts.join('·');
                    if (!groups[comboKey]) groups[comboKey] = {{}};
                    groups[comboKey][metric] = p.value;
                    groups[comboKey].color = p.color;
                }});
                combos.forEach(function(combo, ci) {{
                    var atLabel = APPTYPE_LABELS[combo.at] || combo.at;
                    var ck = atLabel + '·' + combo.bt + '·' + combo.gk;
                    var g = groups[ck];
                    if (!g) return;
                    var rows = (ALL_DATA[combo.at][combo.bt][combo.gk][dt]) || [];
                    var r = rows[dataIdx];
                    s += '<br/><b style="color:' + g.color + '">' + ck + '</b>';
                    if (r) s += ' &nbsp;<span style="color:#999">cnt=' + r.cnt + ' range=[' + r.range_begin.toFixed(4) + ', ' + r.range_end.toFixed(4) + ']</span>';
                    s += '<br/>';
                    if (g.predict != null) s += '  predict: ' + g.predict.toFixed(4) + '<br/>';
                    if (g['pred_校准'] != null) s += '  pred_校准: ' + g['pred_校准'].toFixed(4) + '<br/>';
                    if (g.stat != null) s += '  stat: ' + g.stat.toFixed(4) + '<br/>';
                    if (g.label != null) s += '  label: ' + g.label.toFixed(4) + '<br/>';
                }});
                return s;
            }}
        }},
        legend: {{ type: 'scroll', bottom: 0 }},
        grid: {{ left: 60, right: 30, top: 30, bottom: 60 }},
        xAxis: {{ name: 'bucket_id', type: 'category', data: bucketIds || [], nameLocation: 'center', nameGap: 28 }},
        yAxis: {{ name: 'value', type: 'value', nameLocation: 'center', nameGap: 45 }},
        series: series
    }}, true);

    // 计算并展示 MAE(label 加权,总体 + 分段)
    function calcWMAE(rows, metric) {{
        var wSum = 0, wTotal = 0;
        rows.forEach(function(r) {{
            if (r[metric] != null && r.label != null) {{
                wSum += r.label * Math.abs(r[metric] - r.label);
                wTotal += r.label;
            }}
        }});
        return wTotal > 0 ? (wSum / wTotal) : null;
    }}
    function rangeInfo(rows) {{
        var labelMin = Infinity, labelMax = -Infinity, rngMin = Infinity, rngMax = -Infinity;
        rows.forEach(function(r) {{
            if (r.label != null) {{ labelMin = Math.min(labelMin, r.label); labelMax = Math.max(labelMax, r.label); }}
            if (r.range_begin != null) rngMin = Math.min(rngMin, r.range_begin);
            if (r.range_end != null) rngMax = Math.max(rngMax, r.range_end);
        }});
        return 'label=[' + labelMin.toFixed(4) + ', ' + labelMax.toFixed(4) + ']'
            + ' bucket=[' + rngMin.toFixed(4) + ', ' + rngMax.toFixed(4) + ']';
    }}
    function maeRow(metrics, rows) {{
        var maeMap = {{}};
        metrics.forEach(function(m) {{
            maeMap[m] = calcWMAE(rows, m);
        }});
        // 校准Δ
        var delta = null, deltaPct = null;
        if (maeMap['predict'] != null && maeMap['pred_校准'] != null) {{
            delta = maeMap['pred_校准'] - maeMap['predict'];
            deltaPct = delta / maeMap['predict'] * 100;
        }}
        return {{ maeMap: maeMap, delta: delta, deltaPct: deltaPct, rng: rangeInfo(rows) }};
    }}
    function fmtVal(v) {{ return v != null ? v.toFixed(4) : '-'; }}
    function fmtDelta(d) {{
        if (d == null) return '-';
        var sign = d.delta <= 0 ? '' : '+';
        var color = d.delta <= 0 ? '#16a34a' : '#dc2626';
        return '<span style="color:' + color + '">' + sign + d.delta.toFixed(4) + ' (' + sign + d.deltaPct.toFixed(1) + '%)</span>';
    }}

    var maeHtml = '';
    var maeMetrics = selMetrics.filter(function(m) {{ return m !== 'label'; }});
    if (maeMetrics.length > 0) {{
        // 表头
        var thMetrics = '';
        maeMetrics.forEach(function(m) {{ thMetrics += '<th>' + m + '</th>'; }});
        var hasCalDelta = maeMetrics.indexOf('predict') >= 0 && maeMetrics.indexOf('pred_校准') >= 0;

        combos.forEach(function(combo, idx) {{
            var color = PALETTE[idx % PALETTE.length];
            var rows = (ALL_DATA[combo.at][combo.bt][combo.gk][dt]) || [];
            if (rows.length === 0) return;
            var atLabel = APPTYPE_LABELS[combo.at] || combo.at;
            var comboLabel = atLabel + '·' + combo.bt + '·' + combo.gk;

            var n = rows.length;
            var cut1 = Math.floor(n / 3), cut2 = Math.floor(n * 2 / 3);
            var segments = [
                {{ name: '总体', rows: rows }},
                {{ name: '低(0~' + (cut1 - 1) + ')', rows: rows.slice(0, cut1) }},
                {{ name: '中(' + cut1 + '~' + (cut2 - 1) + ')', rows: rows.slice(cut1, cut2) }},
                {{ name: '高(' + cut2 + '~' + (n - 1) + ')', rows: rows.slice(cut2) }}
            ];

            maeHtml += '<div style="margin-bottom:12px"><b style="color:' + color + '">' + comboLabel + '</b>';
            maeHtml += '<table style="border-collapse:collapse;margin-top:4px;width:100%"><tr style="background:#f5f5f5">';
            maeHtml += '<th style="text-align:left;padding:3px 8px">分段</th>' + thMetrics.replace(/<th>/g, '<th style="padding:3px 8px">');
            if (hasCalDelta) maeHtml += '<th style="padding:3px 8px">校准Δ</th>';
            maeHtml += '<th style="padding:3px 8px">label范围</th><th style="padding:3px 8px">bucket范围</th></tr>';

            segments.forEach(function(seg) {{
                var d = maeRow(maeMetrics, seg.rows);
                maeHtml += '<tr style="border-top:1px solid #eee"><td style="padding:3px 8px">' + seg.name + '</td>';
                maeMetrics.forEach(function(m) {{
                    maeHtml += '<td style="padding:3px 8px;text-align:right">' + fmtVal(d.maeMap[m]) + '</td>';
                }});
                if (hasCalDelta) maeHtml += '<td style="padding:3px 8px;text-align:right">' + fmtDelta(d) + '</td>';
                maeHtml += '<td style="padding:3px 8px;color:#888">' + d.rng.split(' ')[0] + '</td>';
                maeHtml += '<td style="padding:3px 8px;color:#888">' + d.rng.split(' ')[1] + '</td>';
                maeHtml += '</tr>';
            }});
            maeHtml += '</table></div>';
        }});
    }}
    document.getElementById('mae-info').innerHTML = maeHtml;
}}

renderChart();
window.addEventListener('resize', function() {{ chart.resize(); }});
</script>
</body>
</html>"""

    # Ensure the output directory exists, then write the page and report where.
    os.makedirs(os.path.dirname(OUT_HTML), exist_ok=True)
    with open(OUT_HTML, "w", encoding="utf-8") as f:
        f.write(html)
    print(f"HTML saved to: {OUT_HTML}")
+
+
def main():
    """Entry point: load the CSVs, reshape them, and write the HTML report."""
    frame = load_data()
    distinct_dts = sorted(frame["dt"].unique())
    render_html(build_charts_data(frame), distinct_dts)
+
+
+if __name__ == "__main__":
+    main()

BIN
tasks/承接/头部品类与承接品类分析/.DS_Store


+ 495 - 0
tasks/承接/线上实验/08_预测覆盖率效果分析.py

@@ -0,0 +1,495 @@
+"""
+预测值覆盖率效果分析
+
+对比有/无预测值样本的业务指标差异
+主要关注: str_plus, ros_minus, rovn, vovh24
+"""
+
+import pandas as pd
+import glob
+import os
+
# Directory holding the daily coverage CSV exports (relative to the repo root)
DATA_DIR = "tasks/承接/线上实验/output/07_预测值覆盖率分析"

# Core business metrics compared throughout this analysis
METRICS = ['str_plus', 'ros_minus', 'rovn', 'vovh24']

# Auxiliary metrics (sample sizes / ratios and secondary rates)
AUX_METRICS = ['sample_cnt', 'sample_ratio', 'str_one', 'ros_one', 'str', 'ros', 'dau', 'exp']
+
+
def load_all_data():
    """Concatenate every daily CSV found in DATA_DIR; None when none exist."""
    paths = sorted(glob.glob(os.path.join(DATA_DIR, "*.csv")))
    if not paths:
        print(f"未找到数据文件: {DATA_DIR}")
        return None

    frames = []
    for path in paths:
        frame = pd.read_csv(path)
        print(f"加载: {os.path.basename(path)} ({len(frame)} 行)")
        frames.append(frame)

    return pd.concat(frames, ignore_index=True)
+
+
def filter_app4(df):
    """Return a copy of the rows where apptype == 4 (the experiment platform)."""
    app4_mask = df['apptype'] == 4
    return df.loc[app4_mask].copy()
+
+
def calc_lift(exp_val, ctrl_val):
    """Relative lift of exp_val over ctrl_val, in percent.

    A zero baseline degenerates to +inf for a positive treatment value and 0
    otherwise, so downstream formatting never divides by zero.
    """
    if ctrl_val == 0:
        return float('inf') if exp_val > 0 else 0
    delta = exp_val - ctrl_val
    return delta / ctrl_val * 100
+
+
def analyze_by_has_pred(df):
    """Summarise business metrics per (abcode, has_pred), averaged over days.

    Prints the per-group means of the core METRICS and returns the full
    aggregated frame (metrics averaged, sample counts summed).
    """
    print("\n" + "=" * 80)
    print("【有/无预测值 业务指标对比】")
    print("=" * 80)

    # Restrict to apptype=4 (experiment platform).
    df4 = filter_app4(df)

    # Mean over days for every metric column that exists; sample counts summed.
    agg_cols = {}
    for metric in METRICS + AUX_METRICS:
        if metric in df4.columns:
            agg_cols[metric] = 'mean'
    agg_cols['sample_cnt'] = 'sum'

    summary = df4.groupby(['abcode', 'has_pred']).agg(agg_cols).round(6)

    print("\n各组指标均值(多天汇总):")
    print(summary[METRICS].to_string())

    return summary
+
+
def compare_vs_baseline(df):
    """Compare every experiment group against the control group (对照组).

    For each has_pred slice, prints the control baseline and the percent lift
    of each experiment group on the core METRICS; returns the lifts as a
    DataFrame with one row per (has_pred, experiment abcode).
    """
    print("\n" + "=" * 80)
    print("【实验组 vs 对照组 业务指标对比】")
    print("=" * 80)

    df4 = filter_app4(df)

    results = []

    for has_pred in ['有预测值', '无预测值']:
        print(f"\n--- {has_pred} ---")

        sub = df4[df4['has_pred'] == has_pred]

        # Per-abcode means over all days in this slice.
        group_means = sub.groupby('abcode')[METRICS].mean()

        # No control group in this slice -> nothing to compare against.
        if '对照组' not in group_means.index:
            print("  [缺少对照组数据]")
            continue

        baseline = group_means.loc['对照组']

        print(f"\n对照组基线: {baseline.to_dict()}")
        print(f"\n各实验组 vs 对照组 提升幅度 (%):")

        for abcode in group_means.index:
            if abcode == '对照组':
                continue

            exp_vals = group_means.loc[abcode]
            lifts = {m: calc_lift(exp_vals[m], baseline[m]) for m in METRICS}

            print(f"\n  {abcode}:")
            for m in METRICS:
                sign = '+' if lifts[m] > 0 else ''
                print(f"    {m}: {exp_vals[m]:.6f} ({sign}{lifts[m]:.2f}%)")

            results.append({
                'has_pred': has_pred,
                'abcode': abcode,
                **{f'{m}_val': exp_vals[m] for m in METRICS},
                **{f'{m}_lift': lifts[m] for m in METRICS}
            })

    return pd.DataFrame(results)
+
+
def compare_pred_vs_nopred(df):
    """Within each abcode, compare rows WITH predictions against WITHOUT.

    Prints per-group means on the core METRICS for both slices plus the
    percent lift of "has prediction" over "no prediction"; returns one row
    per abcode.
    """
    print("\n" + "=" * 80)
    print("【同组内 有预测值 vs 无预测值 对比】")
    print("=" * 80)

    df4 = filter_app4(df)

    results = []

    for abcode in df4['abcode'].unique():
        sub = df4[df4['abcode'] == abcode]

        # NOTE(review): if either slice is empty, .mean() yields NaN and the
        # lift prints as "nan%" — presumably both slices always exist; confirm.
        has_pred = sub[sub['has_pred'] == '有预测值'][METRICS].mean()
        no_pred = sub[sub['has_pred'] == '无预测值'][METRICS].mean()

        print(f"\n{abcode}:")
        print(f"  有预测值: {has_pred.to_dict()}")
        print(f"  无预测值: {no_pred.to_dict()}")

        diffs = {}
        for m in METRICS:
            diff = calc_lift(has_pred[m], no_pred[m])
            sign = '+' if diff > 0 else ''
            print(f"  {m} 提升: {sign}{diff:.2f}%")
            diffs[m] = diff

        results.append({
            'abcode': abcode,
            **{f'{m}_有预测值': has_pred[m] for m in METRICS},
            **{f'{m}_无预测值': no_pred[m] for m in METRICS},
            **{f'{m}_lift': diffs[m] for m in METRICS}
        })

    return pd.DataFrame(results)
+
+
def daily_trend(df):
    """Print the day-by-day rovn trend for samples that have predictions."""
    print("\n" + "=" * 80)
    print("【日趋势 - 有预测值样本】")
    print("=" * 80)

    # apptype=4 rows that carry a prediction.
    with_pred = filter_app4(df)
    with_pred = with_pred[with_pred['has_pred'] == '有预测值']

    # One column per abcode, one row per day.
    pivot = with_pred.pivot_table(
        index='dt', columns='abcode', values='rovn', aggfunc='mean'
    ).round(6)

    print("\nrovn 日趋势:")
    print(pivot.to_string())

    return pivot
+
+
def coverage_stability(df):
    """Check how stable the prediction-coverage ratio is across days."""
    print("\n" + "=" * 80)
    print("【预测覆盖率稳定性】")
    print("=" * 80)

    covered = filter_app4(df)
    covered = covered[covered['has_pred'] == '有预测值']

    # Daily coverage ratio per abcode.
    coverage = covered.pivot_table(
        index='dt', columns='abcode', values='sample_ratio', aggfunc='mean'
    ).round(4)

    print("\n各组有预测值样本占比 (按天):")
    print(coverage.to_string())

    print("\n各组覆盖率统计:")
    print(coverage.describe().round(4).to_string())

    return coverage
+
+
def daily_lift_stability(df):
    """Per-day lift of each experiment group vs control, with stability flags.

    Only rows with predictions are considered.  For every day that has a
    control group, computes the percent lift of each experiment group on the
    core METRICS, then prints mean/std/min/max per group and marks a metric
    stable when its day-to-day std is below half of |mean| (or below 1pp when
    the mean is exactly 0).  Returns the per-day lift rows as a DataFrame.
    """
    print("\n" + "=" * 80)
    print("【多天稳定性分析 - 实验组 vs 对照组 提升幅度】")
    print("=" * 80)

    df4 = filter_app4(df)
    has_pred = df4[df4['has_pred'] == '有预测值']

    results = []
    for dt in sorted(has_pred['dt'].unique()):
        day_data = has_pred[has_pred['dt'] == dt]

        # Skip days without a control group to compare against.
        if '对照组' not in day_data['abcode'].values:
            continue

        # NOTE(review): .iloc[0] assumes one row per (dt, abcode) in this
        # slice; extras would be silently dropped — confirm upstream grouping.
        baseline = day_data[day_data['abcode'] == '对照组'][METRICS].iloc[0]

        for abcode in day_data['abcode'].unique():
            if abcode == '对照组':
                continue
            exp_vals = day_data[day_data['abcode'] == abcode][METRICS].iloc[0]
            lifts = {m: calc_lift(exp_vals[m], baseline[m]) for m in METRICS}
            results.append({
                'dt': dt,
                'abcode': abcode,
                **{f'{m}_lift': lifts[m] for m in METRICS}
            })

    # NOTE(review): an empty results list gives a column-less DataFrame, so
    # lift_df['abcode'] below would raise — assumes at least one usable day.
    lift_df = pd.DataFrame(results)

    for abcode in lift_df['abcode'].unique():
        print(f"\n{abcode}:")
        sub = lift_df[lift_df['abcode'] == abcode]

        for m in METRICS:
            col = f'{m}_lift'
            values = sub[col].values
            mean_lift = values.mean()
            std_lift = values.std()  # ndarray.std(): population std (ddof=0)
            min_lift = values.min()
            max_lift = values.max()

            # Stability heuristic: spread small relative to the mean lift.
            is_stable = std_lift < abs(mean_lift) * 0.5 if mean_lift != 0 else std_lift < 1
            stability = "✓稳定" if is_stable else "⚠波动"

            print(f"  {m}: 均值{mean_lift:+.2f}%, 标准差{std_lift:.2f}%, 范围[{min_lift:+.2f}%, {max_lift:+.2f}%] {stability}")

        # Per-day detail lines.
        print(f"  日明细:")
        for _, row in sub.iterrows():
            lifts_str = " | ".join([f"{m}:{row[f'{m}_lift']:+.2f}%" for m in METRICS])
            print(f"    {row['dt']}: {lifts_str}")

    return lift_df
+
+
def compare_pred_effect_by_day(df):
    """Day-by-day effect of having a prediction, control group only.

    Restricting to the control group removes the treatment-strategy effect,
    so the remaining 有预测值/无预测值 difference reflects the coverage
    population itself.  Prints per-day values and a cross-day summary of the
    per-metric differences; returns the per-day rows as a DataFrame.
    """
    print("\n" + "=" * 80)
    print("【有/无预测值效果差异分析】")
    print("=" * 80)

    df4 = filter_app4(df)

    # Control group only, to exclude strategy interference.
    ctrl = df4[df4['abcode'] == '对照组']

    print("\n对照组 - 有/无预测值对比 (排除策略干扰):")

    results = []
    for dt in sorted(ctrl['dt'].unique()):
        day_data = ctrl[ctrl['dt'] == dt]
        # NOTE(review): .iloc[0] raises IndexError if a day is missing either
        # has_pred slice — presumably both always exist in the export; confirm.
        has_pred = day_data[day_data['has_pred'] == '有预测值'][METRICS + ['sample_cnt']].iloc[0]
        no_pred = day_data[day_data['has_pred'] == '无预测值'][METRICS + ['sample_cnt']].iloc[0]

        diffs = {m: calc_lift(has_pred[m], no_pred[m]) for m in METRICS}
        results.append({
            'dt': dt,
            'sample_有预测值': has_pred['sample_cnt'],
            'sample_无预测值': no_pred['sample_cnt'],
            **{f'{m}_有': has_pred[m] for m in METRICS},
            **{f'{m}_无': no_pred[m] for m in METRICS},
            **{f'{m}_diff': diffs[m] for m in METRICS}
        })

    diff_df = pd.DataFrame(results)

    # Cross-day summary statistics.
    print("\n指标差异汇总 (有预测值 vs 无预测值):")
    for m in METRICS:
        col = f'{m}_diff'
        mean_diff = diff_df[col].mean()
        std_diff = diff_df[col].std()
        print(f"  {m}: 均值差异 {mean_diff:+.1f}%, 标准差 {std_diff:.1f}%")

    print("\n日明细:")
    for _, row in diff_df.iterrows():
        print(f"  {row['dt']}:")
        print(f"    样本量: 有预测值 {row['sample_有预测值']:,.0f} | 无预测值 {row['sample_无预测值']:,.0f}")
        for m in METRICS:
            print(f"    {m}: {row[f'{m}_有']:.6f} vs {row[f'{m}_无']:.6f} ({row[f'{m}_diff']:+.1f}%)")

    return diff_df
+
+
+def daily_metrics_by_pred(df):
+    """Export per-day metric detail broken down by group and prediction availability.
+
+    Writes the detail table to a UTF-8-BOM CSV under DATA_DIR (so Excel opens it
+    correctly), prints a preview, and returns the sorted DataFrame.
+    """
+    print("\n" + "=" * 80)
+    print("【分天指标明细 - 有/无预测值】")
+    print("=" * 80)
+
+    df4 = filter_app4(df)
+
+    # Columns to export: keys + sample stats + the metric columns.
+    cols = ['dt', 'abcode', 'has_pred', 'sample_cnt', 'sample_ratio'] + METRICS
+
+    # has_pred descending puts '有预测值' before '无预测值' within each group.
+    result = df4[cols].sort_values(['dt', 'abcode', 'has_pred'], ascending=[True, True, False])
+
+    # Write to file (utf-8-sig adds a BOM for Excel compatibility).
+    output_file = os.path.join(DATA_DIR, "分天指标_有无预测值.csv")
+    result.to_csv(output_file, index=False, encoding='utf-8-sig')
+    print(f"\n输出文件: {output_file}")
+
+    # Print a preview of the exported table.
+    print("\n数据预览:")
+    print(result.to_string(index=False))
+
+    return result
+
+
+def daily_lift_to_file(df):
+    """Compute per-day lift of each experiment group vs the control group and export it.
+
+    Produces three slices: '有预测值' (has prediction), '无预测值' (no prediction)
+    and '整体' (overall, sample-count-weighted average across the two).
+    Control-group rows are kept with lift 0 so the baseline is visible in the CSV.
+    Writes the result to DATA_DIR and returns the DataFrame.
+    """
+    print("\n" + "=" * 80)
+    print("【分天提升幅度 - 实验组 vs 对照组】")
+    print("=" * 80)
+
+    df4 = filter_app4(df)
+
+    results = []
+    # Slices: has-prediction, no-prediction, overall.
+    for has_pred in ['有预测值', '无预测值', '整体']:
+        if has_pred == '整体':
+            # Overall: aggregate per dt + abcode with a sample-count-weighted mean.
+            sub = df4.groupby(['dt', 'abcode']).apply(
+                lambda g: pd.Series({
+                    'sample_cnt': g['sample_cnt'].sum(),
+                    **{m: (g[m] * g['sample_cnt']).sum() / g['sample_cnt'].sum() for m in METRICS}
+                })
+            ).reset_index()
+        else:
+            sub = df4[df4['has_pred'] == has_pred]
+
+        for dt in sorted(sub['dt'].unique()):
+            day_data = sub[sub['dt'] == dt]
+
+            # Skip days without a control-group baseline.
+            if '对照组' not in day_data['abcode'].values:
+                continue
+
+            baseline = day_data[day_data['abcode'] == '对照组'][METRICS].iloc[0]
+
+            for abcode in day_data['abcode'].unique():
+                exp_vals = day_data[day_data['abcode'] == abcode][METRICS].iloc[0]
+                sample_cnt = day_data[day_data['abcode'] == abcode]['sample_cnt'].iloc[0]
+                if abcode == '对照组':
+                    # Baseline row: lift is 0 by definition.
+                    lifts = {m: 0 for m in METRICS}
+                else:
+                    lifts = {m: calc_lift(exp_vals[m], baseline[m]) for m in METRICS}
+
+                results.append({
+                    'dt': dt,
+                    'has_pred': has_pred,
+                    'abcode': abcode,
+                    'sample_cnt': int(sample_cnt),
+                    **{m: exp_vals[m] for m in METRICS},
+                    **{f'{m}_lift': round(lifts[m], 2) for m in METRICS}
+                })
+
+    lift_df = pd.DataFrame(results)
+    # Sort so the '整体' (overall) slice comes last within each day.
+    order = {'有预测值': 0, '无预测值': 1, '整体': 2}
+    lift_df['_order'] = lift_df['has_pred'].map(order)
+    lift_df = lift_df.sort_values(['dt', '_order', 'abcode'], ascending=[True, True, True])
+    lift_df = lift_df.drop('_order', axis=1)
+
+    # Write to file (utf-8-sig adds a BOM for Excel compatibility).
+    output_file = os.path.join(DATA_DIR, "分天提升幅度.csv")
+    lift_df.to_csv(output_file, index=False, encoding='utf-8-sig')
+    print(f"\n输出文件: {output_file}")
+
+    # Print a preview of the exported table.
+    print("\n数据预览:")
+    print(lift_df.to_string(index=False))
+
+    return lift_df
+
+
<![CDATA[
+def overall_lift_summary(df):
+    """Summarize overall lift per experiment group, ignoring prediction availability.
+
+    Collapses has/no-prediction rows into one row per (dt, abcode) via a
+    sample-count-weighted mean, computes per-day lift vs the control group,
+    prints mean/std with a heuristic stability flag, and returns the lift DataFrame.
+    """
+    print("\n" + "=" * 80)
+    print("【整体效果汇总 - 不分有无预测值】")
+    print("=" * 80)
+
+    df4 = filter_app4(df)
+
+    # Aggregate per dt + abcode with a sample-count-weighted mean.
+    overall = df4.groupby(['dt', 'abcode']).apply(
+        lambda g: pd.Series({
+            'sample_cnt': g['sample_cnt'].sum(),
+            **{m: (g[m] * g['sample_cnt']).sum() / g['sample_cnt'].sum() for m in METRICS}
+        })
+    ).reset_index()
+
+    results = []
+    for dt in sorted(overall['dt'].unique()):
+        day_data = overall[overall['dt'] == dt]
+
+        # Skip days without a control-group baseline.
+        if '对照组' not in day_data['abcode'].values:
+            continue
+
+        baseline = day_data[day_data['abcode'] == '对照组'][METRICS].iloc[0]
+
+        for abcode in day_data['abcode'].unique():
+            if abcode == '对照组':
+                continue
+            exp_vals = day_data[day_data['abcode'] == abcode][METRICS].iloc[0]
+            lifts = {m: calc_lift(exp_vals[m], baseline[m]) for m in METRICS}
+            results.append({
+                'dt': dt,
+                'abcode': abcode,
+                **{f'{m}_lift': lifts[m] for m in METRICS}
+            })
+
+    lift_df = pd.DataFrame(results)
+
+    print("\n各实验组整体效果(多天汇总):")
+    for abcode in lift_df['abcode'].unique():
+        sub = lift_df[lift_df['abcode'] == abcode]
+        print(f"\n{abcode}:")
+        for m in METRICS:
+            col = f'{m}_lift'
+            mean_lift = sub[col].mean()
+            std_lift = sub[col].std()
+            # Heuristic stability rule: std < 50% of |mean| (or < 1pp when mean is 0).
+            is_stable = std_lift < abs(mean_lift) * 0.5 if mean_lift != 0 else std_lift < 1
+            stability = "✓稳定" if is_stable else "⚠波动"
+            print(f"  {m}: 均值{mean_lift:+.2f}%, 标准差{std_lift:.2f}% {stability}")
+
+        # Per-day detail lines.
+        print(f"  日明细:")
+        for _, row in sub.iterrows():
+            lifts_str = " | ".join([f"{m}:{row[f'{m}_lift']:+.2f}%" for m in METRICS])
+            print(f"    {row['dt']}: {lifts_str}")
+
+    return lift_df
]]>
+
+
+def main():
+    """Entry point: load the experiment data and run all analysis steps in order."""
+    print("=" * 80)
+    print("预测值覆盖率效果分析")
+    print("=" * 80)
+
+    # Load data; bail out silently if nothing was loaded.
+    df = load_all_data()
+    if df is None:
+        return
+
+    print(f"\n总数据量: {len(df)} 行")
+    print(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}")
+    print(f"包含天数: {df['dt'].nunique()} 天")
+
+    # 1. Per-day metric detail (with/without prediction).
+    daily_metrics_by_pred(df)
+
+    # 2. Per-day lift (has-prediction / no-prediction / overall slices).
+    daily_lift_to_file(df)
+
+    # 3. Overall lift summary.
+    overall_lift_summary(df)
+
+    # 4. Multi-day stability of the has-prediction slice.
+    print("\n" + "=" * 80)
+    print("【有预测值样本 - 多天稳定性】")
+    print("=" * 80)
+    daily_lift_stability(df)
+
+    print("\n" + "=" * 80)
+    print("分析完成")
+    print("=" * 80)
+
+
+if __name__ == "__main__":
+    main()

+ 177 - 0
tasks/承接/线上实验/分桶诊断分析.py

@@ -0,0 +1,177 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+# NOTE(review): 'Arial Unicode MS' is a macOS font — verify it exists on the target host,
+# otherwise CJK labels will render as boxes.
+plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
+plt.rcParams['axes.unicode_minus'] = False
+
+# Load the three bucket-diagnosis datasets (bucketed by str_pred / ros_real / ros_pred).
+df_a = pd.read_csv('output/06a_str_pred分桶诊断/20260125.csv')
+df_b = pd.read_csv('output/06b_ros_real分桶诊断/20260125.csv')
+df_c = pd.read_csv('output/06c_ros_pred分桶诊断/20260125.csv')
+
+# Keep control-group rows only, dropping the aggregate '全部' (all) bucket.
+df_a_ctrl = df_a[(df_a['abcode'] == '对照组') & (df_a['bucket'] != '全部')].copy()
+df_b_ctrl = df_b[(df_b['abcode'] == '对照组') & (df_b['bucket'] != '全部')].copy()
+df_c_ctrl = df_c[(df_c['abcode'] == '对照组') & (df_c['bucket'] != '全部')].copy()
+
+df_a_ctrl['bucket'] = df_a_ctrl['bucket'].astype(int)
+df_b_ctrl['bucket'] = df_b_ctrl['bucket'].astype(int)
+df_c_ctrl['bucket'] = df_c_ctrl['bucket'].astype(int)
+
+df_a_ctrl = df_a_ctrl.sort_values('bucket')
+df_b_ctrl = df_b_ctrl.sort_values('bucket')
+df_c_ctrl = df_c_ctrl.sort_values('bucket')
+
+# Build the combined 3x3 analysis figure (one row per bucketing dimension).
+fig, axes = plt.subplots(3, 3, figsize=(16, 14))
+
+# ========== 06a: bucketed by str_pred ==========
+ax = axes[0, 0]
+ax.bar(df_a_ctrl['bucket'], df_a_ctrl['ros_pred_bias'], alpha=0.7, label='ros_pred偏差', color='steelblue')
+ax.bar(df_a_ctrl['bucket'], df_a_ctrl['ros_stat_bias'], alpha=0.5, label='ros_stat偏差', color='orange')
+ax.axhline(y=0, color='red', linestyle='--', linewidth=1)
+ax.set_xlabel('str_pred 分桶')
+ax.set_ylabel('ROS 偏差')
+ax.set_title('06a: str_pred分桶 - ROS偏差趋势\n(正=高估, 负=低估)')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+ax = axes[0, 1]
+ax.plot(df_a_ctrl['bucket'], df_a_ctrl['str_real'], 'o-', label='str_real', color='green')
+ax.plot(df_a_ctrl['bucket'], df_a_ctrl['str_pred_avg'], 's--', label='str_pred', color='blue')
+ax.set_xlabel('str_pred 分桶')
+ax.set_ylabel('分享率')
+ax.set_title('06a: str_pred分桶 - STR真实vs预测')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+ax = axes[0, 2]
+ax.plot(df_a_ctrl['bucket'], df_a_ctrl['ros_real'], 'o-', label='ros_real', color='green')
+ax.plot(df_a_ctrl['bucket'], df_a_ctrl['ros_pred_avg'], 's--', label='ros_pred', color='blue')
+ax.plot(df_a_ctrl['bucket'], df_a_ctrl['ros_stat_avg'], '^--', label='ros_stat', color='orange')
+ax.set_xlabel('str_pred 分桶')
+ax.set_ylabel('ROS')
+ax.set_title('06a: str_pred分桶 - ROS真实vs预测vs统计量')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+# ========== 06b: bucketed by ros_real (returning samples only) ==========
+ax = axes[1, 0]
+ax.bar(df_b_ctrl['bucket'], df_b_ctrl['ros_pred_bias'], alpha=0.7, label='ros_pred偏差', color='steelblue')
+ax.bar(df_b_ctrl['bucket'], df_b_ctrl['ros_stat_bias'], alpha=0.5, label='ros_stat偏差', color='orange')
+ax.axhline(y=0, color='red', linestyle='--', linewidth=1)
+ax.set_xlabel('ros_real 分桶 (仅回流)')
+ax.set_ylabel('ROS 偏差')
+ax.set_title('06b: ros_real分桶 - ROS偏差趋势\n(仅回流样本)')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+ax = axes[1, 1]
+ax.plot(df_b_ctrl['bucket'], df_b_ctrl['ros_real_avg'], 'o-', label='ros_real', color='green')
+ax.plot(df_b_ctrl['bucket'], df_b_ctrl['ros_pred_avg'], 's--', label='ros_pred', color='blue')
+ax.plot(df_b_ctrl['bucket'], df_b_ctrl['ros_stat_avg'], '^--', label='ros_stat', color='orange')
+ax.set_xlabel('ros_real 分桶')
+ax.set_ylabel('ROS')
+ax.set_title('06b: ros_real分桶 - ROS各指标对比')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+ax = axes[1, 2]
+ax.plot(df_b_ctrl['bucket'], df_b_ctrl['ros_pred_mae'], 'o-', label='ros_pred MAE', color='blue')
+ax.plot(df_b_ctrl['bucket'], df_b_ctrl['ros_stat_mae'], 's-', label='ros_stat MAE', color='orange')
+ax.set_xlabel('ros_real 分桶')
+ax.set_ylabel('MAE')
+ax.set_title('06b: ros_real分桶 - MAE趋势\n(高回流区间误差更大)')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+# ========== 06c: bucketed by ros_pred ==========
+ax = axes[2, 0]
+ax.bar(df_c_ctrl['bucket'], df_c_ctrl['ros_pred_bias'], alpha=0.7, label='ros_pred偏差', color='steelblue')
+ax.bar(df_c_ctrl['bucket'], df_c_ctrl['ros_stat_bias'], alpha=0.5, label='ros_stat偏差', color='orange')
+ax.axhline(y=0, color='red', linestyle='--', linewidth=1)
+ax.set_xlabel('ros_pred 分桶')
+ax.set_ylabel('ROS 偏差')
+ax.set_title('06c: ros_pred分桶 - ROS偏差趋势')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+ax = axes[2, 1]
+ax.plot(df_c_ctrl['bucket'], df_c_ctrl['ros_real'], 'o-', label='ros_real', color='green')
+ax.plot(df_c_ctrl['bucket'], df_c_ctrl['ros_pred_avg'], 's--', label='ros_pred', color='blue')
+ax.set_xlabel('ros_pred 分桶')
+ax.set_ylabel('ROS')
+ax.set_title('06c: ros_pred分桶 - ROS校准度')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+ax = axes[2, 2]
+ax.plot(df_c_ctrl['bucket'], df_c_ctrl['return_rate'] * 100, 'o-', color='purple')
+ax.set_xlabel('ros_pred 分桶')
+ax.set_ylabel('回流率 (%)')
+ax.set_title('06c: ros_pred分桶 - 各桶回流率')
+ax.set_xticks(range(1, 11))
+
+plt.tight_layout()
+plt.savefig('output/分桶诊断综合分析.png', dpi=150, bbox_inches='tight')
+plt.close()
+
+# ========== Print key findings ==========
+print("=" * 60)
+print("分桶诊断综合分析 - 关键发现")
+print("=" * 60)
+
+# 06a analysis: compare ROS bias in low (1-3) vs high (8-10) str_pred buckets.
+print("\n【06a: str_pred分桶】高分享预测样本的ROS表现")
+print("-" * 50)
+a_low = df_a_ctrl[df_a_ctrl['bucket'] <= 3]['ros_pred_bias'].mean()
+a_high = df_a_ctrl[df_a_ctrl['bucket'] >= 8]['ros_pred_bias'].mean()
+print(f"  低str_pred桶(1-3) ROS偏差: {a_low:.4f}")
+print(f"  高str_pred桶(8-10) ROS偏差: {a_high:.4f}")
+if a_high > a_low:
+    print(f"  → 高分享预测样本ROS高估更严重 (+{a_high - a_low:.4f})")
+else:
+    print(f"  → 低分享预测样本ROS高估更严重 (+{a_low - a_high:.4f})")
+
+# 06b analysis: real-return magnitude vs prediction error.
+# NOTE(review): the "severely underestimated" line below is printed unconditionally,
+# unlike the 06a branch above — confirm this is intentional.
+print("\n【06b: ros_real分桶】真实回流量级与预测误差")
+print("-" * 50)
+b_low = df_b_ctrl[df_b_ctrl['bucket'] <= 3]['ros_pred_bias'].mean()
+b_high = df_b_ctrl[df_b_ctrl['bucket'] >= 8]['ros_pred_bias'].mean()
+print(f"  低ros_real桶(1-3) ROS偏差: {b_low:.4f}")
+print(f"  高ros_real桶(8-10) ROS偏差: {b_high:.4f}")
+print(f"  → 高回流样本被严重低估 (bias={b_high:.2f})")
+
+mae_low = df_b_ctrl[df_b_ctrl['bucket'] <= 3]['ros_pred_mae'].mean()
+mae_high = df_b_ctrl[df_b_ctrl['bucket'] >= 8]['ros_pred_mae'].mean()
+print(f"  低ros_real桶MAE: {mae_low:.4f}")
+print(f"  高ros_real桶MAE: {mae_high:.4f}")
+
+# 06c analysis: calibration of the ROS model against its own prediction buckets.
+print("\n【06c: ros_pred分桶】ROS模型自身校准度")
+print("-" * 50)
+c_low = df_c_ctrl[df_c_ctrl['bucket'] <= 3]['ros_pred_bias'].mean()
+c_high = df_c_ctrl[df_c_ctrl['bucket'] >= 8]['ros_pred_bias'].mean()
+print(f"  低ros_pred桶(1-3) ROS偏差: {c_low:.4f}")
+print(f"  高ros_pred桶(8-10) ROS偏差: {c_high:.4f}")
+if c_high < 0:
+    print(f"  → 高预测区间实际低于预测 (低估程度={abs(c_high):.2f})")
+
+# Global summary from the '全部' (all) bucket rows of the control group.
+print("\n" + "=" * 60)
+print("核心结论")
+print("=" * 60)
+df_a_all = df_a[(df_a['abcode'] == '对照组') & (df_a['bucket'] == '全部')]
+df_b_all = df_b[(df_b['abcode'] == '对照组') & (df_b['bucket'] == '全部')]
+df_c_all = df_c[(df_c['abcode'] == '对照组') & (df_c['bucket'] == '全部')]
+
+print(f"\n全量样本指标(对照组):")
+print(f"  样本量: {df_a_all['sample_cnt'].values[0]:,}")
+print(f"  回流样本: {df_a_all['return_cnt'].values[0]:,}")
+print(f"  str_real: {df_a_all['str_real'].values[0]:.4%}")
+print(f"  ros_real: {df_a_all['ros_real'].values[0]:.4f}")
+print(f"  ros_pred偏差: {df_a_all['ros_pred_bias'].values[0]:.4f}")
+print(f"  ros_stat偏差: {df_a_all['ros_stat_bias'].values[0]:.4f}")
+
+print(f"\n图表已保存至: output/分桶诊断综合分析.png")

+ 177 - 0
tasks/承接/线上实验/分桶诊断分析_full.py

@@ -0,0 +1,177 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+# NOTE(review): 'Arial Unicode MS' is a macOS font — verify availability on the target host.
+plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
+plt.rcParams['axes.unicode_minus'] = False
+
+# Load data for the listed dates (deliberately excludes yesterday, 20260125).
+dates = ['20260123', '20260124']
+dfs_a, dfs_c = [], []
+
+for dt in dates:
+    dfs_a.append(pd.read_csv(f'output/06a_str_pred分桶诊断_full/{dt}.csv'))
+    dfs_c.append(pd.read_csv(f'output/06c_ros_pred分桶诊断_full/{dt}.csv'))
+
+df_a = pd.concat(dfs_a, ignore_index=True)
+df_c = pd.concat(dfs_c, ignore_index=True)
+
+print(f"数据日期: {dates} (排除昨天)")
+print("=" * 70)
+
+# Experiment groups reported throughout this script (control + three treatments).
+exp_groups = ['对照组', '实验组-str+校准', '实验组-str+校准&ros-统计量', '实验组-先验地域降权']
+
+# ========== 1. Business metric comparison (all-bucket rollup) ==========
+print("\n【1. 业务指标对比 - 全量汇总】")
+print("-" * 70)
+
+# Take the '全部' (all) bucket rows for apptype 4 and aggregate per group.
+df_a_all = df_a[(df_a['bucket'] == '全部') & (df_a['apptype'] == 4)].copy()
+df_a_summary = df_a_all.groupby('abcode').agg({
+    'sample_cnt': 'sum',
+    'return_cnt': 'sum',
+    'dau': 'sum',
+    'exp': 'sum'
+}).reset_index()
+
+# Weighted averages across dates (weighted by sample_cnt).
+metrics = ['str_real', 'str_pred_avg', 'ros_real', 'ros_pred_avg', 'ros_stat_avg',
+           'ros_pred_bias', 'ros_stat_bias', 'rovn_real', 'rovn_pred',
+           'str_one', 'ros_one', 'str', 'ros', 'vovh24']
+
+for m in metrics:
+    df_a_all[f'{m}_weighted'] = df_a_all[m] * df_a_all['sample_cnt']
+
+# NOTE(review): assigning `.values` relies on the two groupby results sharing the
+# same abcode ordering — true here (both sorted by key), but fragile; confirm.
+weighted_agg = df_a_all.groupby('abcode').agg({f'{m}_weighted': 'sum' for m in metrics})
+for m in metrics:
+    df_a_summary[m] = weighted_agg[f'{m}_weighted'].values / df_a_summary['sample_cnt'].values
+
+# Print the comparison table.
+print(f"\n{'组别':<30} {'样本量':>12} {'str_real':>10} {'ros_real':>10} {'rovn_real':>12} {'vovh24':>10}")
+print("-" * 90)
+ctrl_row = df_a_summary[df_a_summary['abcode'] == '对照组'].iloc[0]
+for _, row in df_a_summary[df_a_summary['abcode'].isin(exp_groups)].iterrows():
+    print(f"{row['abcode']:<30} {int(row['sample_cnt']):>12,} {row['str_real']:>10.4%} {row['ros_real']:>10.4f} {row['rovn_real']:>12.6f} {row['vovh24']:>10.6f}")
+
+# Lift of each treatment group relative to the control group.
+print("\n【相对对照组提升】")
+print("-" * 70)
+print(f"{'组别':<35} {'str提升':>10} {'ros提升':>10} {'rovn提升':>10} {'vovh24提升':>10}")
+print("-" * 70)
+for _, row in df_a_summary[df_a_summary['abcode'].isin(exp_groups)].iterrows():
+    if row['abcode'] == '对照组':
+        continue
+    str_lift = (row['str_real'] / ctrl_row['str_real'] - 1) * 100
+    ros_lift = (row['ros_real'] / ctrl_row['ros_real'] - 1) * 100
+    rovn_lift = (row['rovn_real'] / ctrl_row['rovn_real'] - 1) * 100
+    vovh24_lift = (row['vovh24'] / ctrl_row['vovh24'] - 1) * 100
+    print(f"{row['abcode']:<35} {str_lift:>+9.2f}% {ros_lift:>+9.2f}% {rovn_lift:>+9.2f}% {vovh24_lift:>+9.2f}%")
+
+# ========== 2. Per-bucket bias analysis ==========
+print("\n\n【2. 分桶偏差分析 - 对照组】")
+print("-" * 70)
+
+# Control-group bucket rows only.
+for name, df in [('str_pred分桶', df_a), ('ros_pred分桶', df_c)]:
+    df_ctrl = df[(df['abcode'] == '对照组') & (df['bucket'] != '全部') & (df['apptype'] == 4)].copy()
+    df_ctrl['bucket'] = df_ctrl['bucket'].astype(int)
+
+    # Aggregate per bucket across dates; biases are sample-weighted, MAE return-weighted.
+    df_bucket = df_ctrl.groupby('bucket').agg({
+        'sample_cnt': 'sum',
+        'return_cnt': 'sum',
+        'ros_pred_bias': lambda x: np.average(x, weights=df_ctrl.loc[x.index, 'sample_cnt']),
+        'ros_stat_bias': lambda x: np.average(x, weights=df_ctrl.loc[x.index, 'sample_cnt']),
+        'ros_pred_mae_return': lambda x: np.average(x, weights=df_ctrl.loc[x.index, 'return_cnt']),
+    }).reset_index()
+
+    print(f"\n{name}(对照组):")
+    low_bias = df_bucket[df_bucket['bucket'] <= 3]['ros_pred_bias'].mean()
+    high_bias = df_bucket[df_bucket['bucket'] >= 8]['ros_pred_bias'].mean()
+    print(f"  低桶(1-3) ros_pred偏差: {low_bias:+.4f}")
+    print(f"  高桶(8-10) ros_pred偏差: {high_bias:+.4f}")
+    print(f"  偏差差异: {high_bias - low_bias:+.4f}")
+
+# ========== 3. Visualization ==========
+fig, axes = plt.subplots(2, 3, figsize=(16, 10))
+
+# 3.1 Business metric bar chart (ROVN per group).
+ax = axes[0, 0]
+groups = ['对照组', 'str+校准', 'str+校准\n&ros统计量', '先验地域降权']
+group_names = ['对照组', '实验组-str+校准', '实验组-str+校准&ros-统计量', '实验组-先验地域降权']
+rovn_values = [df_a_summary[df_a_summary['abcode'] == g]['rovn_real'].values[0] * 1000 for g in group_names]
+colors = ['gray', 'steelblue', 'orange', 'green']
+bars = ax.bar(groups, rovn_values, color=colors, alpha=0.8)
+ax.set_ylabel('rovn (×1000)')
+ax.set_title('各组 ROVN 对比')
+for bar, val in zip(bars, rovn_values):
+    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:.3f}', ha='center', va='bottom')
+
+# 3.2 STR comparison per group.
+ax = axes[0, 1]
+str_values = [df_a_summary[df_a_summary['abcode'] == g]['str_real'].values[0] * 100 for g in group_names]
+bars = ax.bar(groups, str_values, color=colors, alpha=0.8)
+ax.set_ylabel('str_real (%)')
+ax.set_title('各组 STR 对比')
+for bar, val in zip(bars, str_values):
+    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:.3f}', ha='center', va='bottom')
+
+# 3.3 ROS comparison per group.
+ax = axes[0, 2]
+ros_values = [df_a_summary[df_a_summary['abcode'] == g]['ros_real'].values[0] for g in group_names]
+bars = ax.bar(groups, ros_values, color=colors, alpha=0.8)
+ax.set_ylabel('ros_real')
+ax.set_title('各组 ROS 对比')
+for bar, val in zip(bars, ros_values):
+    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height(), f'{val:.3f}', ha='center', va='bottom')
+
+# 3.4-3.5 Bias trend across buckets for both bucketing dimensions.
+for idx, (name, df, title) in enumerate([
+    ('06a', df_a, 'str_pred分桶 - ROS偏差'),
+    ('06c', df_c, 'ros_pred分桶 - ROS偏差')
+]):
+    ax = axes[1, idx]
+    for gname, color in [('对照组', 'gray'), ('实验组-str+校准', 'steelblue'),
+                          ('实验组-str+校准&ros-统计量', 'orange')]:
+        df_g = df[(df['abcode'] == gname) & (df['bucket'] != '全部') & (df['apptype'] == 4)].copy()
+        df_g['bucket'] = df_g['bucket'].astype(int)
+        df_g = df_g.groupby('bucket')['ros_pred_bias'].mean().reset_index()
+        label = gname.replace('实验组-', '')
+        ax.plot(df_g['bucket'], df_g['ros_pred_bias'], 'o-', label=label, color=color, alpha=0.8)
+    ax.axhline(y=0, color='red', linestyle='--', linewidth=1, alpha=0.5)
+    ax.set_xlabel('分桶')
+    ax.set_ylabel('ros_pred偏差')
+    ax.set_title(title)
+    ax.legend(fontsize=8)
+    ax.set_xticks(range(1, 11))
+
+# 3.6 Real vs predicted trend (control group, str_pred buckets).
+ax = axes[1, 2]
+df_ctrl = df_a[(df_a['abcode'] == '对照组') & (df_a['bucket'] != '全部') & (df_a['apptype'] == 4)].copy()
+df_ctrl['bucket'] = df_ctrl['bucket'].astype(int)
+df_bucket = df_ctrl.groupby('bucket').agg({'ros_real': 'mean', 'ros_pred_avg': 'mean', 'ros_stat_avg': 'mean'}).reset_index()
+ax.plot(df_bucket['bucket'], df_bucket['ros_real'], 'o-', label='ros_real', color='green', linewidth=2)
+ax.plot(df_bucket['bucket'], df_bucket['ros_pred_avg'], 's--', label='ros_pred', color='blue', linewidth=2)
+ax.plot(df_bucket['bucket'], df_bucket['ros_stat_avg'], '^--', label='ros_stat', color='orange', linewidth=2)
+ax.set_xlabel('str_pred分桶')
+ax.set_ylabel('ROS')
+ax.set_title('str_pred分桶 - ROS真实vs预测 (对照组)')
+ax.legend()
+ax.set_xticks(range(1, 11))
+
+plt.tight_layout()
+plt.savefig('output/分桶诊断综合分析_full.png', dpi=150, bbox_inches='tight')
+plt.close()
+
+print(f"\n\n图表已保存至: output/分桶诊断综合分析_full.png")
+
+# ========== 4. Full metric summary table: treatments vs control ==========
+print("\n\n【3. 实验组 vs 对照组 完整指标】")
+print("=" * 70)
+
+cols = ['abcode', 'sample_cnt', 'str_real', 'ros_real', 'ros_pred_bias', 'ros_stat_bias',
+        'rovn_real', 'str_one', 'ros_one', 'vovh24']
+summary_table = df_a_summary[df_a_summary['abcode'].isin(exp_groups)][cols].copy()
+summary_table = summary_table.round(6)
+print(summary_table.to_string(index=False))

+ 174 - 0
tasks/承接/线上实验/分桶诊断分析_full_v2.py

@@ -0,0 +1,174 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+
+# NOTE(review): 'Arial Unicode MS' is a macOS font — verify availability on the target host.
+plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
+plt.rcParams['axes.unicode_minus'] = False
+
+# Load data for the listed dates (deliberately excludes yesterday, 20260125).
+dates = ['20260123', '20260124']
+dfs_a, dfs_c = [], []
+
+for dt in dates:
+    dfs_a.append(pd.read_csv(f'output/06a_str_pred分桶诊断_full/{dt}.csv'))
+    dfs_c.append(pd.read_csv(f'output/06c_ros_pred分桶诊断_full/{dt}.csv'))
+
+df_a = pd.concat(dfs_a, ignore_index=True)
+df_c = pd.concat(dfs_c, ignore_index=True)
+
+print(f"数据日期: {dates} (排除昨天)")
+print("=" * 80)
+
+exp_groups = ['对照组', '实验组-str+校准', '实验组-str+校准&ros-统计量']
+
+# ========== 1. Real vs predicted comparison (all-bucket rollup) ==========
+print("\n【1. 真实值 vs 预测值对比 - 全量汇总】")
+print("-" * 80)
+
+# Per-group, sample-count-weighted averages across dates of the '全部' bucket rows.
+df_all = df_a[(df_a['bucket'] == '全部') & (df_a['apptype'] == 4)].copy()
+df_summary = df_all.groupby('abcode', group_keys=False).apply(lambda x: pd.Series({
+    'sample_cnt': x['sample_cnt'].sum(),
+    'str_real': np.average(x['str_real'], weights=x['sample_cnt']),
+    'str_pred': np.average(x['str_pred_avg'], weights=x['sample_cnt']),
+    'ros_real': np.average(x['ros_real'], weights=x['sample_cnt']),
+    'ros_pred': np.average(x['ros_pred_avg'], weights=x['sample_cnt']),
+    'ros_stat': np.average(x['ros_stat_avg'], weights=x['sample_cnt']),
+    'rovn_real': np.average(x['rovn_real'], weights=x['sample_cnt']),
+    'rovn_pred': np.average(x['rovn_pred'], weights=x['sample_cnt']),
+})).reset_index()
+
+print(f"\n{'组别':<35} {'str_real':>10} {'str_pred':>10} {'str差异':>10}")
+print("-" * 70)
+for _, row in df_summary[df_summary['abcode'].isin(exp_groups)].iterrows():
+    str_diff = row['str_pred'] - row['str_real']
+    print(f"{row['abcode']:<35} {row['str_real']:>10.4%} {row['str_pred']:>10.4%} {str_diff:>+10.4%}")
+
+print(f"\n{'组别':<35} {'ros_real':>10} {'ros_pred':>10} {'ros_stat':>10} {'pred差异':>10} {'stat差异':>10}")
+print("-" * 100)
+for _, row in df_summary[df_summary['abcode'].isin(exp_groups)].iterrows():
+    pred_diff = row['ros_pred'] - row['ros_real']
+    stat_diff = row['ros_stat'] - row['ros_real']
+    print(f"{row['abcode']:<35} {row['ros_real']:>10.4f} {row['ros_pred']:>10.4f} {row['ros_stat']:>10.4f} {pred_diff:>+10.4f} {stat_diff:>+10.4f}")
+
+# ========== 2. Per-bucket real vs predicted detail (control group) ==========
+print("\n\n【2. 分桶真实值 vs 预测值详细对比 - 对照组】")
+print("=" * 80)
+
+for name, df in [('str_pred分桶', df_a), ('ros_pred分桶', df_c)]:
+    print(f"\n{name}:")
+    print("-" * 90)
+
+    df_ctrl = df[(df['abcode'] == '对照组') & (df['bucket'] != '全部') & (df['apptype'] == 4)].copy()
+    df_ctrl['bucket'] = df_ctrl['bucket'].astype(int)
+
+    # Aggregate across dates: STR weighted by sample count, ros_real by returning count
+    # (NaN if a bucket has no returning samples).
+    agg_cols = {
+        'sample_cnt': 'sum',
+        'return_cnt': 'sum',
+        'str_real': lambda x: np.average(x, weights=df_ctrl.loc[x.index, 'sample_cnt']),
+        'str_pred_avg': lambda x: np.average(x, weights=df_ctrl.loc[x.index, 'sample_cnt']),
+        'ros_real': lambda x: np.average(x, weights=df_ctrl.loc[x.index, 'return_cnt']) if df_ctrl.loc[x.index, 'return_cnt'].sum() > 0 else np.nan,
+        'ros_pred_avg': lambda x: np.average(x, weights=df_ctrl.loc[x.index, 'sample_cnt']),
+        'ros_stat_avg': lambda x: np.average(x, weights=df_ctrl.loc[x.index, 'sample_cnt']),
+    }
+    df_bucket = df_ctrl.groupby('bucket').agg(agg_cols).reset_index()
+
+    print(f"{'桶':>4} {'样本量':>12} {'str_real':>10} {'str_pred':>10} {'str差异':>10} {'ros_real':>10} {'ros_pred':>10} {'ros_stat':>10} {'pred差异':>10}")
+    print("-" * 110)
+    for _, row in df_bucket.iterrows():
+        str_diff = row['str_pred_avg'] - row['str_real']
+        ros_pred_diff = row['ros_pred_avg'] - row['ros_real'] if pd.notna(row['ros_real']) else np.nan
+        print(f"{int(row['bucket']):>4} {int(row['sample_cnt']):>12,} {row['str_real']:>10.4%} {row['str_pred_avg']:>10.4%} {str_diff:>+10.4%} {row['ros_real']:>10.4f} {row['ros_pred_avg']:>10.4f} {row['ros_stat_avg']:>10.4f} {ros_pred_diff:>+10.4f}")
+
+# ========== 3. Visualization: real vs predicted trends ==========
+fig, axes = plt.subplots(2, 3, figsize=(16, 10))
+date_label = f"数据: {', '.join(dates)}"
+fig.suptitle(f'分桶诊断 - 真实值 vs 预测值对比\n({date_label})', fontsize=14, fontweight='bold')
+
+for row_idx, (name, df, title_prefix) in enumerate([
+    ('str_pred分桶', df_a, 'str_pred分桶'),
+    ('ros_pred分桶', df_c, 'ros_pred分桶')
+]):
+    df_ctrl = df[(df['abcode'] == '对照组') & (df['bucket'] != '全部') & (df['apptype'] == 4)].copy()
+    df_ctrl['bucket'] = df_ctrl['bucket'].astype(int)
+    df_bucket = df_ctrl.groupby('bucket').agg({
+        'str_real': 'mean',
+        'str_pred_avg': 'mean',
+        'ros_real': 'mean',
+        'ros_pred_avg': 'mean',
+        'ros_stat_avg': 'mean',
+        'ros_real_return': 'mean',
+        'ros_pred_return': 'mean',
+    }).reset_index()
+
+    # STR comparison.
+    ax = axes[row_idx, 0]
+    ax.plot(df_bucket['bucket'], df_bucket['str_real'] * 100, 'o-', label='str_real', color='green', linewidth=2)
+    ax.plot(df_bucket['bucket'], df_bucket['str_pred_avg'] * 100, 's--', label='str_pred', color='blue', linewidth=2)
+    ax.set_xlabel('分桶')
+    ax.set_ylabel('STR (%)')
+    ax.set_title(f'{title_prefix} - STR真实vs预测')
+    ax.legend()
+    ax.set_xticks(range(1, 11))
+
+    # ROS comparison (all samples).
+    ax = axes[row_idx, 1]
+    ax.plot(df_bucket['bucket'], df_bucket['ros_real'], 'o-', label='ros_real', color='green', linewidth=2)
+    ax.plot(df_bucket['bucket'], df_bucket['ros_pred_avg'], 's--', label='ros_pred', color='blue', linewidth=2)
+    ax.plot(df_bucket['bucket'], df_bucket['ros_stat_avg'], '^--', label='ros_stat', color='orange', linewidth=2)
+    ax.set_xlabel('分桶')
+    ax.set_ylabel('ROS')
+    ax.set_title(f'{title_prefix} - ROS真实vs预测vs统计量')
+    ax.legend()
+    ax.set_xticks(range(1, 11))
+
+    # ROS comparison (returning samples only).
+    ax = axes[row_idx, 2]
+    ax.plot(df_bucket['bucket'], df_bucket['ros_real_return'], 'o-', label='ros_real(回流)', color='green', linewidth=2)
+    ax.plot(df_bucket['bucket'], df_bucket['ros_pred_return'], 's--', label='ros_pred(回流)', color='blue', linewidth=2)
+    ax.set_xlabel('分桶')
+    ax.set_ylabel('ROS (仅回流样本)')
+    ax.set_title(f'{title_prefix} - ROS回流样本真实vs预测')
+    ax.legend()
+    ax.set_xticks(range(1, 11))
+
+plt.tight_layout(rect=[0, 0, 1, 0.95])
+plt.savefig('output/分桶诊断_真实vs预测对比.png', dpi=150, bbox_inches='tight')
+plt.close()
+
+print(f"\n\n图表已保存至: output/分桶诊断_真实vs预测对比.png")
+
+# ========== 4. Per-group prediction error summary (MAE) ==========
+print("\n\n【3. 各组预测误差汇总 - MAE对比】")
+print("=" * 80)
+
+# NOTE(review): over/under-estimate MAEs are weighted by total return_cnt of the
+# non-NaN rows, not by the over/under split counts — confirm this is intended.
+df_all = df_a[(df_a['bucket'] == '全部') & (df_a['apptype'] == 4)].copy()
+mae_summary = df_all.groupby('abcode', group_keys=False).apply(lambda x: pd.Series({
+    'ros_pred_mae_return': np.average(x['ros_pred_mae_return'], weights=x['return_cnt']),
+    'ros_stat_mae_return': np.average(x['ros_stat_mae_return'], weights=x['return_cnt']),
+    'ros_pred_mae_over': np.average(x['ros_pred_mae_over'].dropna(), weights=x.loc[x['ros_pred_mae_over'].dropna().index, 'return_cnt']) if x['ros_pred_mae_over'].notna().any() else np.nan,
+    'ros_pred_mae_under': np.average(x['ros_pred_mae_under'].dropna(), weights=x.loc[x['ros_pred_mae_under'].dropna().index, 'return_cnt']) if x['ros_pred_mae_under'].notna().any() else np.nan,
+})).reset_index()
+
+print(f"\n{'组别':<35} {'ros_pred_MAE':>12} {'ros_stat_MAE':>12} {'高估MAE':>10} {'低估MAE':>10}")
+print("-" * 85)
+for _, row in mae_summary[mae_summary['abcode'].isin(exp_groups)].iterrows():
+    print(f"{row['abcode']:<35} {row['ros_pred_mae_return']:>12.4f} {row['ros_stat_mae_return']:>12.4f} {row['ros_pred_mae_over']:>10.4f} {row['ros_pred_mae_under']:>10.4f}")
+
+# ========== 5. Key findings summary ==========
+print("\n\n" + "=" * 80)
+print("【关键发现总结】")
+print("=" * 80)
+
+ctrl = df_summary[df_summary['abcode'] == '对照组'].iloc[0]
+print(f"\n对照组基准:")
+print(f"  STR: 真实={ctrl['str_real']:.4%}, 预测={ctrl['str_pred']:.4%}, 差异={ctrl['str_pred']-ctrl['str_real']:+.4%}")
+print(f"  ROS: 真实={ctrl['ros_real']:.4f}, 预测={ctrl['ros_pred']:.4f}, 统计量={ctrl['ros_stat']:.4f}")
+print(f"       pred差异={ctrl['ros_pred']-ctrl['ros_real']:+.4f}, stat差异={ctrl['ros_stat']-ctrl['ros_real']:+.4f}")
+
+for gname in ['实验组-str+校准', '实验组-str+校准&ros-统计量']:
+    row = df_summary[df_summary['abcode'] == gname].iloc[0]
+    print(f"\n{gname}:")
+    print(f"  STR: 真实={row['str_real']:.4%}, 预测={row['str_pred']:.4%}, 差异={row['str_pred']-row['str_real']:+.4%}")
+    print(f"  ROS: 真实={row['ros_real']:.4f}, 预测={row['ros_pred']:.4f}, 统计量={row['ros_stat']:.4f}")
+    print(f"       pred差异={row['ros_pred']-row['ros_real']:+.4f}, stat差异={row['ros_stat']-row['ros_real']:+.4f}")

+ 8 - 0
tasks/报表/01_推荐分发报表.json

@@ -0,0 +1,8 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "NRBX3k",
+  "sort": "dt:desc",
+  "filter": {"hh_bucket": "SUM"},
+  "limit": 40000,
+  "cols": null
+}

+ 678 - 0
tasks/报表/01_推荐分发报表.sql

@@ -0,0 +1,678 @@
+-- =====================================================================
+-- 曝光回流链路 CUBE 聚合表 (宽表版, 含用户/品类维度 + 模型预估 + 全链路漏斗)
+-- 维度: user_type × hh_bucket × head_merge_leve2 × vid_merge_leve2 × vid_id (CUBE)
+-- 依赖: base_20260209 → (JOIN user_type + video_merge_tag + t_score) → CUBE 聚合
+-- 参考: de.sql + dwd_recsys_alg_exposure_agg_20260209
+-- =====================================================================
+--
+-- 指标分区:
+--   基础流量       exposure_cnt / exposure_uv / vid_cnt / exposure_per_user
+--   分享&回流漏斗  share_exposure_cnt → share_cnt → return_exposure_cnt → return_uv
+--                  + 4 个 rate (share_rate / return_rate / return_rate_noself / share_return_rate)
+--   模型预估       STR (曝光→非自身回流概率) / ROSN (条件回流UV) / ROVN (STR×ROSN)
+--                  每组: _real(label) + _pred(预估) + _copc + _mae + _var
+--                  _real 与漏斗字段等价: str_real=return_rate_noself, rovn_real=return_uv_noself/exposure_cnt
+--   B/C/D 链       每级: _uv + _pv + _exp + _ror + _rov
+--   全链路         all_uv/pv/exp = B + C + D, all_ror/rov
+--
+-- rov/ror 分母推导 (逐级递推):
+--   depth 维度: depth=1 的成本=该hop入口; depth=N+1 的成本=depth=N 的输出
+--   hop 维度:   hop1 的成本=链路入口; hop N+1 的成本=hop N 全量depth的输出
+--
+--   B链:  bn/b1 → rov=uv/COUNT(1), ror=uv/COUNT(DISTINCT mid)
+--         b2    → rov=uv/b1_exp,    ror=uv/b1_uv
+--         b3    → rov=uv/b2_exp,    ror=uv/b2_uv
+--   C链全量: cn_1 → rov=uv/bn_exp,    ror=uv/bn_uv
+--            cn_2 → rov=uv/cn_1_exp,  ror=uv/cn_1_uv
+--            cn_3 → rov=uv/cn_2_exp,  ror=uv/cn_2_uv
+--   C链depth拆分: cX_Y → rov=uv/上级exp, ror=uv/上级uv (X=depth, Y=hop)
+--            hop1: c1_1→bn, c2_1→c1_1, c3_1→c2_1
+--            hop2: c1_2→cn_1, c2_2→c1_2, c3_2→c2_2
+--            hop3: c1_3→cn_2, c2_3→c1_3, c3_3→c2_3
+--   D链全量: dn_1 → rov=uv/d0,         ror=uv/COUNT(DISTINCT mid)
+--            dn_2 → rov=uv/dn_1_exp,   ror=uv/dn_1_uv
+--            dn_3 → rov=uv/dn_2_exp,   ror=uv/dn_2_uv
+--   D链depth拆分: dX_Y (同 C 链模式)
+--            hop1: d1_1→d0/mid, d2_1→d1_1, d3_1→d2_1
+--            hop2: d1_2→dn_1, d2_2→d1_2, d3_2→d2_2
+--            hop3: d1_3→dn_2, d2_3→d1_3, d3_3→d2_3
+--   全链路: all → rov=uv/COUNT(1), ror=uv/COUNT(DISTINCT mid)
+-- =====================================================================
+
+-- DROP TABLE IF EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209;
+-- CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 (
+--     -- ==================== 维度列 ====================
+--     dt                         STRING    COMMENT '日期'
+--     ,user_type                  STRING    COMMENT '用户拉活量分层(R0&新用户/R1-50/R_180_330等,汇总为SUM)'
+--     ,hh_bucket                STRING    COMMENT '小时段(00-03/04-07/.../20-23,汇总为SUM)'
+--     ,head_merge_leve2         STRING    COMMENT '进入内容品类(headvideoid品类,汇总为SUM)'
+--     ,vid_merge_leve2          STRING    COMMENT '推荐内容品类(vid品类,TOP10曝光+其他,汇总为SUM)'
+--     ,vid_id                   STRING    COMMENT '内容id(品类曝光TOP1+其他,汇总为SUM)'
+
+--     -- ==================== 基础流量 ====================
+--     ,exposure_cnt             BIGINT    COMMENT '曝光次数'
+--     ,exposure_uv              BIGINT    COMMENT '曝光人数(mid去重)'
+--     ,vid_cnt                  BIGINT    COMMENT '视频个数(vid去重)'
+--     ,exposure_per_user        DOUBLE    COMMENT '人均曝光次数 = 曝光次数/曝光人数'
+
+--     -- ==================== 分享 & 回流漏斗 ====================
+--     ,share_exposure_cnt       BIGINT    COMMENT '产生分享的曝光数'
+--     ,share_cnt                BIGINT    COMMENT '分享总次数'
+--     ,return_exposure_cnt      BIGINT    COMMENT '产生回流的曝光数(含自身) = SUM(is_return_n)'
+--     ,return_exposure_cnt_noself BIGINT  COMMENT '产生回流的曝光数(非自身) = SUM(is_return_noself)'
+--     ,return_uv                BIGINT    COMMENT '回流人数(含自身) = SUM(return_n_uv)'
+--     ,return_uv_noself         BIGINT    COMMENT '回流人数(非自身) = SUM(return_n_uv_noself)'
+--     ,share_rate               DOUBLE    COMMENT '分享率 = share_exposure_cnt/exposure_cnt'
+--     ,return_rate              DOUBLE    COMMENT '回流率(含自身) = return_exposure_cnt/exposure_cnt'
+--     ,return_rate_noself       DOUBLE    COMMENT '回流率(非自身) = return_exposure_cnt_noself/exposure_cnt'
+--     ,share_return_rate        DOUBLE    COMMENT '分享→回流转化率(非自身) = return_exposure_cnt_noself/share_exposure_cnt'
+
+--     -- ==================== 模型预估: STR (曝光→非自身回流概率) ====================
+--     ,str_real                 DOUBLE    COMMENT '= return_rate_noself, 模型label'
+--     ,str_pred                 DOUBLE    COMMENT 'STR预估 = SUM(str_pred)/exposure_cnt'
+--     ,str_copc                 DOUBLE    COMMENT 'STR copc = str_real/str_pred'
+--     ,str_mae                  DOUBLE    COMMENT 'STR MAE = AVG(|str_pred - str_real|)'
+--     ,str_var                  DOUBLE    COMMENT 'STR VAR = VARIANCE(str_pred - str_real)'
+
+--     -- ==================== 模型预估: ROSN (条件回流UV, 非自身) ====================
+--     ,rosn_real                DOUBLE    COMMENT '= return_uv_noself/return_exposure_cnt_noself, 模型label'
+--     ,rosn_pred                DOUBLE    COMMENT 'ROSN预估 = SUM(rosn_pred WHERE is_return_noself=1)/SUM(is_return_noself)'
+--     ,rosn_copc                DOUBLE    COMMENT 'ROSN copc = rosn_real/rosn_pred'
+--     ,rosn_pred_origin         DOUBLE    COMMENT 'ROSN原始预估均值 = AVG(rosn_pred_origin)'
+--     ,rosn_mae                 DOUBLE    COMMENT 'ROSN MAE = AVG(|rosn_pred - rosn_real|) WHERE is_return_noself=1'
+--     ,rosn_var                 DOUBLE    COMMENT 'ROSN VAR = VARIANCE(rosn_pred - rosn_real) WHERE is_return_noself=1'
+
+--     -- ==================== 模型预估: ROVN (STR×ROSN) ====================
+--     ,rovn_real                DOUBLE    COMMENT '= return_uv_noself/exposure_cnt, 模型label'
+--     ,rovn_pred                DOUBLE    COMMENT 'rovn预估 = AVG(str_pred*rosn_pred)'
+--     ,rovn_copc                DOUBLE    COMMENT 'rovn copc = rovn_real/rovn_pred'
+--     ,rovn_mae                 DOUBLE    COMMENT 'rovn MAE = AVG(|rovn_pred - rovn_real|)'
+--     ,rovn_var                 DOUBLE    COMMENT 'rovn VAR = VARIANCE(rovn_pred - rovn_real)'
+--     ,sortscore_avg            DOUBLE    COMMENT 'sortscore均值'
+
+--     -- ==================== B链 (分享→点击) ====================
+--     ,bn_uv                    BIGINT    COMMENT 'B链全量: 回流去重人数'
+--     ,bn_pv                    BIGINT    COMMENT 'B链全量: 回流点击次数'
+--     ,bn_exp                   BIGINT    COMMENT 'B链全量: 回流session曝光数'
+--     ,bn_ror                   DOUBLE    COMMENT 'bn_uv/exposure_uv'
+--     ,bn_rov                   DOUBLE    COMMENT 'bn_uv/exposure_cnt'
+--     ,b1_uv                    BIGINT    COMMENT 'B链depth=1: 回流去重人数'
+--     ,b1_pv                    BIGINT    COMMENT 'B链depth=1: 回流点击次数'
+--     ,b1_exp                   BIGINT    COMMENT 'B链depth=1: 回流session曝光数'
+--     ,b1_ror                   DOUBLE    COMMENT 'b1_uv/exposure_uv'
+--     ,b1_rov                   DOUBLE    COMMENT 'b1_uv/exposure_cnt'
+--     ,b2_uv                    BIGINT    COMMENT 'B链depth=2: 回流去重人数'
+--     ,b2_pv                    BIGINT    COMMENT 'B链depth=2: 回流点击次数'
+--     ,b2_exp                   BIGINT    COMMENT 'B链depth=2: 回流session曝光数'
+--     ,b2_ror                   DOUBLE    COMMENT 'b2_uv/b1_uv'
+--     ,b2_rov                   DOUBLE    COMMENT 'b2_uv/b1_exp'
+--     ,b3_uv                    BIGINT    COMMENT 'B链depth=3: 回流去重人数'
+--     ,b3_pv                    BIGINT    COMMENT 'B链depth=3: 回流点击次数'
+--     ,b3_exp                   BIGINT    COMMENT 'B链depth=3: 回流session曝光数'
+--     ,b3_ror                   DOUBLE    COMMENT 'b3_uv/b2_uv'
+--     ,b3_rov                   DOUBLE    COMMENT 'b3_uv/b2_exp'
+
+--     -- ==================== C链 (全量depth, 按hop) ====================
+--     ,cn_1_uv                  BIGINT    COMMENT 'C链hop1: 回流去重人数'
+--     ,cn_1_pv                  BIGINT    COMMENT 'C链hop1: 回流点击次数'
+--     ,cn_1_exp                 BIGINT    COMMENT 'C链hop1: 回流session曝光数'
+--     ,cn_1_ror                 DOUBLE    COMMENT 'cn_1_uv/bn_uv'
+--     ,cn_1_rov                 DOUBLE    COMMENT 'cn_1_uv/bn_exp'
+--     ,cn_2_uv                  BIGINT    COMMENT 'C链hop2: 回流去重人数'
+--     ,cn_2_pv                  BIGINT    COMMENT 'C链hop2: 回流点击次数'
+--     ,cn_2_exp                 BIGINT    COMMENT 'C链hop2: 回流session曝光数'
+--     ,cn_2_ror                 DOUBLE    COMMENT 'cn_2_uv/cn_1_uv'
+--     ,cn_2_rov                 DOUBLE    COMMENT 'cn_2_uv/cn_1_exp'
+--     ,cn_3_uv                  BIGINT    COMMENT 'C链hop3: 回流去重人数'
+--     ,cn_3_pv                  BIGINT    COMMENT 'C链hop3: 回流点击次数'
+--     ,cn_3_exp                 BIGINT    COMMENT 'C链hop3: 回流session曝光数'
+--     ,cn_3_ror                 DOUBLE    COMMENT 'cn_3_uv/cn_2_uv'
+--     ,cn_3_rov                 DOUBLE    COMMENT 'cn_3_uv/cn_2_exp'
+--     ,cn_total_uv              BIGINT    COMMENT 'C链合计UV = cn_1_uv + cn_2_uv + cn_3_uv'
+--     ,cn_total_pv              BIGINT    COMMENT 'C链合计PV = cn_1_pv + cn_2_pv + cn_3_pv'
+--     ,cn_total_exp             BIGINT    COMMENT 'C链合计EXP = cn_1_exp + cn_2_exp + cn_3_exp'
+--     ,cn_total_ror             DOUBLE    COMMENT 'cn_total_uv/bn_uv'
+--     ,cn_total_rov             DOUBLE    COMMENT 'cn_total_uv/bn_exp'
+--     -- C链 depth拆分 hop1
+--     ,c1_1_uv                  BIGINT    COMMENT 'C链d1-hop1: 回流去重人数'
+--     ,c1_1_pv                  BIGINT    COMMENT 'C链d1-hop1: 回流点击次数'
+--     ,c1_1_exp                 BIGINT    COMMENT 'C链d1-hop1: 回流session曝光数'
+--     ,c1_1_ror                 DOUBLE    COMMENT 'c1_1_uv/bn_uv'
+--     ,c1_1_rov                 DOUBLE    COMMENT 'c1_1_uv/bn_exp'
+--     ,c2_1_uv                  BIGINT    COMMENT 'C链d2-hop1: 回流去重人数'
+--     ,c2_1_pv                  BIGINT    COMMENT 'C链d2-hop1: 回流点击次数'
+--     ,c2_1_exp                 BIGINT    COMMENT 'C链d2-hop1: 回流session曝光数'
+--     ,c2_1_ror                 DOUBLE    COMMENT 'c2_1_uv/c1_1_uv'
+--     ,c2_1_rov                 DOUBLE    COMMENT 'c2_1_uv/c1_1_exp'
+--     ,c3_1_uv                  BIGINT    COMMENT 'C链d3-hop1: 回流去重人数'
+--     ,c3_1_pv                  BIGINT    COMMENT 'C链d3-hop1: 回流点击次数'
+--     ,c3_1_exp                 BIGINT    COMMENT 'C链d3-hop1: 回流session曝光数'
+--     ,c3_1_ror                 DOUBLE    COMMENT 'c3_1_uv/c2_1_uv'
+--     ,c3_1_rov                 DOUBLE    COMMENT 'c3_1_uv/c2_1_exp'
+--     -- C链 depth拆分 hop2
+--     ,c1_2_uv                  BIGINT    COMMENT 'C链d1-hop2: 回流去重人数'
+--     ,c1_2_pv                  BIGINT    COMMENT 'C链d1-hop2: 回流点击次数'
+--     ,c1_2_exp                 BIGINT    COMMENT 'C链d1-hop2: 回流session曝光数'
+--     ,c1_2_ror                 DOUBLE    COMMENT 'c1_2_uv/cn_1_uv'
+--     ,c1_2_rov                 DOUBLE    COMMENT 'c1_2_uv/cn_1_exp'
+--     ,c2_2_uv                  BIGINT    COMMENT 'C链d2-hop2: 回流去重人数'
+--     ,c2_2_pv                  BIGINT    COMMENT 'C链d2-hop2: 回流点击次数'
+--     ,c2_2_exp                 BIGINT    COMMENT 'C链d2-hop2: 回流session曝光数'
+--     ,c2_2_ror                 DOUBLE    COMMENT 'c2_2_uv/c1_2_uv'
+--     ,c2_2_rov                 DOUBLE    COMMENT 'c2_2_uv/c1_2_exp'
+--     ,c3_2_uv                  BIGINT    COMMENT 'C链d3-hop2: 回流去重人数'
+--     ,c3_2_pv                  BIGINT    COMMENT 'C链d3-hop2: 回流点击次数'
+--     ,c3_2_exp                 BIGINT    COMMENT 'C链d3-hop2: 回流session曝光数'
+--     ,c3_2_ror                 DOUBLE    COMMENT 'c3_2_uv/c2_2_uv'
+--     ,c3_2_rov                 DOUBLE    COMMENT 'c3_2_uv/c2_2_exp'
+--     -- C链 depth拆分 hop3
+--     ,c1_3_uv                  BIGINT    COMMENT 'C链d1-hop3: 回流去重人数'
+--     ,c1_3_pv                  BIGINT    COMMENT 'C链d1-hop3: 回流点击次数'
+--     ,c1_3_exp                 BIGINT    COMMENT 'C链d1-hop3: 回流session曝光数'
+--     ,c1_3_ror                 DOUBLE    COMMENT 'c1_3_uv/cn_2_uv'
+--     ,c1_3_rov                 DOUBLE    COMMENT 'c1_3_uv/cn_2_exp'
+--     ,c2_3_uv                  BIGINT    COMMENT 'C链d2-hop3: 回流去重人数'
+--     ,c2_3_pv                  BIGINT    COMMENT 'C链d2-hop3: 回流点击次数'
+--     ,c2_3_exp                 BIGINT    COMMENT 'C链d2-hop3: 回流session曝光数'
+--     ,c2_3_ror                 DOUBLE    COMMENT 'c2_3_uv/c1_3_uv'
+--     ,c2_3_rov                 DOUBLE    COMMENT 'c2_3_uv/c1_3_exp'
+--     ,c3_3_uv                  BIGINT    COMMENT 'C链d3-hop3: 回流去重人数'
+--     ,c3_3_pv                  BIGINT    COMMENT 'C链d3-hop3: 回流点击次数'
+--     ,c3_3_exp                 BIGINT    COMMENT 'C链d3-hop3: 回流session曝光数'
+--     ,c3_3_ror                 DOUBLE    COMMENT 'c3_3_uv/c2_3_uv'
+--     ,c3_3_rov                 DOUBLE    COMMENT 'c3_3_uv/c2_3_exp'
+
+--     -- ==================== D链 (session内后续曝光传播) ====================
+--     ,d0                       BIGINT    COMMENT 'D链初始成本: session内后续曝光数'
+--     ,dn_1_uv                  BIGINT    COMMENT 'D链hop1: 回流去重人数'
+--     ,dn_1_pv                  BIGINT    COMMENT 'D链hop1: 回流点击次数'
+--     ,dn_1_exp                 BIGINT    COMMENT 'D链hop1: 回流session曝光数'
+--     ,dn_1_ror                 DOUBLE    COMMENT 'dn_1_uv/exposure_uv'
+--     ,dn_1_rov                 DOUBLE    COMMENT 'dn_1_uv/d0'
+--     ,dn_2_uv                  BIGINT    COMMENT 'D链hop2: 回流去重人数'
+--     ,dn_2_pv                  BIGINT    COMMENT 'D链hop2: 回流点击次数'
+--     ,dn_2_exp                 BIGINT    COMMENT 'D链hop2: 回流session曝光数'
+--     ,dn_2_ror                 DOUBLE    COMMENT 'dn_2_uv/dn_1_uv'
+--     ,dn_2_rov                 DOUBLE    COMMENT 'dn_2_uv/dn_1_exp'
+--     ,dn_3_uv                  BIGINT    COMMENT 'D链hop3: 回流去重人数'
+--     ,dn_3_pv                  BIGINT    COMMENT 'D链hop3: 回流点击次数'
+--     ,dn_3_exp                 BIGINT    COMMENT 'D链hop3: 回流session曝光数'
+--     ,dn_3_ror                 DOUBLE    COMMENT 'dn_3_uv/dn_2_uv'
+--     ,dn_3_rov                 DOUBLE    COMMENT 'dn_3_uv/dn_2_exp'
+--     ,dn_total_uv              BIGINT    COMMENT 'D链合计UV = dn_1_uv + dn_2_uv + dn_3_uv'
+--     ,dn_total_pv              BIGINT    COMMENT 'D链合计PV = dn_1_pv + dn_2_pv + dn_3_pv'
+--     ,dn_total_exp             BIGINT    COMMENT 'D链合计EXP = dn_1_exp + dn_2_exp + dn_3_exp'
+--     ,dn_total_ror             DOUBLE    COMMENT 'dn_total_uv/exposure_uv'
+--     ,dn_total_rov             DOUBLE    COMMENT 'dn_total_uv/d0'
+--     -- D链 depth拆分 hop1
+--     ,d1_1_uv                  BIGINT    COMMENT 'D链d1-hop1: 回流去重人数'
+--     ,d1_1_pv                  BIGINT    COMMENT 'D链d1-hop1: 回流点击次数'
+--     ,d1_1_exp                 BIGINT    COMMENT 'D链d1-hop1: 回流session曝光数'
+--     ,d1_1_ror                 DOUBLE    COMMENT 'd1_1_uv/exposure_uv'
+--     ,d1_1_rov                 DOUBLE    COMMENT 'd1_1_uv/d0'
+--     ,d2_1_uv                  BIGINT    COMMENT 'D链d2-hop1: 回流去重人数'
+--     ,d2_1_pv                  BIGINT    COMMENT 'D链d2-hop1: 回流点击次数'
+--     ,d2_1_exp                 BIGINT    COMMENT 'D链d2-hop1: 回流session曝光数'
+--     ,d2_1_ror                 DOUBLE    COMMENT 'd2_1_uv/d1_1_uv'
+--     ,d2_1_rov                 DOUBLE    COMMENT 'd2_1_uv/d1_1_exp'
+--     ,d3_1_uv                  BIGINT    COMMENT 'D链d3-hop1: 回流去重人数'
+--     ,d3_1_pv                  BIGINT    COMMENT 'D链d3-hop1: 回流点击次数'
+--     ,d3_1_exp                 BIGINT    COMMENT 'D链d3-hop1: 回流session曝光数'
+--     ,d3_1_ror                 DOUBLE    COMMENT 'd3_1_uv/d2_1_uv'
+--     ,d3_1_rov                 DOUBLE    COMMENT 'd3_1_uv/d2_1_exp'
+--     -- D链 depth拆分 hop2
+--     ,d1_2_uv                  BIGINT    COMMENT 'D链d1-hop2: 回流去重人数'
+--     ,d1_2_pv                  BIGINT    COMMENT 'D链d1-hop2: 回流点击次数'
+--     ,d1_2_exp                 BIGINT    COMMENT 'D链d1-hop2: 回流session曝光数'
+--     ,d1_2_ror                 DOUBLE    COMMENT 'd1_2_uv/dn_1_uv'
+--     ,d1_2_rov                 DOUBLE    COMMENT 'd1_2_uv/dn_1_exp'
+--     ,d2_2_uv                  BIGINT    COMMENT 'D链d2-hop2: 回流去重人数'
+--     ,d2_2_pv                  BIGINT    COMMENT 'D链d2-hop2: 回流点击次数'
+--     ,d2_2_exp                 BIGINT    COMMENT 'D链d2-hop2: 回流session曝光数'
+--     ,d2_2_ror                 DOUBLE    COMMENT 'd2_2_uv/d1_2_uv'
+--     ,d2_2_rov                 DOUBLE    COMMENT 'd2_2_uv/d1_2_exp'
+--     ,d3_2_uv                  BIGINT    COMMENT 'D链d3-hop2: 回流去重人数'
+--     ,d3_2_pv                  BIGINT    COMMENT 'D链d3-hop2: 回流点击次数'
+--     ,d3_2_exp                 BIGINT    COMMENT 'D链d3-hop2: 回流session曝光数'
+--     ,d3_2_ror                 DOUBLE    COMMENT 'd3_2_uv/d2_2_uv'
+--     ,d3_2_rov                 DOUBLE    COMMENT 'd3_2_uv/d2_2_exp'
+--     -- D链 depth拆分 hop3
+--     ,d1_3_uv                  BIGINT    COMMENT 'D链d1-hop3: 回流去重人数'
+--     ,d1_3_pv                  BIGINT    COMMENT 'D链d1-hop3: 回流点击次数'
+--     ,d1_3_exp                 BIGINT    COMMENT 'D链d1-hop3: 回流session曝光数'
+--     ,d1_3_ror                 DOUBLE    COMMENT 'd1_3_uv/dn_2_uv'
+--     ,d1_3_rov                 DOUBLE    COMMENT 'd1_3_uv/dn_2_exp'
+--     ,d2_3_uv                  BIGINT    COMMENT 'D链d2-hop3: 回流去重人数'
+--     ,d2_3_pv                  BIGINT    COMMENT 'D链d2-hop3: 回流点击次数'
+--     ,d2_3_exp                 BIGINT    COMMENT 'D链d2-hop3: 回流session曝光数'
+--     ,d2_3_ror                 DOUBLE    COMMENT 'd2_3_uv/d1_3_uv'
+--     ,d2_3_rov                 DOUBLE    COMMENT 'd2_3_uv/d1_3_exp'
+--     ,d3_3_uv                  BIGINT    COMMENT 'D链d3-hop3: 回流去重人数'
+--     ,d3_3_pv                  BIGINT    COMMENT 'D链d3-hop3: 回流点击次数'
+--     ,d3_3_exp                 BIGINT    COMMENT 'D链d3-hop3: 回流session曝光数'
+--     ,d3_3_ror                 DOUBLE    COMMENT 'd3_3_uv/d2_3_uv'
+--     ,d3_3_rov                 DOUBLE    COMMENT 'd3_3_uv/d2_3_exp'
+
+--     -- ==================== 全链路 ====================
+--     ,all_uv                   BIGINT    COMMENT '全链路拉回UV = bn_uv + cn_total_uv + dn_total_uv'
+--     ,all_pv                   BIGINT    COMMENT '全链路拉回PV = bn_pv + cn_total_pv + dn_total_pv'
+--     ,all_exp                  BIGINT    COMMENT '全链路拉回EXP = bn_exp + cn_total_exp + dn_total_exp'
+--     ,all_ror                  DOUBLE    COMMENT 'all_uv/exposure_uv'
+--     ,all_rov                  DOUBLE    COMMENT 'all_uv/exposure_cnt'
+-- )
+-- COMMENT '曝光回流链路CUBE聚合-宽表版 (5维度: 用户分层/小时段/进入品类TOP1/推荐品类TOP10/内容idTOP1)'
+-- ;
+
+
+-- -- =====================================================================
+-- -- INSERT: 从 wide 宽表聚合, CUBE 全维度组合
+-- -- =====================================================================
+
+-- -- SELECT * FROM loghubods.dwd_recsys_alg_exposure_agg_wide_20260209 WHERE dt = '${dt}' ORDER BY exposure_cnt DESC LIMIT 100;
+
+-- INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_agg_wide_20260209
+
+WITH
+-- 用户拉活量分层
+t_user_type AS (
+    SELECT  DISTINCT type, openid
+    FROM    loghubods.mid_share_return_people_1year
+    WHERE   dt = TO_CHAR(DATEADD(TO_DATE('${dt}','YYYYMMDD'),-1,'dd'),'YYYYMMDD')
+    AND     type IS NOT NULL
+    AND     type != 'S_ALL'
+    AND     type NOT REGEXP 'R50'
+)
+-- 模型预估分数
+,t_score AS (
+    SELECT  apptype
+            ,videoid
+            ,recommendtraceid
+            ,scoresmap
+            ,sortscore
+    FROM    (
+                SELECT  apptype
+                        ,videoid
+                        ,recommendtraceid
+                        ,scoresmap
+                        ,sortscore
+                        ,ROW_NUMBER() OVER (PARTITION BY apptype,videoid,recommendtraceid) AS rn
+                FROM    loghubods.statistics_log_hour
+                WHERE   dt LIKE '${dt}%'
+                AND     scoresmap IS NOT NULL
+            )
+    WHERE   rn = 1
+)
+-- TOP1 进入内容品类(场): 按回流人数取 TOP1
+,t_top_head_cate AS (
+    SELECT  vt.merge_leve2
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    JOIN    loghubods.video_merge_tag vt ON base.headvideoid = vt.videoid
+    WHERE   base.dt = '${dt}'
+    AND     vt.merge_leve2 IS NOT NULL
+    GROUP BY vt.merge_leve2
+    ORDER BY SUM(CAST(is_return_noself AS BIGINT)) DESC
+    LIMIT   1
+)
+-- TOP10 推荐内容品类(货): 按曝光次数取 TOP10
+,t_top_vid_cate AS (
+    SELECT  vt.merge_leve2
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+    WHERE   base.dt = '${dt}'
+    AND     vt.merge_leve2 IS NOT NULL
+    GROUP BY vt.merge_leve2
+    ORDER BY COUNT(1) DESC
+    LIMIT   10
+)
+-- TOP1 内容id(货): 每品类按曝光次数取 TOP1 (曝光>10w)
+,t_top_vid AS (
+    SELECT  merge_leve2, vid
+    FROM    (
+                SELECT  vt.merge_leve2
+                        ,base.vid
+                        ,COUNT(1)                                                               AS exp_cnt
+                        ,ROW_NUMBER() OVER (PARTITION BY vt.merge_leve2 ORDER BY COUNT(1) DESC) AS rk
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+                JOIN    loghubods.video_merge_tag vt ON base.vid = vt.videoid
+                WHERE   base.dt = '${dt}'
+                AND     vt.merge_leve2 IS NOT NULL
+                GROUP BY vt.merge_leve2, base.vid
+                HAVING  exp_cnt > 100000
+            )
+    WHERE   rk <= 1
+)
+-- 宽表
+,t_wide AS (
+    SELECT  base.*
+            ,CASE WHEN e.type IS NULL OR e.type = 'R_0'                       THEN 'R0&新用户'
+                  WHEN e.type IN ('R_1','R_2_10','R_10_50')                    THEN 'R1-50'
+                  WHEN e.type IN ('R_50_100','R_100_180','R_180_330')           THEN 'R_180_330'
+                  ELSE e.type
+             END AS user_type
+            ,CASE WHEN th.merge_leve2 IS NOT NULL THEN vt_head.merge_leve2 ELSE '其他' END AS head_merge_leve2
+            ,CASE WHEN tv.merge_leve2 IS NOT NULL THEN vt_vid.merge_leve2  ELSE '其他' END AS vid_merge_leve2
+            ,CASE WHEN ti.vid IS NOT NULL          THEN base.vid           ELSE '其他' END AS vid_id
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            ,1.22*POW(CAST(GET_JSON_OBJECT(e1.scoresmap,'$.NorXGBScore') AS DOUBLE),1.15) AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(e1.scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_pred_origin
+            ,e1.sortscore
+            ,CASE
+                WHEN CAST(hh AS INT) BETWEEN 0  AND 3  THEN '00-03'
+                WHEN CAST(hh AS INT) BETWEEN 4  AND 7  THEN '04-07'
+                WHEN CAST(hh AS INT) BETWEEN 8  AND 11 THEN '08-11'
+                WHEN CAST(hh AS INT) BETWEEN 12 AND 15 THEN '12-15'
+                WHEN CAST(hh AS INT) BETWEEN 16 AND 19 THEN '16-19'
+                WHEN CAST(hh AS INT) BETWEEN 20 AND 23 THEN '20-23'
+                ELSE '-'
+             END AS hh_bucket
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20260209 base
+    LEFT JOIN t_user_type e
+    ON      SUBSTRING_INDEX(base.mid,'weixin_openid_',-1) = e.openid
+    LEFT JOIN loghubods.video_merge_tag vt_head
+    ON      base.headvideoid = vt_head.videoid
+    LEFT JOIN loghubods.video_merge_tag vt_vid
+    ON      base.vid = vt_vid.videoid
+    LEFT JOIN t_score e1
+    ON      base.apptype = e1.apptype
+    AND     base.vid = e1.videoid
+    AND     base.recomtraceid = e1.recommendtraceid
+    LEFT JOIN t_top_head_cate th
+    ON      vt_head.merge_leve2 = th.merge_leve2
+    LEFT JOIN t_top_vid_cate tv
+    ON      vt_vid.merge_leve2 = tv.merge_leve2
+    LEFT JOIN t_top_vid ti
+    ON      base.vid = ti.vid
+    WHERE   base.dt = '${dt}'
+)
+
+SELECT
+        -- ==================== 维度列 ====================
+        '${dt}'                                                                                     AS dt
+        ,CASE WHEN GROUPING(user_type) = 1 THEN 'SUM' ELSE NVL(user_type, 'SUM') END              AS user_type
+        ,CASE WHEN GROUPING(hh_bucket) = 1 THEN 'SUM' ELSE NVL(hh_bucket, 'SUM') END              AS hh_bucket
+        ,CASE WHEN GROUPING(head_merge_leve2) = 1 THEN 'SUM' ELSE NVL(head_merge_leve2, 'SUM') END AS head_merge_leve2
+        ,CASE WHEN GROUPING(vid_merge_leve2) = 1 THEN 'SUM' ELSE NVL(vid_merge_leve2, 'SUM') END   AS vid_merge_leve2
+        ,CASE WHEN GROUPING(vid_id) = 1 THEN 'SUM' ELSE NVL(vid_id, 'SUM') END                     AS vid_id
+
+        -- ==================== 基础流量 ====================
+        ,COUNT(1)                                                                       AS exposure_cnt
+        ,COUNT(DISTINCT mid)                                                            AS exposure_uv
+        ,COUNT(DISTINCT vid)                                                            AS vid_cnt
+        ,ROUND(COUNT(1) / COUNT(DISTINCT mid), 4)                                      AS exposure_per_user
+
+        -- ==================== 分享 & 回流漏斗 ====================
+        ,SUM(CAST(is_share AS BIGINT))                                                  AS share_exposure_cnt
+        ,SUM(CAST(share_cnt AS BIGINT))                                                 AS share_cnt
+        ,SUM(CAST(is_return_n AS BIGINT))                                               AS return_exposure_cnt
+        ,SUM(CAST(is_return_noself AS BIGINT))                                          AS return_exposure_cnt_noself
+        ,SUM(CAST(return_n_uv AS BIGINT))                                               AS return_uv
+        ,SUM(CAST(return_n_uv_noself AS BIGINT))                                        AS return_uv_noself
+        ,ROUND(COALESCE(SUM(CAST(is_share AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                            AS share_rate
+        ,ROUND(COALESCE(SUM(CAST(is_return_n AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                         AS return_rate
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                                    AS return_rate_noself
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(CAST(SUM(CAST(is_share AS BIGINT)) AS DOUBLE), 0), 0), 6)               AS share_return_rate
+
+        -- ==================== 模型预估: STR (曝光→非自身回流概率) ====================
+        ,ROUND(COALESCE(SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS str_real
+        ,ROUND(COALESCE(SUM(str_pred) / NULLIF(COUNT(1), 0), 0), 6)                          AS str_pred
+        ,ROUND(
+            (SUM(CAST(is_return_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(SUM(str_pred) / NULLIF(COUNT(1), 0), 0)
+        , 4)                                                                                   AS str_copc
+        ,ROUND(AVG(ABS(str_pred - CAST(is_return_noself AS BIGINT))), 6)                      AS str_mae
+        ,ROUND(VARIANCE(str_pred - CAST(is_return_noself AS BIGINT)), 6)                      AS str_var
+
+        -- ==================== 模型预估: ROSN (条件回流UV, 非自身) ====================
+        ,ROUND(COALESCE(
+            SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0)
+        , 0), 6)                                                                               AS rosn_real
+        ,ROUND(COALESCE(SUM(CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred END) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0), 6)  AS rosn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0))
+            / NULLIF(SUM(CASE WHEN CAST(is_return_noself AS BIGINT) = 1 THEN rosn_pred END) / NULLIF(SUM(CAST(is_return_noself AS BIGINT)), 0), 0)
+        , 4)                                                                                   AS rosn_copc
+        ,ROUND(AVG(rosn_pred_origin), 6)                                                      AS rosn_pred_origin
+        ,ROUND(AVG(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN ABS(rosn_pred - CAST(return_n_uv_noself AS BIGINT))
+            END
+        ), 6)                                                                                  AS rosn_mae
+        ,ROUND(VARIANCE(
+            CASE WHEN CAST(is_return_noself AS BIGINT) = 1
+                 THEN rosn_pred - CAST(return_n_uv_noself AS BIGINT)
+            END
+        ), 6)                                                                                  AS rosn_var
+
+        -- ==================== 模型预估: ROVN (STR×ROSN) ====================
+        ,ROUND(COALESCE(SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0), 0), 6)  AS rovn_real
+        ,ROUND(AVG(str_pred * rosn_pred), 6)                                                    AS rovn_pred
+        ,ROUND(
+            (SUM(CAST(return_n_uv_noself AS BIGINT)) / NULLIF(COUNT(1), 0))
+            / NULLIF(AVG(str_pred * rosn_pred), 0)
+        , 4)                                                                                     AS rovn_copc
+        ,ROUND(AVG(ABS(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT))), 6)            AS rovn_mae
+        ,ROUND(VARIANCE(str_pred * rosn_pred - CAST(return_n_uv_noself AS BIGINT)), 6)        AS rovn_var
+        ,ROUND(AVG(CAST(sortscore AS DOUBLE)), 6)                                               AS sortscore_avg
+
+        -- ==================== B链 ====================
+        ,SUM(CAST(bn_uv AS BIGINT))                                                     AS bn_uv
+        ,SUM(CAST(bn_pv AS BIGINT))                                                     AS bn_pv
+        ,SUM(CAST(bn_exp AS BIGINT))                                                    AS bn_exp
+        ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)               AS bn_ror
+        ,ROUND(COALESCE(SUM(CAST(bn_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                          AS bn_rov
+        ,SUM(CAST(b1_uv AS BIGINT))                                                     AS b1_uv
+        ,SUM(CAST(b1_pv AS BIGINT))                                                     AS b1_pv
+        ,SUM(CAST(b1_exp AS BIGINT))                                                    AS b1_exp
+        ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)               AS b1_ror
+        ,ROUND(COALESCE(SUM(CAST(b1_uv AS BIGINT)) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0), 0), 6)                          AS b1_rov
+        ,SUM(CAST(b2_uv AS BIGINT))                                                     AS b2_uv
+        ,SUM(CAST(b2_pv AS BIGINT))                                                     AS b2_pv
+        ,SUM(CAST(b2_exp AS BIGINT))                                                    AS b2_exp
+        ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS b2_ror
+        ,ROUND(COALESCE(SUM(CAST(b2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)       AS b2_rov
+        ,SUM(CAST(b3_uv AS BIGINT))                                                     AS b3_uv
+        ,SUM(CAST(b3_pv AS BIGINT))                                                     AS b3_pv
+        ,SUM(CAST(b3_exp AS BIGINT))                                                    AS b3_exp
+        ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS b3_ror
+        ,ROUND(COALESCE(SUM(CAST(b3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(b2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)       AS b3_rov
+
+        -- ==================== C链 (全量depth, 按hop) ====================
+        ,SUM(CAST(cn_1_uv AS BIGINT))                                                   AS cn_1_uv
+        ,SUM(CAST(cn_1_pv AS BIGINT))                                                   AS cn_1_pv
+        ,SUM(CAST(cn_1_exp AS BIGINT))                                                  AS cn_1_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)      AS cn_1_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)     AS cn_1_rov
+        ,SUM(CAST(cn_2_uv AS BIGINT))                                                   AS cn_2_uv
+        ,SUM(CAST(cn_2_pv AS BIGINT))                                                   AS cn_2_pv
+        ,SUM(CAST(cn_2_exp AS BIGINT))                                                  AS cn_2_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_2_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_2_rov
+        ,SUM(CAST(cn_3_uv AS BIGINT))                                                   AS cn_3_uv
+        ,SUM(CAST(cn_3_pv AS BIGINT))                                                   AS cn_3_pv
+        ,SUM(CAST(cn_3_exp AS BIGINT))                                                  AS cn_3_exp
+        ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS cn_3_ror
+        ,ROUND(COALESCE(SUM(CAST(cn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS cn_3_rov
+        ,SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))    AS cn_total_uv
+        ,SUM(CAST(cn_1_pv AS BIGINT)) + SUM(CAST(cn_2_pv AS BIGINT)) + SUM(CAST(cn_3_pv AS BIGINT))    AS cn_total_pv
+        ,SUM(CAST(cn_1_exp AS BIGINT)) + SUM(CAST(cn_2_exp AS BIGINT)) + SUM(CAST(cn_3_exp AS BIGINT))  AS cn_total_exp
+        ,ROUND(COALESCE(
+            (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS cn_total_ror
+        ,ROUND(COALESCE(
+            (SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS cn_total_rov
+
+        -- ==================== C链 (按depth拆分) ====================
+        -- hop1
+        ,SUM(CAST(c1_1_uv AS BIGINT))                                                     AS c1_1_uv
+        ,SUM(CAST(c1_1_pv AS BIGINT))                                                     AS c1_1_pv
+        ,SUM(CAST(c1_1_exp AS BIGINT))                                                    AS c1_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)      AS c1_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(bn_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)     AS c1_1_rov
+        ,SUM(CAST(c2_1_uv AS BIGINT))                                                     AS c2_1_uv
+        ,SUM(CAST(c2_1_pv AS BIGINT))                                                     AS c2_1_pv
+        ,SUM(CAST(c2_1_exp AS BIGINT))                                                    AS c2_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_1_rov
+        ,SUM(CAST(c3_1_uv AS BIGINT))                                                     AS c3_1_uv
+        ,SUM(CAST(c3_1_pv AS BIGINT))                                                     AS c3_1_pv
+        ,SUM(CAST(c3_1_exp AS BIGINT))                                                    AS c3_1_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_1_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_1_rov
+        -- hop2
+        ,SUM(CAST(c1_2_uv AS BIGINT))                                                     AS c1_2_uv
+        ,SUM(CAST(c1_2_pv AS BIGINT))                                                     AS c1_2_pv
+        ,SUM(CAST(c1_2_exp AS BIGINT))                                                    AS c1_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c1_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c1_2_rov
+        ,SUM(CAST(c2_2_uv AS BIGINT))                                                     AS c2_2_uv
+        ,SUM(CAST(c2_2_pv AS BIGINT))                                                     AS c2_2_pv
+        ,SUM(CAST(c2_2_exp AS BIGINT))                                                    AS c2_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_2_rov
+        ,SUM(CAST(c3_2_uv AS BIGINT))                                                     AS c3_2_uv
+        ,SUM(CAST(c3_2_pv AS BIGINT))                                                     AS c3_2_pv
+        ,SUM(CAST(c3_2_exp AS BIGINT))                                                    AS c3_2_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_2_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_2_rov
+        -- hop3
+        ,SUM(CAST(c1_3_uv AS BIGINT))                                                     AS c1_3_uv
+        ,SUM(CAST(c1_3_pv AS BIGINT))                                                     AS c1_3_pv
+        ,SUM(CAST(c1_3_exp AS BIGINT))                                                    AS c1_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c1_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(cn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c1_3_rov
+        ,SUM(CAST(c2_3_uv AS BIGINT))                                                     AS c2_3_uv
+        ,SUM(CAST(c2_3_pv AS BIGINT))                                                     AS c2_3_pv
+        ,SUM(CAST(c2_3_exp AS BIGINT))                                                    AS c2_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c2_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c2_3_rov
+        ,SUM(CAST(c3_3_uv AS BIGINT))                                                     AS c3_3_uv
+        ,SUM(CAST(c3_3_pv AS BIGINT))                                                     AS c3_3_pv
+        ,SUM(CAST(c3_3_exp AS BIGINT))                                                    AS c3_3_exp
+        ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)    AS c3_3_ror
+        ,ROUND(COALESCE(SUM(CAST(c3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(c2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS c3_3_rov
+
+        -- ==================== D链 (全量depth, 按hop) ====================
+        ,SUM(CAST(d0 AS BIGINT))                                                        AS d0
+        ,SUM(CAST(dn_1_uv AS BIGINT))                                                   AS dn_1_uv
+        ,SUM(CAST(dn_1_pv AS BIGINT))                                                   AS dn_1_pv
+        ,SUM(CAST(dn_1_exp AS BIGINT))                                                  AS dn_1_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)            AS dn_1_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS dn_1_rov
+        ,SUM(CAST(dn_2_uv AS BIGINT))                                                   AS dn_2_uv
+        ,SUM(CAST(dn_2_pv AS BIGINT))                                                   AS dn_2_pv
+        ,SUM(CAST(dn_2_exp AS BIGINT))                                                  AS dn_2_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_2_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_2_rov
+        ,SUM(CAST(dn_3_uv AS BIGINT))                                                   AS dn_3_uv
+        ,SUM(CAST(dn_3_pv AS BIGINT))                                                   AS dn_3_pv
+        ,SUM(CAST(dn_3_exp AS BIGINT))                                                  AS dn_3_exp
+        ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS dn_3_ror
+        ,ROUND(COALESCE(SUM(CAST(dn_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS dn_3_rov
+        ,SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))    AS dn_total_uv
+        ,SUM(CAST(dn_1_pv AS BIGINT)) + SUM(CAST(dn_2_pv AS BIGINT)) + SUM(CAST(dn_3_pv AS BIGINT))    AS dn_total_pv
+        ,SUM(CAST(dn_1_exp AS BIGINT)) + SUM(CAST(dn_2_exp AS BIGINT)) + SUM(CAST(dn_3_exp AS BIGINT))  AS dn_total_exp
+        ,ROUND(COALESCE(
+            (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
+            / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS dn_total_ror
+        ,ROUND(COALESCE(
+            (SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT)))
+            / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0)
+        , 0), 6)                                                                                                            AS dn_total_rov
+
+        -- ==================== D链 (按depth拆分) ====================
+        -- hop1
+        ,SUM(CAST(d1_1_uv AS BIGINT))                                                     AS d1_1_uv
+        ,SUM(CAST(d1_1_pv AS BIGINT))                                                     AS d1_1_pv
+        ,SUM(CAST(d1_1_exp AS BIGINT))                                                    AS d1_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0), 0), 6)            AS d1_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d0 AS BIGINT)) AS DOUBLE), 0), 0), 6)        AS d1_1_rov
+        ,SUM(CAST(d2_1_uv AS BIGINT))                                                     AS d2_1_uv
+        ,SUM(CAST(d2_1_pv AS BIGINT))                                                     AS d2_1_pv
+        ,SUM(CAST(d2_1_exp AS BIGINT))                                                    AS d2_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_1_rov
+        ,SUM(CAST(d3_1_uv AS BIGINT))                                                     AS d3_1_uv
+        ,SUM(CAST(d3_1_pv AS BIGINT))                                                     AS d3_1_pv
+        ,SUM(CAST(d3_1_exp AS BIGINT))                                                    AS d3_1_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_1_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_1_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_1_rov
+        -- hop2
+        ,SUM(CAST(d1_2_uv AS BIGINT))                                                     AS d1_2_uv
+        ,SUM(CAST(d1_2_pv AS BIGINT))                                                     AS d1_2_pv
+        ,SUM(CAST(d1_2_exp AS BIGINT))                                                    AS d1_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d1_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_1_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d1_2_rov
+        ,SUM(CAST(d2_2_uv AS BIGINT))                                                     AS d2_2_uv
+        ,SUM(CAST(d2_2_pv AS BIGINT))                                                     AS d2_2_pv
+        ,SUM(CAST(d2_2_exp AS BIGINT))                                                    AS d2_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_2_rov
+        ,SUM(CAST(d3_2_uv AS BIGINT))                                                     AS d3_2_uv
+        ,SUM(CAST(d3_2_pv AS BIGINT))                                                     AS d3_2_pv
+        ,SUM(CAST(d3_2_exp AS BIGINT))                                                    AS d3_2_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_2_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_2_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_2_rov
+        -- hop3
+        ,SUM(CAST(d1_3_uv AS BIGINT))                                                     AS d1_3_uv
+        ,SUM(CAST(d1_3_pv AS BIGINT))                                                     AS d1_3_pv
+        ,SUM(CAST(d1_3_exp AS BIGINT))                                                    AS d1_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d1_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d1_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(dn_2_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d1_3_rov
+        ,SUM(CAST(d2_3_uv AS BIGINT))                                                     AS d2_3_uv
+        ,SUM(CAST(d2_3_pv AS BIGINT))                                                     AS d2_3_pv
+        ,SUM(CAST(d2_3_exp AS BIGINT))                                                    AS d2_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d2_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d2_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d1_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d2_3_rov
+        ,SUM(CAST(d3_3_uv AS BIGINT))                                                     AS d3_3_uv
+        ,SUM(CAST(d3_3_pv AS BIGINT))                                                     AS d3_3_pv
+        ,SUM(CAST(d3_3_exp AS BIGINT))                                                    AS d3_3_exp
+        ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_uv AS BIGINT)) AS DOUBLE), 0), 0), 6)   AS d3_3_ror
+        ,ROUND(COALESCE(SUM(CAST(d3_3_uv AS BIGINT)) / NULLIF(CAST(SUM(CAST(d2_3_exp AS BIGINT)) AS DOUBLE), 0), 0), 6)  AS d3_3_rov
+
+        -- ==================== 全链路 ====================
+        ,SUM(CAST(bn_uv AS BIGINT))
+            + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+            + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+                                                                                        AS all_uv
+        ,SUM(CAST(bn_pv AS BIGINT))
+            + SUM(CAST(cn_1_pv AS BIGINT)) + SUM(CAST(cn_2_pv AS BIGINT)) + SUM(CAST(cn_3_pv AS BIGINT))
+            + SUM(CAST(dn_1_pv AS BIGINT)) + SUM(CAST(dn_2_pv AS BIGINT)) + SUM(CAST(dn_3_pv AS BIGINT))
+                                                                                        AS all_pv
+        ,SUM(CAST(bn_exp AS BIGINT))
+            + SUM(CAST(cn_1_exp AS BIGINT)) + SUM(CAST(cn_2_exp AS BIGINT)) + SUM(CAST(cn_3_exp AS BIGINT))
+            + SUM(CAST(dn_1_exp AS BIGINT)) + SUM(CAST(dn_2_exp AS BIGINT)) + SUM(CAST(dn_3_exp AS BIGINT))
+                                                                                        AS all_exp
+        ,ROUND(COALESCE(
+            (   SUM(CAST(bn_uv AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(CAST(COUNT(DISTINCT mid) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_ror
+        ,ROUND(COALESCE(
+            (   SUM(CAST(bn_uv AS BIGINT))
+              + SUM(CAST(cn_1_uv AS BIGINT)) + SUM(CAST(cn_2_uv AS BIGINT)) + SUM(CAST(cn_3_uv AS BIGINT))
+              + SUM(CAST(dn_1_uv AS BIGINT)) + SUM(CAST(dn_2_uv AS BIGINT)) + SUM(CAST(dn_3_uv AS BIGINT))
+            ) / NULLIF(CAST(COUNT(1) AS DOUBLE), 0)
+        , 0), 6)                                                                        AS all_rov
+
+
+-- =====================================================================
+-- FROM + GROUP BY CUBE
+-- =====================================================================
+
+FROM    t_wide
+
+GROUP BY CUBE(
+            user_type
+            ,hh_bucket
+            ,head_merge_leve2
+            ,vid_merge_leve2
+            ,vid_id
+        )
+
+ORDER BY exposure_cnt DESC
+;

+ 293 - 0
tasks/指标分析/02_实验组xTop20视频_vs对照组_误差分析_v2_hh.sql

@@ -0,0 +1,293 @@
+-- Preprocessing: parse scoresmap out of extend_alg + classify pages
+-- v4: added top20-vid grouping + GROUPING SETS + exposure share
+-- v5: added change-rate columns relative to the control group
+-- v6: added rosn_ori (uncalibrated raw score) to compare against calibrated rosn_pred
+-- v7: added rosn log-space stddev (measures dispersion of the relative/multiplicative error)
+WITH t_raw AS
+(
+    -- Base scan: exposure samples for ${dt}, restricted to apptypes 0/4 and
+    -- ab buckets ab0..ab9, keeping only rows with a parseable scoresMap payload.
+    -- scoresmap: scoresMap JSON with escape backslashes stripped so nested
+    --            GET_JSON_OBJECT calls can read inner fields.
+    -- page_type: buckets raw page names into recommendation / non-recommendation.
+    SELECT  *
+            ,REPLACE(GET_JSON_OBJECT(extend_alg,'$.scoresMap'),"\\","") AS scoresmap
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page_type
+    FROM    loghubods.dwd_recsys_alg_sample_all_20250212
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("0","4")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): redundant — the IN list above already excludes "ab100"
+    AND     abcode NOT IN ("ab100")
+    AND     extend_alg IS NOT NULL
+    AND     GET_JSON_OBJECT(extend_alg,'$.scoresMap') IS NOT NULL
+)
+-- Filter: keep recommendation pages only (page_type computed in t_raw)
+,t_filtered AS
+(
+    SELECT  *
+    FROM    t_raw
+    WHERE   page_type = "推荐"
+)
+-- Feature extraction and dimension mapping
+,t_base AS
+(
+    -- NOTE: the raw abcode is remapped into experiment-group labels and the
+    -- alias shadows the original column — downstream CTEs only see the label.
+    SELECT  dt
+            ,apptype
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "ab4-5"
+                    ELSE "其他"
+            END AS abcode
+            ,page_type AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+            ,scoresmap
+            -- Model scores parsed from scoresmap:
+            -- str_pred : share-through-rate prediction (fmRov)
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            -- rosn_pred: power-law calibrated NorXGBScore (1.22 * x^1.15);
+            -- calibration constants presumably fit offline — TODO confirm source
+            ,1.22 * pow(CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE), 1.15) AS rosn_pred
+            -- rosn_ori : the same score, uncalibrated (v6 comparison baseline)
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE) AS rosn_ori
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_stat
+            ,GET_JSON_OBJECT(v1_feature,'$.title') AS vid_title
+    FROM    t_filtered
+)
+-- Keep only rows where both model scores parsed to non-NULL doubles
+,t_valid AS
+(
+    SELECT  *
+    FROM    t_base
+    WHERE   str_pred IS NOT NULL
+    AND     rosn_pred IS NOT NULL
+)
+-- Rank vids by exposure count within each (dt, apptype, abcode);
+-- used below to select each group's top-20 vids by exposure
+,t_vid_rank AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,vid
+            ,COUNT(1) AS vid_exp_cnt
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode ORDER BY COUNT(1) DESC) AS vid_rank
+    FROM    t_valid
+    GROUP BY dt, apptype, abcode, vid
+)
+-- Top-20 vids per (dt, apptype, abcode).
+-- NOTE(review): the "top5" names are legacy from an earlier version of this
+-- query; it actually keeps the top 20 (vid_rank <= 20), per the v4 header.
+,t_top5_vid AS
+(
+    SELECT  dt, apptype, abcode, vid, vid_rank
+    FROM    t_vid_rank
+    WHERE   vid_rank <= 20
+)
+-- Mark top-20 vids on each sample row: top5_vid / top5_vid_title /
+-- top5_vid_rank stay NULL when the row's vid is outside the group's
+-- top 20 (LEFT JOIN miss)
+,t_with_top5 AS
+(
+    SELECT  a.*
+            ,CASE WHEN b.vid IS NOT NULL THEN a.vid ELSE NULL END AS top5_vid
+            ,CASE WHEN b.vid IS NOT NULL THEN a.vid_title ELSE NULL END AS top5_vid_title
+            ,b.vid_rank AS top5_vid_rank
+    FROM    t_valid a
+    LEFT JOIN t_top5_vid b
+    ON      a.dt = b.dt
+    AND     a.apptype = b.apptype
+    AND     a.abcode = b.abcode
+    AND     a.vid = b.vid
+)
+-- Aggregate first. GROUPING SETS yields one 'all'-vids roll-up row per
+-- (dt, apptype, abcode) plus one detail row per top5_vid; the HAVING clause
+-- drops detail rows whose vid is outside the top 20 (top5_vid IS NULL).
+,t_agg AS
+(
+    SELECT  dt
+            ,COALESCE(apptype, 'sum') AS apptype
+            ,COALESCE(abcode, 'sum') AS abcode
+            ,COALESCE(CAST(top5_vid AS STRING), 'all') AS vid
+            ,CASE WHEN GROUPING(top5_vid) = 1 THEN NULL ELSE MAX(top5_vid_title) END AS vid_title
+            ,CASE WHEN GROUPING(top5_vid) = 1 THEN NULL ELSE MAX(top5_vid_rank) END AS vid_rank
+            -- COPC: observed / predicted ratio (1.0 = perfectly calibrated)
+            ,round((SUM(is_return_noself) / COUNT(1)) / NULLIF(SUM(str_pred) / COUNT(1), 0), 4) AS str_copc
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN return_n_uv_noself END) / NULLIF(AVG(CASE WHEN is_return_noself = 1 THEN rosn_pred END), 0), 4) AS rosn_copc
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN return_n_uv_noself END) / NULLIF(AVG(CASE WHEN is_return_noself = 1 THEN rosn_ori END), 0), 4) AS rosn_ori_copc
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN return_n_uv_noself END) / NULLIF(AVG(CASE WHEN is_return_noself = 1 THEN rosn_stat END), 0), 4) AS rosn_stat_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_pred), 0), 4) AS rovn_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_ori), 0), 4) AS rovn_ori_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_stat), 0), 4) AS rovn_stat_copc
+            -- Model predictions vs. actuals
+            -- (output aliases shadow the per-row input score columns)
+            ,round(COALESCE(SUM(is_return_noself) / COUNT(1),0),6) AS str_real
+            ,round(COALESCE(SUM(str_pred) / COUNT(1),0),6) AS str_pred
+            ,round(COALESCE(SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0),0),6) AS rosn_real
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN rosn_pred END),6) AS rosn_pred
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN rosn_ori END),6) AS rosn_ori
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN rosn_stat END),6) AS rosn_stat
+            ,round(SUM(return_n_uv_noself) / COUNT(1), 6) AS rovn_real
+            ,round(AVG(str_pred * rosn_pred), 6) AS rovn_pred
+            ,round(AVG(str_pred * rosn_ori), 6) AS rovn_ori
+            ,round(AVG(str_pred * rosn_stat), 6) AS rovn_stat
+            -- Error: str (unconditional MAE over all samples)
+            ,round(AVG(ABS(str_pred - is_return_noself)),6) AS str_mae
+            -- Error: rosn (conditional on is_return_noself = 1)
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_pred - return_n_uv_noself) END),6) AS rosn_pred_mae
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_ori - return_n_uv_noself) END),6) AS rosn_ori_mae
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_stat - return_n_uv_noself) END),6) AS rosn_stat_mae
+            -- Error: rovn (unconditional)
+            ,round(AVG(ABS(str_pred * rosn_pred - return_n_uv_noself)),6) AS rovn_pred_mae
+            ,round(AVG(ABS(str_pred * rosn_ori - return_n_uv_noself)),6) AS rovn_ori_mae
+            ,round(AVG(ABS(str_pred * rosn_stat - return_n_uv_noself)),6) AS rovn_stat_mae
+            -- MAPE: relative error (rosn only; return samples assumed actual > 0)
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_pred - return_n_uv_noself) / return_n_uv_noself END),6) AS rosn_pred_mape
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_ori - return_n_uv_noself) / return_n_uv_noself END),6) AS rosn_ori_mape
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_stat - return_n_uv_noself) / return_n_uv_noself END),6) AS rosn_stat_mape
+            -- Variance: error dispersion (high = erratic, low = stable offset)
+            ,round(VARIANCE(str_pred - is_return_noself),6) AS str_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 THEN rosn_pred - return_n_uv_noself END),6) AS rosn_pred_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 THEN rosn_ori - return_n_uv_noself END),6) AS rosn_ori_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 THEN rosn_stat - return_n_uv_noself END),6) AS rosn_stat_var
+            ,round(VARIANCE(str_pred * rosn_pred - return_n_uv_noself),6) AS rovn_pred_var
+            ,round(VARIANCE(str_pred * rosn_ori - return_n_uv_noself),6) AS rovn_ori_var
+            ,round(VARIANCE(str_pred * rosn_stat - return_n_uv_noself),6) AS rovn_stat_var
+            -- Log-space error: LN(actual) - LN(pred), used for calibration analysis
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_pred > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_pred) END),6) AS rosn_pred_log_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_ori > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_ori) END),6) AS rosn_ori_log_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_stat > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_stat) END),6) AS rosn_stat_log_var
+            ,round(STDDEV(CASE WHEN is_return_noself = 1 AND rosn_pred > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_pred) END),6) AS rosn_pred_log_std
+            ,round(STDDEV(CASE WHEN is_return_noself = 1 AND rosn_ori > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_ori) END),6) AS rosn_ori_log_std
+            ,round(STDDEV(CASE WHEN is_return_noself = 1 AND rosn_stat > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_stat) END),6) AS rosn_stat_log_std
+            -- Theoretical calibration factor: EXP(log_variance / 2)
+            ,round(EXP(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_pred > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_pred) END) / 2),6) AS rosn_pred_correction
+            ,round(EXP(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_ori > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_ori) END) / 2),6) AS rosn_ori_correction
+            ,round(EXP(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_stat > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_stat) END) / 2),6) AS rosn_stat_correction
+            -- Sample counts
+            ,COUNT(1) AS str_samples
+            ,SUM(CASE WHEN is_return_noself = 1 THEN 1 ELSE 0 END) AS rosn_samples
+            ,COUNT(1) AS rovn_samples
+            -- Business metrics
+            ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+            ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+            ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+            ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+            ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+            ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+            ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_with_top5
+    GROUP BY dt, apptype, abcode, top5_vid
+    GROUPING SETS (
+        (dt, apptype, abcode),
+        (dt, apptype, abcode, top5_vid)
+    )
+    HAVING  top5_vid IS NOT NULL OR GROUPING(top5_vid) = 1
+)
+-- New: attach control-group baselines for change-rate computation.
+-- Each MAX(CASE WHEN abcode = '对照组' ...) OVER (PARTITION BY dt, apptype, vid)
+-- broadcasts the control group's value to every experiment group that shares
+-- the same (dt, apptype, vid); the CASE is NULL for non-control rows, so MAX
+-- picks the single control value.
+,t_with_baseline AS
+(
+    SELECT  *
+            -- Exposure share: this row's exp over the 'all' roll-up row's exp
+            -- within the same (dt, apptype, abcode)
+            ,round(exp * 1.0 / MAX(CASE WHEN vid = 'all' THEN exp END) OVER (PARTITION BY dt, apptype, abcode), 4) AS exp_pct
+            -- Control-group baselines (business metrics)
+            ,MAX(CASE WHEN abcode = '对照组' THEN exp_per_dau END) OVER (PARTITION BY dt, apptype, vid) AS exp_per_dau_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_one END) OVER (PARTITION BY dt, apptype, vid) AS str_one_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros_one END) OVER (PARTITION BY dt, apptype, vid) AS ros_one_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str END) OVER (PARTITION BY dt, apptype, vid) AS str_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros END) OVER (PARTITION BY dt, apptype, vid) AS ros_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_plus END) OVER (PARTITION BY dt, apptype, vid) AS str_plus_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros_minus END) OVER (PARTITION BY dt, apptype, vid) AS ros_minus_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn END) OVER (PARTITION BY dt, apptype, vid) AS rovn_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN vovh24 END) OVER (PARTITION BY dt, apptype, vid) AS vovh24_base
+            -- Control-group baselines (COPC metrics)
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_copc END) OVER (PARTITION BY dt, apptype, vid) AS str_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_copc END) OVER (PARTITION BY dt, apptype, vid) AS rosn_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_ori_copc END) OVER (PARTITION BY dt, apptype, vid) AS rosn_ori_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_stat_copc END) OVER (PARTITION BY dt, apptype, vid) AS rosn_stat_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_copc END) OVER (PARTITION BY dt, apptype, vid) AS rovn_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_ori_copc END) OVER (PARTITION BY dt, apptype, vid) AS rovn_ori_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_stat_copc END) OVER (PARTITION BY dt, apptype, vid) AS rovn_stat_copc_base
+            -- Control-group baselines (actuals)
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_real END) OVER (PARTITION BY dt, apptype, vid) AS str_real_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_real END) OVER (PARTITION BY dt, apptype, vid) AS rosn_real_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_real END) OVER (PARTITION BY dt, apptype, vid) AS rovn_real_base
+            -- Control-group baselines (count metrics)
+            ,MAX(CASE WHEN abcode = '对照组' THEN dau END) OVER (PARTITION BY dt, apptype, vid) AS dau_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN exp END) OVER (PARTITION BY dt, apptype, vid) AS exp_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN is_share END) OVER (PARTITION BY dt, apptype, vid) AS is_share_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN share_cnt END) OVER (PARTITION BY dt, apptype, vid) AS share_cnt_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN is_return_1 END) OVER (PARTITION BY dt, apptype, vid) AS is_return_1_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN return_n_uv END) OVER (PARTITION BY dt, apptype, vid) AS return_n_uv_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN viewh24 END) OVER (PARTITION BY dt, apptype, vid) AS viewh24_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN return_n_uv_noself END) OVER (PARTITION BY dt, apptype, vid) AS return_n_uv_noself_base
+    FROM    t_agg
+)
+-- 最终输出:原有字段 + 变化率
+SELECT  dt
+        ,apptype
+        ,abcode
+        ,vid
+        ,vid_title
+        ,vid_rank
+        ,exp_pct
+        ,round((dau - dau_base) / NULLIF(dau_base, 0), 4) AS dau_chg
+        ,round((exp - exp_base) / NULLIF(exp_base, 0), 4) AS exp_chg
+        -- str(全量,预测是否回流)
+        ,str_samples, str_real, str_pred, str_copc, str_mae, str_var
+        -- rosn(回流子集,预测回流UV)
+        ,rosn_samples, rosn_real, rosn_pred, rosn_ori, rosn_stat
+        ,rosn_copc, rosn_ori_copc, rosn_stat_copc
+        ,rosn_pred_mae, rosn_ori_mae, rosn_stat_mae
+        ,rosn_pred_var, rosn_ori_var, rosn_stat_var
+        ,rosn_pred_mape, rosn_ori_mape, rosn_stat_mape
+        ,rosn_pred_log_var, rosn_ori_log_var, rosn_stat_log_var
+        ,rosn_pred_log_std, rosn_ori_log_std, rosn_stat_log_std
+        ,rosn_pred_correction, rosn_ori_correction, rosn_stat_correction
+        -- rovn(全量,预测回流价值)
+        ,rovn_samples, rovn_real, rovn_pred, rovn_ori, rovn_stat
+        ,rovn_copc, rovn_ori_copc, rovn_stat_copc
+        ,rovn_pred_mae, rovn_ori_mae, rovn_stat_mae
+        ,rovn_pred_var, rovn_ori_var, rovn_stat_var
+        -- 业务指标
+        ,exp_per_dau, str_one, ros_one, str, ros, str_plus, ros_minus, rovn, vovh24
+        -- 计数
+        ,dau, exp, is_share, share_cnt, is_return_1, return_n_uv, viewh24, return_n_uv_noself
+        -- ========== 变化率字段 ==========
+        -- 业务指标变化率
+        ,round((exp_per_dau - exp_per_dau_base) / NULLIF(exp_per_dau_base, 0), 4) AS exp_per_dau_chg
+        ,round((str_one - str_one_base) / NULLIF(str_one_base, 0), 4) AS str_one_chg
+        ,round((ros_one - ros_one_base) / NULLIF(ros_one_base, 0), 4) AS ros_one_chg
+        ,round((str - str_base) / NULLIF(str_base, 0), 4) AS str_chg
+        ,round((ros - ros_base) / NULLIF(ros_base, 0), 4) AS ros_chg
+        ,round((str_plus - str_plus_base) / NULLIF(str_plus_base, 0), 4) AS str_plus_chg
+        ,round((ros_minus - ros_minus_base) / NULLIF(ros_minus_base, 0), 4) AS ros_minus_chg
+        ,round((rovn - rovn_base) / NULLIF(rovn_base, 0), 4) AS rovn_chg
+        ,round((vovh24 - vovh24_base) / NULLIF(vovh24_base, 0), 4) AS vovh24_chg
+        -- COPC 变化率
+        ,round((str_copc - str_copc_base) / NULLIF(str_copc_base, 0), 4) AS str_copc_chg
+        ,round((rosn_copc - rosn_copc_base) / NULLIF(rosn_copc_base, 0), 4) AS rosn_copc_chg
+        ,round((rosn_ori_copc - rosn_ori_copc_base) / NULLIF(rosn_ori_copc_base, 0), 4) AS rosn_ori_copc_chg
+        ,round((rosn_stat_copc - rosn_stat_copc_base) / NULLIF(rosn_stat_copc_base, 0), 4) AS rosn_stat_copc_chg
+        ,round((rovn_copc - rovn_copc_base) / NULLIF(rovn_copc_base, 0), 4) AS rovn_copc_chg
+        ,round((rovn_ori_copc - rovn_ori_copc_base) / NULLIF(rovn_ori_copc_base, 0), 4) AS rovn_ori_copc_chg
+        ,round((rovn_stat_copc - rovn_stat_copc_base) / NULLIF(rovn_stat_copc_base, 0), 4) AS rovn_stat_copc_chg
+        -- 真实值变化率
+        ,round((str_real - str_real_base) / NULLIF(str_real_base, 0), 4) AS str_real_chg
+        ,round((rosn_real - rosn_real_base) / NULLIF(rosn_real_base, 0), 4) AS rosn_real_chg
+        ,round((rovn_real - rovn_real_base) / NULLIF(rovn_real_base, 0), 4) AS rovn_real_chg
+        -- 计数指标变化率
+        ,round((is_share - is_share_base) / NULLIF(is_share_base, 0), 4) AS is_share_chg
+        ,round((share_cnt - share_cnt_base) / NULLIF(share_cnt_base, 0), 4) AS share_cnt_chg
+        ,round((is_return_1 - is_return_1_base) / NULLIF(is_return_1_base, 0), 4) AS is_return_1_chg
+        ,round((return_n_uv - return_n_uv_base) / NULLIF(return_n_uv_base, 0), 4) AS return_n_uv_chg
+        ,round((viewh24 - viewh24_base) / NULLIF(viewh24_base, 0), 4) AS viewh24_chg
+        ,round((return_n_uv_noself - return_n_uv_noself_base) / NULLIF(return_n_uv_noself_base, 0), 4) AS return_n_uv_noself_chg
+FROM    t_with_baseline
+ORDER BY dt DESC, apptype, abcode, exp DESC
+;

+ 6 - 0
tasks/指标分析/02_实验组xTop20视频_vs对照组_误差分析_v3.json

@@ -0,0 +1,6 @@
+{
+  "token": "ONZqsxB9BhGH8tt90EScSJT5nHh",
+  "sheet_id": "Xc2Kue",
+  "sort": "dt:desc",
+  "cols": null
+}

+ 339 - 0
tasks/指标分析/02_实验组xTop20视频_vs对照组_误差分析_v3.sql

@@ -0,0 +1,339 @@
+-- Preprocessing: parse scoresmap + classify page
+-- v4: add top-20 vid grouping + GROUPING SETS + exposure share
+-- v5: add change-rate fields relative to the control group
+-- v6: add rosn_ori (uncalibrated raw score) vs the calibrated rosn_pred
+-- v7: add rosn log-space std dev (dispersion of relative multiplicative error)
+-- v8: add composite score = str * (0.1+rosn) * (0.1+vor) * (1 + scoreCoefficient/cate2CoefficientDenominator)
+-- NOTE(review): file is named ..._v3.sql but the header documents v4-v8 — confirm versioning.
+WITH t_raw AS
+(
+    SELECT  *
+            ,REPLACE(GET_JSON_OBJECT(extend_alg,'$.scoresMap'),"\\","") AS scoresmap
+            -- Bucket pages into "推荐" (recommendation surfaces) vs "非推荐" / "其他"
+            ,CASE   WHEN page IN ("回流后沉浸页&内页feed","详情后沉浸页","首页feed","详情页") THEN "推荐"
+                    WHEN page IN ("回流页","其他") THEN "非推荐"
+                    ELSE "其他"
+            END AS page_type
+    FROM    loghubods.dwd_recsys_alg_sample_all_20250212
+    WHERE   dt = '${dt}'
+    AND     apptype IN ("0","4")
+    AND     abcode IN ("ab0","ab1","ab2","ab3","ab4","ab5","ab6","ab7","ab8","ab9")
+    -- NOTE(review): the next predicate is redundant — "ab100" cannot pass the IN list above.
+    AND     abcode NOT IN ("ab100")
+    AND     extend_alg IS NOT NULL
+    AND     GET_JSON_OBJECT(extend_alg,'$.scoresMap') IS NOT NULL
+)
+-- Filter: keep only recommendation pages
+,t_filtered AS
+(
+    SELECT  *
+    FROM    t_raw
+    WHERE   page_type = "推荐"
+)
+-- Feature extraction and dimension mapping
+,t_base AS
+(
+    SELECT  dt
+            ,apptype
+            -- Map (apptype, abcode) to experiment-group labels.
+            -- NOTE(review): every branch requires apptype "4", so apptype "0" rows
+            -- (allowed by t_raw) always fall into "其他" — confirm this is intended.
+            ,CASE   WHEN apptype IN ("4") AND abcode IN ("ab0","ab1") THEN "实验组-先验地域降权"
+                    WHEN apptype IN ("4") AND abcode IN ("ab6","ab7") THEN "实验组-str+校准&ros-统计量"
+                    WHEN apptype IN ("4") AND abcode IN ("ab8","ab9") THEN "实验组-str+校准"
+                    WHEN apptype IN ("4") AND abcode IN ("ab2","ab3") THEN "对照组"
+                    WHEN apptype IN ("4") AND abcode IN ("ab4","ab5") THEN "ab4-5"
+                    ELSE "其他"
+            END AS abcode
+            ,page_type AS page
+            ,mid
+            ,vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,is_return_n
+            ,is_return_noself
+            ,return_1_uv
+            ,return_n_uv
+            ,return_n_uv_noself
+            ,new_exposure_cnt
+            ,flowpool
+            ,scoresmap
+            -- str / rosn model scores
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.fmRov') AS DOUBLE) AS str_pred
+            -- Power-law calibration of NorXGBScore; constants 1.22 / 1.15 are
+            -- presumably fitted offline — confirm their provenance before reuse.
+            ,1.22 * pow(CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE), 1.15) AS rosn_pred
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.NorXGBScore') AS DOUBLE) AS rosn_ori
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.hasReturnRovScore') AS DOUBLE) AS rosn_stat
+            -- score composition factors
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.scoreCoefficient') AS DOUBLE) AS scoreCoefficient
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.cate2CoefficientDenominator') AS DOUBLE) AS cate2CoefficientDenominator
+            ,CAST(GET_JSON_OBJECT(scoresmap,'$.vor') AS DOUBLE) AS vor
+            -- v1_feature comes through SELECT * from the base table
+            ,GET_JSON_OBJECT(v1_feature,'$.title') AS vid_title
+    FROM    t_filtered
+)
+,t_valid AS
+(
+    SELECT  *
+    FROM    t_base
+    WHERE   str_pred IS NOT NULL
+    AND     rosn_pred IS NOT NULL
+)
+-- Compute row-level score_pred / score_real (the v8 composite formula)
+,t_with_score AS
+(
+    SELECT  *
+            -- NOTE(review): NULLIF(cate2CoefficientDenominator, 0) makes the last
+            -- factor NULL when the denominator is 0 or NULL, which NULLs the whole
+            -- score (rows then drop out of AVG/VARIANCE) — confirm intended.
+            ,str_pred
+                * (0.1 + rosn_pred)
+                * (0.1 + COALESCE(vor, 0))
+                * (1 + COALESCE(scoreCoefficient, 0) / NULLIF(cate2CoefficientDenominator, 0))
+                AS score_pred
+            ,is_return_noself
+                * (0.1 + return_n_uv_noself)
+                * (0.1 + COALESCE(vor, 0))
+                * (1 + COALESCE(scoreCoefficient, 0) / NULLIF(cate2CoefficientDenominator, 0))
+                AS score_real
+    FROM    t_valid
+)
+-- Top-20 vids by exposure count within each (dt, apptype, abcode)
+,t_vid_rank AS
+(
+    SELECT  dt
+            ,apptype
+            ,abcode
+            ,vid
+            ,COUNT(1) AS vid_exp_cnt
+            ,ROW_NUMBER() OVER (PARTITION BY dt, apptype, abcode ORDER BY COUNT(1) DESC) AS vid_rank
+    FROM    t_with_score
+    GROUP BY dt, apptype, abcode, vid
+)
+-- NOTE(review): CTE and alias names say "top5" but the cutoff is top 20
+-- (vid_rank <= 20) — stale naming from an earlier version; consider renaming.
+,t_top5_vid AS
+(
+    SELECT  dt, apptype, abcode, vid, vid_rank
+    FROM    t_vid_rank
+    WHERE   vid_rank <= 20
+)
+-- Tag rows whose vid is in the top-20 set (NULL otherwise)
+,t_with_top5 AS
+(
+    SELECT  a.*
+            ,CASE WHEN b.vid IS NOT NULL THEN a.vid ELSE NULL END AS top5_vid
+            ,CASE WHEN b.vid IS NOT NULL THEN a.vid_title ELSE NULL END AS top5_vid_title
+            ,b.vid_rank AS top5_vid_rank
+    FROM    t_with_score a
+    LEFT JOIN t_top5_vid b
+    ON      a.dt = b.dt
+    AND     a.apptype = b.apptype
+    AND     a.abcode = b.abcode
+    AND     a.vid = b.vid
+)
+-- Aggregate first (per-vid rows plus an 'all' roll-up via GROUPING SETS)
+,t_agg AS
+(
+    SELECT  dt
+            ,COALESCE(apptype, 'sum') AS apptype
+            ,COALESCE(abcode, 'sum') AS abcode
+            ,COALESCE(CAST(top5_vid AS STRING), 'all') AS vid
+            ,CASE WHEN GROUPING(top5_vid) = 1 THEN NULL ELSE MAX(top5_vid_title) END AS vid_title
+            ,CASE WHEN GROUPING(top5_vid) = 1 THEN NULL ELSE MAX(top5_vid_rank) END AS vid_rank
+            -- COPC (actual / predicted calibration ratios)
+            ,round((SUM(is_return_noself) / COUNT(1)) / NULLIF(SUM(str_pred) / COUNT(1), 0), 4) AS str_copc
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN return_n_uv_noself END) / NULLIF(AVG(CASE WHEN is_return_noself = 1 THEN rosn_pred END), 0), 4) AS rosn_copc
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN return_n_uv_noself END) / NULLIF(AVG(CASE WHEN is_return_noself = 1 THEN rosn_ori END), 0), 4) AS rosn_ori_copc
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN return_n_uv_noself END) / NULLIF(AVG(CASE WHEN is_return_noself = 1 THEN rosn_stat END), 0), 4) AS rosn_stat_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_pred), 0), 4) AS rovn_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_ori), 0), 4) AS rovn_ori_copc
+            ,round((SUM(return_n_uv_noself) / COUNT(1)) / NULLIF(AVG(str_pred * rosn_stat), 0), 4) AS rovn_stat_copc
+            -- score COPC: AVG(score_real) / AVG(score_pred)
+            ,round(AVG(score_real) / NULLIF(AVG(score_pred), 0), 4) AS score_copc
+            -- Model predictions vs actuals
+            -- NOTE(review): here "str" actuals use is_return_noself, while the
+            -- business metric named str below uses share_cnt — two different
+            -- meanings of "str"; confirm the naming is intentional.
+            ,round(COALESCE(SUM(is_return_noself) / COUNT(1),0),6) AS str_real
+            ,round(COALESCE(SUM(str_pred) / COUNT(1),0),6) AS str_pred
+            ,round(COALESCE(SUM(return_n_uv_noself) / NULLIF(SUM(is_return_noself), 0),0),6) AS rosn_real
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN rosn_pred END),6) AS rosn_pred
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN rosn_ori END),6) AS rosn_ori
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN rosn_stat END),6) AS rosn_stat
+            ,round(SUM(return_n_uv_noself) / COUNT(1), 6) AS rovn_real
+            ,round(AVG(str_pred * rosn_pred), 6) AS rovn_pred
+            ,round(AVG(str_pred * rosn_ori), 6) AS rovn_ori
+            ,round(AVG(str_pred * rosn_stat), 6) AS rovn_stat
+            -- score actual vs predicted
+            ,round(AVG(score_real), 6) AS score_real
+            ,round(AVG(score_pred), 6) AS score_pred
+            -- Error: str (unconditional)
+            ,round(AVG(ABS(str_pred - is_return_noself)),6) AS str_mae
+            -- Error: rosn (conditional on is_return_noself = 1)
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_pred - return_n_uv_noself) END),6) AS rosn_pred_mae
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_ori - return_n_uv_noself) END),6) AS rosn_ori_mae
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_stat - return_n_uv_noself) END),6) AS rosn_stat_mae
+            -- Error: rovn (unconditional)
+            ,round(AVG(ABS(str_pred * rosn_pred - return_n_uv_noself)),6) AS rovn_pred_mae
+            ,round(AVG(ABS(str_pred * rosn_ori - return_n_uv_noself)),6) AS rovn_ori_mae
+            ,round(AVG(ABS(str_pred * rosn_stat - return_n_uv_noself)),6) AS rovn_stat_mae
+            -- Error: score
+            ,round(AVG(ABS(score_pred - score_real)),6) AS score_mae
+            -- MAPE: relative error (rosn only, return samples where actual > 0)
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_pred - return_n_uv_noself) / return_n_uv_noself END),6) AS rosn_pred_mape
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_ori - return_n_uv_noself) / return_n_uv_noself END),6) AS rosn_ori_mape
+            ,round(AVG(CASE WHEN is_return_noself = 1 THEN ABS(rosn_stat - return_n_uv_noself) / return_n_uv_noself END),6) AS rosn_stat_mape
+            -- MAPE: score (samples with score_real > 0)
+            ,round(AVG(CASE WHEN score_real > 0 THEN ABS(score_pred - score_real) / score_real END),6) AS score_mape
+            -- Variance: error dispersion (high = erratic errors, low = stable bias)
+            ,round(VARIANCE(str_pred - is_return_noself),6) AS str_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 THEN rosn_pred - return_n_uv_noself END),6) AS rosn_pred_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 THEN rosn_ori - return_n_uv_noself END),6) AS rosn_ori_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 THEN rosn_stat - return_n_uv_noself END),6) AS rosn_stat_var
+            ,round(VARIANCE(str_pred * rosn_pred - return_n_uv_noself),6) AS rovn_pred_var
+            ,round(VARIANCE(str_pred * rosn_ori - return_n_uv_noself),6) AS rovn_ori_var
+            ,round(VARIANCE(str_pred * rosn_stat - return_n_uv_noself),6) AS rovn_stat_var
+            -- Variance: score
+            ,round(VARIANCE(score_pred - score_real),6) AS score_var
+            -- Log-space error: LN(actual) - LN(pred), used for calibration analysis
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_pred > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_pred) END),6) AS rosn_pred_log_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_ori > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_ori) END),6) AS rosn_ori_log_var
+            ,round(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_stat > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_stat) END),6) AS rosn_stat_log_var
+            ,round(STDDEV(CASE WHEN is_return_noself = 1 AND rosn_pred > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_pred) END),6) AS rosn_pred_log_std
+            ,round(STDDEV(CASE WHEN is_return_noself = 1 AND rosn_ori > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_ori) END),6) AS rosn_ori_log_std
+            ,round(STDDEV(CASE WHEN is_return_noself = 1 AND rosn_stat > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_stat) END),6) AS rosn_stat_log_std
+            -- Log-space error: score
+            ,round(VARIANCE(CASE WHEN score_pred > 0 AND score_real > 0 THEN LN(score_real) - LN(score_pred) END),6) AS score_log_var
+            ,round(STDDEV(CASE WHEN score_pred > 0 AND score_real > 0 THEN LN(score_real) - LN(score_pred) END),6) AS score_log_std
+            -- Theoretical calibration factor: EXP(log_variance / 2)
+            ,round(EXP(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_pred > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_pred) END) / 2),6) AS rosn_pred_correction
+            ,round(EXP(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_ori > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_ori) END) / 2),6) AS rosn_ori_correction
+            ,round(EXP(VARIANCE(CASE WHEN is_return_noself = 1 AND rosn_stat > 0 AND return_n_uv_noself > 0 THEN LN(return_n_uv_noself) - LN(rosn_stat) END) / 2),6) AS rosn_stat_correction
+            ,round(EXP(VARIANCE(CASE WHEN score_pred > 0 AND score_real > 0 THEN LN(score_real) - LN(score_pred) END) / 2),6) AS score_correction
+            -- Sample counts
+            ,COUNT(1) AS str_samples
+            ,SUM(CASE WHEN is_return_noself = 1 THEN 1 ELSE 0 END) AS rosn_samples
+            ,COUNT(1) AS rovn_samples
+            ,SUM(CASE WHEN score_pred IS NOT NULL AND score_real IS NOT NULL THEN 1 ELSE 0 END) AS score_samples
+            -- Business metrics
+            ,round(COALESCE(COUNT(1) / COUNT(DISTINCT mid),0),2) AS exp_per_dau
+            ,round(COALESCE(SUM(is_share) / COUNT(1),0),6) AS str_one
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_share),0),6) AS ros_one
+            ,round(COALESCE(SUM(share_cnt) / COUNT(1),0),6) AS str
+            ,round(COALESCE(SUM(return_n_uv) / SUM(share_cnt),0),6) AS ros
+            ,round(COALESCE(SUM(is_return_1) / COUNT(1),0),6) AS str_plus
+            ,round(COALESCE(SUM(return_n_uv) / SUM(is_return_1),0),6) AS ros_minus
+            ,round(COALESCE(SUM(return_n_uv) / COUNT(1),0),6) AS rovn
+            ,round(COALESCE(SUM(new_exposure_cnt) / COUNT(1),0),6) AS vovh24
+            ,COUNT(DISTINCT mid) AS dau
+            ,COUNT(1) AS exp
+            ,COALESCE(SUM(is_share),0) AS is_share
+            ,COALESCE(SUM(share_cnt),0) AS share_cnt
+            ,COALESCE(SUM(is_return_1),0) AS is_return_1
+            ,COALESCE(SUM(return_n_uv),0) AS return_n_uv
+            ,COALESCE(SUM(new_exposure_cnt),0) AS viewh24
+            ,COALESCE(SUM(return_n_uv_noself),0) AS return_n_uv_noself
+    FROM    t_with_top5
+    GROUP BY dt, apptype, abcode, top5_vid
+    GROUPING SETS (
+        (dt, apptype, abcode),
+        (dt, apptype, abcode, top5_vid)
+    )
+    -- Keep per-vid rows only for top-20 vids, plus the roll-up row
+    HAVING  top5_vid IS NOT NULL OR GROUPING(top5_vid) = 1
+)
+-- New: fetch control-group baselines and compute change rates
+,t_with_baseline AS
+(
+    SELECT  *
+            -- Exposure share of this vid within its (dt, apptype, abcode) roll-up
+            ,round(exp * 1.0 / MAX(CASE WHEN vid = 'all' THEN exp END) OVER (PARTITION BY dt, apptype, abcode), 4) AS exp_pct
+            -- Control-group baselines (business metrics)
+            ,MAX(CASE WHEN abcode = '对照组' THEN exp_per_dau END) OVER (PARTITION BY dt, apptype, vid) AS exp_per_dau_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_one END) OVER (PARTITION BY dt, apptype, vid) AS str_one_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros_one END) OVER (PARTITION BY dt, apptype, vid) AS ros_one_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str END) OVER (PARTITION BY dt, apptype, vid) AS str_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros END) OVER (PARTITION BY dt, apptype, vid) AS ros_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_plus END) OVER (PARTITION BY dt, apptype, vid) AS str_plus_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN ros_minus END) OVER (PARTITION BY dt, apptype, vid) AS ros_minus_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn END) OVER (PARTITION BY dt, apptype, vid) AS rovn_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN vovh24 END) OVER (PARTITION BY dt, apptype, vid) AS vovh24_base
+            -- Control-group baselines (COPC metrics)
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_copc END) OVER (PARTITION BY dt, apptype, vid) AS str_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_copc END) OVER (PARTITION BY dt, apptype, vid) AS rosn_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_ori_copc END) OVER (PARTITION BY dt, apptype, vid) AS rosn_ori_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_stat_copc END) OVER (PARTITION BY dt, apptype, vid) AS rosn_stat_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_copc END) OVER (PARTITION BY dt, apptype, vid) AS rovn_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_ori_copc END) OVER (PARTITION BY dt, apptype, vid) AS rovn_ori_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_stat_copc END) OVER (PARTITION BY dt, apptype, vid) AS rovn_stat_copc_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN score_copc END) OVER (PARTITION BY dt, apptype, vid) AS score_copc_base
+            -- Control-group baselines (actuals)
+            ,MAX(CASE WHEN abcode = '对照组' THEN str_real END) OVER (PARTITION BY dt, apptype, vid) AS str_real_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rosn_real END) OVER (PARTITION BY dt, apptype, vid) AS rosn_real_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN rovn_real END) OVER (PARTITION BY dt, apptype, vid) AS rovn_real_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN score_real END) OVER (PARTITION BY dt, apptype, vid) AS score_real_base
+            -- Control-group baselines (count metrics)
+            ,MAX(CASE WHEN abcode = '对照组' THEN dau END) OVER (PARTITION BY dt, apptype, vid) AS dau_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN exp END) OVER (PARTITION BY dt, apptype, vid) AS exp_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN is_share END) OVER (PARTITION BY dt, apptype, vid) AS is_share_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN share_cnt END) OVER (PARTITION BY dt, apptype, vid) AS share_cnt_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN is_return_1 END) OVER (PARTITION BY dt, apptype, vid) AS is_return_1_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN return_n_uv END) OVER (PARTITION BY dt, apptype, vid) AS return_n_uv_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN viewh24 END) OVER (PARTITION BY dt, apptype, vid) AS viewh24_base
+            ,MAX(CASE WHEN abcode = '对照组' THEN return_n_uv_noself END) OVER (PARTITION BY dt, apptype, vid) AS return_n_uv_noself_base
+    FROM    t_agg
+)
+-- Final output: original fields + change rates
+SELECT  dt
+        ,apptype
+        ,abcode
+        ,vid
+        ,vid_title
+        ,vid_rank
+        ,exp_pct
+        ,round((dau - dau_base) / NULLIF(dau_base, 0), 4) AS dau_chg
+        ,round((exp - exp_base) / NULLIF(exp_base, 0), 4) AS exp_chg
+        -- str (full sample; prediction compared against is_return_noself)
+        ,str_samples, str_real, str_pred, str_copc, str_mae, str_var
+        -- rosn (return subset, predicted return UV)
+        ,rosn_samples, rosn_real, rosn_pred, rosn_ori, rosn_stat
+        ,rosn_copc, rosn_ori_copc, rosn_stat_copc
+        ,rosn_pred_mae, rosn_ori_mae, rosn_stat_mae
+        ,rosn_pred_var, rosn_ori_var, rosn_stat_var
+        ,rosn_pred_mape, rosn_ori_mape, rosn_stat_mape
+        ,rosn_pred_log_var, rosn_ori_log_var, rosn_stat_log_var
+        ,rosn_pred_log_std, rosn_ori_log_std, rosn_stat_log_std
+        ,rosn_pred_correction, rosn_ori_correction, rosn_stat_correction
+        -- rovn (full sample, predicted return value)
+        ,rovn_samples, rovn_real, rovn_pred, rovn_ori, rovn_stat
+        ,rovn_copc, rovn_ori_copc, rovn_stat_copc
+        ,rovn_pred_mae, rovn_ori_mae, rovn_stat_mae
+        ,rovn_pred_var, rovn_ori_var, rovn_stat_var
+        -- score composite
+        ,score_samples, score_real, score_pred
+        ,score_copc, score_mae, score_mape, score_var
+        ,score_log_var, score_log_std, score_correction
+        -- Business metrics
+        ,exp_per_dau, str_one, ros_one, str, ros, str_plus, ros_minus, rovn, vovh24
+        -- Counts
+        ,dau, exp, is_share, share_cnt, is_return_1, return_n_uv, viewh24, return_n_uv_noself
+        -- ========== Change-rate fields ==========
+        -- Business metric change rates
+        ,round((exp_per_dau - exp_per_dau_base) / NULLIF(exp_per_dau_base, 0), 4) AS exp_per_dau_chg
+        ,round((str_one - str_one_base) / NULLIF(str_one_base, 0), 4) AS str_one_chg
+        ,round((ros_one - ros_one_base) / NULLIF(ros_one_base, 0), 4) AS ros_one_chg
+        ,round((str - str_base) / NULLIF(str_base, 0), 4) AS str_chg
+        ,round((ros - ros_base) / NULLIF(ros_base, 0), 4) AS ros_chg
+        ,round((str_plus - str_plus_base) / NULLIF(str_plus_base, 0), 4) AS str_plus_chg
+        ,round((ros_minus - ros_minus_base) / NULLIF(ros_minus_base, 0), 4) AS ros_minus_chg
+        ,round((rovn - rovn_base) / NULLIF(rovn_base, 0), 4) AS rovn_chg
+        ,round((vovh24 - vovh24_base) / NULLIF(vovh24_base, 0), 4) AS vovh24_chg
+        -- COPC change rates
+        ,round((str_copc - str_copc_base) / NULLIF(str_copc_base, 0), 4) AS str_copc_chg
+        ,round((rosn_copc - rosn_copc_base) / NULLIF(rosn_copc_base, 0), 4) AS rosn_copc_chg
+        ,round((rosn_ori_copc - rosn_ori_copc_base) / NULLIF(rosn_ori_copc_base, 0), 4) AS rosn_ori_copc_chg
+        ,round((rosn_stat_copc - rosn_stat_copc_base) / NULLIF(rosn_stat_copc_base, 0), 4) AS rosn_stat_copc_chg
+        ,round((rovn_copc - rovn_copc_base) / NULLIF(rovn_copc_base, 0), 4) AS rovn_copc_chg
+        ,round((rovn_ori_copc - rovn_ori_copc_base) / NULLIF(rovn_ori_copc_base, 0), 4) AS rovn_ori_copc_chg
+        ,round((rovn_stat_copc - rovn_stat_copc_base) / NULLIF(rovn_stat_copc_base, 0), 4) AS rovn_stat_copc_chg
+        ,round((score_copc - score_copc_base) / NULLIF(score_copc_base, 0), 4) AS score_copc_chg
+        -- Actual-value change rates
+        ,round((str_real - str_real_base) / NULLIF(str_real_base, 0), 4) AS str_real_chg
+        ,round((rosn_real - rosn_real_base) / NULLIF(rosn_real_base, 0), 4) AS rosn_real_chg
+        ,round((rovn_real - rovn_real_base) / NULLIF(rovn_real_base, 0), 4) AS rovn_real_chg
+        ,round((score_real - score_real_base) / NULLIF(score_real_base, 0), 4) AS score_real_chg
+        -- Count metric change rates
+        ,round((is_share - is_share_base) / NULLIF(is_share_base, 0), 4) AS is_share_chg
+        ,round((share_cnt - share_cnt_base) / NULLIF(share_cnt_base, 0), 4) AS share_cnt_chg
+        ,round((is_return_1 - is_return_1_base) / NULLIF(is_return_1_base, 0), 4) AS is_return_1_chg
+        ,round((return_n_uv - return_n_uv_base) / NULLIF(return_n_uv_base, 0), 4) AS return_n_uv_chg
+        ,round((viewh24 - viewh24_base) / NULLIF(viewh24_base, 0), 4) AS viewh24_chg
+        ,round((return_n_uv_noself - return_n_uv_noself_base) / NULLIF(return_n_uv_noself_base, 0), 4) AS return_n_uv_noself_chg
+FROM    t_with_baseline
+ORDER BY dt DESC, apptype, abcode, exp DESC
+;

+ 586 - 0
tmp/低vov高曝光分析/step10_可视化.py

@@ -0,0 +1,586 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step10: 生成分析报告 HTML - 按分析步骤一步步展示
+"""
+import pandas as pd
+import json
+from pathlib import Path
+
+output_dir = Path(__file__).parent / "output"
+
+# 读取各步骤数据
+df_daily = pd.read_csv(output_dir / "step7_头部vov趋势_合并.csv").sort_values('dt')
+df_monthly = pd.read_csv(output_dir / "step7_月度统计.csv")
+
+# 图表数据
+dates = df_daily['dt'].astype(str).tolist()
+vov_data = df_daily['vov_mean'].round(4).tolist()
+problem_data = df_daily['problem_pct'].round(1).tolist()
+str_data = [round(x*100, 2) for x in df_daily['str_mean'].tolist()]
+vor_data = df_daily['vor_mean'].round(2).tolist()
+
+# 月度数据
+monthly_data = []
+for _, row in df_monthly.iterrows():
+    monthly_data.append({
+        'month': str(row['month']),
+        'vov': round(row['vov均值'], 4),
+        'str': round(row['str'], 4),
+        'ros': round(row['ros'], 2),
+        'vor': round(row['vor'], 2),
+        'problem': round(row['问题比例%'], 1)
+    })
+
+html_content = f'''<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>低VoV高曝光问题分析报告</title>
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
+    <script src="https://cdn.jsdelivr.net/npm/chartjs-plugin-annotation"></script>
+    <style>
+        * {{ margin: 0; padding: 0; box-sizing: border-box; }}
+        body {{
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            min-height: 100vh;
+            padding: 40px 20px;
+        }}
+        .report {{
+            max-width: 1000px;
+            margin: 0 auto;
+            background: white;
+            border-radius: 16px;
+            box-shadow: 0 20px 60px rgba(0,0,0,0.3);
+            overflow: hidden;
+        }}
+        .header {{
+            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
+            color: white;
+            padding: 40px;
+            text-align: center;
+        }}
+        .header h1 {{ font-size: 32px; margin-bottom: 10px; }}
+        .header p {{ opacity: 0.8; }}
+
+        .step {{
+            padding: 40px;
+            border-bottom: 1px solid #eee;
+        }}
+        .step:last-child {{ border-bottom: none; }}
+
+        .step-header {{
+            display: flex;
+            align-items: center;
+            margin-bottom: 24px;
+        }}
+        .step-number {{
+            width: 48px;
+            height: 48px;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            border-radius: 50%;
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            font-weight: bold;
+            font-size: 18px;
+            margin-right: 16px;
+        }}
+        .step-title {{
+            font-size: 22px;
+            color: #1a1a2e;
+        }}
+        .step-question {{
+            color: #666;
+            font-size: 14px;
+        }}
+
+        .answer-box {{
+            background: #f8f9fa;
+            border-left: 4px solid #27ae60;
+            padding: 20px;
+            margin: 20px 0;
+            border-radius: 0 8px 8px 0;
+        }}
+        .answer-box.warning {{ border-left-color: #f39c12; }}
+        .answer-box.danger {{ border-left-color: #e74c3c; }}
+
+        .answer-title {{
+            font-weight: bold;
+            color: #333;
+            margin-bottom: 8px;
+        }}
+        .answer-value {{
+            font-size: 28px;
+            font-weight: bold;
+            color: #27ae60;
+        }}
+        .answer-box.warning .answer-value {{ color: #f39c12; }}
+        .answer-box.danger .answer-value {{ color: #e74c3c; }}
+
+        .metric-grid {{
+            display: grid;
+            grid-template-columns: repeat(3, 1fr);
+            gap: 16px;
+            margin: 20px 0;
+        }}
+        .metric-card {{
+            background: #f8f9fa;
+            padding: 20px;
+            border-radius: 8px;
+            text-align: center;
+        }}
+        .metric-card.good {{ background: #d4edda; }}
+        .metric-card.bad {{ background: #f8d7da; }}
+        .metric-label {{ font-size: 14px; color: #666; margin-bottom: 8px; }}
+        .metric-value {{ font-size: 24px; font-weight: bold; }}
+        .metric-card.good .metric-value {{ color: #155724; }}
+        .metric-card.bad .metric-value {{ color: #721c24; }}
+
+        .chart-container {{
+            height: 300px;
+            margin: 20px 0;
+        }}
+
+        table {{
+            width: 100%;
+            border-collapse: collapse;
+            margin: 20px 0;
+        }}
+        th, td {{
+            padding: 12px;
+            text-align: center;
+            border-bottom: 1px solid #eee;
+        }}
+        th {{ background: #f8f9fa; font-weight: 600; }}
+        tr.problem {{ background: #fff5f5; }}
+        .badge {{
+            display: inline-block;
+            padding: 4px 12px;
+            border-radius: 20px;
+            font-size: 12px;
+            font-weight: bold;
+        }}
+        .badge-danger {{ background: #f8d7da; color: #721c24; }}
+        .badge-warning {{ background: #fff3cd; color: #856404; }}
+        .badge-success {{ background: #d4edda; color: #155724; }}
+
+        .conclusion {{
+            background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
+            color: white;
+            padding: 40px;
+        }}
+        .conclusion h2 {{ margin-bottom: 20px; }}
+        .conclusion ul {{ margin-left: 20px; }}
+        .conclusion li {{ margin-bottom: 12px; line-height: 1.8; }}
+        .highlight {{ background: rgba(255,255,255,0.2); padding: 2px 8px; border-radius: 4px; }}
+
+        .timeline {{
+            position: relative;
+            padding-left: 30px;
+        }}
+        .timeline::before {{
+            content: '';
+            position: absolute;
+            left: 10px;
+            top: 0;
+            bottom: 0;
+            width: 2px;
+            background: #ddd;
+        }}
+        .timeline-item {{
+            position: relative;
+            margin-bottom: 20px;
+        }}
+        .timeline-item::before {{
+            content: '';
+            position: absolute;
+            left: -24px;
+            top: 6px;
+            width: 10px;
+            height: 10px;
+            border-radius: 50%;
+            background: #667eea;
+        }}
+        .timeline-item.bad::before {{ background: #e74c3c; }}
+        .timeline-item.good::before {{ background: #27ae60; }}
+    </style>
+</head>
+<body>
+    <div class="report">
+        <!-- 报告头部 -->
+        <div class="header">
+            <h1>📊 低VoV高曝光问题分析报告</h1>
+            <p>数据范围: {dates[0]} ~ {dates[-1]} | 共 {len(dates)} 天</p>
+        </div>
+
+        <!-- Step 0: 问题背景 -->
+        <div class="step">
+            <div class="step-header">
+                <div class="step-number">0</div>
+                <div>
+                    <div class="step-title">问题背景</div>
+                    <div class="step-question">什么是低VoV高曝光问题?</div>
+                </div>
+            </div>
+            <p style="line-height:1.8;color:#555;">
+                <strong>排序公式</strong>:score = STR × ROS × VOR = VoV<br><br>
+                • <strong>STR</strong> (分享率): FM模型预估,用户看到视频后分享的概率<br>
+                • <strong>ROS</strong> (回流/分享比): XGB模型预估,每次分享能带来多少回流用户<br>
+                • <strong>VOR</strong> (病毒因子): 24h统计量,每个回流用户能带来多少曝光<br>
+                • <strong>VoV</strong> (病毒系数): 最终的传播效率 = STR × ROS × VOR<br><br>
+                <strong style="color:#e74c3c;">问题现象</strong>:某些视频真实VoV很低,但却获得了很高的曝光排名
+            </p>
+        </div>
+
+        <!-- Step 1: 验证现象 -->
+        <div class="step">
+            <div class="step-header">
+                <div class="step-number">1</div>
+                <div>
+                    <div class="step-title">验证现象</div>
+                    <div class="step-question">低VoV高曝光问题存在吗?</div>
+                </div>
+            </div>
+            <div class="answer-box danger">
+                <div class="answer-title">结论:问题确实存在 ✓</div>
+                <div class="answer-value">43.5% 的 Top10 记录存在此问题</div>
+            </div>
+            <p style="color:#666;margin-top:12px;">
+                定义:VoV < 0.35 但曝光排名 ≤ 5 的视频<br>
+                验证方法:统计每日 Top10 曝光视频中符合条件的比例
+            </p>
+        </div>
+
+        <!-- Step 2: 影响面 -->
+        <div class="step">
+            <div class="step-header">
+                <div class="step-number">2</div>
+                <div>
+                    <div class="step-title">量化影响</div>
+                    <div class="step-question">问题影响有多大?</div>
+                </div>
+            </div>
+            <div class="metric-grid">
+                <div class="metric-card bad">
+                    <div class="metric-label">问题记录占比</div>
+                    <div class="metric-value">38.1%</div>
+                </div>
+                <div class="metric-card bad">
+                    <div class="metric-label">问题视频数</div>
+                    <div class="metric-value">21 个</div>
+                </div>
+                <div class="metric-card bad">
+                    <div class="metric-label">影响曝光量</div>
+                    <div class="metric-value">35.7%</div>
+                </div>
+            </div>
+            <p style="color:#666;">
+                即:超过1/3的头部曝光被低效率视频占据
+            </p>
+        </div>
+
+        <!-- Step 3: 原因分析 -->
+        <div class="step">
+            <div class="step-header">
+                <div class="step-number">3</div>
+                <div>
+                    <div class="step-title">原因分析</div>
+                    <div class="step-question">STR、ROS、VOR 哪个出了问题?</div>
+                </div>
+            </div>
+            <table>
+                <thead>
+                    <tr>
+                        <th>指标</th>
+                        <th>预估偏差</th>
+                        <th>偏高比例</th>
+                        <th>诊断</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <tr>
+                        <td>STR (分享率)</td>
+                        <td>-5.7%</td>
+                        <td>36.4%</td>
+                        <td><span class="badge badge-success">正常</span></td>
+                    </tr>
+                    <tr class="problem">
+                        <td><strong>ROS (回流比)</strong></td>
+                        <td><strong>+72.2%</strong></td>
+                        <td><strong>70.1%</strong></td>
+                        <td><span class="badge badge-danger">系统性偏高 ⚠️</span></td>
+                    </tr>
+                    <tr>
+                        <td>VOR (病毒因子)</td>
+                        <td>混合</td>
+                        <td>44.8%</td>
+                        <td><span class="badge badge-warning">有异常值</span></td>
+                    </tr>
+                </tbody>
+            </table>
+            <div class="answer-box danger">
+                <div class="answer-title">核心问题找到了!</div>
+                <div class="answer-value">ROS 预估系统性偏高 72.2%</div>
+            </div>
+        </div>
+
+        <!-- Step 4: 深入分析 ROS -->
+        <div class="step">
+            <div class="step-header">
+                <div class="step-number">4</div>
+                <div>
+                    <div class="step-title">ROS 偏差深入分析</div>
+                    <div class="step-question">ROS 为什么会偏高?</div>
+                </div>
+            </div>
+            <p style="margin-bottom:20px;color:#555;">发现 ROS 偏差与真实 ROS 呈强负相关 (r = -0.607):</p>
+            <table>
+                <thead>
+                    <tr>
+                        <th>真实 ROS</th>
+                        <th>预估偏差</th>
+                        <th>说明</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    <tr class="problem">
+                        <td>< 2</td>
+                        <td><strong>+152%</strong></td>
+                        <td><span class="badge badge-danger">严重偏高</span></td>
+                    </tr>
+                    <tr>
+                        <td>2-4</td>
+                        <td>+20%</td>
+                        <td>轻微偏高</td>
+                    </tr>
+                    <tr>
+                        <td>4-6</td>
+                        <td>-27%</td>
+                        <td>偏低</td>
+                    </tr>
+                    <tr>
+                        <td>> 6</td>
+                        <td>-54%</td>
+                        <td>严重偏低</td>
+                    </tr>
+                </tbody>
+            </table>
+            <div class="answer-box warning">
+                <div class="answer-title">根本原因</div>
+                <div class="answer-value">XGB 模型存在「回归均值」问题</div>
+            </div>
+            <p style="color:#666;margin-top:12px;">
+                模型倾向于把极端值往中间拉:低的预估偏高,高的预估偏低
+            </p>
+        </div>
+
+        <!-- Step 5: 时间趋势 -->
+        <div class="step">
+            <div class="step-header">
+                <div class="step-number">5</div>
+                <div>
+                    <div class="step-title">时间趋势分析</div>
+                    <div class="step-question">问题是什么时候开始的?</div>
+                </div>
+            </div>
+            <div class="chart-container">
+                <canvas id="problemChart"></canvas>
+            </div>
+            <div class="answer-box warning">
+                <div class="answer-title">结论</div>
+                <div class="answer-value">问题从一开始就存在,不是某个时间点突然出现</div>
+            </div>
+        </div>
+
+        <!-- Step 6: 月度对比 -->
+        <div class="step">
+            <div class="step-header">
+                <div class="step-number">6</div>
+                <div>
+                    <div class="step-title">月度对比分析</div>
+                    <div class="step-question">哪些月份问题严重?为什么?</div>
+                </div>
+            </div>
+            <table id="monthlyTable">
+                <thead>
+                    <tr>
+                        <th>月份</th>
+                        <th>VoV</th>
+                        <th>STR</th>
+                        <th>VOR</th>
+                        <th>问题比例</th>
+                        <th>诊断</th>
+                    </tr>
+                </thead>
+                <tbody></tbody>
+            </table>
+            <div class="timeline" style="margin-top:30px;">
+                <div class="timeline-item">
+                    <strong>2025年7月</strong>:问题比例 22%(数据起点)
+                </div>
+                <div class="timeline-item bad">
+                    <strong>2025年8-10月</strong>:问题恶化到 28-31%
+                    <span class="badge badge-danger">主因: VOR↓9%</span>
+                </div>
+                <div class="timeline-item">
+                    <strong>2025年11月</strong>:开始改善 27%
+                </div>
+                <div class="timeline-item good">
+                    <strong>2025年12月</strong>:大幅改善到 10%
+                    <span class="badge badge-success">原因: STR↑34%</span>
+                </div>
+                <div class="timeline-item bad">
+                    <strong>2026年1月</strong>:回升到 18%
+                    <span class="badge badge-warning">原因: STR↓10%</span>
+                </div>
+            </div>
+        </div>
+
+        <!-- Step 7: VoV 分解趋势 -->
+        <div class="step">
+            <div class="step-header">
+                <div class="step-number">7</div>
+                <div>
+                    <div class="step-title">VoV 分解趋势</div>
+                    <div class="step-question">STR、VOR 的变化趋势如何?</div>
+                </div>
+            </div>
+            <div class="chart-container">
+                <canvas id="decomposeChart"></canvas>
+            </div>
+            <p style="color:#666;margin-top:12px;">
+                可以看到:12月 STR 明显上升(紫色线),问题比例随之下降
+            </p>
+        </div>
+
+        <!-- 最终结论 -->
+        <div class="conclusion">
+            <h2>📋 最终结论与建议</h2>
+            <ul>
+                <li><strong>问题确认</strong>:低VoV高曝光问题 <span class="highlight">确实存在</span>,影响 35.7% 的头部曝光</li>
+                <li><strong>根本原因</strong>:<span class="highlight">ROS 预估偏高 72%</span>,XGB 模型存在「回归均值」问题</li>
+                <li><strong>时间规律</strong>:问题一直存在,8-10月最严重(VOR↓),12月改善(STR↑)</li>
+                <li><strong>问题视频特征</strong>:<span class="highlight">低 STR</span> 的视频更容易出问题</li>
+            </ul>
+            <h3 style="margin-top:30px;margin-bottom:16px;">💡 建议</h3>
+            <ul>
+                <li><strong>短期</strong>:对真实 ROS < 2 的视频增加预估衰减系数</li>
+                <li><strong>中期</strong>:改进 XGB 模型,解决「回归均值」问题</li>
+                <li><strong>长期</strong>:考虑端到端 VoV 预估,避免三模型误差累积</li>
+            </ul>
+        </div>
+    </div>
+
+    <script>
+        const dates = {json.dumps(dates)};
+        const formattedDates = dates.map(d => d.slice(4,6) + '/' + d.slice(6,8));
+        const problemData = {json.dumps(problem_data)};
+        const strData = {json.dumps(str_data)};
+        const vorData = {json.dumps(vor_data)};
+
+        // 问题比例趋势图
+        const problemColors = problemData.map(v => v > 25 ? '#e74c3c' : v > 15 ? '#f39c12' : '#27ae60');
+        new Chart(document.getElementById('problemChart'), {{
+            type: 'bar',
+            data: {{
+                labels: formattedDates,
+                datasets: [{{
+                    label: '问题比例 (%)',
+                    data: problemData,
+                    backgroundColor: problemColors,
+                    borderRadius: 2
+                }}]
+            }},
+            options: {{
+                responsive: true,
+                maintainAspectRatio: false,
+                plugins: {{
+                    legend: {{ display: false }},
+                    annotation: {{
+                        annotations: {{
+                            line1: {{
+                                type: 'line',
+                                yMin: 20, yMax: 20,
+                                borderColor: '#e74c3c',
+                                borderWidth: 2,
+                                borderDash: [6, 6],
+                                label: {{ display: true, content: '严重线 20%', position: 'end', backgroundColor: '#e74c3c' }}
+                            }}
+                        }}
+                    }}
+                }},
+                scales: {{
+                    y: {{ beginAtZero: true, max: 50, title: {{ display: true, text: '问题比例 (%)' }} }}
+                }}
+            }}
+        }});
+
+        // STR/VOR 分解图
+        new Chart(document.getElementById('decomposeChart'), {{
+            type: 'line',
+            data: {{
+                labels: formattedDates,
+                datasets: [{{
+                    label: 'STR (×100)',
+                    data: strData,
+                    borderColor: '#9b59b6',
+                    tension: 0.3,
+                    yAxisID: 'y'
+                }}, {{
+                    label: 'VOR',
+                    data: vorData,
+                    borderColor: '#f39c12',
+                    tension: 0.3,
+                    yAxisID: 'y1'
+                }}]
+            }},
+            options: {{
+                responsive: true,
+                maintainAspectRatio: false,
+                interaction: {{ mode: 'index', intersect: false }},
+                plugins: {{ legend: {{ display: true, position: 'top' }} }},
+                scales: {{
+                    y: {{ type: 'linear', position: 'left', title: {{ display: true, text: 'STR (×100)' }} }},
+                    y1: {{ type: 'linear', position: 'right', title: {{ display: true, text: 'VOR' }}, grid: {{ drawOnChartArea: false }} }}
+                }}
+            }}
+        }});
+
+        // 月度表格
+        const monthlyData = {json.dumps(monthly_data)};
+        const tbody = document.querySelector('#monthlyTable tbody');
+        monthlyData.forEach(row => {{
+            const tr = document.createElement('tr');
+            if (row.problem > 25) tr.classList.add('problem');
+
+            let diagnosis = '';
+            if (row.problem <= 15) diagnosis = '<span class="badge badge-success">✓ 良好</span>';
+            else if (row.vor < 6.2) diagnosis = '<span class="badge badge-danger">VOR↓</span>';
+            else if (row.str < 0.045) diagnosis = '<span class="badge badge-danger">STR↓</span>';
+            else diagnosis = '<span class="badge badge-warning">待分析</span>';
+
+            tr.innerHTML = `
+                <td>${{row.month}}</td>
+                <td>${{row.vov.toFixed(3)}}</td>
+                <td>${{row.str.toFixed(4)}}</td>
+                <td>${{row.vor.toFixed(2)}}</td>
+                <td>${{row.problem.toFixed(1)}}%</td>
+                <td>${{diagnosis}}</td>
+            `;
+            tbody.appendChild(tr);
+        }});
+    </script>
+</body>
+</html>
+'''
+
+# Save: write the assembled HTML report (html_content, built above from the
+# f-string template) into the analysis output directory.
+html_path = output_dir / "分析报告.html"
+with open(html_path, 'w', encoding='utf-8') as f:
+    f.write(html_content)
+
+print(f"分析报告已生成: {html_path}")

+ 100 - 0
tmp/低vov高曝光分析/step1_分析.py

@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step1: verify whether the "low-vov / high-exposure" phenomenon exists.
+
+Reads a pre-extracted CSV of item-by-day metrics, summarises the vov0
+distribution (overall and per exposure-rank bucket), flags rows whose
+exposure rank is in the Top10 while vov0 is below the overall median, and
+writes the flagged rows plus a plain-text summary to the output directory.
+"""
+import pandas as pd
+from pathlib import Path
+
+# Load the pre-extracted metrics produced by the data-pull step.
+# NOTE(review): assumes columns dt, vid, 曝光rank, exp, vov0, 标题 —
+# confirm against the extraction SQL that produced this file.
+data_file = Path(__file__).parent / "output" / "step1_验证现象_20260111_20260113.csv"
+df = pd.read_csv(data_file)
+
+print("=" * 70)
+print("Step1: 验证低vov高曝光现象是否存在")
+print("=" * 70)
+
+# 1. Data overview: sample size, date span, exposure-rank span.
+print("\n【1. 数据概览】")
+print(f"样本数: {len(df)} 条(item x 天)")
+print(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}")
+print(f"曝光rank范围: {df['曝光rank'].min()} ~ {df['曝光rank'].max()}")
+
+# 2. Overall distribution of vov0 (the realised viral coefficient).
+print("\n【2. vov0 整体分布】")
+print(f"均值: {df['vov0'].mean():.4f}")
+print(f"中位数: {df['vov0'].median():.4f}")
+print(f"标准差: {df['vov0'].std():.4f}")
+print(f"最小值: {df['vov0'].min():.4f}")
+print(f"最大值: {df['vov0'].max():.4f}")
+
+# 3. vov0 statistics per exposure-rank bucket (Top1-5, Top6-10, ...).
+print("\n【3. 按曝光rank分组看vov0】")
+df['rank_group'] = pd.cut(df['曝光rank'], bins=[0, 5, 10, 20, 50],
+                          labels=['Top1-5', 'Top6-10', 'Top11-20', 'Top21-50'])
+group_stats = df.groupby('rank_group', observed=True).agg({
+    'vov0': ['mean', 'median', 'min', 'count'],
+    'exp': 'mean'
+}).round(4)
+group_stats.columns = ['vov0均值', 'vov0中位数', 'vov0最小', '样本数', '平均曝光']
+print(group_stats.to_string())
+
+# 4. Flag low-vov / high-exposure items: exposure rank <= 10 AND vov0 below
+# the overall median computed above.
+median_vov = df['vov0'].median()
+print(f"\n【4. 识别低vov高曝光item】")
+print(f"定义: 曝光rank<=10 且 vov0<中位数({median_vov:.4f})")
+
+problem_items = df[(df['曝光rank'] <= 10) & (df['vov0'] < median_vov)]
+top10 = df[df['曝光rank'] <= 10]
+# NOTE(review): the ratio below raises ZeroDivisionError if no row has
+# rank<=10 — tolerable for an ad-hoc script, but worth knowing.
+print(f"符合条件的记录数: {len(problem_items)}")
+print(f"占Top10总记录数: {len(problem_items)}/{len(top10)} = {len(problem_items)/len(top10)*100:.1f}%")
+
+if len(problem_items) > 0:
+    print("\n问题item详情:")
+    print(problem_items[['dt', 'vid', '曝光rank', 'exp', 'vov0', '标题']].to_string(index=False))
+
+# 5. Conclusion: report whether the phenomenon exists and at what rate.
+print("\n" + "=" * 70)
+print("【Step1 结论】")
+print("=" * 70)
+if len(problem_items) > 0:
+    print(f"现象存在: {len(problem_items)}/{len(top10)} ({len(problem_items)/len(top10)*100:.1f}%) 的Top10记录存在低vov高曝光问题")
+    print(f"  - 这些item的vov0低于中位数{median_vov:.4f},但曝光rank进入Top10")
+else:
+    print("现象不存在")
+
+# Persist the flagged rows so downstream steps can reuse them.
+output_file = Path(__file__).parent / "output" / "step1_问题item.csv"
+problem_items.to_csv(output_file, index=False)
+print(f"\n问题item已保存到: {output_file}")
+
+# Persist a plain-text summary of the findings.
+conclusion_file = Path(__file__).parent / "output" / "step1_结论.txt"
+with open(conclusion_file, 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step1: 验证低vov高曝光现象是否存在\n")
+    f.write("=" * 70 + "\n\n")
+
+    f.write("【数据概览】\n")
+    f.write(f"样本数: {len(df)} 条(item x 天)\n")
+    f.write(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}\n")
+    f.write(f"曝光rank范围: {df['曝光rank'].min()} ~ {df['曝光rank'].max()}\n\n")
+
+    f.write("【vov0 整体分布】\n")
+    f.write(f"均值: {df['vov0'].mean():.4f}\n")
+    f.write(f"中位数: {df['vov0'].median():.4f}\n")
+    f.write(f"标准差: {df['vov0'].std():.4f}\n")
+    f.write(f"最小值: {df['vov0'].min():.4f}\n")
+    f.write(f"最大值: {df['vov0'].max():.4f}\n\n")
+
+    f.write("【按曝光rank分组看vov0】\n")
+    f.write(group_stats.to_string() + "\n\n")
+
+    f.write("【结论】\n")
+    if len(problem_items) > 0:
+        f.write(f"现象存在: {len(problem_items)}/{len(top10)} ({len(problem_items)/len(top10)*100:.1f}%) 的Top10记录存在低vov高曝光问题\n")
+        f.write(f"定义: 曝光rank<=10 且 vov0<中位数({median_vov:.4f})\n")
+    else:
+        f.write("现象不存在\n")
+
+print(f"分析结论已保存到: {conclusion_file}")

+ 115 - 0
tmp/低vov高曝光分析/step2_分析.py

@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step2: quantify the blast radius — how many videos show the low-vov /
+high-exposure problem, and what share of Top10 records and of Top10
+exposure they account for.
+"""
+import pandas as pd
+from pathlib import Path
+
+# Load the pre-extracted item-by-day metrics for the full date window.
+data_file = Path(__file__).parent / "output" / "step2_影响面_20260101_20260113.csv"
+df = pd.read_csv(data_file)
+
+print("=" * 70)
+print("Step2: 量化影响面")
+print("=" * 70)
+
+# 1. Data overview.
+print("\n【1. 数据概览】")
+print(f"样本数: {len(df)} 条(item x 天)")
+print(f"唯一视频数: {df['vid'].nunique()}")
+print(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}")
+print(f"天数: {df['dt'].nunique()}")
+
+# 2. Problem definition.
+# Threshold: the overall vov0 median across the whole window.
+median_vov = df['vov0'].median()
+print(f"\n【2. 问题定义】")
+print(f"vov0中位数: {median_vov:.4f}")
+print(f"问题定义: 曝光rank<=10 且 vov0<{median_vov:.4f}")
+
+# 3. Count problem records among Top10-exposure rows.
+top10 = df[df['曝光rank'] <= 10]
+problem_items = top10[top10['vov0'] < median_vov]
+
+print(f"\n【3. 问题item统计】")
+print(f"Top10总记录数: {len(top10)}")
+print(f"问题记录数: {len(problem_items)}")
+print(f"问题占比: {len(problem_items)/len(top10)*100:.1f}%")
+
+# 4. Daily problem ratio (guards against days with no Top10 rows).
+print(f"\n【4. 按天统计问题占比】")
+daily_stats = []
+for dt in sorted(df['dt'].unique()):
+    day_top10 = df[(df['dt'] == dt) & (df['曝光rank'] <= 10)]
+    day_problem = day_top10[day_top10['vov0'] < median_vov]
+    daily_stats.append({
+        'dt': dt,
+        'top10_count': len(day_top10),
+        'problem_count': len(day_problem),
+        'problem_ratio': len(day_problem) / len(day_top10) * 100 if len(day_top10) > 0 else 0
+    })
+daily_df = pd.DataFrame(daily_stats)
+print(daily_df.to_string(index=False))
+
+# 5. De-duplicated problem-video list with per-video aggregates
+# (days seen, mean vov0, mean rank, total exposure).
+print(f"\n【5. 问题视频列表(去重)】")
+problem_vids = problem_items.groupby('vid').agg({
+    'dt': 'count',
+    'vov0': 'mean',
+    '曝光rank': 'mean',
+    'exp': 'sum',
+    '标题': 'first'
+}).reset_index()
+problem_vids.columns = ['vid', '出现天数', '平均vov0', '平均rank', '总曝光', '标题']
+problem_vids = problem_vids.sort_values('出现天数', ascending=False)
+print(f"问题视频数: {len(problem_vids)}")
+print(problem_vids.to_string(index=False))
+
+# 6. Affected exposure volume (problem exposure / total Top10 exposure).
+print(f"\n【6. 影响面量化】")
+total_exp_top10 = top10['exp'].sum()
+problem_exp = problem_items['exp'].sum()
+print(f"Top10总曝光: {total_exp_top10:,.0f}")
+print(f"问题item曝光: {problem_exp:,.0f}")
+print(f"问题曝光占比: {problem_exp/total_exp_top10*100:.1f}%")
+
+# Print the conclusion, then persist everything below.
+print("\n" + "=" * 70)
+print("【Step2 结论】")
+print("=" * 70)
+print(f"1. 问题占比: {len(problem_items)}/{len(top10)} ({len(problem_items)/len(top10)*100:.1f}%) 的Top10记录存在低vov高曝光")
+print(f"2. 问题视频: {len(problem_vids)} 个视频至少出现过1次问题")
+print(f"3. 影响曝光: {problem_exp:,.0f} ({problem_exp/total_exp_top10*100:.1f}% 的Top10曝光)")
+
+# Persist results to the output directory.
+output_dir = Path(__file__).parent / "output"
+
+# De-duplicated problem-video list.
+problem_vids.to_csv(output_dir / "step2_问题视频列表.csv", index=False)
+
+# Daily statistics.
+daily_df.to_csv(output_dir / "step2_每日统计.csv", index=False)
+
+# Plain-text conclusion.
+with open(output_dir / "step2_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step2: 量化影响面\n")
+    f.write("=" * 70 + "\n\n")
+
+    f.write("【数据范围】\n")
+    f.write(f"日期: {df['dt'].min()} ~ {df['dt'].max()} ({df['dt'].nunique()}天)\n")
+    f.write(f"样本数: {len(df)} 条\n\n")
+
+    f.write("【问题定义】\n")
+    f.write(f"曝光rank<=10 且 vov0<{median_vov:.4f}(中位数)\n\n")
+
+    f.write("【影响面统计】\n")
+    f.write(f"问题记录占比: {len(problem_items)}/{len(top10)} = {len(problem_items)/len(top10)*100:.1f}%\n")
+    f.write(f"问题视频数: {len(problem_vids)} 个\n")
+    f.write(f"影响曝光量: {problem_exp:,.0f} ({problem_exp/total_exp_top10*100:.1f}%)\n\n")
+
+    f.write("【每日问题占比】\n")
+    f.write(daily_df.to_string(index=False))
+
+print(f"\n结果已保存到 output/ 目录")

+ 122 - 0
tmp/低vov高曝光分析/step3_分析.py

@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step3: 分析原因 - 对比模型预估值与真实值,找出偏差来源
+"""
+import pandas as pd
+from pathlib import Path
+import glob
+
+# 读取并合并数据
+data_dir = Path(__file__).parent / "output" / "step3_原因分析"
+all_files = glob.glob(str(data_dir / "*.csv"))
+df = pd.concat([pd.read_csv(f) for f in all_files], ignore_index=True)
+
+print("=" * 70)
+print("Step3: 分析原因")
+print("=" * 70)
+
+# 1. 数据概览
+print("\n【1. 数据概览】")
+print(f"样本数: {len(df)} 条")
+print(f"视频数: {df['vid'].nunique()}")
+print(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}")
+
+# 2. 计算各分项偏差
+df['str_bias'] = df['pred_str'] - df['real_str']
+df['ros_bias'] = df['pred_ros'] - df['real_ros']
+df['vor_bias'] = df['pred_vor'] - df['real_vor']
+
+# 百分比偏差(避免除0)
+df['str_bias_pct'] = df.apply(lambda x: (x['pred_str'] - x['real_str']) / x['real_str'] * 100 if x['real_str'] > 0 else None, axis=1)
+df['ros_bias_pct'] = df.apply(lambda x: (x['pred_ros'] - x['real_ros']) / x['real_ros'] * 100 if pd.notna(x['real_ros']) and x['real_ros'] > 0 else None, axis=1)
+df['vor_bias_pct'] = df.apply(lambda x: (x['pred_vor'] - x['real_vor']) / x['real_vor'] * 100 if pd.notna(x['real_vor']) and x['real_vor'] > 0 else None, axis=1)
+
+# 3. 整体偏差分析
+print("\n【2. 各分项偏差统计】")
+print(f"str 偏差: 平均 {df['str_bias'].mean():+.6f} ({df['str_bias_pct'].mean():+.1f}%)")
+print(f"ros 偏差: 平均 {df['ros_bias'].mean():+.4f} ({df['ros_bias_pct'].mean():+.1f}%)")
+print(f"vor 偏差: 平均 {df['vor_bias'].mean():+.4f} ({df['vor_bias_pct'].mean():+.1f}%)")
+
+# 4. 按视频汇总
+print("\n【3. 按视频汇总偏差】")
+vid_stats = df.groupby('vid').agg({
+    'sample_cnt': 'sum',
+    'pred_str': 'mean',
+    'real_str': 'mean',
+    'str_bias_pct': 'mean',
+    'pred_ros': 'mean',
+    'real_ros': 'mean',
+    'ros_bias_pct': 'mean',
+    'pred_vor': 'mean',
+    'real_vor': 'mean',
+    'vor_bias_pct': 'mean',
+    'real_vov': 'mean'
+}).round(4)
+vid_stats.columns = ['样本数', '预估str', '真实str', 'str偏差%', '预估ros', '真实ros', 'ros偏差%', '预估vor', '真实vor', 'vor偏差%', '真实vov']
+vid_stats = vid_stats.reset_index()
+print(vid_stats.to_string(index=False))
+
+# 5. 找出主要偏差来源
+print("\n【4. 主要偏差来源分析】")
+avg_str_bias = abs(df['str_bias_pct'].mean())
+avg_ros_bias = abs(df['ros_bias_pct'].mean())
+avg_vor_bias = abs(df['vor_bias_pct'].mean())
+
+biases = [('str', avg_str_bias), ('ros', avg_ros_bias), ('vor', avg_vor_bias)]
+biases.sort(key=lambda x: x[1], reverse=True)
+
+print(f"偏差排序: {biases[0][0]}({biases[0][1]:.1f}%) > {biases[1][0]}({biases[1][1]:.1f}%) > {biases[2][0]}({biases[2][1]:.1f}%)")
+print(f"\n主要偏差来源: {biases[0][0].upper()}")
+
+# 6. 详细分析主要偏差
+main_bias = biases[0][0]
+if main_bias == 'ros':
+    print("\n【5. ROS 偏差详细分析】")
+    print("ros 预估偏高的可能原因:")
+    print("  1. XGB 模型使用历史 ros 作为特征,但头部视频的 ros 会随时间衰减")
+    print("  2. 模型训练样本偏向高分享场景,导致对低分享场景预估偏高")
+    print("  3. 头部视频的分享用户结构变化(早期核心用户 → 后期普通用户)")
+elif main_bias == 'str':
+    print("\n【5. STR 偏差详细分析】")
+    print("str 预估偏差的可能原因:")
+    print("  1. FM 模型采样率校正有偏差")
+    print("  2. 头部视频的分享行为与训练样本分布不同")
+elif main_bias == 'vor':
+    print("\n【5. VOR 偏差详细分析】")
+    print("vor 预估偏差的可能原因:")
+    print("  1. vor 使用 24h 统计量,存在滞后性")
+    print("  2. 头部视频的裂变效率在快速变化")
+
+# 保存结果
+print("\n" + "=" * 70)
+print("【Step3 结论】")
+print("=" * 70)
+print(f"主要偏差来源: {biases[0][0].upper()} (平均偏差 {biases[0][1]:+.1f}%)")
+print(f"次要偏差来源: {biases[1][0].upper()} (平均偏差 {biases[1][1]:+.1f}%)")
+
+# 保存到文件
+output_dir = Path(__file__).parent / "output"
+
+# 保存合并后的数据
+df.to_csv(output_dir / "step3_预估vs真实_合并.csv", index=False)
+
+# 保存视频汇总
+vid_stats.to_csv(output_dir / "step3_按视频汇总.csv", index=False)
+
+# 保存结论
+with open(output_dir / "step3_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step3: 分析原因\n")
+    f.write("=" * 70 + "\n\n")
+
+    f.write("【各分项偏差统计】\n")
+    f.write(f"str 偏差: 平均 {df['str_bias'].mean():+.6f} ({df['str_bias_pct'].mean():+.1f}%)\n")
+    f.write(f"ros 偏差: 平均 {df['ros_bias'].mean():+.4f} ({df['ros_bias_pct'].mean():+.1f}%)\n")
+    f.write(f"vor 偏差: 平均 {df['vor_bias'].mean():+.4f} ({df['vor_bias_pct'].mean():+.1f}%)\n\n")
+
+    f.write("【主要偏差来源】\n")
+    f.write(f"排序: {biases[0][0]}({biases[0][1]:.1f}%) > {biases[1][0]}({biases[1][1]:.1f}%) > {biases[2][0]}({biases[2][1]:.1f}%)\n")
+    f.write(f"结论: {biases[0][0].upper()} 是主要偏差来源\n")
+
+print(f"\n结果已保存到 output/ 目录")

+ 91 - 0
tmp/低vov高曝光分析/step3b_分析.py

@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step3b: overall prediction-bias analysis across all head videos
+(not only the flagged problem items).
+"""
+import pandas as pd
+from pathlib import Path
+import glob
+
+# Load and concatenate every per-day CSV exported for this step.
+data_dir = Path(__file__).parent / "output" / "step3b_整体偏差"
+all_files = glob.glob(str(data_dir / "*.csv"))
+df = pd.concat([pd.read_csv(f) for f in all_files], ignore_index=True)
+
+print("=" * 70)
+print("Step3b: 整体预估偏差分析")
+print("=" * 70)
+
+# 1. Data overview.
+print("\n【1. 数据概览】")
+print(f"样本数: {len(df)} 条")
+print(f"视频数: {df['vid'].nunique()}")
+print(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}")
+
+# 2. Keep only rows with a usable realised ROS (non-null and positive).
+df_valid = df[df['real_ros'].notna() & (df['real_ros'] > 0)].copy()
+print(f"有效样本: {len(df_valid)} 条(real_ros > 0)")
+
+# 3. Absolute and percentage bias per component (predicted - realised).
+# NOTE(review): real_str / real_vor are divided without the same >0 guard
+# applied to real_ros above — zero values would yield inf; confirm the
+# upstream extraction already filters them.
+df_valid['str_bias'] = df_valid['pred_str'] - df_valid['real_str']
+df_valid['ros_bias'] = df_valid['pred_ros'] - df_valid['real_ros']
+df_valid['vor_bias'] = df_valid['pred_vor'] - df_valid['real_vor']
+
+df_valid['str_bias_pct'] = (df_valid['pred_str'] - df_valid['real_str']) / df_valid['real_str'] * 100
+df_valid['ros_bias_pct'] = (df_valid['pred_ros'] - df_valid['real_ros']) / df_valid['real_ros'] * 100
+df_valid['vor_bias_pct'] = (df_valid['pred_vor'] - df_valid['real_vor']) / df_valid['real_vor'] * 100
+
+# 4. Overall bias statistics (predicted vs realised means, % bias).
+print("\n【2. 整体偏差统计】")
+print(f"STR: 预估均值={df_valid['pred_str'].mean():.6f}, 真实均值={df_valid['real_str'].mean():.6f}, 偏差={df_valid['str_bias_pct'].mean():+.1f}%")
+print(f"ROS: 预估均值={df_valid['pred_ros'].mean():.4f}, 真实均值={df_valid['real_ros'].mean():.4f}, 偏差={df_valid['ros_bias_pct'].mean():+.1f}%")
+print(f"VOR: 预估均值={df_valid['pred_vor'].mean():.4f}, 真实均值={df_valid['real_vor'].mean():.4f}, 偏差={df_valid['vor_bias_pct'].mean():+.1f}%")
+
+# 5. Share of over- vs under-predicted rows per component.
+print("\n【3. 偏差分布】")
+for col, name in [('str_bias_pct', 'STR'), ('ros_bias_pct', 'ROS'), ('vor_bias_pct', 'VOR')]:
+    high = (df_valid[col] > 0).sum()
+    low = (df_valid[col] < 0).sum()
+    print(f"{name}: 偏高{high}个({high/len(df_valid)*100:.1f}%), 偏低{low}个({low/len(df_valid)*100:.1f}%)")
+
+# 6. Bias per exposure-volume quartile (sample_cnt as exposure proxy).
+print("\n【4. 按曝光量分组看偏差】")
+df_valid['exp_group'] = pd.qcut(df_valid['sample_cnt'], q=4, labels=['低曝光', '中低曝光', '中高曝光', '高曝光'])
+group_stats = df_valid.groupby('exp_group', observed=True).agg({
+    'str_bias_pct': 'mean',
+    'ros_bias_pct': 'mean',
+    'vor_bias_pct': 'mean',
+    'sample_cnt': ['mean', 'count']
+}).round(2)
+group_stats.columns = ['STR偏差%', 'ROS偏差%', 'VOR偏差%', '平均样本', '视频数']
+print(group_stats.to_string())
+
+# 7. Conclusion: classify each component as high/low/normal at a ±10% band.
+print("\n" + "=" * 70)
+print("【Step3b 结论】")
+print("=" * 70)
+
+str_bias = df_valid['str_bias_pct'].mean()
+ros_bias = df_valid['ros_bias_pct'].mean()
+vor_bias = df_valid['vor_bias_pct'].mean()
+
+print(f"整体偏差情况:")
+print(f"  STR: {str_bias:+.1f}% {'(偏高)' if str_bias > 10 else '(偏低)' if str_bias < -10 else '(正常)'}")
+print(f"  ROS: {ros_bias:+.1f}% {'(偏高)' if ros_bias > 10 else '(偏低)' if ros_bias < -10 else '(正常)'}")
+print(f"  VOR: {vor_bias:+.1f}% {'(偏高)' if vor_bias > 10 else '(偏低)' if vor_bias < -10 else '(正常)'}")
+
+# Persist the merged per-row biases (consumed by step3c) and a summary.
+output_dir = Path(__file__).parent / "output"
+df_valid.to_csv(output_dir / "step3b_整体偏差_合并.csv", index=False)
+
+with open(output_dir / "step3b_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step3b: 整体预估偏差分析\n")
+    f.write("=" * 70 + "\n\n")
+    f.write(f"样本: {len(df_valid)} 条, {df_valid['vid'].nunique()} 个视频\n\n")
+    f.write("【整体偏差】\n")
+    f.write(f"STR: {str_bias:+.1f}%\n")
+    f.write(f"ROS: {ros_bias:+.1f}%\n")
+    f.write(f"VOR: {vor_bias:+.1f}%\n")
+
+print(f"\n结果已保存到 output/ 目录")

+ 93 - 0
tmp/低vov高曝光分析/step3c_分析.py

@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step3c: dig into WHY ROS is over-predicted, by slicing the ROS bias
+against sample volume, realised STR and realised ROS, plus correlations.
+
+Input: the merged per-row bias table written by step3b_分析.py.
+"""
+import pandas as pd
+from pathlib import Path
+
+# Load the merged bias table produced by step3b.
+data_file = Path(__file__).parent / "output" / "step3b_整体偏差_合并.csv"
+df = pd.read_csv(data_file)
+
+print("=" * 70)
+print("Step3c: ROS 偏高原因深入分析")
+print("=" * 70)
+
+# 1. ROS bias vs sample volume (exposure proxy).
+print("\n【1. ROS 偏差与样本量(曝光量)的关系】")
+df['sample_group'] = pd.cut(df['sample_cnt'], bins=[0, 150, 300, 500, 10000],
+                            labels=['<150', '150-300', '300-500', '>500'])
+group_ros = df.groupby('sample_group', observed=True).agg({
+    'ros_bias_pct': ['mean', 'std'],
+    'pred_ros': 'mean',
+    'real_ros': 'mean',
+    'sample_cnt': 'count'
+}).round(2)
+group_ros.columns = ['ROS偏差%', '偏差标准差', '预估ROS', '真实ROS', '视频数']
+print(group_ros.to_string())
+
+# 2. ROS bias vs realised share rate (is ROS less accurate for low-STR videos?).
+print("\n【2. ROS 偏差与真实分享率(STR)的关系】")
+df['str_group'] = pd.cut(df['real_str'], bins=[0, 0.005, 0.01, 0.02, 1],
+                         labels=['<0.5%', '0.5-1%', '1-2%', '>2%'])
+group_str = df.groupby('str_group', observed=True).agg({
+    'ros_bias_pct': 'mean',
+    'pred_ros': 'mean',
+    'real_ros': 'mean',
+    'sample_cnt': 'count'
+}).round(2)
+group_str.columns = ['ROS偏差%', '预估ROS', '真实ROS', '视频数']
+print(group_str.to_string())
+
+# 3. ROS bias vs realised ROS (checks for regression-to-the-mean behaviour).
+print("\n【3. ROS 偏差与真实ROS的关系】")
+df['real_ros_group'] = pd.cut(df['real_ros'], bins=[0, 2, 4, 6, 100],
+                              labels=['<2', '2-4', '4-6', '>6'])
+group_real_ros = df.groupby('real_ros_group', observed=True).agg({
+    'ros_bias_pct': 'mean',
+    'pred_ros': 'mean',
+    'real_ros': 'mean',
+    'sample_cnt': 'count'
+}).round(2)
+group_real_ros.columns = ['ROS偏差%', '预估ROS', '真实ROS', '视频数']
+print(group_real_ros.to_string())
+
+# 4. Pearson correlations between ROS bias and candidate drivers.
+print("\n【4. 相关性分析】")
+print(f"ROS偏差 vs 样本量: r = {df['ros_bias_pct'].corr(df['sample_cnt']):.3f}")
+print(f"ROS偏差 vs 真实STR: r = {df['ros_bias_pct'].corr(df['real_str']):.3f}")
+print(f"ROS偏差 vs 真实ROS: r = {df['ros_bias_pct'].corr(df['real_ros']):.3f}")
+print(f"ROS偏差 vs 真实VOV: r = {df['ros_bias_pct'].corr(df['real_vov']):.3f}")
+
+# 5. Conclusion.
+print("\n" + "=" * 70)
+print("【Step3c 结论】")
+print("=" * 70)
+
+# Compare mean bias in the lowest vs highest exposure buckets.
+low_sample = df[df['sample_cnt'] < 150]['ros_bias_pct'].mean()
+high_sample = df[df['sample_cnt'] > 500]['ros_bias_pct'].mean()
+print(f"低曝光组 ROS 偏差: {low_sample:+.1f}%")
+print(f"高曝光组 ROS 偏差: {high_sample:+.1f}%")
+
+if low_sample > high_sample:
+    print("\n规律: 曝光量越低,ROS 预估偏差越大")
+    print("原因推测: 低曝光视频的分享样本少,ROS 真实值波动大,模型难以准确预估")
+
+# Persist the text summary.
+# NOTE(review): the "findings" below (item 3) are written unconditionally,
+# even when the low_sample > high_sample pattern above did not hold —
+# confirm that is intended.
+output_dir = Path(__file__).parent / "output"
+with open(output_dir / "step3c_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step3c: ROS 偏高原因分析\n")
+    f.write("=" * 70 + "\n\n")
+    f.write("【发现】\n")
+    f.write(f"1. 低曝光组 ROS 偏差: {low_sample:+.1f}%\n")
+    f.write(f"2. 高曝光组 ROS 偏差: {high_sample:+.1f}%\n")
+    f.write(f"3. 曝光量越低,ROS 预估偏差越大\n\n")
+    f.write("【原因推测】\n")
+    f.write("1. XGB 模型整体对 ROS 预估偏高(系统性偏差)\n")
+    f.write("2. 低曝光视频分享样本少,真实 ROS 波动大\n")
+    f.write("3. 模型可能使用了全局平均 ROS 作为先验,导致预估值被拉高\n")
+
+print(f"\n结果已保存到 output/ 目录")

+ 82 - 0
tmp/低vov高曝光分析/step3d_分析.py

@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step3d: analyse how ROS relates to the number of days a video has been
+on recommendation (time-decay check).
+
+Reads the step2 impact CSV (which carries the day-interval column),
+buckets rows by recommendation age, contrasts the problem videos and the
+new (<=7d) vs old (>30d) cohorts, prints the tables, and writes a short
+conclusion file under output/.
+"""
+import pandas as pd
+from pathlib import Path
+
+# Load the step2 data (it contains the "推荐天数间隔" day-interval field).
+data_file = Path(__file__).parent / "output" / "step2_影响面_20260101_20260113.csv"
+df = pd.read_csv(data_file)
+
+print("=" * 70)
+print("Step3d: ROS 与推荐天数的关系")
+print("=" * 70)
+
+# 1. Data overview.
+print("\n【1. 数据概览】")
+print(f"样本数: {len(df)} 条")
+print(f"推荐天数范围: {df['推荐天数间隔'].min()} ~ {df['推荐天数间隔'].max()}")
+
+# 2. Mean/std of ROS per recommendation-age bucket.
+print("\n【2. 按推荐天数分组看 ROS】")
+df['天数分组'] = pd.cut(df['推荐天数间隔'], bins=[-1, 3, 7, 14, 30, 1000],
+                       labels=['0-3天', '4-7天', '8-14天', '15-30天', '>30天'])
+group_day = df.groupby('天数分组', observed=True).agg({
+    'ros_t0': ['mean', 'std'],
+    'vov0': 'mean',
+    'exp': 'sum',
+    'vid': 'count'
+}).round(4)
+group_day.columns = ['ROS均值', 'ROS标准差', 'VOV均值', '总曝光', '视频数']
+print(group_day.to_string())
+
+# 3. Recommendation-age distribution of the problem videos.
+print("\n【3. 问题视频的推荐天数分布】")
+# Problem video := vov0 below the median AND exposure rank <= 10.
+median_vov = df['vov0'].median()
+problem_df = df[(df['曝光rank'] <= 10) & (df['vov0'] < median_vov)]
+print(f"问题视频数: {len(problem_df)}")
+
+if len(problem_df) > 0:
+    problem_day = problem_df.groupby('天数分组', observed=True).agg({
+        'ros_t0': 'mean',
+        'vov0': 'mean',
+        'vid': 'count'
+    }).round(4)
+    problem_day.columns = ['ROS均值', 'VOV均值', '视频数']
+    print(problem_day.to_string())
+
+# 4. New (<=7 days) vs old (>30 days) cohorts.
+print("\n【4. 新视频 vs 老视频对比】")
+new_videos = df[df['推荐天数间隔'] <= 7]
+old_videos = df[df['推荐天数间隔'] > 30]
+print(f"新视频(<=7天): ROS={new_videos['ros_t0'].mean():.4f}, VOV={new_videos['vov0'].mean():.4f}, 数量={len(new_videos)}")
+print(f"老视频(>30天): ROS={old_videos['ros_t0'].mean():.4f}, VOV={old_videos['vov0'].mean():.4f}, 数量={len(old_videos)}")
+
+# 5. Conclusion.
+# NOTE(review): if either cohort is empty, mean() yields NaN and the
+# comparison silently falls into the else branch — confirm the input
+# always covers both age ranges.
+print("\n" + "=" * 70)
+print("【Step3d 结论】")
+print("=" * 70)
+new_ros = new_videos['ros_t0'].mean()
+old_ros = old_videos['ros_t0'].mean()
+if new_ros > old_ros:
+    print(f"新视频 ROS ({new_ros:.4f}) > 老视频 ROS ({old_ros:.4f})")
+    print(f"ROS 随推荐天数增加而下降 (衰减 {(1-old_ros/new_ros)*100:.1f}%)")
+else:
+    print(f"新视频 ROS ({new_ros:.4f}) <= 老视频 ROS ({old_ros:.4f})")
+    print("ROS 没有随推荐天数明显衰减")
+
+# Persist the conclusion next to the other step outputs.
+output_dir = Path(__file__).parent / "output"
+with open(output_dir / "step3d_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step3d: ROS 与推荐天数的关系\n")
+    f.write("=" * 70 + "\n\n")
+    f.write(f"新视频(<=7天): ROS={new_ros:.4f}\n")
+    f.write(f"老视频(>30天): ROS={old_ros:.4f}\n")
+    if new_ros > old_ros:
+        f.write(f"结论: ROS 随推荐天数增加而下降\n")
+
+print(f"\n结果已保存到 output/ 目录")

+ 80 - 0
tmp/低vov高曝光分析/step3e_copc分析.py

@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step3e: COPC calibration analysis.
+
+Buckets videos by predicted ROS / predicted STR and compares each bucket's
+real mean to its predicted mean (a calibration curve).  COPC here is
+mean(real) / mean(pred): 1.0 = perfectly calibrated, <1 = over-prediction,
+>1 = under-prediction.
+"""
+import pandas as pd
+from pathlib import Path
+
+# Load the merged step3b bias data.
+data_file = Path(__file__).parent / "output" / "step3b_整体偏差_合并.csv"
+df = pd.read_csv(data_file)
+
+print("=" * 70)
+print("Step3e: COPC 校准分析")
+print("=" * 70)
+
+# 1. Bucket by predicted ROS and inspect the real ROS per bucket.
+print("\n【1. ROS 校准曲线(按预估值分桶)】")
+df['pred_ros_bucket'] = pd.cut(df['pred_ros'], bins=[0, 2, 3, 4, 5, 6, 100],
+                               labels=['0-2', '2-3', '3-4', '4-5', '5-6', '>6'])
+copc = df.groupby('pred_ros_bucket', observed=True).agg({
+    'pred_ros': 'mean',
+    'real_ros': 'mean',
+    'vid': 'count'
+}).round(4)
+copc.columns = ['预估ROS均值', '真实ROS均值', '样本数']
+copc['COPC'] = (copc['真实ROS均值'] / copc['预估ROS均值']).round(4)
+print(copc.to_string())
+
+# 2. Overall (unweighted, video-level) ROS COPC.
+overall_copc = df['real_ros'].mean() / df['pred_ros'].mean()
+print(f"\n整体 COPC = {overall_copc:.4f}")
+print(f"(理想值为 1.0,<1 表示预估偏高,>1 表示预估偏低)")
+
+# 3. Same calibration curve for predicted STR.
+print("\n【2. STR 校准曲线(按预估值分桶)】")
+df['pred_str_bucket'] = pd.cut(df['pred_str'], bins=[0, 0.005, 0.008, 0.01, 0.015, 1],
+                               labels=['0-0.5%', '0.5-0.8%', '0.8-1%', '1-1.5%', '>1.5%'])
+copc_str = df.groupby('pred_str_bucket', observed=True).agg({
+    'pred_str': 'mean',
+    'real_str': 'mean',
+    'vid': 'count'
+}).round(6)
+copc_str.columns = ['预估STR均值', '真实STR均值', '样本数']
+copc_str['COPC'] = (copc_str['真实STR均值'] / copc_str['预估STR均值']).round(4)
+print(copc_str.to_string())
+
+overall_copc_str = df['real_str'].mean() / df['pred_str'].mean()
+print(f"\nSTR 整体 COPC = {overall_copc_str:.4f}")
+
+# 4. Conclusion: flag buckets whose COPC is far from 1.0 (>±20%).
+print("\n" + "=" * 70)
+print("【Step3e 结论】")
+print("=" * 70)
+print(f"ROS COPC = {overall_copc:.4f}(预估偏高 {(1-overall_copc)*100:.1f}%)")
+print(f"STR COPC = {overall_copc_str:.4f}(预估偏低 {(overall_copc_str-1)*100:.1f}%)")
+
+print("\nROS 校准问题:")
+for idx, row in copc.iterrows():
+    if row['COPC'] < 0.8:
+        print(f"  - 预估 {idx}: COPC={row['COPC']:.2f},严重偏高")
+    elif row['COPC'] > 1.2:
+        print(f"  - 预估 {idx}: COPC={row['COPC']:.2f},严重偏低")
+
+# Persist both bucket tables and a short conclusion under output/.
+output_dir = Path(__file__).parent / "output"
+copc.to_csv(output_dir / "step3e_ros_copc.csv")
+copc_str.to_csv(output_dir / "step3e_str_copc.csv")
+
+with open(output_dir / "step3e_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step3e: COPC 校准分析\n")
+    f.write("=" * 70 + "\n\n")
+    f.write(f"ROS 整体 COPC = {overall_copc:.4f}\n")
+    f.write(f"STR 整体 COPC = {overall_copc_str:.4f}\n\n")
+    f.write("ROS 分桶 COPC:\n")
+    f.write(copc.to_string())
+
+print(f"\n结果已保存到 output/ 目录")

+ 89 - 0
tmp/低vov高曝光分析/step3f_copc详细.py

@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step3f: detailed COPC analysis with the bias definitions made explicit.
+
+Computes the ROS/STR prediction bias under three definitions, explains why
+they disagree, adds a sample-count-weighted COPC, and writes a conclusion
+file under output/.
+"""
+import pandas as pd
+from pathlib import Path
+
+# Load the merged step3b bias data.
+data_file = Path(__file__).parent / "output" / "step3b_整体偏差_合并.csv"
+df = pd.read_csv(data_file)
+
+print("=" * 70)
+print("Step3f: COPC 详细分析(统一口径)")
+print("=" * 70)
+
+# 1. Bias under the different definitions.
+print("\n【1. 不同口径的偏差计算】")
+
+# Definition 1: mean(real) / mean(pred) — overall COPC.
+copc1_ros = df['real_ros'].mean() / df['pred_ros'].mean()
+copc1_str = df['real_str'].mean() / df['pred_str'].mean()
+
+# Definition 2: (mean(pred) - mean(real)) / mean(real) — aggregate bias rate.
+bias2_ros = (df['pred_ros'].mean() - df['real_ros'].mean()) / df['real_ros'].mean()
+bias2_str = (df['pred_str'].mean() - df['real_str'].mean()) / df['real_str'].mean()
+
+# Definition 3: mean((pred - real) / real) — per-sample bias averaged
+# (the definition used in the earlier steps; *_bias_pct is in percent).
+bias3_ros = df['ros_bias_pct'].mean() / 100
+bias3_str = df['str_bias_pct'].mean() / 100
+
+print("ROS:")
+print(f"  口径1 COPC = {copc1_ros:.4f} (real均值/pred均值)")
+print(f"  口径2 偏差 = {bias2_ros:+.4f} ((pred均值-real均值)/real均值)")
+print(f"  口径3 偏差 = {bias3_ros:+.4f} (样本偏差率平均)")
+
+print("\nSTR:")
+print(f"  口径1 COPC = {copc1_str:.4f}")
+print(f"  口径2 偏差 = {bias2_str:+.4f}")
+print(f"  口径3 偏差 = {bias3_str:+.4f}")
+
+# 2. Why the definitions disagree.
+print("\n【2. 口径差异原因分析】")
+print("口径2和口径3的差异来源于:")
+print("  - 口径2: 用整体均值计算,大样本权重高")
+print("  - 口径3: 每个样本权重相同,对小样本/极端值更敏感")
+
+# 3. COPC weighted by per-video sample count.
+print("\n【3. 按样本量加权的分析】")
+total_samples = df['sample_cnt'].sum()
+weighted_pred_ros = (df['pred_ros'] * df['sample_cnt']).sum() / total_samples
+weighted_real_ros = (df['real_ros'] * df['sample_cnt']).sum() / total_samples
+weighted_copc_ros = weighted_real_ros / weighted_pred_ros
+
+weighted_pred_str = (df['pred_str'] * df['sample_cnt']).sum() / total_samples
+weighted_real_str = (df['real_str'] * df['sample_cnt']).sum() / total_samples
+weighted_copc_str = weighted_real_str / weighted_pred_str
+
+print(f"ROS 加权COPC = {weighted_copc_ros:.4f}")
+print(f"STR 加权COPC = {weighted_copc_str:.4f}")
+
+# 4. Conclusion: which definition to use for which question.
+print("\n" + "=" * 70)
+print("【Step3f 结论】")
+print("=" * 70)
+print("口径选择建议:")
+print("  - 评估整体效果用 COPC(口径1)")
+print("  - 评估单个视频准确度用样本偏差(口径3)")
+print(f"\n整体结论:")
+print(f"  - ROS COPC={copc1_ros:.2f},整体偏高 {(1-copc1_ros)*100:.0f}%")
+print(f"  - ROS 样本偏差={bias3_ros*100:+.0f}%,单个视频预估普遍偏高")
+print(f"  - 差异原因:极端值拉高了样本偏差")
+
+# Persist the conclusion under output/.
+output_dir = Path(__file__).parent / "output"
+with open(output_dir / "step3f_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step3f: COPC 详细分析\n")
+    f.write("=" * 70 + "\n\n")
+    f.write("【ROS】\n")
+    f.write(f"COPC = {copc1_ros:.4f}\n")
+    f.write(f"整体偏差 = {bias2_ros:+.4f}\n")
+    f.write(f"样本偏差 = {bias3_ros:+.4f}\n\n")
+    f.write("【STR】\n")
+    f.write(f"COPC = {copc1_str:.4f}\n")
+    f.write(f"整体偏差 = {bias2_str:+.4f}\n")
+    f.write(f"样本偏差 = {bias3_str:+.4f}\n")
+
+print(f"\n结果已保存到 output/ 目录")

+ 134 - 0
tmp/低vov高曝光分析/step4_建议.md

@@ -0,0 +1,134 @@
+# 低 VoV 高曝光问题分析报告
+
+## 一、问题概述
+
+**现象**:部分头部视频 vov 低,但曝光量很大
+
+**排序公式**:`score = str × ros × vor`
+- str: FM 模型预估
+- ros: XGB 模型预估
+- vor: 24h 统计量
+
+## 二、数据验证结果
+
+### Step1: 现象验证 ✓
+- **结论**:现象存在
+- **数据**:43.5% 的 Top10 记录存在低vov高曝光问题
+
+### Step2: 影响面量化 ✓
+| 指标 | 数值 |
+|------|------|
+| 问题记录占比 | 38.1% (43/113) |
+| 问题视频数 | 21 个 |
+| 影响曝光量 | 4164万 (35.7%) |
+
+### Step3: 原因分析 ✓
+| 分项 | 整体偏差 | 偏高比例 | 结论 |
+|------|----------|----------|------|
+| STR | -5.7% | 36.4% | 正常 |
+| **ROS** | **+72.2%** | **70.1%** | **系统性偏高** |
+| VOR | 混合 | 44.8% | 有异常值 |
+
+## 三、根因分析
+
+### 核心问题:XGB 模型对 ROS 预估存在系统性偏差
+
+### 发现1: ROS 偏差与真实 ROS 强负相关 (r = -0.607)
+| 真实 ROS | 预估偏差 | 说明 |
+|----------|----------|------|
+| < 2 | **+152%** | 严重偏高 |
+| 2-4 | +20% | 轻微偏高 |
+| 4-6 | -27% | 偏低 |
+| > 6 | -54% | 严重偏低 |
+
+**结论**:模型存在"回归均值"问题,对极端值预估不准
+
+### 发现2: ROS 偏差与分享率(STR)负相关
+| 真实 STR | ROS 偏差 |
+|----------|----------|
+| < 0.5% | +115% |
+| 0.5-1% | +113% |
+| 1-2% | +60% |
+| > 2% | +16% |
+
+**结论**:分享率低的视频,ROS 预估偏差更大
+
+### 发现3: ROS/VOV 随推荐天数衰减
+| 视频类型 | ROS | VOV |
+|----------|-----|-----|
+| 新视频(≤7天) | 1.08 | 0.39 |
+| 老视频(>30天) | 0.98 | 0.25 |
+| **衰减** | **-8.7%** | **-35%** |
+
+**结论**:VOV 衰减比 ROS 更明显,老视频问题更严重
+
+### 典型案例
+| vid | 预估ros | 真实ros | 偏差 |
+|-----|---------|---------|------|
+| 63535473 | 5.99 | 3.11 | +218% |
+| 62421458 | 4.86 | 3.34 | +154% |
+| 55931081 | 6.79 | 4.90 | +112% |
+
+## 四、问题根因总结
+
+```
+问题链路:
+1. XGB 模型对 ROS 整体偏高 72.2%
+2. 真实 ROS 越低,预估偏差越大(回归均值问题)
+3. 分享率低的视频,ROS 预估更不准
+4. 老视频的 VOV 衰减 35%,但模型没有及时感知
+   ↓
+结果: 低 VOV 视频获得高曝光
+```
+
+## 五、解决建议
+
+### 短期方案(快速生效)
+1. **ROS 模型校准**
+   - 识别真实 ROS < 2 的视频,对其预估 ROS 增加衰减系数
+   - 或增加 ROS 置信度阈值
+
+2. **曝光频控**
+   - 对推荐天数 > 30 天的视频增加曝光限制
+   - 避免老视频长期霸榜
+
+### 中期方案
+3. **ROS 模型改进**
+   - 解决"回归均值"问题:对极端值使用专门的预估策略
+   - 增加分享率分层:低 STR 和高 STR 使用不同的 ROS 预估模型
+   - 增加时效性特征:近 6h ros vs 24h ros
+
+4. **在线学习**
+   - 使用实时反馈更新 ros 预估
+   - 减少模型滞后性
+
+### 长期方案
+5. **端到端 VoV 预估**
+   - 直接预估 vov,而非 str × ros × vor
+   - 避免三个模型误差累积
+
+6. **Exploration 机制**
+   - 对推荐天数 > 14 天的视频增加探索比例
+   - 及时发现效率下降
+
+## 六、监控建议
+
+建议增加以下监控指标:
+1. Top10 视频的 ros 预估 vs 真实 偏差率
+2. 按真实 ROS 分组的预估偏差(监控回归均值问题)
+3. 按推荐天数分组的 VOV 趋势(监控衰减)
+4. 单视频连续进入 Top10 的天数
+
+## 七、文件清单
+
+```
+tmp/低vov高曝光分析/
+├── step1_验证现象.sql / step1_分析.py
+├── step2_影响面.sql / step2_分析.py
+├── step3_原因分析.sql / step3_分析.py
+├── step3b_整体偏差.sql / step3b_分析.py
+├── step3c_分析.py (ROS偏差深入分析)
+├── step3d_分析.py (ROS时间衰减分析)
+├── step4_建议.md (本报告)
+└── output/ (所有中间数据)
+```

+ 94 - 0
tmp/低vov高曝光分析/step5_分析.py

@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step5: when did the problem start?  ROS COPC trend over time.
+
+Concatenates the per-day CSVs under output/step5_时间趋势, prints the daily
+COPC table, weekly aggregates and the extreme days, and writes the merged
+data plus a conclusion file under output/.
+"""
+import pandas as pd
+from pathlib import Path
+import glob
+
+# Load and concatenate every daily CSV, then order by date.
+data_dir = Path(__file__).parent / "output" / "step5_时间趋势"
+all_files = glob.glob(str(data_dir / "*.csv"))
+df = pd.concat([pd.read_csv(f) for f in all_files], ignore_index=True)
+df = df.sort_values('dt')
+
+print("=" * 70)
+print("Step5: ROS COPC 时间趋势分析")
+print("=" * 70)
+
+# 1. Data overview.
+print("\n【1. 数据概览】")
+print(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}")
+print(f"天数: {len(df)}")
+
+# 2. Daily trend table; days with NaN COPC are printed as 0.
+print("\n【2. ROS COPC 时间趋势】")
+print(f"{'日期':<12} {'预估ROS':>10} {'真实ROS':>10} {'COPC':>8} {'偏差':>10}")
+print("-" * 55)
+for _, row in df.iterrows():
+    bias = (1 - row['ros_copc']) * 100 if pd.notna(row['ros_copc']) else 0
+    copc = row['ros_copc'] if pd.notna(row['ros_copc']) else 0
+    print(f"{int(row['dt']):<12} {row['pred_ros']:>10.4f} {row['real_ros']:>10.4f} {copc:>8.4f} {bias:>+9.1f}%")
+
+# 3. Weekly aggregates (ISO week number; dt is a yyyymmdd int).
+print("\n【3. 分段统计】")
+df['week'] = pd.to_datetime(df['dt'].astype(str)).dt.isocalendar().week
+weekly = df.groupby('week').agg({
+    'pred_ros': 'mean',
+    'real_ros': 'mean',
+    'ros_copc': 'mean'
+}).round(4)
+weekly['偏差%'] = ((1 - weekly['ros_copc']) * 100).round(1)
+print(weekly.to_string())
+
+# 4. Inflection points.
+print("\n【4. 趋势分析】")
+df['bias_pct'] = (1 - df['ros_copc']) * 100
+
+# 3-day rolling mean of the bias.
+# NOTE(review): bias_ma3 is never printed; it only reaches the merged CSV
+# dump below — confirm it is still wanted.
+df['bias_ma3'] = df['bias_pct'].rolling(3, min_periods=1).mean()
+
+# Day with the largest bias.
+max_bias_idx = df['bias_pct'].idxmax()
+max_bias_date = df.loc[max_bias_idx, 'dt']
+max_bias_val = df.loc[max_bias_idx, 'bias_pct']
+
+# Day with the smallest bias.
+min_bias_idx = df['bias_pct'].idxmin()
+min_bias_date = df.loc[min_bias_idx, 'dt']
+min_bias_val = df.loc[min_bias_idx, 'bias_pct']
+
+print(f"最大偏差: {int(max_bias_date)} ({max_bias_val:+.1f}%)")
+print(f"最小偏差: {int(min_bias_date)} ({min_bias_val:+.1f}%)")
+
+# First-7 vs last-7 rows (≈ first/last week, assuming one row per day).
+first_week = df.head(7)['bias_pct'].mean()
+last_week = df.tail(7)['bias_pct'].mean()
+print(f"\n前7天平均偏差: {first_week:+.1f}%")
+print(f"后7天平均偏差: {last_week:+.1f}%")
+
+# 5. Conclusion: worsening / improving / stable (±5 pp band).
+print("\n" + "=" * 70)
+print("【Step5 结论】")
+print("=" * 70)
+if last_week > first_week + 5:
+    print(f"问题在恶化: 偏差从 {first_week:+.1f}% 上升到 {last_week:+.1f}%")
+elif last_week < first_week - 5:
+    print(f"问题在改善: 偏差从 {first_week:+.1f}% 下降到 {last_week:+.1f}%")
+else:
+    print(f"问题一直存在: 偏差稳定在 {df['bias_pct'].mean():+.1f}% 左右")
+
+# Persist the merged daily data and the conclusion.
+output_dir = Path(__file__).parent / "output"
+df.to_csv(output_dir / "step5_时间趋势_合并.csv", index=False)
+
+with open(output_dir / "step5_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step5: ROS COPC 时间趋势\n")
+    f.write("=" * 70 + "\n\n")
+    f.write(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}\n")
+    f.write(f"平均偏差: {df['bias_pct'].mean():+.1f}%\n")
+    f.write(f"前7天: {first_week:+.1f}%\n")
+    f.write(f"后7天: {last_week:+.1f}%\n")
+
+print(f"\n结果已保存到 output/ 目录")

+ 93 - 0
tmp/低vov高曝光分析/step6_历史抽样.py

@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step6: historical sampling — has the problem always existed?
+
+Re-applies the step1 "low vov, high exposure" check to sampled periods
+between 2025-03 and 2026-01, prints a per-period summary plus a
+cross-period comparison, and saves the comparison table under output/.
+"""
+import pandas as pd
+from pathlib import Path
+import glob
+# NOTE(review): glob is imported but unused in this script.
+
+output_dir = Path(__file__).parent / "output"
+
+# Sampled periods: (label, step1 CSV exported for that window).
+periods = [
+    ("2025年3月", "step1_验证现象_20250301_20250303.csv"),
+    ("2025年5月", "step1_验证现象_20250501_20250503.csv"),
+    ("2025年7月", "step1_验证现象_20250701_20250703.csv"),
+    ("2025年9月", "step1_验证现象_20250901_20250903.csv"),
+    ("2025年10月", "step1_验证现象_20251001_20251003.csv"),
+    ("2026年1月", "step1_验证现象_20260107_20260113.csv"),
+]
+
+print("=" * 70)
+print("Step6: 历史抽样分析 - 问题是否一直存在")
+print("=" * 70)
+
+# Analyse each period; missing exports are reported and skipped.
+results = []
+for period_name, filename in periods:
+    filepath = output_dir / filename
+    if not filepath.exists():
+        print(f"\n{period_name}: 文件不存在")
+        continue
+
+    df = pd.read_csv(filepath)
+
+    # Restrict to Top10-by-exposure records.
+    df_top10 = df[df['曝光rank'] <= 10].copy()
+
+    if len(df_top10) == 0:
+        print(f"\n{period_name}: 无 Top10 数据")
+        continue
+
+    # Problem share: vov below the Top10 median AND rank <= 5,
+    # measured against all Top10 records.
+    vov_median = df_top10['vov0'].median()
+    problem_mask = (df_top10['vov0'] < vov_median) & (df_top10['曝光rank'] <= 5)
+    problem_ratio = problem_mask.sum() / len(df_top10) * 100
+
+    # Per-period summary row.
+    stats = {
+        '时间段': period_name,
+        '天数': df_top10['dt'].nunique(),
+        'Top10记录数': len(df_top10),
+        'vov中位数': round(df_top10['vov0'].median(), 4),
+        'vov均值': round(df_top10['vov0'].mean(), 4),
+        'str均值': round(df_top10['str_t0'].mean(), 4),
+        'ros均值': round(df_top10['ros_t0'].mean(), 2),
+        'vor均值': round(df_top10['vor_t0'].mean(), 2),
+        '问题比例%': round(problem_ratio, 1)
+    }
+    results.append(stats)
+
+    print(f"\n【{period_name}】")
+    print(f"  天数: {stats['天数']}, 记录数: {stats['Top10记录数']}")
+    print(f"  vov: 中位数={stats['vov中位数']}, 均值={stats['vov均值']}")
+    print(f"  str={stats['str均值']}, ros={stats['ros均值']}, vor={stats['vor均值']}")
+    print(f"  低vov高曝光比例: {stats['问题比例%']}%")
+
+# Cross-period comparison table.
+print("\n" + "=" * 70)
+print("【时间段对比】")
+print("=" * 70)
+if results:
+    result_df = pd.DataFrame(results)
+    print(result_df.to_string(index=False))
+
+    # Persist the comparison table.
+    result_df.to_csv(output_dir / "step6_历史对比.csv", index=False)
+
+# Conclusion: compare the first and last sampled period.
+print("\n" + "=" * 70)
+print("【Step6 结论】")
+print("=" * 70)
+if len(results) >= 2:
+    first = results[0]
+    last = results[-1]
+    print(f"问题比例变化: {first['时间段']} {first['问题比例%']}% → {last['时间段']} {last['问题比例%']}%")
+
+    vov_change = (last['vov均值'] - first['vov均值']) / first['vov均值'] * 100
+    ros_change = (last['ros均值'] - first['ros均值']) / first['ros均值'] * 100
+    print(f"vov 变化: {vov_change:+.1f}%")
+    print(f"ros 变化: {ros_change:+.1f}%")
+
+print(f"\n结果已保存到 output/ 目录")

+ 130 - 0
tmp/低vov高曝光分析/step7_分析.py

@@ -0,0 +1,130 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step7: vov time trend of head (Top10-exposure) videos — locate when the
+problem started.
+
+Concatenates the daily trend CSVs, aggregates by month and ISO week,
+compares the first and second halves of the window, fits a linear trend,
+and writes merged data, monthly stats and a conclusion under output/.
+"""
+import pandas as pd
+from pathlib import Path
+import glob
+
+# Load and concatenate the daily Top10 trend CSVs, ordered by date.
+data_dir = Path(__file__).parent / "output" / "step7_头部vov趋势"
+all_files = glob.glob(str(data_dir / "*.csv"))
+df = pd.concat([pd.read_csv(f) for f in all_files], ignore_index=True)
+df = df.sort_values('dt').reset_index(drop=True)
+
+print("=" * 70)
+print("Step7: 头部视频(Top10曝光) vov 时间趋势分析")
+print("=" * 70)
+
+# 1. Data overview.
+print("\n【1. 数据概览】")
+print(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}")
+print(f"天数: {len(df)}")
+
+# 2. Monthly aggregates (month = first 6 chars of the yyyymmdd int).
+print("\n【2. 按月统计】")
+df['month'] = df['dt'].astype(str).str[:6]
+monthly = df.groupby('month').agg({
+    'vov_mean': 'mean',
+    'vov_median': 'mean',
+    'str_mean': 'mean',
+    'ros_mean': 'mean',
+    'vor_mean': 'mean',
+    'problem_pct': 'mean'
+}).round(4)
+monthly.columns = ['vov均值', 'vov中位数', 'str', 'ros', 'vor', '问题比例%']
+print(monthly.to_string())
+
+# 3. Problem-share trend.
+# NOTE(review): problem_ma7 (7-day rolling mean) is never printed; it
+# only reaches the merged CSV dump at the end — confirm still wanted.
+print("\n【3. 问题比例趋势】")
+df['problem_ma7'] = df['problem_pct'].rolling(7, min_periods=1).mean()
+
+# Weekly aggregates keyed by ISO year-week.
+df['week'] = pd.to_datetime(df['dt'].astype(str)).dt.isocalendar().week
+df['year'] = pd.to_datetime(df['dt'].astype(str)).dt.year
+df['yearweek'] = df['year'].astype(str) + '-W' + df['week'].astype(str).str.zfill(2)
+
+weekly = df.groupby('yearweek').agg({
+    'dt': ['min', 'max'],
+    'vov_mean': 'mean',
+    'problem_pct': 'mean'
+}).round(2)
+weekly.columns = ['开始日期', '结束日期', 'vov均值', '问题比例%']
+print(weekly.tail(20).to_string())
+
+# 4. When did it start to worsen?
+print("\n【4. 趋势分析】")
+
+# Split the date-ordered days into two halves and compare.
+mid_point = len(df) // 2
+first_half = df.iloc[:mid_point]
+second_half = df.iloc[mid_point:]
+
+print(f"前半段 ({first_half['dt'].min()}~{first_half['dt'].max()}):")
+print(f"  vov均值: {first_half['vov_mean'].mean():.4f}")
+print(f"  问题比例: {first_half['problem_pct'].mean():.1f}%")
+
+print(f"\n后半段 ({second_half['dt'].min()}~{second_half['dt'].max()}):")
+print(f"  vov均值: {second_half['vov_mean'].mean():.4f}")
+print(f"  问题比例: {second_half['problem_pct'].mean():.1f}%")
+
+# 5. Best and worst weeks by mean problem share.
+print("\n【5. 极值分析】")
+# Weekly means, then locate min/max.
+weekly_problem = df.groupby('yearweek')['problem_pct'].mean().round(1)
+max_week = weekly_problem.idxmax()
+min_week = weekly_problem.idxmin()
+print(f"问题最严重的周: {max_week} ({weekly_problem[max_week]}%)")
+print(f"问题最轻的周: {min_week} ({weekly_problem[min_week]}%)")
+
+# 6. Linear-trend estimate.
+# NOTE(review): numpy is imported mid-script; harmless but unconventional.
+print("\n【6. 趋势回归分析】")
+import numpy as np
+x = np.arange(len(df))
+y = df['problem_pct'].values
+
+# Least-squares line; slope * 30 ≈ change per month.
+slope, intercept = np.polyfit(x, y, 1)
+trend_per_month = slope * 30
+print(f"问题比例趋势: 每月变化 {trend_per_month:+.2f}%")
+
+y_vov = df['vov_mean'].values
+slope_vov, _ = np.polyfit(x, y_vov, 1)
+trend_vov_per_month = slope_vov * 30
+print(f"vov均值趋势: 每月变化 {trend_vov_per_month:+.4f}")
+
+# 7. Conclusion (±3 pp band counts as "stable").
+print("\n" + "=" * 70)
+print("【Step7 结论】")
+print("=" * 70)
+
+change_problem = second_half['problem_pct'].mean() - first_half['problem_pct'].mean()
+change_vov = (second_half['vov_mean'].mean() - first_half['vov_mean'].mean()) / first_half['vov_mean'].mean() * 100
+
+if abs(change_problem) < 3:
+    print("问题比例基本稳定,一直存在")
+elif change_problem > 0:
+    print(f"问题在恶化: 比例上升 {change_problem:+.1f}%")
+else:
+    print(f"问题在改善: 比例下降 {change_problem:+.1f}%")
+
+print(f"vov变化: {change_vov:+.1f}%")
+print(f"\n问题从数据开始({df['dt'].min()})就存在,不是某个时间点突然出现")
+
+# Persist merged daily data, monthly stats and the conclusion.
+output_dir = Path(__file__).parent / "output"
+df.to_csv(output_dir / "step7_头部vov趋势_合并.csv", index=False)
+monthly.to_csv(output_dir / "step7_月度统计.csv")
+
+with open(output_dir / "step7_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step7: 头部视频 vov 时间趋势\n")
+    f.write("=" * 70 + "\n\n")
+    f.write(f"日期范围: {df['dt'].min()} ~ {df['dt'].max()}\n")
+    f.write(f"问题比例变化: {change_problem:+.1f}%\n")
+    f.write(f"vov变化: {change_vov:+.1f}%\n")
+    f.write(f"\n月度统计:\n")
+    f.write(monthly.to_string())
+
+print(f"\n结果已保存到 output/ 目录")

+ 153 - 0
tmp/低vov高曝光分析/step8_分析.py

@@ -0,0 +1,153 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step8: detailed month-over-month comparison — why was September so bad?
+
+Compares July (baseline), September (problem) and December (improved):
+overall metrics, problem-video share, a vov ≈ str × ros × vor
+decomposition, per-video detail for September, and a pooled
+problem-vs-normal feature comparison.  Writes a conclusion to output/.
+"""
+import pandas as pd
+import numpy as np
+from pathlib import Path
+# NOTE(review): numpy is imported but appears unused in this script.
+
+output_dir = Path(__file__).parent / "output"
+
+# Load the three monthly exports.
+df_jul = pd.read_csv(output_dir / "step8_月度对比_20250718_20250731.csv")
+df_sep = pd.read_csv(output_dir / "step8_月度对比_20250901_20250930.csv")
+df_dec = pd.read_csv(output_dir / "step8_月度对比_20251201_20251231.csv")
+
+df_jul['month'] = '7月(基线)'
+df_sep['month'] = '9月(问题)'
+df_dec['month'] = '12月(改善)'
+
+print("=" * 70)
+print("Step8: 月度详细对比分析")
+print("=" * 70)
+
+# 1. Overall metric comparison.
+print("\n【1. 整体指标对比】")
+print(f"{'月份':<12} {'记录数':>8} {'vov均值':>10} {'str均值':>10} {'ros均值':>10} {'vor均值':>10}")
+print("-" * 65)
+
+for name, df in [('7月(基线)', df_jul), ('9月(问题)', df_sep), ('12月(改善)', df_dec)]:
+    print(f"{name:<12} {len(df):>8} {df['vov0'].mean():>10.4f} {df['str_t0'].mean():>10.4f} "
+          f"{df['ros_t0'].mean():>10.2f} {df['vor_t0'].mean():>10.2f}")
+
+# 2. Problem videos (low vov AND high exposure rank).
+print("\n【2. 问题视频分析】")
+print("定义: vov < 0.35 且 rank <= 5")
+
+for name, df in [('7月', df_jul), ('9月', df_sep), ('12月', df_dec)]:
+    problem = df[(df['vov0'] < 0.35) & (df['曝光rank'] <= 5)]
+    normal = df[~((df['vov0'] < 0.35) & (df['曝光rank'] <= 5))]
+
+    print(f"\n{name}:")
+    print(f"  问题视频: {len(problem)} 条 ({len(problem)/len(df)*100:.1f}%)")
+    if len(problem) > 0:
+        print(f"    - vov均值: {problem['vov0'].mean():.4f}")
+        print(f"    - str均值: {problem['str_t0'].mean():.4f}")
+        print(f"    - ros均值: {problem['ros_t0'].mean():.2f}")
+        print(f"    - vor均值: {problem['vor_t0'].mean():.2f}")
+    if len(normal) > 0:
+        print(f"  正常视频: {len(normal)} 条")
+        print(f"    - vov均值: {normal['vov0'].mean():.4f}")
+
+# 3. Decompose vov ≈ str × ros × vor to locate the source of the gap.
+print("\n【3. vov 分解分析 (vov ≈ str × ros × vor)】")
+
+def decompose_vov(df):
+    """Return the means of str/ros/vor/vov for *df*, plus the product of
+    the three factor means (the implied vov)."""
+    return {
+        'str': df['str_t0'].mean(),
+        'ros': df['ros_t0'].mean(),
+        'vor': df['vor_t0'].mean(),
+        'vov': df['vov0'].mean(),
+        'str*ros*vor': df['str_t0'].mean() * df['ros_t0'].mean() * df['vor_t0'].mean()
+    }
+
+jul_stats = decompose_vov(df_jul)
+sep_stats = decompose_vov(df_sep)
+dec_stats = decompose_vov(df_dec)
+
+print(f"\n{'指标':<8} {'7月':>10} {'9月':>10} {'12月':>10} {'9月vs7月':>12} {'12月vs9月':>12}")
+print("-" * 65)
+for key in ['str', 'ros', 'vor', 'vov']:
+    j, s, d = jul_stats[key], sep_stats[key], dec_stats[key]
+    chg1 = (s - j) / j * 100 if j != 0 else 0
+    chg2 = (d - s) / s * 100 if s != 0 else 0
+    print(f"{key:<8} {j:>10.4f} {s:>10.4f} {d:>10.4f} {chg1:>+11.1f}% {chg2:>+11.1f}%")
+
+# 4. September's problem videos in detail.
+print("\n【4. 9月问题视频详情】")
+sep_problem = df_sep[(df_sep['vov0'] < 0.35) & (df_sep['曝光rank'] <= 5)].copy()
+
+# Aggregate per video; the title column is carried along via 'first'.
+# NOTE(review): row['标题'][:30] below assumes titles are strings — a NaN
+# title would raise TypeError; confirm the export never has empty titles.
+if len(sep_problem) > 0:
+    vid_stats = sep_problem.groupby('vid').agg({
+        'dt': 'count',
+        'vov0': 'mean',
+        'str_t0': 'mean',
+        'ros_t0': 'mean',
+        'vor_t0': 'mean',
+        'exp': 'sum',
+        '标题': 'first'
+    }).round(4)
+    vid_stats.columns = ['出现天数', 'vov均值', 'str', 'ros', 'vor', '总曝光', '标题']
+    vid_stats = vid_stats.sort_values('出现天数', ascending=False)
+
+    print(f"问题视频数: {len(vid_stats)}")
+    print(f"\n出现最多的问题视频 Top5:")
+    for i, (vid, row) in enumerate(vid_stats.head(5).iterrows()):
+        print(f"  {i+1}. vid={vid}, 出现{int(row['出现天数'])}天")
+        print(f"     vov={row['vov均值']:.4f}, str={row['str']:.4f}, ros={row['ros']:.2f}, vor={row['vor']:.2f}")
+        print(f"     标题: {row['标题'][:30]}...")
+
+# 5. Problem vs normal video feature comparison (all three months pooled).
+print("\n【5. 问题视频 vs 正常视频特征对比】")
+df_all = pd.concat([df_jul, df_sep, df_dec])
+df_all['is_problem'] = (df_all['vov0'] < 0.35) & (df_all['曝光rank'] <= 5)
+
+problem_df = df_all[df_all['is_problem']]
+normal_df = df_all[~df_all['is_problem']]
+
+print(f"\n{'特征':<12} {'问题视频':>12} {'正常视频':>12} {'差异':>12}")
+print("-" * 50)
+for col, name in [('str_t0', 'str'), ('ros_t0', 'ros'), ('vor_t0', 'vor')]:
+    p_val = problem_df[col].mean()
+    n_val = normal_df[col].mean()
+    diff = (p_val - n_val) / n_val * 100 if n_val != 0 else 0
+    print(f"{name:<12} {p_val:>12.4f} {n_val:>12.4f} {diff:>+11.1f}%")
+
+# 6. Conclusion: which factor moved most from July to September.
+print("\n" + "=" * 70)
+print("【Step8 结论:9月问题严重的原因】")
+print("=" * 70)
+
+# Relative change of each factor, September vs July.
+str_contrib = (sep_stats['str'] - jul_stats['str']) / jul_stats['str'] * 100
+ros_contrib = (sep_stats['ros'] - jul_stats['ros']) / jul_stats['ros'] * 100
+vor_contrib = (sep_stats['vor'] - jul_stats['vor']) / jul_stats['vor'] * 100
+
+print(f"\n9月 vs 7月 各因素变化:")
+print(f"  STR: {str_contrib:+.1f}%")
+print(f"  ROS: {ros_contrib:+.1f}%")
+print(f"  VOR: {vor_contrib:+.1f}%")
+
+# Main driver = factor with the largest absolute change.
+factors = [('STR', abs(str_contrib)), ('ROS', abs(ros_contrib)), ('VOR', abs(vor_contrib))]
+main_factor = max(factors, key=lambda x: x[1])
+
+print(f"\n主要原因: {main_factor[0]} 变化最大 ({factors[0][1]:.1f}% / {factors[1][1]:.1f}% / {factors[2][1]:.1f}%)")
+
+# Persist the conclusion.
+with open(output_dir / "step8_结论.txt", 'w', encoding='utf-8') as f:
+    f.write("=" * 70 + "\n")
+    f.write("Step8: 月度对比分析\n")
+    f.write("=" * 70 + "\n\n")
+    f.write("【整体指标】\n")
+    f.write(f"7月: vov={jul_stats['vov']:.4f}, str={jul_stats['str']:.4f}, ros={jul_stats['ros']:.2f}, vor={jul_stats['vor']:.2f}\n")
+    f.write(f"9月: vov={sep_stats['vov']:.4f}, str={sep_stats['str']:.4f}, ros={sep_stats['ros']:.2f}, vor={sep_stats['vor']:.2f}\n")
+    f.write(f"12月: vov={dec_stats['vov']:.4f}, str={dec_stats['str']:.4f}, ros={dec_stats['ros']:.2f}, vor={dec_stats['vor']:.2f}\n")
+    f.write(f"\n【9月问题原因】\n")
+    f.write(f"STR变化: {str_contrib:+.1f}%\n")
+    f.write(f"ROS变化: {ros_contrib:+.1f}%\n")
+    f.write(f"VOR变化: {vor_contrib:+.1f}%\n")
+
+print(f"\n结果已保存到 output/ 目录")

+ 167 - 0
tmp/低vov高曝光分析/step9_天级趋势.py

@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+Step9: daily-level data and plain-text trend charts for December-January.
+
+Slices the merged 180-day head-video trend (from step7) to dt >= 20251201,
+prints a daily table with direction arrows, ASCII bar charts for problem
+share and vov, day-over-day extremes, a Dec-vs-Jan comparison and a weekly
+summary, then saves the daily slice under output/.
+"""
+import pandas as pd
+import numpy as np
+from pathlib import Path
+# NOTE(review): numpy is imported but appears unused in this script.
+
+output_dir = Path(__file__).parent / "output"
+
+# Load the merged 180-day trend data produced by step7.
+df = pd.read_csv(output_dir / "step7_头部vov趋势_合并.csv")
+df = df.sort_values('dt').reset_index(drop=True)
+df['date'] = pd.to_datetime(df['dt'].astype(str))
+
+print("=" * 80)
+print("Step9: 天级数据和趋势线分析")
+print("=" * 80)
+
+# 1. Daily table for December-January.
+print("\n【1. 12月-1月 天级数据】")
+df_recent = df[df['dt'] >= 20251201].copy()
+
+print(f"{'日期':<10} {'vov均值':>8} {'str':>8} {'ros':>6} {'vor':>6} {'问题%':>8} {'趋势':>6}")
+print("-" * 60)
+
+# 7-day rolling means.
+df_recent['problem_ma7'] = df_recent['problem_pct'].rolling(7, min_periods=1).mean()
+df_recent['vov_ma7'] = df_recent['vov_mean'].rolling(7, min_periods=1).mean()
+
+# Direction arrows: double arrows for moves >5 pp vs the previous day.
+prev_problem = None
+for _, row in df_recent.iterrows():
+    trend = ""
+    if prev_problem is not None:
+        if row['problem_pct'] > prev_problem + 5:
+            trend = "↑↑"
+        elif row['problem_pct'] > prev_problem:
+            trend = "↑"
+        elif row['problem_pct'] < prev_problem - 5:
+            trend = "↓↓"
+        elif row['problem_pct'] < prev_problem:
+            trend = "↓"
+        else:
+            trend = "→"
+    prev_problem = row['problem_pct']
+
+    print(f"{int(row['dt']):<10} {row['vov_mean']:>8.4f} {row['str_mean']:>8.4f} "
+          f"{row['ros_mean']:>6.2f} {row['vor_mean']:>6.2f} {row['problem_pct']:>7.1f}% {trend:>6}")
+
+# 2. ASCII bar chart of the problem share.
+print("\n【2. 问题比例趋势线(12月-1月)】")
+print("日期        0%    10%   20%   30%   40%   50%")
+print("           |-----|-----|-----|-----|-----|")
+
+for _, row in df_recent.iterrows():
+    bar_len = int(row['problem_pct'] / 2)  # one bar character per 2%
+    bar = "█" * bar_len
+    # NOTE(review): ma_pos is computed but never used — dead code.
+    ma_pos = int(row['problem_ma7'] / 2)
+
+    date_str = str(int(row['dt']))
+    print(f"{date_str}   {bar:<25} {row['problem_pct']:.0f}%")
+
+# 3. ASCII bar chart of the vov mean.
+print("\n【3. vov 均值趋势线(12月-1月)】")
+print("日期       0.2   0.3   0.4   0.5   0.6   0.7")
+print("           |-----|-----|-----|-----|-----|")
+
+for _, row in df_recent.iterrows():
+    # vov range 0.2-0.7 mapped onto 0-25 bar characters (clamped).
+    bar_len = int((row['vov_mean'] - 0.2) / 0.02)
+    bar_len = max(0, min(25, bar_len))
+    bar = "█" * bar_len
+
+    date_str = str(int(row['dt']))
+    print(f"{date_str}   {bar:<25} {row['vov_mean']:.2f}")
+
+# 4. Key dates: largest day-over-day moves in the problem share.
+print("\n【4. 关键时间点分析】")
+
+# Day-over-day change of the problem share.
+df_recent['problem_diff'] = df_recent['problem_pct'].diff()
+
+# Largest single-day drop.
+min_diff_idx = df_recent['problem_diff'].idxmin()
+if pd.notna(min_diff_idx):
+    row = df_recent.loc[min_diff_idx]
+    print(f"问题比例最大下降: {int(row['dt'])} (下降 {row['problem_diff']:.1f}%)")
+
+# Largest single-day rise.
+max_diff_idx = df_recent['problem_diff'].idxmax()
+if pd.notna(max_diff_idx):
+    row = df_recent.loc[max_diff_idx]
+    print(f"问题比例最大上升: {int(row['dt'])} (上升 {row['problem_diff']:.1f}%)")
+
+# 5. December vs January comparison.
+print("\n【5. 12月 vs 1月 对比】")
+dec = df_recent[df_recent['dt'] < 20260101]
+jan = df_recent[df_recent['dt'] >= 20260101]
+
+print(f"\n{'指标':<12} {'12月均值':>12} {'1月均值':>12} {'变化':>12}")
+print("-" * 50)
+for col, name in [('vov_mean', 'vov'), ('str_mean', 'str'), ('ros_mean', 'ros'),
+                  ('vor_mean', 'vor'), ('problem_pct', '问题比例%')]:
+    dec_val = dec[col].mean()
+    jan_val = jan[col].mean()
+    if dec_val != 0:
+        change = (jan_val - dec_val) / dec_val * 100
+        print(f"{name:<12} {dec_val:>12.4f} {jan_val:>12.4f} {change:>+11.1f}%")
+
+# 6. Weekly summary (ISO year/week).
+print("\n【6. 周度汇总】")
+df_recent['week'] = df_recent['date'].dt.isocalendar().week
+df_recent['year'] = df_recent['date'].dt.year
+
+weekly = df_recent.groupby(['year', 'week']).agg({
+    'dt': ['min', 'max', 'count'],
+    'vov_mean': 'mean',
+    'str_mean': 'mean',
+    'ros_mean': 'mean',
+    'vor_mean': 'mean',
+    'problem_pct': 'mean'
+}).round(4)
+
+print(f"{'周':<10} {'日期范围':<20} {'天数':>4} {'vov':>8} {'str':>8} {'问题%':>8}")
+print("-" * 65)
+for (year, week), row in weekly.iterrows():
+    start = int(row[('dt', 'min')])
+    end = int(row[('dt', 'max')])
+    days = int(row[('dt', 'count')])
+    vov = row[('vov_mean', 'mean')]
+    str_v = row[('str_mean', 'mean')]
+    prob = row[('problem_pct', 'mean')]
+    print(f"{year}-W{week:<4} {start}-{end:<12} {days:>4} {vov:>8.4f} {str_v:>8.4f} {prob:>7.1f}%")
+
+# 7. Conclusion.
+print("\n" + "=" * 80)
+print("【Step9 结论】")
+print("=" * 80)
+
+dec_prob = dec['problem_pct'].mean()
+jan_prob = jan['problem_pct'].mean()
+change = jan_prob - dec_prob
+
+print(f"\n12月平均问题比例: {dec_prob:.1f}%")
+print(f"1月平均问题比例: {jan_prob:.1f}%")
+print(f"变化: {change:+.1f}%")
+
+if change > 5:
+    print(f"\n⚠️  1月问题回升明显,需要关注")
+
+    # Attribute the rebound: STR and VOR month-over-month changes.
+    dec_str = dec['str_mean'].mean()
+    jan_str = jan['str_mean'].mean()
+    str_change = (jan_str - dec_str) / dec_str * 100
+
+    dec_vor = dec['vor_mean'].mean()
+    jan_vor = jan['vor_mean'].mean()
+    vor_change = (jan_vor - dec_vor) / dec_vor * 100
+
+    print(f"\n原因分析:")
+    print(f"  STR 变化: {str_change:+.1f}%")
+    print(f"  VOR 变化: {vor_change:+.1f}%")
+
+# Persist the December-January daily slice.
+df_recent.to_csv(output_dir / "step9_天级数据_12月1月.csv", index=False)
+print(f"\n结果已保存到 output/ 目录")

+ 91 - 0
tmp/低vov高曝光分析/v2_step1_数据驱动定义.py

@@ -0,0 +1,91 @@
#!/usr/bin/env python
# coding=utf-8
"""
V2 Step1: derive a data-driven definition of the "low vov / high exposure"
problem from observed distributions, instead of hand-picked thresholds.

Reads the step8 monthly export, prints VoV distribution stats, problem
shares under candidate thresholds (P25 / P50 / legacy 0.35), and the
rank-vs-vov correlation; writes the chosen threshold to output/.
"""
import pandas as pd
import numpy as np
from pathlib import Path

output_dir = Path(__file__).parent / "output"

# Load the daily data.  NOTE(review): the docstring mentions 180 days but
# only one month is loaded here — confirm which window is intended.
df = pd.read_csv(output_dir / "step8_月度对比_20251201_20251231.csv")  # one month only, for now

print("=" * 70)
print("V2 Step1: 数据驱动定义问题")
print("=" * 70)

# 1. Overall VoV distribution statistics.
print("\n【1. VoV 分布统计】")
print(f"样本数: {len(df)}")
print(f"VoV 均值: {df['vov0'].mean():.4f}")
print(f"VoV 中位数: {df['vov0'].median():.4f}")
print(f"VoV P25: {df['vov0'].quantile(0.25):.4f}")
print(f"VoV P75: {df['vov0'].quantile(0.75):.4f}")

# 2. VoV distribution per exposure-rank bucket.
# NOTE(review): rows with 曝光rank > 50 (or <= 0) fall outside the bins,
# become NaN, and are silently dropped from the grouped stats.
print("\n【2. 不同曝光rank的VoV分布】")
df['rank_group'] = pd.cut(df['曝光rank'], bins=[0, 3, 5, 10, 50], labels=['Top3', 'Top4-5', 'Top6-10', 'Top11-50'])
rank_stats = df.groupby('rank_group', observed=True).agg({
    'vov0': ['mean', 'median', lambda x: x.quantile(0.25)],
    '曝光rank': 'count'
}).round(4)
rank_stats.columns = ['vov均值', 'vov中位数', 'vov_P25', '样本数']
print(rank_stats)

# 3. Data-driven "low vov" thresholds: overall P25 and P50.
print("\n【3. 数据驱动的问题定义】")
overall_p25 = df['vov0'].quantile(0.25)
overall_p50 = df['vov0'].median()

print(f"方案A: vov < P25 ({overall_p25:.4f}) 为低vov")
print(f"方案B: vov < P50 ({overall_p50:.4f}) 为低vov")

# 4. Problem share under each candidate definition (0.35 = legacy threshold).
print("\n【4. 不同定义下的问题比例】")

for threshold_name, threshold in [('P25', overall_p25), ('P50', overall_p50), ('0.35(旧)', 0.35)]:
    # "Problem" = low vov combined with a front exposure rank.
    problem_a = df[(df['vov0'] < threshold) & (df['曝光rank'] <= 3)]
    problem_b = df[(df['vov0'] < threshold) & (df['曝光rank'] <= 5)]

    print(f"\n阈值={threshold_name} ({threshold:.4f}):")
    print(f"  低vov且Top3: {len(problem_a)} ({len(problem_a)/len(df)*100:.1f}%)")
    print(f"  低vov且Top5: {len(problem_b)} ({len(problem_b)/len(df)*100:.1f}%)")

# 5. Essence of the problem: does a better (smaller) rank carry higher vov?
print("\n【5. 核心问题:高rank是否对应高vov?】")
# If the ordering were sound, the Pearson correlation between exposure
# rank and vov should be negative (smaller rank -> higher vov).
corr = df['曝光rank'].corr(df['vov0'])
print(f"曝光rank 与 vov 相关系数: {corr:.4f}")
print(f"(负数表示rank越小vov越高,这是期望的;正数表示排序有问题)")

# Mean vov at selected individual ranks.
print("\n各rank组vov均值:")
for rank in [1, 2, 3, 4, 5, 10]:
    subset = df[df['曝光rank'] == rank]
    if len(subset) > 0:
        print(f"  Rank {rank}: vov={subset['vov0'].mean():.4f} (n={len(subset)})")

# 6. Conclusions.
print("\n" + "=" * 70)
print("【V2 Step1 结论】")
print("=" * 70)
print(f"""
问题重新定义:
  - 旧定义: vov < 0.35 且 rank ≤ 5 (拍脑袋)
  - 新定义: vov < P25 ({overall_p25:.4f}) 且 rank ≤ 3 (数据驱动)

核心问题验证:
  - rank 与 vov 相关系数 = {corr:.4f}
  - 如果接近0或为正,说明排序没有很好地按vov排
  - 这才是"低vov高曝光"问题的本质
""")

# Persist the derived threshold and correlation for later V2 steps.
with open(output_dir / "v2_step1_结论.txt", 'w', encoding='utf-8') as f:
    f.write(f"问题定义阈值: vov < {overall_p25:.4f} (P25)\n")
    f.write(f"rank与vov相关系数: {corr:.4f}\n")

+ 106 - 0
tmp/低vov高曝光分析/v2_step2_分析.py

@@ -0,0 +1,106 @@
#!/usr/bin/env python
# coding=utf-8
"""
V2 Step2: unified-scope analysis.

Validates predicted vs. observed metrics (ROS / STR) on one merged table:
overall bias, COPC, bias by exposure bucket, and agreement between the
predicted and real rankings.  Reads every CSV under
output/v2_step2_统一口径/ and writes the merged, annotated table back to
output/.
"""
import pandas as pd
import numpy as np
from pathlib import Path

output_dir = Path(__file__).parent / "output"

# Merge the per-day exports into a single frame.
import glob
data_dir = output_dir / "v2_step2_统一口径"
all_files = glob.glob(str(data_dir / "*.csv"))
df = pd.concat([pd.read_csv(f) for f in all_files], ignore_index=True)

print("=" * 70)
print("V2 Step2: 统一口径分析")
print("=" * 70)

print(f"\n数据量: {len(df)} 个视频")

# 1. Overall bias of predictions against observed values.
print("\n【1. 整体偏差统计】")
print(f"ROS 偏差均值: {df['ros_bias_pct'].mean():+.1f}%")
print(f"ROS 偏差中位数: {df['ros_bias_pct'].median():+.1f}%")
print(f"STR 偏差均值: {df['str_bias_pct'].mean():+.1f}%")
print(f"STR 偏差中位数: {df['str_bias_pct'].median():+.1f}%")

# 2. COPC = sum(real) / sum(pred); 1.0 means perfectly calibrated overall.
print("\n【2. 整体 COPC】")
ros_copc = df['real_ros'].sum() / df['pred_ros'].sum()
str_copc = df['real_str'].sum() / df['pred_str'].sum()
print(f"ROS COPC = {ros_copc:.4f} (1.0为理想)")
print(f"STR COPC = {str_copc:.4f}")

# 3. Bias by exposure tercile (head vs. tail videos).
# NOTE(review): pd.qcut raises if total_exp has too many tied values for
# three distinct bin edges — confirm on real data.
print("\n【3. 按曝光量分组】")
df['exp_group'] = pd.qcut(df['total_exp'], q=3, labels=['低曝光', '中曝光', '高曝光'])
group_stats = df.groupby('exp_group', observed=True).agg({
    'ros_bias_pct': ['mean', 'median'],
    'str_bias_pct': ['mean', 'median'],
    'vid': 'count'
}).round(1)
print(group_stats)

# 4. Bias by observed-ROS bucket (checks regression-to-the-mean effects).
print("\n【4. 按真实ROS分组】")
df['real_ros_group'] = pd.cut(df['real_ros'], bins=[0, 2, 4, 6, 100], labels=['<2', '2-4', '4-6', '>6'])
ros_group_stats = df.groupby('real_ros_group', observed=True).agg({
    'pred_ros': 'mean',
    'real_ros': 'mean',
    'ros_bias_pct': ['mean', 'count']
}).round(2)
ros_group_stats.columns = ['预估ROS', '真实ROS', '偏差%', '样本数']
print(ros_group_stats)

# 5. Core check: videos scored high by the model but ranking low in reality.
print("\n【5. 核心问题验证】")
# Rank both scores descending (rank 1 = best).  Ties receive average
# ranks, so the "pred_rank <= 10" selection may hold a count != 10.
df['pred_rank'] = df['pred_score'].rank(ascending=False)
df['real_rank'] = df['real_score'].rank(ascending=False)
df['rank_diff'] = df['real_rank'] - df['pred_rank']  # >0: real rank worse than predicted

pred_top10 = df[df['pred_rank'] <= 10].copy()
print(f"预估排名Top10的视频:")
print(f"  真实排名均值: {pred_top10['real_rank'].mean():.1f}")
print(f"  排名差距均值: {pred_top10['rank_diff'].mean():.1f}")

# Problem videos: predicted top-10 but real rank worse than 15.
problem_vids = pred_top10[pred_top10['real_rank'] > 15]
# BUGFIX: the percentage base was hard-coded as 10; because of rank ties
# pred_top10 may contain a different number of rows, so use its actual
# size (guarded against an empty selection).
problem_top_pct = len(problem_vids) / len(pred_top10) * 100 if len(pred_top10) else 0.0
print(f"  预估Top10但真实排名>15: {len(problem_vids)} 个 ({problem_top_pct:.0f}%)")

# 6. Agreement between predicted and real rankings (Pearson on the ranks).
rank_corr = df['pred_rank'].corr(df['real_rank'])
print(f"\n预估排名与真实排名相关系数: {rank_corr:.4f}")
print(f"(1.0为完美,越低说明排序越不准)")

# 7. Detail dump of the problem videos.
if len(problem_vids) > 0:
    print("\n【6. 问题视频详情】")
    print("预估排名靠前但真实排名差的视频:")
    for _, row in problem_vids.iterrows():
        print(f"  vid={row['vid']}: 预估rank={int(row['pred_rank'])}, 真实rank={int(row['real_rank'])}")
        print(f"    ROS偏差={row['ros_bias_pct']:+.1f}%, STR偏差={row['str_bias_pct']:+.1f}%")

# 8. Conclusions.
print("\n" + "=" * 70)
print("【V2 Step2 结论】")
print("=" * 70)
print(f"""
口径统一后的发现:
  1. ROS 整体 COPC = {ros_copc:.2f} ({'偏高' if ros_copc < 1 else '偏低' if ros_copc > 1 else '正常'})
  2. STR 整体 COPC = {str_copc:.2f}
  3. 预估排名与真实排名相关系数 = {rank_corr:.2f}
  4. 预估Top10中,{problem_top_pct:.0f}% 真实排名 > 15

结论:
  - 相关系数 {rank_corr:.2f} {'较高,排序整体合理' if rank_corr > 0.7 else '偏低,排序有改进空间' if rank_corr > 0.5 else '较低,排序问题明显'}
""")

# Persist the annotated table.
df.to_csv(output_dir / "v2_step2_统一口径_分析.csv", index=False)

+ 135 - 0
tmp/低vov高曝光分析/v2_step3_对比分析.py

@@ -0,0 +1,135 @@
#!/usr/bin/env python
# coding=utf-8
"""
V2 Step3: problem videos vs. normal videos.

Compares prediction bias (ROS / STR) between "problem" videos — predicted
rank in the top 25% but real rank in the bottom 50% — and everything
else, to locate the main source of ranking error.  Reads the merged CSVs
from output/v2_step2_统一口径/ and writes the labelled table to output/.
"""
import pandas as pd
import numpy as np
from pathlib import Path
import glob

output_dir = Path(__file__).parent / "output"

# Merge the per-day exports.
data_dir = output_dir / "v2_step2_统一口径"
all_files = glob.glob(str(data_dir / "*.csv"))
df = pd.concat([pd.read_csv(f) for f in all_files], ignore_index=True)

print("=" * 70)
print("V2 Step3: 问题视频 vs 正常视频对比分析")
print("=" * 70)

# 1. Per-chunk ranks (~7 equal chunks of the concatenated frame).
# BUGFIX: the original passed the scalar 0 to groupby() when len(df) <= 7,
# which raises KeyError (there is no column labelled 0).  Clamping the
# chunk size to >= 1 keeps the grouping identical for len(df) > 7 and
# valid (one row per group) for small frames.
# NOTE(review): these per-chunk ranks are superseded by the *_all ranks
# below; they only survive into the saved CSV.
chunk_key = df.index // max(len(df) // 7, 1)
df['pred_rank'] = df.groupby(chunk_key)['pred_score'].rank(ascending=False)
df['real_rank'] = df.groupby(chunk_key)['real_score'].rank(ascending=False)

# Simplified: global ranks over the whole frame (rank 1 = best score).
df['pred_rank_all'] = df['pred_score'].rank(ascending=False)
df['real_rank_all'] = df['real_score'].rank(ascending=False)
df['rank_diff'] = df['real_rank_all'] - df['pred_rank_all']

# Problem video: predicted in the top 25% but real rank in the bottom half.
top_25_pct = df['pred_rank_all'].quantile(0.25)
bottom_50_pct = df['real_rank_all'].quantile(0.50)

df['is_problem'] = (df['pred_rank_all'] <= top_25_pct) & (df['real_rank_all'] > bottom_50_pct)

print(f"\n问题定义: 预估排名 Top 25% 但真实排名 Bottom 50%")
print(f"问题视频数: {df['is_problem'].sum()} / {len(df)} ({df['is_problem'].mean()*100:.1f}%)")

# 2. Side-by-side means for problem vs. normal videos.
print("\n【对比分析】")
problem = df[df['is_problem']]
normal = df[~df['is_problem']]

comparison = pd.DataFrame({
    '问题视频': [
        len(problem),
        problem['pred_ros'].mean(),
        problem['real_ros'].mean(),
        problem['ros_bias_pct'].mean(),
        problem['pred_str'].mean(),
        problem['real_str'].mean(),
        problem['str_bias_pct'].mean(),
    ],
    '正常视频': [
        len(normal),
        normal['pred_ros'].mean(),
        normal['real_ros'].mean(),
        normal['ros_bias_pct'].mean(),
        normal['pred_str'].mean(),
        normal['real_str'].mean(),
        normal['str_bias_pct'].mean(),
    ]
}, index=['样本数', '预估ROS', '真实ROS', 'ROS偏差%', '预估STR', '真实STR', 'STR偏差%'])

print(comparison.round(4).to_string())

# 3. Key numbers: predicted vs. real for the problem group.
print("\n【关键发现】")
if len(problem) > 0:
    print(f"\n问题视频特征:")
    print(f"  预估ROS: {problem['pred_ros'].mean():.2f}")
    print(f"  真实ROS: {problem['real_ros'].mean():.2f}")
    print(f"  ROS偏差: {problem['ros_bias_pct'].mean():+.1f}%")
    print(f"  预估STR: {problem['pred_str'].mean():.6f}")
    print(f"  真实STR: {problem['real_str'].mean():.6f}")
    print(f"  STR偏差: {problem['str_bias_pct'].mean():+.1f}%")

# If every row were flagged as a problem, `normal` would be empty and
# these means print as nan (no crash).
print(f"\n正常视频特征:")
print(f"  预估ROS: {normal['pred_ros'].mean():.2f}")
print(f"  真实ROS: {normal['real_ros'].mean():.2f}")
print(f"  ROS偏差: {normal['ros_bias_pct'].mean():+.1f}%")

# 4. Which metric's bias separates the two groups more?
print("\n【偏差来源分析】")
if len(problem) > 0:
    ros_diff = problem['ros_bias_pct'].mean() - normal['ros_bias_pct'].mean()
    str_diff = problem['str_bias_pct'].mean() - normal['str_bias_pct'].mean()

    print(f"问题视频 vs 正常视频:")
    print(f"  ROS偏差差异: {ros_diff:+.1f}%")
    print(f"  STR偏差差异: {str_diff:+.1f}%")

    if abs(ros_diff) > abs(str_diff):
        print(f"\n主要原因: ROS预估问题(差异更大)")
    else:
        print(f"\n主要原因: STR预估问题(差异更大)")

# 5. Real-ROS distribution of each group.
print("\n【真实ROS分布对比】")
if len(problem) > 0:
    print(f"问题视频真实ROS分布:")
    print(f"  < 2: {(problem['real_ros'] < 2).sum()} ({(problem['real_ros'] < 2).mean()*100:.1f}%)")
    print(f"  2-4: {((problem['real_ros'] >= 2) & (problem['real_ros'] < 4)).sum()}")
    print(f"  > 4: {(problem['real_ros'] >= 4).sum()}")

print(f"\n正常视频真实ROS分布:")
print(f"  < 2: {(normal['real_ros'] < 2).sum()} ({(normal['real_ros'] < 2).mean()*100:.1f}%)")
print(f"  2-4: {((normal['real_ros'] >= 2) & (normal['real_ros'] < 4)).sum()}")
print(f"  > 4: {(normal['real_ros'] >= 4).sum()}")

# 6. Conclusions (ros_diff / str_diff are defined above under the same
# len(problem) > 0 guard, so they are in scope here).
print("\n" + "=" * 70)
print("【V2 Step3 结论】")
print("=" * 70)

if len(problem) > 0:
    main_cause = "ROS" if abs(ros_diff) > abs(str_diff) else "STR"
    print(f"""
核心发现:
  1. 问题视频占比: {df['is_problem'].mean()*100:.1f}%
  2. 主要原因: {main_cause} 预估不准
  3. 问题视频特征:
     - {'ROS预估偏高' if problem['ros_bias_pct'].mean() > 0 else 'ROS预估偏低'}: {problem['ros_bias_pct'].mean():+.1f}%
     - {'STR预估偏高' if problem['str_bias_pct'].mean() > 0 else 'STR预估偏低'}: {problem['str_bias_pct'].mean():+.1f}%
  4. 真实ROS < 2 的比例:
     - 问题视频: {(problem['real_ros'] < 2).mean()*100:.1f}%
     - 正常视频: {(normal['real_ros'] < 2).mean()*100:.1f}%
""")
else:
    print("问题视频数量为0,可能定义阈值需要调整")

# Persist the labelled table.
df.to_csv(output_dir / "v2_step3_对比分析.csv", index=False)

Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio