Просмотр исходного кода

feat: 添加 production_code 表建表代码、pyproject.toml 项目配置及尾号实验 SQL 更新

- 新增 production_code/ 目录,包含各表的 JSON schema 和 SQL 建表代码
- 新增 fetch_table_code.py 获取表代码脚本
- 新增 pyproject.toml 和 uv.lock 项目依赖配置
- 更新尾号实验 base_v3/v4/v4_v1 及 AB 效果 SQL
- 更新 lib/odps_module.py 增强 ODPS 模块功能
- .gitignore 添加 .venv/ 忽略

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
yangxiaohui 12 часов назад
Родитель
Коммит
9b74f78e5d
100 измененных файлов с 15532 добавлено и 0 удалено
  1. 1 0
      .gitignore
  2. 40 0
      fetch_table_code.py
  3. 403 0
      lib/odps_module.py
  4. 333 0
      production_code/loghubods.aitags_basedata.json
  5. 188 0
      production_code/loghubods.aitags_basedata.sql
  6. 319 0
      production_code/loghubods.aitags_repeatdata.json
  7. 169 0
      production_code/loghubods.aitags_repeatdata.sql
  8. 49 0
      production_code/loghubods.alg_channel_recommend_exp_feature_20250212.json
  9. 178 0
      production_code/loghubods.alg_channel_recommend_exp_feature_20250212.sql
  10. 49 0
      production_code/loghubods.alg_festive_recommend_exp_feature_20250212.json
  11. 178 0
      production_code/loghubods.alg_festive_recommend_exp_feature_20250212.sql
  12. 49 0
      production_code/loghubods.alg_merge_cate1_recommend_exp_feature_20250212.json
  13. 178 0
      production_code/loghubods.alg_merge_cate1_recommend_exp_feature_20250212.sql
  14. 49 0
      production_code/loghubods.alg_merge_cate2_recommend_exp_feature_20250212.json
  15. 178 0
      production_code/loghubods.alg_merge_cate2_recommend_exp_feature_20250212.sql
  16. 54 0
      production_code/loghubods.alg_mid_feature_return_tags.json
  17. 139 0
      production_code/loghubods.alg_mid_feature_return_tags.sql
  18. 50 0
      production_code/loghubods.alg_mid_feature_returncf.json
  19. 113 0
      production_code/loghubods.alg_mid_feature_returncf.sql
  20. 54 0
      production_code/loghubods.alg_mid_feature_share_tags.json
  21. 142 0
      production_code/loghubods.alg_mid_feature_share_tags.sql
  22. 50 0
      production_code/loghubods.alg_mid_feature_sharecf.json
  23. 114 0
      production_code/loghubods.alg_mid_feature_sharecf.sql
  24. 69 0
      production_code/loghubods.alg_recsys_feature_cf_i2i_v2.json
  25. 189 0
      production_code/loghubods.alg_recsys_feature_cf_i2i_v2.sql
  26. 49 0
      production_code/loghubods.alg_recsys_feature_user_share_return_stat.json
  27. 448 0
      production_code/loghubods.alg_recsys_feature_user_share_return_stat.sql
  28. 69 0
      production_code/loghubods.alg_recsys_recall_cf_mid_ros_scenetype.json
  29. 260 0
      production_code/loghubods.alg_recsys_recall_cf_mid_ros_scenetype.sql
  30. 69 0
      production_code/loghubods.alg_recsys_recall_cf_mid_rov_scenetype.json
  31. 258 0
      production_code/loghubods.alg_recsys_recall_cf_mid_rov_scenetype.sql
  32. 54 0
      production_code/loghubods.alg_vid_apptype_recommend_exp_feature_20250212.json
  33. 182 0
      production_code/loghubods.alg_vid_apptype_recommend_exp_feature_20250212.sql
  34. 54 0
      production_code/loghubods.alg_vid_brand_recommend_exp_feature_20250212.json
  35. 182 0
      production_code/loghubods.alg_vid_brand_recommend_exp_feature_20250212.sql
  36. 118 0
      production_code/loghubods.alg_vid_feature_all_exp_base_add.json
  37. 116 0
      production_code/loghubods.alg_vid_feature_all_exp_base_add.sql
  38. 118 0
      production_code/loghubods.alg_vid_feature_all_exp_base_v2.json
  39. 116 0
      production_code/loghubods.alg_vid_feature_all_exp_base_v2.sql
  40. 56 0
      production_code/loghubods.alg_vid_feature_basic_info.json
  41. 398 0
      production_code/loghubods.alg_vid_feature_basic_info.sql
  42. 48 0
      production_code/loghubods.alg_vid_feature_cfreturn.json
  43. 148 0
      production_code/loghubods.alg_vid_feature_cfreturn.sql
  44. 48 0
      production_code/loghubods.alg_vid_feature_cfshare.json
  45. 151 0
      production_code/loghubods.alg_vid_feature_cfshare.sql
  46. 48 0
      production_code/loghubods.alg_vid_global_feature_20250212.json
  47. 219 0
      production_code/loghubods.alg_vid_global_feature_20250212.sql
  48. 54 0
      production_code/loghubods.alg_vid_hotsencetype_recommend_exp_feature_20250212.json
  49. 184 0
      production_code/loghubods.alg_vid_hotsencetype_recommend_exp_feature_20250212.sql
  50. 38 0
      production_code/loghubods.alg_vid_long_period_recommend_exp_feature_20250212.json
  51. 141 0
      production_code/loghubods.alg_vid_long_period_recommend_exp_feature_20250212.sql
  52. 54 0
      production_code/loghubods.alg_vid_province_recommend_exp_feature_20250212.json
  53. 182 0
      production_code/loghubods.alg_vid_province_recommend_exp_feature_20250212.sql
  54. 49 0
      production_code/loghubods.alg_vid_recommend_exp_feature_20250212.json
  55. 276 0
      production_code/loghubods.alg_vid_recommend_exp_feature_20250212.sql
  56. 49 0
      production_code/loghubods.alg_vid_recommend_flowpool_exp_feature_20250212.json
  57. 180 0
      production_code/loghubods.alg_vid_recommend_flowpool_exp_feature_20250212.sql
  58. 49 0
      production_code/loghubods.alg_video_unionid_recommend_exp_feature_20250212.json
  59. 278 0
      production_code/loghubods.alg_video_unionid_recommend_exp_feature_20250212.sql
  60. 46 0
      production_code/loghubods.changwen_rootsourceid_group_hour.json
  61. 187 0
      production_code/loghubods.changwen_rootsourceid_group_hour.sql
  62. 87 0
      production_code/loghubods.content_ai_tags_no_dt.json
  63. 87 0
      production_code/loghubods.content_ai_tags_no_dt.sql
  64. 260 0
      production_code/loghubods.dwd_recsys_alg_exposure_base_20250108.json
  65. 774 0
      production_code/loghubods.dwd_recsys_alg_exposure_base_20250108.sql
  66. 203 0
      production_code/loghubods.dwd_recsys_alg_exposure_base_view_20250402.json
  67. 189 0
      production_code/loghubods.dwd_recsys_alg_exposure_base_view_20250402.sql
  68. 437 0
      production_code/loghubods.dwd_recsys_alg_sample_all_20250212.json
  69. 456 0
      production_code/loghubods.dwd_recsys_alg_sample_all_20250212.sql
  70. 48 0
      production_code/loghubods.mid_global_feature_20250212.json
  71. 283 0
      production_code/loghubods.mid_global_feature_20250212.sql
  72. 41 0
      production_code/loghubods.operators_channel.json
  73. 478 0
      production_code/loghubods.operators_channel.sql
  74. 48 0
      production_code/loghubods.operators_channel_dt.json
  75. 24 0
      production_code/loghubods.operators_channel_dt.sql
  76. 40 0
      production_code/loghubods.operators_channel_spider.json
  77. 528 0
      production_code/loghubods.operators_channel_spider.sql
  78. 48 0
      production_code/loghubods.operators_channel_spider_dt.json
  79. 24 0
      production_code/loghubods.operators_channel_spider_dt.sql
  80. 59 0
      production_code/loghubods.scene_type_vid_cf_feature_20250212.json
  81. 89 0
      production_code/loghubods.scene_type_vid_cf_feature_20250212.sql
  82. 29 0
      production_code/loghubods.tag_level_2_base.json
  83. 54 0
      production_code/loghubods.tag_level_2_base.sql
  84. 53 0
      production_code/loghubods.vid_click_cf_feature_20250212.json
  85. 129 0
      production_code/loghubods.vid_click_cf_feature_20250212.sql
  86. 32 0
      production_code/loghubods.vid_festive_labels.json
  87. 237 0
      production_code/loghubods.vid_festive_labels.sql
  88. 109 0
      production_code/loghubods.video_ai_tags.json
  89. 83 0
      production_code/loghubods.video_ai_tags.sql
  90. 233 0
      production_code/videoods.dim_video.json
  91. 431 0
      production_code/videoods.dim_video.sql
  92. 43 0
      production_code/videoods.every_video_status_category.json
  93. 431 0
      production_code/videoods.every_video_status_category.sql
  94. 145 0
      production_code/videoods.flow_pool_level_video.json
  95. 153 0
      production_code/videoods.flow_pool_level_video.sql
  96. 38 0
      production_code/videoods.if_create_video.json
  97. 431 0
      production_code/videoods.if_create_video.sql
  98. 94 0
      production_code/videoods.topic.json
  99. 251 0
      production_code/videoods.topic.sql
  100. 42 0
      production_code/videoods.total_price_video.json

+ 1 - 0
.gitignore

@@ -2,3 +2,4 @@ __pycache__/
 *.pyc
 output/
 .DS_Store
+.venv/

+ 40 - 0
fetch_table_code.py

@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""根据表名从 DataWorks 获取生产代码,保存到 production_code/ 目录。
+
+用法:
+    python fetch_table_code.py loghubods.dwd_recsys_alg_exposure_base_20250108
+    python fetch_table_code.py loghubods.dwd_recsys_alg_exposure_base_20250108 --force
+    python fetch_table_code.py loghubods.dwd_recsys_alg_exposure_base_20250108 --recursive
+    python fetch_table_code.py loghubods.dwd_recsys_alg_exposure_base_20250108 --recursive --depth 5
+"""
+
+import sys
+import argparse
+
+
+def main():
+    """CLI entry point: fetch the DataWorks production code for a table.
+
+    Parses the command line, then either fetches the code of the single
+    table or, with ``--recursive``, walks its upstream tables as well.
+    The process exits with status 1 when no production code was found, in
+    both modes, so callers can rely on the exit code.
+    """
+    parser = argparse.ArgumentParser(description="获取表的 DataWorks 生产代码")
+    parser.add_argument("table_name", help="表名,格式: project.table 或 table")
+    parser.add_argument("--force", action="store_true", help="跳过缓存,强制从 API 拉取")
+    parser.add_argument("--recursive", "-r", action="store_true", help="递归获取所有上游表的代码")
+    parser.add_argument("--depth", type=int, default=3, help="递归最大深度(默认 3)")
+    args = parser.parse_args()
+
+    # Imported after argument parsing, so `--help` and usage errors exit
+    # before lib.odps_module (and its dependencies) is loaded.
+    from lib.odps_module import DataWorksClient
+
+    dw = DataWorksClient()
+
+    if args.recursive:
+        traced = dw.get_node_code_recursive(args.table_name, max_depth=args.depth, force=args.force)
+        # The crawl prints its own summary; only the exit status is decided here.
+        found = any(info["code"] for info in traced.values())
+    else:
+        results = dw.get_node_code(args.table_name, force=args.force)
+        found = bool(results)
+        for r in results:
+            # A task without a script may carry no content; count it as empty.
+            print(f"任务: {r['task_name']}  代码长度: {len(r['content'] or '')} chars")
+
+    if not found:
+        print(f"未找到 '{args.table_name}' 的生产代码")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()

+ 403 - 0
lib/odps_module.py

@@ -12,6 +12,15 @@ from tqdm import tqdm
 import pyarrow as pa
 from pyarrow import csv as pa_csv
 
+# DataWorks SDK(可选依赖,仅 DataWorksClient 用到)
+try:
+    from alibabacloud_dataworks_public20240518.client import Client as _DWClient
+    from alibabacloud_tea_openapi import models as _open_api_models
+    from alibabacloud_dataworks_public20240518 import models as _dw_models
+    _DW_AVAILABLE = True
+except ImportError:
+    _DW_AVAILABLE = False
+
 # 开启 Instance Tunnel,解除 1 万条限制
 options.tunnel.use_instance_tunnel = True
 options.tunnel.limit_instance_tunnel = False
@@ -195,3 +204,397 @@ class ODPSClient(object):
         total_time = time.time() - start_time
         print(f"总耗时: {total_time:.1f}s")
         print(f"完成: {output_file}")
+
+
+# ──────────────────────────────────────────────────────────────────────────────
+# DataWorks client: fetch production code by table name
+# ──────────────────────────────────────────────────────────────────────────────
+# Recommended call chain: GetTable → GetTask → Script.Content
+#   1. GetTable(entity_id, include_business_metadata=True) returns the exact upstream tasks of a table
+#   2. GetTask(task_id, project_env='Prod') returns the SQL code of a task
+# ──────────────────────────────────────────────────────────────────────────────
+
+# All DataWorks projects accessible to the account (project_id → name)
+_DW_PROJECTS = {
+    4858:   "loghubods",
+    11300:  "DWH",
+    5477:   "videocdm",
+    548768: "piaoquan_api",
+    148813: "content_safety",
+    96094:  "algo",
+    52578:  "majin",
+    5057:   "useractionbi",
+    5034:   "user_video_action_cdm",
+    4868:   "usercdm",
+    4859:   "videoods",
+    6025:   "videoads",
+    5535:   "user_video_tag",
+    19288:  "RecallEmbedding",
+    10762:  "Test_model1",
+    193831: "cost_mgt_1894469520484605",
+    156474: "dyp_1",
+    156475: "dyp_2",
+    343868: "pq_data_space",
+    343957: "pq_grafana_se",
+}
+
+
+# Cache directory: "production_code" under the parent of this file's directory.
+_CACHE_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "production_code")
+
+
+def _call_with_retry(fn, max_retries=3, base_delay=2):
+    """Call ``fn()`` and retry with exponential backoff when it is throttled.
+
+    An exception counts as throttling when its string form contains
+    ``"Throttling"``. Any other exception, or a throttling error on the
+    final attempt, is re-raised unchanged.
+
+    Args:
+        fn: Zero-argument callable performing the API call.
+        max_retries: Total number of attempts. Values below 1 are treated
+            as 1 so that ``fn`` is always invoked at least once instead of
+            silently returning ``None``.
+        base_delay: Seconds to wait before the first retry; the wait
+            doubles on every further retry.
+
+    Returns:
+        Whatever ``fn()`` returns.
+    """
+    attempts = max(1, max_retries)
+    for attempt in range(attempts):
+        try:
+            return fn()
+        except Exception as e:
+            if "Throttling" in str(e) and attempt < attempts - 1:
+                delay = base_delay * (2 ** attempt)
+                print(f"  [throttled] 等待 {delay}s 后重试...")
+                time.sleep(delay)
+                continue
+            raise
+
+
+class DataWorksClient:
+    """Fetch production task code, table schemas and lineage through DataWorks.
+
+    Task code is cached as ``<project>.<table>.sql`` and table schemas as
+    ``<project>.<table>.json`` inside ``_CACHE_DIR``.
+    """
+
+    def __init__(self):
+        """Create one DataWorks SDK client per entry of ``ODPS_CONFIGS``.
+
+        Raises:
+            ImportError: If the DataWorks SDK could not be imported.
+            KeyError: If ``ODPS_CONFIGS`` has no ``"default"`` entry.
+        """
+        if not _DW_AVAILABLE:
+            raise ImportError(
+                "请先安装 DataWorks SDK:\n"
+                "pip install alibabacloud-dataworks-public20240518"
+            )
+        # One client per access key: different keys are authorised for
+        # different projects.
+        self._clients = {}
+        for config_name, cfg in ODPS_CONFIGS.items():
+            dw_config = _open_api_models.Config(
+                access_key_id=cfg["access_id"],
+                access_key_secret=cfg["access_secret"],
+                endpoint="dataworks.cn-hangzhou.aliyuncs.com",
+            )
+            self._clients[config_name] = _DWClient(dw_config)
+        self.client = self._clients["default"]
+
+    @staticmethod
+    def _build_entity_id(table_name: str) -> str:
+        """Build the entity ID used by GetTable / ListLineages.
+
+        Supported formats:
+          - project.table  → maxcompute-table:::project::table
+          - table          → maxcompute-table:::loghubods::table
+        """
+        # Reuse the shared normaliser so the default project is defined once.
+        project, table = DataWorksClient._normalize_table_name(table_name).split(".", 1)
+        return f"maxcompute-table:::{project}::{table}"
+
+    @staticmethod
+    def _task_refs(tasks: list) -> list:
+        """Reduce raw upstream-task dicts (``Id``/``Name``) to ``{id, name}`` dicts."""
+        return [{"id": task.get("Id"), "name": task.get("Name")} for task in tasks]
+
+    def get_table_info(self, table_name: str) -> dict:
+        """Fetch table metadata, including the list of upstream tasks.
+
+        Returns:
+            dict with keys: id, name, comment, project_id,
+            dataworks_tasks (raw task dicts), partition_keys.
+        """
+        entity_id = self._build_entity_id(table_name)
+        resp = _call_with_retry(lambda: self.client.get_table(
+            _dw_models.GetTableRequest(id=entity_id, include_business_metadata=True)
+        ))
+        # `or {}` / `or []` also cover keys that are present with a None value.
+        table = resp.body.to_map().get("Table") or {}
+        biz = table.get("BusinessMetadata") or {}
+        return {
+            "id": table.get("Id"),
+            "name": table.get("Name"),
+            "comment": table.get("Comment"),
+            "project_id": (biz.get("Extension") or {}).get("ProjectId"),
+            "dataworks_tasks": biz.get("UpstreamTasks") or [],
+            "partition_keys": table.get("PartitionKeys") or [],
+        }
+
+    def _get_task_code(self, task_id: int) -> dict | None:
+        """Try every access key and return the first successful task lookup.
+
+        Returns:
+            dict with task_id, task_name, task_type, content (always a
+            string) and config (name of the access key that worked), or
+            ``None`` when every key was rejected with error 11020205003.
+
+        Raises:
+            Exception: Any API error other than 11020205003.
+        """
+        for config_name, client in self._clients.items():
+            try:
+                # Bind the client as a default argument so the lambda does
+                # not depend on the loop variable.
+                resp = _call_with_retry(lambda c=client: c.get_task(
+                    _dw_models.GetTaskRequest(id=task_id, project_env="Prod")
+                ))
+                task = resp.body.to_map().get("Task") or {}
+                return {
+                    "task_id": task_id,
+                    "task_name": task.get("Name"),
+                    "task_type": task.get("Type"),
+                    # Never None, so callers can len() / write it directly.
+                    "content": (task.get("Script") or {}).get("Content") or "",
+                    "config": config_name,
+                }
+            except Exception as e:
+                if "11020205003" in str(e):
+                    continue  # no permission with this key, try the next one
+                raise
+        return None
+
+    @staticmethod
+    def _normalize_table_name(table_name: str) -> str:
+        """Add the default project prefix: table → loghubods.table."""
+        if "." not in table_name:
+            return f"loghubods.{table_name}"
+        return table_name
+
+    @staticmethod
+    def _cache_path(table_name: str) -> str:
+        """Path of the cached task code for ``table_name``."""
+        return os.path.join(_CACHE_DIR, f"{table_name}.sql")
+
+    @staticmethod
+    def _schema_cache_path(table_name: str) -> str:
+        """Path of the cached schema JSON for ``table_name``."""
+        return os.path.join(_CACHE_DIR, f"{table_name}.json")
+
+    def _read_cache(self, table_name: str) -> str | None:
+        """Return the cached task code, or ``None`` when no cache file exists."""
+        path = self._cache_path(table_name)
+        if os.path.exists(path):
+            with open(path, "r", encoding="utf-8") as f:
+                return f.read()
+        return None
+
+    def _write_cache(self, table_name: str, content: str):
+        """Write the task code cache file, creating the cache directory if needed."""
+        os.makedirs(_CACHE_DIR, exist_ok=True)
+        with open(self._cache_path(table_name), "w", encoding="utf-8") as f:
+            f.write(content)
+
+    def _read_schema_cache(self, table_name: str) -> dict | None:
+        """Return the cached schema, or ``None`` when no cache file exists."""
+        import json
+        path = self._schema_cache_path(table_name)
+        if os.path.exists(path):
+            with open(path, "r", encoding="utf-8") as f:
+                return json.load(f)
+        return None
+
+    def _write_schema_cache(self, table_name: str, schema: dict):
+        """Write the schema cache file as indented, non-ASCII-escaped JSON."""
+        import json
+        os.makedirs(_CACHE_DIR, exist_ok=True)
+        with open(self._schema_cache_path(table_name), "w", encoding="utf-8") as f:
+            json.dump(schema, f, ensure_ascii=False, indent=2)
+
+    def _ensure_schema_cache(self, table_name: str, force: bool = False,
+                              dataworks_tasks: list | None = None):
+        """Make sure a schema cache exists; fetch and write it when missing.
+
+        Failures are reported as a warning and never raised: the schema is
+        a best-effort companion of the task code.
+
+        Args:
+            table_name: Normalised ``project.table`` name.
+            force: When True, refetch even if a cache file exists.
+            dataworks_tasks: Pre-fetched upstream task list, which avoids
+                a second get_table_info() call.
+        """
+        if not force:
+            cached = self._read_schema_cache(table_name)
+            if cached is not None:
+                return
+        try:
+            schema = self.get_table_schema(table_name, dataworks_tasks=dataworks_tasks)
+            self._write_schema_cache(table_name, schema)
+            print(f"[saved] {self._schema_cache_path(table_name)}")
+        except Exception as e:
+            print(f"[WARN] 获取表结构失败 {table_name}: {e}")
+
+    def get_table_schema(self, table_name: str,
+                          dataworks_tasks: list | None = None) -> dict:
+        """Fetch table structure metadata through the ODPS SDK.
+
+        Args:
+            table_name: Table name (``project.table`` is supported).
+            dataworks_tasks: Pre-fetched upstream task list, which avoids
+                a repeated API call.
+
+        Returns:
+            dict: {name, project, comment, columns, partition_keys,
+            dataworks_tasks, upstream_tables}
+        """
+        table_name = self._normalize_table_name(table_name)
+        project, table = table_name.split(".", 1)
+
+        # The table structure comes from an ODPSClient bound to the project.
+        odps_client = ODPSClient(project=project)
+        t = odps_client.odps.get_table(table)
+
+        columns = [
+            {"name": c.name, "type": str(c.type), "comment": c.comment or ""}
+            for c in t.table_schema.columns
+        ]
+        partition_keys = [
+            {"name": c.name, "type": str(c.type), "comment": c.comment or ""}
+            for c in t.table_schema.partitions
+        ]
+
+        # Upstream tasks: prefer the caller's list, otherwise ask DataWorks.
+        # Still best-effort, but a failure is now reported instead of hidden.
+        if dataworks_tasks is None:
+            try:
+                info = self.get_table_info(table_name)
+                dataworks_tasks = self._task_refs(info.get("dataworks_tasks") or [])
+            except Exception as e:
+                print(f"[WARN] 获取上游任务失败 {table_name}: {e}")
+                dataworks_tasks = []
+
+        # Direct upstream tables (lineage), also best-effort.
+        try:
+            upstream_tables = self.get_upstream_tables(table_name)
+        except Exception as e:
+            print(f"[WARN] 获取上游表失败 {table_name}: {e}")
+            upstream_tables = []
+
+        return {
+            "name": table,
+            "project": project,
+            "comment": t.comment or "",
+            "columns": columns,
+            "partition_keys": partition_keys,
+            "dataworks_tasks": dataworks_tasks,
+            "upstream_tables": upstream_tables,
+        }
+
+    def get_node_code(self, table_name: str, force: bool = False) -> list:
+        """Fetch the production code for a table, preferring the local cache.
+
+        Flow: local cache → GetTable → GetTask → write cache → return code.
+
+        Args:
+            table_name: Table name (``project.table`` is supported).
+            force: When True, skip the cache and always call the API.
+
+        Returns:
+            list of dict, each containing task_id, task_name, task_type
+            and content. A cache hit yields a single entry whose task_id
+            and task_type are None and whose task_name is "(cached)".
+        """
+        table_name = self._normalize_table_name(table_name)
+
+        # Cache lookup.
+        if not force:
+            cached = self._read_cache(table_name)
+            if cached is not None:
+                print(f"[cache] {self._cache_path(table_name)}")
+                # Also make sure the schema cache exists; fetch it if missing.
+                self._ensure_schema_cache(table_name, force=False)
+                return [{"task_id": None, "task_name": "(cached)", "task_type": None, "content": cached}]
+
+        # API fetch.
+        info = self.get_table_info(table_name)
+        upstream = info.get("dataworks_tasks") or []
+        if not upstream:
+            print(f"表 '{table_name}' 没有上游任务")
+            return []
+
+        results = []
+        for task in upstream:
+            task_id = task.get("Id")
+            task_name = task.get("Name")
+            result = self._get_task_code(task_id)
+            if result:
+                results.append(result)
+            else:
+                print(f"[WARN] 任务 {task_name}({task_id}) 所有 AK 均无权限")
+
+        # Write the code cache: one header line per task, then its script.
+        if results:
+            parts = []
+            for r in results:
+                header = f"-- Task: {r['task_name']}  ID: {r['task_id']}  Type: {r['task_type']}"
+                parts.append(f"{header}\n{r['content']}")
+            self._write_cache(table_name, "\n\n".join(parts))
+            print(f"[saved] {self._cache_path(table_name)}")
+
+        # Fetch and cache the schema, reusing the upstream task list
+        # obtained above to avoid a second GetTable call.
+        self._ensure_schema_cache(table_name, force=force,
+                                  dataworks_tasks=self._task_refs(upstream))
+
+        return results
+
+    def get_upstream_tables(self, table_name: str) -> list[str]:
+        """Fetch the direct upstream tables of a table through the lineage API.
+
+        Only one page of results is requested; a warning is printed when
+        that page is full, because the list may then be incomplete.
+        NOTE(review): paging through further results is not implemented.
+
+        Returns:
+            Sorted, de-duplicated list of str such as
+            ["loghubods.user_share_log_flow", ...]
+        """
+        page_size = 50
+        entity_id = self._build_entity_id(table_name)
+        resp = _call_with_retry(lambda: self.client.list_lineages(
+            _dw_models.ListLineagesRequest(dst_entity_id=entity_id, page_size=page_size)
+        ))
+        paging = resp.body.to_map().get("PagingInfo") or {}
+        lineages = paging.get("Lineages") or []
+        if len(lineages) >= page_size:
+            print(f"[WARN] {table_name} 的上游血缘达到单页上限 {page_size},结果可能不完整")
+
+        tables = set()
+        for lineage in lineages:
+            src_id = (lineage.get("SrcEntity") or {}).get("Id") or ""
+            # maxcompute-table:::project::table → project.table
+            parts = src_id.replace("maxcompute-table:::", "").split("::")
+            if len(parts) == 2:
+                tables.add(f"{parts[0]}.{parts[1]}")
+        return sorted(tables)
+
+    def get_node_code_recursive(self, table_name: str, max_depth: int = 3,
+                                force: bool = False) -> dict:
+        """Breadth-first fetch of the code of a table and its upstream tables.
+
+        Upstream dependencies are traced level by level through the
+        lineage API (ListLineages); get_node_code caches what it fetches
+        under production_code/.
+
+        A failure on the root table is raised. A failure on an upstream
+        table is reported as a warning and the crawl continues, so one
+        inaccessible table does not discard everything already collected.
+
+        Args:
+            table_name: Table name (``project.table`` is supported).
+            max_depth: Maximum number of levels to trace, default 3.
+            force: When True, skip the cache.
+
+        Returns:
+            dict: {
+                "project.table": {
+                    "code": [...],            # return value of get_node_code
+                    "upstream": ["a.b", ...],  # upstream table names
+                    "depth": int
+                }, ...
+            }
+        """
+        from collections import deque
+
+        table_name = self._normalize_table_name(table_name)
+        result = {}
+        queue = deque([(table_name, 0)])
+        visited = {table_name}
+
+        while queue:
+            tbl, depth = queue.popleft()
+            indent = "  " * depth
+            print(f"{indent}[depth={depth}] {tbl}")
+
+            # Fetch the code.
+            try:
+                code = self.get_node_code(tbl, force=force)
+            except Exception as e:
+                if depth == 0:
+                    raise
+                print(f"{indent}[WARN] 获取代码失败 {tbl}: {e}")
+                code = []
+
+            # Fetch the upstream tables (best-effort, but reported).
+            upstream = []
+            if depth < max_depth:
+                try:
+                    upstream = self.get_upstream_tables(tbl)
+                except Exception as e:
+                    print(f"{indent}[WARN] 获取上游表失败 {tbl}: {e}")
+
+            result[tbl] = {"code": code, "upstream": upstream, "depth": depth}
+
+            # Enqueue the next level, visiting every table only once.
+            for up_tbl in upstream:
+                if up_tbl not in visited:
+                    visited.add(up_tbl)
+                    queue.append((up_tbl, depth + 1))
+
+        # Print the summary.
+        print(f"\n共追溯 {len(result)} 张表:")
+        for tbl, info in result.items():
+            has_code = "有代码" if info["code"] else "无代码"
+            n_up = len(info["upstream"])
+            print(f"  {'  ' * info['depth']}{tbl}  ({has_code}, {n_up} 个上游)")
+
+        return result
+
+    def print_node_code(self, table_name: str):
+        """Print the production code of a table in a human-readable layout."""
+        results = self.get_node_code(table_name)
+        if not results:
+            print(f"未找到 '{table_name}' 的生产代码")
+            return
+        for r in results:
+            print(f"\n{'='*60}")
+            print(f"任务: {r['task_name']}  ID: {r['task_id']}  "
+                  f"类型: {r['task_type']}")
+            print(f"{'='*60}")
+            print(r["content"] or "(无内容)")

+ 333 - 0
production_code/loghubods.aitags_basedata.json

@@ -0,0 +1,333 @@
+{
+  "name": "aitags_basedata",
+  "project": "loghubods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "video_id",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_title",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_url",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视觉音乐文字",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "内容选题",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频主题",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频关键词",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频主体",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频场景",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "情感倾向",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频风格",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "是否有片尾引导",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "引导时长",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "引导强度",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "传播性判断",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众地域",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众年龄段",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众性别",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众价值类型",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众用户价值点",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众用观众收入",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "背景音类型",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "背景音风格",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "语音类型",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "歌曲名",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "音色",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "产品水印",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "产品名称",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "字幕",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "颜色",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "字号",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "位置",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频口播",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "封面主体",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "人物个数",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "文字数量",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "文字关键字",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "封面主题",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "知名人物",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "人物年龄段",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "场景描述",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "时效性_有无时效",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "时效性_具体时间",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频一级分类",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "二级分类",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_1",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "tag_2",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_2",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_2",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "tag_3",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_3",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_3",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "具体时效",
+      "type": "STRING",
+      "comment": "具体时效"
+    },
+    {
+      "name": "是否节日视频",
+      "type": "STRING",
+      "comment": "是否节日视频"
+    },
+    {
+      "name": "节日名称",
+      "type": "STRING",
+      "comment": "节日名称"
+    },
+    {
+      "name": "是否存在热点",
+      "type": "STRING",
+      "comment": "是否存在热点信息"
+    },
+    {
+      "name": "该热点的特征",
+      "type": "STRING",
+      "comment": "热点的特征描述"
+    },
+    {
+      "name": "热点内容概括",
+      "type": "STRING",
+      "comment": "热点内容的概括"
+    },
+    {
+      "name": "判断是热点的原因",
+      "type": "STRING",
+      "comment": "判断为热点的原因"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1021770953,
+      "name": "AI标签特征提取"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.result_log_day"
+  ]
+}

+ 188 - 0
production_code/loghubods.aitags_basedata.sql

@@ -0,0 +1,188 @@
+-- Task: AI标签特征提取  ID: 1021770953  Type: ODPS_SQL
+----odps sql 
+----********************************************************************--
+----author:杜崇宇
+----create time:2024-11-11 15:43:03
+----********************************************************************--
+----1016prompt
+----SELECT  dt
+----        ,video_id
+----        ,video_title
+----        ,video_url
+----        ,GET_JSON_OBJECT(data,'$.一、基础信息.视觉/音乐/文字') AS 视觉音乐文字
+----        ,GET_JSON_OBJECT(data,'$.一、基础信息.内容选题') AS 内容选题
+----        ,GET_JSON_OBJECT(data,'$.一、基础信息.视频主题') AS 视频主题
+----        ,GET_JSON_OBJECT(data,'$.二、主体和场景.视频主体') AS 视频主体
+----        ,GET_JSON_OBJECT(data,'$.二、主体和场景.视频场景') AS 视频场景
+----        ,GET_JSON_OBJECT(data,'$.三、情感与风格.情感倾向') AS 情感倾向
+----        ,GET_JSON_OBJECT(data,'$.三、情感与风格.视频风格') AS 视频风格
+----        ,GET_JSON_OBJECT(data,'$.四、视频传播性与观众.片尾引导.视频片尾是否有引导观众分享?') AS 片尾引导分享
+----        ,GET_JSON_OBJECT(data,'$.四、视频传播性与观众.片尾引导.引导时长') AS 引导时长
+----        ,GET_JSON_OBJECT(data,'$.四、视频传播性与观众.传播性判断') AS 传播性判断
+----        ,GET_JSON_OBJECT(data,'$.四、视频传播性与观众.观众画像.推测观众的年龄') AS 推测观众年龄
+----        ,GET_JSON_OBJECT(data,'$.四、视频传播性与观众.观众画像.推测观众的性别') AS 推测观众性别
+----        ,GET_JSON_OBJECT(data,'$.四、视频传播性与观众.观众画像.推测观众的地域') AS 推测观众地域
+----        ,GET_JSON_OBJECT(data,'$.五、音画细节.音频细节.视频中的音频信息,是否有歌曲?') AS 是否有歌曲
+----        ,GET_JSON_OBJECT(data,'$.五、音画细节.音频细节.视频中的音频信息,歌曲名是什么?') AS 歌曲名
+----        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频水印.是否有产品名的水印?') AS 是否有水印
+----        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频字幕.是否有字幕?') AS 是否有字幕
+----        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频字幕.字幕的颜色') AS 字幕颜色
+----        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频字幕.字幕的位置如何') AS 字幕位置
+----        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频口播') AS 视频口播
+----        ,GET_JSON_OBJECT(data,'$.六、人物与场景.知名人物.视频或音频中是否出现知名人物?') AS 是否有知名人物
+----        ,GET_JSON_OBJECT(data,'$.六、人物与场景.人物年龄段') AS 人物年龄段
+----        ,GET_JSON_OBJECT(data,'$.六、人物与场景.场景描述') AS 场景描述
+----        ,GET_JSON_OBJECT(data,'$.七、时效性与分类.视频一级分类') AS 视频一级分类
+----        ,GET_JSON_OBJECT(data,'$.七、时效性与分类.二级分类') AS 模型判断二级分类
+----FROM    loghubods.video_tag_info_per5min
+----WHERE   dt REGEXP '20241110'
+----;
+----
+----1101prompt
+---- 创建分区表语句
+--CREATE TABLE loghubods.aitags_basedata
+--(
+--    video_id     STRING
+--    ,video_title STRING
+--    ,video_url   STRING
+--    -- 基础信息
+--    ,视觉音乐文字      STRING
+--    ,内容选题        STRING
+--    ,视频主题        STRING
+--    ,视频关键词       STRING
+--    -- 主体和场景
+--    ,视频主体        STRING
+--    ,视频场景        STRING
+--    -- 情感与风格
+--    ,情感倾向        STRING
+--    ,视频风格        STRING
+--    -- 视频传播性与画像
+--    ,是否有片尾引导     STRING
+--    ,引导时长        BIGINT
+--    ,引导强度        STRING
+--    ,传播性判断       STRING
+--    ,推测观众地域      STRING
+--    ,推测观众年龄段     STRING
+--    ,推测观众性别      STRING
+--    ,推测观众价值类型    STRING
+--    ,推测观众用户价值点   STRING
+--    ,推测观众用观众收入   STRING
+--    -- 音画细节
+--    ,背景音类型       STRING
+--    ,背景音风格       STRING
+--    ,语音类型        STRING
+--    ,歌曲名         STRING
+--    ,音色          STRING
+--    ,产品水印        STRING
+--    ,产品名称        STRING
+--    ,字幕          STRING
+--    ,颜色          STRING
+--    ,字号          STRING
+--    ,位置          STRING
+--    ,视频口播        STRING
+--    -- 封面信息
+--    ,封面主体        STRING
+--    ,人物个数        BIGINT
+--    ,文字数量        BIGINT
+--    ,文字关键字       STRING
+--    ,封面主题        STRING
+--    -- 人物与场景
+--    ,知名人物        STRING
+--    ,人物年龄段       STRING
+--    ,场景描述        STRING
+--    -- 时效性与分类
+--    ,时效性_有无时效        STRING
+--        ,时效性_具体时间        STRING
+--
+--    ,视频一级分类      STRING
+--    ,二级分类        STRING
+--    ,tag_1       STRING
+--    ,tag_name_1  STRING
+--    ,tag_score_1 BIGINT
+--    ,tag_2       STRING
+--    ,tag_name_2  STRING
+--    ,tag_score_2 BIGINT
+--    ,tag_3       STRING
+--    ,tag_name_3  STRING
+--    ,tag_score_3 BIGINT
+--)
+--PARTITIONED BY 
+--(
+--    dt           STRING
+--)
+--;
+-- 删除指定分区内的数据
+--ALTER TABLE loghubods.aitags_basedata  DROP PARTITION (dt = '20241101');
+--DROP TABLE IF EXISTS loghubods.aitags_basedata
+
+
+-- Flattens the JSON payload in result_log_day.data into the wide
+-- loghubods.aitags_basedata table for the partition dt = ${bizdate}.
+-- The destination columns are filled by POSITION, so the order of the
+-- select list below must stay aligned with the table definition; the
+-- aliases are kept equal to the destination column names for readability.
+INSERT OVERWRITE TABLE loghubods.aitags_basedata PARTITION (dt = '${bizdate}')
+SELECT  video_id
+        ,video_title
+        ,video_url
+        -- Section 1: basic information
+        ,GET_JSON_OBJECT(data,'$.一、基础信息.关键维度') AS 视觉音乐文字
+        ,GET_JSON_OBJECT(data,'$.一、基础信息.内容选题') AS 内容选题
+        ,GET_JSON_OBJECT(data,'$.一、基础信息.视频主题') AS 视频主题
+        ,GET_JSON_OBJECT(data,'$.一、基础信息.视频关键词') AS 视频关键词
+        -- Section 2: subject and scene
+        ,GET_JSON_OBJECT(data,'$.二、主体和场景.视频主体') AS 视频主体
+        ,GET_JSON_OBJECT(data,'$.二、主体和场景.视频场景') AS 视频场景
+        -- Section 3: sentiment and style
+        ,GET_JSON_OBJECT(data,'$.三、情感与风格.情感倾向') AS 情感倾向
+        ,GET_JSON_OBJECT(data,'$.三、情感与风格.视频风格') AS 视频风格
+        -- Section 4: shareability and audience profile
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.片尾引导.有无') AS 是否有片尾引导
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.片尾引导.引导时长') AS 引导时长
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.片尾引导.引导强度') AS 引导强度
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.传播性判断') AS 传播性判断
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.视频用户画像.地域') AS 推测观众地域
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.视频用户画像.年龄段') AS 推测观众年龄段
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.视频用户画像.性别') AS 推测观众性别
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.视频用户画像.价值类型') AS 推测观众价值类型
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.视频用户画像.用户价值点') AS 推测观众用户价值点
+        ,GET_JSON_OBJECT(data,'$.四、视频传播性与画像.视频用户画像.观众收入') AS 推测观众用观众收入
+        -- Section 5: audio / visual details
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.音频细节.背景音类型') AS 背景音类型
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.音频细节.背景音风格') AS 背景音风格
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.音频细节.语音类型') AS 语音类型
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.音频细节.歌曲名') AS 歌曲名
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.音频细节.音色') AS 音色
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频水印.产品水印') AS 产品水印
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频水印.产品名称') AS 产品名称
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频字幕.字幕') AS 字幕
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频字幕.颜色') AS 颜色
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频字幕.字号') AS 字号
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频字幕.位置') AS 位置
+        ,GET_JSON_OBJECT(data,'$.五、音画细节.视频口播') AS 视频口播
+        -- Section 6: cover information
+        ,GET_JSON_OBJECT(data,'$.六、封面信息.封面主体') AS 封面主体
+        ,GET_JSON_OBJECT(data,'$.六、封面信息.人物个数') AS 人物个数
+        ,GET_JSON_OBJECT(data,'$.六、封面信息.文字数量') AS 文字数量
+        ,GET_JSON_OBJECT(data,'$.六、封面信息.文字关键字') AS 文字关键字
+        ,GET_JSON_OBJECT(data,'$.六、封面信息.封面主题') AS 封面主题
+        -- Section 7: people and scene
+        ,GET_JSON_OBJECT(data,'$.七、人物与场景.知名人物') AS 知名人物
+        ,GET_JSON_OBJECT(data,'$.七、人物与场景.人物年龄段') AS 人物年龄段
+        ,GET_JSON_OBJECT(data,'$.七、人物与场景.场景描述') AS 场景描述
+        -- Section 8: timeliness and classification
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.时效性.有无时效') AS 时效性_有无时效
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.时效性.具体时间') AS 时效性_具体时间
+        -- First-level category: kept only when it is one of the whitelisted labels, otherwise '无'.
+        ,CASE   WHEN GET_JSON_OBJECT(DATA,'$.八、时效性与分类.视频一级分类') IN ('一级品类_音乐','一级品类_剧情 / 剧情演绎','一级品类_二次元','一级品类_游戏','一级品类_公益','一级品类_随拍 / 颜值','一级品类_舞蹈','一级品类_动物 / 萌宠','一级品类_三农','一级品类_科技 / 科技数码','一级品类_财经','一级品类_母婴 / 母婴亲子','一级品类_法律 / 人文社科','一级品类_科普 / 人文社科','一级品类_情感 / 情感心理','一级品类_职场 / 人文社科','一级品类_教育 / 教育培训','一级品类_摄影摄像','一级品类_艺术 / 才艺技能','一级品类_美食','一级品类_旅行 / 旅游','一级品类_地域本地','一级品类_时尚 / 时尚 / 美妆','一级品类_文化 / 人文社科','一级品类_搞笑 / 休闲娱乐','一级品类_明星 / 名人','一级品类_综艺','一级品类_影视综艺','一级品类_电影','一级品类_电视剧','一级品类_汽车','一级品类_体育 / 运动','一级品类_医疗健康 / 长寿 / 健身','一级品类_生活记录 / 生活','一级品类_生活家居 / 家居家装','一级品类_时政社会','一级品类_奇人异象','一级品类_历史','一级品类_军事','一级品类_宗教','一级品类_短剧','一级品类_收藏品') THEN GET_JSON_OBJECT(DATA,'$.八、时效性与分类.视频一级分类')
+                ELSE '无'
+        END AS 视频一级分类
+        ,GET_JSON_OBJECT(DATA,'$.八、时效性与分类.二级分类') AS 二级分类
+        -- Second-level tags: element k of the 二级分类 array is split on '、' into a
+        -- name part (prefix '品类-' removed) and a score part (prefix '分数-' removed).
+        -- A name that is not in the whitelist is replaced by '无'.
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]') AS tag_1
+        ,CASE   WHEN REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]'),'、')[0],'品类-','') IN ('祝福音乐','中国战争史','中国历史影像','知识科普','正能量剧情','杂技柔术','早中晚好','益智解密','饮食健康','戏曲戏剧','未来科幻','天气变化','他国政策','贪污腐败','书法','食品安全','社会风气','生活小妙招','生活技巧科普','省份城市亮点','人生忠告','人财诈骗','亲子日常','亲情音乐','木工','魔术特效','迷信祝福','民族异域音乐','民生政策','名画赏析','美食教程','麻将','旅行攻略','历史名人','老综艺影像','老年相关法律科普','老年时尚','老年审美美女','老年生活','老明星','惊奇事件','节日祝福','健身操','健康知识','惠民新闻','绘画','怀念时光','红歌老歌','罕见画面','国际文化','国家统一','国家力量','国家科技力量','搞笑段子','风景实拍','对口型表演','动物萌宠','动物表演','大型集体艺术','当代正能量人物','传统文化','吃播探店','长寿知识','本地生活','K12教育','(老)电影切片') THEN REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]'),'、')[0],'品类-','')
+                ELSE '无'
+        END AS tag_name_1
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]'),'、')[1],'分数-','') AS tag_score_1
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]') AS tag_2
+        ,CASE   WHEN REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]'),'、')[0],'品类-','') IN ('祝福音乐','中国战争史','中国历史影像','知识科普','正能量剧情','杂技柔术','早中晚好','益智解密','饮食健康','戏曲戏剧','未来科幻','天气变化','他国政策','贪污腐败','书法','食品安全','社会风气','生活小妙招','生活技巧科普','省份城市亮点','人生忠告','人财诈骗','亲子日常','亲情音乐','木工','魔术特效','迷信祝福','民族异域音乐','民生政策','名画赏析','美食教程','麻将','旅行攻略','历史名人','老综艺影像','老年相关法律科普','老年时尚','老年审美美女','老年生活','老明星','惊奇事件','节日祝福','健身操','健康知识','惠民新闻','绘画','怀念时光','红歌老歌','罕见画面','国际文化','国家统一','国家力量','国家科技力量','搞笑段子','风景实拍','对口型表演','动物萌宠','动物表演','大型集体艺术','当代正能量人物','传统文化','吃播探店','长寿知识','本地生活','K12教育','(老)电影切片') THEN REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]'),'、')[0],'品类-','')
+                ELSE '无'
+        END AS tag_name_2
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]'),'、')[1],'分数-','') AS tag_score_2
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]') AS tag_3
+        ,CASE   WHEN REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]'),'、')[0],'品类-','') IN ('祝福音乐','中国战争史','中国历史影像','知识科普','正能量剧情','杂技柔术','早中晚好','益智解密','饮食健康','戏曲戏剧','未来科幻','天气变化','他国政策','贪污腐败','书法','食品安全','社会风气','生活小妙招','生活技巧科普','省份城市亮点','人生忠告','人财诈骗','亲子日常','亲情音乐','木工','魔术特效','迷信祝福','民族异域音乐','民生政策','名画赏析','美食教程','麻将','旅行攻略','历史名人','老综艺影像','老年相关法律科普','老年时尚','老年审美美女','老年生活','老明星','惊奇事件','节日祝福','健身操','健康知识','惠民新闻','绘画','怀念时光','红歌老歌','罕见画面','国际文化','国家统一','国家力量','国家科技力量','搞笑段子','风景实拍','对口型表演','动物萌宠','动物表演','大型集体艺术','当代正能量人物','传统文化','吃播探店','长寿知识','本地生活','K12教育','(老)电影切片') THEN REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]'),'、')[0],'品类-','')
+                ELSE '无'
+        END AS tag_name_3
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]'),'、')[1],'分数-','') AS tag_score_3
+        -- Festival timeliness: the nested object is extracted first, then '[', ']' and '"' are stripped from each value.
+        ,regexp_replace(GET_JSON_OBJECT(GET_JSON_OBJECT(data,'$.八、时效性与分类.节日时效性'),'$.具体时效') , '\\[|"|\\]', '') AS 具体时效
+        ,regexp_replace(GET_JSON_OBJECT(GET_JSON_OBJECT(data,'$.八、时效性与分类.节日时效性'),'$.是否节日视频') , '\\[|"|\\]', '') AS 是否节日视频
+        ,regexp_replace(GET_JSON_OBJECT(GET_JSON_OBJECT(data,'$.八、时效性与分类.节日时效性'),'$.节日名称') , '\\[|"|\\]', '') AS 节日名称
+        -- Section 9: hot-topic information, cleaned the same way as the festival fields.
+        ,regexp_replace(GET_JSON_OBJECT(GET_JSON_OBJECT(data,'$.九、热点信息'),'$.是否存在热点') , '\\[|"|\\]', '') AS 是否存在热点
+        ,regexp_replace(GET_JSON_OBJECT(GET_JSON_OBJECT(data,'$.九、热点信息'),'$.该热点的特征') , '\\[|"|\\]', '') AS 该热点的特征
+        ,regexp_replace(GET_JSON_OBJECT(GET_JSON_OBJECT(data,'$.九、热点信息'),'$.热点内容概括') , '\\[|"|\\]', '') AS 热点内容概括
+        ,regexp_replace(GET_JSON_OBJECT(GET_JSON_OBJECT(data,'$.九、热点信息'),'$.判断是热点的原因') , '\\[|"|\\]', '') AS 判断是热点的原因
+FROM    loghubods.result_log_day
+WHERE   dt = '${bizdate}'
+;

+ 319 - 0
production_code/loghubods.aitags_repeatdata.json

@@ -0,0 +1,319 @@
+{
+  "name": "aitags_repeatdata",
+  "project": "loghubods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "title_duration",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_id",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_title",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_url",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视觉音乐文字",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "内容选题",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频主题",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频关键词",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频主体",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频场景",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "情感倾向",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频风格",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "是否有片尾引导",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "引导时长",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "引导强度",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "传播性判断",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众地域",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众年龄段",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众性别",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众价值类型",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众用户价值点",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "推测观众用观众收入",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "背景音类型",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "背景音风格",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "语音类型",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "歌曲名",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "音色",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "产品水印",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "产品名称",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "字幕",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "颜色",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "字号",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "位置",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频口播",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "封面主体",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "人物个数",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "文字数量",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "文字关键字",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "封面主题",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "知名人物",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "人物年龄段",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "场景描述",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "时效性_有无时效",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "时效性_具体时间",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "视频一级分类",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "二级分类",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_1",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "tag_2",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_2",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_2",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "tag_3",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_3",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_3",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "具体时效",
+      "type": "STRING",
+      "comment": "具体时效"
+    },
+    {
+      "name": "是否节日视频",
+      "type": "STRING",
+      "comment": "是否节日视频"
+    },
+    {
+      "name": "节日名称",
+      "type": "STRING",
+      "comment": "节日名称"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1021796755,
+      "name": "提取重复视频结果"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.aitags_basedata",
+    "videoods.wx_video"
+  ]
+}

+ 169 - 0
production_code/loghubods.aitags_repeatdata.sql

@@ -0,0 +1,169 @@
+-- Task: 提取重复视频结果  ID: 1021796755  Type: ODPS_SQL
+----odps sql 
+----********************************************************************--
+----author:杜崇宇
+----create time:2024-11-11 20:31:52
+----********************************************************************--
+--CREATE TABLE loghubods.aitags_repeatdata
+--(
+--        title_duration STRING
+--,video_id     STRING
+--    ,video_title STRING
+--    ,video_url   STRING
+--    -- 基础信息
+--    ,视觉音乐文字      STRING
+--    ,内容选题        STRING
+--    ,视频主题        STRING
+--    ,视频关键词       STRING
+--    -- 主体和场景
+--    ,视频主体        STRING
+--    ,视频场景        STRING
+--    -- 情感与风格
+--    ,情感倾向        STRING
+--    ,视频风格        STRING
+--    -- 视频传播性与画像
+--    ,是否有片尾引导     STRING
+--    ,引导时长        BIGINT
+--    ,引导强度        STRING
+--    ,传播性判断       STRING
+--    ,推测观众地域      STRING
+--    ,推测观众年龄段     STRING
+--    ,推测观众性别      STRING
+--    ,推测观众价值类型    STRING
+--    ,推测观众用户价值点   STRING
+--    ,推测观众用观众收入   STRING
+--    -- 音画细节
+--    ,背景音类型       STRING
+--    ,背景音风格       STRING
+--    ,语音类型        STRING
+--    ,歌曲名         STRING
+--    ,音色          STRING
+--    ,产品水印        STRING
+--    ,产品名称        STRING
+--    ,字幕          STRING
+--    ,颜色          STRING
+--    ,字号          STRING
+--    ,位置          STRING
+--    ,视频口播        STRING
+--    -- 封面信息
+--    ,封面主体        STRING
+--    ,人物个数        BIGINT
+--    ,文字数量        BIGINT
+--    ,文字关键字       STRING
+--    ,封面主题        STRING
+--    -- 人物与场景
+--    ,知名人物        STRING
+--    ,人物年龄段       STRING
+--    ,场景描述        STRING
+--    -- 时效性与分类
+--   ,时效性_有无时效        STRING
+--        ,时效性_具体时间        STRING
+--    ,视频一级分类      STRING
+--    ,二级分类        STRING
+--    ,tag_1       STRING
+--    ,tag_name_1  STRING
+--    ,tag_score_1 BIGINT
+--    ,tag_2       STRING
+--    ,tag_name_2  STRING
+--    ,tag_score_2 BIGINT
+--    ,tag_3       STRING
+--    ,tag_name_3  STRING
+--    ,tag_score_3 BIGINT
+--)
+--PARTITIONED BY 
+--(
+--    dt           STRING
+--)
+--;
+--DROP TABLE IF EXISTS loghubods.aitags_repeatdata
+-- Rebuilds partition dt = ${bizdate} of loghubods.aitags_repeatdata.
+-- All aitags_basedata rows up to ${bizdate} are joined to videoods.wx_video and
+-- grouped by title_duration = CONCAT(CLEAR_TITLE_SIGNAL(title), '-', total_time);
+-- within each group only the row with the largest numeric video_id is kept.
+-- The festival name is then mapped to a canonical label.
+INSERT OVERWRITE TABLE loghubods.aitags_repeatdata PARTITION (dt = '${bizdate}')
+SELECT  DISTINCT title_duration
+        ,video_id
+        ,video_title
+        ,video_url
+        ,视觉音乐文字
+        ,内容选题
+        ,视频主题
+        ,视频关键词
+        ,视频主体
+        ,视频场景
+        ,情感倾向
+        ,视频风格
+        ,是否有片尾引导
+        ,引导时长
+        ,引导强度
+        ,传播性判断
+        ,推测观众地域
+        ,推测观众年龄段
+        ,推测观众性别
+        ,推测观众价值类型
+        ,推测观众用户价值点
+        ,推测观众用观众收入
+        ,背景音类型
+        ,背景音风格
+        ,语音类型
+        ,歌曲名
+        ,音色
+        ,产品水印
+        ,产品名称
+        ,字幕
+        ,颜色
+        ,字号
+        ,位置
+        ,视频口播
+        ,封面主体
+        ,人物个数
+        ,文字数量
+        ,文字关键字
+        ,封面主题
+        ,知名人物
+        ,人物年龄段
+        ,场景描述
+        ,时效性_有无时效
+        ,时效性_具体时间
+        ,视频一级分类
+        ,二级分类
+        ,tag_1
+        ,tag_name_1
+        ,tag_score_1
+        ,tag_2
+        ,tag_name_2
+        ,tag_score_2
+        ,tag_3
+        ,tag_name_3
+        ,tag_score_3
+        ,具体时效
+        ,是否节日视频
+        -- Canonical festival label; the first matching pattern wins, anything else passes through unchanged.
+        ,CASE WHEN 是否节日视频='是' AND 节日名称 REGEXP '端午' THEN '端午节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '儿童节|六一' THEN '儿童节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '国庆' THEN '国庆节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '父亲' THEN '父亲节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '七夕' THEN '七夕节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '母亲节' THEN '母亲节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '重阳' THEN '重阳节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '妇女节|三八|38节|女神节|女王节' THEN '妇女节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '鬼节|中元' THEN '中元节'
+              -- NOTE(review): '元旦将至' (New Year's Day) is mapped to the Mid-Autumn label here; behaviour kept as-is, confirm this is intended.
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '中秋|元旦将至' THEN '中秋节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '元宵|正月十五' THEN '元宵节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '建军节|八一' THEN '建军节'
+              -- NOTE(review): the only label without the '节' suffix; kept because downstream consumers may match on '清明'.
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '清明|踏青节|行清节|三月节|祭祖' THEN '清明'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '七一|建党' THEN '建党节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '感恩' THEN '感恩节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '教师' THEN '教师节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '五四|青年节' THEN '青年节'
+              WHEN 是否节日视频='是' AND 节日名称 REGEXP '腊八' THEN '腊八节'
+              ELSE 节日名称
+        END AS 节日名称
+FROM    (
+            -- Every column is qualified with its table alias so that a column added to
+            -- either table later cannot make the reference ambiguous.
+            SELECT  a.*
+                    ,CONCAT(CLEAR_TITLE_SIGNAL(b.title),'-',b.total_time) AS title_duration
+                    ,ROW_NUMBER() OVER (PARTITION BY CONCAT(CLEAR_TITLE_SIGNAL(b.title),'-',b.total_time) ORDER BY CAST(a.video_id AS BIGINT) DESC ) AS rank
+            FROM    loghubods.aitags_basedata a
+            LEFT JOIN videoods.wx_video b
+            ON      a.video_id = b.id
+            WHERE   a.dt <= '${bizdate}'
+            -- NOTE(review): the current aitags_basedata job writes '无' instead of NULL for
+            -- unmatched tags, so this filter only drops rows from partitions written otherwise.
+            AND     a.tag_name_1 IS NOT NULL
+        )
+WHERE   rank = 1

+ 49 - 0
production_code/loghubods.alg_channel_recommend_exp_feature_20250212.json

@@ -0,0 +1,49 @@
+{
+  "name": "alg_channel_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "09_推荐场景下的视频来源特征",
+  "columns": [
+    {
+      "name": "channel",
+      "type": "STRING",
+      "comment": "视频来源"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023818576,
+      "name": "10_推荐场景下的视频来源特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 178 - 0
production_code/loghubods.alg_channel_recommend_exp_feature_20250212.sql


+ 49 - 0
production_code/loghubods.alg_festive_recommend_exp_feature_20250212.json

@@ -0,0 +1,49 @@
+{
+  "name": "alg_festive_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "10_推荐场景下的节日特征",
+  "columns": [
+    {
+      "name": "festive",
+      "type": "STRING",
+      "comment": "节日"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023818577,
+      "name": "11_推荐场景下的视频_节日特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 178 - 0
production_code/loghubods.alg_festive_recommend_exp_feature_20250212.sql


+ 49 - 0
production_code/loghubods.alg_merge_cate1_recommend_exp_feature_20250212.json

@@ -0,0 +1,49 @@
+{
+  "name": "alg_merge_cate1_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "07_推荐场景下的merge一级品类特征",
+  "columns": [
+    {
+      "name": "merge_cate1",
+      "type": "STRING",
+      "comment": "merge一级品类"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023818555,
+      "name": "08_推荐场景下的一级品类特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 178 - 0
production_code/loghubods.alg_merge_cate1_recommend_exp_feature_20250212.sql


+ 49 - 0
production_code/loghubods.alg_merge_cate2_recommend_exp_feature_20250212.json

@@ -0,0 +1,49 @@
+{
+  "name": "alg_merge_cate2_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "08_推荐场景下的merge二级品类特征",
+  "columns": [
+    {
+      "name": "merge_cate2",
+      "type": "STRING",
+      "comment": "merge二级品类"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023818564,
+      "name": "09_推荐场景下的二级品类特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 178 - 0
production_code/loghubods.alg_merge_cate2_recommend_exp_feature_20250212.sql


+ 54 - 0
production_code/loghubods.alg_mid_feature_return_tags.json

@@ -0,0 +1,54 @@
+{
+  "name": "alg_mid_feature_return_tags",
+  "project": "loghubods",
+  "comment": "TABLE COMMENT",
+  "columns": [
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": "mid"
+    },
+    {
+      "name": "feature",
+      "type": "JSON",
+      "comment": "数值"
+    },
+    {
+      "name": "tag_ts",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1017584776,
+      "name": "04_用户回流点击对应的tag"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.user_share_log_flow",
+    "videoods.wx_video_per1h"
+  ]
+}

+ 139 - 0
production_code/loghubods.alg_mid_feature_return_tags.sql

@@ -0,0 +1,139 @@
+-- Task: 04_用户回流点击对应的tag  ID: 1017584776  Type: ODPS_SQL
+--@exclude_input=loghubods.user_share_log_flow
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2024-05-13 14:35:10
+--********************************************************************--
+-- Destination table of this task: one row per mid, partitioned by day (dt) and hour (hh).
+-- IF NOT EXISTS makes the statement a no-op once the table is present.
+CREATE TABLE IF NOT EXISTS loghubods.alg_mid_feature_return_tags
+(
+    -- mid is populated from machinecode of the click log by the INSERT below
+    mid     STRING COMMENT 'mid'
+    -- JSON object with the keys tags_2h / tags_1d / tags_3d / tags_7d / tags_14d
+    ,feature JSON  COMMENT '数值'
+    -- comma-joined list of "tag:ts_diff" pairs that the feature column is derived from
+    ,tag_ts STRING COMMENT ''
+)
+COMMENT 'TABLE COMMENT'
+PARTITIONED BY 
+(
+    dt       STRING COMMENT '日期'
+    ,hh STRING COMMENT '小时'
+)
+LIFECYCLE 30
+;
+-- Session flag selecting the cp37 Python runtime for Python UDFs.
+-- NOTE(review): presumably required by FENCI_STR2STR and/or get_top_tags used below; confirm.
+SET odps.sql.python.version = cp37
+;
+
+-- Builds, for every mid, the tags of the video titles it clicked through during the
+-- look-back window [${dt}${hh} - ${hours_early}h, ${dt}${hh} - 1h], together with the
+-- age in seconds of each click, and stores per-window tag summaries as a JSON feature.
+INSERT OVERWRITE TABLE loghubods.alg_mid_feature_return_tags PARTITION (dt = '${dt}',hh = '${hh}')
+WITH click_log AS 
+(
+    -- Raw click events inside the look-back window.
+    SELECT  __topic__
+            ,eventinfos
+            ,apptype
+            ,clickobjectid AS vid
+            ,shareobjectid
+            ,machinecode AS mid
+            ,clienttimestamp
+            ,pagesource
+            ,parentpagesource
+            ,parentrootpagesource
+            ,shareid
+            ,rootshareid
+            ,subsessionid
+    FROM    loghubods.user_share_log_flow
+    WHERE   CONCAT(year,month,day,hour) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * ${hours_early}),'YYYYMMDDHH')
+    AND     CONCAT(year,month,day,hour) <= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+    AND     __topic__ = 'click'
+    AND     apptype IS NOT NULL
+    AND     apptype != '12'
+    AND     clickobjectid IS NOT NULL
+    AND     machinecode IS NOT NULL
+    AND     machinecode != ""
+    AND     clienttimestamp IS NOT NULL
+    -- todo (from the original author): restrict to head returns, i.e. exclude self-share returns
+    AND     rootPageSource NOT REGEXP '-pages/home'
+)
+,first_click AS 
+(
+    -- Earliest click per (apptype, mid, vid, pagesource, subsessionid).
+    SELECT  apptype
+            ,mid
+            ,vid
+            ,pagesource
+            ,subsessionid
+            ,MIN(clienttimestamp) AS clienttimestamp
+    FROM    click_log
+    GROUP BY apptype
+             ,mid
+             ,vid
+             ,pagesource
+             ,subsessionid
+)
+,vid_title AS 
+(
+    -- Title of every clicked video plus its segmented tag string.
+    SELECT  clicked.vid AS vid
+            ,video.title AS title
+            ,FENCI_STR2STR(video.title,3) AS tags
+    FROM    (
+                SELECT  vid
+                FROM    first_click
+                GROUP BY vid
+            ) clicked
+    JOIN    (
+                SELECT  DISTINCT id
+                        ,title
+                FROM    videoods.wx_video_per1h
+                WHERE   title IS NOT NULL
+                AND     title != ""
+            ) video
+    ON      clicked.vid = video.id
+)
+,vid_tag AS 
+(
+    -- One row per (vid, tag): the comma separated tag string is exploded.
+    SELECT  vid
+            ,title
+            ,tag
+    FROM    (
+                SELECT  vid
+                        ,title
+                        ,tags
+                FROM    vid_title
+                WHERE   tags IS NOT NULL
+                AND     tags != ""
+            ) tagged
+    LATERAL VIEW EXPLODE(SPLIT(tags,',')) t AS tag
+)
+,mid_tag_age AS 
+(
+    -- Seconds between the partition hour and the click (clienttimestamp is divided by 1000 first).
+    SELECT  fc.mid AS mid
+            ,vt.tag AS tag
+            ,CAST((
+                        UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - fc.clienttimestamp / 1000
+            ) AS BIGINT) AS ts_diff
+    FROM    first_click fc
+    JOIN    vid_tag vt
+    ON      fc.vid = vt.vid
+)
+,mid_tag_list AS 
+(
+    -- All "tag:ts_diff" pairs of a mid, comma joined.
+    SELECT  mid
+            ,CONCAT_WS(',',COLLECT_LIST(CONCAT(tag,":",ts_diff))) AS tag_ts
+    FROM    mid_tag_age
+    GROUP BY mid
+)
+,mid_feature AS 
+(
+    -- get_top_tags is called once per window length (2h, 1d, 3d, 7d, 14d) with 10 as last argument.
+    SELECT  mid
+            ,JSON_OBJECT('tags_2h',get_top_tags(tag_ts,3600 * 2,10),"tags_1d",get_top_tags(tag_ts,3600 * 24,10),"tags_3d",get_top_tags(tag_ts,3600 * 24 * 3,10),"tags_7d",get_top_tags(tag_ts,3600 * 24 * 7,10),"tags_14d",get_top_tags(tag_ts,3600 * 24 * 14,10)) AS feature
+            ,tag_ts
+    FROM    mid_tag_list
+)
+SELECT  mid
+        ,feature
+        ,tag_ts
+FROM    mid_feature

+ 50 - 0
production_code/loghubods.alg_mid_feature_returncf.json

@@ -0,0 +1,50 @@
+{
+  "name": "alg_mid_feature_returncf",
+  "project": "loghubods",
+  "comment": "基于视频共线CF特征的用户“点击回流”对应的vids",
+  "columns": [
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "feature",
+      "type": "JSON",
+      "comment": "json格式的特征组合"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1017834885,
+      "name": "09_基于共线的用户点击回流CF特征"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_cfreturn",
+    "loghubods.alg_vid_feature_cfshare",
+    "loghubods.user_share_log_flow"
+  ]
+}

+ 113 - 0
production_code/loghubods.alg_mid_feature_returncf.sql

@@ -0,0 +1,113 @@
+-- Task: 09_基于共线的用户点击回流CF特征  ID: 1017834885  Type: ODPS_SQL
+--@exclude_input=loghubods.user_share_log_flow
+--odps sql 
+--********************************************************************--
+--author:张博
+--create time:2024-06-07 17:55:01
+--********************************************************************--
+-- select * from loghubods.alg_mid_feature_returncf where dt = "20240606" and hh = "16" limit 100;
+-- Per-user CF features derived from the videos a user clicked ("click return" traffic).
+-- One row per mid in each (dt, hh) partition, written by the INSERT that follows.
+-- The mid column is filled from machinecode (a user/device id), not from a video id, so
+-- its column comment is corrected here. IF NOT EXISTS leaves an existing table untouched,
+-- so an already-created table needs a one-off
+--   ALTER TABLE loghubods.alg_mid_feature_returncf CHANGE COLUMN mid COMMENT '用户mid(machinecode)'
+CREATE TABLE IF NOT EXISTS loghubods.alg_mid_feature_returncf
+(
+    mid      STRING COMMENT '用户mid(machinecode)'
+    ,feature JSON COMMENT 'json格式的特征组合'
+)
+COMMENT '基于视频共线CF特征的用户“点击回流”对应的vids'
+PARTITIONED BY 
+(
+    dt       STRING COMMENT '天'
+    ,hh      STRING COMMENT '小时'
+)
+LIFECYCLE 30
+;
+
+-- Builds, for every user (mid), one JSON feature that combines the per-video "share" CF
+-- and "return" CF features of the videos the user clicked during the look-back window.
+-- Scheduler parameters: dt / hh select the target partition, hours_early is the length
+-- of the look-back window in hours.
+INSERT OVERWRITE TABLE loghubods.alg_mid_feature_returncf PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_origin AS 
+(
+    -- Click events between (target hour - hours_early) and (target hour - 1h), collapsed
+    -- to one row per (apptype, mid, vid, pagesource, subsessionid) keeping the earliest
+    -- client timestamp.
+    SELECT  apptype
+            ,mid
+            ,vid
+            ,pagesource
+            ,subsessionid
+            ,MIN(clienttimestamp) AS clienttimestamp
+    FROM    (
+                -- Only the columns consumed by the outer aggregation are projected.
+                SELECT  apptype
+                        ,clickobjectid AS vid
+                        ,machinecode AS mid
+                        ,clienttimestamp
+                        ,pagesource
+                        ,subsessionid
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * ${hours_early}),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+                AND     __topic__ IN ('click')
+                AND     apptype NOT IN ('12')
+                AND     apptype IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     machinecode IS NOT NULL
+                AND     machinecode != ""
+                AND     clienttimestamp IS NOT NULL
+                AND     rootPageSource NOT REGEXP '-pages/home' -- TODO: tighten the restriction so only top-level return traffic is kept (exclude returns from self-shares)
+            ) 
+    GROUP BY apptype
+             ,mid
+             ,vid
+             ,pagesource
+             ,subsessionid
+)
+,t_share_cf AS 
+(
+    -- Per-video share-CF feature of the target partition, serialized to a JSON string.
+    SELECT  vid
+            ,JSON_FORMAT(feature) AS feature1
+    FROM    loghubods.alg_vid_feature_cfshare
+    WHERE   dt = '${dt}'
+    AND     hh = '${hh}'
+)
+,t_return_cf AS 
+(
+    -- Per-video return-CF feature of the target partition, serialized to a JSON string.
+    SELECT  vid
+            ,JSON_FORMAT(feature) AS feature2
+    FROM    loghubods.alg_vid_feature_cfreturn
+    WHERE   dt = '${dt}'
+    AND     hh = '${hh}'
+)
+,t_group AS 
+(
+    -- Attach both CF features to every clicked video, then gather them per user.
+    SELECT  mid
+            ,COLLECT_LIST(feature1) AS feature1_list
+            ,COLLECT_LIST(feature2) AS feature2_list
+    FROM    (
+                SELECT  mid
+                        ,feature1
+                        ,feature2
+                FROM    t_origin t1
+                LEFT JOIN t_share_cf t2
+                ON      t1.vid = t2.vid
+                LEFT JOIN t_return_cf t3
+                ON      t1.vid = t3.vid
+            ) 
+    GROUP BY mid
+)
+,t_group_ AS 
+(
+    -- Reduce each list to a single feature via the project UDF feature_from_list_json
+    -- (presumably merges the JSON strings - confirm). A missing or empty list yields NULL.
+    SELECT  mid
+            ,IF(feature1_list IS NULL OR SIZE(feature1_list) == 0,NULL,loghubods.feature_from_list_json(feature1_list)) AS feature1
+            ,IF(feature2_list IS NULL OR SIZE(feature2_list) == 0,NULL,loghubods.feature_from_list_json(feature2_list)) AS feature2
+    FROM    t_group
+)
+-- Only users that have BOTH features are written. The WHERE clause guarantees both sides
+-- are non-NULL, so the JSON object is built directly without per-side fallbacks.
+-- NOTE(review): if users with only one side should also be emitted, relax the WHERE to OR
+-- and add single-key JSON_OBJECT branches - confirm with the feature owner first.
+SELECT  mid
+        ,JSON_OBJECT("share",feature1,"return",feature2) AS feature
+FROM    t_group_
+WHERE   feature1 IS NOT NULL
+AND     feature2 IS NOT NULL

+ 54 - 0
production_code/loghubods.alg_mid_feature_share_tags.json

@@ -0,0 +1,54 @@
+{
+  "name": "alg_mid_feature_share_tags",
+  "project": "loghubods",
+  "comment": "TABLE COMMENT",
+  "columns": [
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": "mid"
+    },
+    {
+      "name": "feature",
+      "type": "JSON",
+      "comment": "数值"
+    },
+    {
+      "name": "tag_ts",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1017584782,
+      "name": "05_用户分享对应的tag"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.user_share_log_flow",
+    "videoods.wx_video_per1h"
+  ]
+}

+ 142 - 0
production_code/loghubods.alg_mid_feature_share_tags.sql

@@ -0,0 +1,142 @@
+-- Task: 05_用户分享对应的tag  ID: 1017584782  Type: ODPS_SQL
+--@exclude_input=loghubods.user_share_log_flow
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2024-05-13 14:35:10
+--********************************************************************--
+-- Per-user (mid) tag features derived from share events, one partition per (dt, hh).
+-- As filled by the INSERT that follows: feature is a JSON object keyed tags_2h, tags_1d,
+-- tags_3d, tags_7d and tags_14d; tag_ts is the comma-joined list of "tag:ts_diff" pairs
+-- those values are computed from.
+CREATE TABLE IF NOT EXISTS loghubods.alg_mid_feature_share_tags (
+    mid     STRING COMMENT 'mid',
+    feature JSON   COMMENT '数值',
+    tag_ts  STRING COMMENT ''
+)
+COMMENT 'TABLE COMMENT'
+PARTITIONED BY (
+    dt STRING COMMENT '日期',
+    hh STRING COMMENT '小时'
+)
+LIFECYCLE 30
+;
+-- Session flag selecting the Python runtime version; presumably required by the UDFs
+-- called below (FENCI_STR2STR / get_top_tags) - confirm.
+SET odps.sql.python.version = cp37
+;
+
+-- Writes, per user (mid), the top tags of the videos the user shared, computed over
+-- several look-back horizons. Scheduler parameters: dt / hh select the target partition,
+-- hours_early is the length of the look-back window in hours.
+INSERT OVERWRITE TABLE loghubods.alg_mid_feature_share_tags PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_share AS (
+    -- Share events inside the look-back window; only columns used downstream are kept.
+    SELECT
+        apptype,
+        shareobjectid AS vid,
+        machinecode AS mid,
+        clienttimestamp,
+        pagesource,
+        shareid,
+        subsessionid
+    FROM loghubods.user_share_log_flow
+    WHERE CONCAT(year,month,day,hour)
+              BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * ${hours_early}),'YYYYMMDDHH')
+              AND     TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+      AND __topic__ IN ('share')
+      AND apptype NOT IN ('12')
+      AND apptype IS NOT NULL
+      AND shareobjectid IS NOT NULL
+      AND machinecode IS NOT NULL
+      AND machinecode != ""
+      AND clienttimestamp IS NOT NULL
+),
+t_share_ AS (
+    -- Several shares of the same exposure collapse into the earliest one.
+    SELECT
+        apptype,
+        mid,
+        vid,
+        pagesource,
+        subsessionid,
+        shareid,
+        MIN(clienttimestamp) AS clienttimestamp
+    FROM t_share
+    GROUP BY apptype, mid, vid, pagesource, subsessionid, shareid
+),
+t_title AS (
+    -- Title of every shared video plus the tag string FENCI_STR2STR produces from it
+    -- (presumably a word-segmentation UDF - confirm).
+    -- NOTE(review): wx_video_per1h is read without a partition filter - confirm intended.
+    SELECT
+        shared.vid AS vid,
+        video.title AS title,
+        FENCI_STR2STR(video.title,3) AS tags
+    FROM (
+        SELECT vid
+        FROM t_share_
+        GROUP BY vid
+    ) shared
+    JOIN (
+        SELECT DISTINCT id, title
+        FROM videoods.wx_video_per1h
+        WHERE title IS NOT NULL
+          AND title != ""
+    ) video
+    ON shared.vid = video.id
+),
+t_title_tag AS (
+    -- One row per (vid, tag): the comma-separated tag string is exploded.
+    SELECT vid, title, tag
+    FROM (
+        SELECT vid, title, tags
+        FROM t_title
+        WHERE tags IS NOT NULL
+          AND tags != ""
+    ) titled
+    LATERAL VIEW EXPLODE(SPLIT(tags,',')) t AS tag
+),
+t_join AS (
+    -- Pair every user share with the tags of the shared video. ts_diff is the distance
+    -- between the target hour and the share time (clienttimestamp / 1000, so presumably
+    -- milliseconds converted to seconds - confirm).
+    SELECT
+        mid,
+        tag,
+        CAST((UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - clienttimestamp / 1000) AS BIGINT) AS ts_diff
+    FROM t_share_ s
+    JOIN t_title_tag g
+    ON s.vid = g.vid
+),
+t_group AS (
+    -- All "tag:ts_diff" pairs of a user, joined with commas.
+    SELECT
+        mid,
+        CONCAT_WS(',',COLLECT_LIST(CONCAT(tag,":",ts_diff))) AS tag_ts
+    FROM t_join
+    GROUP BY mid
+),
+t_mid_tags AS (
+    -- get_top_tags(tag_ts, horizon_seconds, 10) is evaluated for five horizons and the
+    -- results are packed into one JSON object.
+    SELECT
+        mid,
+        JSON_OBJECT('tags_2h',tags_2h,"tags_1d",tags_1d,"tags_3d",tags_3d,"tags_7d",tags_7d,"tags_14d",tags_14d) AS feature,
+        tag_ts
+    FROM (
+        SELECT
+            mid,
+            tag_ts,
+            get_top_tags(tag_ts,3600 * 2,10) AS tags_2h,
+            get_top_tags(tag_ts,3600 * 24,10) AS tags_1d,
+            get_top_tags(tag_ts,3600 * 24 * 3,10) AS tags_3d,
+            get_top_tags(tag_ts,3600 * 24 * 7,10) AS tags_7d,
+            get_top_tags(tag_ts,3600 * 24 * 14,10) AS tags_14d -- 336 hours
+        FROM t_group
+    ) horizons
+)
+SELECT
+    mid,
+    feature,
+    tag_ts
+FROM t_mid_tags

+ 50 - 0
production_code/loghubods.alg_mid_feature_sharecf.json

@@ -0,0 +1,50 @@
+{
+  "name": "alg_mid_feature_sharecf",
+  "project": "loghubods",
+  "comment": "基于视频共线CF特征的用户“分享”对应的vids",
+  "columns": [
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "feature",
+      "type": "JSON",
+      "comment": "json格式的特征组合"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1017835016,
+      "name": "08_基于共线的用户分享CF特征"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_cfreturn",
+    "loghubods.alg_vid_feature_cfshare",
+    "loghubods.user_share_log_flow"
+  ]
+}

+ 114 - 0
production_code/loghubods.alg_mid_feature_sharecf.sql

@@ -0,0 +1,114 @@
+-- Task: 08_基于共线的用户分享CF特征  ID: 1017835016  Type: ODPS_SQL
+--@exclude_input=loghubods.user_share_log_flow
+--odps sql 
+--********************************************************************--
+--author:张博
+--create time:2024-06-07 18:59:55
+--********************************************************************--
+-- select * from loghubods.alg_mid_feature_sharecf where dt = "20240606" and hh = "16" limit 100;
+-- Per-user CF features derived from the videos a user shared.
+-- One row per mid in each (dt, hh) partition, written by the INSERT that follows.
+-- The mid column is filled from machinecode (a user/device id), not from a video id, so
+-- its column comment is corrected here. IF NOT EXISTS leaves an existing table untouched,
+-- so an already-created table needs a one-off
+--   ALTER TABLE loghubods.alg_mid_feature_sharecf CHANGE COLUMN mid COMMENT '用户mid(machinecode)'
+CREATE TABLE IF NOT EXISTS loghubods.alg_mid_feature_sharecf
+(
+    mid      STRING COMMENT '用户mid(machinecode)'
+    ,feature JSON COMMENT 'json格式的特征组合'
+)
+COMMENT '基于视频共线CF特征的用户“分享”对应的vids'
+PARTITIONED BY 
+(
+    dt       STRING COMMENT '天'
+    ,hh      STRING COMMENT '小时'
+)
+LIFECYCLE 30
+;
+
+-- Builds, for every user (mid), one JSON feature that combines the per-video "share" CF
+-- and "return" CF features of the videos the user shared during the look-back window.
+-- Scheduler parameters: dt / hh select the target partition, hours_early is the length
+-- of the look-back window in hours.
+INSERT OVERWRITE TABLE loghubods.alg_mid_feature_sharecf PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_origin AS 
+(
+    -- Share events between (target hour - hours_early) and (target hour - 1h), collapsed
+    -- to one row per (apptype, mid, vid, pagesource, shareid, subsessionid) keeping the
+    -- earliest client timestamp.
+    SELECT  apptype
+            ,mid
+            ,vid
+            ,pagesource
+            ,subsessionid
+            ,MIN(clienttimestamp) AS clienttimestamp
+    FROM    (
+                -- Only the columns consumed by the outer aggregation are projected.
+                SELECT  apptype
+                        ,shareobjectid AS vid
+                        ,machinecode AS mid
+                        ,clienttimestamp
+                        ,pagesource
+                        ,shareid
+                        ,subsessionid
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * ${hours_early}),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+                AND     __topic__ IN ('share')
+                AND     apptype NOT IN ('12')
+                AND     apptype IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+                AND     machinecode IS NOT NULL
+                AND     machinecode != ""
+                AND     clienttimestamp IS NOT NULL
+                AND     rootPageSource NOT REGEXP '-pages/home' -- TODO: tighten the restriction so only top-level return traffic is kept (exclude returns from self-shares)
+            ) 
+    GROUP BY apptype
+             ,mid
+             ,vid
+             ,pagesource
+             ,shareid
+             ,subsessionid
+)
+,t_share_cf AS 
+(
+    -- Per-video share-CF feature of the target partition, serialized to a JSON string.
+    SELECT  vid
+            ,JSON_FORMAT(feature) AS feature1
+    FROM    loghubods.alg_vid_feature_cfshare
+    WHERE   dt = '${dt}'
+    AND     hh = '${hh}'
+)
+,t_return_cf AS 
+(
+    -- Per-video return-CF feature of the target partition, serialized to a JSON string.
+    SELECT  vid
+            ,JSON_FORMAT(feature) AS feature2
+    FROM    loghubods.alg_vid_feature_cfreturn
+    WHERE   dt = '${dt}'
+    AND     hh = '${hh}'
+)
+,t_group AS 
+(
+    -- Attach both CF features to every shared video, then gather them per user.
+    SELECT  mid
+            ,COLLECT_LIST(feature1) AS feature1_list
+            ,COLLECT_LIST(feature2) AS feature2_list
+    FROM    (
+                SELECT  mid
+                        ,feature1
+                        ,feature2
+                FROM    t_origin t1
+                LEFT JOIN t_share_cf t2
+                ON      t1.vid = t2.vid
+                LEFT JOIN t_return_cf t3
+                ON      t1.vid = t3.vid
+            ) 
+    GROUP BY mid
+)
+,t_group_ AS 
+(
+    -- Reduce each list to a single feature via the project UDF feature_from_list_json
+    -- (presumably merges the JSON strings - confirm). A missing or empty list yields NULL.
+    SELECT  mid
+            ,IF(feature1_list IS NULL OR SIZE(feature1_list) == 0,NULL,loghubods.feature_from_list_json(feature1_list)) AS feature1
+            ,IF(feature2_list IS NULL OR SIZE(feature2_list) == 0,NULL,loghubods.feature_from_list_json(feature2_list)) AS feature2
+    FROM    t_group
+)
+-- Only users that have BOTH features are written. The WHERE clause guarantees both sides
+-- are non-NULL, so the JSON object is built directly without per-side fallbacks.
+-- NOTE(review): if users with only one side should also be emitted, relax the WHERE to OR
+-- and add single-key JSON_OBJECT branches - confirm with the feature owner first.
+SELECT  mid
+        ,JSON_OBJECT("share",feature1,"return",feature2) AS feature
+FROM    t_group_
+WHERE   feature1 IS NOT NULL
+AND     feature2 IS NOT NULL

+ 69 - 0
production_code/loghubods.alg_recsys_feature_cf_i2i_v2.json

@@ -0,0 +1,69 @@
+{
+  "name": "alg_recsys_feature_cf_i2i_v2",
+  "project": "loghubods",
+  "comment": "TABLE COMMENT",
+  "columns": [
+    {
+      "name": "vid_a",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "vid_b",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "exp",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "rovn",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1018464715,
+      "name": "ROVn的CF策略_v2"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_all_exp_base_v2",
+    "loghubods.user_share_log_flow"
+  ]
+}

+ 189 - 0
production_code/loghubods.alg_recsys_feature_cf_i2i_v2.sql

@@ -0,0 +1,189 @@
+-- Task: ROVn的CF策略_v2  ID: 1018464715  Type: ODPS_SQL
+--@exclude_input=loghubods.video_action_log_flow_new
+--@exclude_input=loghubods.vid_rovn_cf_base
+--@exclude_input=loghubods.user_share_log_flow
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2024-05-17 13:20:28
+--********************************************************************--
+-- Item-to-item CF statistics per (vid_A, vid_B) pair, partitioned by dt / hh.
+-- As filled by the INSERT that follows: exp is the view count, return_n the summed return
+-- count, rovn their ratio rounded to 6 decimals (all stored as strings), and feature the
+-- same three values serialized as a JSON string.
+CREATE TABLE IF NOT EXISTS loghubods.alg_recsys_feature_cf_i2i_v2 (
+    vid_A    STRING COMMENT '',
+    vid_B    STRING COMMENT '',
+    exp      STRING COMMENT '',
+    return_n STRING COMMENT '',
+    rovn     STRING COMMENT '',
+    feature  STRING COMMENT ''
+)
+PARTITIONED BY (
+    dt STRING COMMENT '日期:20240105',
+    hh STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = 'TABLE COMMENT')
+LIFECYCLE 365
+;
+
+--ALTER TABLE loghubods.alg_recsys_feature_cf_i2i_v2 ADD COLUMNS (feature STRING)
+-- Computes, for each (vid_A, vid_B) pair, how often vid_B was shared from the recommend
+-- page in a sub-session that also contains a qualifying vid_A row, and how many distinct
+-- users returned through those shares. Scheduler parameters: dt / hh select the target
+-- partition, day is the look-back length in days.
+-- The final SELECT * is mapped to the table columns by position:
+-- (vid_A, vid_B, view, return, ROV, feature) -> (vid_A, vid_B, exp, return_n, rovn, feature).
+INSERT OVERWRITE TABLE loghubods.alg_recsys_feature_cf_i2i_v2 PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_recommend AS 
+(
+    -- One row per (mid, vid, subsessionid, pagesource) share on the recommend page, with
+    -- the earliest share timestamp and the number of distinct users (b.mid) whose click
+    -- events carry that share's id as rootshareid.
+    SELECT  a.mid AS mid
+            ,a.vid AS vid
+            ,a.subsessionid
+            ,a.pagesource
+            ,MIN(a.clienttimestamp) AS clienttimestamp
+            ,COUNT(DISTINCT b.mid) AS return_cnt
+    FROM    (
+                -- a: 'share' events whose pagesource ends with the recommend-page suffix.
+                SELECT  __topic__
+                        ,eventinfos
+                        ,apptype
+                        ,shareobjectid AS vid
+                        ,machinecode AS mid
+                        ,clienttimestamp
+                        ,pagesource
+                        ,parentpagesource
+                        ,parentrootpagesource
+                        ,shareid
+                        ,rootshareid
+                        ,subsessionid
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * ${day}),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+                AND     __topic__ IN ('share')
+                AND     pagesource REGEXP '-pages/user-videos-share-recommend$'
+            ) a
+    LEFT JOIN   (
+                    -- b: all 'click' events of the same time window.
+                    SELECT  __topic__
+                            ,eventinfos
+                            ,apptype
+                            ,clickobjectid AS vid
+                            ,machinecode AS mid
+                            ,clienttimestamp
+                            ,pagesource
+                            ,parentpagesource
+                            ,parentrootpagesource
+                            ,shareid
+                            ,rootshareid
+                            ,subsessionid
+                    FROM    loghubods.user_share_log_flow
+                    WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * ${day}),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+                    AND     __topic__ IN ('click')
+                ) b
+    ON      a.shareid = b.rootshareid
+    GROUP BY a.mid
+             ,a.vid
+             ,a.subsessionid
+             ,a.pagesource
+)
+,t_recommend_vid AS 
+(
+    -- Pairs each qualifying base row (vid_A) with the recommend-page shares (vid_B) of the
+    -- same user and sub-session whose share timestamp is not earlier than the base row's
+    -- logtimestamp.
+    SELECT  a.mid
+            ,a.vid AS vid_A
+            ,a.subsessionid
+            ,c.vid AS vid_B
+            ,c.return_cnt
+    FROM    (
+                -- a: rows of the target partition on the share page whose recommendPageType
+                -- matches the recommend-detail pattern, limited to sub-sessions that
+                -- produced at least one return.
+                SELECT  apptype
+                        ,pagesource
+                        ,uid
+                        ,mid
+                        ,vid
+                        ,logtimestamp
+                        ,businesstype
+                        ,sessionid
+                        ,subsessionid
+                        ,abinfodata
+                        ,recommendlogvo
+                        ,extparams
+                        ,abcode
+                        ,recommendPageType
+                        ,flowpool
+                        ,level
+                FROM    loghubods.alg_vid_feature_all_exp_base_v2
+                WHERE   dt = '${dt}'
+                AND     hh = '${hh}'
+                AND     pagesource REGEXP '-pages/user-videos-share$'
+                AND     GET_JSON_OBJECT(extparams,'$.recommendPageType') REGEXP '-pages/user-videos-share-recommend-detail'
+                AND     subsessionid IN (
+                            SELECT  DISTINCT subsessionid
+                            FROM    t_recommend
+                            WHERE   return_cnt > 0
+                        ) 
+            ) a
+    LEFT JOIN   (
+                    -- c: the aggregated shares; the inner alias a refers to t_recommend here.
+                    SELECT  mid
+                            ,vid
+                            ,a.subsessionid
+                            ,a.pagesource
+                            ,clienttimestamp
+                            ,return_cnt
+                    FROM    t_recommend a
+                ) c
+    ON      a.subsessionid = c.subsessionid
+    AND     a.mid = c.mid
+    AND     a.logtimestamp <= c.clienttimestamp
+)
+-- Column order matters: the result is inserted positionally (see the header comment).
+SELECT  *
+        ,JSON_FORMAT(JSON_OBJECT("exp",view,"return_n",return,"rovn",ROV)) AS feature
+FROM    (
+            -- Per pair: view = number of joined rows, return = summed return_cnt,
+            -- ROV = return / view rounded to 6 decimals; all three are cast to STRING.
+            -- Rows without a matching share (vid_B IS NULL) are dropped.
+            SELECT  vid_A
+                    ,vid_B
+                    ,cast(COUNT(subsessionid) as string) AS view
+                    ,cast(SUM(return_cnt) as string) AS return
+                    ,cast(round(SUM(return_cnt) / COUNT(subsessionid),6) as string) AS ROV
+            FROM    t_recommend_vid
+            WHERE   vid_B IS NOT NULL
+            GROUP BY vid_A
+                     ,vid_B
+        ) 
+;
+
+-- Recall list per source video, partitioned by dt / hh.
+-- As filled by the INSERT that follows: vid is the source video (vid_A), videoid_arr and
+-- score_arr are comma-joined lists of at most 100 candidate videos and their rovn scores,
+-- and cnt is the number of candidates in the list.
+CREATE TABLE IF NOT EXISTS loghubods.alg_recsys_recall_cf_rovn_v2 (
+    vid         STRING COMMENT '',
+    videoid_arr STRING COMMENT '',
+    score_arr   STRING COMMENT '',
+    cnt         STRING COMMENT ''
+)
+PARTITIONED BY (
+    dt STRING COMMENT '日期:20240105',
+    hh STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = 'TABLE COMMENT')
+LIFECYCLE 365
+;
+
+-- Builds the recall list of each source video: the up-to-100 candidate videos with the
+-- highest rovn among pairs whose exposure count exceeds the value parameter.
+-- Scheduler parameters: dt / hh select the partition, value is the exposure threshold.
+INSERT OVERWRITE TABLE loghubods.alg_recsys_recall_cf_rovn_v2 PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_group AS 
+(
+    -- rovn is stored as STRING, so it is cast to DOUBLE before ranking. Sorting the raw
+    -- string would compare character by character and place e.g. '9.1' above '12.5'.
+    -- NOTE(review): exp is also a STRING, so the threshold filter relies on implicit
+    -- conversion - confirm that the value parameter is always numeric.
+    SELECT  vid_A AS key
+            ,vid_B AS vid
+            ,rovn AS score
+            ,ROW_NUMBER() OVER (PARTITION BY vid_A ORDER BY CAST(rovn AS DOUBLE) DESC ) AS rank_num
+    FROM    loghubods.alg_recsys_feature_cf_i2i_v2
+    WHERE   dt = '${dt}'
+    AND     hh = '${hh}'
+    AND     exp > ${value}
+)
+,t_recall AS 
+(
+    -- Keep the 100 best-ranked candidates per key and flatten them into two parallel
+    -- comma-joined lists (video ids and scores) plus their count.
+    -- NOTE(review): the list order relies on the inner ORDER BY surviving the GROUP BY -
+    -- confirm the engine preserves it if consumers expect rank order.
+    SELECT  key
+            ,CONCAT_WS(',',COLLECT_LIST(vid)) AS videoid_arr
+            ,CONCAT_WS(',',COLLECT_LIST(CAST(score AS STRING))) AS score_arr
+            ,COUNT(vid) AS cnt
+    FROM    (
+                SELECT  key
+                        ,vid
+                        ,score
+                        ,rank_num
+                FROM    t_group
+                WHERE   rank_num <= 100
+                ORDER BY rank_num ASC
+            ) 
+    GROUP BY key
+)SELECT  *
+FROM    t_recall

+ 49 - 0
production_code/loghubods.alg_recsys_feature_user_share_return_stat.json

@@ -0,0 +1,49 @@
+{
+  "name": "alg_recsys_feature_user_share_return_stat",
+  "project": "loghubods",
+  "comment": "统计用户最近30d的分享回流数据",
+  "columns": [
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": "mid"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "统计值"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023890602,
+      "name": "05_全局用户分享回流统计"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

+ 448 - 0
production_code/loghubods.alg_recsys_feature_user_share_return_stat.sql

@@ -0,0 +1,448 @@
+-- Task: 05_全局用户分享回流统计  ID: 1023890602  Type: ODPS_SQL
+--********************************************************************--
+-- 回流统计维度
+-- is_return_n_noself, return_n_uv_noself 计算return
+-- 
+-- 类目维度json名称简写
+-- na:name, sp:share_pv(分享pv求和), rp:return_n_pv_noself(回流pv求和)
+-- ru:return_n_uv_noself(回流uv求和), mu:max_return_n_uv_noself(一次曝光最大回流uv)
+--
+-- mid维度json名称简写
+-- s_pv:share_pv(分享pv求和), s_cnt:share_cnt(分享次数求和), 
+-- r_pv:return_pv(回流pv求和), r_uv:return_uv(回流uv求和)
+-- m_s_cnt:max_share_cnt(一次曝光最大分享次数), m_r_uv:max_return_uv(一次曝光最大回流uv)
+-- m_s_s:max_share_seq(最大分享序列), m_r_s:max_return_seq(最大回流序列)
+-- l_s_s:last_share_seq(最近分享序列), l_r_s:last_return_seq(最近回流序列)
+-- c1_s:cate1_seq(merge_first_level_cate序列-回流率), c2_s:cate2_seq(merge_second_level_cate序列-回流率)
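+-- l1_s:label1_seq(festive_label1序列-回流率), l2_s:label2_seq(festive_label2序列-回流率)
+-- l_r1_s:last_1_return_seq (most recent return sequence built from is_return_noself / return_1_uv_noself)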
+--********************************************************************--
+-- Output table: one (mid, feature) row per (dt, hh) partition; `feature` is a STRING column.
+CREATE TABLE IF NOT EXISTS loghubods.alg_recsys_feature_user_share_return_stat
+(
+    mid     STRING COMMENT 'mid',
+    feature STRING COMMENT '统计值'
+)
+PARTITIONED BY
+(
+    dt STRING COMMENT '日期:20240105',
+    hh STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = '统计用户最近30d的分享回流数据')
+LIFECYCLE 60
+;
+
+INSERT OVERWRITE TABLE loghubods.alg_recsys_feature_user_share_return_stat PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_video_info AS 
+(
+    -- Video attributes (channel, merged categories, festive labels), one row per vid,
+    -- read from the partitions of the current hour and the two hours before it.
+    SELECT  vid
+            ,TRIM(channel) AS channel
+            ,TRIM(cate1) AS cate1
+            ,TRIM(cate2) AS cate2
+            ,TRIM(label1) AS label1
+            ,TRIM(label2) AS label2
+    FROM    (
+                SELECT  CAST(vid AS BIGINT) vid
+                        ,GET_JSON_OBJECT(feature,"$.channel") AS channel
+                        ,GET_JSON_OBJECT(feature,"$.merge_first_level_cate") AS cate1
+                        ,GET_JSON_OBJECT(feature,"$.merge_second_level_cate") AS cate2
+                        ,GET_JSON_OBJECT(feature,"$.festive_label1") AS label1
+                        ,GET_JSON_OBJECT(feature,"$.festive_label2") AS label2
+                        -- Order by partition so rn = 1 is the newest snapshot of the vid; without an
+                        -- ORDER BY the surviving row was arbitrary and could differ between reruns.
+                        ,ROW_NUMBER() OVER (PARTITION BY vid ORDER BY dt DESC,hh DESC ) AS rn
+                FROM    loghubods.alg_vid_feature_basic_info
+                WHERE   CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 2 * 3600),'YYYYMMDDHH') AND "${dt}${hh}"
+            ) 
+    WHERE   rn = 1
+)
+,t_base AS
+(
+    -- Shared exposures (is_share = '1') inside the look-back window, enriched with the
+    -- video attributes of t_video_info; numeric fields are cast to BIGINT.
+    SELECT  expo.mid,
+            expo.vid,
+            expo.is_share,
+            expo.share_cnt,
+            expo.is_return_noself,
+            expo.return_1_uv_noself,
+            expo.is_return_n_noself,
+            expo.return_n_uv_noself,
+            expo.ts,
+            info.channel,
+            info.cate1,
+            info.cate2,
+            info.label1,
+            info.label2
+    FROM    (
+                -- uid is used as the user key when it is non-empty and not the literal 'null'; otherwise mid.
+                SELECT  CASE
+                            WHEN uid IS NOT NULL AND LENGTH(uid) > 0 AND uid != 'null' THEN uid
+                            ELSE mid
+                        END AS mid,
+                        CAST(vid AS BIGINT) AS vid,
+                        CAST(is_share AS BIGINT) AS is_share,
+                        CAST(share_cnt AS BIGINT) AS share_cnt,
+                        CAST(is_return_noself AS BIGINT) AS is_return_noself,
+                        CAST(return_1_uv_noself AS BIGINT) AS return_1_uv_noself,
+                        CAST(is_return_n_noself AS BIGINT) AS is_return_n_noself,
+                        CAST(return_n_uv_noself AS BIGINT) AS return_n_uv_noself,
+                        CAST(ts AS BIGINT) AS ts
+                FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+                -- Window: from ${hours_early} hours before ${dt}${hh} up to one hour before it (both inclusive).
+                WHERE   CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - ${hours_early} * 3600),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 1 * 3600),'YYYYMMDDHH')
+                AND     mid IS NOT NULL
+                AND     LENGTH(mid) > 1
+                AND     vid IS NOT NULL
+                AND     LENGTH(vid) > 1
+                AND     is_share = '1'
+            ) expo
+    LEFT JOIN t_video_info info
+    ON      expo.vid = info.vid
+)
+,t_total AS
+(
+    -- Per-mid totals over t_base: sums of the share / return columns plus their per-row maxima.
+    SELECT  mid,
+            SUM(is_share) AS share_pv,
+            SUM(share_cnt) AS share_cnt,
+            SUM(is_return_n_noself) AS return_n_pv_noself,
+            SUM(return_n_uv_noself) AS return_n_uv_noself,
+            MAX(share_cnt) AS max_share_cnt,
+            MAX(return_n_uv_noself) AS max_return_uv
+    FROM    t_base
+    GROUP BY mid
+)
+,t_max_share AS
+(
+    -- Per mid: JSON-array string of the ${max_n} rows with the largest share_cnt
+    -- (ties broken by the larger ts). Only rows with share_cnt > 0 take part.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(JSON_OBJECT("id",vid,"cnt",share_cnt,"ts",ts)) AS record
+                FROM    (
+                            SELECT  mid,
+                                    vid,
+                                    share_cnt,
+                                    ts,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY share_cnt DESC,ts DESC) AS rn
+                            FROM    t_base
+                            WHERE   share_cnt > 0
+                        )
+                WHERE   rn <= ${max_n}
+            )
+    GROUP BY mid
+)
+,t_max_return AS
+(
+    -- Per mid: JSON-array string of the ${max_n} rows with the largest return_n_uv_noself
+    -- (ties broken by the larger ts). Only rows with a positive return flag and uv take part.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(JSON_OBJECT("id",vid,"uv",return_n_uv_noself,"ts",ts)) AS record
+                FROM    (
+                            SELECT  mid,
+                                    vid,
+                                    return_n_uv_noself,
+                                    ts,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY return_n_uv_noself DESC,ts DESC) AS rn
+                            FROM    t_base
+                            WHERE   is_return_n_noself > 0
+                            AND     return_n_uv_noself > 0
+                        )
+                WHERE   rn <= ${max_n}
+            )
+    GROUP BY mid
+)
+,t_last_share AS
+(
+    -- Per mid: JSON-array string of the ${last_n} rows with the largest ts among rows with share_cnt > 0.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(JSON_OBJECT("id",vid,"cnt",share_cnt,"ts",ts)) AS record
+                FROM    (
+                            SELECT  mid,
+                                    vid,
+                                    share_cnt,
+                                    ts,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY ts DESC) AS rn
+                            FROM    t_base
+                            WHERE   share_cnt > 0
+                        )
+                WHERE   rn <= ${last_n}
+            )
+    GROUP BY mid
+)
+,t_last_return AS
+(
+    -- Per mid: JSON-array string of the ${last_n} rows with the largest ts among rows
+    -- whose is_return_n_noself and return_n_uv_noself are both positive.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(JSON_OBJECT("id",vid,"uv",return_n_uv_noself,"ts",ts)) AS record
+                FROM    (
+                            SELECT  mid,
+                                    vid,
+                                    return_n_uv_noself,
+                                    ts,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY ts DESC) AS rn
+                            FROM    t_base
+                            WHERE   is_return_n_noself > 0
+                            AND     return_n_uv_noself > 0
+                        )
+                WHERE   rn <= ${last_n}
+            )
+    GROUP BY mid
+)
+,t_last_1_return AS
+(
+    -- Per mid: JSON-array string of the ${last_n} rows with the largest ts among rows
+    -- whose is_return_noself and return_1_uv_noself are both positive.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(JSON_OBJECT("id",vid,"uv",return_1_uv_noself,"ts",ts)) AS record
+                FROM    (
+                            SELECT  mid,
+                                    vid,
+                                    return_1_uv_noself,
+                                    ts,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY ts DESC) AS rn
+                            FROM    t_base
+                            WHERE   is_return_noself > 0
+                            AND     return_1_uv_noself > 0
+                        )
+                WHERE   rn <= ${last_n}
+            )
+    GROUP BY mid
+)
+,t_cate1 AS
+(
+    -- Per mid: JSON-array string of the top ${cate_n} cate1 values, ranked by
+    -- return_n_uv_noself / (share_pv + ${smooth_plus}). NULL, 'unknown' and empty cate1 are skipped.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(
+                            JSON_OBJECT("na",cate1,"sp",share_pv,"rp",return_n_pv_noself,"ru",return_n_uv_noself,"mu",max_return_uv)
+                        ) AS record
+                FROM    (
+                            -- Non-positive return metrics are emitted as NULL.
+                            SELECT  mid,
+                                    cate1,
+                                    share_pv,
+                                    CASE WHEN return_n_pv_noself > 0 THEN return_n_pv_noself END AS return_n_pv_noself,
+                                    CASE WHEN return_n_uv_noself > 0 THEN return_n_uv_noself END AS return_n_uv_noself,
+                                    CASE WHEN max_return_uv > 0 THEN max_return_uv END AS max_return_uv,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY (1.0 * return_n_uv_noself / (share_pv + ${smooth_plus})) DESC) AS rn
+                            FROM    (
+                                        SELECT  mid,
+                                                cate1,
+                                                SUM(is_share) AS share_pv,
+                                                SUM(is_return_n_noself) AS return_n_pv_noself,
+                                                SUM(return_n_uv_noself) AS return_n_uv_noself,
+                                                MAX(return_n_uv_noself) AS max_return_uv
+                                        FROM    t_base
+                                        WHERE   cate1 IS NOT NULL
+                                        AND     cate1 != 'unknown'
+                                        AND     cate1 != ''
+                                        GROUP BY mid,
+                                                 cate1
+                                    )
+                        )
+                WHERE   rn <= ${cate_n}
+            )
+    GROUP BY mid
+)
+,t_cate2 AS
+(
+    -- Per mid: JSON-array string of the top ${cate_n} cate2 values, ranked by
+    -- return_n_uv_noself / (share_pv + ${smooth_plus}). NULL, 'unknown' and empty cate2 are skipped.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(
+                            JSON_OBJECT("na",cate2,"sp",share_pv,"rp",return_n_pv_noself,"ru",return_n_uv_noself,"mu",max_return_uv)
+                        ) AS record
+                FROM    (
+                            -- Non-positive return metrics are emitted as NULL.
+                            SELECT  mid,
+                                    cate2,
+                                    share_pv,
+                                    CASE WHEN return_n_pv_noself > 0 THEN return_n_pv_noself END AS return_n_pv_noself,
+                                    CASE WHEN return_n_uv_noself > 0 THEN return_n_uv_noself END AS return_n_uv_noself,
+                                    CASE WHEN max_return_uv > 0 THEN max_return_uv END AS max_return_uv,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY (1.0 * return_n_uv_noself / (share_pv + ${smooth_plus})) DESC) AS rn
+                            FROM    (
+                                        SELECT  mid,
+                                                cate2,
+                                                SUM(is_share) AS share_pv,
+                                                SUM(is_return_n_noself) AS return_n_pv_noself,
+                                                SUM(return_n_uv_noself) AS return_n_uv_noself,
+                                                MAX(return_n_uv_noself) AS max_return_uv
+                                        FROM    t_base
+                                        WHERE   cate2 IS NOT NULL
+                                        AND     cate2 != 'unknown'
+                                        AND     cate2 != ''
+                                        GROUP BY mid,
+                                                 cate2
+                                    )
+                        )
+                WHERE   rn <= ${cate_n}
+            )
+    GROUP BY mid
+)
+,t_label1 AS
+(
+    -- Per mid: JSON-array string of the top ${cate_n} label1 values, ranked by
+    -- return_n_uv_noself / (share_pv + ${smooth_plus}). NULL, 'unknown' and empty label1 are skipped.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(
+                            JSON_OBJECT("na",label1,"sp",share_pv,"rp",return_n_pv_noself,"ru",return_n_uv_noself,"mu",max_return_uv)
+                        ) AS record
+                FROM    (
+                            -- Non-positive return metrics are emitted as NULL.
+                            SELECT  mid,
+                                    label1,
+                                    share_pv,
+                                    CASE WHEN return_n_pv_noself > 0 THEN return_n_pv_noself END AS return_n_pv_noself,
+                                    CASE WHEN return_n_uv_noself > 0 THEN return_n_uv_noself END AS return_n_uv_noself,
+                                    CASE WHEN max_return_uv > 0 THEN max_return_uv END AS max_return_uv,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY (1.0 * return_n_uv_noself / (share_pv + ${smooth_plus})) DESC) AS rn
+                            FROM    (
+                                        SELECT  mid,
+                                                label1,
+                                                SUM(is_share) AS share_pv,
+                                                SUM(is_return_n_noself) AS return_n_pv_noself,
+                                                SUM(return_n_uv_noself) AS return_n_uv_noself,
+                                                MAX(return_n_uv_noself) AS max_return_uv
+                                        FROM    t_base
+                                        WHERE   label1 IS NOT NULL
+                                        AND     label1 != 'unknown'
+                                        AND     label1 != ''
+                                        GROUP BY mid,
+                                                 label1
+                                    )
+                        )
+                WHERE   rn <= ${cate_n}
+            )
+    GROUP BY mid
+)
+,t_label2 AS
+(
+    -- Per mid: JSON-array string of the top ${cate_n} label2 values, ranked by
+    -- return_n_uv_noself / (share_pv + ${smooth_plus}). NULL, 'unknown' and empty label2 are skipped.
+    SELECT  mid,
+            CONCAT("[",ARRAY_JOIN(COLLECT_LIST(record),","),"]") AS seq
+    FROM    (
+                SELECT  mid,
+                        JSON_FORMAT(
+                            JSON_OBJECT("na",label2,"sp",share_pv,"rp",return_n_pv_noself,"ru",return_n_uv_noself,"mu",max_return_uv)
+                        ) AS record
+                FROM    (
+                            -- Non-positive return metrics are emitted as NULL.
+                            SELECT  mid,
+                                    label2,
+                                    share_pv,
+                                    CASE WHEN return_n_pv_noself > 0 THEN return_n_pv_noself END AS return_n_pv_noself,
+                                    CASE WHEN return_n_uv_noself > 0 THEN return_n_uv_noself END AS return_n_uv_noself,
+                                    CASE WHEN max_return_uv > 0 THEN max_return_uv END AS max_return_uv,
+                                    ROW_NUMBER() OVER (PARTITION BY mid ORDER BY (1.0 * return_n_uv_noself / (share_pv + ${smooth_plus})) DESC) AS rn
+                            FROM    (
+                                        SELECT  mid,
+                                                label2,
+                                                SUM(is_share) AS share_pv,
+                                                SUM(is_return_n_noself) AS return_n_pv_noself,
+                                                SUM(return_n_uv_noself) AS return_n_uv_noself,
+                                                MAX(return_n_uv_noself) AS max_return_uv
+                                        FROM    t_base
+                                        WHERE   label2 IS NOT NULL
+                                        AND     label2 != 'unknown'
+                                        AND     label2 != ''
+                                        GROUP BY mid,
+                                                 label2
+                                    )
+                        )
+                WHERE   rn <= ${cate_n}
+            )
+    GROUP BY mid
+)
+,t_result AS
+(
+    -- One row per mid of t_total, left-joined to every sequence CTE (a mid without a given
+    -- sequence keeps NULL there). Non-positive return / max metrics are emitted as NULL.
+    SELECT  tot.mid,
+            tot.share_pv,
+            tot.share_cnt,
+            CASE WHEN tot.return_n_pv_noself > 0 THEN tot.return_n_pv_noself END AS return_pv,
+            CASE WHEN tot.return_n_uv_noself > 0 THEN tot.return_n_uv_noself END AS return_uv,
+            CASE WHEN tot.max_share_cnt > 0 THEN tot.max_share_cnt END AS max_share_cnt,
+            CASE WHEN tot.max_return_uv > 0 THEN tot.max_return_uv END AS max_return_uv,
+            ms.seq AS max_share_seq,
+            mr.seq AS max_return_seq,
+            ls.seq AS last_share_seq,
+            lr.seq AS last_return_seq,
+            c1.seq AS cate1_seq,
+            c2.seq AS cate2_seq,
+            l1.seq AS label1_seq,
+            l2.seq AS label2_seq,
+            lr1.seq AS last_1_return_seq
+    FROM    t_total tot
+    LEFT JOIN t_max_share ms
+    ON      tot.mid = ms.mid
+    LEFT JOIN t_max_return mr
+    ON      tot.mid = mr.mid
+    LEFT JOIN t_last_share ls
+    ON      tot.mid = ls.mid
+    LEFT JOIN t_last_return lr
+    ON      tot.mid = lr.mid
+    LEFT JOIN t_cate1 c1
+    ON      tot.mid = c1.mid
+    LEFT JOIN t_cate2 c2
+    ON      tot.mid = c2.mid
+    LEFT JOIN t_label1 l1
+    ON      tot.mid = l1.mid
+    LEFT JOIN t_label2 l2
+    ON      tot.mid = l2.mid
+    LEFT JOIN t_last_1_return lr1
+    ON      tot.mid = lr1.mid
+)
+-- Final projection: pack every per-mid statistic into a single JSON string column.
+SELECT  mid,
+        JSON_FORMAT(
+            JSON_OBJECT('s_pv',share_pv,"s_cnt",share_cnt,"r_pv",return_pv,"r_uv",return_uv,"m_s_cnt",max_share_cnt,"m_r_uv",max_return_uv,"m_s_s",max_share_seq,"m_r_s",max_return_seq,"l_s_s",last_share_seq,"l_r_s",last_return_seq,"c1_s",cate1_seq,"c2_s",cate2_seq,"l1_s",label1_seq,"l2_s",label2_seq,"l_r1_s",last_1_return_seq)
+        ) AS feature
+FROM    t_result
+;

+ 69 - 0
production_code/loghubods.alg_recsys_recall_cf_mid_ros_scenetype.json

@@ -0,0 +1,69 @@
+{
+  "name": "alg_recsys_recall_cf_mid_ros_scenetype",
+  "project": "loghubods",
+  "comment": "模拟returnv2,通过回流找mid,mid找分享。基于rosn的召回策略",
+  "columns": [
+    {
+      "name": "sence_type",
+      "type": "STRING",
+      "comment": "用户入口来源类型,[私聊 群聊 公众号文章等]。(沿用上游的拼写错误,正确是scene)"
+    },
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "回流视频,作为触发源头。"
+    },
+    {
+      "name": "vids",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "scores",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "ranks",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "cnt",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20241216"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:14"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20241216"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:14"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1022960572,
+      "name": "20250104_returnv2_cf召回_ros_scenetype"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.user_share_log_flow",
+    "loghubods.video_action_log_flow_new"
+  ]
+}

+ 260 - 0
production_code/loghubods.alg_recsys_recall_cf_mid_ros_scenetype.sql

@@ -0,0 +1,260 @@
+-- Task: 20250104_returnv2_cf召回_ros_scenetype  ID: 1022960572  Type: ODPS_SQL
+--@exclude_input=loghubods.user_share_log_flow
+--@exclude_input=loghubods.video_action_log_flow_new
+--odps sql 
+--********************************************************************--
+--author:赵海鹏
+--create time:2025-01-04 17:37:45
+-- 目的: 找到某个视频上回流的人,还通过哪些视频回流或者还看了哪些视频
+-- 1. 表1:统计用户在过去N小时内,在某个视频上的曝光和回流数
+-- 2. 表2:统计某个视频在过去N小时内分享的 不同用户的回流次数【尽量关联scenetype, 通过曝光日志关联】
+-- 3. 表2作为主表,通过mid, Left Join 表1,
+--********************************************************************--
+-- Recall output table, partitioned by (dt, hh). All payload columns are STRING.
+CREATE TABLE IF NOT EXISTS loghubods.alg_recsys_recall_cf_mid_ros_scenetype
+(
+    sence_type STRING COMMENT '用户入口来源类型,[私聊 群聊 公众号文章等]。(沿用上游的拼写错误,正确是scene)',
+    vid        STRING COMMENT '回流视频,作为触发源头。',
+    vids       STRING COMMENT '',
+    scores     STRING COMMENT '',
+    ranks      STRING COMMENT '',
+    cnt        STRING COMMENT ''
+)
+PARTITIONED BY
+(
+    dt STRING COMMENT '日期:20241216',
+    hh STRING COMMENT '小时:14'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = '模拟returnv2,通过回流找mid,mid找分享。基于rosn的召回策略')
+LIFECYCLE 90
+;
+
+INSERT OVERWRITE TABLE loghubods.alg_recsys_recall_cf_mid_ros_scenetype PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_view AS
+(
+    -- videoView events from the hour partitions in [${dt}${hh} - ${time_diff} seconds, ${dt}${hh}),
+    -- apptype "12" excluded. hotsencetype keeps only the five listed codes; anything else becomes 'other'.
+    SELECT  apptype,
+            pagesource,
+            uid,
+            mid,
+            clienttimestamp,
+            sessionid,
+            subsessionid,
+            shareid,
+            headvideoid,
+            videoid AS vid,
+            IF(hotsencetype IN ('1008','1007','1058','1074','1010'),hotsencetype,'other') AS hotsencetype,
+            CONCAT(year,month,day,hour) AS dthh,
+            GET_JSON_OBJECT(extparams,'$.recomTraceId') AS recomTraceId
+    FROM    loghubods.video_action_log_flow_new
+    WHERE   CONCAT(year,month,day,hour) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - ${time_diff}),'YYYYMMDDHH')
+    AND     CONCAT(year,month,day,hour) < '${dt}${hh}'
+    AND     businesstype = 'videoView'
+    AND     apptype NOT IN ("12")
+)
+,t_return AS -- returns (clicks on the share page) inside the look-back window
+(
+    SELECT  clk.apptype,
+            clk.vid,
+            clk.mid,
+            clk.shareid,
+            COALESCE(vw.hotsencetype,"other") AS hotsencetype
+    FROM    (
+                -- "click" log rows whose pagesource ends with -pages/user-videos-share
+                SELECT  apptype,
+                        clickobjectid AS vid,
+                        machinecode AS mid,
+                        pagesource,
+                        recomtraceid,
+                        sessionid,
+                        shareid,
+                        subsessionid,
+                        CONCAT(year,month,day,hour) AS dthh,
+                        COALESCE(CAST(clienttimestamp AS BIGINT) / 1000,0) AS ts
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - ${time_diff}),'YYYYMMDDHH')
+                AND     CONCAT(year,month,day,hour) < '${dt}${hh}'
+                AND     __topic__ = "click"
+                AND     apptype NOT IN ("12")
+                AND     pagesource REGEXP "-pages/user-videos-share$"
+            ) clk
+    LEFT JOIN   (
+                    -- Latest matching view row per (apptype, mid, vid, subsessionid, pagesource);
+                    -- it supplies the scene type, which falls back to "other" when no view matches.
+                    SELECT  *
+                    FROM    (
+                                SELECT  apptype,
+                                        mid,
+                                        vid,
+                                        subsessionid,
+                                        pagesource,
+                                        hotsencetype,
+                                        ROW_NUMBER() OVER (PARTITION BY apptype,mid,vid,subsessionid,pagesource ORDER BY CAST(clienttimestamp AS BIGINT) DESC) AS rn
+                                FROM    t_view
+                                WHERE   pagesource REGEXP "-pages/user-videos-share$"
+                            )
+                    WHERE   rn = 1
+                ) vw
+    ON      clk.apptype = vw.apptype
+    AND     clk.mid = vw.mid
+    AND     clk.vid = vw.vid
+    AND     clk.subsessionid = vw.subsessionid
+    AND     clk.pagesource = vw.pagesource
+)
+,t_share AS -- share records; when a share is logged several times keep the most recent row
+(
+    SELECT  *
+    FROM    (
+                SELECT  apptype,
+                        shareobjectid AS vid,
+                        shareid,
+                        machinecode AS mid,
+                        COALESCE(CAST(clienttimestamp AS BIGINT) / 1000,0) AS share_ts,
+                        ROW_NUMBER() OVER (PARTITION BY apptype,shareobjectid,shareid,machinecode ORDER BY CAST(clienttimestamp AS BIGINT) DESC) AS rn
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - ${time_diff}),'YYYYMMDDHH')
+                AND     CONCAT(year,month,day,hour) < '${dt}${hh}'
+                AND     __topic__ = "share"
+                AND     apptype NOT IN ("12")
+                AND     shareobjectid IS NOT NULL
+                AND     shareobjectid != ''
+            )
+    WHERE   rn = 1
+)
+,t_left AS -- returns in the window whose originating share is also in the window
+(
+    -- Inner join: a return row counts only when a share with the same apptype / shareid / vid
+    -- made by a different mid exists. share_ts is never NULL on a matched t_share row (it is
+    -- COALESCE'd to 0), so this equals the LEFT JOIN + "share_ts IS NOT NULL" formulation.
+    SELECT  rt.vid,
+            rt.mid,
+            rt.hotsencetype,
+            COUNT(1) AS cnt
+    FROM    t_return rt
+    JOIN    t_share sh
+    ON      rt.apptype = sh.apptype
+    AND     rt.shareid = sh.shareid
+    AND     rt.vid = sh.vid
+    AND     rt.mid != sh.mid
+    GROUP BY rt.vid,
+             rt.mid,
+             rt.hotsencetype
+)
+,t_right AS -- per (mid, vid) in the window: view count plus the share / return counts of that mid's shares
+(
+    SELECT  vw.mid,
+            vw.vid,
+            COALESCE(vw.view_cnt,0) AS view_cnt,
+            COALESCE(sr.share_cnt,0) AS share_cnt,
+            COALESCE(sr.return_cnt,0) AS return_cnt
+    FROM    (
+                SELECT  mid,
+                        vid,
+                        COUNT(1) AS view_cnt
+                FROM    t_view
+                GROUP BY mid,
+                         vid
+            ) vw
+    LEFT JOIN   (
+                    -- share_cnt counts distinct shareids of the sharer; return_cnt counts the
+                    -- t_return rows matched on shareid + vid.
+                    SELECT  sh.mid,
+                            sh.vid,
+                            COUNT(DISTINCT sh.shareid) AS share_cnt,
+                            COUNT(rt.mid) AS return_cnt
+                    FROM    t_share sh
+                    LEFT JOIN t_return rt
+                    ON      sh.shareid = rt.shareid
+                    AND     sh.vid = rt.vid
+                    GROUP BY sh.mid,
+                             sh.vid
+                ) sr
+    ON      sr.mid = vw.mid
+    AND     sr.vid = vw.vid
+)
+,t_va_vb_return_view AS
+(
+    -- For each (returned video, viewed video, scene type): number of distinct mids that
+    -- appear in t_left for the first video and in t_view for the second.
+    SELECT  lft.vid AS vid_left,
+            seen.vid AS vid_right,
+            lft.hotsencetype,
+            COUNT(DISTINCT lft.mid) AS va_vb_together_cnt
+    FROM    t_left lft
+    JOIN    (
+                SELECT  vid,
+                        mid
+                FROM    t_view
+                GROUP BY vid,
+                         mid
+            ) seen
+    ON      lft.mid = seen.mid
+    GROUP BY lft.vid,
+             seen.vid,
+             lft.hotsencetype
+)
+,t_result_v1 AS
+(
+    -- Score every (trigger video, candidate video, scene type) pair:
+    --   share  = total share_cnt of the connecting mids on the candidate
+    --   return = number of connecting rows whose return_cnt is positive
+    --   score  = return / share * LN(return + 1)
+    -- Only trigger videos with at least 5 distinct mids in t_left take part.
+    SELECT  trig.vid AS vid_left,
+            cand.vid AS vid_right,
+            trig.hotsencetype,
+            SUM(cand.share_cnt) AS share,
+            SUM(IF(cand.return_cnt > 0,1,0)) AS return,
+            SUM(IF(cand.return_cnt > 0,1,0)) / SUM(cand.share_cnt) * LN(SUM(IF(cand.return_cnt > 0,1,0)) + 1) AS score
+    FROM    (
+                SELECT  lft.vid,
+                        lft.mid,
+                        lft.hotsencetype
+                FROM    (
+                            SELECT  vid
+                            FROM    t_left
+                            GROUP BY vid
+                            HAVING  COUNT(DISTINCT mid) >= 5
+                        ) hot
+                JOIN    t_left lft
+                ON      hot.vid = lft.vid
+            ) trig
+    JOIN    t_right cand
+    ON      trig.mid = cand.mid
+    AND     trig.vid <> cand.vid
+    GROUP BY trig.vid,
+             cand.vid,
+             trig.hotsencetype
+)
+,t_result_v2 AS
+(
+    -- Attach the co-occurrence count to every scored pair and rank the pairs of a trigger video by it.
+    -- NOTE(review): rn_old is partitioned by vid_left only, while rows are per
+    -- (vid_left, vid_right, hotsencetype) -- confirm that ranking across scene types is intended.
+    SELECT  sc.vid_left,
+            sc.vid_right,
+            sc.hotsencetype,
+            sc.share,
+            sc.return,
+            sc.score,
+            co.va_vb_together_cnt,
+            ROW_NUMBER() OVER (PARTITION BY sc.vid_left ORDER BY co.va_vb_together_cnt DESC) AS rn_old
+    FROM    t_result_v1 sc
+    JOIN    t_va_vb_return_view co
+    ON      sc.vid_left = co.vid_left
+    AND     sc.vid_right = co.vid_right
+    AND     sc.hotsencetype = co.hotsencetype
+)
+,t_recall AS 
+(
+    -- Top-100 candidates per (hotsencetype, vid_left) by score, limited to rows with a
+    -- positive score and rn_old <= 100.
+    SELECT  *
+    FROM    (
+                SELECT  hotsencetype
+                        ,vid_left
+                        ,vid_right
+                        ,share
+                        ,return
+                        ,score
+                        ,ROW_NUMBER() OVER (PARTITION BY hotsencetype,vid_left ORDER BY score DESC ) AS rn
+                FROM    t_result_v2
+                WHERE   score > 0
+                AND     rn_old <= 100
+            ) 
+    WHERE   rn <= 100
+)
+-- Flatten each group into comma-separated lists. The element order is fixed inside the
+-- aggregates with WITHIN GROUP (ORDER BY rn): a DISTRIBUTE BY / SORT BY in the CTE is not
+-- guaranteed to survive the GROUP BY below. All three lists share the same sort key so
+-- vids, scores and ranks stay position-aligned.
+SELECT  hotsencetype
+        ,vid_left AS vid
+        ,CONCAT_WS(',',COLLECT_LIST(vid_right) WITHIN GROUP (ORDER BY rn)) AS vids
+        ,CONCAT_WS(',',COLLECT_LIST(CAST(round(score,4) AS STRING)) WITHIN GROUP (ORDER BY rn)) AS scores
+        ,CONCAT_WS(',',COLLECT_LIST(CAST(rn AS STRING)) WITHIN GROUP (ORDER BY rn)) AS ranks
+        ,COUNT(vid_right) AS cnt
+FROM    t_recall
+GROUP BY hotsencetype
+         ,vid_left
+ORDER BY COUNT(vid_right) DESC
+;

+ 69 - 0
production_code/loghubods.alg_recsys_recall_cf_mid_rov_scenetype.json

@@ -0,0 +1,69 @@
+{
+  "name": "alg_recsys_recall_cf_mid_rov_scenetype",
+  "project": "loghubods",
+  "comment": "模拟returnv2,通过回流找mid,mid找曝光。基于rovn的召回策略",
+  "columns": [
+    {
+      "name": "sence_type",
+      "type": "STRING",
+      "comment": "用户入口来源类型,[私聊 群聊 公众号文章等]。(沿用上游的拼写错误,正确是scene)"
+    },
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "回流视频,作为触发源头。"
+    },
+    {
+      "name": "vids",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "scores",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "ranks",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "cnt",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20241216"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:14"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20241216"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:14"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1022960345,
+      "name": "20250104_returnv2_cf召回_rov_scenetype"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.user_share_log_flow",
+    "loghubods.video_action_log_flow_new"
+  ]
+}

+ 258 - 0
production_code/loghubods.alg_recsys_recall_cf_mid_rov_scenetype.sql

@@ -0,0 +1,258 @@
+-- Task: 20250104_returnv2_cf召回_rov_scenetype  ID: 1022960345  Type: ODPS_SQL
+--@exclude_input=loghubods.user_share_log_flow
+--@exclude_input=loghubods.video_action_log_flow_new
+--odps sql 
+--********************************************************************--
+--author:赵海鹏
+--create time:2025-01-04 17:37:45
+-- Purpose: for the users who returned through a given video, find which other videos they also returned through or viewed.
+-- 1. Table 1: per user, the exposure count and the return count on each video over the past N hours.
+-- 2. Table 2: per video shared in the past N hours, the return counts of distinct users [joined to the scene type where possible, via the exposure log].
+-- 3. Table 2 is the driving table and is joined to Table 1 on mid (described as a Left Join, while the INSERT in this file uses an inner JOIN).
+--********************************************************************--
+CREATE TABLE IF NOT EXISTS loghubods.alg_recsys_recall_cf_mid_rov_scenetype -- one row per (scene type, trigger video) holding the recall list of that video
+(
+    sence_type STRING COMMENT '用户入口来源类型,[私聊 群聊 公众号文章等]。(沿用上游的拼写错误,正确是scene)' -- user entry scene type (private chat, group chat, official-account article, ...), name keeps the upstream misspelling of scene
+    ,vid        STRING COMMENT '回流视频,作为触发源头。' -- trigger video: the video that users returned through
+    ,vids       STRING COMMENT '' -- comma-separated ids of the recalled videos (filled from vid_right by the INSERT in this file)
+    ,scores     STRING COMMENT '' -- comma-separated scores of the recalled videos, each rounded to 4 decimals
+    ,ranks      STRING COMMENT '' -- comma-separated rank numbers (rn) of the recalled videos
+    ,cnt        STRING COMMENT '' -- number of recalled videos in the row, written from a COUNT and stored as STRING
+)
+PARTITIONED BY 
+(
+    dt          STRING COMMENT '日期:20241216' -- date partition, e.g. 20241216
+    ,hh         STRING COMMENT '小时:14' -- hour partition, e.g. 14
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = '模拟returnv2,通过回流找mid,mid找曝光。基于rovn的召回策略') -- table comment: mimics returnv2 (returns -> mid -> exposures), a recall strategy based on rovn
+LIFECYCLE 90
+;
+
+INSERT OVERWRITE TABLE loghubods.alg_recsys_recall_cf_mid_rov_scenetype PARTITION (dt = '${dt}',hh = '${hh}') -- each run overwrites exactly one (dt, hh) partition
+WITH t_view AS -- video exposure (videoView) events inside the look-back window
+(
+    SELECT  apptype
+            ,pagesource
+            ,uid
+            ,mid
+            ,clienttimestamp
+            ,sessionid
+            ,subsessionid
+            ,shareid
+            ,headvideoid
+            ,videoid AS vid
+            ,CASE   WHEN hotsencetype IN ('1008','1007','1058','1074','1010') THEN hotsencetype -- these five scene codes are kept as-is
+                    ELSE 'other' -- every other value, NULL included, is bucketed as other
+            END AS hotsencetype
+            ,CONCAT(year,month,day,hour) AS dthh
+            ,GET_JSON_OBJECT(extparams,'$.recomTraceId') AS recomTraceId
+    FROM    loghubods.video_action_log_flow_new
+    WHERE   CONCAT(year,month,day,hour) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - ${time_diff}),'YYYYMMDDHH') -- window start: the run hour minus time_diff seconds, truncated to the hour
+    AND     CONCAT(year,month,day,hour) < '${dt}${hh}' -- window end is exclusive, so the run hour itself is not read
+    AND     businesstype = 'videoView'
+    AND     apptype NOT IN ("12")
+)
+,t_return AS -- returns (click events on shared videos) within the past N hours, each tagged with a scene type
+(
+    SELECT  a.apptype
+            ,a.vid
+            ,a.mid
+            ,a.shareid
+            ,COALESCE(b.hotsencetype,"other") AS hotsencetype -- falls back to other when no exposure row matches the click
+    FROM    (
+                SELECT  apptype
+                        ,clickobjectid AS vid
+                        ,machinecode AS mid
+                        ,pagesource
+                        ,recomtraceid
+                        ,sessionid
+                        ,shareid
+                        ,subsessionid
+                        ,CONCAT(year,month,day,hour) AS dthh
+                        ,COALESCE(CAST(clienttimestamp AS BIGINT) / 1000,0) AS ts -- clienttimestamp divided by 1000 (presumably ms to s, TODO confirm), 0 when the value is NULL
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - ${time_diff}),'YYYYMMDDHH') -- same look-back window as t_view
+                AND     CONCAT(year,month,day,hour) < '${dt}${hh}'
+                AND     __topic__ = "click"
+                AND     apptype NOT IN ("12")
+                AND     pagesource REGEXP "-pages/user-videos-share$" -- only clicks whose pagesource ends with the share page path
+            ) a
+    LEFT JOIN   ( -- latest share-page exposure per (apptype, mid, vid, subsessionid, pagesource), used only to supply hotsencetype
+                    SELECT  *
+                    FROM    (
+                                SELECT  apptype
+                                        ,mid
+                                        ,vid
+                                        ,subsessionid
+                                        ,pagesource
+                                        ,hotsencetype
+                                        ,ROW_NUMBER() OVER (PARTITION BY apptype,mid,vid,subsessionid,pagesource ORDER BY CAST(clienttimestamp AS BIGINT) DESC ) AS rn
+                                FROM    t_view
+                                WHERE   pagesource REGEXP "-pages/user-videos-share$"
+                            ) 
+                    WHERE   rn = 1
+                ) b
+    ON      a.apptype = b.apptype
+    AND     a.mid = b.mid
+    AND     a.vid = b.vid
+    AND     a.subsessionid = b.subsessionid
+    AND     a.pagesource = b.pagesource
+)
+,t_share AS -- share records in the look-back window, when the same share is logged several times only the most recent row is kept
+(
+    SELECT  *
+    FROM    (
+                SELECT  apptype
+                        ,shareobjectid AS vid
+                        ,shareid
+                        ,machinecode AS mid
+                        ,COALESCE(CAST(clienttimestamp AS BIGINT) / 1000,0) AS share_ts -- never NULL because of the COALESCE, 0 when clienttimestamp is NULL
+                        ,ROW_NUMBER() OVER (PARTITION BY apptype,shareobjectid,shareid,machinecode ORDER BY CAST(clienttimestamp AS BIGINT) DESC ) AS rn -- rn = 1 is the newest row of each (apptype, video, shareid, sharer)
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - ${time_diff}),'YYYYMMDDHH')
+                AND     CONCAT(year,month,day,hour) < '${dt}${hh}'
+                AND     __topic__ = "share"
+                AND     apptype NOT IN ("12")
+                AND     shareobjectid IS NOT NULL -- rows without a shared video id are dropped
+                AND     shareobjectid != ''
+            ) 
+    WHERE   rn = 1
+)
+,t_left AS -- returns within the past N hours whose originating share also falls within the past N hours
+(
+    -- Inner join: a return row survives only when a share row with the same apptype, shareid and video exists
+    -- and was made by a different user (a click on the users own share is not counted).
+    SELECT  r.vid
+            ,r.mid
+            ,r.hotsencetype
+            ,COUNT(*) AS cnt
+    FROM    t_return r
+    JOIN    t_share s
+    ON      r.apptype = s.apptype
+    AND     r.shareid = s.shareid
+    AND     r.vid = s.vid
+    AND     r.mid != s.mid
+    GROUP BY r.vid
+             ,r.mid
+             ,r.hotsencetype
+)
+,t_right AS -- per (user, video) within the past N hours: exposure count and return count
+(
+    SELECT  tv.mid
+            ,tv.vid
+            ,COALESCE(tv.view_cnt,0) AS view_cnt
+            ,COALESCE(tr.return_cnt,0) AS return_cnt -- 0 when the user has no share row for this video
+    FROM    (
+                SELECT  mid
+                        ,vid
+                        ,COUNT(1) AS view_cnt -- number of exposure rows of this (mid, vid)
+                FROM    t_view
+                GROUP BY mid
+                         ,vid
+            ) tv
+    LEFT JOIN   ( -- per sharer (ts.mid) and video: number of return rows matched to shares made by that user
+                    SELECT  ts.mid
+                            ,ts.vid
+                            ,COUNT(tr.mid) AS return_cnt -- counts matched return rows with a non-NULL mid, so a share without returns yields 0
+                    FROM    t_share ts
+                    LEFT JOIN t_return tr
+                    ON      ts.shareid = tr.shareid -- NOTE(review): unlike t_left this join neither matches apptype nor excludes tr.mid = ts.mid, confirm this is intended
+                    AND     ts.vid = tr.vid
+                    GROUP BY ts.mid
+                             ,ts.vid
+                ) tr
+    ON      tr.mid = tv.mid
+    AND     tr.vid = tv.vid
+)
+,t_va_vb_return_view AS -- co-occurrence count: distinct users who returned via vid_left (under a scene type) and were also exposed to vid_right
+(
+    SELECT  a.vid AS vid_left
+            ,b.vid AS vid_right
+            ,a.hotsencetype
+            ,COUNT(DISTINCT a.mid) AS va_vb_together_cnt
+    FROM    t_left a
+    JOIN    (
+                SELECT  DISTINCT vid
+                        ,mid
+                FROM    t_view
+            ) b
+    ON      a.mid = b.mid -- no a.vid <> b.vid filter here, so pairs with vid_left = vid_right are counted as well
+    GROUP BY a.vid
+             ,b.vid
+             ,a.hotsencetype
+)
+,t_result_v1 AS -- candidate pairs (trigger video, other video) per scene type with aggregated counts and a score
+(
+    SELECT  a.vid AS vid_left
+            ,b.vid AS vid_right
+            ,a.hotsencetype
+            ,SUM(b.view_cnt) AS view -- sum of view_cnt on vid_right over the joined users
+            ,SUM(IF(b.return_cnt > 0,1,0)) AS return -- number of joined (user, vid_right) rows whose return_cnt is positive
+            ,SUM(IF(b.return_cnt > 0,1,0)) / SUM(b.view_cnt) * LN(SUM(IF(b.return_cnt > 0,1,0)) + 1) AS score -- score = return / view * ln(return + 1)
+    FROM    (
+                SELECT  b.vid
+                        ,b.mid
+                        ,b.hotsencetype
+                FROM    (
+                            SELECT  vid
+                            FROM    t_left
+                            GROUP BY vid
+                            HAVING  COUNT(DISTINCT mid) >= 5 -- keep trigger videos with at least 5 distinct returning users, counted across all scene types
+                        ) a
+                JOIN    t_left b
+                ON      a.vid = b.vid
+            ) a
+    JOIN    t_right b
+    ON      a.mid = b.mid
+    AND     a.vid <> b.vid -- a video is never paired with itself
+    GROUP BY a.vid
+             ,b.vid
+             ,a.hotsencetype
+)
+,t_result_v2 AS -- attaches the co-occurrence count to every scored pair and ranks the pairs by that count
+(
+    SELECT  a.vid_left
+            ,a.vid_right
+            ,a.hotsencetype
+            ,a.view
+            ,a.return
+            ,a.score
+            ,b.va_vb_together_cnt
+            ,ROW_NUMBER() OVER (PARTITION BY a.vid_left ORDER BY b.va_vb_together_cnt DESC ) AS rn_old -- NOTE(review): ranked per vid_left only, across all scene types, while t_recall ranks per (hotsencetype, vid_left), confirm this is intended
+    FROM    t_result_v1 a
+    JOIN    t_va_vb_return_view b -- inner join: a pair without a co-occurrence row is dropped
+    ON      a.vid_left = b.vid_left
+    AND     a.vid_right = b.vid_right
+    AND     a.hotsencetype = b.hotsencetype
+)
+,t_recall AS -- final candidates: per (scene type, trigger video) at most 100 pairs, ranked by score
+(
+    SELECT  *
+    FROM    (
+                SELECT  hotsencetype
+                        ,vid_left
+                        ,vid_right
+                        ,view
+                        ,return
+                        ,score
+                        ,ROW_NUMBER() OVER (PARTITION BY hotsencetype,vid_left ORDER BY score DESC ) AS rn
+                FROM    t_result_v2
+                WHERE   score > 0 -- drops pairs without any positive-return row (their score is 0)
+                AND     rn_old <= 100 -- pre-filter: the 100 pairs of each vid_left with the highest co-occurrence count
+            ) 
+    WHERE   rn <= 100
+    DISTRIBUTE BY hotsencetype,vid_left
+    SORT BY hotsencetype
+,vid_left
+,score DESC -- NOTE(review): presumably meant to make COLLECT_LIST below emit entries in score order, confirm that the engine guarantees this
+)SELECT  hotsencetype as sence_type -- output name follows the misspelled column of the target table
+        ,vid_left AS vid
+        ,CONCAT_WS(',',COLLECT_LIST(vid_right)) AS vids -- comma-separated recalled video ids
+        ,CONCAT_WS(',',COLLECT_LIST(CAST(round(score,4) AS STRING))) AS scores -- comma-separated scores rounded to 4 decimals
+        ,CONCAT_WS(',',COLLECT_LIST(CAST(rn AS STRING))) AS ranks -- comma-separated rank numbers
+        ,COUNT(vid_right) AS cnt -- a count written into a STRING column of the target table
+FROM    t_recall
+GROUP BY hotsencetype
+         ,vid_left
+ORDER BY COUNT(vid_right) DESC
+;

+ 54 - 0
production_code/loghubods.alg_vid_apptype_recommend_exp_feature_20250212.json

@@ -0,0 +1,54 @@
+{
+  "name": "alg_vid_apptype_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "03_推荐场景下的视频和apptype特征",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "apptype",
+      "type": "STRING",
+      "comment": "apptype"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023788964,
+      "name": "04_推荐场景下的视频_app特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 182 - 0
production_code/loghubods.alg_vid_apptype_recommend_exp_feature_20250212.sql


+ 54 - 0
production_code/loghubods.alg_vid_brand_recommend_exp_feature_20250212.json

@@ -0,0 +1,54 @@
+{
+  "name": "alg_vid_brand_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "05_推荐场景下的视频和手机品牌特征",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "brand",
+      "type": "STRING",
+      "comment": "手机品牌"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023788968,
+      "name": "06_推荐场景下的视频_brand特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 182 - 0
production_code/loghubods.alg_vid_brand_recommend_exp_feature_20250212.sql


+ 118 - 0
production_code/loghubods.alg_vid_feature_all_exp_base_add.json

@@ -0,0 +1,118 @@
+{
+  "name": "alg_vid_feature_all_exp_base_add",
+  "project": "loghubods",
+  "comment": "TABLE COMMENT",
+  "columns": [
+    {
+      "name": "apptype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "pagesource",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "logtimestamp",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "businesstype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "sessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "subsessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "abinfodata",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendlogvo",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "extparams",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "abcode",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendpagetype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "flowpool",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "level",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1018595946,
+      "name": "all曝光底表"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.video_action_log_flow_new"
+  ]
+}

+ 116 - 0
production_code/loghubods.alg_vid_feature_all_exp_base_add.sql

@@ -0,0 +1,116 @@
+-- Task: all曝光底表  ID: 1018595946  Type: ODPS_SQL
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2024-07-11 17:37:42
+--********************************************************************--
+--@exclude_input=loghubods.alg_vid_feature_all_exp_base_add
+--@exclude_input=loghubods.video_action_log_flow_new
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2024-07-09 14:06:40
+--********************************************************************--
+--CREATE TABLE IF NOT EXISTS loghubods.alg_vid_feature_all_exp_base_add
+--(
+--    apptype            STRING
+--    ,pagesource        STRING
+--    ,uid               STRING
+--    ,mid               STRING
+--    ,vid               STRING
+--    ,logtimestamp      STRING
+--    ,businesstype      STRING
+--    ,sessionid         STRING
+--    ,subsessionid      STRING
+--    ,abinfodata        STRING
+--    ,recommendlogvo    STRING
+--    ,extparams         STRING
+--    ,abcode            STRING
+--    ,recommendPageType STRING
+--    ,flowpool          STRING
+--    ,level             STRING
+--)
+--COMMENT 'TABLE COMMENT'
+--PARTITIONED BY 
+--(
+--    dt                 STRING COMMENT '天'
+--    ,hh                STRING COMMENT '小时'
+--)
+--LIFECYCLE 30
+--;
+-- Hourly exposure snapshot: de-duplicated videoView rows of the hour that precedes the run hour,
+-- read from video_action_log_flow_new, limited to category / recommend / detail / share pages and to apptype other than 12.
+INSERT OVERWRITE TABLE loghubods.alg_vid_feature_all_exp_base_add PARTITION (dt = '${dt}', hh = '${hh}')
+SELECT DISTINCT
+        apptype,
+        pagesource,
+        uid,
+        mid,
+        videoid AS vid,
+        logtimestamp,
+        businesstype,
+        sessionid,
+        subsessionid,
+        abinfodata,
+        recommendlogvo,
+        extparams,
+        GET_JSON_OBJECT(extparams, '$.eventInfos.ab_test003') AS abcode,
+        GET_JSON_OBJECT(extparams, '$.recommendPageType') AS recommendPageType,
+        flowpool,
+        SPLIT(flowpool, '#')[2] AS level  -- third #-separated field of flowpool
+FROM    loghubods.video_action_log_flow_new
+-- the source partition is the run hour minus 3600 seconds, i.e. the previous hour
+WHERE   CONCAT(year, month, day, hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}', 'YYYYMMDDHH')) - 3600), 'YYYYMMDDHH')
+AND     apptype NOT IN ('12')
+AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$|-pages/user-videos-share$'
+AND     businesstype IN ('videoView')
+;
+
+
+--drop table loghubods.alg_vid_feature_all_exp_base_v2 
+--CREATE TABLE IF NOT EXISTS loghubods.alg_vid_feature_all_exp_base_v2 
+--(
+--         apptype  STRING 
+--        ,pagesource STRING
+--        ,uid STRING
+--        ,mid STRING
+--        ,vid  STRING
+--        ,logtimestamp STRING
+--        ,businesstype STRING
+--        ,sessionid STRING
+--        ,subsessionid STRING
+--        ,abinfodata STRING
+--        ,recommendlogvo STRING
+--        ,extparams STRING
+--        ,abcode STRING
+--        ,recommendPageType  STRING
+--        ,flowpool STRING
+--        ,level STRING    
+--)
+--COMMENT 'TABLE COMMENT'
+--PARTITIONED BY 
+--(
+--    dt STRING COMMENT '天'
+--   ,hh STRING COMMENT '小时'
+--
+--
+--)
+--LIFECYCLE 30
+--;
+
+
+
+-- Rolling exposure table: the run-hour partition of alg_vid_feature_all_exp_base_v2 is rebuilt as the de-duplicated
+-- union of the hourly alg_vid_feature_all_exp_base_add partitions from hours_early hours before the run hour up to and including the run hour.
+INSERT OVERWRITE TABLE loghubods.alg_vid_feature_all_exp_base_v2 PARTITION (dt = '${dt}', hh = '${hh}')
+SELECT DISTINCT
+        apptype,
+        pagesource,
+        uid,
+        mid,
+        vid,
+        logtimestamp,
+        businesstype,
+        sessionid,
+        subsessionid,
+        abinfodata,
+        recommendlogvo,
+        extparams,
+        abcode,
+        recommendPageType,
+        flowpool,
+        level
+FROM    loghubods.alg_vid_feature_all_exp_base_add
+WHERE   CONCAT(dt, hh) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}', 'YYYYMMDDHH')) - 3600 * ${hours_early}), 'YYYYMMDDHH')
+AND     CONCAT(dt, hh) <= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}', 'YYYYMMDDHH'))), 'YYYYMMDDHH')

+ 118 - 0
production_code/loghubods.alg_vid_feature_all_exp_base_v2.json

@@ -0,0 +1,118 @@
+{
+  "name": "alg_vid_feature_all_exp_base_v2",
+  "project": "loghubods",
+  "comment": "TABLE COMMENT",
+  "columns": [
+    {
+      "name": "apptype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "pagesource",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "logtimestamp",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "businesstype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "sessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "subsessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "abinfodata",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendlogvo",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "extparams",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "abcode",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendpagetype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "flowpool",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "level",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1018595946,
+      "name": "all曝光底表"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_all_exp_base_add"
+  ]
+}

+ 116 - 0
production_code/loghubods.alg_vid_feature_all_exp_base_v2.sql

@@ -0,0 +1,116 @@
+-- Task: all曝光底表  ID: 1018595946  Type: ODPS_SQL
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2024-07-11 17:37:42
+--********************************************************************--
+--@exclude_input=loghubods.alg_vid_feature_all_exp_base_add
+--@exclude_input=loghubods.video_action_log_flow_new
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2024-07-09 14:06:40
+--********************************************************************--
+--CREATE TABLE IF NOT EXISTS loghubods.alg_vid_feature_all_exp_base_add
+--(
+--    apptype            STRING
+--    ,pagesource        STRING
+--    ,uid               STRING
+--    ,mid               STRING
+--    ,vid               STRING
+--    ,logtimestamp      STRING
+--    ,businesstype      STRING
+--    ,sessionid         STRING
+--    ,subsessionid      STRING
+--    ,abinfodata        STRING
+--    ,recommendlogvo    STRING
+--    ,extparams         STRING
+--    ,abcode            STRING
+--    ,recommendPageType STRING
+--    ,flowpool          STRING
+--    ,level             STRING
+--)
+--COMMENT 'TABLE COMMENT'
+--PARTITIONED BY 
+--(
+--    dt                 STRING COMMENT '天'
+--    ,hh                STRING COMMENT '小时'
+--)
+--LIFECYCLE 30
+--;
+-- Hourly exposure snapshot: de-duplicated videoView rows of the hour that precedes the run hour,
+-- read from video_action_log_flow_new, limited to category / recommend / detail / share pages and to apptype other than 12.
+INSERT OVERWRITE TABLE loghubods.alg_vid_feature_all_exp_base_add PARTITION (dt = '${dt}', hh = '${hh}')
+SELECT DISTINCT
+        apptype,
+        pagesource,
+        uid,
+        mid,
+        videoid AS vid,
+        logtimestamp,
+        businesstype,
+        sessionid,
+        subsessionid,
+        abinfodata,
+        recommendlogvo,
+        extparams,
+        GET_JSON_OBJECT(extparams, '$.eventInfos.ab_test003') AS abcode,
+        GET_JSON_OBJECT(extparams, '$.recommendPageType') AS recommendPageType,
+        flowpool,
+        SPLIT(flowpool, '#')[2] AS level  -- third #-separated field of flowpool
+FROM    loghubods.video_action_log_flow_new
+-- the source partition is the run hour minus 3600 seconds, i.e. the previous hour
+WHERE   CONCAT(year, month, day, hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}', 'YYYYMMDDHH')) - 3600), 'YYYYMMDDHH')
+AND     apptype NOT IN ('12')
+AND     pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$|-pages/user-videos-share$'
+AND     businesstype IN ('videoView')
+;
+
+
+--drop table loghubods.alg_vid_feature_all_exp_base_v2 
+--CREATE TABLE IF NOT EXISTS loghubods.alg_vid_feature_all_exp_base_v2 
+--(
+--         apptype  STRING 
+--        ,pagesource STRING
+--        ,uid STRING
+--        ,mid STRING
+--        ,vid  STRING
+--        ,logtimestamp STRING
+--        ,businesstype STRING
+--        ,sessionid STRING
+--        ,subsessionid STRING
+--        ,abinfodata STRING
+--        ,recommendlogvo STRING
+--        ,extparams STRING
+--        ,abcode STRING
+--        ,recommendPageType  STRING
+--        ,flowpool STRING
+--        ,level STRING    
+--)
+--COMMENT 'TABLE COMMENT'
+--PARTITIONED BY 
+--(
+--    dt STRING COMMENT '天'
+--   ,hh STRING COMMENT '小时'
+--
+--
+--)
+--LIFECYCLE 30
+--;
+
+
+
+-- Rolling exposure table: the run-hour partition of alg_vid_feature_all_exp_base_v2 is rebuilt as the de-duplicated
+-- union of the hourly alg_vid_feature_all_exp_base_add partitions from hours_early hours before the run hour up to and including the run hour.
+INSERT OVERWRITE TABLE loghubods.alg_vid_feature_all_exp_base_v2 PARTITION (dt = '${dt}', hh = '${hh}')
+SELECT DISTINCT
+        apptype,
+        pagesource,
+        uid,
+        mid,
+        vid,
+        logtimestamp,
+        businesstype,
+        sessionid,
+        subsessionid,
+        abinfodata,
+        recommendlogvo,
+        extparams,
+        abcode,
+        recommendPageType,
+        flowpool,
+        level
+FROM    loghubods.alg_vid_feature_all_exp_base_add
+WHERE   CONCAT(dt, hh) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}', 'YYYYMMDDHH')) - 3600 * ${hours_early}), 'YYYYMMDDHH')
+AND     CONCAT(dt, hh) <= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}', 'YYYYMMDDHH'))), 'YYYYMMDDHH')

+ 56 - 0
production_code/loghubods.alg_vid_feature_basic_info.json

@@ -0,0 +1,56 @@
+{
+  "name": "alg_vid_feature_basic_info",
+  "project": "loghubods",
+  "comment": "推荐算法-labelmatch表",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "feature",
+      "type": "JSON",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1017892287,
+      "name": "01_视频基础信息_20241223"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.aitags_repeatdata",
+    "loghubods.content_ai_tags_no_dt",
+    "loghubods.operators_channel_dt",
+    "loghubods.operators_channel_spider_dt",
+    "loghubods.tag_level_2_base",
+    "loghubods.vid_festive_labels",
+    "videoods.dim_video",
+    "videoods.flow_pool_level_video",
+    "videoods.wx_video_per1h"
+  ]
+}

+ 398 - 0
production_code/loghubods.alg_vid_feature_basic_info.sql

@@ -0,0 +1,398 @@
+-- Task: 01_视频基础信息_20241223  ID: 1017892287  Type: ODPS_SQL
+--@exclude_input=loghubods.operators_channel_spider_dt
+--@exclude_input=loghubods.operators_channel_dt
+--@exclude_input=videoods.dim_video
+--@exclude_input=loghubods.content_ai_tags_no_dt
+--@exclude_input=loghubods.tag_level_2_base
+--@exclude_input=loghubods.vid_festive_labels
+--@exclude_input=loghubods.operators_channel_spider_day
+--@exclude_input=loghubods.aitags_repeatdata
+--@exclude_input=loghubods.operators_channel_day
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2024-06-11 16:47:23
+--********************************************************************--
+-- Target table: one row per video id, carrying a JSON feature object; partitioned by day and hour.
+CREATE TABLE IF NOT EXISTS loghubods.alg_vid_feature_basic_info (
+    vid     STRING,
+    feature JSON
+)
+PARTITIONED BY (
+    dt STRING COMMENT '日期:20240105',
+    hh STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = '视频基础特征')
+LIFECYCLE 90;
+
+-- Session flag selecting the Python runtime version used by the Python UDFs called in this task.
+SET odps.sql.python.version = cp37;
+
+-- Rebuild partition (dt, hh) of alg_vid_feature_basic_info.
+INSERT OVERWRITE TABLE loghubods.alg_vid_feature_basic_info PARTITION (dt = '${dt}',hh = '${hh}')
+-- CTE t_video_merge_cate: for every video in wx_video_per1h, yields
+--   * merge_second_level_cate: tag_level_2 from tag_level_2_base when present, otherwise tag_name_1
+--     from content_ai_tags_no_dt (both matched on cleaned title + '-' + duration);
+--   * merge_first_level_cate: derived from the second-level category through the CASE below, falling
+--     back to the '一级品类_' tag found on videoods.dim_video when no branch matches.
+-- REGEXP here is an unanchored (substring) match, so the CASE branches are order-sensitive:
+-- the first branch whose pattern occurs anywhere in the value wins.
+WITH t_video_merge_cate AS (
+    SELECT  a.vid As vid
+            ,merge_second_level_cate
+            ,CASE
+                -- Guard: '红歌老歌舞蹈' contains '红歌老歌', so without this branch it would be captured
+                -- by the music branch below and its entry in the dance branch could never be reached.
+                WHEN merge_second_level_cate REGEXP '红歌老歌舞蹈' THEN '舞蹈'
+                -- music
+                WHEN merge_second_level_cate REGEXP '祝福音乐|人生感悟音乐|民族异域音乐|亲情音乐|红歌老歌|音乐知识' THEN '音乐'
+                -- drama / skits
+                WHEN merge_second_level_cate REGEXP '正能量剧情|对口型表演|快闪' THEN '剧情/剧情演绎'
+                -- games
+                WHEN merge_second_level_cate REGEXP '拟真游戏|麻将|棋牌' THEN '游戏'
+                -- candid shots / looks
+                WHEN merge_second_level_cate REGEXP '老年审美美女|老年审美帅哥' THEN '随拍/颜值'
+                -- dance
+                WHEN merge_second_level_cate REGEXP '红歌老歌舞蹈|广场舞|舞蹈教程' THEN '舞蹈'
+                -- animals / pets
+                WHEN merge_second_level_cate REGEXP '宠物日常|动物表演|生动物' THEN '动物/萌宠'
+                -- agriculture and rural life
+                WHEN merge_second_level_cate REGEXP '农村生活|农业技术' THEN '三农'
+                -- technology / digital
+                WHEN merge_second_level_cate REGEXP '老年相关科技|未来科幻|国家科技力量' THEN '科技/科技数码'
+                -- finance
+                WHEN merge_second_level_cate REGEXP '保险|理财' THEN '财经'
+                -- parenting / mother and baby
+                WHEN merge_second_level_cate REGEXP '亲子日常|K12教育' THEN '母婴/母婴亲子'
+                -- law / popular science / humanities
+                WHEN merge_second_level_cate REGEXP '老年相关法律科普|知识科普|生活技巧科普' THEN '法律/科普/人文社科'
+                -- emotion / psychology
+                WHEN merge_second_level_cate REGEXP '怀念时光|人生忠告|迷信祝福|节日祝福|早中晚好' THEN '情感/情感心理'
+                -- workplace / humanities
+                WHEN merge_second_level_cate REGEXP '退休前|退休后' THEN '职场/人文社科'
+                -- education / training
+                WHEN merge_second_level_cate REGEXP '益智解密|老年教育' THEN '教育/教育培训'
+                -- photography / videography
+                WHEN merge_second_level_cate REGEXP '风景实拍|动植物实拍|人像模特实拍|摄影教学' THEN '摄影摄像'
+                -- arts / talent skills
+                WHEN merge_second_level_cate REGEXP '名画赏析|杂技柔术|魔术|魔术特效|书法|绘画|木工|口技|大型集体艺术|戏曲戏剧|二人转|其他才艺' THEN '艺术/才艺技能'
+                -- food
+                WHEN merge_second_level_cate REGEXP '美食测评|美食教程|吃播探店' THEN '美食'
+                -- travel
+                WHEN merge_second_level_cate REGEXP '旅行记录|旅行攻略' THEN '旅行/旅游'
+                -- regional / local
+                WHEN merge_second_level_cate REGEXP '省份城市亮点|本地新闻|本地生活' THEN '地域本地'
+                -- fashion / beauty
+                WHEN merge_second_level_cate REGEXP '老年时尚|美妆护肤穿搭' THEN '时尚/美妆'
+                -- culture / humanities
+                WHEN merge_second_level_cate REGEXP '传统文化|国际文化' THEN '文化/人文社科'
+                -- comedy / entertainment
+                WHEN merge_second_level_cate REGEXP '搞笑瞬间合集|搞笑段子' THEN '搞笑/休闲娱乐'
+                -- celebrities
+                WHEN merge_second_level_cate REGEXP '历史名人|当代正能量人物|老明星' THEN '明星/名人'
+                -- variety shows / film and TV
+                WHEN merge_second_level_cate REGEXP '老年人上综艺|老年关心纪录片|老综艺影像|电影切片|电影解说|电视剧切片|电视剧解说' THEN '综艺/影视综艺'
+                -- sports
+                WHEN merge_second_level_cate REGEXP '中国队比赛|老年运动' THEN '体育/运动'
+                -- health / longevity / fitness
+                WHEN merge_second_level_cate REGEXP '健康知识|长寿知识|饮食健康' THEN '医疗健康/长寿/健身'
+                -- daily life
+                WHEN merge_second_level_cate REGEXP '健身操' THEN '生活记录/生活'
+                -- home and living
+                WHEN merge_second_level_cate REGEXP '老年生活|生活小妙招|园艺花艺' THEN '生活家居/家居家装'
+                -- current affairs / society
+                WHEN merge_second_level_cate REGEXP '民生政策|流行病疫情|社会风气|食品安全|贪污腐败|人财诈骗|核污染|惠民新闻|天气变化|国家力量|国际时政|他国政策' THEN '时政社会'
+                -- oddities
+                WHEN merge_second_level_cate REGEXP '惊奇事件|罕见画面' THEN '奇人异象'
+                -- history
+                WHEN merge_second_level_cate REGEXP '中国战争史|中国党史|中国历史影像' THEN '历史'
+                -- military
+                WHEN merge_second_level_cate REGEXP '国际军事|国内军事|国家统一' THEN '军事'
+                -- no branch matched (or the second-level category is NULL): use the dim_video tag
+                ELSE b.first_level_cate
+            END AS merge_first_level_cate
+    FROM    (
+                -- Second-level category per video: curated table first, AI tags as fallback.
+                SELECT  DISTINCT a.id AS vid
+                        ,CASE   WHEN b.tag_level_2 IS NOT NULL THEN b.tag_level_2
+                                ELSE c.tag_name_1
+                        END AS merge_second_level_cate
+                FROM    (
+                            SELECT  DISTINCT id
+                                    ,CONCAT(clear_title_signal(title),'-',total_time) AS title_duration
+                            FROM    videoods.wx_video_per1h
+                        ) a
+                LEFT JOIN   (
+                                SELECT  *
+                                FROM    loghubods.tag_level_2_base
+                            ) b
+                ON      a.title_duration = b.title_duration
+                LEFT JOIN   (
+                                SELECT  DISTINCT title_duration
+                                        ,tags
+                                        ,tag_name_1
+                                        ,tag_name_2
+                                        ,tag_name_3
+                                FROM    loghubods.content_ai_tags_no_dt
+                            ) c
+                ON      a.title_duration = c.title_duration
+            ) a
+    LEFT JOIN   (
+                    -- First-level category taken from the '一级品类_<value>' tag on dim_video; for each
+                    -- cleaned-title + duration key only the row with the largest videoid is kept.
+                    SELECT  DISTINCT title_duration
+                            ,一级品类 AS first_level_cate
+                            ,videoid
+                    FROM    (
+                                SELECT  CONCAT(CLEAR_TITLE_SIGNAL(title),'-',total_time) AS title_duration
+                                        ,一级品类
+                                        ,videoid
+                                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(CLEAR_TITLE_SIGNAL(title),'-',total_time) ORDER BY videoid DESC ) AS rank
+                                FROM    (
+                                            SELECT  DISTINCT videoid
+                                                    ,b.title
+                                                    ,b.total_time
+                                                    ,SUBSTRING_INDEX(words_1,'_',-1) AS 一级品类
+                                            FROM    (
+                                                        SELECT  videoid
+                                                                ,words_1
+                                                        FROM    (
+                                                                    -- one row per comma-separated tag
+                                                                    SELECT  videoid
+                                                                            ,words_1
+                                                                    FROM    videoods.dim_video
+                                                                    LATERAL VIEW EXPLODE(SPLIT(tags,',')) t AS words_1
+                                                                ) 
+                                                        WHERE   words_1 REGEXP '一级品类_'
+                                                    ) a
+                                            LEFT JOIN videoods.wx_video_per1h b
+                                            ON      a.videoid = b.id
+                                        ) 
+                            ) 
+                    WHERE   rank = 1
+                    -- drop keys built from a missing title ('None-' / 'null-')
+                    HAVING  title_duration NOT REGEXP 'None-|null-'
+                ) b
+    ON      a.vid = b.videoid
+)
+,t_video_merge_cate_total as -- CTE: per-video "ad" string = topic,topic1,topic2,topic3 joined by ','
+(SELECT  vid
+        ,CONCAT_WS(',',topic,topic1,topic2,topic3) as ad -- only `topic` varies; topic1..topic3 are the constant '其他'
+FROM    (
+            SELECT  id AS vid
+                    ,CASE   WHEN te.topic REGEXP '旅行|旅游' -- '旅游' when any AI-tag or merged-category text mentions travel
+                                OR te.keywords REGEXP '旅行|旅游'
+                                OR tg.merge_first_level_cate REGEXP '旅行|旅游'
+                                OR tg.merge_second_level_cate REGEXP '旅行|旅游'
+                                OR te.cate2 REGEXP '旅行|旅游'
+                                OR te.cate2_list REGEXP '旅行|旅游'
+                                OR te.cate1_list REGEXP '旅行|旅游' THEN '旅游'
+                            ELSE '其他'
+                    END AS topic
+                    ,'其他' AS topic1
+                    ,'其他' AS topic2
+                    ,'其他' AS topic3
+            FROM    videoods.wx_video_per1h ta
+            LEFT JOIN   (
+                            SELECT  *
+                            FROM    (
+                                        SELECT  video_id
+                                                ,video_title
+                                                ,title_duration
+                                                ,IF(视频一级分类 IS NULL,"unknown",GET_CATE1(视频一级分类)) AS cate1_list -- currently only one value
+                                                ,IF(二级分类 IS NULL,"unknown",GET_CATE2_ALL(二级分类)) AS cate2_list
+                                                ,IF(tag_1 IS NULL,"unknown",GET_CATE2_ONLY(tag_1)) AS cate2
+                                                ,COALESCE(内容选题,"") AS topic -- topic summarized in a few words
+                                                ,COALESCE(视频主题,"") AS theme -- theme summarized in a short passage
+                                                ,IF(视频关键词 IS NULL,"",REGEXP_REPLACE(视频关键词,'[\\[\\]\"]','')) AS keywords -- video keywords, with brackets and double quotes stripped
+                                                ,IF(视频风格 IS NULL,"",REGEXP_REPLACE(视频风格,',',',')) AS style -- video style, full-width commas normalized to ASCII commas
+                                                -- (not selected) sentiment, ending call-to-action flag, shareability judgment, inferred audience age group / gender / value type / income
+                                                ,IF(推测观众用户价值点 IS NULL,"",REPLACE(REGEXP_REPLACE(推测观众用户价值点,'[\\[\\]\"]',''),",",",")) AS user_value -- inferred audience value points
+                                                ,CASE   WHEN 时效性_有无时效 IS NULL THEN "0"
+                                                        WHEN 时效性_有无时效 = "有" THEN "1"
+                                                        WHEN 时效性_有无时效 = "无" THEN "0"
+                                                        ELSE "0"
+                                                END AS if_timeliness -- timeliness flag: "1" only when the source value is "有"
+                                                ,IF(时效性_具体时间 IS NULL,"",REGEXP_REPLACE(时效性_具体时间,',',',')) AS timeliness
+                                                ,是否节日视频 AS is_fes
+                                                ,IF(是否节日视频 = '是',节日名称,NULL) AS fes_name
+                                                ,IF(是否节日视频 = '是',具体时效,NULL) AS fes_time
+                                                ,ARRAY_JOIN(
+                                                           SLICE(
+                                                                 ARRAY_DISTINCT(
+                                                                                REGEXP_EXTRACT_ALL(推测观众地域,'上海|云南|北京|印度|台湾|吉林|四川|天津|宁夏|安徽|山东|山西|巴西|广东|广西|德国|新疆|日本|朝鲜|江苏|江西|河北|河南|法国|泰国|浙江|海南|湖北|湖南|澳门|甘肃|福建|缅甸|美国|英国|西藏|贵州|越南|辽宁|迪拜|重庆|陕西|青海|韩国|香港|俄罗斯|内蒙古|加拿大|新加坡|菲律宾|黑龙江|巴基斯坦|澳大利亚|马来西亚',0)
+                                                                 )
+                                                           ,1,5)
+                                                ,',') AS video_region -- up to 5 distinct region names found in the inferred-audience-region text
+                                                ,ROW_NUMBER() OVER (PARTITION BY CONCAT(CLEAN_TEXT(video_title),"-",SUBSTRING_INDEX(title_duration,"-",-1)) ORDER BY dt DESC ) AS rank -- rank 1 = newest dt per cleaned-title + duration key
+                                        FROM    loghubods.aitags_repeatdata
+                                        WHERE   dt >= "${dt-1}" -- NOTE(review): ${dt-1} is presumably a scheduler parameter for the previous day — confirm
+                                    ) 
+                            WHERE   rank = 1
+                        ) te
+            ON      CONCAT(CLEAN_TEXT(ta.title),"-",CAST(ta.total_time AS STRING)) = CONCAT(CLEAN_TEXT(te.video_title),"-",SUBSTRING_INDEX(te.title_duration,"-",-1)) -- join key: cleaned title + "-" + duration
+            LEFT JOIN   (
+                            SELECT  *
+                            FROM    t_video_merge_cate
+                        ) tg
+            ON      ta.id = tg.vid
+            WHERE   id IS NOT NULL
+            AND     ta.title IS NOT NULL
+            AND     ta.total_time IS NOT NULL
+            AND     ( -- keep videos with recommend_status = -6, or created within the last 7 days of wall-clock time
+                        recommend_status = -6
+                        OR      gmt_create_timestamp > (
+                                    UNIX_TIMESTAMP(DATETIME(CURRENT_TIMESTAMP())) - 7 * 24 * 3600
+                        ) * 1000 -- seconds scaled by 1000, so gmt_create_timestamp is presumably epoch milliseconds — confirm
+            )
+            ORDER BY CAST(id AS BIGINT) DESC -- NOTE(review): ordering inside a CTE that is only joined later likely has no effect — confirm before removing
+        ) )
+
+
+
+
+SELECT  id AS vid -- final projection: one (vid, feature JSON) row per retained video
+        ,CASE   WHEN te.rank IS NOT NULL THEN JSON_OBJECT( -- an AI-tag row matched: include the tag-derived fields
+                            "title_time_w_h_unionid", CONCAT(CLEAN_TEXT(COALESCE(ta.title,"")),'-',COALESCE(ta.total_time,"0"),'-',COALESCE(ta.width,"0"),'-',COALESCE(ta.height,"0")),
+                            "title",title,
+                            "title_split",FENCI_STR2STR(title,3),
+                            "width",width,
+                            "height",height,
+                            "gmt_create_timestamp",gmt_create_timestamp,
+                            "size",size,
+                            "recommend_status",recommend_status,
+                            "total_time",total_time,
+                            "bit_rate",bit_rate,
+                            "vid_source",COALESCE(tb.vid_source,"unknown"),
+                            "channel",COALESCE(tc.channel,"unknown"),
+                            "cate1_list",te.cate1_list,
+                            "cate2_list",te.cate2_list,
+                            "cate2",te.cate2,
+                            "topic",te.topic,
+                            "theme",te.theme,
+                            "keywords",te.keywords,
+                            "style",te.style,
+                            "user_value",te.user_value,
+                            "if_timeliness",te.if_timeliness,
+                            "timeliness",te.timeliness,
+                            "festive_label1",TRIM(tf.first_labels),
+                            "festive_label2",TRIM(tf.secondary_labels),
+                            "merge_second_level_cate",TRIM(tg.merge_second_level_cate),
+                            "merge_first_level_cate",TRIM(tg.merge_first_level_cate),
+                            "is_fes", te.is_fes,
+                            "fes_time", te.fes_time,
+                            "fes_name",te.fes_name,
+                            "uid",ta.uid,
+                            "video_region",te.video_region,
+                            "ad",th.ad,
+                            "attribute_province",ti.province
+                        )
+                ELSE JSON_OBJECT( -- no AI-tag row matched: the te.* values referenced below are NULL from the LEFT JOIN
+                        "title_time_w_h_unionid",CONCAT(CLEAN_TEXT(COALESCE(ta.title,"")),'-',COALESCE(ta.total_time,"0"),'-',COALESCE(ta.width,"0"),'-',COALESCE(ta.height,"0")),
+                        "title",title,
+                        "title_split",FENCI_STR2STR(title,3),
+                        "width",width,
+                        "height",height,
+                        "gmt_create_timestamp",gmt_create_timestamp,
+                        "size",size,
+                        "recommend_status",recommend_status,
+                        "total_time",total_time,
+                        "bit_rate",bit_rate,
+                        "vid_source",COALESCE(tb.vid_source,"unknown"),
+                        "channel",COALESCE(tc.channel,"unknown"),
+                        "festive_label1",TRIM(tf.first_labels),
+                        "festive_label2",TRIM(tf.secondary_labels),
+                        "merge_second_level_cate",TRIM(tg.merge_second_level_cate),
+                        "merge_first_level_cate",TRIM(tg.merge_first_level_cate),
+                        "is_fes", te.is_fes,
+                        "fes_time", te.fes_time,
+                        "fes_name", te.fes_name,
+                            "uid",ta.uid,
+                            "video_region",te.video_region,
+                            "ad",th.ad,
+                            "attribute_province",ti.province
+                    )
+        END AS feature
+FROM    videoods.wx_video_per1h ta
+LEFT JOIN   ( -- tb: one vid_source per video, taken from the partition returned by MAX_PT
+                SELECT  *
+                FROM    (
+                            SELECT  videoid AS vid
+                                    ,type AS vid_source
+                                    ,ROW_NUMBER() OVER (PARTITION BY videoid ORDER BY type DESC ) AS rn -- keeps the largest `type` per video
+                            FROM    loghubods.operators_channel_dt
+                             WHERE   dt = MAX_PT("loghubods.operators_channel_dt")
+                        ) 
+                WHERE   rn = 1
+            ) tb
+ON      ta.id = tb.vid
+LEFT JOIN   ( -- tc: one channel per video, taken from the partition returned by MAX_PT
+                SELECT  *
+                FROM    (
+                            SELECT  videoid AS vid
+                                    ,channel AS channel
+                                    ,ROW_NUMBER() OVER (PARTITION BY videoid ORDER BY channel ) AS rn -- keeps the smallest `channel` per video
+                            FROM    loghubods.operators_channel_spider_dt
+                             WHERE   dt = MAX_PT("loghubods.operators_channel_spider_dt")
+                        ) 
+                WHERE   rn = 1
+            ) tc
+ON      ta.id = tc.vid
+LEFT JOIN   ( -- te: newest AI-tag row per cleaned-title + duration key
+                SELECT  *,CONCAT(cate1_list,cate2_list,cate2,topic,keywords) as cate_total
+                FROM    (
+                            SELECT  video_id
+                                    ,video_title
+                                    ,title_duration
+                                    ,IF(视频一级分类 IS NULL,"unknown",GET_CATE1(视频一级分类)) AS cate1_list -- currently only one value
+                                    ,IF(二级分类 IS NULL,"unknown",GET_CATE2_ALL(二级分类)) AS cate2_list
+                                    ,IF(tag_1 IS NULL,"unknown",GET_CATE2_ONLY(tag_1)) AS cate2
+                                    ,COALESCE(内容选题,"") AS topic -- topic summarized in a few words
+                                    ,COALESCE(视频主题,"") AS theme -- theme summarized in a short passage
+                                    ,IF(视频关键词 IS NULL,"",REGEXP_REPLACE(视频关键词,'[\\[\\]\"]','')) AS keywords -- video keywords, with brackets and double quotes stripped
+                                    ,IF(视频风格 IS NULL,"",REGEXP_REPLACE(视频风格,',',',')) AS style -- video style, full-width commas normalized to ASCII commas
+                                    -- (not selected) sentiment, ending call-to-action flag, shareability judgment, inferred audience age group / gender / value type / income
+                                    ,IF(推测观众用户价值点 IS NULL,"",REPLACE(REGEXP_REPLACE(推测观众用户价值点,'[\\[\\]\"]',''),",",",")) AS user_value -- inferred audience value points
+                                    ,CASE   WHEN 时效性_有无时效 IS NULL THEN "0"
+                                            WHEN 时效性_有无时效 = "有" THEN "1"
+                                            WHEN 时效性_有无时效 = "无" THEN "0"
+                                            ELSE "0"
+                                    END AS if_timeliness -- timeliness flag: "1" only when the source value is "有"
+                                    ,IF(时效性_具体时间 IS NULL,"",REGEXP_REPLACE(时效性_具体时间,',',',')) AS timeliness
+                                    ,是否节日视频 AS is_fes
+                                    ,IF(是否节日视频 = '是',节日名称, NULL ) AS fes_name
+                                    ,IF(是否节日视频 = '是',具体时效, NULL ) AS fes_time
+                                    ,ARRAY_JOIN(SLICE(ARRAY_DISTINCT(REGEXP_EXTRACT_ALL(推测观众地域,'上海|云南|北京|印度|台湾|吉林|四川|天津|宁夏|安徽|山东|山西|巴西|广东|广西|德国|新疆|日本|朝鲜|江苏|江西|河北|河南|法国|泰国|浙江|海南|湖北|湖南|澳门|甘肃|福建|缅甸|美国|英国|西藏|贵州|越南|辽宁|迪拜|重庆|陕西|青海|韩国|香港|俄罗斯|内蒙古|加拿大|新加坡|菲律宾|黑龙江|巴基斯坦|澳大利亚|马来西亚',0)),1,5),',') AS video_region -- up to 5 distinct region names found in the inferred-audience-region text
+                                    ,ROW_NUMBER() OVER (PARTITION BY CONCAT(CLEAN_TEXT(video_title),"-",SUBSTRING_INDEX(title_duration,"-",-1)) ORDER BY dt DESC ) AS rank -- rank 1 = newest dt per key
+                            FROM    loghubods.aitags_repeatdata
+                            WHERE   dt >= "${dt-1}" -- NOTE(review): ${dt-1} is presumably a scheduler parameter for the previous day — confirm
+                        ) 
+                WHERE   rank = 1
+            ) te
+ON      CONCAT(CLEAN_TEXT(ta.title),"-",CAST(ta.total_time AS STRING)) = CONCAT(CLEAN_TEXT(te.video_title),"-",SUBSTRING_INDEX(te.title_duration,"-",-1)) -- join key: cleaned title + "-" + duration
+LEFT JOIN   ( -- tf: one festive-label row per video, NULL labels replaced by "unknown"
+                SELECT  *
+                FROM    (
+                            SELECT  videoid
+                                    ,COALESCE(first_labels,"unknown") AS first_labels
+                                    ,COALESCE(secondary_labels,"unknown") AS secondary_labels
+                                    ,ROW_NUMBER() OVER (PARTITION BY videoid ORDER BY first_labels DESC ) AS rn
+                            FROM    loghubods.vid_festive_labels
+                        ) 
+                WHERE   rn = 1
+            ) tf
+ON      ta.id = tf.videoid
+LEFT JOIN ( -- tg: merged first/second-level categories from the t_video_merge_cate CTE
+    SELECT  *
+    FROM    t_video_merge_cate
+) tg ON ta.id = tg.vid
+left join t_video_merge_cate_total th -- th: the "ad" topic string from the t_video_merge_cate_total CTE
+ON ta.id = th.vid
+left join (SELECT  DISTINCT video_id -- ti: province read from the JSON `attribute` of the earliest flow-pool row per video
+                        ,GET_JSON_OBJECT(attribute,'$.province')  as province
+                FROM    (
+                            SELECT  DISTINCT video_id
+                                    ,attribute
+                                    ,ROW_NUMBER() OVER (PARTITION BY video_id ORDER BY create_time ASC ) AS rank
+                            FROM    videoods.flow_pool_level_video
+                            WHERE   life_cycle_id IS NOT NULL
+                        ) 
+                WHERE   rank = 1
+                and attribute is not null ) ti  -- a video whose earliest row has a NULL attribute gets no province
+on ta.id = ti.video_id
+WHERE   id IS NOT NULL
+AND     ta.title IS NOT NULL
+AND     ta.total_time IS NOT NULL
+AND     ( -- keep videos with recommend_status = -6, or created within the last 7 days of wall-clock time
+    recommend_status = -6
+    OR 
+    gmt_create_timestamp>(UNIX_TIMESTAMP(DATETIME(CURRENT_TIMESTAMP())) - 7 * 24 * 3600) * 1000
+)
+ORDER BY CAST(id AS BIGINT) DESC
+;

+ 48 - 0
production_code/loghubods.alg_vid_feature_cfreturn.json

@@ -0,0 +1,48 @@
+{
+  "name": "alg_vid_feature_cfreturn",
+  "project": "loghubods",
+  "comment": "视频特征-用户连续点击回流CF",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "feature",
+      "type": "JSON",
+      "comment": "json格式的特征组合"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1017832569,
+      "name": "03_用户连续点击回流CF"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.user_share_log_flow"
+  ]
+}

+ 148 - 0
production_code/loghubods.alg_vid_feature_cfreturn.sql

@@ -0,0 +1,148 @@
+-- Task: 03_用户连续点击回流CF  ID: 1017832569  Type: ODPS_SQL
+--@exclude_input=loghubods.user_share_log_flow
+--odps sql 
+--********************************************************************--
+--author:张博
+--create time:2024-06-06 13:27:20
+--********************************************************************--
+
+-- select * from loghubods.alg_vid_feature_cfreturn where dt = "20240606" and hh = "16";
+
+-- Target table: per-video collaborative-filtering feature (JSON), partitioned by day and hour.
+CREATE TABLE IF NOT EXISTS loghubods.alg_vid_feature_cfreturn (
+    vid     STRING COMMENT '视频id',
+    feature JSON   COMMENT 'json格式的特征组合'
+)
+COMMENT '视频特征-用户连续点击回流CF'
+PARTITIONED BY (
+    dt STRING COMMENT '天',
+    hh STRING COMMENT '小时'
+)
+LIFECYCLE 30;
+-- Rebuild partition (dt, hh) of alg_vid_feature_cfreturn.
+INSERT OVERWRITE TABLE loghubods.alg_vid_feature_cfreturn PARTITION (dt = '${dt}',hh = '${hh}')
+-- CTE t_origin: click events from user_share_log_flow in the hour window
+-- [target hour - ${hours_early} hours, target hour - 1 hour], reduced to the earliest client
+-- timestamp per (apptype, mid, vid, pagesource, subsessionid).
+WITH t_origin AS 
+(
+    SELECT  apptype
+            ,machinecode AS mid
+            ,clickobjectid AS vid
+            ,pagesource
+            ,subsessionid
+            ,MIN(clienttimestamp) AS ts
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now
+            -- clienttimestamp is divided by 1000 before being subtracted from the target hour in seconds
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - MIN(clienttimestamp) / 1000 AS ts_diff
+    FROM    loghubods.user_share_log_flow
+    WHERE   CONCAT(year,month,day,hour) >= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * ${hours_early}),'YYYYMMDDHH')
+    AND     CONCAT(year,month,day,hour) <= TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+    AND     __topic__ IN ('click')
+    AND     apptype NOT IN ('12')
+    AND     apptype IS NOT NULL
+    AND     clickobjectid IS NOT NULL
+    AND     machinecode IS NOT NULL
+    AND     machinecode != ""
+    AND     clienttimestamp IS NOT NULL
+    GROUP BY apptype
+             ,machinecode
+             ,clickobjectid
+             ,pagesource
+             ,subsessionid
+) -- SELECT  *
+-- FROM    t_share_
+-- limit 999
+-- ;
+-- SELECT  vid2vid_cf_py("14824141:1,21175605:2,21175605:3,21256335:0")
+,t_cf AS 
+(
+    -- For each device (mid) with more than one click row and more than one distinct video, pack its
+    -- distinct "vid:timestamp_in_seconds" items into one comma-separated string and pass it to the
+    -- vid2vid_cf_py table function, which emits (cf_a, cf_b) rows.
+    SELECT  loghubods.vid2vid_cf_py(vid_ts) AS (cf_a,cf_b)
+    FROM    (
+                SELECT  mid
+                        ,CONCAT_WS(',',COLLECT_SET(CONCAT(vid,":",CAST(ts / 1000 AS BIGINT)))) AS vid_ts
+                FROM    t_origin
+                WHERE   mid IS NOT NULL
+                AND     mid <> ""
+                AND     vid IS NOT NULL
+                AND     vid <> "0"
+                AND     vid <> ""
+                AND     ts IS NOT NULL
+                GROUP BY mid
+                HAVING  COUNT(1) > 1
+                AND     SIZE(COLLECT_SET(vid)) > 1
+            ) 
+) -- SELECT  *
+-- FROM    t_cf
+-- ;
+,t_score AS 
+(
+    -- score(cf_a -> cf_b) = count of (cf_a, cf_b) rows / count of cf_a rows, rounded to 6 decimals.
+    -- Only the ${key_limit} most frequent cf_a keys are kept (inner join against key_stat);
+    -- rank orders the cf_b candidates of each cf_a by descending unrounded ratio.
+    SELECT  pair_stat.cf_a
+            ,pair_stat.cf_b
+            ,pair_stat.cnt AS cnt_ab
+            ,key_stat.cnt AS cnt_a
+            ,ROUND(pair_stat.cnt / key_stat.cnt,6) AS score
+            ,ROW_NUMBER() OVER (PARTITION BY pair_stat.cf_a ORDER BY pair_stat.cnt / key_stat.cnt DESC ) AS rank
+    FROM    (
+                -- occurrences of each (cf_a, cf_b) pair
+                SELECT  cf_a
+                        ,cf_b
+                        ,COUNT(1) AS cnt
+                FROM    t_cf
+                GROUP BY cf_a
+                         ,cf_b
+            ) pair_stat
+    JOIN    (
+                -- occurrences of each cf_a, limited to the most frequent keys
+                SELECT  cf_a
+                        ,COUNT(1) AS cnt
+                FROM    t_cf
+                GROUP BY cf_a
+                ORDER BY COUNT(1) DESC
+                LIMIT   ${key_limit}
+            ) key_stat
+    ON      pair_stat.cf_a = key_stat.cf_a
+)
+,t_recall AS 
+(
+    -- One row per source video (cf_a) holding four parallel comma-separated lists over its top
+    -- ${rank_limit} candidates: candidate ids, scores, pair counts and key counts.
+    --
+    -- The lists are built with WM_CONCAT ... WITHIN GROUP (ORDER BY rank) so that they are explicitly
+    -- rank-ordered and aligned with each other. The previous form sorted a subquery and then relied on
+    -- COLLECT_LIST under GROUP BY keeping that order, which the aggregation does not guarantee.
+    -- rank is unique within a cf_a (ROW_NUMBER), so it fully determines the order.
+    -- COALESCE(...,'') keeps the empty-string result the CONCAT_WS form produced when every value is NULL.
+    SELECT  cf_a AS vid
+            ,JSON_OBJECT(
+                "videoid_arr",COALESCE(WM_CONCAT(',',cf_b) WITHIN GROUP (ORDER BY rank ASC),'')
+                ,"score_arr",COALESCE(WM_CONCAT(',',CAST(score AS STRING)) WITHIN GROUP (ORDER BY rank ASC),'')
+                ,"cnt_ab_arr",COALESCE(WM_CONCAT(',',CAST(cnt_ab AS STRING)) WITHIN GROUP (ORDER BY rank ASC),'')
+                ,"cnt_a_arr",COALESCE(WM_CONCAT(',',CAST(cnt_a AS STRING)) WITHIN GROUP (ORDER BY rank ASC),'')
+            ) AS feature
+    FROM    t_score
+    WHERE   rank <= ${rank_limit}
+    GROUP BY cf_a
+    ORDER BY SUM(cnt_a) DESC
+)
+SELECT  *
+FROM    t_recall

+ 48 - 0
production_code/loghubods.alg_vid_feature_cfshare.json

@@ -0,0 +1,48 @@
+{
+  "name": "alg_vid_feature_cfshare",
+  "project": "loghubods",
+  "comment": "视频特征-用户连续分享CF",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "feature",
+      "type": "JSON",
+      "comment": "json格式的特征组合"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1017832518,
+      "name": "02_用户连续分享CF"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.user_share_log_flow"
+  ]
+}

+ 151 - 0
production_code/loghubods.alg_vid_feature_cfshare.sql

@@ -0,0 +1,151 @@
+-- Task: 02_用户连续分享CF  ID: 1017832518  Type: ODPS_SQL
+--@exclude_input=loghubods.user_share_log_flow
+--odps sql 
+--********************************************************************--
+--author:张博
+--create time:2024-06-06 13:27:20
+--********************************************************************--
+
+-- select * from loghubods.alg_vid_feature_cfshare where dt = "20240606" and hh = "16";
+
+CREATE TABLE IF NOT EXISTS loghubods.alg_vid_feature_cfshare -- Output table of this task. IF NOT EXISTS makes the DDL a no-op once the table is there.
+(
+   vid STRING COMMENT '视频id' -- video id
+   ,feature JSON  COMMENT 'json格式的特征组合' -- JSON-typed column holding the combined features
+   
+)
+COMMENT '视频特征-用户连续分享CF' -- table comment: video features from consecutive-share collaborative filtering
+PARTITIONED BY -- one partition per (dt, hh) pair
+(
+   dt STRING COMMENT '天' -- day
+  ,hh STRING COMMENT '小时' -- hour
+
+
+)
+LIFECYCLE 30 -- retention setting of 30 (MaxCompute counts lifecycle in days)
+;
+INSERT OVERWRITE TABLE loghubods.alg_vid_feature_cfshare PARTITION (dt = '${dt}',hh = '${hh}') -- Replaces the partition named by the dt and hh parameters with the result of the query that follows.
+
+
+WITH t_origin AS -- Share events reduced to one row per (apptype, mid, vid, pagesource, subsessionid, shareid).
+(
+    -- When a single exposure produced several shares, keep only the earliest one.
+    SELECT  apptype
+            ,mid
+            ,vid
+            ,pagesource
+            ,subsessionid
+            ,shareid
+            ,MIN(clienttimestamp) AS ts -- earliest client timestamp in the group (divided by 1000 on the ts_diff line, so presumably milliseconds, TODO confirm)
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now -- Unix timestamp of the dt+hh run hour
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - MIN(clienttimestamp) / 1000 AS ts_diff -- run hour minus earliest share time. NOTE(review): t_cf reads only mid, vid and ts, so ts_now and ts_diff look unused in this script
+    FROM    (
+                SELECT  __topic__
+                        ,eventinfos
+                        ,apptype
+                        ,clickobjectid
+                        ,shareobjectid AS vid
+                        ,machinecode AS mid
+                        ,clienttimestamp
+                        ,pagesource
+                        ,parentpagesource
+                        ,parentrootpagesource
+                        ,shareid
+                        ,rootshareid
+                        ,subsessionid
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * ${hours_early}),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') -- source partitions from hours_early hours before the run hour up to the hour just before it (BETWEEN is inclusive on both ends)
+                AND     __topic__ IN ('share') -- share events only
+                AND     apptype NOT IN ('12') -- apptype 12 is excluded (reason not visible in this script)
+                AND     apptype IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+                AND     machinecode IS NOT NULL
+                AND     machinecode != ""
+                AND     clienttimestamp IS NOT NULL
+            ) 
+    GROUP BY apptype
+             ,mid
+             ,vid
+             ,pagesource
+             ,subsessionid
+             ,shareid
+) -- SELECT  *
+-- FROM    t_share_
+-- limit 999
+-- ;
+-- SELECT  vid2vid_cf_py("14824141:1,21175605:2,21175605:3,21256335:0")
+,t_cf AS -- (cf_a, cf_b) pairs derived from the share history of each mid.
+(
+    SELECT  loghubods.vid2vid_cf_py(vid_ts) AS (cf_a,cf_b) -- project UDF, presumably a table function that expands one vid_ts string into (cf_a, cf_b) rows. The pairing logic is not visible here, TODO confirm
+    FROM    (
+                SELECT  mid
+                        ,CONCAT_WS(',',COLLECT_SET(CONCAT(vid,":",CAST(ts / 1000 AS BIGINT)))) AS vid_ts -- comma-joined set of vid:ts tokens for this mid, with ts divided by 1000 and cast to BIGINT
+                        ,COUNT(1) AS cnt -- computed but not selected by the outer query
+                FROM    (
+                            SELECT  mid
+                                    ,vid
+                                    ,ts
+                            FROM    t_origin
+                            WHERE   mid IS NOT NULL
+                            AND     mid <> ""
+                            AND     vid IS NOT NULL
+                            AND     vid <> "0"
+                            AND     vid <> ""
+                            AND     ts IS NOT NULL
+                        ) 
+                GROUP BY mid
+                HAVING  COUNT(1) > 1 -- keep a mid only when it has more than one share row
+                AND     SIZE(COLLECT_SET(vid)) > 1 -- and more than one distinct vid
+            ) 
+) -- SELECT  *
+-- FROM    t_cf
+-- ;
+,t_score AS 
+(
+    -- Scores every (cf_a, cf_b) pair produced by t_cf.
+    --   cnt_ab : number of t_cf rows for the pair (cf_a, cf_b)
+    --   cnt_a  : number of t_cf rows for cf_a overall
+    --   score  : cnt_ab / cnt_a rounded to 6 decimals
+    --   rank   : position of the pair inside its cf_a group, highest ratio first
+    -- Only the key_limit most frequent cf_a values are scored (inner join with b).
+    SELECT  a.cf_a
+            ,a.cf_b
+            ,a.cnt AS cnt_ab
+            ,b.cnt AS cnt_a
+            ,ROUND(a.cnt / b.cnt,6) AS score
+            -- cf_b is a secondary sort key so that pairs with an equal ratio always get the same rank.
+            -- Without it the engine may order ties differently on every run, which makes the
+            -- rank_limit cut-off in t_recall (and therefore the overwritten partition) unstable.
+            -- The DESC direction mirrors the cf_b DESC ordering already used in t_recall.
+            ,ROW_NUMBER() OVER (PARTITION BY a.cf_a ORDER BY a.cnt / b.cnt DESC,a.cf_b DESC ) AS rank
+    FROM    (
+                SELECT  cf_a
+                        ,cf_b
+                        ,COUNT(1) AS cnt
+                FROM    t_cf
+                GROUP BY cf_a
+                         ,cf_b
+            ) a
+    JOIN    (
+                SELECT  cf_a
+                        ,COUNT(1) AS cnt
+                FROM    t_cf
+                GROUP BY cf_a
+                -- cf_a breaks ties between equally frequent keys so the LIMIT keeps the same keys on reruns.
+                ORDER BY COUNT(1) DESC,cf_a ASC
+                LIMIT   ${key_limit}
+            ) b
+    ON      a.cf_a = b.cf_a
+)
+,t_recall AS -- One row per cf_a with its retained cf_b candidates packed into a JSON object.
+(
+    SELECT  cf_a AS vid -- ,CONCAT_WS(',',COLLECT_LIST(cf_b)) AS videoid_arr
+    -- ,CONCAT_WS(',',COLLECT_LIST(CAST(score AS STRING))) AS score_arr
+    -- ,CONCAT_WS(',',COLLECT_LIST(CAST(cnt_ab AS STRING))) AS cnt_ab_arr
+    -- ,CONCAT_WS(',',COLLECT_LIST(CAST(cnt_a AS STRING))) AS cnt_a_arr
+            ,JSON_OBJECT("videoid_arr",CONCAT_WS(',',COLLECT_LIST(cf_b)),"score_arr",CONCAT_WS(',',COLLECT_LIST(CAST(score AS STRING))),"cnt_ab_arr",CONCAT_WS(',',COLLECT_LIST(CAST(cnt_ab AS STRING))),"cnt_a_arr",CONCAT_WS(',',COLLECT_LIST(CAST(cnt_a AS STRING)))) AS feature -- four keys, each a comma-joined string collected over the rows of the cf_a group
+    FROM    (
+                SELECT  cf_a
+                        ,cf_b
+                        ,cnt_ab
+                        ,cnt_a
+                        ,score
+                        ,rank
+                FROM    t_score
+                WHERE   rank <= ${rank_limit} -- keep at most rank_limit pairs per cf_a
+                ORDER BY rank ASC,CAST(cf_b AS BIGINT) DESC -- NOTE(review): the COLLECT_LIST calls presumably rely on this ordering, confirm the engine preserves subquery order through GROUP BY
+            ) 
+    GROUP BY cf_a
+    ORDER BY SUM(cnt_a) DESC -- NOTE(review): row order presumably has no effect on the inserted partition, confirm before relying on it
+)
+SELECT  * -- final projection (vid, feature) consumed by the INSERT OVERWRITE at the top of this statement
+FROM    t_recall

+ 48 - 0
production_code/loghubods.alg_vid_global_feature_20250212.json

@@ -0,0 +1,48 @@
+{
+  "name": "alg_vid_global_feature_20250212",
+  "project": "loghubods",
+  "comment": "00_全局视频特征",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023781563,
+      "name": "01_全局视频特征_20250211"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

+ 219 - 0
production_code/loghubods.alg_vid_global_feature_20250212.sql

@@ -0,0 +1,219 @@
+-- Task: 01_全局视频特征_20250211  ID: 1023781563  Type: ODPS_SQL
+CREATE TABLE IF NOT EXISTS loghubods.alg_vid_global_feature_20250212 -- Output table of this task. IF NOT EXISTS makes the DDL a no-op once the table is there.
+(
+    vid      STRING COMMENT '视频id' -- video id
+    ,feature STRING COMMENT '特征JSON' -- feature JSON stored as a plain string
+)
+COMMENT '00_全局视频特征' -- table comment: global video features
+PARTITIONED BY -- one partition per (dt, hh) pair
+(
+    dt       STRING COMMENT '天' -- day
+    ,hh      STRING COMMENT '小时' -- hour
+)
+LIFECYCLE 30 -- retention setting of 30 (MaxCompute counts lifecycle in days)
+;
+
+INSERT OVERWRITE TABLE loghubods.alg_vid_global_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}') -- Replaces the partition named by the dt and hh parameters with the result of the query that follows.
+WITH t_base AS -- Raw exposure rows of the trailing window with their age relative to the run hour.
+(
+    SELECT  vid
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,return_1_uv
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now -- Unix timestamp of the dt+hh run hour
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - CAST(ts AS BIGINT) AS ts_diff -- age of the row relative to the run hour (assumes ts holds epoch seconds, TODO confirm)
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') -- source partitions from 7 days before the run hour up to the hour just before it (BETWEEN is inclusive on both ends)
+    AND     apptype NOT IN ("12") -- apptype 12 is excluded (reason not visible in this script)
+)
+,t_agg AS -- Per-vid conditional sums over seven cumulative look-back windows (1, 3, 6, 12, 24, 72 and 168 hours).
+(
+    SELECT  vid
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN 1 ELSE 0 END) AS exp_1h -- exp_*: number of t_base rows whose ts_diff falls inside the window
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN 1 ELSE 0 END) AS exp_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN 1 ELSE 0 END) AS exp_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN 1 ELSE 0 END) AS exp_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN 1 ELSE 0 END) AS exp_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN 1 ELSE 0 END) AS exp_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN 1 ELSE 0 END) AS exp_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN is_share ELSE 0 END) AS is_share_1h -- is_share_*: sum of is_share inside the window
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN is_share ELSE 0 END) AS is_share_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN is_share ELSE 0 END) AS is_share_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN is_share ELSE 0 END) AS is_share_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN is_share ELSE 0 END) AS is_share_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN is_share ELSE 0 END) AS is_share_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN is_share ELSE 0 END) AS is_share_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_cnt_1h -- share_cnt_*: sum of share_cnt inside the window
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_cnt_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_cnt_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_cnt_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_cnt_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_cnt_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_cnt_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN is_return_1 ELSE 0 END) AS is_return_1_1h -- is_return_1_*: sum of is_return_1 inside the window
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN is_return_1 ELSE 0 END) AS is_return_1_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN is_return_1 ELSE 0 END) AS is_return_1_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN is_return_1 ELSE 0 END) AS is_return_1_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN is_return_1 ELSE 0 END) AS is_return_1_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN is_return_1 ELSE 0 END) AS is_return_1_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN is_return_1 ELSE 0 END) AS is_return_1_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN return_1_uv ELSE 0 END) AS return_1_uv_1h -- return_1_uv_*: sum of return_1_uv inside the window
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN return_1_uv ELSE 0 END) AS return_1_uv_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN return_1_uv ELSE 0 END) AS return_1_uv_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN return_1_uv ELSE 0 END) AS return_1_uv_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN return_1_uv ELSE 0 END) AS return_1_uv_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN return_1_uv ELSE 0 END) AS return_1_uv_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN return_1_uv ELSE 0 END) AS return_1_uv_168h
+    FROM    t_base
+    WHERE   ts_diff >= 0 -- drop rows whose ts is later than the run hour
+    GROUP BY vid
+)
+,t_index AS -- Adds seven ratio columns per window on top of every t_agg column. Each ratio is rounded to 6 decimals and a NULL ratio becomes 0.
+(
+    SELECT  *
+            ,ROUND(COALESCE(is_share_1h / exp_1h,0),6) AS str_one_1h -- str_one: is_share over exp. NOTE(review): COALESCE presumably covers a NULL result from a zero denominator, confirm the engine division semantics
+            ,ROUND(COALESCE(return_1_uv_1h / is_share_1h,0),6) AS ros_one_1h -- ros_one: return_1_uv over is_share
+            ,ROUND(COALESCE(share_cnt_1h / exp_1h,0),6) AS str_1h -- str: share_cnt over exp
+            ,ROUND(COALESCE(return_1_uv_1h / share_cnt_1h,0),6) AS ros_1h -- ros: return_1_uv over share_cnt
+            ,ROUND(COALESCE(is_return_1_1h / exp_1h,0),6) AS str_plus_1h -- str_plus: is_return_1 over exp
+            ,ROUND(COALESCE(return_1_uv_1h / is_return_1_1h,0),6) AS ros_minus_1h -- ros_minus: return_1_uv over is_return_1
+            ,ROUND(COALESCE(return_1_uv_1h / exp_1h,0),6) AS rovn_1h -- rovn: return_1_uv over exp
+            ,ROUND(COALESCE(is_share_3h / exp_3h,0),6) AS str_one_3h
+            ,ROUND(COALESCE(return_1_uv_3h / is_share_3h,0),6) AS ros_one_3h
+            ,ROUND(COALESCE(share_cnt_3h / exp_3h,0),6) AS str_3h
+            ,ROUND(COALESCE(return_1_uv_3h / share_cnt_3h,0),6) AS ros_3h
+            ,ROUND(COALESCE(is_return_1_3h / exp_3h,0),6) AS str_plus_3h
+            ,ROUND(COALESCE(return_1_uv_3h / is_return_1_3h,0),6) AS ros_minus_3h
+            ,ROUND(COALESCE(return_1_uv_3h / exp_3h,0),6) AS rovn_3h
+            ,ROUND(COALESCE(is_share_6h / exp_6h,0),6) AS str_one_6h
+            ,ROUND(COALESCE(return_1_uv_6h / is_share_6h,0),6) AS ros_one_6h
+            ,ROUND(COALESCE(share_cnt_6h / exp_6h,0),6) AS str_6h
+            ,ROUND(COALESCE(return_1_uv_6h / share_cnt_6h,0),6) AS ros_6h
+            ,ROUND(COALESCE(is_return_1_6h / exp_6h,0),6) AS str_plus_6h
+            ,ROUND(COALESCE(return_1_uv_6h / is_return_1_6h,0),6) AS ros_minus_6h
+            ,ROUND(COALESCE(return_1_uv_6h / exp_6h,0),6) AS rovn_6h
+            ,ROUND(COALESCE(is_share_12h / exp_12h,0),6) AS str_one_12h
+            ,ROUND(COALESCE(return_1_uv_12h / is_share_12h,0),6) AS ros_one_12h
+            ,ROUND(COALESCE(share_cnt_12h / exp_12h,0),6) AS str_12h
+            ,ROUND(COALESCE(return_1_uv_12h / share_cnt_12h,0),6) AS ros_12h
+            ,ROUND(COALESCE(is_return_1_12h / exp_12h,0),6) AS str_plus_12h
+            ,ROUND(COALESCE(return_1_uv_12h / is_return_1_12h,0),6) AS ros_minus_12h
+            ,ROUND(COALESCE(return_1_uv_12h / exp_12h,0),6) AS rovn_12h
+            ,ROUND(COALESCE(is_share_24h / exp_24h,0),6) AS str_one_24h
+            ,ROUND(COALESCE(return_1_uv_24h / is_share_24h,0),6) AS ros_one_24h
+            ,ROUND(COALESCE(share_cnt_24h / exp_24h,0),6) AS str_24h
+            ,ROUND(COALESCE(return_1_uv_24h / share_cnt_24h,0),6) AS ros_24h
+            ,ROUND(COALESCE(is_return_1_24h / exp_24h,0),6) AS str_plus_24h
+            ,ROUND(COALESCE(return_1_uv_24h / is_return_1_24h,0),6) AS ros_minus_24h
+            ,ROUND(COALESCE(return_1_uv_24h / exp_24h,0),6) AS rovn_24h
+            ,ROUND(COALESCE(is_share_72h / exp_72h,0),6) AS str_one_72h
+            ,ROUND(COALESCE(return_1_uv_72h / is_share_72h,0),6) AS ros_one_72h
+            ,ROUND(COALESCE(share_cnt_72h / exp_72h,0),6) AS str_72h
+            ,ROUND(COALESCE(return_1_uv_72h / share_cnt_72h,0),6) AS ros_72h
+            ,ROUND(COALESCE(is_return_1_72h / exp_72h,0),6) AS str_plus_72h
+            ,ROUND(COALESCE(return_1_uv_72h / is_return_1_72h,0),6) AS ros_minus_72h
+            ,ROUND(COALESCE(return_1_uv_72h / exp_72h,0),6) AS rovn_72h
+            ,ROUND(COALESCE(is_share_168h / exp_168h,0),6) AS str_one_168h
+            ,ROUND(COALESCE(return_1_uv_168h / is_share_168h,0),6) AS ros_one_168h
+            ,ROUND(COALESCE(share_cnt_168h / exp_168h,0),6) AS str_168h
+            ,ROUND(COALESCE(return_1_uv_168h / share_cnt_168h,0),6) AS ros_168h
+            ,ROUND(COALESCE(is_return_1_168h / exp_168h,0),6) AS str_plus_168h
+            ,ROUND(COALESCE(return_1_uv_168h / is_return_1_168h,0),6) AS ros_minus_168h
+            ,ROUND(COALESCE(return_1_uv_168h / exp_168h,0),6) AS rovn_168h
+    FROM    t_agg
+) 
+,t_result AS -- Packs the per-vid aggregates into one JSON document, every value cast to STRING.
+(
+    SELECT  vid
+            ,JSON_FORMAT(JSON_OBJECT( -- builds a JSON object and formats it, presumably into the STRING expected by the feature column (confirm against the JSON_FORMAT docs)
+                    "exp_1h", CAST(exp_1h AS STRING),
+                    "exp_3h", CAST(exp_3h AS STRING),
+                    "exp_6h", CAST(exp_6h AS STRING),
+                    "exp_12h", CAST(exp_12h AS STRING),
+                    "exp_24h", CAST(exp_24h AS STRING),
+                    "exp_72h", CAST(exp_72h AS STRING),
+                    "exp_168h", CAST(exp_168h AS STRING),
+                    "is_share_1h", CAST(is_share_1h AS STRING),
+                    "is_share_3h", CAST(is_share_3h AS STRING),
+                    "is_share_6h", CAST(is_share_6h AS STRING),
+                    "is_share_12h", CAST(is_share_12h AS STRING),
+                    "is_share_24h", CAST(is_share_24h AS STRING),
+                    "is_share_72h", CAST(is_share_72h AS STRING),
+                    "is_share_168h", CAST(is_share_168h AS STRING),
+                    "share_cnt_1h", CAST(share_cnt_1h AS STRING),
+                    "share_cnt_3h", CAST(share_cnt_3h AS STRING),
+                    "share_cnt_6h", CAST(share_cnt_6h AS STRING),
+                    "share_cnt_12h", CAST(share_cnt_12h AS STRING),
+                    "share_cnt_24h", CAST(share_cnt_24h AS STRING),
+                    "share_cnt_72h", CAST(share_cnt_72h AS STRING),
+                    "share_cnt_168h", CAST(share_cnt_168h AS STRING),
+                    "is_return_1_1h", CAST(is_return_1_1h AS STRING),
+                    "is_return_1_3h", CAST(is_return_1_3h AS STRING),
+                    "is_return_1_6h", CAST(is_return_1_6h AS STRING),
+                    "is_return_1_12h", CAST(is_return_1_12h AS STRING),
+                    "is_return_1_24h", CAST(is_return_1_24h AS STRING),
+                    "is_return_1_72h", CAST(is_return_1_72h AS STRING),
+                    "is_return_1_168h", CAST(is_return_1_168h AS STRING),
+                    "return_1_uv_1h", CAST(return_1_uv_1h AS STRING),
+                    "return_1_uv_3h", CAST(return_1_uv_3h AS STRING),
+                    "return_1_uv_6h", CAST(return_1_uv_6h AS STRING),
+                    "return_1_uv_12h", CAST(return_1_uv_12h AS STRING),
+                    "return_1_uv_24h", CAST(return_1_uv_24h AS STRING),
+                    "return_1_uv_72h", CAST(return_1_uv_72h AS STRING),
+                    "return_1_uv_168h", CAST(return_1_uv_168h AS STRING),
+                    "str_one_1h", CAST(str_one_1h AS STRING), -- NOTE(review): of the t_index ratios only str_one_1h and rovn_168h are emitted, every key in between is commented out. Confirm this is intended
+                    -- "ros_one_1h", CAST(ros_one_1h AS STRING),
+                    -- "str_1h", CAST(str_1h AS STRING),
+                    -- "ros_1h", CAST(ros_1h AS STRING),
+                    -- "str_plus_1h", CAST(str_plus_1h AS STRING),
+                    -- "ros_minus_1h", CAST(ros_minus_1h AS STRING),
+                    -- "rovn_1h", CAST(rovn_1h AS STRING),
+                    -- "str_one_3h", CAST(str_one_3h AS STRING),
+                    -- "ros_one_3h", CAST(ros_one_3h AS STRING),
+                    -- "str_3h", CAST(str_3h AS STRING),
+                    -- "ros_3h", CAST(ros_3h AS STRING),
+                    -- "str_plus_3h", CAST(str_plus_3h AS STRING),
+                    -- "ros_minus_3h", CAST(ros_minus_3h AS STRING),
+                    -- "rovn_3h", CAST(rovn_3h AS STRING),
+                    -- "str_one_6h", CAST(str_one_6h AS STRING),
+                    -- "ros_one_6h", CAST(ros_one_6h AS STRING),
+                    -- "str_6h", CAST(str_6h AS STRING),
+                    -- "ros_6h", CAST(ros_6h AS STRING),
+                    -- "str_plus_6h", CAST(str_plus_6h AS STRING),
+                    -- "ros_minus_6h", CAST(ros_minus_6h AS STRING),
+                    -- "rovn_6h", CAST(rovn_6h AS STRING),
+                    -- "str_one_12h", CAST(str_one_12h AS STRING),
+                    -- "ros_one_12h", CAST(ros_one_12h AS STRING),
+                    -- "str_12h", CAST(str_12h AS STRING),
+                    -- "ros_12h", CAST(ros_12h AS STRING),
+                    -- "str_plus_12h", CAST(str_plus_12h AS STRING),
+                    -- "ros_minus_12h", CAST(ros_minus_12h AS STRING),
+                    -- "rovn_12h", CAST(rovn_12h AS STRING),
+                    -- "str_one_24h", CAST(str_one_24h AS STRING),
+                    -- "ros_one_24h", CAST(ros_one_24h AS STRING),
+                    -- "str_24h", CAST(str_24h AS STRING),
+                    -- "ros_24h", CAST(ros_24h AS STRING),
+                    -- "str_plus_24h", CAST(str_plus_24h AS STRING),
+                    -- "ros_minus_24h", CAST(ros_minus_24h AS STRING),
+                    -- "rovn_24h", CAST(rovn_24h AS STRING),
+                    -- "str_one_72h", CAST(str_one_72h AS STRING),
+                    -- "ros_one_72h", CAST(ros_one_72h AS STRING),
+                    -- "str_72h", CAST(str_72h AS STRING),
+                    -- "ros_72h", CAST(ros_72h AS STRING),
+                    -- "str_plus_72h", CAST(str_plus_72h AS STRING),
+                    -- "ros_minus_72h", CAST(ros_minus_72h AS STRING),
+                    -- "rovn_72h", CAST(rovn_72h AS STRING),
+                    -- "str_one_168h", CAST(str_one_168h AS STRING),
+                    -- "ros_one_168h", CAST(ros_one_168h AS STRING),
+                    -- "str_168h", CAST(str_168h AS STRING),
+                    -- "ros_168h", CAST(ros_168h AS STRING),
+                    -- "str_plus_168h", CAST(str_plus_168h AS STRING),
+                    -- "ros_minus_168h", CAST(ros_minus_168h AS STRING),
+                    "rovn_168h", CAST(rovn_168h AS STRING)
+            )) -- NOTE(review): the JSON column has no alias, so the INSERT presumably maps it to feature by position. Confirm before reordering columns
+    FROM    t_index
+)
+SELECT  * -- final projection (vid plus the JSON string) consumed by the INSERT OVERWRITE at the top of this statement
+FROM    t_result
+;

+ 54 - 0
production_code/loghubods.alg_vid_hotsencetype_recommend_exp_feature_20250212.json

@@ -0,0 +1,54 @@
+{
+  "name": "alg_vid_hotsencetype_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "06_推荐场景下的视频和热启动场景值特征",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "hotsencetype",
+      "type": "STRING",
+      "comment": "热启动场景值【正确的拼写为: hotscenetype】"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023788983,
+      "name": "07_推荐场景下的视频_scenetype特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 184 - 0
production_code/loghubods.alg_vid_hotsencetype_recommend_exp_feature_20250212.sql


+ 38 - 0
production_code/loghubods.alg_vid_long_period_recommend_exp_feature_20250212.json

@@ -0,0 +1,38 @@
+{
+  "name": "alg_vid_long_period_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "11_推荐场景下的视频长周期特征",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023818581,
+      "name": "12_推荐场景下的视频特征_长周期_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 141 - 0
production_code/loghubods.alg_vid_long_period_recommend_exp_feature_20250212.sql


+ 54 - 0
production_code/loghubods.alg_vid_province_recommend_exp_feature_20250212.json

@@ -0,0 +1,54 @@
+{
+  "name": "alg_vid_province_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "04_推荐场景下的视频和省份特征",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "province",
+      "type": "STRING",
+      "comment": "省份"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023788967,
+      "name": "05_推荐场景下的视频_province特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 182 - 0
production_code/loghubods.alg_vid_province_recommend_exp_feature_20250212.sql


+ 49 - 0
production_code/loghubods.alg_vid_recommend_exp_feature_20250212.json

@@ -0,0 +1,49 @@
+{
+  "name": "alg_vid_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "01_推荐场景下的视频特征",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023781564,
+      "name": "02_推荐场景下的视频特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

+ 276 - 0
production_code/loghubods.alg_vid_recommend_exp_feature_20250212.sql

@@ -0,0 +1,276 @@
+-- Task: 02_推荐场景下的视频特征_20250212  ID: 1023781564  Type: ODPS_SQL
+CREATE TABLE IF NOT EXISTS loghubods.alg_vid_recommend_exp_feature_20250212  -- Target table: one JSON feature string per vid, partitioned by day (dt) and hour (hh).
+(
+    vid      STRING COMMENT '视频id'
+    ,feature STRING COMMENT '特征JSON'
+)
+COMMENT '01_推荐场景下的视频特征'
+PARTITIONED BY 
+(
+    dt       STRING COMMENT '天'
+    ,hh      STRING COMMENT '小时'
+)
+LIFECYCLE 30
+;
+
+
+INSERT OVERWRITE TABLE loghubods.alg_vid_recommend_exp_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')  -- Rewrites only the partition named by the ${dt}/${hh} parameters.
+WITH t_exp AS  -- Exposure rows from the trailing 7-day partition range, restricted by the page/apptype filters below.
+(
+    SELECT  vid
+            ,apptype
+            ,page
+            ,recommendpagetype
+            ,province
+            ,hotsencetype
+            ,machineinfo_brand AS brand
+            ,is_share
+            ,return_n_uv
+            ,share_cnt
+            ,is_return_1
+            ,new_exposure_cnt
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now  -- run hour as a Unix timestamp; not referenced by the later CTEs in this script
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - CAST(ts AS BIGINT) AS ts_diff  -- run hour minus the row's ts; assumes ts holds epoch seconds -- TODO confirm
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')  -- partitions from (run hour - 7 days) through (run hour - 1 hour), both ends included
+    AND     (
+                page IN ("详情后沉浸页","详情页")
+                OR      (
+                            page = "回流后沉浸页&内页feed"
+                            AND     recommendpagetype REGEXP "-pages/user-videos-share-recommend-detail$"
+                )
+    )
+    AND     apptype NOT IN ("12")
+)
+,t_basic_info AS  -- One row per vid from the basic-info table at the run partition. NOTE(review): t_agg below never reads the columns extracted here.
+(
+    SELECT  *
+    FROM    (
+                SELECT  vid
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.channel"),"unknown") AS channel
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.merge_first_level_cate"),"unknown") AS merge_cate1
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.merge_second_level_cate"),"unknown") AS merge_cate2
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.festive_label1"),"unknown") AS festive_label1
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.festive_label2"),"unknown") AS festive_label2
+                        ,ROW_NUMBER() OVER (PARTITION BY vid ) AS rn  -- NOTE(review): no ORDER BY, so which duplicate row gets rn = 1 is arbitrary
+                FROM    loghubods.alg_vid_feature_basic_info
+                WHERE   CONCAT(dt,hh) = "${dt}${hh}"
+            ) 
+    WHERE   rn = 1
+)
+,t_base AS  -- Exposure rows LEFT JOINed to basic info on vid; exposures without a basic-info row are kept.
+(
+    SELECT  ta.*
+            ,info.channel
+            ,info.merge_cate1
+            ,info.merge_cate2
+            ,info.festive_label1
+            ,info.festive_label2
+    FROM    t_exp ta
+    LEFT JOIN t_basic_info info
+    ON      ta.vid = info.vid
+)
+,t_agg AS  -- Per-vid sums for windows of 1/3/6/12/24/72/168 hours; a row counts toward every window whose bound is >= its ts_diff.
+(
+    SELECT  vid
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN 1 ELSE 0 END) AS exp_1h  -- exp_*: number of exposure rows inside the window
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN 1 ELSE 0 END) AS exp_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN 1 ELSE 0 END) AS exp_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN 1 ELSE 0 END) AS exp_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN 1 ELSE 0 END) AS exp_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN 1 ELSE 0 END) AS exp_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN 1 ELSE 0 END) AS exp_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN is_share ELSE 0 END) AS is_share_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN is_share ELSE 0 END) AS is_share_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN is_share ELSE 0 END) AS is_share_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN is_share ELSE 0 END) AS is_share_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN is_share ELSE 0 END) AS is_share_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN is_share ELSE 0 END) AS is_share_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN is_share ELSE 0 END) AS is_share_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_cnt_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_cnt_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_cnt_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_cnt_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_cnt_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_cnt_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_cnt_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN is_return_1 ELSE 0 END) AS is_return_1_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN is_return_1 ELSE 0 END) AS is_return_1_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN is_return_1 ELSE 0 END) AS is_return_1_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN is_return_1 ELSE 0 END) AS is_return_1_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN is_return_1 ELSE 0 END) AS is_return_1_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN is_return_1 ELSE 0 END) AS is_return_1_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN is_return_1 ELSE 0 END) AS is_return_1_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN return_n_uv ELSE 0 END) AS return_n_uv_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN return_n_uv ELSE 0 END) AS return_n_uv_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN return_n_uv ELSE 0 END) AS return_n_uv_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN return_n_uv ELSE 0 END) AS return_n_uv_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN return_n_uv ELSE 0 END) AS return_n_uv_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN return_n_uv ELSE 0 END) AS return_n_uv_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN return_n_uv ELSE 0 END) AS return_n_uv_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_168h
+    FROM    t_base
+    WHERE   ts_diff >= 0  -- drops rows whose ts is later than the run hour; rows with a NULL ts_diff fail the comparison and are dropped too
+    GROUP BY vid
+)
+,t_index AS  -- Ratios between the windowed sums, rounded to 6 decimals; COALESCE replaces a NULL quotient with 0.
+(
+    SELECT  *
+            ,ROUND(COALESCE(is_share_1h / exp_1h,0),6) AS str_one_1h
+            ,ROUND(COALESCE(return_n_uv_1h / is_share_1h,0),6) AS ros_one_1h
+            ,ROUND(COALESCE(share_cnt_1h / exp_1h,0),6) AS str_1h
+            ,ROUND(COALESCE(return_n_uv_1h / share_cnt_1h,0),6) AS ros_1h
+            ,ROUND(COALESCE(is_return_1_1h / exp_1h,0),6) AS str_plus_1h
+            ,ROUND(COALESCE(return_n_uv_1h / is_return_1_1h,0),6) AS ros_minus_1h
+            ,ROUND(COALESCE(return_n_uv_1h / exp_1h,0),6) AS rovn_1h
+            ,ROUND(COALESCE(is_share_3h / exp_3h,0),6) AS str_one_3h
+            ,ROUND(COALESCE(return_n_uv_3h / is_share_3h,0),6) AS ros_one_3h
+            ,ROUND(COALESCE(share_cnt_3h / exp_3h,0),6) AS str_3h
+            ,ROUND(COALESCE(return_n_uv_3h / share_cnt_3h,0),6) AS ros_3h
+            ,ROUND(COALESCE(is_return_1_3h / exp_3h,0),6) AS str_plus_3h
+            ,ROUND(COALESCE(return_n_uv_3h / is_return_1_3h,0),6) AS ros_minus_3h
+            ,ROUND(COALESCE(return_n_uv_3h / exp_3h,0),6) AS rovn_3h
+            ,ROUND(COALESCE(is_share_6h / exp_6h,0),6) AS str_one_6h
+            ,ROUND(COALESCE(return_n_uv_6h / is_share_6h,0),6) AS ros_one_6h
+            ,ROUND(COALESCE(share_cnt_6h / exp_6h,0),6) AS str_6h
+            ,ROUND(COALESCE(return_n_uv_6h / share_cnt_6h,0),6) AS ros_6h
+            ,ROUND(COALESCE(is_return_1_6h / exp_6h,0),6) AS str_plus_6h
+            ,ROUND(COALESCE(return_n_uv_6h / is_return_1_6h,0),6) AS ros_minus_6h
+            ,ROUND(COALESCE(return_n_uv_6h / exp_6h,0),6) AS rovn_6h
+            ,ROUND(COALESCE(is_share_12h / exp_12h,0),6) AS str_one_12h
+            ,ROUND(COALESCE(return_n_uv_12h / is_share_12h,0),6) AS ros_one_12h
+            ,ROUND(COALESCE(share_cnt_12h / exp_12h,0),6) AS str_12h
+            ,ROUND(COALESCE(return_n_uv_12h / share_cnt_12h,0),6) AS ros_12h
+            ,ROUND(COALESCE(is_return_1_12h / exp_12h,0),6) AS str_plus_12h
+            ,ROUND(COALESCE(return_n_uv_12h / is_return_1_12h,0),6) AS ros_minus_12h
+            ,ROUND(COALESCE(return_n_uv_12h / exp_12h,0),6) AS rovn_12h
+            ,ROUND(COALESCE(is_share_24h / exp_24h,0),6) AS str_one_24h
+            ,ROUND(COALESCE(return_n_uv_24h / is_share_24h,0),6) AS ros_one_24h
+            ,ROUND(COALESCE(share_cnt_24h / exp_24h,0),6) AS str_24h
+            ,ROUND(COALESCE(return_n_uv_24h / share_cnt_24h,0),6) AS ros_24h
+            ,ROUND(COALESCE(is_return_1_24h / exp_24h,0),6) AS str_plus_24h
+            ,ROUND(COALESCE(return_n_uv_24h / is_return_1_24h,0),6) AS ros_minus_24h
+            ,ROUND(COALESCE(return_n_uv_24h / exp_24h,0),6) AS rovn_24h
+            ,ROUND(COALESCE(is_share_72h / exp_72h,0),6) AS str_one_72h
+            ,ROUND(COALESCE(return_n_uv_72h / is_share_72h,0),6) AS ros_one_72h
+            ,ROUND(COALESCE(share_cnt_72h / exp_72h,0),6) AS str_72h
+            ,ROUND(COALESCE(return_n_uv_72h / share_cnt_72h,0),6) AS ros_72h
+            ,ROUND(COALESCE(is_return_1_72h / exp_72h,0),6) AS str_plus_72h
+            ,ROUND(COALESCE(return_n_uv_72h / is_return_1_72h,0),6) AS ros_minus_72h
+            ,ROUND(COALESCE(return_n_uv_72h / exp_72h,0),6) AS rovn_72h
+            ,ROUND(COALESCE(is_share_168h / exp_168h,0),6) AS str_one_168h
+            ,ROUND(COALESCE(return_n_uv_168h / is_share_168h,0),6) AS ros_one_168h
+            ,ROUND(COALESCE(share_cnt_168h / exp_168h,0),6) AS str_168h
+            ,ROUND(COALESCE(return_n_uv_168h / share_cnt_168h,0),6) AS ros_168h
+            ,ROUND(COALESCE(is_return_1_168h / exp_168h,0),6) AS str_plus_168h
+            ,ROUND(COALESCE(return_n_uv_168h / is_return_1_168h,0),6) AS ros_minus_168h
+            ,ROUND(COALESCE(return_n_uv_168h / exp_168h,0),6) AS rovn_168h
+    FROM    t_agg
+) 
+,t_result AS  -- Packs the metrics into one JSON string per vid, with every value cast to STRING.
+(
+    SELECT  vid
+            ,JSON_FORMAT(JSON_OBJECT(  -- NOTE(review): this column has no alias; it reaches the feature column only if the INSERT maps columns by position -- confirm
+                    "exp_1h", CAST(exp_1h AS STRING),
+                    "exp_3h", CAST(exp_3h AS STRING),
+                    "exp_6h", CAST(exp_6h AS STRING),
+                    "exp_12h", CAST(exp_12h AS STRING),
+                    "exp_24h", CAST(exp_24h AS STRING),
+                    "exp_72h", CAST(exp_72h AS STRING),
+                    "exp_168h", CAST(exp_168h AS STRING),
+                    "is_share_1h", CAST(is_share_1h AS STRING),
+                    "is_share_3h", CAST(is_share_3h AS STRING),
+                    "is_share_6h", CAST(is_share_6h AS STRING),
+                    "is_share_12h", CAST(is_share_12h AS STRING),
+                    "is_share_24h", CAST(is_share_24h AS STRING),
+                    "is_share_72h", CAST(is_share_72h AS STRING),
+                    "is_share_168h", CAST(is_share_168h AS STRING),
+                    "share_cnt_1h", CAST(share_cnt_1h AS STRING),
+                    "share_cnt_3h", CAST(share_cnt_3h AS STRING),
+                    "share_cnt_6h", CAST(share_cnt_6h AS STRING),
+                    "share_cnt_12h", CAST(share_cnt_12h AS STRING),
+                    "share_cnt_24h", CAST(share_cnt_24h AS STRING),
+                    "share_cnt_72h", CAST(share_cnt_72h AS STRING),
+                    "share_cnt_168h", CAST(share_cnt_168h AS STRING),
+                    "is_return_1_1h", CAST(is_return_1_1h AS STRING),
+                    "is_return_1_3h", CAST(is_return_1_3h AS STRING),
+                    "is_return_1_6h", CAST(is_return_1_6h AS STRING),
+                    "is_return_1_12h", CAST(is_return_1_12h AS STRING),
+                    "is_return_1_24h", CAST(is_return_1_24h AS STRING),
+                    "is_return_1_72h", CAST(is_return_1_72h AS STRING),
+                    "is_return_1_168h", CAST(is_return_1_168h AS STRING),
+                    "return_n_uv_1h", CAST(return_n_uv_1h AS STRING),
+                    "return_n_uv_3h", CAST(return_n_uv_3h AS STRING),
+                    "return_n_uv_6h", CAST(return_n_uv_6h AS STRING),
+                    "return_n_uv_12h", CAST(return_n_uv_12h AS STRING),
+                    "return_n_uv_24h", CAST(return_n_uv_24h AS STRING),
+                    "return_n_uv_72h", CAST(return_n_uv_72h AS STRING),
+                    "return_n_uv_168h", CAST(return_n_uv_168h AS STRING),
+                    "new_exposure_cnt_1h", CAST(new_exposure_cnt_1h AS STRING),
+                    "new_exposure_cnt_3h", CAST(new_exposure_cnt_3h AS STRING),
+                    "new_exposure_cnt_6h", CAST(new_exposure_cnt_6h AS STRING),
+                    "new_exposure_cnt_12h", CAST(new_exposure_cnt_12h AS STRING),
+                    "new_exposure_cnt_24h", CAST(new_exposure_cnt_24h AS STRING),
+                    "new_exposure_cnt_72h", CAST(new_exposure_cnt_72h AS STRING),
+                    "new_exposure_cnt_168h", CAST(new_exposure_cnt_168h AS STRING),
+                    "str_one_1h", CAST(str_one_1h AS STRING),  -- of the t_index ratios only str_one_1h and rovn_168h are emitted; the others are commented out below
+                    -- "ros_one_1h", CAST(ros_one_1h AS STRING),
+                    -- "str_1h", CAST(str_1h AS STRING),
+                    -- "ros_1h", CAST(ros_1h AS STRING),
+                    -- "str_plus_1h", CAST(str_plus_1h AS STRING),
+                    -- "ros_minus_1h", CAST(ros_minus_1h AS STRING),
+                    -- "rovn_1h", CAST(rovn_1h AS STRING),
+                    -- "str_one_3h", CAST(str_one_3h AS STRING),
+                    -- "ros_one_3h", CAST(ros_one_3h AS STRING),
+                    -- "str_3h", CAST(str_3h AS STRING),
+                    -- "ros_3h", CAST(ros_3h AS STRING),
+                    -- "str_plus_3h", CAST(str_plus_3h AS STRING),
+                    -- "ros_minus_3h", CAST(ros_minus_3h AS STRING),
+                    -- "rovn_3h", CAST(rovn_3h AS STRING),
+                    -- "str_one_6h", CAST(str_one_6h AS STRING),
+                    -- "ros_one_6h", CAST(ros_one_6h AS STRING),
+                    -- "str_6h", CAST(str_6h AS STRING),
+                    -- "ros_6h", CAST(ros_6h AS STRING),
+                    -- "str_plus_6h", CAST(str_plus_6h AS STRING),
+                    -- "ros_minus_6h", CAST(ros_minus_6h AS STRING),
+                    -- "rovn_6h", CAST(rovn_6h AS STRING),
+                    -- "str_one_12h", CAST(str_one_12h AS STRING),
+                    -- "ros_one_12h", CAST(ros_one_12h AS STRING),
+                    -- "str_12h", CAST(str_12h AS STRING),
+                    -- "ros_12h", CAST(ros_12h AS STRING),
+                    -- "str_plus_12h", CAST(str_plus_12h AS STRING),
+                    -- "ros_minus_12h", CAST(ros_minus_12h AS STRING),
+                    -- "rovn_12h", CAST(rovn_12h AS STRING),
+                    -- "str_one_24h", CAST(str_one_24h AS STRING),
+                    -- "ros_one_24h", CAST(ros_one_24h AS STRING),
+                    -- "str_24h", CAST(str_24h AS STRING),
+                    -- "ros_24h", CAST(ros_24h AS STRING),
+                    -- "str_plus_24h", CAST(str_plus_24h AS STRING),
+                    -- "ros_minus_24h", CAST(ros_minus_24h AS STRING),
+                    -- "rovn_24h", CAST(rovn_24h AS STRING),
+                    -- "str_one_72h", CAST(str_one_72h AS STRING),
+                    -- "ros_one_72h", CAST(ros_one_72h AS STRING),
+                    -- "str_72h", CAST(str_72h AS STRING),
+                    -- "ros_72h", CAST(ros_72h AS STRING),
+                    -- "str_plus_72h", CAST(str_plus_72h AS STRING),
+                    -- "ros_minus_72h", CAST(ros_minus_72h AS STRING),
+                    -- "rovn_72h", CAST(rovn_72h AS STRING),
+                    -- "str_one_168h", CAST(str_one_168h AS STRING),
+                    -- "ros_one_168h", CAST(ros_one_168h AS STRING),
+                    -- "str_168h", CAST(str_168h AS STRING),
+                    -- "ros_168h", CAST(ros_168h AS STRING),
+                    -- "str_plus_168h", CAST(str_plus_168h AS STRING),
+                    -- "ros_minus_168h", CAST(ros_minus_168h AS STRING),
+                    "rovn_168h", CAST(rovn_168h AS STRING)
+            ))
+    FROM    t_index
+)
+SELECT  *
+FROM    t_result
+;

+ 49 - 0
production_code/loghubods.alg_vid_recommend_flowpool_exp_feature_20250212.json

@@ -0,0 +1,49 @@
+{
+  "name": "alg_vid_recommend_flowpool_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "02_推荐流量池场景下的视频特征",
+  "columns": [
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": "视频id"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023788945,
+      "name": "03_推荐流量池场景下的视频特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

Разница между файлами не показана из-за своего большого размера
+ 180 - 0
production_code/loghubods.alg_vid_recommend_flowpool_exp_feature_20250212.sql


+ 49 - 0
production_code/loghubods.alg_video_unionid_recommend_exp_feature_20250212.json

@@ -0,0 +1,49 @@
+{
+  "name": "alg_video_unionid_recommend_exp_feature_20250212",
+  "project": "loghubods",
+  "comment": "13_推荐场景下唯一ID视频特征",
+  "columns": [
+    {
+      "name": "video_unionid",
+      "type": "STRING",
+      "comment": "视频唯一ID"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023841633,
+      "name": "13_推荐场景下唯一ID视频特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

+ 278 - 0
production_code/loghubods.alg_video_unionid_recommend_exp_feature_20250212.sql

@@ -0,0 +1,278 @@
+-- Task: 13_推荐场景下唯一ID视频特征_20250212  ID: 1023841633  Type: ODPS_SQL
+CREATE TABLE IF NOT EXISTS loghubods.alg_video_unionid_recommend_exp_feature_20250212
+(
+    video_unionid  STRING COMMENT '视频唯一ID'
+    ,feature       STRING COMMENT '特征JSON'
+)
+COMMENT '13_推荐场景下唯一ID视频特征'
+PARTITIONED BY 
+(
+    dt       STRING COMMENT '天'
+    ,hh      STRING COMMENT '小时'
+)
+LIFECYCLE 30
+;
+
+
+INSERT OVERWRITE TABLE loghubods.alg_video_unionid_recommend_exp_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_exp AS 
+(
+    SELECT  vid
+            ,apptype
+            ,page
+            ,recommendpagetype
+            ,province
+            ,hotsencetype
+            ,machineinfo_brand AS brand
+            ,is_share
+            ,return_n_uv
+            ,share_cnt
+            ,is_return_1
+            ,new_exposure_cnt
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) AS ts_now
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - CAST(ts AS BIGINT) AS ts_diff
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+    AND     (
+                page IN ("详情后沉浸页","详情页")
+                OR      (
+                            page = "回流后沉浸页&内页feed"
+                            AND     recommendpagetype REGEXP "-pages/user-videos-share-recommend-detail$"
+                )
+    )
+    AND     apptype NOT IN ("12")
+)
+,t_basic_info AS 
+(
+    SELECT  *
+    FROM    (
+                SELECT  vid
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.channel"),"unknown") AS channel
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.merge_first_level_cate"),"unknown") AS merge_cate1
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.merge_second_level_cate"),"unknown") AS merge_cate2
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.festive_label1"),"unknown") AS festive_label1
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.festive_label2"),"unknown") AS festive_label2
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.title_time_w_h_unionid"),"unknown") AS title_time_w_h_unionid
+                        ,ROW_NUMBER() OVER (PARTITION BY vid ) AS rn
+                FROM    loghubods.alg_vid_feature_basic_info
+                WHERE   CONCAT(dt,hh) = "${dt}${hh}"
+            ) 
+    WHERE   rn = 1
+)
+,t_base AS 
+(
+    SELECT  ta.*
+            ,info.channel
+            ,info.merge_cate1
+            ,info.merge_cate2
+            ,info.festive_label1
+            ,info.festive_label2
+            ,info.title_time_w_h_unionid
+    FROM    t_exp ta
+    LEFT JOIN t_basic_info info
+    ON      ta.vid = info.vid
+)
+,t_agg AS 
+(
+    SELECT  title_time_w_h_unionid AS video_unionid
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN 1 ELSE 0 END) AS exp_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN 1 ELSE 0 END) AS exp_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN 1 ELSE 0 END) AS exp_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN 1 ELSE 0 END) AS exp_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN 1 ELSE 0 END) AS exp_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN 1 ELSE 0 END) AS exp_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN 1 ELSE 0 END) AS exp_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN is_share ELSE 0 END) AS is_share_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN is_share ELSE 0 END) AS is_share_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN is_share ELSE 0 END) AS is_share_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN is_share ELSE 0 END) AS is_share_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN is_share ELSE 0 END) AS is_share_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN is_share ELSE 0 END) AS is_share_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN is_share ELSE 0 END) AS is_share_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN share_cnt ELSE 0 END) AS share_cnt_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN share_cnt ELSE 0 END) AS share_cnt_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN share_cnt ELSE 0 END) AS share_cnt_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN share_cnt ELSE 0 END) AS share_cnt_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN share_cnt ELSE 0 END) AS share_cnt_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN share_cnt ELSE 0 END) AS share_cnt_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN share_cnt ELSE 0 END) AS share_cnt_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN is_return_1 ELSE 0 END) AS is_return_1_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN is_return_1 ELSE 0 END) AS is_return_1_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN is_return_1 ELSE 0 END) AS is_return_1_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN is_return_1 ELSE 0 END) AS is_return_1_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN is_return_1 ELSE 0 END) AS is_return_1_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN is_return_1 ELSE 0 END) AS is_return_1_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN is_return_1 ELSE 0 END) AS is_return_1_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN return_n_uv ELSE 0 END) AS return_n_uv_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN return_n_uv ELSE 0 END) AS return_n_uv_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN return_n_uv ELSE 0 END) AS return_n_uv_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN return_n_uv ELSE 0 END) AS return_n_uv_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN return_n_uv ELSE 0 END) AS return_n_uv_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN return_n_uv ELSE 0 END) AS return_n_uv_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN return_n_uv ELSE 0 END) AS return_n_uv_168h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 1 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_1h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 3 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_3h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 6 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_6h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 12 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_12h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 24 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_24h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 72 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_72h
+            ,SUM(CASE    WHEN ts_diff <= 3600 * 168 THEN new_exposure_cnt ELSE 0 END) AS new_exposure_cnt_168h
+    FROM    t_base
+    WHERE   ts_diff >= 0
+    GROUP BY title_time_w_h_unionid
+)
+,t_index AS 
+(
+    SELECT  *
+            ,ROUND(COALESCE(is_share_1h / exp_1h,0),6) AS str_one_1h
+            ,ROUND(COALESCE(return_n_uv_1h / is_share_1h,0),6) AS ros_one_1h
+            ,ROUND(COALESCE(share_cnt_1h / exp_1h,0),6) AS str_1h
+            ,ROUND(COALESCE(return_n_uv_1h / share_cnt_1h,0),6) AS ros_1h
+            ,ROUND(COALESCE(is_return_1_1h / exp_1h,0),6) AS str_plus_1h
+            ,ROUND(COALESCE(return_n_uv_1h / is_return_1_1h,0),6) AS ros_minus_1h
+            ,ROUND(COALESCE(return_n_uv_1h / exp_1h,0),6) AS rovn_1h
+            ,ROUND(COALESCE(is_share_3h / exp_3h,0),6) AS str_one_3h
+            ,ROUND(COALESCE(return_n_uv_3h / is_share_3h,0),6) AS ros_one_3h
+            ,ROUND(COALESCE(share_cnt_3h / exp_3h,0),6) AS str_3h
+            ,ROUND(COALESCE(return_n_uv_3h / share_cnt_3h,0),6) AS ros_3h
+            ,ROUND(COALESCE(is_return_1_3h / exp_3h,0),6) AS str_plus_3h
+            ,ROUND(COALESCE(return_n_uv_3h / is_return_1_3h,0),6) AS ros_minus_3h
+            ,ROUND(COALESCE(return_n_uv_3h / exp_3h,0),6) AS rovn_3h
+            ,ROUND(COALESCE(is_share_6h / exp_6h,0),6) AS str_one_6h
+            ,ROUND(COALESCE(return_n_uv_6h / is_share_6h,0),6) AS ros_one_6h
+            ,ROUND(COALESCE(share_cnt_6h / exp_6h,0),6) AS str_6h
+            ,ROUND(COALESCE(return_n_uv_6h / share_cnt_6h,0),6) AS ros_6h
+            ,ROUND(COALESCE(is_return_1_6h / exp_6h,0),6) AS str_plus_6h
+            ,ROUND(COALESCE(return_n_uv_6h / is_return_1_6h,0),6) AS ros_minus_6h
+            ,ROUND(COALESCE(return_n_uv_6h / exp_6h,0),6) AS rovn_6h
+            ,ROUND(COALESCE(is_share_12h / exp_12h,0),6) AS str_one_12h
+            ,ROUND(COALESCE(return_n_uv_12h / is_share_12h,0),6) AS ros_one_12h
+            ,ROUND(COALESCE(share_cnt_12h / exp_12h,0),6) AS str_12h
+            ,ROUND(COALESCE(return_n_uv_12h / share_cnt_12h,0),6) AS ros_12h
+            ,ROUND(COALESCE(is_return_1_12h / exp_12h,0),6) AS str_plus_12h
+            ,ROUND(COALESCE(return_n_uv_12h / is_return_1_12h,0),6) AS ros_minus_12h
+            ,ROUND(COALESCE(return_n_uv_12h / exp_12h,0),6) AS rovn_12h
+            ,ROUND(COALESCE(is_share_24h / exp_24h,0),6) AS str_one_24h
+            ,ROUND(COALESCE(return_n_uv_24h / is_share_24h,0),6) AS ros_one_24h
+            ,ROUND(COALESCE(share_cnt_24h / exp_24h,0),6) AS str_24h
+            ,ROUND(COALESCE(return_n_uv_24h / share_cnt_24h,0),6) AS ros_24h
+            ,ROUND(COALESCE(is_return_1_24h / exp_24h,0),6) AS str_plus_24h
+            ,ROUND(COALESCE(return_n_uv_24h / is_return_1_24h,0),6) AS ros_minus_24h
+            ,ROUND(COALESCE(return_n_uv_24h / exp_24h,0),6) AS rovn_24h
+            ,ROUND(COALESCE(is_share_72h / exp_72h,0),6) AS str_one_72h
+            ,ROUND(COALESCE(return_n_uv_72h / is_share_72h,0),6) AS ros_one_72h
+            ,ROUND(COALESCE(share_cnt_72h / exp_72h,0),6) AS str_72h
+            ,ROUND(COALESCE(return_n_uv_72h / share_cnt_72h,0),6) AS ros_72h
+            ,ROUND(COALESCE(is_return_1_72h / exp_72h,0),6) AS str_plus_72h
+            ,ROUND(COALESCE(return_n_uv_72h / is_return_1_72h,0),6) AS ros_minus_72h
+            ,ROUND(COALESCE(return_n_uv_72h / exp_72h,0),6) AS rovn_72h
+            ,ROUND(COALESCE(is_share_168h / exp_168h,0),6) AS str_one_168h
+            ,ROUND(COALESCE(return_n_uv_168h / is_share_168h,0),6) AS ros_one_168h
+            ,ROUND(COALESCE(share_cnt_168h / exp_168h,0),6) AS str_168h
+            ,ROUND(COALESCE(return_n_uv_168h / share_cnt_168h,0),6) AS ros_168h
+            ,ROUND(COALESCE(is_return_1_168h / exp_168h,0),6) AS str_plus_168h
+            ,ROUND(COALESCE(return_n_uv_168h / is_return_1_168h,0),6) AS ros_minus_168h
+            ,ROUND(COALESCE(return_n_uv_168h / exp_168h,0),6) AS rovn_168h
+    FROM    t_agg
+) 
+,t_result AS -- Serialises the windowed counters of t_index (plus two ratio fields) into one JSON string next to video_unionid.
+(
+    SELECT  video_unionid
+            ,JSON_FORMAT(JSON_OBJECT( -- Every value is cast to STRING; the resulting column carries no alias.
+                    "exp_1h", CAST(exp_1h AS STRING),
+                    "exp_3h", CAST(exp_3h AS STRING),
+                    "exp_6h", CAST(exp_6h AS STRING),
+                    "exp_12h", CAST(exp_12h AS STRING),
+                    "exp_24h", CAST(exp_24h AS STRING),
+                    "exp_72h", CAST(exp_72h AS STRING),
+                    "exp_168h", CAST(exp_168h AS STRING),
+                    "is_share_1h", CAST(is_share_1h AS STRING),
+                    "is_share_3h", CAST(is_share_3h AS STRING),
+                    "is_share_6h", CAST(is_share_6h AS STRING),
+                    "is_share_12h", CAST(is_share_12h AS STRING),
+                    "is_share_24h", CAST(is_share_24h AS STRING),
+                    "is_share_72h", CAST(is_share_72h AS STRING),
+                    "is_share_168h", CAST(is_share_168h AS STRING),
+                    "share_cnt_1h", CAST(share_cnt_1h AS STRING),
+                    "share_cnt_3h", CAST(share_cnt_3h AS STRING),
+                    "share_cnt_6h", CAST(share_cnt_6h AS STRING),
+                    "share_cnt_12h", CAST(share_cnt_12h AS STRING),
+                    "share_cnt_24h", CAST(share_cnt_24h AS STRING),
+                    "share_cnt_72h", CAST(share_cnt_72h AS STRING),
+                    "share_cnt_168h", CAST(share_cnt_168h AS STRING),
+                    "is_return_1_1h", CAST(is_return_1_1h AS STRING),
+                    "is_return_1_3h", CAST(is_return_1_3h AS STRING),
+                    "is_return_1_6h", CAST(is_return_1_6h AS STRING),
+                    "is_return_1_12h", CAST(is_return_1_12h AS STRING),
+                    "is_return_1_24h", CAST(is_return_1_24h AS STRING),
+                    "is_return_1_72h", CAST(is_return_1_72h AS STRING),
+                    "is_return_1_168h", CAST(is_return_1_168h AS STRING),
+                    "return_n_uv_1h", CAST(return_n_uv_1h AS STRING),
+                    "return_n_uv_3h", CAST(return_n_uv_3h AS STRING),
+                    "return_n_uv_6h", CAST(return_n_uv_6h AS STRING),
+                    "return_n_uv_12h", CAST(return_n_uv_12h AS STRING),
+                    "return_n_uv_24h", CAST(return_n_uv_24h AS STRING),
+                    "return_n_uv_72h", CAST(return_n_uv_72h AS STRING),
+                    "return_n_uv_168h", CAST(return_n_uv_168h AS STRING),
+                    "new_exposure_cnt_1h", CAST(new_exposure_cnt_1h AS STRING),
+                    "new_exposure_cnt_3h", CAST(new_exposure_cnt_3h AS STRING),
+                    "new_exposure_cnt_6h", CAST(new_exposure_cnt_6h AS STRING),
+                    "new_exposure_cnt_12h", CAST(new_exposure_cnt_12h AS STRING),
+                    "new_exposure_cnt_24h", CAST(new_exposure_cnt_24h AS STRING),
+                    "new_exposure_cnt_72h", CAST(new_exposure_cnt_72h AS STRING),
+                    "new_exposure_cnt_168h", CAST(new_exposure_cnt_168h AS STRING),
+                    "str_one_1h", CAST(str_one_1h AS STRING), -- One of only two ratio keys still emitted; the ratio keys commented out below are disabled.
+                    -- "ros_one_1h", CAST(ros_one_1h AS STRING),
+                    -- "str_1h", CAST(str_1h AS STRING),
+                    -- "ros_1h", CAST(ros_1h AS STRING),
+                    -- "str_plus_1h", CAST(str_plus_1h AS STRING),
+                    -- "ros_minus_1h", CAST(ros_minus_1h AS STRING),
+                    -- "rovn_1h", CAST(rovn_1h AS STRING),
+                    -- "str_one_3h", CAST(str_one_3h AS STRING),
+                    -- "ros_one_3h", CAST(ros_one_3h AS STRING),
+                    -- "str_3h", CAST(str_3h AS STRING),
+                    -- "ros_3h", CAST(ros_3h AS STRING),
+                    -- "str_plus_3h", CAST(str_plus_3h AS STRING),
+                    -- "ros_minus_3h", CAST(ros_minus_3h AS STRING),
+                    -- "rovn_3h", CAST(rovn_3h AS STRING),
+                    -- "str_one_6h", CAST(str_one_6h AS STRING),
+                    -- "ros_one_6h", CAST(ros_one_6h AS STRING),
+                    -- "str_6h", CAST(str_6h AS STRING),
+                    -- "ros_6h", CAST(ros_6h AS STRING),
+                    -- "str_plus_6h", CAST(str_plus_6h AS STRING),
+                    -- "ros_minus_6h", CAST(ros_minus_6h AS STRING),
+                    -- "rovn_6h", CAST(rovn_6h AS STRING),
+                    -- "str_one_12h", CAST(str_one_12h AS STRING),
+                    -- "ros_one_12h", CAST(ros_one_12h AS STRING),
+                    -- "str_12h", CAST(str_12h AS STRING),
+                    -- "ros_12h", CAST(ros_12h AS STRING),
+                    -- "str_plus_12h", CAST(str_plus_12h AS STRING),
+                    -- "ros_minus_12h", CAST(ros_minus_12h AS STRING),
+                    -- "rovn_12h", CAST(rovn_12h AS STRING),
+                    -- "str_one_24h", CAST(str_one_24h AS STRING),
+                    -- "ros_one_24h", CAST(ros_one_24h AS STRING),
+                    -- "str_24h", CAST(str_24h AS STRING),
+                    -- "ros_24h", CAST(ros_24h AS STRING),
+                    -- "str_plus_24h", CAST(str_plus_24h AS STRING),
+                    -- "ros_minus_24h", CAST(ros_minus_24h AS STRING),
+                    -- "rovn_24h", CAST(rovn_24h AS STRING),
+                    -- "str_one_72h", CAST(str_one_72h AS STRING),
+                    -- "ros_one_72h", CAST(ros_one_72h AS STRING),
+                    -- "str_72h", CAST(str_72h AS STRING),
+                    -- "ros_72h", CAST(ros_72h AS STRING),
+                    -- "str_plus_72h", CAST(str_plus_72h AS STRING),
+                    -- "ros_minus_72h", CAST(ros_minus_72h AS STRING),
+                    -- "rovn_72h", CAST(rovn_72h AS STRING),
+                    -- "str_one_168h", CAST(str_one_168h AS STRING),
+                    -- "ros_one_168h", CAST(ros_one_168h AS STRING),
+                    -- "str_168h", CAST(str_168h AS STRING),
+                    -- "ros_168h", CAST(ros_168h AS STRING),
+                    -- "str_plus_168h", CAST(str_plus_168h AS STRING),
+                    -- "ros_minus_168h", CAST(ros_minus_168h AS STRING),
+                    "rovn_168h", CAST(rovn_168h AS STRING) -- Second emitted ratio key; it is the final argument, so it has no trailing comma.
+            ))
+    FROM    t_index
+)
+SELECT  *
+FROM    t_result
+;

+ 46 - 0
production_code/loghubods.changwen_rootsourceid_group_hour.json

@@ -0,0 +1,46 @@
+{
+  "name": "changwen_rootsourceid_group_hour",
+  "project": "loghubods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "id",
+      "type": "BIGINT",
+      "comment": "自增主键"
+    },
+    {
+      "name": "group_name",
+      "type": "STRING",
+      "comment": "分组名称"
+    },
+    {
+      "name": "root_source_id",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "save_timestamp",
+      "type": "BIGINT",
+      "comment": "保存时候的时间,毫秒时间戳"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1024546173,
+      "name": "changwen_rootsourceid_group_hour"
+    }
+  ],
+  "upstream_tables": []
+}

+ 187 - 0
production_code/loghubods.changwen_rootsourceid_group_hour.sql

@@ -0,0 +1,187 @@
+-- Task: changwen_rootsourceid_group_hour  ID: 1024546173  Type: DI
+{
+	"extend":{
+		"mode":"wizard",
+		"resourceGroup":"S_res_group_42901_1627548651694",
+		"oneStopPageNum":2
+	},
+	"transform":false,
+	"mappingRule":{
+		"sourceType":"mysql",
+		"targetType":"odps",
+		"needDiff":true,
+		"targetTableColumn":[
+			{
+				"name":"id",
+				"comment":"自增主键",
+				"typeDetail":"{\"odpsType\":\"BIGINT\",\"typeName\":\"BIGINT\"}",
+				"type":"BIGINT"
+			},
+			{
+				"name":"group_name",
+				"comment":"分组名称",
+				"typeDetail":"{\"odpsType\":\"STRING\",\"typeName\":\"STRING\"}",
+				"type":"STRING"
+			},
+			{
+				"name":"root_source_id",
+				"comment":"",
+				"typeDetail":"{\"odpsType\":\"STRING\",\"typeName\":\"STRING\"}",
+				"type":"STRING"
+			},
+			{
+				"name":"save_timestamp",
+				"comment":"保存时候的时间,毫秒时间戳",
+				"typeDetail":"{\"odpsType\":\"BIGINT\",\"typeName\":\"BIGINT\"}",
+				"type":"BIGINT"
+			}
+		],
+		"sourceTableColumn":[
+			{
+				"columnType":"-5",
+				"typeSize":"20",
+				"nullable":"0",
+				"columnSize":"20",
+				"name":"id",
+				"comment":"自增主键",
+				"newDigit":"0",
+				"type":"BIGINT",
+				"primaryKey":"true"
+			},
+			{
+				"columnType":"12",
+				"typeSize":"20",
+				"nullable":"1",
+				"columnSize":"20",
+				"name":"group_name",
+				"comment":"分组名称",
+				"newDigit":"0",
+				"type":"VARCHAR"
+			},
+			{
+				"columnType":"12",
+				"typeSize":"100",
+				"nullable":"1",
+				"columnSize":"100",
+				"name":"root_source_id",
+				"comment":"",
+				"newDigit":"0",
+				"type":"VARCHAR"
+			},
+			{
+				"columnType":"-5",
+				"typeSize":"20",
+				"nullable":"1",
+				"columnSize":"20",
+				"name":"save_timestamp",
+				"comment":"保存时候的时间,毫秒时间戳",
+				"newDigit":"0",
+				"type":"BIGINT"
+			}
+		]
+	},
+	"markConfig":{
+		"columnMappingUseCustomRule":false
+	},
+	"type":"job",
+	"version":"2.0",
+	"steps":[
+		{
+			"stepType":"mysql",
+			"copies":1,
+			"parameter":{
+				"envType":1,
+				"datasource":"adplatform",
+				"useSpecialSecret":false,
+				"column":[
+					"id",
+					"group_name",
+					"root_source_id",
+					"save_timestamp"
+				],
+				"tableComment":"",
+				"where":"",
+				"connection":[
+					{
+						"datasource":"adplatform",
+						"table":[
+							"changwen_rootsourceid_group"
+						]
+					}
+				],
+				"splitPk":"id"
+			},
+			"name":"Reader",
+			"gui":{
+				"x":100,
+				"y":100
+			},
+			"category":"reader"
+		},
+		{
+			"stepType":"odps",
+			"copies":1,
+			"parameter":{
+				"partition":"dt=${bizdate}${hh}",
+				"truncate":true,
+				"envType":1,
+				"datasource":"odps_first",
+				"tunnelQuota":"default",
+				"isSupportThreeModel":false,
+				"column":[
+					"id",
+					"group_name",
+					"root_source_id",
+					"save_timestamp"
+				],
+				"emptyAsNull":false,
+				"tableComment":"",
+				"consistencyCommit":false,
+				"table":"changwen_rootsourceid_group_hour"
+			},
+			"name":"Writer",
+			"gui":{
+				"x":100,
+				"y":200
+			},
+			"category":"writer"
+		},
+		{
+			"copies":1,
+			"parameter":{
+				"nodes":[],
+				"edges":[],
+				"groups":[],
+				"version":"2.0"
+			},
+			"name":"Processor",
+			"gui":{
+				"x":100,
+				"y":300
+			},
+			"category":"processor"
+		}
+	],
+	"order":{
+		"hops":[
+			{
+				"from":"Reader",
+				"gui":{
+					"sourceAnchor":1,
+					"targetAnchor":0
+				},
+				"to":"Writer"
+			}
+		]
+	},
+	"setting":{
+		"errorLimit":{
+			"record":""
+		},
+		"locale":"zh_CN",
+		"speed":{
+			"throttle":false,
+			"concurrent":2
+		}
+	}
+}

+ 87 - 0
production_code/loghubods.content_ai_tags_no_dt.json

@@ -0,0 +1,87 @@
+{
+  "name": "content_ai_tags_no_dt",
+  "project": "loghubods",
+  "comment": "AI内容标签",
+  "columns": [
+    {
+      "name": "type",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "videoid",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "title_duration",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_path",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tags",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_1",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "tag_2",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_2",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_2",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "tag_3",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_3",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_3",
+      "type": "BIGINT",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1021799017,
+      "name": "视频标签_title_druation_无分区"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.video_ai_tags"
+  ]
+}

+ 87 - 0
production_code/loghubods.content_ai_tags_no_dt.sql

@@ -0,0 +1,87 @@
+-- Task: 视频标签_title_druation_无分区  ID: 1021799017  Type: ODPS_SQL
+--odps sql 
+--********************************************************************--
+--author:杜崇宇
+--create time:2024-09-27 15:18:24
+--********************************************************************--
+-- Duplicate-content tags: keeps one video per title_duration (highest videoid, newest dt on ties) and reduces each tag name to a plain label.
+--DROP TABLE IF EXISTS loghubods.content_ai_tags_no_dt;
+--CREATE TABLE IF NOT EXISTS loghubods.content_ai_tags_no_dt
+--(
+--    type            STRING COMMENT ''
+--    ,videoid        BIGINT COMMENT ''
+--    ,title_duration STRING COMMENT ''
+--    ,video_path     STRING COMMENT ''
+--    ,tags           STRING COMMENT ''
+--    ,tag_1          STRING COMMENT ''
+--    ,tag_name_1     STRING COMMENT ''
+--    ,tag_score_1    BIGINT COMMENT ''
+--    ,tag_2          STRING COMMENT ''
+--    ,tag_name_2     STRING COMMENT ''
+--    ,tag_score_2    BIGINT COMMENT ''
+--    ,tag_3          STRING COMMENT ''
+--    ,tag_name_3     STRING COMMENT ''
+--    ,tag_score_3    BIGINT COMMENT ''
+--)
+--COMMENT 'AI内容标签'
+
+INSERT OVERWRITE TABLE loghubods.content_ai_tags_no_dt -- Unpartitioned target: every run replaces the whole table.
+SELECT  DISTINCT type
+        ,videoid
+        ,title_duration
+        ,video_path
+        ,tags
+        ,tag_1
+        ,CASE   WHEN INSTR(tag_name_1,'{') > 0 THEN GET_JSON_OBJECT(tag_name_1,'$.品类')   -- JSON-looking name: take its 品类 field.
+                
+                ELSE SUBSTRING_INDEX(tag_name_1,',',1) -- Otherwise keep the text before the first comma.
+        END AS processed_tag_name_1
+        ,tag_score_1
+        ,tag_2
+        ,CASE   WHEN INSTR(tag_name_2,'{') > 0 THEN GET_JSON_OBJECT(tag_name_2,'$.品类')   -- Same normalisation as tag_name_1.
+                ELSE SUBSTRING_INDEX(tag_name_2,',',1)
+        END AS processed_tag_name_2
+        ,tag_score_2
+        ,tag_3
+        ,CASE   WHEN INSTR(tag_name_3,'{') > 0 THEN  GET_JSON_OBJECT(tag_name_3,'$.品类')   -- Same normalisation as tag_name_1.
+                ELSE SUBSTRING_INDEX(tag_name_3,',',1)
+        END AS processed_tag_name_3
+        ,tag_score_3
+FROM    (
+            SELECT  DISTINCT type
+                    ,videoid
+                    ,title_duration
+                    ,video_path
+                    ,tags
+                    ,tag_1
+                    ,tag_name_1
+                    ,tag_score_1
+                    ,tag_2
+                    ,tag_name_2
+                    ,tag_score_2
+                    ,tag_3
+                    ,tag_name_3
+                    ,tag_score_3
+            FROM    (
+                        SELECT  type
+                                ,videoid
+                                ,title_duration
+                                ,video_path
+                                ,tags
+                                ,tag_1
+                                ,tag_name_1
+                                ,tag_score_1
+                                ,tag_2
+                                ,tag_name_2
+                                ,tag_score_2
+                                ,tag_3
+                                ,tag_name_3
+                                ,tag_score_3
+                                ,ROW_NUMBER() OVER (PARTITION BY title_duration ORDER BY videoid DESC,dt DESC ) AS rank -- dt DESC breaks ties when one videoid sits in several partitions, so reruns pick the same (newest) row.
+                        FROM    loghubods.video_ai_tags
+                        WHERE   dt <= '${bizdate}${hh}' -- Scans every partition up to the scheduled bizdate/hour (string comparison).
+                        AND     type != 'test'
+                    ) 
+            WHERE   rank = 1 -- One surviving row per title_duration.
+        ) 
+HAVING  processed_tag_name_1 NOT REGEXP ':|:|,|,|{|分数|-|-' -- Drops rows whose first tag name still contains separator/brace/score residue; NOTE(review): a NULL name presumably fails this predicate too - confirm.

+ 260 - 0
production_code/loghubods.dwd_recsys_alg_exposure_base_20250108.json

@@ -0,0 +1,260 @@
+{
+  "name": "dwd_recsys_alg_exposure_base_20250108",
+  "project": "loghubods",
+  "comment": "推荐算法-labelmatch表-20250108更新最新版",
+  "columns": [
+    {
+      "name": "apptype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "sessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "subsessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "pagesource",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "page",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendlogvo",
+      "type": "STRING",
+      "comment": "推荐算法的返回结果日志存在这个字段中"
+    },
+    {
+      "name": "abcode",
+      "type": "STRING",
+      "comment": "推荐算法的ab分组:ab0"
+    },
+    {
+      "name": "recommendpagetype",
+      "type": "STRING",
+      "comment": "用于区分pagesource相同时某些场景的。三种回流头部;两种下滑-沉浸页下滑和feed下滑。"
+    },
+    {
+      "name": "recomtraceid",
+      "type": "STRING",
+      "comment": "在后端调取推荐服务之前生成。前端降级会空;后端也可能为空。"
+    },
+    {
+      "name": "headvideoid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "rootsourceid",
+      "type": "STRING",
+      "comment": "区分touliu等流量,咨询产品。"
+    },
+    {
+      "name": "hotsencetype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "flowpool",
+      "type": "STRING",
+      "comment": "非流量池,是空字符串。没有null值。"
+    },
+    {
+      "name": "level",
+      "type": "STRING",
+      "comment": "非流量池,是null。"
+    },
+    {
+      "name": "clientip",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_brand",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_model",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_system",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_wechatversion",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_sdkversion",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "province",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "city",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "ts",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_share",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "share_cnt",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_return_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_pv",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_uv",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_mids",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_return_n",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_pv",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_uv",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_mids",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_return_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_uv_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_mids_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_return_n_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_uv_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_mids_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "new_exposure_cnt",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "extend",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023041413,
+      "name": "20250108_label_明细表"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.changwen_rootsourceid_group_hour",
+    "loghubods.dwd_recsys_alg_exposure_base_view_20250402",
+    "loghubods.user_share_log_flow"
+  ]
+}

+ 774 - 0
production_code/loghubods.dwd_recsys_alg_exposure_base_20250108.sql

@@ -0,0 +1,774 @@
+-- Task: 20250108_label_明细表  ID: 1023041413  Type: ODPS_SQL
+--@exclude_input=loghubods.video_action_log_flow_new
+--@exclude_input=loghubods.user_share_log_flow
+--*********************
+-- alg_recsys_rank_labelmatch_20250108
+--*********************
+--drop table loghubods.dwd_recsys_alg_exposure_base_20250108;
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_20250108 -- Label-match detail table; all payload columns are STRING, partitioned by (dt,hh).
+(
+    apptype STRING,
+    uid STRING,
+    mid STRING,
+    vid STRING,
+    sessionid STRING,
+    subsessionid STRING,
+    pagesource STRING,
+    page STRING,
+    recommendlogvo STRING COMMENT '推荐算法的返回结果日志存在这个字段中',
+    abcode STRING COMMENT '推荐算法的ab分组:ab0',
+    recommendpagetype STRING COMMENT '用于区分pagesource相同时某些场景的。三种回流头部;两种下滑-沉浸页下滑和feed下滑。 -pages/user-videos-share-recommend-detail 是沉浸页。',
+    recomtraceid STRING COMMENT '在后端调取推荐服务之前生成。前端降级会空;后端也可能为空。',
+    headvideoid STRING,
+    rootsourceid STRING COMMENT '区分touliu等流量,咨询产品。',
+    hotsencetype STRING,
+    flowpool STRING COMMENT '非流量池,是空字符串。没有null值。',
+    level STRING COMMENT '非流量池,是null。',
+    clientip STRING,
+    machineinfo_brand STRING,
+    machineinfo_model STRING,
+    machineinfo_system STRING,
+    machineinfo_wechatversion STRING,
+    machineinfo_sdkversion STRING,
+    province STRING,
+    city STRING,
+    ts STRING,
+    is_share STRING,
+    share_cnt STRING,
+    is_return_1 STRING,
+    return_1_pv STRING,
+    return_1_uv STRING,
+    return_1_mids STRING,
+    is_return_n STRING,
+    return_n_pv STRING,
+    return_n_uv STRING,
+    return_n_mids STRING,
+    is_return_noself STRING,
+    return_1_uv_noself STRING,
+    return_1_mids_noself STRING,
+    is_return_n_noself STRING,
+    return_n_uv_noself STRING,
+    return_n_mids_noself STRING,
+    new_exposure_cnt STRING,
+    extend STRING
+)
+PARTITIONED BY
+(
+    dt STRING COMMENT '日期:20240105',
+    hh STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('comment' = '推荐算法-labelmatch表-20250108更新最新版')
+LIFECYCLE 3650
+;
+
+-- Hive-style switch for dynamic partitions; the INSERT that follows writes PARTITION (dt,hh) without fixed values.
+SET hive.exec.dynamic.partition = true;
+
+-- Non-strict mode: no static partition value is required in that INSERT.
+SET hive.exec.dynamic.partition.mode = nonstrict;
+
+-- Mapper input split size (MaxCompute documents the unit as MB).
+SET odps.stage.mapper.split.size = 1024;
+
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_base_20250108 PARTITION (dt,hh)
+WITH t_return AS -- De-duplicated return (click) events from user_share_log_flow for the window 25h..1h before ${dt}${hh}, each with a synthetic id.
+(
+    SELECT  *
+            ,CONCAT(dthh,":",shareid,":",vid,":",dthh_id) AS id -- NOTE(review): shareid is not filtered for NULL below; CONCAT presumably returns NULL then, leaving id NULL - confirm.
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,clickobjectid AS vid
+                        ,sessionid
+                        ,subsessionid -- Note: this is the subsessionid of the return; it is reset on every return click, so it can be used to find the exposures belonging to that return.
+                        ,shareid
+                        ,rootshareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts -- presumably milliseconds -> seconds; confirm the unit of clienttimestamp
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,clickobjectid,sessionid,subsessionid,shareid,rootshareid ORDER BY clienttimestamp DESC ) AS rn -- rn = 1 keeps the latest click per key
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),shareid,clickobjectid ORDER BY clienttimestamp ) AS dthh_id -- running index per (hour, shareid, video) by click time; last component of id
+                FROM    loghubods.user_share_log_flow -- Return events: there should be a single row per subsessionid, but dirty data exists, hence the de-duplication.
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                AND     __topic__ = 'click'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12') -- apptype 12 has pagesource h5-share / h5-detail; filtered out for now, not handled.
+                AND     machinecode IS NOT NULL
+                AND     clickobjectid IS NOT NULL
+                AND     pagesource REGEXP "-pages/user-videos-share$" -- Dirty data exists (pagesource ending in vlog-gzh /mine/mine-info); this pattern filters all of it out.
+            ) 
+    WHERE   rn = 1
+)
+,t_share_from_sharelog AS -- De-duplicated share events (__topic__ = 'share') from user_share_log_flow for the window 25h..1h before ${dt}${hh}.
+(
+    SELECT  *
+    FROM    (
+                SELECT  CONCAT(year,month,day,hour) AS dthh
+                        ,apptype
+                        ,machinecode AS mid
+                        ,shareobjectid AS vid
+                        ,sessionid
+                        ,subsessionid
+                        ,pagesource
+                        ,shareid
+                        ,CAST(clienttimestamp / 1000 AS BIGINT) AS ts -- presumably milliseconds -> seconds; confirm the unit of clienttimestamp
+                        ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,machinecode,shareobjectid,sessionid,subsessionid,pagesource,shareid ORDER BY clienttimestamp DESC ) AS rn -- rn = 1 keeps the latest share per key
+                FROM    loghubods.user_share_log_flow
+                WHERE   CONCAT(year,month,day,hour) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH') --WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                AND     __topic__ = 'share'
+                AND     apptype IS NOT NULL
+                AND     apptype NOT IN ('12') -- apptype 12 is excluded here as well
+                AND     machinecode IS NOT NULL
+                AND     shareobjectid IS NOT NULL
+            ) 
+    WHERE   rn = 1
+)
+,t_exposure AS -- Exposure rows read from dwd_recsys_alg_exposure_base_view_20250402 for the window 25h..1h before ${dt}${hh}.
+(
+    SELECT  dthh_id
+            ,dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,rootsessionid_new
+            ,pagesource
+            ,recommendlogvo
+            ,abcode
+            ,recommendpagetype
+            ,recomtraceid
+            ,headvideoid
+            ,rootsourceid
+            ,hotsencetype
+            ,animationscenetype
+            ,JSON_PARSE(IF(JSON_VALID(extparams),extparams,"{}")) AS extParams -- invalid JSON text is replaced by an empty object before parsing
+            ,flowpool
+            ,level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,province
+            ,city
+            ,versioncode
+            ,ts
+            ,rn
+            ,id
+            ,dt
+            ,hh
+    FROM    loghubods.dwd_recsys_alg_exposure_base_view_20250402
+    WHERE   CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+)
+,t_exposure_recommend AS -- Subset of t_exposure whose pagesource ends in category, recommend or -pages/user-videos-detail.
+(
+    SELECT  *
+    FROM    t_exposure
+    WHERE   pagesource REGEXP 'category$|recommend$|-pages/user-videos-detail$'
+)
+,t_return_exposure_1 AS -- Joins exposures to returns (used to compute viewh24). Pass 1: match on mid, headvideoid = return vid, and subsessionid.
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id -- NULL when the LEFT JOIN found no return for this exposure
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.id ORDER BY t2.ts DESC ) AS rn -- rn = 1 keeps the latest matching return per exposure
+                FROM    t_exposure_recommend t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.headvideoid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_return_exposure_2 AS -- Joins exposures to returns (used to compute viewh24). Pass 2: exposures left unmatched by pass 1, matched on mid, headvideoid = return vid, and sessionid.
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id -- NULL when this pass found no return either
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn -- rn = 1 keeps the latest matching return per exposure
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_1
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.headvideoid = t2.vid
+                AND     t1.sessionid = t2.sessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_return_exposure_3 AS -- Joins exposures to returns (used to compute viewh24). Pass 3: exposures left unmatched by pass 2, matched on mid and subsessionid only.
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id -- NULL when this pass found no return either
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn -- rn = 1 keeps the latest matching return per exposure
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_2
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.subsessionid = t2.subsessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_return_exposure_4 AS -- Joins exposures to returns (used to compute viewh24). Pass 4: exposures left unmatched by pass 3, matched on mid and sessionid only.
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.exposure_id AS exposure_id
+                        ,t1.mid AS mid
+                        ,t1.vid AS vid
+                        ,t1.subsessionid AS subsessionid
+                        ,t1.sessionid AS sessionid
+                        ,t1.headvideoid AS headvideoid
+                        ,t1.dthh
+                        ,t2.id AS return_id -- NULL when no pass found a return for this exposure
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.exposure_id ORDER BY t2.ts DESC ) AS rn -- rn = 1 keeps the latest matching return per exposure
+                FROM    (
+                            SELECT  *
+                            FROM    t_return_exposure_3
+                            WHERE   return_id IS NULL
+                        ) t1
+                LEFT JOIN t_return t2
+                ON      t1.mid = t2.mid
+                AND     t1.sessionid = t2.sessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_return_exposure AS -- Every t_return row plus new_exposure_cnt: the number of exposures attributed to it across the four matching passes.
+(
+    SELECT  a.*
+            ,b.exposure_cnt AS new_exposure_cnt -- NULL (not 0) when no exposure was attributed, because of the LEFT JOIN
+    FROM    t_return a
+    LEFT JOIN   (
+                    SELECT  return_id
+                            ,COUNT(1) AS exposure_cnt
+                    FROM    (
+                                SELECT  *
+                                FROM    t_return_exposure_1
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_2
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_3
+                                WHERE   return_id IS NOT NULL
+                                UNION ALL
+                                SELECT  *
+                                FROM    t_return_exposure_4
+                                WHERE   return_id IS NOT NULL
+                            ) 
+                    GROUP BY return_id
+                ) b
+    ON      a.id = b.return_id
+)
+,t_normal_share_exposure_1 AS -- Start of the regular share-to-exposure matching. Pass 1: match on apptype, mid, vid, subsessionid and pagesource.
+(
+    SELECT  *
+    FROM    (
+                SELECT  t1.dthh
+                        ,t1.apptype
+                        ,t1.mid
+                        ,t1.vid
+                        ,t1.sessionid
+                        ,t1.subsessionid
+                        ,t1.pagesource
+                        ,t1.shareid
+                        ,t1.ts
+                        ,t2.id AS exposure_id -- NULL when the LEFT JOIN found no exposure for this share
+                        ,t2.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY t1.dthh,t1.apptype,t1.mid,t1.vid,t1.sessionid,t1.subsessionid,t1.pagesource,t1.shareid ORDER BY t2.ts DESC ) AS rn -- rn = 1 keeps the latest candidate exposure per share
+                FROM    t_share_from_sharelog t1
+                LEFT JOIN t_exposure t2
+                ON      t1.apptype = t2.apptype
+                AND     t1.mid = t2.mid
+                AND     t1.vid = t2.vid
+                AND     t1.subsessionid = t2.subsessionid
+                AND     t1.pagesource = t2.pagesource
+                AND     t1.ts >= t2.ts -- only exposures at or before the share time qualify
+                WHERE   t1.pagesource NOT REGEXP "pages/detail-user-videos-share-recommend$" -- shares from this page are excluded from the regular matching
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_2 AS
+(
+    -- Pass 2: shares left unmatched by pass 1 are retried with sessionid in place
+    -- of subsessionid (same apptype, mid, vid, pagesource; exposure at or before
+    -- the share). The latest such exposure wins.
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_1
+                            WHERE   exposure_id IS NULL
+                        ) sh
+                LEFT JOIN t_exposure ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.sessionid = sh.sessionid
+                AND     ex.pagesource = sh.pagesource
+                AND     ex.ts <= sh.ts
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_3 AS
+(
+    -- Pass 3: shares still unmatched after pass 2 are retried on apptype, mid,
+    -- vid, subsessionid and pagesource WITHOUT the timestamp constraint, so the
+    -- latest exposure is taken even if it is later than the share.
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_2
+                            WHERE   exposure_id IS NULL
+                        ) sh
+                LEFT JOIN t_exposure ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.subsessionid = sh.subsessionid
+                AND     ex.pagesource = sh.pagesource
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_4 AS
+(
+    -- Pass 4: shares still unmatched after pass 3 are retried on apptype, mid,
+    -- vid, sessionid and pagesource, again without a timestamp constraint.
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_3
+                            WHERE   exposure_id IS NULL
+                        ) sh
+                LEFT JOIN t_exposure ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.sessionid = sh.sessionid
+                AND     ex.pagesource = sh.pagesource
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_5 AS
+(
+    -- Pass 5: shares still unmatched after pass 4 are retried on apptype, mid,
+    -- vid and subsessionid only (pagesource is no longer required to match).
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_4
+                            WHERE   exposure_id IS NULL
+                        ) sh
+                LEFT JOIN t_exposure ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.subsessionid = sh.subsessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_normal_share_exposure_6 AS
+(
+    -- Pass 6 (loosest): shares still unmatched after pass 5 are retried on
+    -- apptype, mid, vid and sessionid only. Rows that remain unmatched here
+    -- come out with exposure_id = NULL.
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_normal_share_exposure_5
+                            WHERE   exposure_id IS NULL
+                        ) sh
+                LEFT JOIN t_exposure ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.sessionid = sh.sessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_exposure_detail AS
+(
+    -- Exposures whose pagesource ends with "-pages/user-videos-detail" or
+    -- "pages/detail-recommend"; this is the exposure side used by the
+    -- t_no_normal_share_exposure_* passes.
+    SELECT  ex.*
+    FROM    t_exposure ex
+    WHERE   ex.pagesource REGEXP "-pages/user-videos-detail$|pages/detail-recommend$"
+)
+,t_no_normal_share_exposure_1 AS -- Non-regular share-to-exposure matching starts here.
+(
+    -- Pass 1: shares whose pagesource ends with
+    -- pages/detail-user-videos-share-recommend are matched against the detail-page
+    -- exposures (t_exposure_detail) on apptype, mid, vid and subsessionid, taking
+    -- the latest exposure at or before the share. pagesource is not a join key here.
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    t_share_from_sharelog sh
+                LEFT JOIN t_exposure_detail ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.subsessionid = sh.subsessionid
+                AND     ex.ts <= sh.ts
+                WHERE   sh.pagesource REGEXP "pages/detail-user-videos-share-recommend$"
+            ) 
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_2 AS
+(
+    -- Pass 2: shares left unmatched by pass 1 are retried with sessionid in place
+    -- of subsessionid (same apptype, mid, vid; exposure at or before the share).
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_1
+                            WHERE   exposure_id IS NULL
+                        ) sh
+                LEFT JOIN t_exposure_detail ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.sessionid = sh.sessionid
+                AND     ex.ts <= sh.ts
+            ) 
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_3 AS
+(
+    -- Pass 3: shares still unmatched after pass 2 are retried on apptype, mid,
+    -- vid and subsessionid without the timestamp constraint.
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_2
+                            WHERE   exposure_id IS NULL
+                        ) sh
+                LEFT JOIN t_exposure_detail ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.subsessionid = sh.subsessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_no_normal_share_exposure_4 AS
+(
+    -- Pass 4 (loosest): shares still unmatched after pass 3 are retried on
+    -- apptype, mid, vid and sessionid without the timestamp constraint. Rows that
+    -- remain unmatched here come out with exposure_id = NULL.
+    SELECT  *
+    FROM    (
+                SELECT  sh.dthh
+                        ,sh.apptype
+                        ,sh.mid
+                        ,sh.vid
+                        ,sh.sessionid
+                        ,sh.subsessionid
+                        ,sh.pagesource
+                        ,sh.shareid
+                        ,sh.ts
+                        ,ex.id AS exposure_id
+                        ,ex.ts AS exposure_ts
+                        ,ROW_NUMBER() OVER (PARTITION BY sh.dthh,sh.apptype,sh.mid,sh.vid,sh.sessionid,sh.subsessionid,sh.pagesource,sh.shareid ORDER BY ex.ts DESC ) AS rn
+                FROM    (
+                            SELECT  *
+                            FROM    t_no_normal_share_exposure_3
+                            WHERE   exposure_id IS NULL
+                        ) sh
+                LEFT JOIN t_exposure_detail ex
+                ON      ex.apptype = sh.apptype
+                AND     ex.mid = sh.mid
+                AND     ex.vid = sh.vid
+                AND     ex.sessionid = sh.sessionid
+            ) 
+    WHERE   rn = 1
+)
+,t_share_exposure AS 
+(
+    -- Union of all share-to-exposure matching passes.
+    -- Each intermediate pass contributes only the shares it managed to match
+    -- (exposure_id IS NOT NULL); its unmatched shares are handed to the next pass.
+    -- The last pass of each chain (t_normal_share_exposure_6 and
+    -- t_no_normal_share_exposure_4) is taken unfiltered, so shares that no pass
+    -- could match are still emitted here with exposure_id = NULL.
+    -- The branches are combined positionally via SELECT *, so every pass must
+    -- expose the same column list in the same order.
+    SELECT  *
+    FROM    t_normal_share_exposure_1
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_2
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_3
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_4
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_normal_share_exposure_5
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    -- Final regular pass: matched and unmatched rows are both kept.
+    SELECT  *
+    FROM    t_normal_share_exposure_6
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_1
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_2
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    SELECT  *
+    FROM    t_no_normal_share_exposure_3
+    WHERE   exposure_id IS NOT NULL
+    UNION ALL
+    -- Final non-regular pass: matched and unmatched rows are both kept.
+    SELECT  *
+    FROM    t_no_normal_share_exposure_4
+)
+,t_share_with_label AS
+(
+    -- One row per share from t_share_exposure, labelled with two return aggregates:
+    --   return_1_*: returns grouped by (shareid, vid, apptype) in t_return
+    --   return_n_*: returns grouped by (rootshareid, vid, apptype) in t_return_exposure
+    -- Counters default to 0 when a share has no returns; the mid lists stay NULL.
+    SELECT  se.dthh
+            ,se.apptype -- join key
+            ,se.mid
+            ,se.vid -- join key
+            ,se.sessionid
+            ,se.subsessionid
+            ,se.pagesource
+            ,se.shareid -- join key
+            ,se.ts
+            ,se.exposure_id
+            ,COALESCE(ret1.return_1_pv,0) AS return_1_pv
+            ,COALESCE(ret1.return_1_uv,0) AS return_1_uv
+            ,ret1.return_1_mids AS return_1_mids -- may be NULL; whether to pre-process it is decided later
+            ,COALESCE(retn.return_n_pv,0) AS return_n_pv
+            ,COALESCE(retn.return_n_uv,0) AS return_n_uv
+            ,retn.return_n_mids AS return_n_mids -- may be NULL; whether to pre-process it is decided later
+            ,COALESCE(retn.new_exposure_cnt,0) AS new_exposure_cnt
+    FROM    t_share_exposure se
+    LEFT JOIN   (
+                    -- Returns keyed by the share they came from.
+                    SELECT  shareid
+                            ,vid
+                            ,apptype
+                            ,COUNT(1) AS return_1_pv
+                            ,COUNT(DISTINCT mid) AS return_1_uv
+                            ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_1_mids
+                    FROM    t_return
+                    GROUP BY shareid
+                             ,vid
+                             ,apptype
+                ) ret1
+    ON      ret1.shareid = se.shareid
+    AND     ret1.vid = se.vid
+    AND     ret1.apptype = se.apptype
+    LEFT JOIN   (
+                    -- Returns keyed by their root share, with the exposures they produced.
+                    SELECT  rootshareid
+                            ,vid
+                            ,apptype
+                            ,COUNT(1) AS return_n_pv
+                            ,COUNT(DISTINCT mid) AS return_n_uv
+                            ,CONCAT_WS(',',COLLECT_SET(mid)) AS return_n_mids
+                            ,SUM(new_exposure_cnt) AS new_exposure_cnt
+                    FROM    t_return_exposure
+                    GROUP BY rootshareid
+                             ,vid
+                             ,apptype
+                ) retn
+    ON      retn.rootshareid = se.shareid
+    AND     retn.vid = se.vid
+    AND     retn.apptype = se.apptype
+)
+,t_share_with_label_group AS 
+(
+    -- Rolls the per-share labels of t_share_with_label up to one row per exposure_id:
+    -- share count, summed return page views, and merged returning-mid lists.
+    -- The per-share mid lists are concatenated with ',' and passed through the
+    -- DEDUPLICATION4LIST UDF (not defined in this file; presumably it removes
+    -- duplicate entries from a comma-separated list - TODO confirm). The *_uv
+    -- columns are the element count of that result after splitting on ','.
+    -- NOTE(review): shares that never matched an exposure have exposure_id = NULL
+    -- and therefore form their own NULL group here.
+    -- NOTE(review): confirm what the UDF returns when no mids were collected;
+    -- if it yields an empty string rather than NULL, SIZE(SPLIT(...)) may not be 0.
+    SELECT  exposure_id
+            ,COUNT(1) AS share_cnt
+            ,SUM(return_1_pv) AS return_1_pv
+            ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))),",")),0) AS return_1_uv
+            ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_1_mids))) AS return_1_mids -- may be NULL
+            ,SUM(return_n_pv) AS return_n_pv
+            ,COALESCE(SIZE(SPLIT(DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))),",")),0) AS return_n_uv
+            ,DEDUPLICATION4LIST(CONCAT_WS(',',COLLECT_LIST(return_n_mids))) AS return_n_mids -- may be NULL
+            ,SUM(new_exposure_cnt) AS new_exposure_cnt
+    FROM    t_share_with_label
+    GROUP BY exposure_id
+)
+,t_root_source_id_group_name AS
+(
+    -- One (root_source_id, group_name) pair per root_source_id, read from the
+    -- newest partition of loghubods.changwen_rootsourceid_group_hour.
+    -- The window is ordered by group_name so that, when a root_source_id carries
+    -- several group names, the same one is kept on every run. Without an ORDER BY
+    -- the surviving row was arbitrary and could change between reruns.
+    SELECT  *
+    FROM    (
+                SELECT  root_source_id
+                        ,group_name
+                        ,ROW_NUMBER() OVER (PARTITION BY root_source_id ORDER BY group_name ) AS rn
+                FROM    loghubods.changwen_rootsourceid_group_hour
+                WHERE   dt = MAX_PT('loghubods.changwen_rootsourceid_group_hour')
+            ) 
+    WHERE   rn = 1
+)
+,t_exposure_share_return AS
+(
+    -- Final wide row: every exposure from t_exposure, enriched with the share and
+    -- return labels aggregated per exposure_id (t_share_with_label_group) and with
+    -- the group name looked up by rootsourceid (t_root_source_id_group_name).
+    -- Both joins are LEFT JOINs, so exposures without any share are kept; their
+    -- flags and counters default to 0 and their mid lists stay NULL.
+    SELECT  apptype
+            ,uid
+            ,mid
+            ,vid
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            -- Readable page label derived from the pagesource suffix.
+            ,CASE   WHEN pagesource REGEXP 'pages/user-videos-share-recommend$' THEN '回流后沉浸页&内页feed'
+                    WHEN pagesource REGEXP 'pages/detail-recommend$' THEN '详情后沉浸页'
+                    WHEN pagesource REGEXP 'pages/user-videos-share$' THEN '回流页'
+                    WHEN pagesource REGEXP 'pages/user-videos-detail$' THEN '详情页'
+                    WHEN pagesource REGEXP 'pages/category$' THEN '首页feed'
+                    ELSE '其他'
+            END AS pagesource_new
+            ,recommendlogvo -- the recommender's response log is stored in this field
+            ,abcode -- A/B group of the recommendation algorithm
+            ,recommendpagetype -- three kinds of return head; two kinds of swipe-down (immersive page and feed)
+            ,recomtraceid
+            ,headvideoid
+            ,rootsourceid
+            ,hotsencetype
+            ,flowpool -- sample value: 14#68#3#1735262438476#2
+            ,level
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,province
+            ,city
+            ,ts
+            ,IF(COALESCE(share_cnt,0) > 0,1,0) AS is_share
+            ,COALESCE(share_cnt,0) AS share_cnt
+            ,IF(COALESCE(return_1_uv,0) > 0,1,0) AS is_return_1
+            ,COALESCE(return_1_pv,0) AS return_1_pv
+            ,COALESCE(return_1_uv,0) AS return_1_uv
+            ,return_1_mids -- may be NULL
+            ,IF(COALESCE(return_n_pv,0) > 0,1,0) AS is_return_n
+            ,COALESCE(return_n_pv,0) AS return_n_pv
+            ,COALESCE(return_n_uv,0) AS return_n_uv
+            ,return_n_mids -- may be NULL
+            -- "noself" variants: the exposed user's own mid is removed from the
+            -- returning-mid list before counting.
+            ,IF(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0) > 0,1,0) AS is_return_noself
+            ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid)),0) AS return_1_uv_noself
+            ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_1_mids,","),mid),",") AS return_1_mids_noself
+            ,IF(COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0) > 0,1,0) AS is_return_n_noself
+            ,COALESCE(SIZE(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid)),0) AS return_n_uv_noself
+            ,ARRAY_JOIN(ARRAY_REMOVE(SPLIT(return_n_mids,","),mid),",") AS return_n_mids_noself
+            -- Default to 0 like every other counter; the previous single-argument
+            -- COALESCE was a no-op and left NULL for exposures without shares.
+            ,COALESCE(new_exposure_cnt,0) AS new_exposure_cnt
+            ,JSON_FORMAT(
+                        JSON_OBJECT("animationSceneType",animationSceneType,"extParams",extParams,"rootsessionid",rootsessionid_new,"versioncode",versioncode,"group_name",tc.group_name)
+            ) AS extend
+            -- dthh is yyyymmddhh: first 8 characters are the day, next 2 the hour.
+            ,SUBSTR(dthh,1,8) AS dt
+            ,SUBSTR(dthh,9,2) AS hh
+    FROM    t_exposure ta
+    LEFT JOIN t_share_with_label_group tb
+    ON      ta.id = tb.exposure_id
+    LEFT JOIN t_root_source_id_group_name tc
+    ON      ta.rootsourceid = tc.root_source_id
+)
+SELECT  *
+FROM    t_exposure_share_return
+;

+ 203 - 0
production_code/loghubods.dwd_recsys_alg_exposure_base_view_20250402.json

@@ -0,0 +1,203 @@
+{
+  "name": "dwd_recsys_alg_exposure_base_view_20250402",
+  "project": "loghubods",
+  "comment": "推荐算法-labelmatch表-上游曝光表",
+  "columns": [
+    {
+      "name": "dthh_id",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dthh",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "apptype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "sessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "subsessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "rootsessionid_new",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "pagesource",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendlogvo",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "abcode",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendpagetype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recomtraceid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "headvideoid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "rootsourceid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "hotsencetype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "animationscenetype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "extparams",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "flowpool",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "level",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "clientip",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_brand",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_model",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_system",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_wechatversion",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "machineinfo_sdkversion",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "province",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "city",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "versioncode",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "ts",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "rn",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "id",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "日期:20240105"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时:04"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1024847760,
+      "name": "label_上游曝光表"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.video_action_log_flow_new"
+  ]
+}

+ 189 - 0
production_code/loghubods.dwd_recsys_alg_exposure_base_view_20250402.sql

@@ -0,0 +1,189 @@
+-- Task: label_上游曝光表  ID: 1024847760  Type: ODPS_SQL
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2025-04-02 20:48:38
+--********************************************************************--
+--@exclude_input=loghubods.video_action_log_flow_new
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2025-04-01 10:46:29
+--********************************************************************--
+-- Upstream exposure table for the recommendation label-matching pipeline.
+-- All data columns are stored as STRING; the table is partitioned by day and hour
+-- and created with LIFECYCLE 3.
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_exposure_base_view_20250402
+(
+    dthh_id                    STRING
+    ,dthh                      STRING
+    ,apptype                   STRING
+    ,uid                       STRING
+    ,mid                       STRING
+    ,vid                       STRING
+    ,sessionid                 STRING
+    ,subsessionid              STRING
+    ,rootsessionid_new         STRING
+    ,pagesource                STRING
+    ,recommendlogvo            STRING -- the recommender's response log is stored in this field
+    ,abcode                    STRING -- A/B group of the recommendation algorithm
+    ,recommendpagetype         STRING -- three kinds of return head; two kinds of swipe-down (immersive page and feed)
+    ,recomtraceid              STRING
+    ,headvideoid               STRING
+    ,rootsourceid              STRING
+    ,hotsencetype              STRING
+    ,animationSceneType        STRING
+    ,extParams                 STRING
+    ,flowpool                  STRING -- sample value: 14#68#3#1735262438476#2
+    ,level                     STRING
+    ,clientip                  STRING
+    ,machineinfo_brand         STRING
+    ,machineinfo_model         STRING
+    ,machineinfo_system        STRING
+    ,machineinfo_wechatversion STRING
+    ,machineinfo_sdkversion    STRING
+    ,province                  STRING
+    ,city                      STRING
+    ,versioncode               STRING
+    ,ts                        STRING
+    ,rn                        STRING
+    ,id                        STRING
+)
+PARTITIONED BY
+(
+    dt                         STRING COMMENT '日期:20240105'
+    ,hh                        STRING COMMENT '小时:04'
+)
+STORED AS ALIORC
+TBLPROPERTIES ('columnar.nested.type' = 'true','comment' = '推荐算法-labelmatch表-上游曝光表')
+LIFECYCLE 3
+;
+-- Enable dynamic partitioning so the INSERT below can derive (dt, hh) from the data.
+SET hive.exec.dynamic.partition = true
+;
+
+SET hive.exec.dynamic.partition.mode = nonstrict
+;
+
+
+-- Loads the videoView events of the hour preceding ${dt}${hh} from
+-- loghubods.video_action_log_flow_new into the exposure base table.
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_exposure_base_view_20250402 PARTITION (dt,hh)
+WITH t_exposure_base AS
+(
+    SELECT  -- Sequence number inside (hour, subsessionid), newest client timestamp first.
+            ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),subsessionid ORDER BY clienttimestamp DESC ) AS dthh_id
+            ,CONCAT(year,month,day,hour) AS dthh
+            ,apptype
+            ,uid
+            ,mid
+            ,videoid AS vid
+            ,sessionid
+            ,subsessionid
+            ,rootsessionid_new
+            ,pagesource
+            ,recommendlogvo -- the recommender's response log is stored in this field
+            ,COALESCE(GET_JSON_OBJECT(extparams,'$.eventInfos.ab_test003'),"unknown") AS abcode -- A/B group of the recommendation algorithm
+            ,GET_JSON_OBJECT(extparams,'$.recommendPageType') AS recommendpagetype -- three kinds of return head; two kinds of swipe-down (immersive page and feed)
+            ,GET_JSON_OBJECT(extparams,'$.recomTraceId') AS recomtraceid
+            -- The head video id is logged under either key spelling; prefer head_videoid.
+            ,COALESCE(GET_JSON_OBJECT(extParams,'$.head_videoid'),GET_JSON_OBJECT(extParams,'$.head_videoId')) AS headvideoid
+            ,GET_JSON_OBJECT(extParams,'$.rootSourceId') AS rootsourceid
+            ,COALESCE(hotsencetype,sencetype,"other") AS hotsencetype
+            ,GET_JSON_OBJECT(extParams,'$.animationSceneType') AS animationSceneType
+            ,extParams AS extParams
+            ,flowpool -- sample value: 14#68#3#1735262438476#2
+            ,SPLIT(flowpool,'#')[2] AS level -- third '#'-separated field of flowpool
+            ,clientip
+            ,machineinfo_brand
+            ,machineinfo_model
+            ,machineinfo_system
+            ,machineinfo_wechatversion
+            ,machineinfo_sdkversion
+            ,ANALYSISIP(clientip,"region") AS province
+            ,ANALYSISIP(clientip,"city") AS city
+            ,versioncode
+            ,CAST(logtimestamp / 1000 AS BIGINT) AS ts
+            -- Rank of the event among events sharing the same hour, user, video,
+            -- session, sub-session and page, earliest logtimestamp first.
+            ,ROW_NUMBER() OVER (PARTITION BY CONCAT(year,month,day,hour),apptype,uid,mid,videoid,sessionid,subsessionid,pagesource ORDER BY logtimestamp ) AS rn
+            ,CONCAT(year,month,day) AS dt
+            ,hour AS hh
+    FROM    loghubods.video_action_log_flow_new
+    -- Exactly one hour is read: the hour before ${dt}${hh}.
+    -- (A disabled variant of this filter used an offset of 3600 * 25 instead.)
+    WHERE   CONCAT(year,month,day,hour) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+    AND     businesstype IN ('videoView')
+    AND     apptype IS NOT NULL
+    AND     apptype NOT IN ('12')
+    AND     mid IS NOT NULL
+    AND     videoid IS NOT NULL
+)
+SELECT  dthh_id
+        ,dthh
+        ,apptype
+        ,uid
+        ,mid
+        ,vid
+        ,sessionid
+        ,subsessionid
+        ,rootsessionid_new
+        ,pagesource
+        ,recommendlogvo
+        ,abcode
+        ,recommendpagetype
+        ,recomtraceid
+        ,headvideoid
+        ,rootsourceid
+        ,hotsencetype
+        ,animationscenetype
+        ,extparams
+        ,flowpool
+        ,level
+        ,clientip
+        ,machineinfo_brand
+        ,machineinfo_model
+        ,machineinfo_system
+        ,machineinfo_wechatversion
+        ,machineinfo_sdkversion
+        ,province
+        ,city
+        ,versioncode
+        ,ts
+        ,rn
+        ,CONCAT(dthh,":",subsessionid,":",dthh_id) AS id
+        ,dt
+        ,hh
+FROM    t_exposure_base
+-- Pages ending with "-pages/user-videos-share" keep every event; all other pages
+-- keep only the first event (rn = 1) of each partition of the rn window.
+WHERE   (
+            pagesource NOT REGEXP "-pages/user-videos-share$"
+            AND     rn = 1
+        )
+OR      pagesource REGEXP "-pages/user-videos-share$"

+ 437 - 0
production_code/loghubods.dwd_recsys_alg_sample_all_20250212.json

@@ -0,0 +1,437 @@
+{
+  "name": "dwd_recsys_alg_sample_all_20250212",
+  "project": "loghubods",
+  "comment": "推荐全量样本表[20250212版]",
+  "columns": [
+    {
+      "name": "apptype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "vid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "sessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "subsessionid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "pagesource",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "page",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendlogvo",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "abcode",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recommendpagetype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "recomtraceid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "headvideoid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "rootsourceid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "hotsencetype",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "flowpool",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "level",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "clientip",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "brand",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "model",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "system",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "wechatversion",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "sdkversion",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "province",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "city",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "ts",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_share",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "share_cnt",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_return_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_pv",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_uv",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_mids",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_return_n",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_pv",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_uv",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_mids",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_return_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_uv_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_1_mids_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "is_return_n_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_uv_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "return_n_mids_noself",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "new_exposure_cnt",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "extend",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "score",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "extend_alg",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "allfeaturemap",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "metafeaturemap",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "v1_feature",
+      "type": "STRING",
+      "comment": "待排序视频基础信息"
+    },
+    {
+      "name": "v2_feature",
+      "type": "STRING",
+      "comment": "头部视频基础信息"
+    },
+    {
+      "name": "b1_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b2_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b3_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b4_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b5_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b6_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b7_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b8_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b9_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b10_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b11_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b12_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "b13_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c1_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c2_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c3_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c4_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c5_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c6_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c7_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c8_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "c9_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "d1_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "d2_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "d3_feature",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023840988,
+      "name": "00_推荐特征sample表_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_channel_recommend_exp_feature_20250212",
+    "loghubods.alg_festive_recommend_exp_feature_20250212",
+    "loghubods.alg_merge_cate1_recommend_exp_feature_20250212",
+    "loghubods.alg_merge_cate2_recommend_exp_feature_20250212",
+    "loghubods.alg_mid_feature_return_tags",
+    "loghubods.alg_mid_feature_returncf",
+    "loghubods.alg_mid_feature_share_tags",
+    "loghubods.alg_mid_feature_sharecf",
+    "loghubods.alg_recsys_feature_cf_i2i_v2",
+    "loghubods.alg_recsys_feature_user_share_return_stat",
+    "loghubods.alg_vid_apptype_recommend_exp_feature_20250212",
+    "loghubods.alg_vid_brand_recommend_exp_feature_20250212",
+    "loghubods.alg_vid_feature_basic_info",
+    "loghubods.alg_vid_global_feature_20250212",
+    "loghubods.alg_vid_hotsencetype_recommend_exp_feature_20250212",
+    "loghubods.alg_vid_long_period_recommend_exp_feature_20250212",
+    "loghubods.alg_vid_province_recommend_exp_feature_20250212",
+    "loghubods.alg_vid_recommend_exp_feature_20250212",
+    "loghubods.alg_vid_recommend_flowpool_exp_feature_20250212",
+    "loghubods.alg_video_unionid_recommend_exp_feature_20250212",
+    "loghubods.dwd_recsys_alg_exposure_base_20250108",
+    "loghubods.mid_global_feature_20250212",
+    "loghubods.scene_type_vid_cf_feature_20250212",
+    "loghubods.statistics_log_hour",
+    "loghubods.vid_click_cf_feature_20250212"
+  ]
+}

+ 456 - 0
production_code/loghubods.dwd_recsys_alg_sample_all_20250212.sql

@@ -0,0 +1,456 @@
+-- Task: 00_推荐特征sample表_20250212  ID: 1023840988  Type: ODPS_SQL
+--@exclude_input=loghubods.statistics_log_hour
+CREATE TABLE IF NOT EXISTS loghubods.dwd_recsys_alg_sample_all_20250212 -- full recommendation sample table, 20250212 version (English rendering of the table COMMENT)
+(
+    apptype               STRING,
+    uid                   STRING,
+    mid                   STRING,
+    vid                   STRING,
+    sessionid             STRING,
+    subsessionid          STRING,
+    pagesource            STRING,
+    page                  STRING,
+    recommendlogvo        STRING,
+    abcode                STRING,
+    recommendpagetype     STRING,
+    recomtraceid          STRING,
+    headvideoid           STRING,
+    rootsourceid          STRING,
+    hotsencetype          STRING,
+    flowpool              STRING,
+    level                 STRING,
+    clientip              STRING,
+    brand                 STRING,
+    `model`               STRING,
+    system                STRING,
+    wechatversion         STRING,
+    sdkversion            STRING,
+    province              STRING,
+    city                  STRING,
+    ts                    STRING,
+    is_share              STRING,
+    share_cnt             STRING,
+    is_return_1           STRING,
+    return_1_pv           STRING,
+    return_1_uv           STRING,
+    return_1_mids         STRING,
+    is_return_n           STRING,
+    return_n_pv           STRING,
+    return_n_uv           STRING,
+    return_n_mids         STRING,
+    is_return_noself      STRING,
+    return_1_uv_noself    STRING,
+    return_1_mids_noself  STRING,
+    is_return_n_noself    STRING,
+    return_n_uv_noself    STRING,
+    return_n_mids_noself  STRING,
+    new_exposure_cnt      STRING,
+    extend                STRING,
+    score                 STRING, -- score .. metafeaturemap are filled from statistics_log_hour by the INSERT in this script
+    extend_alg            STRING,
+    allfeaturemap         STRING,
+    metafeaturemap        STRING,
+    v1_feature            STRING COMMENT '待排序视频基础信息', -- basic info of the video being ranked
+    v2_feature            STRING COMMENT '头部视频基础信息', -- basic info of the head video
+    b1_feature            STRING,
+    b2_feature            STRING,
+    b3_feature            STRING,
+    b4_feature            STRING,
+    b5_feature            STRING,
+    b6_feature            STRING,
+    b7_feature            STRING,
+    b8_feature            STRING,
+    b9_feature            STRING,
+    b10_feature           STRING,
+    b11_feature           STRING,
+    b12_feature           STRING,
+    b13_feature           STRING,
+    c1_feature            STRING,
+    c2_feature            STRING,
+    c3_feature            STRING,
+    c4_feature            STRING,
+    c5_feature            STRING,
+    c6_feature            STRING,
+    c7_feature            STRING,
+    c8_feature            STRING,
+    c9_feature            STRING,
+    d1_feature            STRING,
+    d2_feature            STRING,
+    d3_feature            STRING
+)
+COMMENT '推荐全量样本表[20250212版]'
+PARTITIONED BY
+(
+    dt                    STRING COMMENT '天', -- day
+    hh                    STRING COMMENT '小时' -- hour
+)
+LIFECYCLE 365 -- table lifecycle: 365 days
+;
+
+INSERT OVERWRITE TABLE loghubods.dwd_recsys_alg_sample_all_20250212 PARTITION (dt,hh) -- dynamic partitions: dt/hh are taken from the trailing dt, hh columns of the final SELECT
+WITH t_exp AS -- exposure rows for the single hour 25h before the scheduled dt/hh; left-most table of every join below
+(
+    SELECT  apptype
+            ,uid
+            ,mid
+            ,vid
+            ,CASE   WHEN uid IS NOT NULL -- merge_mid: uid when it is non-NULL, non-empty and not the literal 'null'; otherwise mid
+                        AND LENGTH(uid) > 0
+                        AND uid != 'null' THEN uid
+                    ELSE mid
+            END AS merge_mid -- only consumed by the c9 join
+            ,sessionid
+            ,subsessionid
+            ,pagesource
+            ,page
+            ,recommendlogvo
+            ,abcode
+            ,recommendpagetype
+            ,recomtraceid
+            ,headvideoid
+            ,rootsourceid
+            ,hotsencetype
+            ,flowpool
+            ,level
+            ,clientip
+            ,machineinfo_brand AS brand -- machineinfo_* columns are exposed under the shorter target-table names
+            ,machineinfo_model AS model
+            ,machineinfo_system AS system
+            ,machineinfo_wechatversion AS wechatversion
+            ,machineinfo_sdkversion AS sdkversion
+            ,province
+            ,city
+            ,ts
+            ,is_share
+            ,share_cnt
+            ,is_return_1
+            ,return_1_pv
+            ,return_1_uv
+            ,return_1_mids
+            ,is_return_n
+            ,return_n_pv
+            ,return_n_uv
+            ,return_n_mids
+            ,is_return_noself
+            ,return_1_uv_noself
+            ,return_1_mids_noself
+            ,is_return_n_noself
+            ,return_n_uv_noself
+            ,return_n_mids_noself
+            ,new_exposure_cnt
+            ,extend
+            ,dt
+            ,hh
+            ,CASE   WHEN hotsencetype IN ("1007","1008") THEN hotsencetype -- scene-type bucket: "1007"/"1008" are kept as-is
+                    ELSE "other" -- every other value (NULL included) collapses to "other"
+            END hotsencetype_2 -- join key for b7 only; d1 joins on the raw hotsencetype
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') -- target hour = scheduled hour minus 25h; the same offset is used by the feature subqueries below
+    AND     apptype NOT IN ("12") -- NOTE(review): this predicate is also not true for NULL apptype, so such rows are dropped as well — confirm intended
+)
+,t_basic_info AS -- rows of alg_vid_feature_basic_info for the same hour, reduced to one row per vid
+(
+    SELECT  *
+    FROM    (
+                SELECT  vid
+                        ,JSON_FORMAT(feature) AS feature
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.channel"),"unknown") AS channel -- keys extracted from feature; a missing key becomes "unknown"; channel is the b10 join key
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.merge_first_level_cate"),"unknown") AS merge_cate1 -- b8 join key
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.merge_second_level_cate"),"unknown") AS merge_cate2 -- b9 join key
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.festive_label1"),"unknown") AS festive_label1 -- not referenced later in this statement
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.festive_label2"),"unknown") AS festive_label2 -- not referenced later in this statement
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.festive_label2"),"unknown") AS festive -- b11 join key; NOTE(review): reads the same $.festive_label2 path as the line above — confirm this is the intended key
+                        ,COALESCE(GET_JSON_OBJECT(feature,"$.title_time_w_h_unionid"),"unknown") AS title_time_w_h_unionid -- b13 join key
+                        ,ROW_NUMBER() OVER (PARTITION BY vid ) AS rn -- no ORDER BY: when a vid has several rows, which one gets rn = 1 is arbitrary
+                FROM    loghubods.alg_vid_feature_basic_info
+                WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+            ) 
+    WHERE   rn = 1
+)
+,t_result AS -- exposure columns plus one feature column per LEFT JOINed feature source, in target-table column order
+(
+    SELECT  ta.apptype AS apptype
+            ,ta.uid AS uid
+            ,ta.mid AS mid
+            ,ta.vid AS vid
+            ,ta.sessionid AS sessionid
+            ,ta.subsessionid AS subsessionid
+            ,ta.pagesource AS pagesource
+            ,ta.page AS page
+            ,ta.recommendlogvo AS recommendlogvo
+            ,ta.abcode AS abcode
+            ,ta.recommendpagetype AS recommendpagetype
+            ,ta.recomtraceid AS recomtraceid
+            ,ta.headvideoid AS headvideoid
+            ,ta.rootsourceid AS rootsourceid
+            ,ta.hotsencetype AS hotsencetype
+            ,ta.flowpool AS flowpool
+            ,ta.level AS level
+            ,ta.clientip AS clientip
+            ,ta.brand AS brand
+            ,ta.model AS model
+            ,ta.system AS system
+            ,ta.wechatversion AS wechatversion
+            ,ta.sdkversion AS sdkversion
+            ,ta.province AS province
+            ,ta.city AS city
+            ,ta.ts AS ts
+            ,ta.is_share AS is_share
+            ,ta.share_cnt AS share_cnt
+            ,ta.is_return_1 AS is_return_1
+            ,ta.return_1_pv AS return_1_pv
+            ,ta.return_1_uv AS return_1_uv
+            ,ta.return_1_mids AS return_1_mids
+            ,ta.is_return_n AS is_return_n
+            ,ta.return_n_pv AS return_n_pv
+            ,ta.return_n_uv AS return_n_uv
+            ,ta.return_n_mids AS return_n_mids
+            ,ta.is_return_noself AS is_return_noself
+            ,ta.return_1_uv_noself AS return_1_uv_noself
+            ,ta.return_1_mids_noself AS return_1_mids_noself
+            ,ta.is_return_n_noself AS is_return_n_noself
+            ,ta.return_n_uv_noself AS return_n_uv_noself
+            ,ta.return_n_mids_noself AS return_n_mids_noself
+            ,ta.new_exposure_cnt AS new_exposure_cnt
+            ,ta.extend AS extend
+            ,e1.score AS score -- the four e1.* columns come from the statistics_log_hour subquery at the end
+            ,e1.extend_alg AS extend_alg
+            ,e1.allfeaturemap AS allfeaturemap
+            ,e1.metafeaturemap AS metafeaturemap
+            ,v1.feature AS v1_feature -- basic info matched on ta.vid
+            ,v2.feature AS v2_feature -- basic info matched on ta.headvideoid
+            ,b1.feature AS b1_feature
+            ,b2.feature AS b2_feature
+            ,b3.feature AS b3_feature
+            ,b4.feature AS b4_feature
+            ,b5.feature AS b5_feature
+            ,b6.feature AS b6_feature
+            ,b7.feature AS b7_feature
+            ,b8.feature AS b8_feature
+            ,b9.feature AS b9_feature
+            ,b10.feature AS b10_feature
+            ,b11.feature AS b11_feature
+            ,b12.feature AS b12_feature
+            ,b13.feature AS b13_feature
+            ,c1.feature AS c1_feature
+            ,"{}" AS c2_feature -- c2-c4 are the constant "{}": their source joins are commented out further down
+            ,"{}" AS c3_feature
+            ,"{}" AS c4_feature
+            ,c5.feature AS c5_feature
+            ,c6.feature AS c6_feature
+            ,c7.feature AS c7_feature
+            ,c8.feature AS c8_feature
+            ,c9.feature AS c9_feature
+            ,d1.feature AS d1_feature
+            ,d2.feature AS d2_feature
+            ,d3.feature2 AS d3_feature -- feature2 is assembled inside the d3 subquery
+            ,ta.dt AS dt -- dt, hh stay last: they supply the dynamic partition values
+            ,ta.hh AS hh
+    FROM    t_exp ta -- NOTE(review): all joins below are LEFT JOINs, so no exposure row is dropped; presumably each feature source has at most one row per join key in the selected partition, otherwise sample rows multiply — confirm upstream uniqueness
+    LEFT JOIN t_basic_info v1 -- v1: basic info of the exposed video; also supplies the b8-b11 and b13 join keys
+    ON      ta.vid = v1.vid
+    LEFT JOIN t_basic_info v2 -- v2: basic info of the head video
+    ON      ta.headvideoid = v2.vid
+    LEFT JOIN   ( -- b1: keyed by vid
+                    SELECT  *
+                    FROM    loghubods.alg_vid_global_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b1
+    ON      ta.vid = b1.vid
+    LEFT JOIN   ( -- b2: keyed by vid
+                    SELECT  *
+                    FROM    loghubods.alg_vid_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b2
+    ON      ta.vid = b2.vid
+    LEFT JOIN   ( -- b3: keyed by vid
+                    SELECT  *
+                    FROM    loghubods.alg_vid_recommend_flowpool_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b3
+    ON      ta.vid = b3.vid
+    LEFT JOIN   ( -- b4: keyed by vid + apptype
+                    SELECT  *
+                    FROM    loghubods.alg_vid_apptype_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b4
+    ON      ta.vid = b4.vid
+    AND     ta.apptype = b4.apptype
+    LEFT JOIN   ( -- b5: keyed by vid + province
+                    SELECT  *
+                    FROM    loghubods.alg_vid_province_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b5
+    ON      ta.vid = b5.vid
+    AND     ta.province = b5.province
+    LEFT JOIN   ( -- b6: keyed by vid + brand
+                    SELECT  *
+                    FROM    loghubods.alg_vid_brand_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b6
+    ON      ta.vid = b6.vid
+    AND     ta.brand = b6.brand
+    LEFT JOIN   ( -- b7: keyed by vid + bucketed scene type (hotsencetype_2)
+                    SELECT  *
+                    FROM    loghubods.alg_vid_hotsencetype_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b7
+    ON      ta.vid = b7.vid
+    AND     ta.hotsencetype_2 = b7.hotsencetype
+    LEFT JOIN   ( -- b8: keyed by the exposed video's merge_cate1
+                    SELECT  *
+                    FROM    loghubods.alg_merge_cate1_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b8
+    ON      v1.merge_cate1 = b8.merge_cate1
+    LEFT JOIN   ( -- b9: keyed by the exposed video's merge_cate2
+                    SELECT  *
+                    FROM    loghubods.alg_merge_cate2_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b9
+    ON      v1.merge_cate2 = b9.merge_cate2
+    LEFT JOIN   ( -- b10: keyed by the exposed video's channel
+                    SELECT  *
+                    FROM    loghubods.alg_channel_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b10
+    ON      v1.channel = b10.channel
+    LEFT JOIN   ( -- b11: keyed by the exposed video's festive value
+                    SELECT  *
+                    FROM    loghubods.alg_festive_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b11
+    ON      v1.festive = b11.festive
+    LEFT JOIN   ( -- b12: keyed by vid
+                    SELECT  *
+                    FROM    loghubods.alg_vid_long_period_recommend_exp_feature_20250212
+                    WHERE   dt = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDD') -- unlike the other sources, filtered on dt only with a day-level (YYYYMMDD) value
+                ) b12
+    ON      ta.vid = b12.vid
+    LEFT JOIN   ( -- b13: video_unionid matched against the exposed video's title_time_w_h_unionid
+                    SELECT  *
+                    FROM    loghubods.alg_video_unionid_recommend_exp_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) b13
+    ON      v1.title_time_w_h_unionid = b13.video_unionid
+    LEFT JOIN   ( -- c1: keyed by mid
+                    SELECT  *
+                    FROM    loghubods.mid_global_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) c1
+    ON      ta.mid = c1.mid
+    -- LEFT JOIN   (
+    --                 SELECT  *
+    --                 FROM    loghubods.mid_merge_cate1_feature_20250212
+    --                 WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+    --             ) c2
+    -- ON      ta.mid = c2.mid
+    -- AND     v1.merge_cate1 = c2.merge_cate1
+    -- LEFT JOIN   (
+    --                 SELECT  *
+    --                 FROM    loghubods.mid_merge_cate2_feature_20250212
+    --                 WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+    --             ) c3
+    -- ON      ta.mid = c3.mid
+    -- AND     v1.merge_cate2 = c3.merge_cate2
+    -- LEFT JOIN   (
+    --                 SELECT  *
+    --                 FROM    loghubods.mid_u2u_friend_index_feature_20250212
+    --                 WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+    --             ) c4
+    -- ON      ta.mid = c4.mid
+    LEFT JOIN   ( -- c5-c8: keyed by mid; feature is passed through JSON_FORMAT in the subquery
+                    SELECT  mid
+                            ,JSON_FORMAT(feature) AS feature
+                    FROM    loghubods.alg_mid_feature_return_tags
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) c5
+    ON      ta.mid = c5.mid
+    LEFT JOIN   (
+                    SELECT  mid
+                            ,JSON_FORMAT(feature) AS feature
+                    FROM    loghubods.alg_mid_feature_share_tags
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) c6
+    ON      ta.mid = c6.mid
+    LEFT JOIN   (
+                    SELECT  mid
+                            ,JSON_FORMAT(feature) AS feature
+                    FROM    loghubods.alg_mid_feature_sharecf
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) c7
+    ON      ta.mid = c7.mid
+    LEFT JOIN   (
+                    SELECT  mid
+                            ,JSON_FORMAT(feature) AS feature
+                    FROM    loghubods.alg_mid_feature_returncf
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) c8
+    ON      ta.mid = c8.mid
+    LEFT JOIN   ( -- c9: the only source keyed by merge_mid (uid-or-mid) instead of mid
+                    SELECT  *
+                    FROM    loghubods.alg_recsys_feature_user_share_return_stat
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) c9
+    ON      ta.merge_mid = c9.mid
+    LEFT JOIN   ( -- d1: keyed by (head video, exposed video, raw scene type)
+                    SELECT  *
+                    FROM    loghubods.scene_type_vid_cf_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) d1
+    ON      ta.headvideoid = d1.vid_a
+    AND     ta.vid = d1.vid_b
+    AND     ta.hotsencetype = d1.sence_type
+    LEFT JOIN   ( -- d2: keyed by (head video, exposed video)
+                    SELECT  *
+                    FROM    loghubods.vid_click_cf_feature_20250212
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) d2
+    ON      ta.headvideoid = d2.vid_a
+    AND     ta.vid = d2.vid_b
+    LEFT JOIN   ( -- d3: keyed by (head video, exposed video)
+                    SELECT  *
+                            ,JSON_FORMAT(JSON_OBJECT("exp",exp,"return_n",return_n,"rovn",rovn)) AS feature2 -- feature2: JSON string built from the exp, return_n and rovn columns
+                    FROM    loghubods.alg_recsys_feature_cf_i2i_v2
+                    WHERE   CONCAT(dt,hh) = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH')
+                ) d3
+    ON      ta.headvideoid = d3.vid_a
+    AND     ta.vid = d3.vid_b
+    LEFT JOIN   ( -- e1: score, feature maps and extend_alg from statistics_log_hour, reduced to one row per (videoid, recommendtraceid)
+                    SELECT  videoid
+                            ,recommendtraceid
+                            ,allfeaturemap
+                            ,metafeaturemap
+                            ,score
+                            ,JSON_FORMAT( -- extend_alg: JSON string bundling the score-related fields listed in JSON_OBJECT
+                                        JSON_OBJECT("pushFrom",pushFrom,"relevantVideoId",relevantvideoid,"rovScore",rovscore,"scoreRos",scoreros,"scoreStr",scorestr,"scoresMap",scoresMap,"sortScore",sortScore,"pushfromrank",pushfromrank)
+                            ) AS extend_alg
+                    FROM    (
+                                SELECT  pushfrom
+                                        ,rovscore
+                                        ,score
+                                        ,scoreros
+                                        ,scorestr
+                                        ,JSON_PARSE(scoresmap) AS scoresmap -- parsed here, presumably so it nests as JSON (not as a quoted string) inside extend_alg — confirm
+                                        ,sortscore
+                                        ,videoid
+                                        ,relevantvideoid
+                                        ,metafeaturemap
+                                        ,allfeaturemap
+                                        ,recommendtraceid
+                                        ,JSON_PARSE(pushfromrank) AS pushfromrank
+                                        ,ROW_NUMBER() OVER (PARTITION BY videoid,recommendtraceid ) AS rn -- no ORDER BY: the row kept per key is arbitrary when duplicates exist
+                                FROM    loghubods.statistics_log_hour
+                                WHERE   dt = TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 25),'YYYYMMDDHH') -- NOTE(review): dt alone is compared with an hour-level (YYYYMMDDHH) value; presumably this table's dt partition is hourly — confirm
+                                AND     ( -- keep only rows where both fields that are JSON_PARSEd above are valid JSON
+                                            JSON_VALID(scoresmap) = TRUE
+                                            AND      JSON_VALID(pushfromrank) = TRUE
+                                )
+                            ) 
+                    WHERE   rn = 1
+                ) e1
+    ON      ta.vid = e1.videoid
+    AND     ta.recomtraceid = e1.recommendtraceid
+)SELECT  * -- t_result lists its columns in the same order as the target table's DDL, with dt, hh last
+FROM    t_result
+;

+ 48 - 0
production_code/loghubods.mid_global_feature_20250212.json

@@ -0,0 +1,48 @@
+{
+  "name": "mid_global_feature_20250212",
+  "project": "loghubods",
+  "comment": "全部曝光场景下的mid特征",
+  "columns": [
+    {
+      "name": "mid",
+      "type": "STRING",
+      "comment": "mid"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023819179,
+      "name": "01_全局用户特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

+ 283 - 0
production_code/loghubods.mid_global_feature_20250212.sql

@@ -0,0 +1,283 @@
+-- Task: 01_全局用户特征_20250212  ID: 1023819179  Type: ODPS_SQL
+-- Per-mid feature table: one JSON feature string per mid,
+-- partitioned by day (dt) and hour (hh), declared with LIFECYCLE 30.
+CREATE TABLE IF NOT EXISTS loghubods.mid_global_feature_20250212 (
+    mid     STRING COMMENT 'mid',
+    feature STRING COMMENT '特征JSON'
+)
+COMMENT '全部曝光场景下的mid特征'
+PARTITIONED BY (
+    dt STRING COMMENT '天',
+    hh STRING COMMENT '小时'
+)
+LIFECYCLE 30;
+
+-- Rebuilds the target partition (dt, hh) with per-mid counters computed from
+-- exposure rows of the trailing 7 days, relative to the scheduled hour.
+INSERT OVERWRITE TABLE loghubods.mid_global_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_exp AS -- exposure rows inside the look-back window
+(
+    -- Only the columns that t_agg reads are projected here.
+    SELECT  mid
+            ,page
+            ,is_share
+            ,return_1_uv
+            ,share_cnt
+            ,is_return_1
+            ,GET_JSON_OBJECT(extend, "$.animationSceneType") AS animationSceneType
+            -- Age of the exposure relative to the scheduled hour
+            -- (assumes ts holds epoch seconds, TODO confirm).
+            ,UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - CAST(ts AS BIGINT) AS ts_diff
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    -- Day-level bound on the raw partition column. It is implied by the
+    -- hour-level CONCAT filter below (same window, truncated to the day), so it
+    -- never removes a row, but it keeps partition pruning possible even if the
+    -- engine cannot prune through CONCAT(dt,hh).
+    WHERE   dt BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDD') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDD')
+    -- Exact window: from (scheduled hour - 7 days) to (scheduled hour - 1 hour), inclusive.
+    AND     CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+    AND     apptype NOT IN ("12")
+)
+,t_agg AS -- per-mid counters over trailing 1/3/6/12/24/72/168-hour windows
+(
+    SELECT  mid
+            -- exp_*: number of exposure rows whose age falls inside the window
+            ,SUM(CASE WHEN ts_diff <= 1 * 3600 THEN 1 ELSE 0 END) AS exp_1h
+            ,SUM(CASE WHEN ts_diff <= 3 * 3600 THEN 1 ELSE 0 END) AS exp_3h
+            ,SUM(CASE WHEN ts_diff <= 6 * 3600 THEN 1 ELSE 0 END) AS exp_6h
+            ,SUM(CASE WHEN ts_diff <= 12 * 3600 THEN 1 ELSE 0 END) AS exp_12h
+            ,SUM(CASE WHEN ts_diff <= 24 * 3600 THEN 1 ELSE 0 END) AS exp_24h
+            ,SUM(CASE WHEN ts_diff <= 72 * 3600 THEN 1 ELSE 0 END) AS exp_72h
+            ,SUM(CASE WHEN ts_diff <= 168 * 3600 THEN 1 ELSE 0 END) AS exp_168h
+            -- is_share_*: sum of the is_share column
+            ,SUM(CASE WHEN ts_diff <= 1 * 3600 THEN is_share ELSE 0 END) AS is_share_1h
+            ,SUM(CASE WHEN ts_diff <= 3 * 3600 THEN is_share ELSE 0 END) AS is_share_3h
+            ,SUM(CASE WHEN ts_diff <= 6 * 3600 THEN is_share ELSE 0 END) AS is_share_6h
+            ,SUM(CASE WHEN ts_diff <= 12 * 3600 THEN is_share ELSE 0 END) AS is_share_12h
+            ,SUM(CASE WHEN ts_diff <= 24 * 3600 THEN is_share ELSE 0 END) AS is_share_24h
+            ,SUM(CASE WHEN ts_diff <= 72 * 3600 THEN is_share ELSE 0 END) AS is_share_72h
+            ,SUM(CASE WHEN ts_diff <= 168 * 3600 THEN is_share ELSE 0 END) AS is_share_168h
+            -- share_cnt_*: sum of the share_cnt column
+            ,SUM(CASE WHEN ts_diff <= 1 * 3600 THEN share_cnt ELSE 0 END) AS share_cnt_1h
+            ,SUM(CASE WHEN ts_diff <= 3 * 3600 THEN share_cnt ELSE 0 END) AS share_cnt_3h
+            ,SUM(CASE WHEN ts_diff <= 6 * 3600 THEN share_cnt ELSE 0 END) AS share_cnt_6h
+            ,SUM(CASE WHEN ts_diff <= 12 * 3600 THEN share_cnt ELSE 0 END) AS share_cnt_12h
+            ,SUM(CASE WHEN ts_diff <= 24 * 3600 THEN share_cnt ELSE 0 END) AS share_cnt_24h
+            ,SUM(CASE WHEN ts_diff <= 72 * 3600 THEN share_cnt ELSE 0 END) AS share_cnt_72h
+            ,SUM(CASE WHEN ts_diff <= 168 * 3600 THEN share_cnt ELSE 0 END) AS share_cnt_168h
+            -- is_return_1_*: sum of the is_return_1 column
+            ,SUM(CASE WHEN ts_diff <= 1 * 3600 THEN is_return_1 ELSE 0 END) AS is_return_1_1h
+            ,SUM(CASE WHEN ts_diff <= 3 * 3600 THEN is_return_1 ELSE 0 END) AS is_return_1_3h
+            ,SUM(CASE WHEN ts_diff <= 6 * 3600 THEN is_return_1 ELSE 0 END) AS is_return_1_6h
+            ,SUM(CASE WHEN ts_diff <= 12 * 3600 THEN is_return_1 ELSE 0 END) AS is_return_1_12h
+            ,SUM(CASE WHEN ts_diff <= 24 * 3600 THEN is_return_1 ELSE 0 END) AS is_return_1_24h
+            ,SUM(CASE WHEN ts_diff <= 72 * 3600 THEN is_return_1 ELSE 0 END) AS is_return_1_72h
+            ,SUM(CASE WHEN ts_diff <= 168 * 3600 THEN is_return_1 ELSE 0 END) AS is_return_1_168h
+            -- return_1_uv_*: sum of the return_1_uv column
+            ,SUM(CASE WHEN ts_diff <= 1 * 3600 THEN return_1_uv ELSE 0 END) AS return_1_uv_1h
+            ,SUM(CASE WHEN ts_diff <= 3 * 3600 THEN return_1_uv ELSE 0 END) AS return_1_uv_3h
+            ,SUM(CASE WHEN ts_diff <= 6 * 3600 THEN return_1_uv ELSE 0 END) AS return_1_uv_6h
+            ,SUM(CASE WHEN ts_diff <= 12 * 3600 THEN return_1_uv ELSE 0 END) AS return_1_uv_12h
+            ,SUM(CASE WHEN ts_diff <= 24 * 3600 THEN return_1_uv ELSE 0 END) AS return_1_uv_24h
+            ,SUM(CASE WHEN ts_diff <= 72 * 3600 THEN return_1_uv ELSE 0 END) AS return_1_uv_72h
+            ,SUM(CASE WHEN ts_diff <= 168 * 3600 THEN return_1_uv ELSE 0 END) AS return_1_uv_168h
+            -- share_*: the share count of the exposures is used as the share count
+            -- of the user (same expression as share_cnt_*, emitted under a second name)
+            ,SUM(CASE WHEN ts_diff <= 1 * 3600 THEN share_cnt ELSE 0 END) AS share_1h
+            ,SUM(CASE WHEN ts_diff <= 3 * 3600 THEN share_cnt ELSE 0 END) AS share_3h
+            ,SUM(CASE WHEN ts_diff <= 6 * 3600 THEN share_cnt ELSE 0 END) AS share_6h
+            ,SUM(CASE WHEN ts_diff <= 12 * 3600 THEN share_cnt ELSE 0 END) AS share_12h
+            ,SUM(CASE WHEN ts_diff <= 24 * 3600 THEN share_cnt ELSE 0 END) AS share_24h
+            ,SUM(CASE WHEN ts_diff <= 72 * 3600 THEN share_cnt ELSE 0 END) AS share_72h
+            ,SUM(CASE WHEN ts_diff <= 168 * 3600 THEN share_cnt ELSE 0 END) AS share_168h
+            -- click_*: exposures on the two listed pages with no animationSceneType
+            -- are counted as return-flow clicks of the user
+            ,SUM(CASE WHEN ts_diff <= 1 * 3600 AND animationSceneType IS NULL AND page IN ("回流页","详情页") THEN 1 ELSE 0 END) AS click_1h
+            ,SUM(CASE WHEN ts_diff <= 3 * 3600 AND animationSceneType IS NULL AND page IN ("回流页","详情页") THEN 1 ELSE 0 END) AS click_3h
+            ,SUM(CASE WHEN ts_diff <= 6 * 3600 AND animationSceneType IS NULL AND page IN ("回流页","详情页") THEN 1 ELSE 0 END) AS click_6h
+            ,SUM(CASE WHEN ts_diff <= 12 * 3600 AND animationSceneType IS NULL AND page IN ("回流页","详情页") THEN 1 ELSE 0 END) AS click_12h
+            ,SUM(CASE WHEN ts_diff <= 24 * 3600 AND animationSceneType IS NULL AND page IN ("回流页","详情页") THEN 1 ELSE 0 END) AS click_24h
+            ,SUM(CASE WHEN ts_diff <= 72 * 3600 AND animationSceneType IS NULL AND page IN ("回流页","详情页") THEN 1 ELSE 0 END) AS click_72h
+            ,SUM(CASE WHEN ts_diff <= 168 * 3600 AND animationSceneType IS NULL AND page IN ("回流页","详情页") THEN 1 ELSE 0 END) AS click_168h
+    FROM    t_exp
+    -- rows with a negative age (timestamp after the scheduled hour) are dropped
+    WHERE   ts_diff >= 0
+    GROUP BY mid
+)
+,t_index AS -- t_agg counters plus derived ratios, rounded to 6 decimals
+(
+    -- For every window W the seven ratios are
+    --   str_one_W   = is_share_W    / exp_W
+    --   ros_one_W   = return_1_uv_W / is_share_W
+    --   str_W       = share_cnt_W   / exp_W
+    --   ros_W       = return_1_uv_W / share_cnt_W
+    --   str_plus_W  = is_return_1_W / exp_W
+    --   ros_minus_W = return_1_uv_W / is_return_1_W
+    --   rovn_W      = return_1_uv_W / exp_W
+    -- COALESCE replaces a NULL quotient with 0 (presumably the zero-divisor case, TODO confirm).
+    SELECT  *
+            -- 1 hour
+            ,ROUND(COALESCE(is_share_1h / exp_1h, 0), 6) AS str_one_1h
+            ,ROUND(COALESCE(return_1_uv_1h / is_share_1h, 0), 6) AS ros_one_1h
+            ,ROUND(COALESCE(share_cnt_1h / exp_1h, 0), 6) AS str_1h
+            ,ROUND(COALESCE(return_1_uv_1h / share_cnt_1h, 0), 6) AS ros_1h
+            ,ROUND(COALESCE(is_return_1_1h / exp_1h, 0), 6) AS str_plus_1h
+            ,ROUND(COALESCE(return_1_uv_1h / is_return_1_1h, 0), 6) AS ros_minus_1h
+            ,ROUND(COALESCE(return_1_uv_1h / exp_1h, 0), 6) AS rovn_1h
+            -- 3 hours
+            ,ROUND(COALESCE(is_share_3h / exp_3h, 0), 6) AS str_one_3h
+            ,ROUND(COALESCE(return_1_uv_3h / is_share_3h, 0), 6) AS ros_one_3h
+            ,ROUND(COALESCE(share_cnt_3h / exp_3h, 0), 6) AS str_3h
+            ,ROUND(COALESCE(return_1_uv_3h / share_cnt_3h, 0), 6) AS ros_3h
+            ,ROUND(COALESCE(is_return_1_3h / exp_3h, 0), 6) AS str_plus_3h
+            ,ROUND(COALESCE(return_1_uv_3h / is_return_1_3h, 0), 6) AS ros_minus_3h
+            ,ROUND(COALESCE(return_1_uv_3h / exp_3h, 0), 6) AS rovn_3h
+            -- 6 hours
+            ,ROUND(COALESCE(is_share_6h / exp_6h, 0), 6) AS str_one_6h
+            ,ROUND(COALESCE(return_1_uv_6h / is_share_6h, 0), 6) AS ros_one_6h
+            ,ROUND(COALESCE(share_cnt_6h / exp_6h, 0), 6) AS str_6h
+            ,ROUND(COALESCE(return_1_uv_6h / share_cnt_6h, 0), 6) AS ros_6h
+            ,ROUND(COALESCE(is_return_1_6h / exp_6h, 0), 6) AS str_plus_6h
+            ,ROUND(COALESCE(return_1_uv_6h / is_return_1_6h, 0), 6) AS ros_minus_6h
+            ,ROUND(COALESCE(return_1_uv_6h / exp_6h, 0), 6) AS rovn_6h
+            -- 12 hours
+            ,ROUND(COALESCE(is_share_12h / exp_12h, 0), 6) AS str_one_12h
+            ,ROUND(COALESCE(return_1_uv_12h / is_share_12h, 0), 6) AS ros_one_12h
+            ,ROUND(COALESCE(share_cnt_12h / exp_12h, 0), 6) AS str_12h
+            ,ROUND(COALESCE(return_1_uv_12h / share_cnt_12h, 0), 6) AS ros_12h
+            ,ROUND(COALESCE(is_return_1_12h / exp_12h, 0), 6) AS str_plus_12h
+            ,ROUND(COALESCE(return_1_uv_12h / is_return_1_12h, 0), 6) AS ros_minus_12h
+            ,ROUND(COALESCE(return_1_uv_12h / exp_12h, 0), 6) AS rovn_12h
+            -- 24 hours
+            ,ROUND(COALESCE(is_share_24h / exp_24h, 0), 6) AS str_one_24h
+            ,ROUND(COALESCE(return_1_uv_24h / is_share_24h, 0), 6) AS ros_one_24h
+            ,ROUND(COALESCE(share_cnt_24h / exp_24h, 0), 6) AS str_24h
+            ,ROUND(COALESCE(return_1_uv_24h / share_cnt_24h, 0), 6) AS ros_24h
+            ,ROUND(COALESCE(is_return_1_24h / exp_24h, 0), 6) AS str_plus_24h
+            ,ROUND(COALESCE(return_1_uv_24h / is_return_1_24h, 0), 6) AS ros_minus_24h
+            ,ROUND(COALESCE(return_1_uv_24h / exp_24h, 0), 6) AS rovn_24h
+            -- 72 hours
+            ,ROUND(COALESCE(is_share_72h / exp_72h, 0), 6) AS str_one_72h
+            ,ROUND(COALESCE(return_1_uv_72h / is_share_72h, 0), 6) AS ros_one_72h
+            ,ROUND(COALESCE(share_cnt_72h / exp_72h, 0), 6) AS str_72h
+            ,ROUND(COALESCE(return_1_uv_72h / share_cnt_72h, 0), 6) AS ros_72h
+            ,ROUND(COALESCE(is_return_1_72h / exp_72h, 0), 6) AS str_plus_72h
+            ,ROUND(COALESCE(return_1_uv_72h / is_return_1_72h, 0), 6) AS ros_minus_72h
+            ,ROUND(COALESCE(return_1_uv_72h / exp_72h, 0), 6) AS rovn_72h
+            -- 168 hours
+            ,ROUND(COALESCE(is_share_168h / exp_168h, 0), 6) AS str_one_168h
+            ,ROUND(COALESCE(return_1_uv_168h / is_share_168h, 0), 6) AS ros_one_168h
+            ,ROUND(COALESCE(share_cnt_168h / exp_168h, 0), 6) AS str_168h
+            ,ROUND(COALESCE(return_1_uv_168h / share_cnt_168h, 0), 6) AS ros_168h
+            ,ROUND(COALESCE(is_return_1_168h / exp_168h, 0), 6) AS str_plus_168h
+            ,ROUND(COALESCE(return_1_uv_168h / is_return_1_168h, 0), 6) AS ros_minus_168h
+            ,ROUND(COALESCE(return_1_uv_168h / exp_168h, 0), 6) AS rovn_168h
+    FROM    t_agg
+)
+,t_result AS ( -- one row per mid: all emitted features serialised as a JSON string
+    SELECT  mid 
+            -- Every value is cast to STRING before it is placed in the JSON object.
+            -- The column is aliased to match the target column name (feature).
+            ,JSON_FORMAT(JSON_OBJECT(
+                "exp_1h", CAST(exp_1h AS STRING),
+                "exp_3h", CAST(exp_3h AS STRING),
+                "exp_6h", CAST(exp_6h AS STRING),
+                "exp_12h", CAST(exp_12h AS STRING),
+                "exp_24h", CAST(exp_24h AS STRING),
+                "exp_72h", CAST(exp_72h AS STRING),
+                "exp_168h", CAST(exp_168h AS STRING),
+                "is_share_1h", CAST(is_share_1h AS STRING),
+                "is_share_3h", CAST(is_share_3h AS STRING),
+                "is_share_6h", CAST(is_share_6h AS STRING),
+                "is_share_12h", CAST(is_share_12h AS STRING),
+                "is_share_24h", CAST(is_share_24h AS STRING),
+                "is_share_72h", CAST(is_share_72h AS STRING),
+                "is_share_168h", CAST(is_share_168h AS STRING),
+                "share_cnt_1h", CAST(share_cnt_1h AS STRING),
+                "share_cnt_3h", CAST(share_cnt_3h AS STRING),
+                "share_cnt_6h", CAST(share_cnt_6h AS STRING),
+                "share_cnt_12h", CAST(share_cnt_12h AS STRING),
+                "share_cnt_24h", CAST(share_cnt_24h AS STRING),
+                "share_cnt_72h", CAST(share_cnt_72h AS STRING),
+                "share_cnt_168h", CAST(share_cnt_168h AS STRING),
+                "is_return_1_1h", CAST(is_return_1_1h AS STRING),
+                "is_return_1_3h", CAST(is_return_1_3h AS STRING),
+                "is_return_1_6h", CAST(is_return_1_6h AS STRING),
+                "is_return_1_12h", CAST(is_return_1_12h AS STRING),
+                "is_return_1_24h", CAST(is_return_1_24h AS STRING),
+                "is_return_1_72h", CAST(is_return_1_72h AS STRING),
+                "is_return_1_168h", CAST(is_return_1_168h AS STRING),
+                "return_1_uv_1h", CAST(return_1_uv_1h AS STRING),
+                "return_1_uv_3h", CAST(return_1_uv_3h AS STRING),
+                "return_1_uv_6h", CAST(return_1_uv_6h AS STRING),
+                "return_1_uv_12h", CAST(return_1_uv_12h AS STRING),
+                "return_1_uv_24h", CAST(return_1_uv_24h AS STRING),
+                "return_1_uv_72h", CAST(return_1_uv_72h AS STRING),
+                "return_1_uv_168h", CAST(return_1_uv_168h AS STRING),
+                "share_1h", CAST(share_1h AS STRING),
+                "share_3h", CAST(share_3h AS STRING),
+                "share_6h", CAST(share_6h AS STRING),
+                "share_12h", CAST(share_12h AS STRING),
+                "share_24h", CAST(share_24h AS STRING),
+                "share_72h", CAST(share_72h AS STRING),
+                "share_168h", CAST(share_168h AS STRING),
+                "click_1h", CAST(click_1h AS STRING),
+                "click_3h", CAST(click_3h AS STRING),
+                "click_6h", CAST(click_6h AS STRING),
+                "click_12h", CAST(click_12h AS STRING),
+                "click_24h", CAST(click_24h AS STRING),
+                "click_72h", CAST(click_72h AS STRING),
+                "click_168h", CAST(click_168h AS STRING),
+                -- NOTE(review): of the ratio features only str_one_1h and rovn_168h
+                -- are emitted. The entries between them are disabled and kept below
+                -- for reference. Confirm this asymmetric pair is intended.
+                "str_one_1h", CAST(str_one_1h AS STRING),
+                -- "ros_one_1h", CAST(ros_one_1h AS STRING),
+                -- "str_1h", CAST(str_1h AS STRING),
+                -- "ros_1h", CAST(ros_1h AS STRING),
+                -- "str_plus_1h", CAST(str_plus_1h AS STRING),
+                -- "ros_minus_1h", CAST(ros_minus_1h AS STRING),
+                -- "rovn_1h", CAST(rovn_1h AS STRING),
+                -- "str_one_3h", CAST(str_one_3h AS STRING),
+                -- "ros_one_3h", CAST(ros_one_3h AS STRING),
+                -- "str_3h", CAST(str_3h AS STRING),
+                -- "ros_3h", CAST(ros_3h AS STRING),
+                -- "str_plus_3h", CAST(str_plus_3h AS STRING),
+                -- "ros_minus_3h", CAST(ros_minus_3h AS STRING),
+                -- "rovn_3h", CAST(rovn_3h AS STRING),
+                -- "str_one_6h", CAST(str_one_6h AS STRING),
+                -- "ros_one_6h", CAST(ros_one_6h AS STRING),
+                -- "str_6h", CAST(str_6h AS STRING),
+                -- "ros_6h", CAST(ros_6h AS STRING),
+                -- "str_plus_6h", CAST(str_plus_6h AS STRING),
+                -- "ros_minus_6h", CAST(ros_minus_6h AS STRING),
+                -- "rovn_6h", CAST(rovn_6h AS STRING),
+                -- "str_one_12h", CAST(str_one_12h AS STRING),
+                -- "ros_one_12h", CAST(ros_one_12h AS STRING),
+                -- "str_12h", CAST(str_12h AS STRING),
+                -- "ros_12h", CAST(ros_12h AS STRING),
+                -- "str_plus_12h", CAST(str_plus_12h AS STRING),
+                -- "ros_minus_12h", CAST(ros_minus_12h AS STRING),
+                -- "rovn_12h", CAST(rovn_12h AS STRING),
+                -- "str_one_24h", CAST(str_one_24h AS STRING),
+                -- "ros_one_24h", CAST(ros_one_24h AS STRING),
+                -- "str_24h", CAST(str_24h AS STRING),
+                -- "ros_24h", CAST(ros_24h AS STRING),
+                -- "str_plus_24h", CAST(str_plus_24h AS STRING),
+                -- "ros_minus_24h", CAST(ros_minus_24h AS STRING),
+                -- "rovn_24h", CAST(rovn_24h AS STRING),
+                -- "str_one_72h", CAST(str_one_72h AS STRING),
+                -- "ros_one_72h", CAST(ros_one_72h AS STRING),
+                -- "str_72h", CAST(str_72h AS STRING),
+                -- "ros_72h", CAST(ros_72h AS STRING),
+                -- "str_plus_72h", CAST(str_plus_72h AS STRING),
+                -- "ros_minus_72h", CAST(ros_minus_72h AS STRING),
+                -- "rovn_72h", CAST(rovn_72h AS STRING),
+                -- "str_one_168h", CAST(str_one_168h AS STRING),
+                -- "ros_one_168h", CAST(ros_one_168h AS STRING),
+                -- "str_168h", CAST(str_168h AS STRING),
+                -- "ros_168h", CAST(ros_168h AS STRING),
+                -- "str_plus_168h", CAST(str_plus_168h AS STRING),
+                -- "ros_minus_168h", CAST(ros_minus_168h AS STRING),
+                "rovn_168h", CAST(rovn_168h AS STRING)
+            )) AS feature
+    FROM    t_index
+)
+-- Explicit column list in target-table order (mid, feature).
+SELECT  mid
+        ,feature
+FROM    t_result
+;

+ 41 - 0
production_code/loghubods.operators_channel.json

@@ -0,0 +1,41 @@
+{
+  "name": "operators_channel",
+  "project": "loghubods",
+  "comment": "TABLE COMMENT",
+  "columns": [
+    {
+      "name": "type",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "name",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "videoid",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1007013800,
+      "name": "渠道统计优化"
+    }
+  ],
+  "upstream_tables": [
+    "videoods.user_type_tag_per1h",
+    "videoods.user_type_tag_rel_per1h",
+    "videoods.wx_video_per1h",
+    "videoods.wx_video_tag_per1h",
+    "videoods.wx_video_tag_rel_per1h"
+  ]
+}

+ 478 - 0
production_code/loghubods.operators_channel.sql

@@ -0,0 +1,478 @@
+-- Task: 渠道统计优化  ID: 1007013800  Type: ODPS_SQL
+--odps sql 
+--********************************************************************--
+--author:杜崇宇
+--create time:2022-11-13 03:10:25
+--********************************************************************--
+--CREATE TABLE IF NOT EXISTS loghubods.operators_channel
+--(
+--    type  STRING COMMENT ''
+--    ,name STRING COMMENT ''
+--    ,uid STRING COMMENT ''
+--    ,videoid STRING COMMENT ''
+--)
+--COMMENT 'TABLE COMMENT';
+--select * from operators_channel where videoid='27750839' 
+
+
+-- Overwrites operators_channel with the rows produced by the owner-specific
+-- classification query that follows the CTE list.
+INSERT OVERWRITE TABLE operators_channel
+
+WITH user_tags AS -- one row per uid: all tag names of that uid joined with ','
+(
+    SELECT  a.uid
+            ,CONCAT_WS(',',COLLECT_SET(tag_name)) AS identity_tagname
+    FROM    (
+                -- De-duplicate uids before joining the tag tables. Joining the raw
+                -- video rows multiplies every tag row by the number of videos of the
+                -- user, and COLLECT_SET then has to collapse the duplicates again.
+                SELECT  DISTINCT uid
+                FROM    videoods.wx_video_per1h
+            ) a
+    LEFT JOIN videoods.user_type_tag_rel_per1h b
+    ON      a.uid = b.uid
+    LEFT JOIN videoods.user_type_tag_per1h c
+    ON      b.tag_id = c.tag_id
+    -- group per uid so that the tags of each user are aggregated together
+    GROUP BY a.uid
+)
+,excluded_videos AS -- ids of videos tagged 88467, 88530 or 88468 (the videos to exclude)
+(
+    SELECT  DISTINCT video_id AS id
+    FROM    videoods.wx_video_tag_rel_per1h
+    WHERE   tag_id IN (88467,88530,88468)
+)
+,autopick_videos AS -- ids of automatically picked videos (tag 88467 or 88530)
+(
+    SELECT  DISTINCT video_id AS id
+    FROM    videoods.wx_video_tag_rel_per1h
+    WHERE   tag_id IN (88467,88530)
+)
+,handpick_videos AS -- ids of manually picked videos (tag 88468)
+(
+    SELECT  DISTINCT video_id AS id
+    FROM    videoods.wx_video_tag_rel_per1h
+    WHERE   tag_id IN (88468)
+) -- main query: reads user_tags joined to videoods.wx_video_per1h
+
+
+
+-- Classifies every (uid, video) pair by matching the comma-joined tag string
+-- of the uid against owner-specific rules. Rows with no matching rule are dropped.
+-- NOTE(review): the target table columns are (type, name, uid, videoid) and an
+-- INSERT maps select-list columns by position, so alias "name" appears to feed
+-- target column type and alias "channel" target column name. Confirm.
+select * from (SELECT 
+    -- First CASE: classification label. The first matching WHEN wins, so rule order matters.
+    CASE 
+        -- Rules for owner 范军
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '公众号爬虫新' AND ut.identity_tagname REGEXP '账号' AND ut.identity_tagname REGEXP '范军' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '范军' AND wv.id NOT IN (SELECT id FROM excluded_videos) THEN '全面spider'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '范军' AND wv.id IN (SELECT id FROM autopick_videos) THEN '全面spider-autopick'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '范军' AND wv.id IN (SELECT id FROM handpick_videos) THEN '全面spider-handpick'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '小年糕爬虫' AND ut.identity_tagname REGEXP '范军' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '公众新号' AND ut.identity_tagname REGEXP '范军' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '抖音爬虫' AND ut.identity_tagname REGEXP '范军' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '西瓜新爬虫' AND ut.identity_tagname REGEXP '范军' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '快手爬虫' AND ut.identity_tagname REGEXP '范军' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '范军' AND ut.identity_tagname REGEXP '长沙运营引入' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '范军' AND ut.identity_tagname REGEXP '机器制作视频' THEN 'AGC'
+        -- Rules for owner 余海涛
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '看一看爬虫' AND ut.identity_tagname REGEXP '推荐' AND ut.identity_tagname REGEXP '首页推荐' AND ut.identity_tagname REGEXP '余海涛' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '余海涛' AND wv.id NOT IN (SELECT id FROM excluded_videos) THEN '全面spider'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '余海涛' AND wv.id IN (SELECT id FROM autopick_videos) THEN '全面spider-autopick'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '余海涛' AND wv.id IN (SELECT id FROM handpick_videos) THEN '全面spider-handpick'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '小年糕爬虫' AND ut.identity_tagname REGEXP '余海涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '公众新号' AND ut.identity_tagname REGEXP '余海涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '抖音爬虫' AND ut.identity_tagname REGEXP '余海涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '西瓜新爬虫' AND ut.identity_tagname REGEXP '余海涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '快手爬虫' AND ut.identity_tagname REGEXP '余海涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '余海涛' AND ut.identity_tagname REGEXP '长沙运营引入' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '余海涛' AND ut.identity_tagname REGEXP '机器制作视频' THEN 'AGC'
+        -- Rules for owner 罗情
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '西瓜视频爬虫' AND ut.identity_tagname REGEXP '推荐' AND ut.identity_tagname REGEXP '播放榜' AND ut.identity_tagname REGEXP '罗情' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '罗情' AND wv.id NOT IN (SELECT id FROM excluded_videos) THEN '全面spider'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '罗情' AND wv.id IN (SELECT id FROM autopick_videos) THEN '全面spider-autopick'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '罗情' AND wv.id IN (SELECT id FROM handpick_videos) THEN '全面spider-handpick'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '小年糕爬虫' AND ut.identity_tagname REGEXP '罗情' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '公众新号' AND ut.identity_tagname REGEXP '罗情' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '抖音爬虫' AND ut.identity_tagname REGEXP '罗情' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '西瓜新爬虫' AND ut.identity_tagname REGEXP '罗情' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '快手爬虫' AND ut.identity_tagname REGEXP '罗情' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '罗情' AND ut.identity_tagname REGEXP '长沙运营引入' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '罗情' AND ut.identity_tagname REGEXP '机器制作视频' THEN 'AGC'
+
+
+        -- Rules for owner 鲁涛
+         WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '看一看爬虫' AND ut.identity_tagname REGEXP '推荐' AND ut.identity_tagname REGEXP '鲁涛播放榜' AND ut.identity_tagname REGEXP '鲁涛' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '鲁涛' AND wv.id NOT IN (SELECT id FROM excluded_videos) THEN '全面spider'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '鲁涛' AND wv.id IN (SELECT id FROM autopick_videos) THEN '全面spider-autopick'
+        WHEN ut.identity_tagname REGEXP '机器自动改造' AND ut.identity_tagname REGEXP '鲁涛' AND wv.id IN (SELECT id FROM handpick_videos) THEN '全面spider-handpick'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '小年糕爬虫' AND ut.identity_tagname REGEXP '鲁涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '公众新号' AND ut.identity_tagname REGEXP '鲁涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '抖音爬虫' AND ut.identity_tagname REGEXP '鲁涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '西瓜新爬虫' AND ut.identity_tagname REGEXP '鲁涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP 'spider' AND ut.identity_tagname REGEXP '快手爬虫' AND ut.identity_tagname REGEXP '鲁涛' AND ut.identity_tagname REGEXP '账号' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '鲁涛' AND ut.identity_tagname REGEXP '长沙运营引入' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '鲁涛' AND ut.identity_tagname REGEXP '机器制作视频' THEN 'AGC'
+
+        -- Rules for owner 刘诗雨
+                WHEN ut.identity_tagname REGEXP '刘诗雨' AND ut.identity_tagname REGEXP '长沙运营引入' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '刘诗雨' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id NOT IN (SELECT id FROM excluded_videos) THEN '全面spider'
+        WHEN ut.identity_tagname REGEXP '刘诗雨' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id IN (SELECT id FROM autopick_videos) THEN '全面spider-autopick'
+        WHEN ut.identity_tagname REGEXP '刘诗雨' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id IN (SELECT id FROM handpick_videos) THEN '全面spider-handpick'
+        WHEN ut.identity_tagname REGEXP '刘诗雨' AND ut.identity_tagname REGEXP '机器制作视频' THEN 'AGC'
+
+        -- Rules for owner 王玉婷
+        WHEN ut.identity_tagname REGEXP '王玉婷' AND ut.identity_tagname REGEXP '长沙运营引入' THEN '垂直spider'
+        WHEN ut.identity_tagname REGEXP '王玉婷' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id NOT IN (SELECT id FROM excluded_videos) THEN '全面spider'
+        WHEN ut.identity_tagname REGEXP '王玉婷' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id IN (SELECT id FROM autopick_videos) THEN '全面spider-autopick'
+        WHEN ut.identity_tagname REGEXP '王玉婷' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id IN (SELECT id FROM handpick_videos) THEN '全面spider-handpick'
+        WHEN ut.identity_tagname REGEXP '王玉婷' AND ut.identity_tagname REGEXP '机器制作视频' THEN 'AGC'
+
+
+        -- Rules for owners 刘坤宇, 阮望, 刘梓漩 and 王知微
+        -- NOTE(review): each AGC rule in this group tests the same two tags as the
+        -- owner-independent catch-all at the end of this CASE and yields the same label.
+        WHEN ut.identity_tagname REGEXP '刘坤宇' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id NOT IN (SELECT id FROM excluded_videos) THEN '全面spider'
+        WHEN ut.identity_tagname REGEXP '刘坤宇' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id IN (SELECT id FROM autopick_videos) THEN '全面spider-autopick'
+        WHEN ut.identity_tagname REGEXP '刘坤宇' AND ut.identity_tagname REGEXP '机器自动改造' AND wv.id IN (SELECT id FROM handpick_videos) THEN '全面spider-handpick'
+        WHEN ut.identity_tagname REGEXP '刘坤宇' AND ut.identity_tagname REGEXP '机器制作视频' AND     ut.identity_tagname REGEXP 'AI自制' THEN 'AGC'
+        WHEN ut.identity_tagname REGEXP '阮望' AND ut.identity_tagname  REGEXP '机器制作视频' AND     ut.identity_tagname REGEXP 'AI自制' THEN 'AGC'
+
+        WHEN ut.identity_tagname REGEXP '刘梓漩' AND ut.identity_tagname REGEXP '机器制作视频' AND     ut.identity_tagname REGEXP 'AI自制'  THEN 'AGC' 
+
+        -- NOTE(review): unlike the other owners, this rule has no excluded/autopick/handpick
+        -- video check. Confirm that is intended.
+        WHEN ut.identity_tagname REGEXP '王知微' AND ut.identity_tagname REGEXP '机器自动改造'  THEN '全面spider'
+        WHEN ut.identity_tagname REGEXP '王知微' AND ut.identity_tagname REGEXP '机器制作视频' AND     ut.identity_tagname REGEXP 'AI自制'  THEN 'AGC' 
+        -- Owner-independent catch-all for the two AGC tags
+        WHEN  ut.identity_tagname REGEXP '机器制作视频' AND     ut.identity_tagname REGEXP 'AI自制'  THEN 'AGC' 
+    END AS name,
+    -- Second CASE: the first owner tag found in the tag string decides the channel value.
+    -- Rows without any listed owner get the fallback literal.
+    CASE 
+        WHEN ut.identity_tagname REGEXP '范军' THEN '范军'
+        WHEN ut.identity_tagname REGEXP '余海涛' THEN '余海涛'
+        WHEN ut.identity_tagname REGEXP '罗情' THEN '罗情'
+        WHEN ut.identity_tagname REGEXP '鲁涛' THEN '鲁涛'
+        WHEN ut.identity_tagname REGEXP '刘诗雨' THEN '刘诗雨'
+        WHEN ut.identity_tagname REGEXP '王玉婷' THEN '王玉婷'
+        WHEN ut.identity_tagname REGEXP '刘坤宇' THEN '刘坤宇'
+        WHEN ut.identity_tagname REGEXP '阮望' THEN '阮望'
+        WHEN ut.identity_tagname REGEXP '王知微' THEN '王知微'
+        WHEN ut.identity_tagname REGEXP '刘梓漩' THEN '刘梓漩'
+        WHEN ut.identity_tagname REGEXP '刘兆恒' THEN '刘兆恒'
+        WHEN ut.identity_tagname REGEXP '张博' THEN '张博'
+        WHEN ut.identity_tagname REGEXP '尹梦莎' THEN '尹梦莎'
+        WHEN ut.identity_tagname REGEXP '马晗' THEN '马晗'
+        else '无负责人'
+    END AS channel,
+    ut.uid,
+    wv.id AS videoid
+FROM 
+    user_tags ut
+-- Users whose tag string matches either of the two names in the pattern below are excluded here.
+LEFT JOIN 
+    videoods.wx_video_per1h wv ON ut.uid = wv.uid WHERE ut.identity_tagname NOT REGEXP '信欣|王雪珂')
+    -- keep only rows that matched a rule in the first CASE
+    where name is not null  ;
+
+INSERT INTO  TABLE operators_channel -- appends (INSERT INTO, not OVERWRITE) the UNION of the labelled SELECT branches below
+WITH user_tags AS -- one row per uid carrying that uploader's tag names as a single string
+(
+    SELECT  a.uid -- next column: the collected tag names joined by commas into a single string
+            ,CONCAT_WS(',',COLLECT_SET(tag_name)) AS identity_tagname
+    FROM    videoods.wx_video_per1h a
+    LEFT JOIN videoods.user_type_tag_rel_per1h b
+    ON      a.uid = b.uid
+    LEFT JOIN videoods.user_type_tag_per1h c
+    ON      b.tag_id = c.tag_id -- grouped by uid below so each user's tags are aggregated into one row
+    GROUP BY a.uid
+) -- NOTE(review): the original comment here described an excluded_videos CTE, but no such CTE is defined in this statement;
+-- the tag-id based video filters are written as inline IN / NOT IN subqueries on videoods.wx_video_tag_rel_per1h instead.
+-- Branch 1: rows whose owner column is the constant '信欣'; the wrapper query keeps only rows where the CASE produced a label.
+select 
+*
+from 
+(SELECT 
+    
+    CASE -- first matching WHEN wins, so the order of the arms below is significant
+        -- Vertical-spider ('垂直spider') rules
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '节日祝福幸福吉祥' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '好友视频' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '趣转' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福的问候' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP '垂直spider-封面测试' AND identity_tagname REGEXP '信欣' THEN '垂直spider' -- keyed on the cover-test tag together with '信欣' rather than on '推荐'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '老年圈' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '老年生活快乐' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '新视圈' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '福气好运到' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '超级票圈' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '绚烂祝福' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '新春祝福' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '朝朝欢喜祝福语' AND identity_tagname REGEXP '推荐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '公众号爬虫新' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '西瓜视频爬虫' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' AND identity_tagname REGEXP '王雪珂播放榜' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '小年糕爬虫' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '小年糕爬虫' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '西瓜视频爬虫' AND identity_tagname REGEXP '搜索' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '西瓜新爬虫' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '快手爬虫' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '快手爬虫' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN b.uid = 71617897 AND identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '抖音爬虫' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider' -- the only arm that also pins one specific uploader uid
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '抖音爬虫' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP 'youtube爬虫' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '众妙音信' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '刚刚都传' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '本山祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '吉祥幸福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '岁岁年年迎福气' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福圈子' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '海豚祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '福气旺' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '小年糕+' AND identity_tagname REGEXP '王雪珂' AND (identity_tagname REGEXP '20230928' OR identity_tagname REGEXP '20231010') THEN '垂直spider' -- NOTE(review): '+' is a regex quantifier here, so this pattern also matches plain '小年糕'; confirm whether a literal plus sign was intended
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福生活' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '中老年娱乐' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '老年队伍' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '看一看爬虫' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' AND identity_tagname REGEXP '知足常乐' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '看一看爬虫' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' AND identity_tagname REGEXP 'hcm' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '老年团队' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '花好月圆中老年' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '欢欢喜喜祝福到' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '优乐搞笑小视频' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '看一看线下' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '看一看pius' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '好看视频' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝万物复苏' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '漂漂圈' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '珊瑚祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福咱们中老年之视频' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '每天送祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '经典旺福气' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '退休大本营' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '美好星河' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '吉祥祝福为你传递好运' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '开心幸福到万家' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '吉祥佳节要祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '鲸鱼祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝尽善尽美' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福意气风发' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '天星小视频' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '节日应祝福快乐' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '老友每日祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福你欢裕如意吉祥' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '佳节祝福富足吉祥' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '节日祝福咱们五谷丰登' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝春华秋实' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝百岁之好' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '开心快乐常相伴' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '视频刷刷' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '海鸟祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '打开迎好运' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '福小顺' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '新万物复苏' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '新尽善尽美' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '新欢欢喜喜' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '博清祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '鲨鱼祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '佳节祝福喜乐多多' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '公众新号' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '小年糕话题' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福好运暴富' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '愿你福气常在' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '最好送你' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '天天聚福气' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '节日祝福你祥贵' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '小年糕爬虫' AND identity_tagname REGEXP '账号' AND identity_tagname REGEXP 'xng自动抓账号' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福圈推荐流' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '小年糕推荐流' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP '快手小程序' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福年年顺心吉祥' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '祝福快转' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '乐跑乐动' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '篻圈故事' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '微圈视频' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '乐跑迪捷' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '好运祝福多' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '愿你福气满满' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '节日祝福花开富贵' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+        WHEN identity_tagname REGEXP 'spider' AND identity_tagname REGEXP '玲玲快乐祝福' AND identity_tagname REGEXP '推荐' AND identity_tagname REGEXP '王雪珂' THEN '垂直spider'
+    
+                            WHEN identity_tagname REGEXP '信欣'
+                                AND identity_tagname REGEXP '中青看点相关推荐'
+                                AND identity_tagname REGEXP '相关推荐' THEN '垂直spider'
+                            WHEN identity_tagname REGEXP '信欣'
+                                AND identity_tagname REGEXP '中青看点账号'
+                                AND identity_tagname REGEXP 'zhanghao' THEN '垂直spider'
+                            WHEN identity_tagname REGEXP '信欣'
+                                AND identity_tagname REGEXP '中青看点推荐'
+                                AND identity_tagname REGEXP '中青看点' THEN '垂直spider'
+
+
+        -- Full-spider ('全面spider') rules: the sub-label depends on which of tag ids 88467 / 88530 / 88468 the video carries
+        WHEN identity_tagname REGEXP '机器自动改造' AND identity_tagname REGEXP '王雪珂' AND b.id NOT IN ( -- NOTE(review): NOT IN never evaluates to true if the subquery can return a NULL video_id; confirm video_id is never NULL
+            SELECT DISTINCT video_id 
+            FROM videoods.wx_video_tag_rel_per1h 
+            WHERE tag_id IN (88467,88530,88468)
+        ) THEN '全面spider'
+        WHEN identity_tagname REGEXP '机器自动改造' AND identity_tagname REGEXP '王雪珂' AND b.id IN (
+            SELECT DISTINCT video_id 
+            FROM videoods.wx_video_tag_rel_per1h 
+            WHERE tag_id IN (88467,88530)
+        ) THEN '全面spider-autopick'
+        WHEN identity_tagname REGEXP '机器自动改造' AND identity_tagname REGEXP '王雪珂' AND b.id IN (
+            SELECT DISTINCT video_id 
+            FROM videoods.wx_video_tag_rel_per1h 
+            WHERE tag_id IN (88468)
+        ) THEN '全面spider-handpick'
+        WHEN identity_tagname REGEXP '信欣' AND identity_tagname REGEXP '机器自动改造' AND b.id NOT IN ( -- same three outcomes as the '王雪珂' arms above, keyed on '信欣' instead
+            SELECT DISTINCT video_id 
+            FROM videoods.wx_video_tag_rel_per1h 
+            WHERE tag_id IN (88467,88530,88468)
+        ) THEN '全面spider'
+        WHEN identity_tagname REGEXP '信欣' AND identity_tagname REGEXP '机器自动改造' AND b.id IN (
+            SELECT DISTINCT video_id 
+            FROM videoods.wx_video_tag_rel_per1h 
+            WHERE tag_id IN (88467,88530)
+        ) THEN '全面spider-autopick'
+        WHEN identity_tagname REGEXP '信欣' AND identity_tagname REGEXP '机器自动改造' AND b.id IN (
+            SELECT DISTINCT video_id 
+            FROM videoods.wx_video_tag_rel_per1h 
+            WHERE tag_id IN (88468)
+        ) THEN '全面spider-handpick'
+         WHEN  identity_tagname REGEXP '信欣' AND identity_tagname REGEXP '机器制作视频' AND     identity_tagname REGEXP 'AI自制'  THEN 'AGC' 
+         WHEN  identity_tagname REGEXP '信欣' AND identity_tagname REGEXP 'spider' AND     identity_tagname REGEXP '垂直重发'  THEN '垂直spider' 
+        ELSE NULL -- unmatched rows get NULL and are removed by the wrapper's name filter
+    END AS name,
+    '信欣' AS channel, -- constant: every row of this branch carries '信欣' in its second column
+    user_tags.uid,
+    b.id AS videoid
+FROM 
+    user_tags 
+LEFT JOIN 
+    videoods.wx_video_per1h b ON user_tags.uid = b.uid) where 
+ name is not null
+union 
+SELECT  DISTINCT 'userupload' AS type -- Branch 2: uploaders whose tag string contains none of spider / transport / 机器制作视频 / oldVideoPush, or is NULL
+        ,'杜崇宇' AS name -- NOTE: from here on the first two columns are aliased type / name, whereas branch 1 aliases the same positions name / channel
+        ,a.uid
+        ,b.id AS videoid
+FROM    ( -- same per-uid tag aggregation as the user_tags CTE, repeated inline
+            SELECT  a.uid
+                    ,CONCAT_WS(',',COLLECT_SET(tag_name)) AS identity_tagname
+            FROM    videoods.wx_video_per1h a
+            LEFT JOIN videoods.user_type_tag_rel_per1h b
+            ON      a.uid = b.uid
+            LEFT JOIN videoods.user_type_tag_per1h c
+            ON      b.tag_id = c.tag_id
+            GROUP BY a.uid
+        ) a
+LEFT JOIN videoods.wx_video_per1h b
+ON      a.uid = b.uid
+WHERE   (
+            (
+                        a.identity_tagname NOT REGEXP 'spider'
+                        AND     a.identity_tagname NOT REGEXP 'transport'
+                        AND     a.identity_tagname NOT REGEXP '机器制作视频'
+                        AND     a.identity_tagname NOT REGEXP 'oldVideoPush'
+            )
+            OR      a.identity_tagname IS NULL
+)
+AND     b.id NOT IN ( -- skips videos carrying the tag ids that the UGC-handpick / UGC-autopick branches below select
+            SELECT  DISTINCT video_id AS id
+            FROM    videoods.wx_video_tag_rel_per1h
+            WHERE   tag_id IN (87812,88050,88251)
+        )  --(translated) newly added strategy statistics
+UNION
+SELECT  DISTINCT 'UGC-handpick' AS type -- Branch 3: videos carrying tag id 87812
+        ,'杜崇宇' AS name
+        ,a.uid
+        ,b.video_id
+FROM    videoods.wx_video_per1h a
+LEFT JOIN   (
+                SELECT  DISTINCT video_id
+                        ,tag_id
+                        ,CASE   WHEN tag_name REGEXP '#str-' THEN tag_name
+                        END AS tag
+                FROM    (
+                            SELECT  a.video_id
+                                    ,a.tag_id
+                                    ,b.tag_name
+                            FROM    (
+                                        SELECT  video_id
+                                                ,tag_id
+                                        FROM    videoods.wx_video_tag_rel_per1h
+                                    ) a
+                            LEFT JOIN   (
+                                            SELECT  tag_id
+                                                    ,tag_name
+                                            FROM    videoods.wx_video_tag_per1h
+                                        ) b
+                            ON      a.tag_id = b.tag_id
+                        )  --HAVING  tag IS NOT NULL
+            ) b
+ON      a.id = b.video_id
+WHERE   b.tag_id IN (87812) -- filtering on b.tag_id drops the unmatched LEFT JOIN rows; the derived tag column is not selected by this branch
+UNION
+SELECT  DISTINCT 'UGC-autopick' AS type -- Branch 4: same shape as branch 3, for tag ids 88050 and 88251
+        ,'杜崇宇' AS name
+        ,a.uid
+        ,b.video_id
+FROM    videoods.wx_video_per1h a
+LEFT JOIN   (
+                SELECT  DISTINCT video_id
+                        ,tag_id
+                        ,CASE   WHEN tag_name REGEXP '#str-' THEN tag_name
+                        END AS tag
+                FROM    (
+                            SELECT  a.video_id
+                                    ,a.tag_id
+                                    ,b.tag_name
+                            FROM    (
+                                        SELECT  video_id
+                                                ,tag_id
+                                        FROM    videoods.wx_video_tag_rel_per1h
+                                    ) a
+                            LEFT JOIN   (
+                                            SELECT  tag_id
+                                                    ,tag_name
+                                            FROM    videoods.wx_video_tag_per1h
+                                        ) b
+                            ON      a.tag_id = b.tag_id
+                        )  --HAVING  tag IS NOT NULL
+            ) b
+ON      a.id = b.video_id
+WHERE   b.tag_id IN (88050,88251)
+UNION ----(translated) "Xinxin crawler"; NOTE(review): the branch that follows emits type 'oldVideoPush', so this label may be stale
+SELECT  DISTINCT 'oldVideoPush' AS type -- Branch 5: uploaders whose tag string contains oldVideoPush
+        ,'杜崇宇' AS name
+        ,a.uid
+        ,b.id AS videoid
+FROM    (
+            SELECT  a.uid
+                    ,CONCAT_WS(',',COLLECT_SET(tag_name)) AS identity_tagname
+            FROM    videoods.wx_video_per1h a
+            LEFT JOIN videoods.user_type_tag_rel_per1h b
+            ON      a.uid = b.uid
+            LEFT JOIN videoods.user_type_tag_per1h c
+            ON      b.tag_id = c.tag_id
+            GROUP BY a.uid
+        ) a
+LEFT JOIN videoods.wx_video_per1h b
+ON      a.uid = b.uid
+WHERE   a.identity_tagname REGEXP 'oldVideoPush'
+union 
+SELECT  *
+FROM    (
+            SELECT  DISTINCT 'transport' AS type -- Branch 6: transport uploaders, labelled with the owner name found in their tag string
+                    ,CASE   WHEN a.identity_tagname REGEXP '信欣|王雪珂' THEN '信欣' -- both '信欣' and '王雪珂' tags are reported as '信欣'
+                            WHEN a.identity_tagname REGEXP '尹梦莎' THEN '尹梦莎'
+                            WHEN a.identity_tagname REGEXP '范军' THEN '范军'
+                            WHEN a.identity_tagname REGEXP '鲁涛' THEN '鲁涛'
+                            WHEN a.identity_tagname REGEXP '穆新艺' THEN '穆新艺'
+                            WHEN a.identity_tagname REGEXP '罗情' THEN '罗情'
+                            WHEN a.identity_tagname REGEXP '余海涛' THEN '余海涛'
+                            WHEN a.identity_tagname REGEXP '任年' THEN '任年'
+                            WHEN a.identity_tagname REGEXP '刘诗雨' THEN '刘诗雨'
+                            WHEN a.identity_tagname REGEXP '王玉婷' THEN '王玉婷'
+                            WHEN a.identity_tagname REGEXP '王媛' THEN '王媛'
+                            WHEN a.identity_tagname REGEXP '王知微' THEN '王知微'
+                            WHEN a.identity_tagname REGEXP '刘兆恒' THEN '刘兆恒'
+                            WHEN a.identity_tagname REGEXP '阮望' THEN '阮望'
+                    END AS name -- no ELSE: any other owner yields NULL and is dropped by the final name filter
+                    ,a.uid
+                    ,b.id AS videoid
+            FROM    (
+                        SELECT  a.uid
+                                ,CONCAT_WS(',',COLLECT_SET(tag_name)) AS identity_tagname
+                        FROM    videoods.wx_video_per1h a
+                        LEFT JOIN videoods.user_type_tag_rel_per1h b
+                        ON      a.uid = b.uid
+                        LEFT JOIN videoods.user_type_tag_per1h c
+                        ON      b.tag_id = c.tag_id
+                        GROUP BY a.uid
+                    ) a
+            LEFT JOIN videoods.wx_video_per1h b
+            ON      a.uid = b.uid
+            AND     a.identity_tagname REGEXP 'transport' -- the transport test sits in the ON clause: non-transport users survive the join with a NULL videoid and are removed by the filter below
+        ) 
+WHERE   videoid IS NOT NULL
+AND     name IS NOT NULL;

+ 48 - 0
production_code/loghubods.operators_channel_dt.json

@@ -0,0 +1,48 @@
+{
+  "name": "operators_channel_dt",
+  "project": "loghubods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "type",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "name",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "videoid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "分区"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "分区"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1025579802,
+      "name": "渠道统计优化_分区"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.operators_channel"
+  ]
+}

+ 24 - 0
production_code/loghubods.operators_channel_dt.sql

@@ -0,0 +1,24 @@
+-- Task: 渠道统计优化_分区  ID: 1025579802  Type: ODPS_SQL
+--odps sql 
+--********************************************************************--
+--author:于卓异
+--create time:2025-04-28 11:14:08
+--********************************************************************--
+-- Partitioned copy of operators_channel: each run fills exactly one dt partition.
+CREATE TABLE IF NOT EXISTS operators_channel_dt
+(
+    `type`  STRING,
+    name    STRING,
+    uid     STRING,
+    videoid STRING
+)
+PARTITIONED BY (dt STRING COMMENT '分区')
+STORED AS ALIORC
+LIFECYCLE 60;
+
+-- Overwrite only the partition named by the date and hour scheduling parameters.
+-- NOTE(review): the star select assumes operators_channel's column order lines up with (type, name, uid, videoid); verify.
+INSERT OVERWRITE TABLE operators_channel_dt
+PARTITION (dt = '${date}${hh}')
+SELECT  src.*
+FROM    operators_channel src

+ 40 - 0
production_code/loghubods.operators_channel_spider.json

@@ -0,0 +1,40 @@
+{
+  "name": "operators_channel_spider",
+  "project": "loghubods",
+  "comment": "爬虫渠道",
+  "columns": [
+    {
+      "name": "name",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "channel",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "videoid",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1008392924,
+      "name": "爬虫产品与负责人"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.operators_channel_spider_base",
+    "videoods.wx_video_per1h",
+    "videoods.wx_video_tag_per1h",
+    "videoods.wx_video_tag_rel_per1h"
+  ]
+}

+ 528 - 0
production_code/loghubods.operators_channel_spider.sql

@@ -0,0 +1,528 @@
+-- Task: 爬虫产品与负责人  ID: 1008392924  Type: ODPS_SQL
+--odps sql 
+--********************************************************************--
+--odps sql 
+--********************************************************************--
+--author:杜崇宇
+--create time:2022-11-13 03:10:25
+--********************************************************************--
+--CREATE TABLE IF NOT EXISTS loghubods.operators_channel_spider
+--(
+--    name  STRING COMMENT ''
+--    ,channel STRING COMMENT ''
+--    ,uid STRING COMMENT ''
+--    ,videoid STRING COMMENT ''
+--)
+--COMMENT '爬虫渠道'; 
+-- Rebuilds operators_channel_spider from scratch (INSERT OVERWRITE of the whole table).
+-- First UNION branch: accounts whose identity_tagname contains 'transport' and none of
+-- the owners 王知微 / 刘兆恒 / 阮望. They are reported under the fixed name '内容运营',
+-- and the channel label is derived from the video's aggregated tag names.
+INSERT OVERWRITE TABLE operators_channel_spider
+SELECT  DISTINCT acct.name
+        ,CASE   WHEN vt.tags REGEXP '自制内容测试|自制内容搬运|AIGC自制内容' THEN '自制'
+                WHEN vt.tags REGEXP '搬运工具' THEN '搬运工具'
+                WHEN vt.tags REGEXP '搬运改造' THEN '搬运改造'
+                WHEN vt.tags REGEXP '站内重发' THEN '站内重发'
+                WHEN vt.tags REGEXP '搬运测试' THEN '搬运测试'
+                ELSE '搬运' -- also reached when the video has no tag row (vt.tags IS NULL)
+        END AS channel -- was aliased "type"; renamed to match the target column (the INSERT maps by position)
+        ,acct.uid
+        ,acct.videoid
+FROM    (
+            -- One row per (uid, video). videoid is NULL for accounts with no video
+            -- because of the LEFT JOIN.
+            SELECT  DISTINCT '内容运营' AS name
+                    ,s.uid
+                    ,v.id AS videoid
+            FROM    operators_channel_spider_base s
+            LEFT JOIN videoods.wx_video_per1h v
+            ON      s.uid = v.uid
+            WHERE   s.identity_tagname REGEXP 'transport'
+            AND     s.identity_tagname NOT REGEXP '王知微|刘兆恒|阮望'
+        ) acct
+LEFT JOIN   (
+                -- All distinct tag names of a video collapsed into one comma-separated string
+                SELECT  r.video_id AS videoid
+                        ,CONCAT_WS(',',COLLECT_SET(t.tag_name)) AS tags
+                FROM    videoods.wx_video_tag_rel_per1h r
+                LEFT JOIN videoods.wx_video_tag_per1h t
+                ON      r.tag_id = t.tag_id
+                GROUP BY r.video_id
+            ) vt
+ON      acct.videoid = vt.videoid
+UNION
+-- Old-video re-push accounts: identity_tagname contains 'oldVideoPush'. All rows are
+-- reported under the fixed owner name '杜崇宇'; the channel label is derived from the
+-- '#str-…' strategy tags attached to each video.
+SELECT  DISTINCT acct.name
+        ,CASE   WHEN vt.tags REGEXP '#str-老视频复推历史同期_54|#str-老视频复推中周期_57|#str-老视频复推短周期_56' THEN '老视频复推_周期'
+                WHEN vt.tags REGEXP '#str-老视频复推单视频_24|#str-老视频复推单视频_分发有效分享_24|#str-老视频复推单视频_有效分享_241|#str-老内容复推单视频重复_33|#str-老内容复推单视频重复_分发有效分享_33' THEN '老视频复推_视频'
+                WHEN vt.tags REGEXP '#str-老视频复推单品类_23|#str-老视频复推单品类_分发有效分享_23|#str-老视频复推混合品类_38' THEN '老视频复推_品类'
+                ELSE '老视频复推_未归类' -- also reached when the video has no tag row (vt.tags IS NULL)
+        END AS channel -- was aliased "type"; renamed to match the target column (the INSERT maps by position)
+        ,acct.uid
+        ,acct.videoid
+FROM    (
+            -- One row per (uid, video). videoid is NULL for accounts with no video
+            -- because of the LEFT JOIN.
+            SELECT  DISTINCT '杜崇宇' AS name
+                    ,s.uid
+                    ,v.id AS videoid
+            FROM    operators_channel_spider_base s
+            LEFT JOIN videoods.wx_video_per1h v
+            ON      s.uid = v.uid
+            WHERE   s.identity_tagname REGEXP 'oldVideoPush'
+        ) acct
+LEFT JOIN   (
+                -- All distinct tag names of a video collapsed into one comma-separated string
+                SELECT  r.video_id AS videoid
+                        ,CONCAT_WS(',',COLLECT_SET(t.tag_name)) AS tags
+                FROM    videoods.wx_video_tag_rel_per1h r
+                LEFT JOIN videoods.wx_video_tag_per1h t
+                ON      r.tag_id = t.tag_id
+                GROUP BY r.video_id
+            ) vt
+ON      acct.videoid = vt.videoid
+UNION
+-- AI self-made videos: identity_tagname contains both '机器制作视频' and 'AI自制'.
+-- The channel is the constant 'aidit'; the owner is the first listed name found in
+-- the tag string.
+-- NOTE(review): the CASE has no ELSE, so name is NULL when no listed owner matches,
+-- and such rows are still written to the table — confirm this is intended.
+SELECT  DISTINCT
+        CASE    WHEN s.identity_tagname REGEXP '鲁涛' THEN '鲁涛'
+                WHEN s.identity_tagname REGEXP '余海涛' THEN '余海涛'
+                WHEN s.identity_tagname REGEXP '范军' THEN '范军'
+                WHEN s.identity_tagname REGEXP '罗情' THEN '罗情'
+                WHEN s.identity_tagname REGEXP '刘诗雨' THEN '刘诗雨'
+                WHEN s.identity_tagname REGEXP '阮望' THEN '阮望'
+                WHEN s.identity_tagname REGEXP '王知微' THEN '王知微'
+                WHEN s.identity_tagname REGEXP '尹梦莎' THEN '尹梦莎'
+                WHEN s.identity_tagname REGEXP '信欣' THEN '信欣'
+                WHEN s.identity_tagname REGEXP '刘梓漩' THEN '刘梓漩'
+                WHEN s.identity_tagname REGEXP '刘兆恒' THEN '刘兆恒'
+                WHEN s.identity_tagname REGEXP '张博' THEN '张博'
+        END AS name
+        ,'aidit' AS channel
+        ,s.uid
+        ,v.id AS videoid
+FROM    operators_channel_spider_base s
+LEFT JOIN videoods.wx_video_per1h v
+ON      s.uid = v.uid
+WHERE   s.identity_tagname REGEXP '机器制作视频'
+AND     s.identity_tagname REGEXP 'AI自制'
+UNION
+-- Per-owner crawler / machine-production channels.
+-- name    : first of 范军, 鲁涛, 余海涛, 罗情, 王知微, 刘诗雨 found in identity_tagname.
+-- channel : first matching rule below; rule ORDER is significant and is unchanged.
+-- Rows where either value is NULL are dropped by the outer WHERE.
+-- Thirteen WHEN arms were removed because they could never fire:
+--   * the "spider + <source> + 余海涛 + 账号" and "spider + <source> + 罗情 + 账号" rules
+--     (six each) are strictly narrower than the generic "spider + <source> + 账号" rules
+--     evaluated before them, and returned the same labels;
+--   * the trailing "范军 + 机器制作视频 + NOT AI自制" rule duplicated an earlier arm.
+SELECT  *
+FROM    (
+            SELECT  CASE    WHEN a.identity_tagname REGEXP '范军' THEN '范军'
+                            WHEN a.identity_tagname REGEXP '鲁涛' THEN '鲁涛'
+                            WHEN a.identity_tagname REGEXP '余海涛' THEN '余海涛'
+                            WHEN a.identity_tagname REGEXP '罗情' THEN '罗情'
+                            WHEN a.identity_tagname REGEXP '王知微' THEN '王知微'
+                            WHEN a.identity_tagname REGEXP '刘诗雨' THEN '刘诗雨'
+                            ELSE NULL
+                    END AS name
+                    ,CASE   WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '公众号爬虫新'
+                                AND a.identity_tagname REGEXP '账号' THEN '公众号'
+                            WHEN a.identity_tagname REGEXP '范军'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname NOT REGEXP '单点视频' THEN '机器自动改造'
+                            WHEN a.identity_tagname REGEXP '范军'
+                                AND a.identity_tagname REGEXP '机器制作视频'
+                                AND a.identity_tagname NOT REGEXP 'AI自制' THEN '机器制作视频'
+                            WHEN a.identity_tagname REGEXP '鲁涛'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname NOT REGEXP '单点视频' THEN '机器自动改造'
+                            WHEN a.identity_tagname REGEXP '鲁涛'
+                                AND a.identity_tagname REGEXP '机器制作视频'
+                                AND a.identity_tagname NOT REGEXP 'AI自制' THEN '机器制作视频'
+                            WHEN a.identity_tagname REGEXP '鲁涛'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                            -- Generic spider account rules: apply to every owner
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '小年糕爬虫'
+                                AND a.identity_tagname REGEXP '账号' THEN '小年糕'
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '公众新号'
+                                AND a.identity_tagname REGEXP '账号' THEN '公众新号'
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '抖音爬虫'
+                                AND a.identity_tagname REGEXP '账号' THEN '抖音'
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '西瓜新爬虫'
+                                AND a.identity_tagname REGEXP '账号' THEN '西瓜视频爬虫'
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '快手爬虫'
+                                AND a.identity_tagname REGEXP '账号' THEN '快手爬虫'
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '视频号'
+                                AND a.identity_tagname REGEXP '账号' THEN '视频H品类账号'
+                            WHEN a.identity_tagname REGEXP '范军'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '看一看爬虫'
+                                AND a.identity_tagname REGEXP '推荐'
+                                AND a.identity_tagname REGEXP '鲁涛播放榜' THEN '看一看'
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '看一看爬虫'
+                                AND a.identity_tagname REGEXP '推荐'
+                                AND a.identity_tagname REGEXP '首页推荐'
+                                AND a.identity_tagname REGEXP '余海涛' THEN '看一看'
+                            WHEN a.identity_tagname REGEXP '余海涛'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname NOT REGEXP '单点视频' THEN '机器自动改造'
+                            WHEN a.identity_tagname REGEXP '余海涛'
+                                AND a.identity_tagname REGEXP '机器制作视频'
+                                AND a.identity_tagname NOT REGEXP 'AI自制' THEN '机器制作视频'
+                            WHEN a.identity_tagname REGEXP '余海涛'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                            WHEN a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '西瓜视频爬虫'
+                                AND a.identity_tagname REGEXP '推荐'
+                                AND a.identity_tagname REGEXP '播放榜'
+                                AND a.identity_tagname REGEXP '罗情' THEN '西瓜'
+                            WHEN a.identity_tagname REGEXP '罗情'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname NOT REGEXP '单点视频' THEN '机器自动改造'
+                            WHEN a.identity_tagname REGEXP '罗情'
+                                AND a.identity_tagname REGEXP '机器制作视频'
+                                AND a.identity_tagname NOT REGEXP 'AI自制' THEN '机器制作视频'
+                            WHEN a.identity_tagname REGEXP '罗情'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                            -- 王知微: the sub-channel is refined by a nested CASE
+                            WHEN a.identity_tagname REGEXP '王知微' AND a.identity_tagname REGEXP '机器自动改造' THEN CASE   WHEN a.identity_tagname REGEXP '直接溯源' THEN '直接溯源'
+                                    WHEN a.identity_tagname REGEXP '相似溯源' THEN '相似溯源'
+                                    WHEN a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                                    WHEN a.identity_tagname REGEXP '抖音品类账号' THEN '抖音品类账号'
+                                    WHEN a.identity_tagname REGEXP '快手品类账号' THEN '快手品类账号'
+                                    WHEN a.identity_tagname REGEXP '视频H品类账号' THEN '视频H品类账号'
+                                    WHEN a.identity_tagname REGEXP '快手关键词抓取' THEN '快手关键词抓取'
+                                    WHEN a.identity_tagname REGEXP '抖音关键词抓取' THEN '抖音关键词抓取'
+                                    WHEN a.identity_tagname REGEXP '视频号关键词抓取' THEN '视频号关键词抓取'
+                                    WHEN a.identity_tagname REGEXP '快手创作者版爬虫' THEN '快手创作者版爬虫'
+                                    ELSE '机器自动改造'
+                            END
+                            WHEN a.identity_tagname REGEXP '刘诗雨'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname NOT REGEXP '单点视频' THEN '机器自动改造'
+                            WHEN a.identity_tagname REGEXP '刘诗雨'
+                                AND a.identity_tagname REGEXP '机器制作视频'
+                                AND a.identity_tagname NOT REGEXP 'AI自制' THEN '机器制作视频'
+                            WHEN a.identity_tagname REGEXP '刘诗雨'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                            ELSE NULL
+                    END AS channel
+                    ,a.uid
+                    ,b.id AS videoid -- NULL for accounts with no video (LEFT JOIN)
+            FROM    operators_channel_spider_base a
+            LEFT JOIN videoods.wx_video_per1h b
+            ON      a.uid = b.uid
+        ) 
+WHERE   channel IS NOT NULL
+AND     name IS NOT NULL
+UNION
+-- Channels reported under the fixed owner name '信欣'.
+-- Part 1: accounts tagged 信欣 or 王雪珂, classified by the first matching rule
+--         (rule ORDER is significant and is unchanged).
+-- Part 2: all accounts, kept only for the xng auto-account and 中青看点 rules.
+-- Rows labelled '未知渠道' are dropped by the outer WHERE.
+-- Changes relative to the previous version:
+--   * '小年糕+' is now matched literally ('小年糕[+]'). The old pattern '小年糕+' used
+--     "+" as a regex quantifier and therefore matched every tag containing 小年糕.
+--   * Top-level arms that could never fire (exact duplicates of, or strictly narrower
+--     than, an earlier arm with the same label) were removed. The arms of the nested
+--     CASE expressions are kept verbatim.
+--   * The no-op "SELECT * FROM operators_channel_spider_base" wrappers were dropped.
+SELECT  *
+FROM    (
+            SELECT  DISTINCT '信欣' AS name
+                    ,CASE   WHEN a.identity_tagname REGEXP '公众号爬虫新' THEN '公众号'
+                            WHEN a.identity_tagname REGEXP '西瓜视频爬虫' AND a.identity_tagname REGEXP '推荐' THEN '西瓜'
+                            WHEN a.identity_tagname REGEXP '西瓜视频爬虫' AND a.identity_tagname REGEXP '搜索' THEN '西瓜'
+                            WHEN a.identity_tagname REGEXP '西瓜新爬虫' AND a.identity_tagname REGEXP '账号' THEN '西瓜'
+                            WHEN a.identity_tagname REGEXP '小年糕爬虫' AND a.identity_tagname REGEXP '推荐' THEN '小年糕'
+                            WHEN a.identity_tagname REGEXP '小年糕爬虫' AND a.identity_tagname REGEXP '账号' THEN '小年糕'
+                            -- [+] matches a literal plus sign without relying on backslash escaping
+                            WHEN a.identity_tagname REGEXP '小年糕[+]'
+                                AND a.identity_tagname REGEXP '王雪珂'
+                                AND (a.identity_tagname REGEXP '20230928'
+                                OR a.identity_tagname REGEXP '20231010') THEN '小年糕+'
+                            WHEN a.identity_tagname REGEXP '小年糕话题' THEN '小年糕话题'
+                            WHEN a.identity_tagname REGEXP '快手爬虫' AND a.identity_tagname REGEXP '账号' THEN '快手'
+                            WHEN a.identity_tagname REGEXP '快手爬虫' AND a.identity_tagname REGEXP '推荐' THEN '快手'
+                            WHEN a.identity_tagname REGEXP '快手品类账号' THEN '快手品类账号'
+                            WHEN a.identity_tagname REGEXP '快手推荐流' THEN '快手推荐流'
+                            WHEN a.identity_tagname REGEXP '快手小程序' THEN '快手小程序'
+                            WHEN a.identity_tagname REGEXP '快手创作者版爬虫' THEN '快手创作者版爬虫'
+                            WHEN a.identity_tagname REGEXP '抖音爬虫' AND a.identity_tagname REGEXP '账号' THEN '抖音'
+                            WHEN a.identity_tagname REGEXP '抖音爬虫' AND a.identity_tagname REGEXP '推荐' THEN '抖音'
+                            WHEN a.identity_tagname REGEXP '抖音品类账号' THEN '抖音品类账号'
+                            WHEN a.identity_tagname REGEXP '抖音推荐流' THEN '抖音推荐流'
+                            WHEN a.identity_tagname REGEXP 'youtube爬虫' THEN 'youtube'
+                            WHEN a.identity_tagname REGEXP '众妙音信' THEN '众妙音信'
+                            WHEN a.identity_tagname REGEXP '刚刚都传' THEN '刚刚都传'
+                            WHEN a.identity_tagname REGEXP '本山祝福' THEN '本山祝福'
+                            WHEN a.identity_tagname REGEXP '吉祥幸福' THEN '吉祥幸福'
+                            WHEN a.identity_tagname REGEXP '岁岁年年迎福气' THEN '岁岁年年迎福气'
+                            WHEN a.identity_tagname REGEXP '祝福圈子' THEN '祝福圈子'
+                            WHEN a.identity_tagname REGEXP '海豚祝福' THEN '海豚祝福'
+                            WHEN a.identity_tagname REGEXP '福气旺' THEN '福气旺'
+                            WHEN a.identity_tagname REGEXP '祝福生活' THEN '祝福生活'
+                            WHEN a.identity_tagname REGEXP '中老年娱乐' THEN '中老年娱乐'
+                            WHEN a.identity_tagname REGEXP '老年队伍' THEN '老年队伍'
+                            WHEN a.identity_tagname REGEXP '看一看爬虫' AND a.identity_tagname REGEXP '知足常乐' THEN '看一看'
+                            WHEN a.identity_tagname REGEXP '看一看爬虫' AND a.identity_tagname REGEXP 'hcm' THEN '看一看'
+                            WHEN a.identity_tagname REGEXP '花好月圆中老年' THEN '花好月圆中老年'
+                            WHEN a.identity_tagname REGEXP '老年团队' THEN '老年团队'
+                            WHEN a.identity_tagname REGEXP '欢欢喜喜祝福到' THEN '欢欢喜喜祝福到'
+                            WHEN a.identity_tagname REGEXP '优乐搞笑小视频' THEN '优乐搞笑小视频'
+                            WHEN a.identity_tagname REGEXP '看一看线下' THEN '看一看线下'
+                            -- NOTE(review): pattern says "pius" while the label says "plus" — confirm the real tag spelling
+                            WHEN a.identity_tagname REGEXP '看一看pius' THEN '看一看plus'
+                            WHEN a.identity_tagname REGEXP '好看视频' THEN '好看视频'
+                            WHEN a.identity_tagname REGEXP '祝万物复苏' THEN '祝万物复苏'
+                            WHEN a.identity_tagname REGEXP '漂漂圈' THEN '漂漂圈'
+                            WHEN a.identity_tagname REGEXP '珊瑚祝福' THEN '珊瑚祝福'
+                            WHEN a.identity_tagname REGEXP '祝福咱们中老年之视频' THEN '祝福咱们中老年之视频'
+                            WHEN a.identity_tagname REGEXP '每天送祝福' THEN '每天送祝福'
+                            WHEN a.identity_tagname REGEXP '经典旺福气' THEN '经典旺福气'
+                            WHEN a.identity_tagname REGEXP '退休大本营' THEN '退休大本营'
+                            WHEN a.identity_tagname REGEXP '美好星河' THEN '美好星河'
+                            WHEN a.identity_tagname REGEXP '吉祥祝福为你传递好运' THEN '吉祥祝福为你传递好运'
+                            WHEN a.identity_tagname REGEXP '开心幸福到万家' THEN '开心幸福到万家'
+                            WHEN a.identity_tagname REGEXP '吉祥佳节要祝福' THEN '吉祥佳节要祝福'
+                            WHEN a.identity_tagname REGEXP '鲸鱼祝福' THEN '鲸鱼祝福'
+                            WHEN a.identity_tagname REGEXP '祝尽善尽美' THEN '祝尽善尽美'
+                            WHEN a.identity_tagname REGEXP '祝福意气风发' THEN '祝福意气风发'
+                            WHEN a.identity_tagname REGEXP '天星小视频' THEN '天星小视频'
+                            WHEN a.identity_tagname REGEXP '节日应祝福快乐' THEN '节日应祝福快乐'
+                            WHEN a.identity_tagname REGEXP '老友每日祝福' THEN '老友每日祝福'
+                            WHEN a.identity_tagname REGEXP '祝福你欢裕如意吉祥' THEN '祝福你欢裕如意吉祥'
+                            WHEN a.identity_tagname REGEXP '佳节祝福富足吉祥' THEN '佳节祝福富足吉祥'
+                            WHEN a.identity_tagname REGEXP '节日祝福咱们五谷丰登' THEN '节日祝福咱们五谷丰登'
+                            WHEN a.identity_tagname REGEXP '祝春华秋实' THEN '祝春华秋实'
+                            WHEN a.identity_tagname REGEXP '祝百岁之好' THEN '祝百岁之好'
+                            WHEN a.identity_tagname REGEXP '开心快乐常相伴' THEN '开心快乐常相伴'
+                            WHEN a.identity_tagname REGEXP '视频刷刷' THEN '视频刷刷'
+                            WHEN a.identity_tagname REGEXP '海鸟祝福' THEN '海鸟祝福'
+                            WHEN a.identity_tagname REGEXP '福小顺' THEN '福小顺'
+                            WHEN a.identity_tagname REGEXP '新欢欢喜喜' THEN '欢欢喜喜祝福到新'
+                            WHEN a.identity_tagname REGEXP '新万物复苏' THEN '祝万物复苏new'
+                            WHEN a.identity_tagname REGEXP '新尽善尽美' THEN '祝尽善尽美new'
+                            WHEN a.identity_tagname REGEXP '鲨鱼祝福' THEN '鲨鱼祝福'
+                            WHEN a.identity_tagname REGEXP '博清祝福' THEN '博清祝福'
+                            WHEN a.identity_tagname REGEXP '佳节祝福喜乐多多' THEN '佳节祝福喜乐多多'
+                            WHEN a.identity_tagname REGEXP '公众新号' THEN '公众新号'
+                            WHEN a.identity_tagname REGEXP '祝福圈推荐流' THEN '祝福圈推荐流'
+                            WHEN a.identity_tagname REGEXP '小年糕推荐流' THEN '小年糕推荐流'
+                            WHEN a.identity_tagname REGEXP '视频号推荐流' THEN '视频号推荐流'
+                            WHEN a.identity_tagname REGEXP '祝福快转' THEN '祝福快转'
+                            WHEN a.identity_tagname REGEXP '乐跑乐动' THEN '乐跑乐动'
+                            WHEN a.identity_tagname REGEXP '篻圈故事' THEN '篻圈故事'
+                            WHEN a.identity_tagname REGEXP '微圈视频' THEN '微圈视频'
+                            WHEN a.identity_tagname REGEXP '好运祝福多' THEN '好运祝福多'
+                            WHEN a.identity_tagname REGEXP '愿你福气满满' THEN '愿你福气满满'
+                            WHEN a.identity_tagname REGEXP '节日祝福花开富贵' THEN '节日祝福花开富贵'
+                            WHEN a.identity_tagname REGEXP '玲玲快乐祝福' THEN '玲玲快乐祝福'
+                            WHEN a.identity_tagname REGEXP '乐跑迪捷' THEN '乐跑迪捷'
+                            WHEN a.identity_tagname REGEXP '搬运搜索top视频溯源账号' THEN '搬运搜索top视频溯源账号'
+                            WHEN a.identity_tagname REGEXP '祝福好运暴富' THEN '祝福好运暴富'
+                            WHEN a.identity_tagname REGEXP '祝福年年顺心吉祥' THEN '祝福年年顺心吉祥'
+                            WHEN a.identity_tagname REGEXP '最好送你' THEN '最好送你'
+                            WHEN a.identity_tagname REGEXP '愿你福气常在' THEN '愿你福气常在'
+                            WHEN a.identity_tagname REGEXP '天天聚福气' THEN '天天聚福气'
+                            WHEN a.identity_tagname REGEXP '节日祝福你祥贵' THEN '节日祝福你祥贵'
+                            WHEN a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                            WHEN a.identity_tagname REGEXP '打开迎好运' THEN '打开迎好运'
+                            WHEN a.identity_tagname REGEXP '抖音关键词抓取' THEN '抖音关键词抓取'
+                            WHEN a.identity_tagname REGEXP '快手关键词抓取' AND a.identity_tagname REGEXP '信欣' THEN '快手关键词抓取'
+                            WHEN a.identity_tagname REGEXP '快手关键词抓取' AND a.identity_tagname REGEXP '刘坤宇' THEN '快手关键词抓取'
+                            WHEN a.identity_tagname REGEXP '视频号关键词抓取' THEN '视频号关键词抓取'
+                            WHEN a.identity_tagname REGEXP '节日祝福幸福吉祥' THEN '节日祝福幸福吉祥'
+                            WHEN a.identity_tagname REGEXP '好友视频' THEN '好友视频'
+                            WHEN a.identity_tagname REGEXP '趣转' THEN '趣转'
+                            WHEN a.identity_tagname REGEXP '祝福的问候' THEN '祝福的问候'
+                            WHEN a.identity_tagname REGEXP '老年圈' THEN '老年圈'
+                            WHEN a.identity_tagname REGEXP '老年生活快乐' THEN '老年生活快乐'
+                            WHEN a.identity_tagname REGEXP '新视圈' THEN '新视圈'
+                            WHEN a.identity_tagname REGEXP '福气好运到' THEN '福气好运到'
+                            WHEN a.identity_tagname REGEXP '超级票圈' THEN '超级票圈'
+                            WHEN a.identity_tagname REGEXP '绚烂祝福' THEN '绚烂祝福'
+                            WHEN a.identity_tagname REGEXP '新春祝福' THEN '新春祝福'
+                            WHEN a.identity_tagname REGEXP '朝朝欢喜祝福语' THEN '朝朝欢喜祝福语'
+                            WHEN a.identity_tagname REGEXP '信欣'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND NOT a.identity_tagname REGEXP '视频H品类账号|快手关键词抓取|抖音关键词抓取|视频号关键词抓取' THEN CASE   WHEN a.identity_tagname REGEXP '直接溯源' THEN '直接溯源'
+                                    WHEN a.identity_tagname REGEXP '相似溯源' THEN '相似溯源'
+                                    WHEN a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                                    WHEN a.identity_tagname REGEXP '抖音品类账号' THEN '抖音品类账号'
+                                    WHEN a.identity_tagname REGEXP '快手品类账号' THEN '快手品类账号'
+                                    ELSE '机器自动改造'
+                            END
+                            WHEN a.identity_tagname REGEXP '信欣'
+                                AND a.identity_tagname REGEXP '机器自动改造'
+                                AND a.identity_tagname REGEXP '视频H品类账号' THEN '视频H品类账号'
+                            WHEN a.identity_tagname REGEXP '信欣'
+                                AND a.identity_tagname REGEXP 'spider'
+                                AND a.identity_tagname REGEXP '垂直重发' THEN '垂直重发'
+                            WHEN a.identity_tagname REGEXP '王雪珂' AND a.identity_tagname REGEXP '机器自动改造' THEN CASE   WHEN a.identity_tagname REGEXP '直接溯源' THEN '直接溯源'
+                                    WHEN a.identity_tagname REGEXP '相似溯源' THEN '相似溯源'
+                                    WHEN a.identity_tagname REGEXP '单点视频' THEN '单点视频'
+                                    WHEN a.identity_tagname REGEXP '抖音品类账号' THEN '抖音品类账号'
+                                    WHEN a.identity_tagname REGEXP '快手品类账号' THEN '快手品类账号'
+                                    WHEN a.identity_tagname REGEXP '视频H品类账号' THEN '视频H品类账号'
+                                    WHEN a.identity_tagname REGEXP '快手关键词抓取' THEN '快手关键词抓取'
+                                    WHEN a.identity_tagname REGEXP '抖音关键词抓取' THEN '抖音关键词抓取'
+                                    WHEN a.identity_tagname REGEXP '视频号关键词抓取' THEN '视频号关键词抓取'
+                                    WHEN a.identity_tagname REGEXP '快手创作者版爬虫' THEN '快手创作者版爬虫'
+                                    ELSE '机器自动改造'
+                            END
+                            ELSE '未知渠道'
+                    END AS channel
+                    ,a.uid
+                    ,b.id AS videoid -- NULL for accounts with no video (LEFT JOIN)
+            FROM    operators_channel_spider_base a
+            LEFT JOIN videoods.wx_video_per1h b
+            ON      a.uid = b.uid
+            WHERE   a.identity_tagname REGEXP '信欣|王雪珂'
+            UNION
+            -- Part 2: no owner filter here; only the four rules below produce a kept row
+            SELECT  DISTINCT '信欣' AS name
+                    ,CASE   WHEN a.identity_tagname REGEXP 'xng自动抓账号'
+                                AND a.identity_tagname REGEXP '小年糕爬虫'
+                                AND a.identity_tagname REGEXP '账号' THEN '小年糕'
+                            WHEN a.identity_tagname REGEXP '信欣'
+                                AND a.identity_tagname REGEXP '中青看点相关推荐'
+                                AND a.identity_tagname REGEXP '相关推荐' THEN '中青看点相关推荐'
+                            WHEN a.identity_tagname REGEXP '信欣'
+                                AND a.identity_tagname REGEXP '中青看点账号'
+                                AND a.identity_tagname REGEXP 'zhanghao' THEN '中青看点账号'
+                            WHEN a.identity_tagname REGEXP '信欣'
+                                AND a.identity_tagname REGEXP '中青看点推荐'
+                                AND a.identity_tagname REGEXP '中青看点' THEN '中青看点推荐'
+                            ELSE '未知渠道'
+                    END AS channel
+                    ,a.uid
+                    ,b.id AS videoid
+            FROM    operators_channel_spider_base a
+            LEFT JOIN videoods.wx_video_per1h b
+            ON      a.uid = b.uid
+        ) 
+WHERE   channel <> '未知渠道'
+UNION
+-- 'transport' accounts attributed to 王知微: identity_tagname contains both
+-- 'transport' and '王知微'. Reported under the fixed name '王知微'; the channel label
+-- is derived from the video's aggregated tag names.
+SELECT  DISTINCT acct.name
+        ,CASE   WHEN vt.tags REGEXP '自制内容测试|自制内容搬运|AIGC自制内容' THEN '自制'
+                WHEN vt.tags REGEXP '搬运工具' THEN '搬运工具'
+                WHEN vt.tags REGEXP '搬运改造' THEN '搬运改造'
+                WHEN vt.tags REGEXP '站内重发' THEN '站内重发'
+                WHEN vt.tags REGEXP '搬运测试' THEN '搬运测试'
+                ELSE '搬运' -- also reached when the video has no tag row (vt.tags IS NULL)
+        END AS channel -- was aliased "type"; renamed to match the target column (the INSERT maps by position)
+        ,acct.uid
+        ,acct.videoid
+FROM    (
+            -- One row per (uid, video). videoid is NULL for accounts with no video
+            -- because of the LEFT JOIN.
+            SELECT  DISTINCT '王知微' AS name
+                    ,s.uid
+                    ,v.id AS videoid
+            FROM    operators_channel_spider_base s
+            LEFT JOIN videoods.wx_video_per1h v
+            ON      s.uid = v.uid
+            WHERE   s.identity_tagname REGEXP 'transport'
+            AND     s.identity_tagname REGEXP '王知微'
+        ) acct
+LEFT JOIN   (
+                -- All distinct tag names of a video collapsed into one comma-separated string
+                SELECT  r.video_id AS videoid
+                        ,CONCAT_WS(',',COLLECT_SET(t.tag_name)) AS tags
+                FROM    videoods.wx_video_tag_rel_per1h r
+                LEFT JOIN videoods.wx_video_tag_per1h t
+                ON      r.tag_id = t.tag_id
+                GROUP BY r.video_id
+            ) vt
+ON      acct.videoid = vt.videoid
+;

+ 48 - 0
production_code/loghubods.operators_channel_spider_dt.json

@@ -0,0 +1,48 @@
+{
+  "name": "operators_channel_spider_dt",
+  "project": "loghubods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "name",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "channel",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "uid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "videoid",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "分区"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "分区"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1025579701,
+      "name": "爬虫产品负责人_分区"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.operators_channel_spider"
+  ]
+}

+ 24 - 0
production_code/loghubods.operators_channel_spider_dt.sql

@@ -0,0 +1,24 @@
+-- Task: 爬虫产品负责人_分区  ID: 1025579701  Type: ODPS_SQL
+-- ODPS SQL
+-- Author: 于卓异
+-- Created: 2025-04-28 11:13:49
+--
+-- Purpose: store a copy of operators_channel_spider in a table partitioned by dt,
+-- one partition per value of '${date}${hh}'.
+
+-- Target table (created only if it does not exist yet).
+CREATE TABLE IF NOT EXISTS loghubods.operators_channel_spider_dt (
+    name    STRING,
+    channel STRING,
+    uid     STRING,
+    videoid STRING
+)
+PARTITIONED BY (
+    dt STRING COMMENT '分区'
+)
+STORED AS ALIORC
+LIFECYCLE 60;
+
+-- Overwrite the partition for this run with the full current content of the source table.
+-- SELECT * maps columns by position, so the source column order must match
+-- (name, channel, uid, videoid).
+INSERT OVERWRITE TABLE operators_channel_spider_dt PARTITION (dt = '${date}${hh}')
+SELECT  src.*
+FROM    operators_channel_spider src

+ 59 - 0
production_code/loghubods.scene_type_vid_cf_feature_20250212.json

@@ -0,0 +1,59 @@
+{
+  "name": "scene_type_vid_cf_feature_20250212",
+  "project": "loghubods",
+  "comment": "01_场景和视频与视频之间的cf特征",
+  "columns": [
+    {
+      "name": "sence_type",
+      "type": "STRING",
+      "comment": "场景值"
+    },
+    {
+      "name": "vid_a",
+      "type": "STRING",
+      "comment": "视频ID"
+    },
+    {
+      "name": "vid_b",
+      "type": "STRING",
+      "comment": "视频ID"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023781540,
+      "name": "01_scenetype和vid的cf特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.alg_recsys_recall_cf_mid_ros_scenetype",
+    "loghubods.alg_recsys_recall_cf_mid_rov_scenetype"
+  ]
+}

+ 89 - 0
production_code/loghubods.scene_type_vid_cf_feature_20250212.sql

@@ -0,0 +1,89 @@
+-- Task: 01_scenetype和vid的cf特征_20250212  ID: 1023781540  Type: ODPS_SQL
+--
+-- Builds one JSON feature row per (sence_type, vid_a, vid_b) for partition
+-- dt = '${dt}', hh = '${hh}' by combining two upstream tables:
+--   * loghubods.alg_recsys_recall_cf_mid_ros_scenetype -> ros_cf_score / ros_cf_rank
+--   * loghubods.alg_recsys_recall_cf_mid_rov_scenetype -> rov_cf_score / rov_cf_rank
+-- Each upstream row holds comma-separated vids / scores / ranks strings; they are
+-- split into arrays and exploded so that element i of scores/ranks belongs to
+-- element i of vids.
+CREATE TABLE IF NOT EXISTS loghubods.scene_type_vid_cf_feature_20250212
+(
+    sence_type  STRING COMMENT '场景值'
+    ,vid_a      STRING COMMENT '视频ID'
+    ,vid_b      STRING COMMENT '视频ID'
+    ,feature    STRING COMMENT '特征JSON'
+)
+COMMENT '01_场景和视频与视频之间的cf特征'
+PARTITIONED BY 
+(
+    dt          STRING COMMENT '天'
+    ,hh         STRING COMMENT '小时'
+)
+LIFECYCLE 30
+;
+
+INSERT OVERWRITE TABLE loghubods.scene_type_vid_cf_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_cf_ros AS 
+(
+    -- One row per (sence_type, vid, recalled item) from the ROS table.
+    -- The rov_* columns are filled with "0" so both branches share one schema.
+    SELECT  sence_type
+            ,vid AS vid_a
+            ,item AS vid_b
+            ,scores[i1] AS ros_cf_score
+            ,ranks[i1] AS ros_cf_rank
+            ,"0" AS rov_cf_score
+            ,"0" AS rov_cf_rank
+    FROM    (
+                SELECT  sence_type
+                        ,vid
+                        ,SPLIT(vids,",") AS vids
+                        ,SPLIT(scores,",") AS scores
+                        ,SPLIT(ranks,",") AS ranks
+                FROM    loghubods.alg_recsys_recall_cf_mid_ros_scenetype
+                -- Filter dt and hh directly instead of CONCAT(dt,hh) = '${dt}${hh}':
+                -- same rows, but the predicate stays usable for partition pruning
+                -- (assumes dt/hh are the partition columns of the upstream table).
+                WHERE   dt = '${dt}'
+                AND     hh = '${hh}'
+            ) 
+    LATERAL VIEW POSEXPLODE(vids) t1 AS
+            i1
+            ,item
+)
+,t_cf_rov AS 
+(
+    -- Same shape as t_cf_ros, read from the ROV table; the ros_* columns are "0".
+    SELECT  sence_type
+            ,vid AS vid_a
+            ,item AS vid_b
+            ,"0" AS ros_cf_score
+            ,"0" AS ros_cf_rank
+            ,scores[i1] AS rov_cf_score
+            ,ranks[i1] AS rov_cf_rank
+    FROM    (
+                SELECT  sence_type
+                        ,vid
+                        ,SPLIT(vids,",") AS vids
+                        ,SPLIT(scores,",") AS scores
+                        ,SPLIT(ranks,",") AS ranks
+                FROM    loghubods.alg_recsys_recall_cf_mid_rov_scenetype
+                -- See the note in t_cf_ros: direct partition-column equality.
+                WHERE   dt = '${dt}'
+                AND     hh = '${hh}'
+            ) 
+    LATERAL VIEW POSEXPLODE(vids) t1 AS
+            i1
+            ,item
+)
+SELECT  sence_type
+        ,vid_a
+        ,vid_b
+        ,JSON_FORMAT(JSON_OBJECT(
+            "ros_cf_score",CAST(ros_cf_score AS STRING),
+            "ros_cf_rank",CAST(ros_cf_rank AS STRING),
+            "rov_cf_score",CAST(rov_cf_score AS STRING),
+            "rov_cf_rank",CAST(rov_cf_rank AS STRING)
+        )) AS feature
+FROM    (
+            -- NOTE(review): every metric is averaged over the UNION ALL of both
+            -- branches, including the "0" fillers. A pair present in both upstream
+            -- tables therefore gets (value + 0) / 2, while a pair present in only
+            -- one table keeps its full value. Left unchanged because downstream
+            -- consumers may depend on the current values -- confirm whether
+            -- MAX/SUM was intended.
+            SELECT  sence_type
+                    ,vid_a
+                    ,vid_b
+                    ,AVG(CAST(ros_cf_score AS DOUBLE)) AS ros_cf_score
+                    ,AVG(CAST(ros_cf_rank AS BIGINT)) AS ros_cf_rank
+                    ,AVG(CAST(rov_cf_score AS DOUBLE)) AS rov_cf_score
+                    ,AVG(CAST(rov_cf_rank AS BIGINT)) AS rov_cf_rank
+            FROM    (
+                        SELECT  *
+                        FROM    t_cf_ros
+                        UNION ALL
+                        SELECT  *
+                        FROM    t_cf_rov
+                    ) 
+            GROUP BY sence_type
+                     ,vid_a
+                     ,vid_b
+        )

+ 29 - 0
production_code/loghubods.tag_level_2_base.json

@@ -0,0 +1,29 @@
+{
+  "name": "tag_level_2_base",
+  "project": "loghubods",
+  "comment": "二级品类表",
+  "columns": [
+    {
+      "name": "title_duration",
+      "type": "STRING",
+      "comment": "去emoji标题"
+    },
+    {
+      "name": "tag_level_2",
+      "type": "STRING",
+      "comment": "二级品类"
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1020979778,
+      "name": "重复内容标签"
+    }
+  ],
+  "upstream_tables": [
+    "videoods.wx_video",
+    "videoods.wx_video_tag",
+    "videoods.wx_video_tag_rel"
+  ]
+}

+ 54 - 0
production_code/loghubods.tag_level_2_base.sql

@@ -0,0 +1,54 @@
+-- Task: 重复内容标签  ID: 1020979778  Type: ODPS_SQL
+-- ODPS SQL
+--********************************************************************--
+-- Author: 杜崇宇
+-- Created: 2024-09-25 17:09:30
+--********************************************************************--
+-- Purpose: rebuild loghubods.tag_level_2_base so that every title_duration key
+-- (cleaned title + '-' + total_time) maps to a single 二级品类 value, taken from
+-- the video with the highest id that carries a tag whose name matches '品类-'.
+--
+-- Original DDL, kept commented out for reference:
+--CREATE TABLE IF NOT EXISTS loghubods.tag_level_2_base
+--(
+--    title_duration  STRING COMMENT '去emoji标题'
+--    ,tag_level_2 STRING COMMENT '二级品类'
+--)
+--COMMENT '二级品类表'
+-- ;
+--
+--DROP TABLE IF EXISTS  loghubods.tag_level_2_base;
+
+
+-- The table is overwritten in full on every run (no partition clause).
+INSERT OVERWRITE TABLE loghubods.tag_level_2_base
+SELECT  DISTINCT title_duration
+        ,二级品类
+FROM    (
+            -- Rank the candidate rows of each title_duration by video id, newest id first.
+            -- NOTE(review): a video with several '品类-' tags yields several rows with the
+            -- same id, so the rn = 1 pick among them is not determined by this ORDER BY.
+            SELECT  title_duration
+                    ,二级品类
+                    ,ROW_NUMBER() OVER (PARTITION BY title_duration ORDER BY id DESC ) AS rank
+            FROM    (
+                        -- CLEAR_TITLE_SIGNAL is a function defined outside this script;
+                        -- presumably it strips emoji/symbols from the title (the DDL above
+                        -- describes title_duration as the emoji-free title) -- TODO confirm.
+                        SELECT  DISTINCT a.id
+                                ,a.title
+                                ,CLEAR_TITLE_SIGNAL(a.title) AS clear_title
+                                ,a.total_time
+                                ,CONCAT(CLEAR_TITLE_SIGNAL(a.title),'-',a.total_time) AS title_duration
+                                -- Keep only the text after the last '-' of the tag name.
+                                ,SUBSTRING_INDEX(b.words_1,'-',-1) AS 二级品类
+                        FROM    videoods.wx_video a
+                        LEFT JOIN   (
+                                        -- (video id, tag name) pairs restricted to tags whose
+                                        -- name matches '品类-'.
+                                        SELECT  DISTINCT a.video_id AS videoid
+                                                ,b.tag_name AS words_1
+                                        FROM    (
+                                                    SELECT  video_id
+                                                            ,tag_id
+                                                    FROM    videoods.wx_video_tag_rel
+                                                ) a
+                                        LEFT JOIN   (
+                                                        SELECT  tag_id
+                                                                ,tag_name
+                                                        FROM    videoods.wx_video_tag
+                                                        WHERE   tag_name REGEXP '品类-'
+                                                    ) b
+                                        ON      a.tag_id = b.tag_id
+                                        -- HAVING without GROUP BY: filters on the select alias and
+                                        -- drops the unmatched rows produced by the LEFT JOIN.
+                                        HAVING  words_1 IS NOT NULL
+                                    ) b
+                        ON      a.id = b.videoid
+                        -- Drops videos that have no '品类-' tag at all.
+                        HAVING  二级品类 IS NOT NULL
+                    ) 
+        ) 
+WHERE   rank = 1

+ 53 - 0
production_code/loghubods.vid_click_cf_feature_20250212.json

@@ -0,0 +1,53 @@
+{
+  "name": "vid_click_cf_feature_20250212",
+  "project": "loghubods",
+  "comment": "02_视频点击事件的CF特征,使用详情页曝光和回流页曝光当作点击事件",
+  "columns": [
+    {
+      "name": "vid_a",
+      "type": "STRING",
+      "comment": "视频ID"
+    },
+    {
+      "name": "vid_b",
+      "type": "STRING",
+      "comment": "视频ID"
+    },
+    {
+      "name": "feature",
+      "type": "STRING",
+      "comment": "特征JSON"
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "天"
+    },
+    {
+      "name": "hh",
+      "type": "STRING",
+      "comment": "小时"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1023781551,
+      "name": "02_基于用户点击的CF特征_20250212"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.dwd_recsys_alg_exposure_base_20250108"
+  ]
+}

+ 129 - 0
production_code/loghubods.vid_click_cf_feature_20250212.sql

@@ -0,0 +1,129 @@
+-- Task: 02_基于用户点击的CF特征_20250212  ID: 1023781551  Type: ODPS_SQL
+--
+-- Item-to-item collaborative-filtering features computed from exposure rows whose
+-- page is "回流页" or "详情页" (the table comment says these exposures are treated
+-- as click events). For every vid_a the script outputs candidate vid_b rows with a
+-- JSON feature {"score", "rank", "onlines"} into partition dt='${dt}', hh='${hh}'.
+CREATE TABLE IF NOT EXISTS loghubods.vid_click_cf_feature_20250212
+(
+    vid_a   STRING COMMENT '视频ID'
+    ,vid_b   STRING COMMENT '视频ID'
+    ,feature STRING COMMENT '特征JSON'
+)
+COMMENT '02_视频点击事件的CF特征,使用详情页曝光和回流页曝光当作点击事件'
+PARTITIONED BY 
+(
+    dt       STRING COMMENT '天'
+    ,hh      STRING COMMENT '小时'
+)
+LIFECYCLE 30
+;
+
+INSERT OVERWRITE TABLE loghubods.vid_click_cf_feature_20250212 PARTITION (dt = '${dt}',hh = '${hh}')
+WITH t_return AS 
+(
+    -- Distinct (mid, vid) pairs seen in the window
+    -- [run hour - 7 days, run hour - 1 hour], both ends inclusive.
+    SELECT  mid
+            ,vid
+    FROM    loghubods.dwd_recsys_alg_exposure_base_20250108
+    -- Coarse bound on dt alone. It is implied by the exact CONCAT(dt,hh) window below
+    -- (the lower bound's date prefix, and an upper bound that never exceeds '${dt}'),
+    -- so it removes no rows; it only gives the planner a predicate on the bare
+    -- column (assumes dt is the partition column in yyyymmdd form).
+    WHERE   dt BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDD') AND '${dt}'
+    AND     CONCAT(dt,hh) BETWEEN TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 24 * 7),'YYYYMMDDHH') AND TO_CHAR(FROM_UNIXTIME(UNIX_TIMESTAMP(TO_DATE('${dt}${hh}','YYYYMMDDHH')) - 3600 * 1),'YYYYMMDDHH')
+    AND     apptype NOT IN ("5","19","22","23","12")
+    AND     page IN ("回流页","详情页")
+    GROUP BY mid
+             ,vid
+)
+,t_vid_hot AS 
+(
+    -- Number of distinct mid values per vid.
+    SELECT  vid
+            ,COUNT(DISTINCT mid) AS hots
+    FROM    t_return
+    GROUP BY vid
+)
+,t_mid_hot AS 
+(
+    -- Number of distinct vids per mid, plus a weight 1 / ln(1 + n) that shrinks
+    -- as the mid has interacted with more vids.
+    SELECT  mid
+            ,COUNT(DISTINCT vid) AS hots
+            ,1 / LN(1 + COUNT(DISTINCT vid)) AS mid_hot_score
+    FROM    t_return
+    GROUP BY mid
+)
+,t_pair AS 
+(
+    -- All ordered pairs of different vids that share the same mid.
+    SELECT  ta.mid
+            ,ta.vid AS vid_left
+            ,tb.vid AS vid_right
+    FROM    t_return ta
+    JOIN    t_return tb
+    ON      ta.mid = tb.mid
+    AND     ta.vid <> tb.vid
+)
+,t_pair_hot AS 
+(
+    -- Co-occurrence count per pair, ranked within each vid_left. rand() / 10 is
+    -- added only as a tie-breaker between equal counts, so the ordering of ties
+    -- (and hence the cut at rn <= 100 below) can differ between runs.
+    SELECT  vid_left
+            ,vid_right
+            ,COUNT(DISTINCT mid) AS onlines
+            ,ROW_NUMBER() OVER (PARTITION BY vid_left ORDER BY COUNT(DISTINCT mid) + rand() / 10 DESC ) AS rn
+    FROM    t_pair
+    GROUP BY vid_left
+             ,vid_right
+)
+,t_pair_candidate AS 
+(
+    -- Keep the 100 highest-ranked partners per vid_left, back at (pair, mid) grain.
+    SELECT  ta.vid_left
+            ,ta.vid_right
+            ,ta.mid
+            ,tb.onlines
+    FROM    t_pair ta
+    JOIN    (
+                SELECT  vid_left
+                        ,vid_right
+                        ,onlines
+                FROM    t_pair_hot
+                WHERE   rn <= 100
+            ) tb
+    ON      ta.vid_left = tb.vid_left
+    AND     ta.vid_right = tb.vid_right
+)
+,t_pair_candidate_score AS 
+(
+    -- Attach the per-mid weight to every (pair, mid) row.
+    SELECT  ta.vid_left
+            ,ta.vid_right
+            ,ta.mid
+            ,ta.onlines
+            ,COALESCE(tb.mid_hot_score,0) AS mid_hot_score
+    FROM    t_pair_candidate ta
+    LEFT JOIN t_mid_hot tb
+    ON      ta.mid = tb.mid
+)
+,t_result AS 
+(
+    -- score = sum of mid weights / (hots_left + hots_right - onlines).
+    -- onlines is constant within a pair, so AVG(onlines) just carries it through.
+    SELECT  ta.vid_left
+            ,ta.vid_right
+            ,ta.onlines
+            ,ta.mid_hot_score
+            ,COALESCE(tb.hots,0) AS vid_left_hots
+            ,COALESCE(tc.hots,0) AS vid_right_hots
+            ,ta.mid_hot_score / (COALESCE(tb.hots,0) + COALESCE(tc.hots,0) - ta.onlines) AS score
+    FROM    (
+                SELECT  vid_left
+                        ,vid_right
+                        ,AVG(onlines) AS onlines
+                        ,SUM(mid_hot_score) AS mid_hot_score
+                FROM    t_pair_candidate_score
+                GROUP BY vid_left
+                         ,vid_right
+            ) ta
+    LEFT JOIN t_vid_hot tb
+    ON      ta.vid_left = tb.vid
+    LEFT JOIN t_vid_hot tc
+    ON      ta.vid_right = tc.vid
+)
+,t_recall AS 
+(
+    -- Final rank per vid_left by score, serialised as the JSON feature column.
+    SELECT  vid_left AS vid_a
+            ,vid_right AS vid_b
+            ,JSON_FORMAT(
+                        JSON_OBJECT("score",CAST(ROUND(score,8) AS STRING),"rank",CAST(rn AS STRING),"onlines",CAST(onlines AS STRING))
+            ) AS feature
+    FROM    (
+                SELECT  *
+                        ,ROW_NUMBER() OVER (PARTITION BY vid_left ORDER BY score DESC ) AS rn
+                FROM    t_result
+            ) 
+)
+SELECT  *
+FROM    t_recall
+;

+ 32 - 0
production_code/loghubods.vid_festive_labels.json

@@ -0,0 +1,32 @@
+{
+  "name": "vid_festive_labels",
+  "project": "loghubods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "videoid",
+      "type": "BIGINT",
+      "comment": "视频ID"
+    },
+    {
+      "name": "first_labels",
+      "type": "STRING",
+      "comment": "一级标签"
+    },
+    {
+      "name": "secondary_labels",
+      "type": "STRING",
+      "comment": "二级标签"
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1019838087,
+      "name": "节日标签视频库"
+    }
+  ],
+  "upstream_tables": [
+    "videoods.wx_video_per1h"
+  ]
+}

+ 237 - 0
production_code/loghubods.vid_festive_labels.sql

@@ -0,0 +1,237 @@
+-- Task: 节日标签视频库  ID: 1019838087  Type: ODPS_SQL
+-- ODPS SQL
+--********************************************************************--
+-- Author: 于卓异
+-- Created: 2024-08-29 17:16:57
+--********************************************************************--
+-- Purpose: append to vid_festive_labels the videos created in one specific hour
+-- whose recommend_status is -6 or -7, labelled by regex matching on the title:
+--   secondary_labels = first matching festival / solar term / greeting / person /
+--                      hot-topic pattern (the order of the WHEN branches decides),
+--   first_labels     = the group that secondary label belongs to.
+--
+-- The script used to contain two copies of the same ~100-line statement that
+-- differed only in the hour guard and the date parameter. They are merged into a
+-- single statement with one combined predicate; the inserted rows are the same.
+--
+-- NOTE(review): INSERT INTO appends, so re-running the task for the same hour
+-- inserts the same videos again.
+-- NOTE(review): titles matching no pattern are still inserted, with NULL in both
+-- label columns (neither CASE has an ELSE and nothing filters on
+-- secondary_labels) -- confirm that this is intended.
+CREATE TABLE IF NOT EXISTS vid_festive_labels(
+  videoid BIGINT COMMENT '视频ID',
+   first_labels STRING COMMENT '一级标签',
+   secondary_labels STRING COMMENT '二级标签'
+) 
+STORED AS ALIORC ;
+
+INSERT INTO  TABLE vid_festive_labels 
+SELECT  videoid
+        ,CASE   WHEN secondary_labels IN ('元旦','腊八节','小年','除夕','春节','情人节','元宵节','龙抬头','妇女节','劳动节','母亲节','儿童节','端午节','父亲节','建党节','七七事变','建军节','七夕节','中元节','中秋节','毛主席逝世','国庆节','重阳节','感恩节','公祭日','平安夜','圣诞节','毛主席诞辰','初一','初二','初三','初四','初五','初六','初七','初八','初九','初十') THEN '节假日'
+                WHEN secondary_labels IN ('小寒','大寒','立春','雨水','惊蛰','春分','清明','谷雨','立夏','小满','芒种','夏至','小暑','大暑','立秋','处暑','白露','秋分','寒露','霜降','立冬','小雪','大雪','冬至') THEN '节气'
+                WHEN secondary_labels IN ('早上好','中午好','下午好','晚上好','晚安') THEN '问候语'
+                WHEN secondary_labels IN ('祝福') THEN '祝福语'
+                WHEN secondary_labels IN ('孙中山诞辰','孙中山逝世','周恩来诞辰','周恩来逝世','邓小平诞辰','邓小平逝世','李克强诞辰','李克强逝世','袁隆平诞辰','袁隆平逝世','彭德怀诞辰','彭德怀逝世','朱德诞辰','朱德逝世','吴尊友逝世') THEN '人物'
+                WHEN secondary_labels IN ('两会','315国际消费者权益日','四龙日') THEN '热点'
+        END AS first_labels
+        ,secondary_labels
+FROM    (
+            SELECT  id AS videoid
+                    ,title
+                    -- First matching branch wins, so branch order is part of the logic.
+                    -- NOTE(review): '元旦将至' in the 中秋节 branch can never be reached
+                    -- because any such title already matches the first branch ('元旦').
+                    -- NOTE(review): in the 劳动节 branch the '.' of '5.1' is an unescaped
+                    -- regex wildcard and '51' matches any title containing "51";
+                    -- confirm whether these broad matches are wanted.
+                    ,CASE   WHEN title REGEXP '元旦' THEN '元旦'
+                            WHEN title REGEXP '腊八' THEN '腊八节'
+                            WHEN title REGEXP '小年' THEN '小年'
+                            WHEN title REGEXP '除夕' THEN '除夕'
+                            WHEN title REGEXP '春节|新春佳节|新年|过年|大年' THEN '春节'
+                            WHEN title REGEXP '初一|迎春日|开门红|新春' THEN '初一'
+                            WHEN title REGEXP '初二|犯娘日|迎婿日|回娘家|祭财神' THEN '初二'
+                            WHEN title REGEXP '初三|赤狗日|福神' THEN '初三'
+                            WHEN title REGEXP '初四|迎接神灵日|灶神|喜神|五路|扔穷' THEN '初四'
+                            WHEN title REGEXP '初五|破五|迎财神|送穷神|财神日|五穷' THEN '初五'
+                            WHEN title REGEXP '初六' THEN '初六'
+                            WHEN title REGEXP '初七' THEN '初七'
+                            WHEN title REGEXP '初八' THEN '初八'
+                            WHEN title REGEXP '初九' THEN '初九'
+                            WHEN title REGEXP '初十' THEN '初十'
+                            WHEN title REGEXP '情人节' THEN '情人节'
+                            WHEN title REGEXP '元宵|正月十五' THEN '元宵节'
+                            WHEN title REGEXP '龙抬头|二月二|2月2|剃头日|春耕节|农事节|青龙节|春龙节|龙头节' THEN '龙抬头'
+                            WHEN title REGEXP '妇女节|三八|38节|38节快乐|38节祝福|女神节|女王节' THEN '妇女节'
+                            WHEN title REGEXP '5.1|5·1|五一|劳动节|国际示威游行日|51' THEN '劳动节'
+                            WHEN title REGEXP '母亲节' THEN '母亲节'
+                            WHEN title REGEXP '儿童节|六一' THEN '儿童节'
+                            WHEN title REGEXP '端午' THEN '端午节'
+                            WHEN title REGEXP '父亲节' THEN '父亲节'
+                            WHEN title REGEXP '七一|建党' THEN '建党节'
+                            WHEN title REGEXP '七七事变' THEN '七七事变'
+                            WHEN title REGEXP '建军节|八一' THEN '建军节'
+                            WHEN title REGEXP '七夕' THEN '七夕节'
+                            WHEN title REGEXP '鬼节|中元节' THEN '中元节'
+                            WHEN title REGEXP '中秋|元旦将至' THEN '中秋节'
+                            WHEN title REGEXP '毛主席逝世' THEN '毛主席逝世'
+                            WHEN title REGEXP '国庆' THEN '国庆节'
+                            WHEN title REGEXP '重阳' THEN '重阳节'
+                            WHEN title REGEXP '感恩节' THEN '感恩节'
+                            WHEN title REGEXP '公祭日|南京大屠杀' THEN '公祭日'
+                            WHEN title REGEXP '平安夜' THEN '平安夜'
+                            WHEN title REGEXP '圣诞' THEN '圣诞节'
+                            WHEN title REGEXP '毛主席诞辰' THEN '毛主席诞辰'
+                            WHEN title REGEXP '小寒' THEN '小寒'
+                            WHEN title REGEXP '大寒' THEN '大寒'
+                            WHEN title REGEXP '立春' THEN '立春'
+                            WHEN title REGEXP '雨水' THEN '雨水'
+                            WHEN title REGEXP '惊蛰' THEN '惊蛰'
+                            WHEN title REGEXP '春分' THEN '春分'
+                            WHEN title REGEXP '清明|踏青节|踏青节|行清节|三月节|祭祖' THEN '清明'
+                            WHEN title REGEXP '谷雨' THEN '谷雨'
+                            WHEN title REGEXP '立夏' THEN '立夏'
+                            WHEN title REGEXP '小满' THEN '小满'
+                            WHEN title REGEXP '芒种' THEN '芒种'
+                            WHEN title REGEXP '夏至' THEN '夏至'
+                            WHEN title REGEXP '小暑' THEN '小暑'
+                            WHEN title REGEXP '大暑' THEN '大暑'
+                            WHEN title REGEXP '立秋' THEN '立秋'
+                            WHEN title REGEXP '处暑' THEN '处暑'
+                            WHEN title REGEXP '白露' THEN '白露'
+                            WHEN title REGEXP '秋分' THEN '秋分'
+                            WHEN title REGEXP '寒露' THEN '寒露'
+                            WHEN title REGEXP '霜降' THEN '霜降'
+                            WHEN title REGEXP '立冬' THEN '立冬'
+                            WHEN title REGEXP '小雪' THEN '小雪'
+                            WHEN title REGEXP '大雪' THEN '大雪'
+                            WHEN title REGEXP '冬至' THEN '冬至'
+                            WHEN title REGEXP '早晨|早上|清晨' THEN '早上好'
+                            WHEN title REGEXP '中午好|中午' THEN '中午好'
+                            WHEN title REGEXP '下午好|下午' THEN '下午好'
+                            WHEN title REGEXP '晚上好|晚上' THEN '晚上好'
+                            WHEN title REGEXP '晚安' THEN '晚安'
+                            WHEN title REGEXP '祝福|祝愿|祝你|祝贺|祝大家|祝您|祝好运|祝群主|祝朋友' THEN '祝福'
+                            WHEN title REGEXP '孙中山诞辰' THEN '孙中山诞辰'
+                            WHEN title REGEXP '孙中山逝世' THEN '孙中山逝世'
+                            WHEN title REGEXP '周恩来诞辰' THEN '周恩来诞辰'
+                            WHEN title REGEXP '周恩来逝世' THEN '周恩来逝世'
+                            WHEN title REGEXP '邓小平诞辰' THEN '邓小平诞辰'
+                            WHEN title REGEXP '邓小平逝世' THEN '邓小平逝世'
+                            WHEN title REGEXP '李克强诞辰' THEN '李克强诞辰'
+                            WHEN title REGEXP '李克强逝世' THEN '李克强逝世'
+                            WHEN title REGEXP '袁隆平诞辰' THEN '袁隆平诞辰'
+                            WHEN title REGEXP '袁隆平逝世' THEN '袁隆平逝世'
+                            WHEN title REGEXP '彭德怀诞辰' THEN '彭德怀诞辰'
+                            WHEN title REGEXP '彭德怀逝世' THEN '彭德怀逝世'
+                            WHEN title REGEXP '朱德诞辰' THEN '朱德诞辰'
+                            WHEN title REGEXP '朱德逝世' THEN '朱德逝世'
+                            WHEN title REGEXP '吴尊友逝世' THEN '吴尊友逝世'
+                            WHEN title REGEXP '两会|人大代表|人民代表大会|政协' THEN '两会'
+                            WHEN title REGEXP '315|消费者|三一五|3·15|打假日' THEN '315国际消费者权益日'
+                            WHEN title REGEXP '四龙日|龙年龙月龙日龙时' THEN '四龙日'
+                    END AS secondary_labels
+                    ,gmt_create
+                    ,recommend_status
+            FROM    videoods.wx_video_per1h
+            -- Skip titles that mention an old year / zodiac year, or contain the
+            -- look-alike words 小年糕 / 小年轻.
+            WHERE   title NOT REGEXP '2018|2019|2020|2021|2022|2023|鼠年|牛年|虎年|兔年|小年糕|小年轻'
+            AND     recommend_status IN (-6,-7)
+            -- Hour selection, formerly split across the two statements:
+            --   hhl <> 23 -> videos created in hour ${datetime}${hhl}
+            --   hhl =  23 -> videos created in hour ${datetime-1}${hhl}
+            AND     (
+                        ('${hhl}'<>23 AND TO_CHAR(gmt_create,'YYYYMMDDHH') = '${datetime}${hhl}')
+                        OR ('${hhl}'=23 AND TO_CHAR(gmt_create,'YYYYMMDDHH') = '${datetime-1}${hhl}')
+                    )
+        ) 
+;

+ 109 - 0
production_code/loghubods.video_ai_tags.json

@@ -0,0 +1,109 @@
+{
+  "name": "video_ai_tags",
+  "project": "loghubods",
+  "comment": "AI内容标签",
+  "columns": [
+    {
+      "name": "type",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "videoid",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "title",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "total_time",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "title_duration",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_path",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tags",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_1",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_1",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "tag_2",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_2",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_2",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "tag_3",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_name_3",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "tag_score_3",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "分区"
+    }
+  ],
+  "partition_keys": [
+    {
+      "name": "dt",
+      "type": "STRING",
+      "comment": "分区"
+    }
+  ],
+  "dataworks_tasks": [
+    {
+      "id": 1021080500,
+      "name": "视频标签_单内容"
+    }
+  ],
+  "upstream_tables": [
+    "loghubods.result_log",
+    "videoods.wx_video_per1h"
+  ]
+}

+ 83 - 0
production_code/loghubods.video_ai_tags.sql

@@ -0,0 +1,83 @@
+-- Task: 视频标签_单内容  ID: 1021080500  Type: ODPS_SQL
+--odps sql 
+--********************************************************************--
+--author:杜崇宇
+--create time:2024-09-27 15:18:24
+--********************************************************************--
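+--AI content tags for single videos (the original note read '重复内容标签', i.e. repeated-content tags, which does not match the table COMMENT 'AI内容标签' below)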
+--DROP TABLE IF EXISTS loghubods.video_ai_tags;
+--CREATE TABLE IF NOT EXISTS loghubods.video_ai_tags
+--(
+--    type            STRING COMMENT ''
+--    ,videoid        BIGINT COMMENT ''
+--    ,title          STRING COMMENT ''
+--    ,total_time     BIGINT COMMENT ''
+--    ,title_duration STRING COMMENT ''
+--    ,video_path     STRING COMMENT ''
+--    ,tags           STRING COMMENT ''
+--    ,tag_1          STRING COMMENT ''
+--    ,tag_name_1     STRING COMMENT ''
+--    ,tag_score_1    BIGINT COMMENT ''
+--    ,tag_2          STRING COMMENT ''
+--    ,tag_name_2     STRING COMMENT ''
+--    ,tag_score_2    BIGINT COMMENT ''
+--    ,tag_3          STRING COMMENT ''
+--    ,tag_name_3     STRING COMMENT ''
+--    ,tag_score_3    BIGINT COMMENT ''
+--)
+--COMMENT 'AI内容标签'
+--PARTITIONED BY 
+--(
+--    dt              STRING COMMENT '分区'
+--)
+--LIFECYCLE 365
+--;
+--INSERT OVERWRITE TABLE loghubods.video_ai_tags PARTITION (dt = '${today}${hh}')
+--SELECT  DISTINCT type AS type
+--        ,video_id
+--        ,a.video_title
+--        ,b.total_time
+--        ,CONCAT(CLEAR_TITLE_SIGNAL(b.title),'-',b.total_time) AS title_duration
+--        ,video_url
+--        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类') AS tags
+--        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]') AS tag_1
+--        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]'),'、')[0],'品类-','') AS tag_name_1
+--        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]'),'、')[1],'分数-','') AS tag_score_1
+--        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]') AS tag_2
+--        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]'),'、')[0],'品类-','') AS tag_name_2
+--        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]'),'、')[1],'分数-','') AS tag_score_2
+--        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]') AS tag_3
+--        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]'),'、')[0],'品类-','') AS tag_name_3
+--        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]'),'、')[1],'分数-','') AS tag_score_3
+--FROM    loghubods.video_tag_info_per5min a
+--LEFT JOIN videoods.wx_video_per1h b
+--ON      a.video_id = b.id
+--WHERE   dt >= '${today}${hh}00'
+--AND     dt <= '${today}${hh}55'
+--HAVING  tags IS NOT NULL
+
+
+-- Rebuild the dt = '${today}${hh}' partition of loghubods.video_ai_tags from the
+-- loghubods.result_log rows of the same dt, left-joined to
+-- videoods.wx_video_per1h on a.video_id = b.id.
+-- NOTE(review): `dt` in the WHERE clause is unqualified and table b carries no
+-- filter of its own -- confirm that dt resolves to result_log and that
+-- wx_video_per1h holds a single row per id.
+INSERT OVERWRITE TABLE loghubods.video_ai_tags PARTITION (dt = '${today}${hh}')
+
+
+SELECT  DISTINCT type AS type
+        ,video_id
+        ,a.video_title
+        ,b.total_time
+        ,CONCAT(CLEAR_TITLE_SIGNAL(b.title),'-',b.total_time) AS title_duration
+        ,video_url
+        -- The value at $.八、时效性与分类.二级分类 is kept whole as tags;
+        -- its elements [0], [1] and [2] become tag_1, tag_2 and tag_3.
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类') AS tags
+        -- Each element is split on '、': part [0] with '品类-' removed is the tag name,
+        -- part [1] with '分数-' removed is the tag score.
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]') AS tag_1
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]'),'、')[0],'品类-','') AS tag_name_1
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[0]'),'、')[1],'分数-','') AS tag_score_1
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]') AS tag_2
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]'),'、')[0],'品类-','') AS tag_name_2
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[1]'),'、')[1],'分数-','') AS tag_score_2
+        ,GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]') AS tag_3
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]'),'、')[0],'品类-','') AS tag_name_3
+        ,REGEXP_REPLACE(SPLIT(GET_JSON_OBJECT(data,'$.八、时效性与分类.二级分类[2]'),'、')[1],'分数-','') AS tag_score_3
+FROM    loghubods.result_log a
+LEFT JOIN videoods.wx_video_per1h b
+ON      a.video_id = b.id
+WHERE   dt = '${today}${hh}'
+-- HAVING is used without GROUP BY to filter on the select alias: rows whose tags is NULL are dropped.
+HAVING  tags IS NOT NULL

+ 233 - 0
production_code/videoods.dim_video.json

@@ -0,0 +1,233 @@
+{
+  "name": "dim_video",
+  "project": "videoods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "videoid",
+      "type": "BIGINT",
+      "comment": "视频id"
+    },
+    {
+      "name": "title",
+      "type": "STRING",
+      "comment": "视频标题"
+    },
+    {
+      "name": "distrubute_title",
+      "type": "STRING",
+      "comment": "视频分发标题"
+    },
+    {
+      "name": "video_category",
+      "type": "STRING",
+      "comment": "视频种类"
+    },
+    {
+      "name": "tag_name",
+      "type": "STRING",
+      "comment": "视频一级标签"
+    },
+    {
+      "name": "gmt_create",
+      "type": "DATETIME",
+      "comment": "上传时间  "
+    },
+    {
+      "name": "tags",
+      "type": "STRING",
+      "comment": "视频内容标签"
+    },
+    {
+      "name": "uid",
+      "type": "BIGINT",
+      "comment": "生产者id"
+    },
+    {
+      "name": "video_edit",
+      "type": "STRING",
+      "comment": "视频审核状态"
+    },
+    {
+      "name": "video_data_stat",
+      "type": "STRING",
+      "comment": "视频的数据状态"
+    },
+    {
+      "name": "video_recommend",
+      "type": "STRING",
+      "comment": "视频可搜状态"
+    },
+    {
+      "name": "is_img",
+      "type": "STRING",
+      "comment": "视频分发封面"
+    },
+    {
+      "name": "existence_days",
+      "type": "DOUBLE",
+      "comment": "发布距今天数"
+    },
+    {
+      "name": "video_url",
+      "type": "STRING",
+      "comment": "后台视频链接"
+    },
+    {
+      "name": "total_time",
+      "type": "BIGINT",
+      "comment": "视频时长"
+    },
+    {
+      "name": "play_count",
+      "type": "BIGINT",
+      "comment": "播放人数"
+    },
+    {
+      "name": "play_count_total",
+      "type": "BIGINT",
+      "comment": "播放次数"
+    },
+    {
+      "name": "total_reward",
+      "type": "DOUBLE",
+      "comment": "赞赏总金额"
+    },
+    {
+      "name": "currentday_reward",
+      "type": "DOUBLE",
+      "comment": "当日赞赏金额"
+    },
+    {
+      "name": "total_reward_times",
+      "type": "BIGINT",
+      "comment": "赞赏次数"
+    },
+    {
+      "name": "reward_person",
+      "type": "BIGINT",
+      "comment": "赞赏人数"
+    },
+    {
+      "name": "total_price",
+      "type": "BIGINT",
+      "comment": "付费总金额"
+    },
+    {
+      "name": "currentday_price",
+      "type": "BIGINT",
+      "comment": "当日付费金额"
+    },
+    {
+      "name": "total_price_times",
+      "type": "BIGINT",
+      "comment": "付费次数"
+    },
+    {
+      "name": "total_price_person",
+      "type": "BIGINT",
+      "comment": "付费人数"
+    },
+    {
+      "name": "app_recommend_status",
+      "type": "STRING",
+      "comment": "app推荐状态"
+    },
+    {
+      "name": "charge",
+      "type": "STRING",
+      "comment": "收费状态"
+    },
+    {
+      "name": "is_pwd",
+      "type": "STRING",
+      "comment": "是否加密"
+    },
+    {
+      "name": "width",
+      "type": "BIGINT",
+      "comment": "视频宽度"
+    },
+    {
+      "name": "height",
+      "type": "BIGINT",
+      "comment": "视频高度"
+    },
+    {
+      "name": "tag_name2",
+      "type": "STRING",
+      "comment": "二级标签"
+    },
+    {
+      "name": "tag_name3",
+      "type": "STRING",
+      "comment": "三级标签"
+    },
+    {
+      "name": "if_create",
+      "type": "STRING",
+      "comment": "是否为创作视频"
+    },
+    {
+      "name": "if_change",
+      "type": "STRING",
+      "comment": "是否再创作"
+    },
+    {
+      "name": "app_type",
+      "type": "BIGINT",
+      "comment": "发布端"
+    },
+    {
+      "name": "descr",
+      "type": "STRING",
+      "comment": "视频简介"
+    },
+    {
+      "name": "video_type",
+      "type": "STRING",
+      "comment": "视频创作类型"
+    },
+    {
+      "name": "if_in_youthcommunity",
+      "type": "STRING",
+      "comment": "是否进入青年社区"
+    },
+    {
+      "name": "if_in_agedcommunity",
+      "type": "STRING",
+      "comment": "是否进入老年社区"
+    },
+    {
+      "name": "topic_name",
+      "type": "STRING",
+      "comment": "话题名称"
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1000579613,
+      "name": "视频维度表"
+    }
+  ],
+  "upstream_tables": [
+    "videoods.every_video_status_category",
+    "videoods.if_create_video",
+    "videoods.topic",
+    "videoods.total_price_video",
+    "videoods.total_reward_video",
+    "videoods.video_repository_type",
+    "videoods.video_topic",
+    "videoods.videoid_second_tag_name",
+    "videoods.videoid_third_tag_name",
+    "videoods.videoid_top_tag_name",
+    "videoods.wx_video",
+    "videoods.wx_video_detail",
+    "videoods.wx_video_pwd",
+    "videoods.wx_video_recommend_ext",
+    "videoods.wx_video_status",
+    "videoods.wx_video_tag",
+    "videoods.wx_video_tag_rel"
+  ]
+}

+ 431 - 0
production_code/videoods.dim_video.sql

@@ -0,0 +1,431 @@
+-- Task: 视频维度表  ID: 1000579613  Type: ODPS_SQL
+--@exclude_input=total_price_video
+--@exclude_output=total_price_video
+--@exclude_input=total_reward_video
+--@exclude_output=total_reward_video
+--@exclude_input=every_video_status_category
+--@exclude_output=every_video_status_category
+--@exclude_input=videoid_top_tag_name
+--@exclude_output=videoid_top_tag_name
+--odps sql 
+--********************************************************************--
+--author:liuzhongguo
+--create time:2020-06-06 16:40:39
+--********************************************************************--
+-- First-level tags of every video.
+-- Rebuilds videoid_top_tag_name with one row per video id: the distinct names of
+-- its level = 1 tags, joined into a comma-separated string.
+-- The WHERE clause filters on a column of the left-joined tag table, so videos
+-- with no level = 1 tag produce no row here.
+DROP TABLE IF EXISTS videoid_top_tag_name ;
+
+CREATE TABLE videoid_top_tag_name AS
+SELECT  a.id AS videoid
+        ,concat_ws(',',collect_set(c.tag_name)) tag_name
+FROM    videoods.wx_video a
+LEFT JOIN videoods.wx_video_tag_rel b
+ON      a.id = b.video_id LEFT
+JOIN    (
+            SELECT  tag_id
+                    ,tag_name
+                    ,level
+            FROM    videoods.wx_video_tag
+        ) c
+ON      b.tag_id = c.tag_id
+-- Disabled alternative: select first-level tags by an explicit name list instead of by level.
+--WHERE   tag_name IN ('音乐','综艺','搞笑','舞蹈','祝福','旅行','百态','健康','科技','妙招','影视','美食','时尚','运动','游戏','抗肺炎','在家学','热点','社会','人文','生活','财富','情感','时尚' ,'游戏' ,'网课' ,'微商' ,'婚庆' ,'小语种' ,'广告' ,'宣传片' ,'聚会活动' ,'加密&可见' ,'低质内容' ,'宗教' ,'影视作品' ,'ASMR' ,'无法加载' ,'其他平台' ,'私密&删除' ,'内容无意义' ,'涉及政治' ,'邪教迷信' ,'群体事件' ,'暴恐血腥' ,'网络谣言' ,'淫秽色情' ,'涉嫌侵权' ,'低俗内容' ,'其他')
+WHERE   level = 1
+GROUP BY a.id
+;
+
+
+
+
+
+-- Second-level tags of every video.
+-- Rebuilds videoid_second_tag_name with one row per video id: the distinct names
+-- of its tags that appear in the explicit name list below, comma-separated.
+-- Selection is by tag name, not by level (the level filter is disabled), so a tag
+-- of any level whose name is in the list is included.
+-- Videos with no tag from the list produce no row here.
+DROP TABLE IF EXISTS videoid_second_tag_name ;
+
+CREATE TABLE videoid_second_tag_name AS
+SELECT  a.id AS videoid
+        ,concat_ws(',',collect_set(c.tag_name)) tag_name
+FROM    videoods.wx_video a
+LEFT JOIN videoods.wx_video_tag_rel b
+ON      a.id = b.video_id LEFT
+JOIN    (
+            SELECT  tag_id
+                    ,tag_name
+                    ,level
+            FROM    videoods.wx_video_tag
+        ) c
+ON      b.tag_id = c.tag_id
+WHERE   tag_name IN ('民生' ,'灾祸' ,'时政' ,'法制' ,'节庆活动' ,'正能量' ,'打抱不平' ,'八卦' ,'军事' ,'历史' ,'非遗文化' ,'人物传记' ,'法律' ,'哲学' ,'艺术' ,'心理' ,'文学' ,'人生经历' ,'风土人情' ,'才艺' ,'妙招' ,'生活常识' ,'家居' ,'美女' ,'萌娃' ,'动物' ,'种草' ,'祝福' ,'助眠' ,'生活观赏' ,'星座' ,'生活辟谣' ,'三农' ,'奇闻' ,'科技前沿' ,'科学科普' ,'数码' ,'机械' ,'汽车' ,'科学推理' ,'科学实验' ,'财经' ,'商贸' ,'创业分享' ,'两性' ,'爱情' ,'亲情' ,'友情' ,'职场' ,'人生哲理' ,'乡情' ,'性学' ,'养生' ,'医学' ,'育儿' ,'美食分享' ,'美食制作' ,'黑暗料理' ,'奢侈料理' ,'自制片' ,'电影' ,'电视剧' ,'动漫' ,'综艺' ,'航拍' ,'摄影' ,'VLOG' ,'地标' ,'户外探险' ,'自驾游' ,'段子' ,'恶搞' ,'街访' ,'鬼畜' ,'相声' ,'小品' ,'脱口秀' ,'搞笑配音' ,'糗事' ,'舞蹈展示' ,'舞蹈教学' ,'影视舞蹈' ,'健身' ,'赛事' ,'瑜伽' ,'足球' ,'篮球' ,'乒乓球' ,'轮滑' ,'滑雪' ,'极限运动' ,'游泳' ,'钓鱼' ,'运动教学' ,'唱歌' ,'MV' ,'音乐相册' ,'音乐随拍' ,'音乐现场' ,'戏曲' ,'说唱' ,'口技' ,'乐器' ,'教学' ,'穿搭' ,'美妆' ,'美甲' ,'美发' ,'T台秀' ,'手游' ,'PC游戏' ,'主机游戏' ,'页游' ,'桌游' ,'街机')
+-- Disabled alternative: select second-level tags by level instead of by name.
+--WHERE   level = 2
+GROUP BY a.id
+;
+
+
+
+-- Third-level tags of every video.
+-- Rebuilds videoid_third_tag_name with one row per video id: the distinct names
+-- of its tags that pass the level filter below, comma-separated.
+-- NOTE(review): this block is labelled "third-level" but filters level = 2, while
+-- the first-level block uses level = 1 and the second-level block's disabled
+-- filter is also level = 2 -- confirm against videoods.wx_video_tag whether
+-- level = 3 was intended here.
+DROP TABLE IF EXISTS videoid_third_tag_name ;
+
+CREATE TABLE videoid_third_tag_name AS
+SELECT  a.id AS videoid
+        ,concat_ws(',',collect_set(c.tag_name)) tag_name
+FROM    videoods.wx_video a
+LEFT JOIN videoods.wx_video_tag_rel b
+ON      a.id = b.video_id LEFT
+JOIN    (
+            SELECT  tag_id
+                    ,tag_name
+                    ,level
+            FROM    videoods.wx_video_tag
+        ) c
+ON      b.tag_id = c.tag_id
+-- Disabled alternative: select third-level tags by an explicit name list instead of by level.
+--WHERE   tag_name IN ('就业' ,'租房' ,'劳务' ,'食品' ,'出行' ,'天灾' ,'人祸' ,'刑事案件' ,'民事纠纷' ,'娱乐圈' ,'仪仗队' ,'阅兵' ,'军演' ,'武器' ,'战争' ,'中国古代史' ,'中国近代史' ,'国外历史' ,'建筑' ,'雕塑' ,'手工' ,'书法' ,'绘画' ,'魔术' ,'杂技' ,'房屋' ,'室内设计' ,'防火防盗' ,'园艺' ,'音乐喷泉' ,'展会' ,'民俗' ,'奇人' ,'世界之最' ,'未解之谜' ,'外星文明' ,'时空穿梭' ,'灵异事件' ,'风水玄说' ,'新产品' ,'新技术' ,'科学发现' ,'动物' ,'物理' ,'数学' ,'天文地理' ,'物质' ,'证券' ,'金融投资' ,'宏观经济' ,'产业经济' ,'房地产' ,'婚姻' ,'恋爱' ,'体疗' ,'食疗' ,'中医' ,'营养学' ,'医疗原理' ,'急救知识' ,'儿童健康' ,'儿童教育' ,'美食推荐' ,'吃播' ,'探店' ,'大胃王' ,'电影解说' ,'电影剪辑' ,'电视剧解说' ,'电视剧剪辑' ,'动漫解说' ,'动漫剪辑' ,'流浪' ,'品城' ,'暗访' ,'国外' ,'国内' ,'舞蹈种类(略)' ,'奥运会' ,'NBA' ,'世界杯' ,'优质' ,'翻唱' ,'反串' ,'合成' ,'乐器种类' ,'内衣秀' ,'旗袍秀' ,'时装秀')
+WHERE   level = 2
+GROUP by  a.id
+;
+
+
+
+
+
+
+
+
+
+
+
+-- Status and category labels of every video.
+-- Rebuilds every_video_status_category by combining
+--   a: videoods.wx_video_status        -> audit / data / recommend status labels
+--   b: videoods.wx_video_category_info -> label of the latest category row per video
+--      (latest = highest id within the video_id).
+-- The two sides are FULL OUTER JOINed so that a video present on only one side
+-- still gets a row.
+-- Two fixes compared with the previous version:
+--   * the rn = 1 filter is applied inside subquery b instead of in the ON clause;
+--     in a FULL OUTER JOIN an ON predicate only decides matching, so superseded
+--     category rows (rn > 1) used to be emitted as extra unmatched rows;
+--   * videoid is COALESCEd from both sides, so category-only videos no longer
+--     get a NULL videoid.
+DROP TABLE IF EXISTS every_video_status_category ;
+
+CREATE TABLE every_video_status_category AS
+SELECT  COALESCE(a.video_id, b.video_id) AS videoid
+        ,a.video_edit
+        ,a.video_data_stat
+        ,a.video_recommend
+        ,b.video_category
+FROM    (
+            SELECT  video_id
+                    ,(
+                        CASE    WHEN audit_status=1 THEN '审核中'
+                                WHEN audit_status=2 THEN '不通过'
+                                WHEN audit_status=3 THEN '待修改'
+                                WHEN audit_status=4 THEN '自己可见'
+                                WHEN audit_status=5 THEN '通过'
+                        END
+                    ) AS video_edit          -- audit status label
+                    ,(
+                        CASE    WHEN video_status=1 THEN '有效'  -- public
+                                WHEN video_status=2 THEN '已删除'
+                                WHEN video_status=3 THEN '已屏蔽'
+                                WHEN video_status=4 THEN '关注可见'
+                                WHEN video_status=5 THEN '分享可见'
+                                WHEN video_status=6 THEN '自己可见'
+                        END
+                    ) AS video_data_stat     -- data status label
+                    ,(
+                        CASE    WHEN recommend_status=0 THEN '不可搜'
+                                WHEN recommend_status=-6 THEN '待推荐'
+                                WHEN recommend_status=1 THEN '普通推荐'     -- normal and editor recommendation both count as recommended
+                                WHEN recommend_status=10 THEN '编辑推荐'
+                                WHEN recommend_status=-7 THEN '可搜索'
+                        END
+                    ) AS video_recommend     -- searchable / recommend status label
+            FROM    videoods.wx_video_status
+        ) a
+FULL OUTER JOIN (
+                    SELECT  video_id
+                            ,video_category
+                    FROM    (
+                                SELECT  video_id
+                                        ,(
+                                            CASE    WHEN category_id=1 THEN '内容'
+                                                    WHEN category_id=2 THEN '场景'
+                                                    WHEN category_id=3 THEN '工具'
+                                                    WHEN category_id=4 THEN '未分类'
+                                                    WHEN category_id=5 THEN '原创内容'
+                                                    WHEN category_id=6 THEN '转载内容'
+                                            END
+                                        ) AS video_category
+                                        ,row_number() over(partition by video_id order by id desc) as rn
+                                FROM    videoods.wx_video_category_info
+                            ) ranked
+                    WHERE   rn = 1    -- keep only the latest category row of each video
+                ) b
+ON      a.video_id = b.video_id
+;
+
+
+
+-- Reward (tipping) totals per video.
+-- Rebuilds total_reward_video from videoods.wx_video_reward_record, one row per video_id.
+-- Both amount columns are divided by 100.
+-- NOTE(review): the "current day" branch compares the DATETIME returned by
+-- FROM_UNIXTIME (create_time is divided by 1000 first) directly with the string
+-- '${bizdate}'. Unless that comparison is reduced to day granularity somewhere,
+-- only records stamped exactly at the parameter's instant can match -- confirm
+-- the format of ${bizdate} and whether a TO_CHAR(..., 'yyyymmdd') is needed.
+DROP TABLE IF EXISTS total_reward_video ;
+
+CREATE TABLE total_reward_video AS
+SELECT  video_id AS videoid
+        ,SUM(reward_amount)/100 AS total_reward    --total reward amount
+        ,coalesce(
+            SUM(
+                CASE    WHEN FROM_UNIXTIME(CAST( create_time/ 1000 AS BIGINT) )='${bizdate}' THEN reward_amount 
+                END
+            )
+            ,0
+        )/100 AS currentday_reward    --reward amount of the current day (0 when nothing matches)
+        ,COUNT(1) AS total_reward_times    --number of reward records
+        ,COUNT(DISTINCT uid) AS reward_person    --number of distinct rewarding users
+FROM    videoods.wx_video_reward_record
+GROUP BY video_id
+;
+
+-- Purchase (paid viewing) totals per video.
+-- Rebuilds total_price_video from videoods.wx_video_purchase_record, one row per video_id.
+-- NOTE(review): the "current day" branch compares the DATETIME returned by
+-- FROM_UNIXTIME (gmt_payment_timestamp is divided by 1000 first) directly with
+-- the string '${bizdate}' -- confirm the format of ${bizdate} and whether the
+-- comparison should be made at day granularity.
+DROP TABLE IF EXISTS total_price_video ;
+
+CREATE TABLE total_price_video AS
+SELECT  video_id AS videoid
+        ,SUM(price) AS total_price    --total paid amount
+        ,coalesce(
+            SUM(
+                CASE    WHEN FROM_UNIXTIME(CAST( gmt_payment_timestamp/ 1000 AS BIGINT) )='${bizdate}' THEN price 
+                END
+            )
+            ,0
+        ) AS currentday_price    --paid amount of the current day (0 when nothing matches)
+        ,COUNT(1) AS total_price_times    --number of purchase records
+        ,COUNT(DISTINCT uid) AS total_price_person    --number of distinct paying users
+       
+FROM    videoods.wx_video_purchase_record
+GROUP BY video_id
+;
+
+
+
+-- Video creation-type classification.
+-- Rebuilds if_create_video from videoods.wx_video (t1), left-joined to
+--   t2: videoods.produce_video_project  -> if_create and video_type
+--   t3: re-creation classification      -> if_change
+-- Output columns:
+--   id         video id
+--   if_create  '上传视频' when the video has no produce_video_project row, else '创作视频'
+--   if_change  '再创作' / '再编辑' when the project has a parent project owned by a
+--              different / the same uid, '原创' when parent_project_id IS NULL
+--   video_type label derived from from_scene (2, 3, 4, 5); NULL for any other value
+-- Fix: DROP TABLE now uses IF EXISTS, like every other DROP in this script, so the
+-- job no longer fails when the table does not exist yet.
+DROP TABLE IF EXISTS if_create_video;
+CREATE TABLE  IF NOT EXISTS  if_create_video AS 
+SELECT DISTINCT
+id
+,(CASE WHEN t2.video_id IS NULL THEN '上传视频'
+       WHEN t2.video_id IS NOT NULL THEN '创作视频'
+       END ) if_create
+,if_change
+,(CASE WHEN t2.from_scene=2 THEN '创作工具' WHEN t2.from_scene=3 THEN '普通上传转创作工具'
+ WHEN t2.from_scene=4 THEN '后台转加工' WHEN t2.from_scene=5 THEN '卡点视频' END ) video_type
+FROM 
+(SELECT 
+id
+FROM 
+videoods.wx_video
+) t1
+LEFT JOIN 
+(SELECT 
+video_id,from_scene
+FROM 
+videoods.produce_video_project
+) t2
+ON t1.id=t2.video_id
+LEFT JOIN 
+-- t3, first branch: projects that have a parent project; the parent is looked up
+-- by comparing the child's parent_project_id (cast to STRING) with project_id.
+(SELECT  pvp.video_id,
+(CASE
+WHEN pvp.parent_project_id is not null and pvp.uid!=pvp_parent.uid THEN '再创作'
+WHEN  pvp.parent_project_id IS NOT NULL AND pvp.uid=pvp_parent.uid THEN '再编辑' END ) if_change
+FROM    videoods.produce_video_project pvp
+JOIN    videoods.produce_video_project pvp_parent
+ON     CAST (pvp.parent_project_id AS STRING ) = pvp_parent.project_id
+JOIN videoods.wx_video
+ON    pvp.video_id = wx_video.id
+UNION ALL 
+-- t3, second branch: projects without a parent project are labelled '原创'.
+SELECT  pvp.video_id,'原创'
+FROM    videoods.produce_video_project pvp
+JOIN   videoods.wx_video
+ON    pvp.video_id = wx_video.id
+WHERE pvp.parent_project_id IS NULL
+) t3
+ON t1.id=t3.video_id
+;
+
+
+
+-- Final step: overwrite dim_video with one wide record per videoods.wx_video row (a1),
+-- enriched through LEFT JOINs with tags (a2), first/second/third-level tags
+-- (a3/a11/a12), status and category labels (a4), reward totals (a5), purchase
+-- totals (a6), distribution title and cover (a7), app recommend status (a8),
+-- charge/app_type/descr (a9), password flag (a10), creation type (a13),
+-- community membership (a14) and topic name (a15).
+-- NOTE(review): a13 (if_create_video) and a15 (video_topic) are not reduced to one
+-- row per video in this statement; if they hold several rows for a video, that
+-- video appears several times in dim_video -- confirm.
+--CREATE TABLE dim_video AS
+INSERT OVERWRITE TABLE dim_video
+SELECT  a1.videoid    --video id
+        ,a1.title    --video title
+        ,distrubute_title    --distribution title of the video
+        ,video_category    --video category
+        ,a3.tag_name    --first-level tags of the video
+        ,a1.gmt_create    --upload time
+        ,tags    --video tags
+        ,a1.uid    --producer id
+        ,video_edit    --audit status of the video
+        ,video_data_stat    --data status of the video
+        ,video_recommend    --searchable / recommend status of the video
+        ,CASE    WHEN a7.cover_img_path IS NULL THEN '无' 
+                 ELSE '有' 
+         END AS is_img    --whether a distribution cover exists
+        ,(
+            UNIX_TIMESTAMP(datetrunc(GETDATE(), "DD"))-UNIX_TIMESTAMP(a1.gmt_create)
+        )/(60*60*24) AS existence_days    --days between upload and the start of the current day
+        ,concat(
+            'https://admin.yishihui.com/cms/post-detail/'
+            ,a1.videoid
+            ,'/info'
+        ) AS video_url    --link to the video in the admin console
+        ,a1.total_time    --video duration
+        ,coalesce(a1.play_count,0) play_count    --number of viewers
+        ,coalesce(a1.play_count_total,0) play_count_total    --number of plays
+        ,coalesce(total_reward,0) total_reward    --total reward amount
+        ,coalesce(currentday_reward,0) currentday_reward    --reward amount of the current day
+        ,coalesce(total_reward_times,0) total_reward_times    --number of rewards
+        ,coalesce(reward_person,0) reward_person    --number of rewarding users
+        ,coalesce(total_price,0) total_price    --total paid amount
+        ,coalesce(currentday_price,0) currentday_price    --paid amount of the current day
+        ,coalesce(total_price_times,0) total_price_times    --number of purchases
+        ,coalesce(total_price_person,0) total_price_person    --number of paying users
+        ----------------------------------added----------------------------------
+        ,app_recommend_status    --app recommend status
+        ,charge    --charging status
+        ,CASE    WHEN a10.video_id IS NULL THEN '未加密' 
+                 ELSE '加密' 
+         END AS is_pwd    --whether the video is encrypted (has a wx_video_pwd row)
+        ,width    --video width
+        ,height    --video height
+        ----------------------------------added 2---------------------------------
+        ,a11.tag_name as tag_name2     --second-level tags
+        ,a12.tag_name as tag_name3     --third-level tags
+        ---------------------------------added 3----------------------------------
+        ,a13.if_create --whether the video was made with the creation flow
+        ---------------------------------added 4----------------------------------
+        ,a13.if_change
+        ,a9.app_type
+        ,a9.descr
+        ,a13.video_type
+        ,a14.if_in_youthCommunity
+        ,a14.if_in_agedCommunity
+        ,a15.name
+FROM    (
+            SELECT  id AS videoid
+                    ,uid
+                    ,title
+                    ,play_count
+                    ,play_count_total
+                    ,total_time
+                    ,gmt_create
+                    ,width
+                    ,height
+            FROM    videoods.wx_video
+        ) a1
+LEFT JOIN (    --video tags: all tag names of a video, comma-separated
+              SELECT  a.video_id
+                      ,concat_ws(',', collect_set(b.tag_name)) AS tags
+              FROM    (
+                          SELECT  video_id
+                                  ,tag_id
+                          FROM    videoods.wx_video_tag_rel
+                      ) a
+              LEFT JOIN (
+                            SELECT  tag_id
+                                    ,tag_name
+                            FROM    videoods.wx_video_tag
+                        ) b
+              ON      a.tag_id = b.tag_id
+              GROUP BY video_id
+          ) a2
+ON      a1.videoid = a2.video_id LEFT
+JOIN    (    --first-level tags of every video
+            SELECT  videoid
+                    ,tag_name
+            FROM    videoid_top_tag_name
+        ) a3
+ON      a1.videoid = a3.videoid
+LEFT JOIN (    --status and category labels of every video
+              SELECT  videoid
+                      ,video_edit
+                      ,video_data_stat
+                      ,video_recommend
+                      ,video_category
+              FROM    every_video_status_category
+          ) a4
+ON      a1.videoid = a4.videoid LEFT
+JOIN    (    --reward totals
+            SELECT  videoid
+                    ,total_reward
+                    ,currentday_reward
+                    ,total_reward_times
+                    ,reward_person
+            FROM    total_reward_video
+        ) a5
+ON      a1.videoid = a5.videoid
+LEFT JOIN (    --purchase totals
+              SELECT  videoid
+                      ,total_price
+                      ,currentday_price
+                      ,total_price_times
+                      ,total_price_person
+              FROM    total_price_video
+          ) a6
+ON      a1.videoid = a6.videoid LEFT
+JOIN    (    --distribution title and cover image
+            SELECT  video_id
+                    ,title AS distrubute_title
+                    ,cover_img_path
+            FROM    videoods.wx_video_recommend_ext
+        ) a7
+ON      a1.videoid = a7.video_id
+LEFT JOIN (    --app recommendation
+              SELECT  video_id
+                      ,(
+                          CASE    WHEN app_recommend_status=-1 THEN '未分类'
+                                  WHEN app_recommend_status=0 THEN '不可搜'
+                                  WHEN app_recommend_status=-7 THEN '可搜索'
+                                  WHEN app_recommend_status=1 THEN '普通推荐'     --recommended in the app
+                                  WHEN app_recommend_status=-6 THEN '待推荐'
+                                  WHEN app_recommend_status=10 THEN '编辑推荐'
+                                  WHEN app_recommend_status=20 THEN '实时推荐'
+                          END
+                      ) AS app_recommend_status    --app recommend status label
+              FROM    videoods.wx_video_status
+          ) a8
+ON      a1.videoid = a8.video_id LEFT
+JOIN    (    --free or paid, plus app_type and descr
+            SELECT  video_id
+                    ,CASE    WHEN charge=0 THEN '免费'
+                             WHEN charge=1 THEN '收费' 
+                     END AS charge
+                    ,app_type
+                    ,descr
+            FROM    videoods.wx_video_detail
+        ) a9
+ON      a1.videoid = a9.video_id
+LEFT JOIN (    --whether encrypted
+              SELECT  video_id
+              FROM    wx_video_pwd
+          ) a10
+ON      a1.videoid = a10.video_id LEFT
+JOIN    videoid_second_tag_name a11    --second-level tags
+ON      a1.videoid = a11.videoid
+LEFT JOIN videoid_third_tag_name a12    --third-level tags
+ON      a1.videoid = a12.videoid
+LEFT JOIN if_create_video  a13
+ON  a1.videoid=a13.id
+-- a14: per video, the distinct community labels by repository_type
+-- (1 feeds if_in_youthCommunity, 0 feeds if_in_agedCommunity) and data_status (1 or 0).
+LEFT JOIN (
+SELECT  video_id
+        ,CONCAT_WS(
+            ','
+            ,collect_set(
+                CASE    WHEN repository_type=1 AND data_status=1 THEN '进入青年社区'
+                        WHEN repository_type=1 AND data_status=0 THEN '未进入青年社区' 
+                END
+            )
+        ) if_in_youthCommunity
+        ,CONCAT_WS(
+            ','
+            ,collect_set(
+                CASE    WHEN repository_type=0 AND data_status=1 THEN '进入老年社区'
+                        WHEN repository_type=0 AND data_status=0 THEN '未进入老年社区' 
+                END
+            )
+        ) if_in_agedCommunity
+FROM    videoods.video_repository_type
+GROUP BY video_id
+) a14
+ON a1.videoid=a14.video_id
+-- a15: topic name of each video_topic row whose data_status = 1.
+LEFT JOIN 
+(SELECT 
+t1.video_id,t2.name
+FROM 
+videoods.video_topic t1 LEFT JOIN videoods.topic t2
+ON t1.topic_id=t2.id
+WHERE  t1.data_status=1
+) a15
+ON a1.videoid=a15.video_id
+;

+ 43 - 0
production_code/videoods.every_video_status_category.json

@@ -0,0 +1,43 @@
+{
+  "name": "every_video_status_category",
+  "project": "videoods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "videoid",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "video_edit",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_data_stat",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_recommend",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_category",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1000579613,
+      "name": "视频维度表"
+    }
+  ],
+  "upstream_tables": [
+    "videoods.wx_video_category_info",
+    "videoods.wx_video_status"
+  ]
+}

+ 431 - 0
production_code/videoods.every_video_status_category.sql

@@ -0,0 +1,431 @@
+-- Task: 视频维度表  ID: 1000579613  Type: ODPS_SQL
+--@exclude_input=total_price_video
+--@exclude_output=total_price_video
+--@exclude_input=total_reward_video
+--@exclude_output=total_reward_video
+--@exclude_input=every_video_status_category
+--@exclude_output=every_video_status_category
+--@exclude_input=videoid_top_tag_name
+--@exclude_output=videoid_top_tag_name
+--odps sql 
+--********************************************************************--
+--author:liuzhongguo
+--create time:2020-06-06 16:40:39
+--********************************************************************--
+-- First-level tag names per video, comma-joined into a single string.
+-- Legacy alternative (disabled): pick the tags by an explicit list of first-level
+-- tag names instead of by level.
+DROP TABLE IF EXISTS videoid_top_tag_name;
+
+CREATE TABLE videoid_top_tag_name AS
+SELECT  v.id AS videoid
+        ,CONCAT_WS(',', COLLECT_SET(t.tag_name)) AS tag_name
+FROM    videoods.wx_video v
+-- Inner joins are sufficient: the level filter below discards any video that has
+-- no matching tag, so outer joins would not add rows.
+JOIN    videoods.wx_video_tag_rel r
+ON      v.id = r.video_id
+JOIN    videoods.wx_video_tag t
+ON      r.tag_id = t.tag_id
+WHERE   t.level = 1
+GROUP BY v.id
+;
+
+
+
+
+
+-- Second-level tag names per video, comma-joined into a single string.
+-- Second-level tags are recognised by an explicit list of tag names; the
+-- level-based filter (level = 2) is disabled.
+DROP TABLE IF EXISTS videoid_second_tag_name;
+
+CREATE TABLE videoid_second_tag_name AS
+SELECT  v.id AS videoid
+        ,CONCAT_WS(',', COLLECT_SET(t.tag_name)) AS tag_name
+FROM    videoods.wx_video v
+-- Inner joins are sufficient: the name filter below discards any video that has
+-- no matching tag, so outer joins would not add rows.
+JOIN    videoods.wx_video_tag_rel r
+ON      v.id = r.video_id
+JOIN    videoods.wx_video_tag t
+ON      r.tag_id = t.tag_id
+WHERE   t.tag_name IN ('民生' ,'灾祸' ,'时政' ,'法制' ,'节庆活动' ,'正能量' ,'打抱不平' ,'八卦' ,'军事' ,'历史' ,'非遗文化' ,'人物传记' ,'法律' ,'哲学' ,'艺术' ,'心理' ,'文学' ,'人生经历' ,'风土人情' ,'才艺' ,'妙招' ,'生活常识' ,'家居' ,'美女' ,'萌娃' ,'动物' ,'种草' ,'祝福' ,'助眠' ,'生活观赏' ,'星座' ,'生活辟谣' ,'三农' ,'奇闻' ,'科技前沿' ,'科学科普' ,'数码' ,'机械' ,'汽车' ,'科学推理' ,'科学实验' ,'财经' ,'商贸' ,'创业分享' ,'两性' ,'爱情' ,'亲情' ,'友情' ,'职场' ,'人生哲理' ,'乡情' ,'性学' ,'养生' ,'医学' ,'育儿' ,'美食分享' ,'美食制作' ,'黑暗料理' ,'奢侈料理' ,'自制片' ,'电影' ,'电视剧' ,'动漫' ,'综艺' ,'航拍' ,'摄影' ,'VLOG' ,'地标' ,'户外探险' ,'自驾游' ,'段子' ,'恶搞' ,'街访' ,'鬼畜' ,'相声' ,'小品' ,'脱口秀' ,'搞笑配音' ,'糗事' ,'舞蹈展示' ,'舞蹈教学' ,'影视舞蹈' ,'健身' ,'赛事' ,'瑜伽' ,'足球' ,'篮球' ,'乒乓球' ,'轮滑' ,'滑雪' ,'极限运动' ,'游泳' ,'钓鱼' ,'运动教学' ,'唱歌' ,'MV' ,'音乐相册' ,'音乐随拍' ,'音乐现场' ,'戏曲' ,'说唱' ,'口技' ,'乐器' ,'教学' ,'穿搭' ,'美妆' ,'美甲' ,'美发' ,'T台秀' ,'手游' ,'PC游戏' ,'主机游戏' ,'页游' ,'桌游' ,'街机')
+GROUP BY v.id
+;
+
+
+
+-- Third-level tag names per video, comma-joined into a single string.
+-- Fix: the filter used level = 2, which collects second-level tags into the
+-- third-level table; third-level tags are level = 3.
+-- NOTE(review): assumes wx_video_tag.level is 1-based (1 = first level, as used by
+-- videoid_top_tag_name) — confirm against the tag table.
+-- Legacy alternative (disabled): pick the tags by an explicit list of third-level
+-- tag names instead of by level.
+DROP TABLE IF EXISTS videoid_third_tag_name ;
+
+CREATE TABLE videoid_third_tag_name AS
+SELECT  a.id AS videoid
+        ,concat_ws(',',collect_set(c.tag_name)) tag_name
+FROM    videoods.wx_video a
+LEFT JOIN videoods.wx_video_tag_rel b
+ON      a.id = b.video_id
+LEFT JOIN (
+            SELECT  tag_id
+                    ,tag_name
+                    ,level
+            FROM    videoods.wx_video_tag
+        ) c
+ON      b.tag_id = c.tag_id
+WHERE   c.level = 3
+GROUP BY a.id
+;
+
+
+
+
+
+
+
+
+
+
+
+-- Per-video status labels (audit, data status, search/recommend status) combined
+-- with the label of the video's category record that has the highest id.
+DROP TABLE IF EXISTS every_video_status_category ;
+
+CREATE TABLE every_video_status_category AS
+-- COALESCE keeps the key for videos that only exist on the category side of the
+-- full outer join; a.video_id alone would be NULL for those rows.
+SELECT  COALESCE(a.video_id, b.video_id) AS videoid
+        ,a.audit_label AS video_edit
+        ,a.data_status_label AS video_data_stat
+        ,a.recommend_label AS video_recommend
+        ,b.category_label AS video_category
+FROM    (
+            SELECT  video_id
+                    ,(
+                        CASE    WHEN audit_status=1 THEN '审核中'
+                                WHEN audit_status=2 THEN '不通过'
+                                WHEN audit_status=3 THEN '待修改'
+                                WHEN audit_status=4 THEN '自己可见'
+                                WHEN audit_status=5 THEN '通过'
+                        END
+                    ) AS audit_label
+                    ,(
+                        CASE    WHEN video_status=1 THEN '有效'  -- public
+                                WHEN video_status=2 THEN '已删除'
+                                WHEN video_status=3 THEN '已屏蔽'
+                                WHEN video_status=4 THEN '关注可见'
+                                WHEN video_status=5 THEN '分享可见'
+                                WHEN video_status=6 THEN '自己可见'
+                        END
+                    ) AS data_status_label
+                    ,(
+                        CASE    WHEN recommend_status=0 THEN '不可搜'
+                                WHEN recommend_status=-6 THEN '待推荐'
+                                WHEN recommend_status=1 THEN '普通推荐'     -- normal and editor recommendation both count as recommended
+                                WHEN recommend_status=10 THEN '编辑推荐'
+                                WHEN recommend_status=-7 THEN '可搜索'
+                        END
+                    ) AS recommend_label
+            FROM    videoods.wx_video_status
+        ) a
+FULL OUTER JOIN (
+                    -- Keep only the newest category row per video BEFORE joining.
+                    -- With a FULL OUTER JOIN, a "rn = 1" predicate in the ON clause
+                    -- does not remove the older rows: they come out as unmatched
+                    -- rows with NULL status columns.
+                    SELECT  video_id
+                            ,category_label
+                    FROM    (
+                                SELECT  video_id
+                                        ,(
+                                            CASE    WHEN category_id=1 THEN '内容'
+                                                    WHEN category_id=2 THEN '场景'
+                                                    WHEN category_id=3 THEN '工具'
+                                                    WHEN category_id=4 THEN '未分类'
+                                                    WHEN category_id=5 THEN '原创内容'
+                                                    WHEN category_id=6 THEN '转载内容'
+                                            END
+                                        ) AS category_label
+                                        ,row_number() over(partition by video_id order by id desc) as rn
+                                FROM    videoods.wx_video_category_info
+                            ) ranked
+                    WHERE   rn = 1
+                ) b
+ON      a.video_id = b.video_id
+;
+
+
+
+-- Reward (tip) totals per video: total amount, amount on the business date,
+-- number of rewards and number of distinct rewarding users.
+-- Amounts are divided by 100 (as in the original logic; presumably cents -> yuan).
+DROP TABLE IF EXISTS total_reward_video ;
+
+CREATE TABLE total_reward_video AS
+SELECT  video_id AS videoid
+        ,SUM(reward_amount)/100 AS total_reward    -- total reward amount
+        ,coalesce(
+            SUM(
+                -- Fix: compare only the day part. FROM_UNIXTIME yields a full timestamp,
+                -- which never equals a bare date string, so this sum was always 0.
+                -- NOTE(review): assumes ${bizdate} is passed as yyyymmdd (the DataWorks
+                -- default) — confirm the scheduling parameter format.
+                CASE    WHEN TO_CHAR(FROM_UNIXTIME(CAST( create_time/ 1000 AS BIGINT) ), 'yyyymmdd')='${bizdate}' THEN reward_amount
+                END
+            )
+            ,0
+        )/100 AS currentday_reward    -- reward amount on the business date
+        ,COUNT(1) AS total_reward_times    -- number of rewards
+        ,COUNT(DISTINCT uid) AS reward_person    -- number of distinct rewarding users
+FROM    videoods.wx_video_reward_record
+GROUP BY video_id
+;
+
+-- Purchase totals per video: total paid amount, amount paid on the business date,
+-- number of purchases and number of distinct paying users.
+DROP TABLE IF EXISTS total_price_video ;
+
+CREATE TABLE total_price_video AS
+SELECT  video_id AS videoid
+        ,SUM(price) AS total_price    -- total paid amount
+        ,coalesce(
+            SUM(
+                -- Fix: compare only the day part. FROM_UNIXTIME yields a full timestamp,
+                -- which never equals a bare date string, so this sum was always 0.
+                -- NOTE(review): assumes ${bizdate} is passed as yyyymmdd (the DataWorks
+                -- default) — confirm the scheduling parameter format.
+                CASE    WHEN TO_CHAR(FROM_UNIXTIME(CAST( gmt_payment_timestamp/ 1000 AS BIGINT) ), 'yyyymmdd')='${bizdate}' THEN price
+                END
+            )
+            ,0
+        ) AS currentday_price    -- amount paid on the business date
+        ,COUNT(1) AS total_price_times    -- number of purchases
+        ,COUNT(DISTINCT uid) AS total_price_person    -- number of distinct paying users
+FROM    videoods.wx_video_purchase_record
+GROUP BY video_id
+;
+
+
+
+-- Video origin classification: uploaded vs. created with the production tool,
+-- how a created video was derived (original / re-created / re-edited), and the
+-- scene it came from.
+-- Fix: DROP now uses IF EXISTS so the script does not fail when the table is
+-- missing, and the CREATE no longer uses IF NOT EXISTS, which would silently skip
+-- the rebuild if the table were still present.
+DROP TABLE IF EXISTS if_create_video;
+
+CREATE TABLE if_create_video AS
+SELECT DISTINCT
+        t1.id
+        ,(CASE WHEN t2.video_id IS NULL THEN '上传视频'
+               WHEN t2.video_id IS NOT NULL THEN '创作视频'
+          END ) AS if_create
+        ,t3.if_change
+        ,(CASE WHEN t2.from_scene=2 THEN '创作工具'
+               WHEN t2.from_scene=3 THEN '普通上传转创作工具'
+               WHEN t2.from_scene=4 THEN '后台转加工'
+               WHEN t2.from_scene=5 THEN '卡点视频'
+          END ) AS video_type
+FROM
+(SELECT
+id
+FROM
+videoods.wx_video
+) t1
+LEFT JOIN
+(SELECT
+video_id,from_scene
+FROM
+videoods.produce_video_project
+) t2
+ON t1.id=t2.video_id
+LEFT JOIN
+(
+-- Projects derived from a parent project: a different author means "re-created",
+-- the same author means "re-edited".
+SELECT  pvp.video_id,
+(CASE
+WHEN pvp.parent_project_id IS NOT NULL AND pvp.uid!=pvp_parent.uid THEN '再创作'
+WHEN pvp.parent_project_id IS NOT NULL AND pvp.uid=pvp_parent.uid THEN '再编辑' END ) AS if_change
+FROM    videoods.produce_video_project pvp
+JOIN    videoods.produce_video_project pvp_parent
+ON     CAST (pvp.parent_project_id AS STRING ) = pvp_parent.project_id
+JOIN videoods.wx_video
+ON    pvp.video_id = wx_video.id
+UNION ALL
+-- Projects without a parent are originals.
+SELECT  pvp.video_id,'原创' AS if_change
+FROM    videoods.produce_video_project pvp
+JOIN   videoods.wx_video
+ON    pvp.video_id = wx_video.id
+WHERE pvp.parent_project_id IS NULL
+) t3
+ON t1.id=t3.video_id
+;
+
+
+
+-- Rebuilds the video dimension table dim_video: one SELECT over wx_video (a1)
+-- enriched by LEFT JOINs to the helper tables built earlier in this script and
+-- to several videoods source tables.
+-- INSERT OVERWRITE maps columns by position, so the SELECT list order must match
+-- the column order of dim_video.
+--CREATE TABLE dim_video AS
+INSERT OVERWRITE TABLE dim_video
+SELECT  a1.videoid    -- video id
+        ,a1.title    -- video title
+        ,distrubute_title    -- distribution title (from wx_video_recommend_ext)
+        ,video_category    -- video category
+        ,a3.tag_name    -- first-level tag names
+        ,a1.gmt_create    -- upload time
+        ,tags    -- all tag names of the video
+        ,a1.uid    -- producer uid
+        ,video_edit    -- audit status
+        ,video_data_stat    -- data status
+        ,video_recommend    -- search/recommend status
+        ,CASE    WHEN a7.cover_img_path IS NULL THEN '无' 
+                 ELSE '有' 
+         END AS is_img    -- whether a distribution cover image exists
+        ,(
+            UNIX_TIMESTAMP(datetrunc(GETDATE(), "DD"))-UNIX_TIMESTAMP(a1.gmt_create)
+        )/(60*60*24) AS existence_days    -- days between upload and the start of today
+        ,concat(
+            'https://admin.yishihui.com/cms/post-detail/'
+            ,a1.videoid
+            ,'/info'
+        ) AS video_url    -- admin-console link for the video
+        ,a1.total_time    -- video duration
+        ,coalesce(a1.play_count,0) play_count    -- number of viewers
+        ,coalesce(a1.play_count_total,0) play_count_total    -- number of plays
+        ,coalesce(total_reward,0) total_reward    -- total reward amount
+        ,coalesce(currentday_reward,0) currentday_reward    -- reward amount on the business date
+        ,coalesce(total_reward_times,0) total_reward_times    -- number of rewards
+        ,coalesce(reward_person,0) reward_person    -- number of rewarding users
+        ,coalesce(total_price,0) total_price    -- total paid amount
+        ,coalesce(currentday_price,0) currentday_price    -- amount paid on the business date
+        ,coalesce(total_price_times,0) total_price_times    -- number of purchases
+        ,coalesce(total_price_person,0) total_price_person    -- number of paying users
+        ----------------------------------added----------------------------------
+        ,app_recommend_status    -- app recommend status
+        ,charge    -- charge status (free / paid)
+        ,CASE    WHEN a10.video_id IS NULL THEN '未加密' 
+                 ELSE '加密' 
+         END AS is_pwd    -- whether the video is password-protected
+        ,width    -- video width
+        ,height    -- video height
+        ----------------------------------added 2---------------------------------
+        ,a11.tag_name as tag_name2     -- second-level tag names
+        ,a12.tag_name as tag_name3     -- third-level tag names
+        ---------------------------------added 3----------------------------------
+        ,a13.if_create -- uploaded vs. created video
+        ---------------------------------added 4----------------------------------    
+        ,a13.if_change
+        ,a9.app_type
+        ,a9.descr
+        ,a13.video_type
+        ,a14.if_in_youthCommunity
+        ,a14.if_in_agedCommunity
+        ,a15.name
+FROM    (
+            SELECT  id AS videoid
+                    ,uid
+                    ,title
+                    ,play_count
+                    ,play_count_total
+                    ,total_time
+                    ,gmt_create
+                    ,width
+                    ,height
+            FROM    videoods.wx_video
+        ) a1
+LEFT JOIN (    -- all tag names of the video, comma-joined
+              SELECT  a.video_id
+                      ,concat_ws(',', collect_set(b.tag_name)) AS tags
+              FROM    (
+                          SELECT  video_id
+                                  ,tag_id
+                          FROM    videoods.wx_video_tag_rel
+                      ) a
+              LEFT JOIN (
+                            SELECT  tag_id
+                                    ,tag_name
+                            FROM    videoods.wx_video_tag
+                        ) b
+              ON      a.tag_id = b.tag_id
+              GROUP BY video_id
+          ) a2
+ON      a1.videoid = a2.video_id LEFT
+JOIN    (    -- first-level tag names per video
+            SELECT  videoid
+                    ,tag_name
+            FROM    videoid_top_tag_name
+        ) a3
+ON      a1.videoid = a3.videoid
+LEFT JOIN (    -- status labels and category per video
+              SELECT  videoid
+                      ,video_edit
+                      ,video_data_stat
+                      ,video_recommend
+                      ,video_category
+              FROM    every_video_status_category
+          ) a4
+ON      a1.videoid = a4.videoid LEFT
+JOIN    (    -- reward totals per video
+            SELECT  videoid
+                    ,total_reward
+                    ,currentday_reward
+                    ,total_reward_times
+                    ,reward_person
+            FROM    total_reward_video
+        ) a5
+ON      a1.videoid = a5.videoid
+LEFT JOIN (    -- purchase totals per video
+              SELECT  videoid
+                      ,total_price
+                      ,currentday_price
+                      ,total_price_times
+                      ,total_price_person
+              FROM    total_price_video
+          ) a6
+ON      a1.videoid = a6.videoid LEFT
+JOIN    (    -- distribution title and cover image
+            SELECT  video_id
+                    ,title AS distrubute_title
+                    ,cover_img_path
+            FROM    videoods.wx_video_recommend_ext
+        ) a7
+ON      a1.videoid = a7.video_id
+LEFT JOIN (    -- app recommend status label
+              SELECT  video_id
+                      ,(
+                          CASE    WHEN app_recommend_status=-1 THEN '未分类'
+                                  WHEN app_recommend_status=0 THEN '不可搜'
+                                  WHEN app_recommend_status=-7 THEN '可搜索'
+                                  WHEN app_recommend_status=1 THEN '普通推荐'     -- has app recommendation
+                                  WHEN app_recommend_status=-6 THEN '待推荐'
+                                  WHEN app_recommend_status=10 THEN '编辑推荐'
+                                  WHEN app_recommend_status=20 THEN '实时推荐'
+                          END
+                      ) AS app_recommend_status    -- app recommend status
+              FROM    videoods.wx_video_status
+          ) a8
+ON      a1.videoid = a8.video_id LEFT
+JOIN    (    -- free vs. paid, plus app type and description
+            SELECT  video_id
+                    ,CASE    WHEN charge=0 THEN '免费'
+                             WHEN charge=1 THEN '收费' 
+                     END AS charge
+                    ,app_type
+                    ,descr
+            FROM    videoods.wx_video_detail
+        ) a9
+ON      a1.videoid = a9.video_id
+-- NOTE(review): wx_video_pwd is the only source table referenced without the
+-- videoods. prefix; it resolves against the project the task runs in — confirm.
+LEFT JOIN (    -- password-protected videos
+              SELECT  video_id
+              FROM    wx_video_pwd
+          ) a10
+ON      a1.videoid = a10.video_id LEFT
+JOIN    videoid_second_tag_name a11    -- second-level tag names
+ON      a1.videoid = a11.videoid
+LEFT JOIN videoid_third_tag_name a12    -- third-level tag names
+ON      a1.videoid = a12.videoid
+LEFT JOIN if_create_video  a13
+ON  a1.videoid=a13.id
+-- Community membership labels per video: repository_type 1 rows feed the youth
+-- community column, repository_type 0 rows feed the aged community column.
+LEFT JOIN (
+SELECT  video_id
+        ,CONCAT_WS(
+            ','
+            ,collect_set(
+                CASE    WHEN repository_type=1 AND data_status=1 THEN '进入青年社区'
+                        WHEN repository_type=1 AND data_status=0 THEN '未进入青年社区' 
+                END
+            )
+        ) if_in_youthCommunity
+        ,CONCAT_WS(
+            ','
+            ,collect_set(
+                CASE    WHEN repository_type=0 AND data_status=1 THEN '进入老年社区'
+                        WHEN repository_type=0 AND data_status=0 THEN '未进入老年社区' 
+                END
+            )
+        ) if_in_agedCommunity
+FROM    videoods.video_repository_type
+GROUP BY video_id
+) a14
+ON a1.videoid=a14.video_id
+-- Topic name of the video (only video_topic rows with data_status = 1).
+-- NOTE(review): this subquery is not aggregated, so a video with several active
+-- topics would produce several dim_video rows — confirm that is intended.
+LEFT JOIN 
+(SELECT 
+t1.video_id,t2.name
+FROM 
+videoods.video_topic t1 LEFT JOIN videoods.topic t2
+ON t1.topic_id=t2.id
+WHERE  t1.data_status=1
+) a15
+ON a1.videoid=a15.video_id
+;

+ 145 - 0
production_code/videoods.flow_pool_level_video.json

@@ -0,0 +1,145 @@
+{
+  "name": "flow_pool_level_video",
+  "project": "videoods",
+  "comment": "流量池分级下的视频",
+  "columns": [
+    {
+      "name": "id",
+      "type": "BIGINT",
+      "comment": "ID"
+    },
+    {
+      "name": "flow_pool_id",
+      "type": "BIGINT",
+      "comment": "流量池ID:冗余字段"
+    },
+    {
+      "name": "flow_pool_level_id",
+      "type": "BIGINT",
+      "comment": "流量池分级ID"
+    },
+    {
+      "name": "life_cycle_id",
+      "type": "BIGINT",
+      "comment": "生命周期ID:视频从进入1层到最后退出的统一ID"
+    },
+    {
+      "name": "video_id",
+      "type": "BIGINT",
+      "comment": "视频ID"
+    },
+    {
+      "name": "uid",
+      "type": "BIGINT",
+      "comment": "视频生产者uid"
+    },
+    {
+      "name": "start_type",
+      "type": "BIGINT",
+      "comment": "开始类型:0自动 1手动"
+    },
+    {
+      "name": "end_type",
+      "type": "BIGINT",
+      "comment": "结束类型:0未结束 1到期结束 2正常结束 3手动结束"
+    },
+    {
+      "name": "start_time",
+      "type": "BIGINT",
+      "comment": "开始时间:毫秒"
+    },
+    {
+      "name": "end_time",
+      "type": "BIGINT",
+      "comment": "结束时间:毫秒"
+    },
+    {
+      "name": "expire_time",
+      "type": "BIGINT",
+      "comment": "有效时间:毫秒"
+    },
+    {
+      "name": "distribute_count",
+      "type": "BIGINT",
+      "comment": "分发数"
+    },
+    {
+      "name": "view_count",
+      "type": "BIGINT",
+      "comment": "曝光数:从进入该层开始计算"
+    },
+    {
+      "name": "end_view_count",
+      "type": "BIGINT",
+      "comment": "结束后统计到的曝光数"
+    },
+    {
+      "name": "play_count",
+      "type": "BIGINT",
+      "comment": "播放数:从进入该层开始计算"
+    },
+    {
+      "name": "end_play_count",
+      "type": "BIGINT",
+      "comment": "结束后统计到的播放数"
+    },
+    {
+      "name": "real_play_count",
+      "type": "BIGINT",
+      "comment": "有效播放数:从进入该层开始计算"
+    },
+    {
+      "name": "end_real_play_count",
+      "type": "BIGINT",
+      "comment": "结束后统计到的有效曝光数"
+    },
+    {
+      "name": "back_flow",
+      "type": "BIGINT",
+      "comment": "回流数:从进入该层开始计算"
+    },
+    {
+      "name": "end_back_flow",
+      "type": "BIGINT",
+      "comment": "结束后统计到的回流数"
+    },
+    {
+      "name": "data_status",
+      "type": "BIGINT",
+      "comment": "是否有效:0无效 1有效,默认有效"
+    },
+    {
+      "name": "create_time",
+      "type": "DATETIME",
+      "comment": "创建时间"
+    },
+    {
+      "name": "update_time",
+      "type": "DATETIME",
+      "comment": "更新时间"
+    },
+    {
+      "name": "view_algo_count",
+      "type": "BIGINT",
+      "comment": "召回池同时流量池曝光"
+    },
+    {
+      "name": "share_friend_count",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "attribute",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1006748760,
+      "name": "flow_pool_level_video"
+    }
+  ],
+  "upstream_tables": []
+}

+ 153 - 0
production_code/videoods.flow_pool_level_video.sql

@@ -0,0 +1,153 @@
+-- Task: flow_pool_level_video  ID: 1006748760  Type: DI
+{
+	"extend":{
+		"mode":"wizard",
+		"resourceGroup":"S_res_group_42901_1627548651694",
+		"oneStopPageNum":2
+	},
+	"transform":false,
+	"type":"job",
+	"version":"2.0",
+	"steps":[
+		{
+			"stepType":"mysql",
+			"copies":1,
+			"parameter":{
+				"envType":1,
+				"datasource":"flowpool",
+				"useSpecialSecret":false,
+				"column":[
+					"id",
+					"flow_pool_id",
+					"flow_pool_level_id",
+					"life_cycle_id",
+					"video_id",
+					"uid",
+					"start_type",
+					"end_type",
+					"start_time",
+					"end_time",
+					"expire_time",
+					"distribute_count",
+					"view_count",
+					"view_algo_count",
+					"end_view_count",
+					"play_count",
+					"end_play_count",
+					"real_play_count",
+					"end_real_play_count",
+					"back_flow",
+					"end_back_flow",
+					"share_friend_count",
+					"data_status",
+					"create_time",
+					"update_time",
+					"attribute"
+				],
+				"tableComment":"流量池分级下的视频",
+				"where":"",
+				"connection":[
+					{
+						"datasource":"flowpool",
+						"table":[
+							"flow_pool_level_video"
+						]
+					}
+				],
+				"splitPk":"id"
+			},
+			"name":"Reader",
+			"gui":{
+				"x":100,
+				"y":100
+			},
+			"category":"reader"
+		},
+		{
+			"stepType":"odps",
+			"copies":1,
+			"parameter":{
+				"truncate":true,
+				"envType":1,
+				"datasource":"odps_first",
+				"isSupportThreeModel":false,
+				"tunnelQuota":"default",
+				"column":[
+					"id",
+					"flow_pool_id",
+					"flow_pool_level_id",
+					"life_cycle_id",
+					"video_id",
+					"uid",
+					"start_type",
+					"end_type",
+					"start_time",
+					"end_time",
+					"expire_time",
+					"distribute_count",
+					"view_count",
+					"view_algo_count",
+					"end_view_count",
+					"play_count",
+					"end_play_count",
+					"real_play_count",
+					"end_real_play_count",
+					"back_flow",
+					"end_back_flow",
+					"share_friend_count",
+					"data_status",
+					"create_time",
+					"update_time",
+					"attribute"
+				],
+				"emptyAsNull":false,
+				"tableComment":"流量池分级下的视频",
+				"consistencyCommit":false,
+				"table":"flow_pool_level_video"
+			},
+			"name":"Writer",
+			"gui":{
+				"x":100,
+				"y":200
+			},
+			"category":"writer"
+		},
+		{
+			"copies":1,
+			"parameter":{
+				"nodes":[],
+				"edges":[],
+				"groups":[],
+				"version":"2.0"
+			},
+			"name":"Processor",
+			"gui":{
+				"x":100,
+				"y":300
+			},
+			"category":"processor"
+		}
+	],
+	"order":{
+		"hops":[
+			{
+				"from":"Reader",
+				"gui":{
+					"sourceAnchor":1,
+					"targetAnchor":0
+				},
+				"to":"Writer"
+			}
+		]
+	},
+	"setting":{
+		"errorLimit":{
+			"record":""
+		},
+		"locale":"zh_CN",
+		"speed":{
+			"throttle":false,
+			"concurrent":2
+		}
+	}
+}

+ 38 - 0
production_code/videoods.if_create_video.json

@@ -0,0 +1,38 @@
+{
+  "name": "if_create_video",
+  "project": "videoods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "id",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "if_create",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "if_change",
+      "type": "STRING",
+      "comment": ""
+    },
+    {
+      "name": "video_type",
+      "type": "STRING",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1000579613,
+      "name": "视频维度表"
+    }
+  ],
+  "upstream_tables": [
+    "videoods.produce_video_project",
+    "videoods.wx_video"
+  ]
+}

+ 431 - 0
production_code/videoods.if_create_video.sql

@@ -0,0 +1,431 @@
+-- Task: 视频维度表  ID: 1000579613  Type: ODPS_SQL
+--@exclude_input=total_price_video
+--@exclude_output=total_price_video
+--@exclude_input=total_reward_video
+--@exclude_output=total_reward_video
+--@exclude_input=every_video_status_category
+--@exclude_output=every_video_status_category
+--@exclude_input=videoid_top_tag_name
+--@exclude_output=videoid_top_tag_name
+--odps sql 
+--********************************************************************--
+--author:liuzhongguo
+--create time:2020-06-06 16:40:39
+--********************************************************************--
+-- First-level tag names per video, comma-joined into a single string.
+-- Legacy alternative (disabled): pick the tags by an explicit list of first-level
+-- tag names instead of by level.
+DROP TABLE IF EXISTS videoid_top_tag_name;
+
+CREATE TABLE videoid_top_tag_name AS
+SELECT  v.id AS videoid
+        ,CONCAT_WS(',', COLLECT_SET(t.tag_name)) AS tag_name
+FROM    videoods.wx_video v
+-- Inner joins are sufficient: the level filter below discards any video that has
+-- no matching tag, so outer joins would not add rows.
+JOIN    videoods.wx_video_tag_rel r
+ON      v.id = r.video_id
+JOIN    videoods.wx_video_tag t
+ON      r.tag_id = t.tag_id
+WHERE   t.level = 1
+GROUP BY v.id
+;
+
+
+
+
+
+-- Second-level tag names per video, comma-joined into a single string.
+-- Second-level tags are recognised by an explicit list of tag names; the
+-- level-based filter (level = 2) is disabled.
+DROP TABLE IF EXISTS videoid_second_tag_name;
+
+CREATE TABLE videoid_second_tag_name AS
+SELECT  v.id AS videoid
+        ,CONCAT_WS(',', COLLECT_SET(t.tag_name)) AS tag_name
+FROM    videoods.wx_video v
+-- Inner joins are sufficient: the name filter below discards any video that has
+-- no matching tag, so outer joins would not add rows.
+JOIN    videoods.wx_video_tag_rel r
+ON      v.id = r.video_id
+JOIN    videoods.wx_video_tag t
+ON      r.tag_id = t.tag_id
+WHERE   t.tag_name IN ('民生' ,'灾祸' ,'时政' ,'法制' ,'节庆活动' ,'正能量' ,'打抱不平' ,'八卦' ,'军事' ,'历史' ,'非遗文化' ,'人物传记' ,'法律' ,'哲学' ,'艺术' ,'心理' ,'文学' ,'人生经历' ,'风土人情' ,'才艺' ,'妙招' ,'生活常识' ,'家居' ,'美女' ,'萌娃' ,'动物' ,'种草' ,'祝福' ,'助眠' ,'生活观赏' ,'星座' ,'生活辟谣' ,'三农' ,'奇闻' ,'科技前沿' ,'科学科普' ,'数码' ,'机械' ,'汽车' ,'科学推理' ,'科学实验' ,'财经' ,'商贸' ,'创业分享' ,'两性' ,'爱情' ,'亲情' ,'友情' ,'职场' ,'人生哲理' ,'乡情' ,'性学' ,'养生' ,'医学' ,'育儿' ,'美食分享' ,'美食制作' ,'黑暗料理' ,'奢侈料理' ,'自制片' ,'电影' ,'电视剧' ,'动漫' ,'综艺' ,'航拍' ,'摄影' ,'VLOG' ,'地标' ,'户外探险' ,'自驾游' ,'段子' ,'恶搞' ,'街访' ,'鬼畜' ,'相声' ,'小品' ,'脱口秀' ,'搞笑配音' ,'糗事' ,'舞蹈展示' ,'舞蹈教学' ,'影视舞蹈' ,'健身' ,'赛事' ,'瑜伽' ,'足球' ,'篮球' ,'乒乓球' ,'轮滑' ,'滑雪' ,'极限运动' ,'游泳' ,'钓鱼' ,'运动教学' ,'唱歌' ,'MV' ,'音乐相册' ,'音乐随拍' ,'音乐现场' ,'戏曲' ,'说唱' ,'口技' ,'乐器' ,'教学' ,'穿搭' ,'美妆' ,'美甲' ,'美发' ,'T台秀' ,'手游' ,'PC游戏' ,'主机游戏' ,'页游' ,'桌游' ,'街机')
+GROUP BY v.id
+;
+
+
+
+-- Third-level tag names per video, comma-joined into a single string.
+-- Fix: the filter used level = 2, which collects second-level tags into the
+-- third-level table; third-level tags are level = 3.
+-- NOTE(review): assumes wx_video_tag.level is 1-based (1 = first level, as used by
+-- videoid_top_tag_name) — confirm against the tag table.
+-- Legacy alternative (disabled): pick the tags by an explicit list of third-level
+-- tag names instead of by level.
+DROP TABLE IF EXISTS videoid_third_tag_name ;
+
+CREATE TABLE videoid_third_tag_name AS
+SELECT  a.id AS videoid
+        ,concat_ws(',',collect_set(c.tag_name)) tag_name
+FROM    videoods.wx_video a
+LEFT JOIN videoods.wx_video_tag_rel b
+ON      a.id = b.video_id
+LEFT JOIN (
+            SELECT  tag_id
+                    ,tag_name
+                    ,level
+            FROM    videoods.wx_video_tag
+        ) c
+ON      b.tag_id = c.tag_id
+WHERE   c.level = 3
+GROUP BY a.id
+;
+
+
+
+
+
+
+
+
+
+
+
+-- Per-video status labels (audit, data status, search/recommend status) combined
+-- with the label of the video's category record that has the highest id.
+DROP TABLE IF EXISTS every_video_status_category ;
+
+CREATE TABLE every_video_status_category AS
+-- COALESCE keeps the key for videos that only exist on the category side of the
+-- full outer join; a.video_id alone would be NULL for those rows.
+SELECT  COALESCE(a.video_id, b.video_id) AS videoid
+        ,a.audit_label AS video_edit
+        ,a.data_status_label AS video_data_stat
+        ,a.recommend_label AS video_recommend
+        ,b.category_label AS video_category
+FROM    (
+            SELECT  video_id
+                    ,(
+                        CASE    WHEN audit_status=1 THEN '审核中'
+                                WHEN audit_status=2 THEN '不通过'
+                                WHEN audit_status=3 THEN '待修改'
+                                WHEN audit_status=4 THEN '自己可见'
+                                WHEN audit_status=5 THEN '通过'
+                        END
+                    ) AS audit_label
+                    ,(
+                        CASE    WHEN video_status=1 THEN '有效'  -- public
+                                WHEN video_status=2 THEN '已删除'
+                                WHEN video_status=3 THEN '已屏蔽'
+                                WHEN video_status=4 THEN '关注可见'
+                                WHEN video_status=5 THEN '分享可见'
+                                WHEN video_status=6 THEN '自己可见'
+                        END
+                    ) AS data_status_label
+                    ,(
+                        CASE    WHEN recommend_status=0 THEN '不可搜'
+                                WHEN recommend_status=-6 THEN '待推荐'
+                                WHEN recommend_status=1 THEN '普通推荐'     -- normal and editor recommendation both count as recommended
+                                WHEN recommend_status=10 THEN '编辑推荐'
+                                WHEN recommend_status=-7 THEN '可搜索'
+                        END
+                    ) AS recommend_label
+            FROM    videoods.wx_video_status
+        ) a
+FULL OUTER JOIN (
+                    -- Keep only the newest category row per video BEFORE joining.
+                    -- With a FULL OUTER JOIN, a "rn = 1" predicate in the ON clause
+                    -- does not remove the older rows: they come out as unmatched
+                    -- rows with NULL status columns.
+                    SELECT  video_id
+                            ,category_label
+                    FROM    (
+                                SELECT  video_id
+                                        ,(
+                                            CASE    WHEN category_id=1 THEN '内容'
+                                                    WHEN category_id=2 THEN '场景'
+                                                    WHEN category_id=3 THEN '工具'
+                                                    WHEN category_id=4 THEN '未分类'
+                                                    WHEN category_id=5 THEN '原创内容'
+                                                    WHEN category_id=6 THEN '转载内容'
+                                            END
+                                        ) AS category_label
+                                        ,row_number() over(partition by video_id order by id desc) as rn
+                                FROM    videoods.wx_video_category_info
+                            ) ranked
+                    WHERE   rn = 1
+                ) b
+ON      a.video_id = b.video_id
+;
+
+
+
+-- Reward (tip) totals per video: total amount, amount on the business date,
+-- number of rewards and number of distinct rewarding users.
+-- Amounts are divided by 100 (as in the original logic; presumably cents -> yuan).
+DROP TABLE IF EXISTS total_reward_video ;
+
+CREATE TABLE total_reward_video AS
+SELECT  video_id AS videoid
+        ,SUM(reward_amount)/100 AS total_reward    -- total reward amount
+        ,coalesce(
+            SUM(
+                -- Fix: compare only the day part. FROM_UNIXTIME yields a full timestamp,
+                -- which never equals a bare date string, so this sum was always 0.
+                -- NOTE(review): assumes ${bizdate} is passed as yyyymmdd (the DataWorks
+                -- default) — confirm the scheduling parameter format.
+                CASE    WHEN TO_CHAR(FROM_UNIXTIME(CAST( create_time/ 1000 AS BIGINT) ), 'yyyymmdd')='${bizdate}' THEN reward_amount
+                END
+            )
+            ,0
+        )/100 AS currentday_reward    -- reward amount on the business date
+        ,COUNT(1) AS total_reward_times    -- number of rewards
+        ,COUNT(DISTINCT uid) AS reward_person    -- number of distinct rewarding users
+FROM    videoods.wx_video_reward_record
+GROUP BY video_id
+;
+
+-- Purchase totals per video: total paid amount, amount paid on the business date,
+-- number of purchases and number of distinct paying users.
+DROP TABLE IF EXISTS total_price_video ;
+
+CREATE TABLE total_price_video AS
+SELECT  video_id AS videoid
+        ,SUM(price) AS total_price    -- total paid amount
+        ,coalesce(
+            SUM(
+                -- Fix: compare only the day part. FROM_UNIXTIME yields a full timestamp,
+                -- which never equals a bare date string, so this sum was always 0.
+                -- NOTE(review): assumes ${bizdate} is passed as yyyymmdd (the DataWorks
+                -- default) — confirm the scheduling parameter format.
+                CASE    WHEN TO_CHAR(FROM_UNIXTIME(CAST( gmt_payment_timestamp/ 1000 AS BIGINT) ), 'yyyymmdd')='${bizdate}' THEN price
+                END
+            )
+            ,0
+        ) AS currentday_price    -- amount paid on the business date
+        ,COUNT(1) AS total_price_times    -- number of purchases
+        ,COUNT(DISTINCT uid) AS total_price_person    -- number of distinct paying users
+FROM    videoods.wx_video_purchase_record
+GROUP BY video_id
+;
+
+
+
+-- Video origin classification: uploaded vs. created with the production tool,
+-- how a created video was derived (original / re-created / re-edited), and the
+-- scene it came from.
+-- Fix: DROP now uses IF EXISTS so the script does not fail when the table is
+-- missing, and the CREATE no longer uses IF NOT EXISTS, which would silently skip
+-- the rebuild if the table were still present.
+DROP TABLE IF EXISTS if_create_video;
+
+CREATE TABLE if_create_video AS
+SELECT DISTINCT
+        t1.id
+        ,(CASE WHEN t2.video_id IS NULL THEN '上传视频'
+               WHEN t2.video_id IS NOT NULL THEN '创作视频'
+          END ) AS if_create
+        ,t3.if_change
+        ,(CASE WHEN t2.from_scene=2 THEN '创作工具'
+               WHEN t2.from_scene=3 THEN '普通上传转创作工具'
+               WHEN t2.from_scene=4 THEN '后台转加工'
+               WHEN t2.from_scene=5 THEN '卡点视频'
+          END ) AS video_type
+FROM
+(SELECT
+id
+FROM
+videoods.wx_video
+) t1
+LEFT JOIN
+(SELECT
+video_id,from_scene
+FROM
+videoods.produce_video_project
+) t2
+ON t1.id=t2.video_id
+LEFT JOIN
+(
+-- Projects derived from a parent project: a different author means "re-created",
+-- the same author means "re-edited".
+SELECT  pvp.video_id,
+(CASE
+WHEN pvp.parent_project_id IS NOT NULL AND pvp.uid!=pvp_parent.uid THEN '再创作'
+WHEN pvp.parent_project_id IS NOT NULL AND pvp.uid=pvp_parent.uid THEN '再编辑' END ) AS if_change
+FROM    videoods.produce_video_project pvp
+JOIN    videoods.produce_video_project pvp_parent
+ON     CAST (pvp.parent_project_id AS STRING ) = pvp_parent.project_id
+JOIN videoods.wx_video
+ON    pvp.video_id = wx_video.id
+UNION ALL
+-- Projects without a parent are originals.
+SELECT  pvp.video_id,'原创' AS if_change
+FROM    videoods.produce_video_project pvp
+JOIN   videoods.wx_video
+ON    pvp.video_id = wx_video.id
+WHERE pvp.parent_project_id IS NULL
+) t3
+ON t1.id=t3.video_id
+;
+
+
+
+-- Overwrite dim_video with one SELECT that joins per-video attributes from the sources below (alternative kept for reference: CREATE TABLE dim_video AS)
+INSERT OVERWRITE TABLE dim_video
+SELECT  a1.videoid    --video id
+        ,a1.title    --video title
+        ,distrubute_title    --distribution title of the video
+        ,video_category    --video category
+        ,a3.tag_name    --first-level category of the video
+        ,a1.gmt_create    --upload time
+        ,tags    --video tags (comma-joined)
+        ,a1.uid    --producer id
+        ,video_edit    --video review status
+        ,video_data_stat    --video data status
+        ,video_recommend    --video searchability status
+        ,CASE    WHEN a7.cover_img_path IS NULL THEN '无' 
+                 ELSE '有' 
+         END AS is_img    --whether the video has a distribution cover image
+        ,(
+            UNIX_TIMESTAMP(datetrunc(GETDATE(), "DD"))-UNIX_TIMESTAMP(a1.gmt_create)
+        )/(60*60*24) AS existence_days    --days between gmt_create and the start of the current day (seconds / 86400)
+        ,concat(
+            'https://admin.yishihui.com/cms/post-detail/'
+            ,a1.videoid
+            ,'/info'
+        ) AS video_url    --admin back-office link to the video
+        ,a1.total_time    --video duration
+        ,coalesce(a1.play_count,0) play_count    --number of viewers
+        ,coalesce(a1.play_count_total,0) play_count_total    --number of plays
+        ,coalesce(total_reward,0) total_reward    --total tip amount
+        ,coalesce(currentday_reward,0) currentday_reward    --tip amount for the current day
+        ,coalesce(total_reward_times,0) total_reward_times    --number of tips
+        ,coalesce(reward_person,0) reward_person    --number of tipping users
+        ,coalesce(total_price,0) total_price    --total paid amount
+        ,coalesce(currentday_price,0) currentday_price    --paid amount for the current day
+        ,coalesce(total_price_times,0) total_price_times    --number of payments
+        ,coalesce(total_price_person,0) total_price_person    --number of paying users
+        ----------------------------------added----------------------------------
+        ,app_recommend_status    --app recommendation status
+        ,charge    --charging status
+        ,CASE    WHEN a10.video_id IS NULL THEN '未加密' 
+                 ELSE '加密' 
+         END AS is_pwd    --whether encrypted (a row exists in wx_video_pwd)
+        ,width    --video width
+        ,height    --video height
+        ----------------------------------added 2---------------------------------
+        ,a11.tag_name as tag_name2     --second-level tag
+        ,a12.tag_name as tag_name3     --third-level tag
+        ---------------------------------added 3----------------------------------
+        ,a13.if_create --whether the video is a created video
+        ---------------------------------added 4----------------------------------    
+        ,a13.if_change    --lineage label taken from if_create_video
+        ,a9.app_type    --taken from wx_video_detail
+        ,a9.descr    --taken from wx_video_detail
+        ,a13.video_type    --creation-scene label taken from if_create_video
+        ,a14.if_in_youthCommunity    --comma-joined labels built from repository_type=1 rows
+        ,a14.if_in_agedCommunity    --comma-joined labels built from repository_type=0 rows
+        ,a15.name    --topic name
+FROM    (
+            SELECT  id AS videoid
+                    ,uid
+                    ,title
+                    ,play_count
+                    ,play_count_total
+                    ,total_time
+                    ,gmt_create
+                    ,width
+                    ,height
+            FROM    videoods.wx_video
+        ) a1
+LEFT JOIN (    --video tags
+              SELECT  a.video_id
+                      ,concat_ws(',', collect_set(b.tag_name)) AS tags
+              FROM    (
+                          SELECT  video_id
+                                  ,tag_id
+                          FROM    videoods.wx_video_tag_rel
+                      ) a
+              LEFT JOIN (
+                            SELECT  tag_id
+                                    ,tag_name
+                            FROM    videoods.wx_video_tag
+                        ) b
+              ON      a.tag_id = b.tag_id
+              GROUP BY video_id
+          ) a2
+ON      a1.videoid = a2.video_id LEFT
+JOIN    (    --first-level category of each video
+            SELECT  videoid
+                    ,tag_name
+            FROM    videoid_top_tag_name
+        ) a3
+ON      a1.videoid = a3.videoid
+LEFT JOIN (    --the various statuses of each video
+              SELECT  videoid
+                      ,video_edit
+                      ,video_data_stat
+                      ,video_recommend
+                      ,video_category
+              FROM    every_video_status_category
+          ) a4
+ON      a1.videoid = a4.videoid LEFT
+JOIN    (    --total tip amount, number of tipping users, number of tipped videos
+            SELECT  videoid
+                    ,total_reward
+                    ,currentday_reward
+                    ,total_reward_times
+                    ,reward_person
+            FROM    total_reward_video
+        ) a5
+ON      a1.videoid = a5.videoid
+LEFT JOIN (    --payment data
+              SELECT  videoid
+                      ,total_price
+                      ,currentday_price
+                      ,total_price_times
+                      ,total_price_person
+              FROM    total_price_video
+          ) a6
+ON      a1.videoid = a6.videoid LEFT
+JOIN    (    --distribution title
+            SELECT  video_id
+                    ,title AS distrubute_title
+                    ,cover_img_path
+            FROM    videoods.wx_video_recommend_ext
+        ) a7
+ON      a1.videoid = a7.video_id
+LEFT JOIN (    --app recommendation
+              SELECT  video_id
+                      ,(
+                          CASE    WHEN app_recommend_status=-1 THEN '未分类'
+                                  WHEN app_recommend_status=0 THEN '不可搜'
+                                  WHEN app_recommend_status=-7 THEN '可搜索'
+                                  WHEN app_recommend_status=1 THEN '普通推荐'     --has app recommendation
+                                  WHEN app_recommend_status=-6 THEN '待推荐'
+                                  WHEN app_recommend_status=10 THEN '编辑推荐'
+                                  WHEN app_recommend_status=20 THEN '实时推荐'
+                          END
+                      ) AS app_recommend_status    --app recommendation status (NULL for any other code)
+              FROM    videoods.wx_video_status
+          ) a8
+ON      a1.videoid = a8.video_id LEFT
+JOIN    (    --whether the video is free
+            SELECT  video_id
+                    ,CASE    WHEN charge=0 THEN '免费'
+                             WHEN charge=1 THEN '收费' 
+                     END AS charge
+                    ,app_type
+                    ,descr
+            FROM    videoods.wx_video_detail
+        ) a9
+ON      a1.videoid = a9.video_id
+LEFT JOIN (    --whether the video is encrypted
+              SELECT  video_id
+              FROM    wx_video_pwd
+          ) a10
+ON      a1.videoid = a10.video_id LEFT
+JOIN    videoid_second_tag_name a11    --second-level tag
+ON      a1.videoid = a11.videoid
+LEFT JOIN videoid_third_tag_name a12    --third-level tag
+ON      a1.videoid = a12.videoid
+LEFT JOIN if_create_video  a13    -- NOTE(review): if if_create_video holds several rows per id, this join multiplies output rows - confirm
+ON  a1.videoid=a13.id
+LEFT JOIN (    -- a14: one row per video_id with community-membership labels
+SELECT  video_id
+        ,CONCAT_WS(
+            ','
+            ,collect_set(
+                CASE    WHEN repository_type=1 AND data_status=1 THEN '进入青年社区'
+                        WHEN repository_type=1 AND data_status=0 THEN '未进入青年社区' 
+                END
+            )
+        ) if_in_youthCommunity
+        ,CONCAT_WS(
+            ','
+            ,collect_set(
+                CASE    WHEN repository_type=0 AND data_status=1 THEN '进入老年社区'
+                        WHEN repository_type=0 AND data_status=0 THEN '未进入老年社区' 
+                END
+            )
+        ) if_in_agedCommunity
+FROM    videoods.video_repository_type
+GROUP BY video_id
+) a14
+ON a1.videoid=a14.video_id
+LEFT JOIN 
+(SELECT    -- a15: topic name for each video_topic row with data_status=1; not aggregated per video
+t1.video_id,t2.name
+FROM 
+videoods.video_topic t1 LEFT JOIN videoods.topic t2
+ON t1.topic_id=t2.id
+WHERE  t1.data_status=1    -- NOTE(review): a video with several such rows yields one dim_video row per topic - confirm this is intended
+) a15
+ON a1.videoid=a15.video_id
+;

+ 94 - 0
production_code/videoods.topic.json

@@ -0,0 +1,94 @@
+{
+  "name": "topic",
+  "project": "videoods",
+  "comment": "社区-话题",
+  "columns": [
+    {
+      "name": "id",
+      "type": "BIGINT",
+      "comment": "id"
+    },
+    {
+      "name": "name",
+      "type": "STRING",
+      "comment": "话题名称"
+    },
+    {
+      "name": "introduction",
+      "type": "STRING",
+      "comment": "话题简介"
+    },
+    {
+      "name": "image_1_1_path",
+      "type": "STRING",
+      "comment": "1:1封面图片地址"
+    },
+    {
+      "name": "image_16_9_path",
+      "type": "STRING",
+      "comment": "16:9封面图片地址"
+    },
+    {
+      "name": "image_9_16_path",
+      "type": "STRING",
+      "comment": "9:16封面图片地址"
+    },
+    {
+      "name": "audit_status",
+      "type": "BIGINT",
+      "comment": "审核状态:0 未审 1 已审"
+    },
+    {
+      "name": "is_participatory",
+      "type": "BIGINT",
+      "comment": "是否可参与:0 否 1 是"
+    },
+    {
+      "name": "distribution_state",
+      "type": "BIGINT",
+      "comment": "分发状态:0 不通过 1 不可搜 2 可搜索 3 可推荐"
+    },
+    {
+      "name": "create_uid",
+      "type": "BIGINT",
+      "comment": "话题创建者uid, 0为管理员创建"
+    },
+    {
+      "name": "hot_score",
+      "type": "DECIMAL",
+      "comment": "热度值"
+    },
+    {
+      "name": "participant_num",
+      "type": "BIGINT",
+      "comment": "参与值:参与人数"
+    },
+    {
+      "name": "data_status",
+      "type": "BIGINT",
+      "comment": "是否有效:0无效 1有效,默认有效"
+    },
+    {
+      "name": "create_time",
+      "type": "DATETIME",
+      "comment": "创建时间"
+    },
+    {
+      "name": "update_time",
+      "type": "DATETIME",
+      "comment": "更新时间"
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1001894077,
+      "name": "imp.topic"
+    },
+    {
+      "id": 1001894077,
+      "name": "imp.topic"
+    }
+  ],
+  "upstream_tables": []
+}

+ 251 - 0
production_code/videoods.topic.sql

@@ -0,0 +1,251 @@
+-- Task: imp.topic  ID: 1001894077  Type: DI
+{
+	"extend":{
+		"mode":"wizard",
+		"resourceGroup":"group_42901"
+	},
+	"transform":false,
+	"type":"job",
+	"version":"2.0",
+	"steps":[
+		{
+			"stepType":"mysql",
+			"copies":1,
+			"parameter":{
+				"envType":1,
+				"column":[
+					"id",
+					"name",
+					"introduction",
+					"image_1_1_path",
+					"image_16_9_path",
+					"image_9_16_path",
+					"audit_status",
+					"is_participatory",
+					"distribution_state",
+					"create_uid",
+					"hot_score",
+					"participant_num",
+					"data_status",
+					"create_time",
+					"update_time"
+				],
+				"tableComment":"社区-话题",
+				"where":"",
+				"connection":[
+					{
+						"datasource":"longvide_read",
+						"table":[
+							"topic"
+						]
+					}
+				],
+				"splitPk":"id"
+			},
+			"name":"Reader",
+			"gui":{
+				"x":100,
+				"y":100
+			},
+			"category":"reader"
+		},
+		{
+			"stepType":"odps",
+			"copies":1,
+			"parameter":{
+				"truncate":true,
+				"envType":1,
+				"datasource":"odps_first",
+				"column":[
+					"id",
+					"name",
+					"introduction",
+					"image_1_1_path",
+					"image_16_9_path",
+					"image_9_16_path",
+					"audit_status",
+					"is_participatory",
+					"distribution_state",
+					"create_uid",
+					"hot_score",
+					"participant_num",
+					"data_status",
+					"create_time",
+					"update_time"
+				],
+				"emptyAsNull":false,
+				"tableComment":"社区-话题",
+				"table":"topic"
+			},
+			"name":"Writer",
+			"gui":{
+				"x":100,
+				"y":200
+			},
+			"category":"writer"
+		},
+		{
+			"copies":1,
+			"parameter":{
+				"nodes":[],
+				"edges":[],
+				"groups":[],
+				"version":"2.0"
+			},
+			"name":"Processor",
+			"gui":{
+				"x":100,
+				"y":300
+			},
+			"category":"processor"
+		}
+	],
+	"order":{
+		"hops":[
+			{
+				"from":"Reader",
+				"gui":{
+					"sourceAnchor":1,
+					"targetAnchor":0
+				},
+				"to":"Writer"
+			}
+		]
+	},
+	"setting":{
+		"errorLimit":{
+			"record":""
+		},
+		"locale":"zh_CN",
+		"speed":{
+			"throttle":false,
+			"concurrent":2
+		}
+	}
+}
+
+-- Task: imp.topic  ID: 1001894077  Type: DI
+{
+	"extend":{
+		"mode":"wizard",
+		"resourceGroup":"group_42901"
+	},
+	"transform":false,
+	"type":"job",
+	"version":"2.0",
+	"steps":[
+		{
+			"stepType":"mysql",
+			"copies":1,
+			"parameter":{
+				"envType":1,
+				"column":[
+					"id",
+					"name",
+					"introduction",
+					"image_1_1_path",
+					"image_16_9_path",
+					"image_9_16_path",
+					"audit_status",
+					"is_participatory",
+					"distribution_state",
+					"create_uid",
+					"hot_score",
+					"participant_num",
+					"data_status",
+					"create_time",
+					"update_time"
+				],
+				"tableComment":"社区-话题",
+				"where":"",
+				"connection":[
+					{
+						"datasource":"longvide_read",
+						"table":[
+							"topic"
+						]
+					}
+				],
+				"splitPk":"id"
+			},
+			"name":"Reader",
+			"gui":{
+				"x":100,
+				"y":100
+			},
+			"category":"reader"
+		},
+		{
+			"stepType":"odps",
+			"copies":1,
+			"parameter":{
+				"truncate":true,
+				"envType":1,
+				"datasource":"odps_first",
+				"column":[
+					"id",
+					"name",
+					"introduction",
+					"image_1_1_path",
+					"image_16_9_path",
+					"image_9_16_path",
+					"audit_status",
+					"is_participatory",
+					"distribution_state",
+					"create_uid",
+					"hot_score",
+					"participant_num",
+					"data_status",
+					"create_time",
+					"update_time"
+				],
+				"emptyAsNull":false,
+				"tableComment":"社区-话题",
+				"table":"topic"
+			},
+			"name":"Writer",
+			"gui":{
+				"x":100,
+				"y":200
+			},
+			"category":"writer"
+		},
+		{
+			"copies":1,
+			"parameter":{
+				"nodes":[],
+				"edges":[],
+				"groups":[],
+				"version":"2.0"
+			},
+			"name":"Processor",
+			"gui":{
+				"x":100,
+				"y":300
+			},
+			"category":"processor"
+		}
+	],
+	"order":{
+		"hops":[
+			{
+				"from":"Reader",
+				"gui":{
+					"sourceAnchor":1,
+					"targetAnchor":0
+				},
+				"to":"Writer"
+			}
+		]
+	},
+	"setting":{
+		"errorLimit":{
+			"record":""
+		},
+		"locale":"zh_CN",
+		"speed":{
+			"throttle":false,
+			"concurrent":2
+		}
+	}
+}

+ 42 - 0
production_code/videoods.total_price_video.json

@@ -0,0 +1,42 @@
+{
+  "name": "total_price_video",
+  "project": "videoods",
+  "comment": "",
+  "columns": [
+    {
+      "name": "videoid",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "total_price",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "currentday_price",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "total_price_times",
+      "type": "BIGINT",
+      "comment": ""
+    },
+    {
+      "name": "total_price_person",
+      "type": "BIGINT",
+      "comment": ""
+    }
+  ],
+  "partition_keys": [],
+  "dataworks_tasks": [
+    {
+      "id": 1000579613,
+      "name": "视频维度表"
+    }
+  ],
+  "upstream_tables": [
+    "videoods.wx_video_purchase_record"
+  ]
+}

Некоторые файлы не были показаны из-за большого количества измененных файлов