xueyiming 15 horas atrás
pai
commit
850ec9be21
1 arquivos alterados com 41 adições e 24 exclusões
  1. 41 24
      app/services/element_search_service.py

+ 41 - 24
app/services/element_search_service.py

@@ -288,36 +288,51 @@ def query_monthly_element_demands(
     bizdate = _validate_partition_dt("bizdate", _partition_yyyymmdd(_today_shanghai()))
 
     sql = f"""
-WITH biz_month AS (
-    SELECT
-        TO_DATE(CONCAT(SUBSTR('{bizdate}', 1, 4), '-', SUBSTR('{bizdate}', 5, 2), '-01')) AS biz_m1
+WITH biz_day AS (
+    SELECT TO_DATE(CONCAT(SUBSTR('{bizdate}', 1, 4), '-', SUBSTR('{bizdate}', 5, 2), '-', SUBSTR('{bizdate}', 7, 2))) AS biz_dt
+),
+yesterday AS (
+    SELECT DATE_SUB((SELECT biz_dt FROM biz_day), 1) AS yest
 ),
-month_window AS (
+window_bounds AS (
     SELECT
-        CONCAT(
-            SUBSTR(CAST(ADD_MONTHS(biz_m1, -12) AS STRING), 1, 4),
-            SUBSTR(CAST(ADD_MONTHS(biz_m1, -12) AS STRING), 6, 2)
-        ) AS start_ym,
-        CONCAT(
-            SUBSTR(CAST(ADD_MONTHS(biz_m1, -1) AS STRING), 1, 4),
-            SUBSTR(CAST(ADD_MONTHS(biz_m1, -1) AS STRING), 6, 2)
-        ) AS end_ym
-    FROM biz_month
+        CAST((SELECT yest FROM yesterday) AS DATETIME) AS end_dt,
+        CAST(DATE_SUB((SELECT yest FROM yesterday), 359) AS DATETIME) AS start_dt
 ),
 cleaned_video_metrics AS (
     SELECT
         CAST(视频id AS STRING) AS vid,
-        SUBSTR(CAST(dt AS STRING), 1, 6) AS ym,
+        CAST(FLOOR(DATEDIFF(
+            (SELECT yest FROM yesterday),
+            TO_DATE(REGEXP_REPLACE(CAST(dt AS STRING), '-', ''), 'yyyyMMdd')
+        ) / 30) AS STRING) AS ym,
+        CONCAT(
+            REGEXP_REPLACE(CAST(DATE_SUB(
+                (SELECT yest FROM yesterday),
+                CAST(FLOOR(DATEDIFF(
+                    (SELECT yest FROM yesterday),
+                    TO_DATE(REGEXP_REPLACE(CAST(dt AS STRING), '-', ''), 'yyyyMMdd')
+                ) / 30) * 30 + 29 AS INT)
+            ) AS STRING), '-', ''),
+            '~',
+            REGEXP_REPLACE(CAST(DATE_SUB(
+                (SELECT yest FROM yesterday),
+                CAST(FLOOR(DATEDIFF(
+                    (SELECT yest FROM yesterday),
+                    TO_DATE(REGEXP_REPLACE(CAST(dt AS STRING), '-', ''), 'yyyyMMdd')
+                ) / 30) * 30 AS INT)
+            ) AS STRING), '-', '')
+        ) AS ym_range,
         rov_t0,
         COALESCE(`当日分发曝光pv`, 0) AS day_dist_pv
     FROM loghubods.video_dimension_detail_add_column
-    WHERE SUBSTR(CAST(dt AS STRING), 1, 6) >= (SELECT start_ym FROM month_window)
-      AND SUBSTR(CAST(dt AS STRING), 1, 6) <= (SELECT end_ym FROM month_window)
+    WHERE TO_DATE(REGEXP_REPLACE(CAST(dt AS STRING), '-', ''), 'yyyyMMdd') BETWEEN (SELECT start_dt FROM window_bounds) AND (SELECT end_dt FROM window_bounds)
       AND COALESCE(`当日分发曝光pv`, 0) >= {int(view_pv_count)}
 ),
 video_monthly_avg_metrics AS (
     SELECT
         ym,
+        MAX(ym_range) AS ym_range,
         vid,
         AVG(CASE WHEN rov_t0 = 0 THEN NULL ELSE rov_t0 END) AS vid_avg_rov,
         SUM(day_dist_pv) AS month_total_pv
@@ -377,13 +392,15 @@ element_freq AS (
         COUNT(1) AS 频次
     FROM element_monthly_metrics
     GROUP BY 原始元素
-)
-,element_month_list AS (
+),
+element_month_list AS (
     SELECT
-        原始元素,
-        TO_JSON(SORT_ARRAY(COLLECT_SET(ym))) AS month_list
-    FROM element_monthly_metrics
-    GROUP BY 原始元素
+        em.原始元素,
+        TO_JSON(SORT_ARRAY(COLLECT_SET(vm.ym_range))) AS month_list
+    FROM element_monthly_metrics em
+    JOIN video_monthly_avg_metrics vm
+      ON em.ym = vm.ym
+    GROUP BY em.原始元素
 )
 SELECT
     '逐月' AS strategy,
@@ -392,7 +409,7 @@ SELECT
     r.avg_rov AS weight,
     COALESCE(v.vid_count, 0) AS video_count,
     v.vid_list AS video_list,
-    ml.month_list AS month_list, 
+    ml.month_list AS month_list,
     COALESCE(f.频次, 0) AS frequency,
     '{{}}' AS ext_info
 FROM element_total_rov r
@@ -400,7 +417,7 @@ LEFT JOIN element_vid_stats v
   ON r.原始元素 = v.原始元素
 LEFT JOIN element_freq f
   ON r.原始元素 = f.原始元素
-LEFT JOIN element_month_list ml           -- 新增 JOIN
+LEFT JOIN element_month_list ml
   ON r.原始元素 = ml.原始元素
 WHERE r.原始元素 NOT IN (
     '元旦','腊八节','小年','除夕','春节','正月初一','正月初二','正月初三','正月初四','正月初五',