Просмотр исходного кода

feat: 修改每日执行顺序,所有品类跑top级别的数据

jihuaqiang 2 месяцев назад
Родитель
Коммит
2b346917f4
1 изменённый файл: 26 добавлений и 14 удалений
  1. 26 14
      examples/content_finder/db/schedule.py

+ 26 - 14
examples/content_finder/db/schedule.py

@@ -193,27 +193,39 @@ def get_total_token_coast_between(start_time: datetime, end_time: datetime) -> D
 
 
 def get_one_today_unprocessed_demand(*, dt: int) -> Optional[Dict[str, Any]]:
 def get_one_today_unprocessed_demand(*, dt: int) -> Optional[Dict[str, Any]]:
     """
     """
-    从 demand_content 中取「当天 dt」且尚未在 demand_find_task 中出现过的 1 条需求。
+    从 demand_content 中取「当天 dt」且尚未在 demand_find_task 中出现过的 1 条需求,
+    采用“分品类分层轮转”策略:
 
 
-    - 不按品类分组(不再使用 merge_leve2)
+    - 先取每个品类分数最高的 1 条(各品类 top1),按 score 降序优先执行
+    - 当各品类 top1 跑完后,再取各品类 top2,仍按 score 降序优先执行
+    - 依此类推
     - dt 与表字段一致:一般为 YYYYMMDD 整数(如 20260402)
     - dt 与表字段一致:一般为 YYYYMMDD 整数(如 20260402)
-    - 同 dt 下按 score 降序取第一条(最高分优先)
 
 
     Returns:
     Returns:
         {"demand_content_id": int, "query": str, "suggestion": Optional[str], "score": Any} 或 None
         {"demand_content_id": int, "query": str, "suggestion": Optional[str], "score": Any} 或 None
     """
     """
     sql = """
     sql = """
-    SELECT dc.id AS demand_content_id,
-           dc.name AS query,
-           dc.suggestion AS suggestion,
-           dc.score AS score
-    FROM demand_content dc
-    WHERE dc.dt = %s
-      AND NOT EXISTS (
-        SELECT 1 FROM demand_find_task t
-        WHERE t.demand_content_id = dc.id
-      )
-    ORDER BY dc.score DESC, dc.id DESC
+    SELECT x.demand_content_id,
+           x.query,
+           x.suggestion,
+           x.score
+    FROM (
+        SELECT dc.id AS demand_content_id,
+               dc.name AS query,
+               dc.suggestion AS suggestion,
+               dc.score AS score,
+               ROW_NUMBER() OVER (
+                   PARTITION BY COALESCE(dc.merge_leve2, '')
+                   ORDER BY dc.score DESC, dc.id DESC
+               ) AS rn
+        FROM demand_content dc
+        WHERE dc.dt = %s
+          AND NOT EXISTS (
+            SELECT 1 FROM demand_find_task t
+            WHERE t.demand_content_id = dc.id
+          )
+    ) x
+    ORDER BY x.rn ASC, x.score DESC, x.demand_content_id DESC
     LIMIT 1
     LIMIT 1
     """
     """
     conn = None
     conn = None