1 month ago · 308ca8f1eb
--- a/api-module/src/main/java/com/tzld/piaoquan/api/service/contentplatform/impl/ContentPlatformPlanServiceImpl.java
+++ b/api-module/src/main/java/com/tzld/piaoquan/api/service/contentplatform/impl/ContentPlatformPlanServiceImpl.java
@@ -834,8 +834,8 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
 
				     }
			
 
				 
			
 
				     /**
			
 
				-     * 先验池：A 段 dimension='传播头部' → B 段 其余 dimension。
			
 
				-     * 每段按 (point_type, standard_element) 分组，组按 total_rov DESC、组内 score DESC 取前 K；段间拼接 + video_id 去重。
			
 
				+     * 先验池：只取 dimension='传播的头部' 的行。
			
 
				+     * 按 (point_type, standard_element) 分组，组按 total_rov DESC、组内 score DESC 取前 K，结果按 limit 截断。
			
 
				      */
			
 
				     private List<VideoContentItemVO> fetchPriorCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
			
 
				         String dt = demandVideoMapperExt.getMaxDt();
			
@@ -846,26 +846,24 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
 
				         String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
			
 
				         int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
			
 
				 
			
 
				-        List<ContentPlatformDemandVideo> stageA = demandVideoMapperExt.selectForRecommend(
			
 
				+        List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
			
 
				                 dt, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, ghName, null, fetchLimit, false);
			
 
				-        List<ContentPlatformDemandVideo> stageB = demandVideoMapperExt.selectForRecommend(
			
 
				-                dt, crowdSegment, DEMAND_STRATEGY_PRIOR, null, PRIOR_PREMIUM_DIMENSION, null, ghName, null, fetchLimit, false);
			
 
				 
			
 
				-        // 退化:该 ghName 在两阶段都无数据 → 退回渠道粒度
			
 
				-        if (ghName != null && stageA.isEmpty() && stageB.isEmpty()) {
			
 
				-            stageA = demandVideoMapperExt.selectForRecommend(
			
 
				+        // 退化:该 ghName 无数据 → 退回渠道粒度
			
 
				+        if (ghName != null && rows.isEmpty()) {
			
 
				+            rows = demandVideoMapperExt.selectForRecommend(
			
 
				                     dt, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, fetchLimit, false);
			
 
				-            stageB = demandVideoMapperExt.selectForRecommend(
			
 
				-                    dt, crowdSegment, DEMAND_STRATEGY_PRIOR, null, PRIOR_PREMIUM_DIMENSION, null, null, null, fetchLimit, false);
			
 
				         }
			
 
				 
			
 
				         Function<ContentPlatformDemandVideo, String> keyFn = r ->
			
 
				                 (r.getPointType() == null ? "" : r.getPointType()) + "\u0001"
			
 
				                         + (r.getStandardElement() == null ? "" : r.getStandardElement());
			
 
				 
			
 
				-        List<VideoContentItemVO> outA = groupAndTopK(stageA, keyFn, TOP_K_PER_DEMAND, false);
			
 
				-        List<VideoContentItemVO> outB = groupAndTopK(stageB, keyFn, TOP_K_PER_DEMAND, false);
			
 
				-        return concatDedup(outA, outB, limit);
			
 
				+        List<VideoContentItemVO> out = groupAndTopK(rows, keyFn, TOP_K_PER_DEMAND, false);
			
 
				+        if (out.size() > limit) {
			
 
				+            out = new ArrayList<>(out.subList(0, limit));
			
 
				+        }
			
 
				+        return out;
			
 
				     }
			
 
				 
			
 
				     /**
			
--- a/docs/recommend-ordering.md
+++ b/docs/recommend-ordering.md
@@ -0,0 +1,264 @@
 
				+# `videoContentList` 推荐列表排序逻辑
			
 
				+
			
 
				+> 接口：`POST /contentPlatform/plan/videoContentList`
			
 
				+> 入口：`ContentPlatformPlanServiceImpl.getVideoContentList`
			
 
				+> 数据源：`content_platform_demand_video`（先验/后验/先验-场景）+ `content_platform_video`（全局热门）
			
 
				+>
			
 
				+> 适用：当前分支 `cooperation_video_candidate_pool_improved_lld_0509`（含 commit `2860bdce`）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 1. 入参 → 路径分发
			
 
				+
			
 
				+```
			
 
				+title 非空  ───────────►  全站搜索 (getVideoContentListByTitle)
			
 
				+source=prior  ───────────►  单源「粉丝喜欢」(getSingleSourcePage / prior)
			
 
				+source=posterior  ───────►  单源「已发优质相似」(getSingleSourcePage / posterior)
			
 
				+source=hot   ───────────►  单源「全局热门」(getHotSourcePaged)
			
 
				+source 空（默认）  ──────►  四路随机穿插 (getInterleavedPage)
			
 
				+```
			
 
				+
			
 
				+> 当前前端：`prior` 和 `posterior` 两个 tab 已 disabled（提示"功能正在完善中"），生产实际只走「全部」（四路穿插）和「全局热门」单源。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 2. 关键常量
			
 
				+
			
 
				+| 常量 | 值 | 含义 |
			
 
				+|---|---|---|
			
 
				+| `DEMAND_CANDIDATE_LIMIT` | 10000 | 每个 demand 池最大候选条数 |
			
 
				+| `HOT_CANDIDATE_LIMIT` | 10000 | hot 池候选条数 |
			
 
				+| `TOP_K_PER_DEMAND` | 3 | demand 池组内取前 K |
			
 
				+| `DEMAND_STRATEGY_PRIOR` | `"先验需求"` | 先验池过滤值 |
			
 
				+| `DEMAND_STRATEGY_PRIOR_SCENE` | `"先验需求-场景"` | 场景池过滤值 |
			
 
				+| `DEMAND_STRATEGY_POSTERIOR` | `"后验需求"` | 后验池过滤值 |
			
 
				+| `PRIOR_PREMIUM_DIMENSION` | `"传播的头部"` | 先验池唯一保留的 dimension（先验池现为单段查询，不再区分 A/B 段） |
			
 
				+| `POSTERIOR_FILTER_ABS_LIKE` | `"绝对高效率%"` | 后验池 A 段 `demand_filter_sort_strategy` LIKE |
			
 
				+| `POSTERIOR_FILTER_REL_LIKE` | `"相对裂变率%"` | 后验池 B 段 `demand_filter_sort_strategy` LIKE |
			
 
				+| `POSTERIOR_DRIVE_DIMENSION_TIME` | `"昨日"` | 后验池强制 `drive_dimension_time` |
			
 
				+
			
 
				+公共强过滤（所有 demand 池 SQL）：`dt = max(dt)` AND `status = 1` AND `crowd_segment = user.channel`。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 3. 四个候选池的构造顺序
			
 
				+
			
 
				+### 3.1 `fetchPriorSceneCandidates`（场景池）
			
 
				+
			
 
				+**目的：用户所属 channel 在"场景"维度命中的先验。**
			
 
				+
			
 
				+```sql
			
 
				+SELECT ... FROM content_platform_demand_video
			
 
				+WHERE dt=:maxDt AND status=1
			
 
				+  AND crowd_segment=:userChannel
			
 
				+  AND demand_strategy='先验需求-场景'
			
 
				+  AND channel_level3=:ghName        -- 若传入
			
 
				+ORDER BY total_rov DESC, score DESC
			
 
				+LIMIT 10000
			
 
				+```
			
 
				+
			
 
				+退化：如果 `ghName` 非空但查 0 条，**去掉 `ghName` 再查一次**（拿全渠道兜底）。
			
 
				+
			
 
				+后处理：
			
 
				+1. `videoId` 为 null 的丢弃
			
 
				+2. 同 `videoId` 仅保留首次
			
 
				+3. **不做分组、不做 rov<=0 过滤**（场景型本身稀缺）
			
 
				+
			
 
				+输出顺序 = SQL 返回顺序 = `total_rov DESC, score DESC`。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3.2 `fetchPriorCandidates`（先验池）
			
 
				+
			
 
				+**目的：先验需求里,只取 `dimension='传播的头部'` 维度的内容。**
			
 
				+
			
 
				+单段查询：
			
 
				+
			
 
				+```sql
			
 
				+SELECT ... WHERE ... AND demand_strategy='先验需求' AND dimension='传播的头部' ...
			
 
				+ORDER BY total_rov DESC, score DESC LIMIT 30000
			
 
				+```
			
 
				+
			
 
				+退化：查询为空且 `ghName` 非空 → 去 `ghName` 重查。
			
 
				+
			
 
				+进 `groupAndTopK`：
			
 
				+- 分组键：`(point_type, standard_element)`
			
 
				+- **过滤** `rov <= 0` 或 null（近 7 日无表现）
			
 
				+- 不做 selfTitle 过滤
			
 
				+- 组排序：组内最大 `total_rov` DESC
			
 
				+- 组内排序：`score` DESC，组内 `videoId` 去重，每组最多 3 条
			
 
				+
			
 
				+最后按 `limit=10000` 截断。
			
 
				+
			
 
				+最终顺序：**组按总 ROV，组内按 score。**
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3.3 `fetchPosteriorCandidates`（后验池）
			
 
				+
			
 
				+**目的：后验需求里，"昨日"驱动的"绝对高效率"先出，再出"相对裂变率"。**
			
 
				+
			
 
				+A、B 两段独立查询：
			
 
				+
			
 
				+```sql
			
 
				+-- A 段: demand_filter_sort_strategy LIKE '绝对高效率%'
			
 
				+SELECT ... WHERE ... AND demand_strategy='后验需求'
			
 
				+  AND demand_filter_sort_strategy LIKE '绝对高效率%'
			
 
				+  AND drive_dimension_time='昨日'
			
 
				+  AND (title IS NULL OR demand_content_title IS NULL OR title <> demand_content_title)
			
 
				+ORDER BY total_rov DESC, score DESC LIMIT 30000
			
 
				+
			
 
				+-- B 段: demand_filter_sort_strategy LIKE '相对裂变率%'
			
 
				+SELECT ... AND demand_filter_sort_strategy LIKE '相对裂变率%' AND drive_dimension_time='昨日' ...
			
 
				+```
			
 
				+
			
 
				+退化：A、B 都空且 `ghName` 非空 → 去 `ghName` 重查（`drive_dimension_time='昨日'` 仍保留）。
			
 
				+
			
 
				+每段进 `groupAndTopK`：
			
 
				+- 分组键：`demand_content_id`
			
 
				+- **过滤** `rov <= 0` 或 null
			
 
				+- **过滤** `excludeSelfTitle=true` → 用 `TitleNormalizer.isSelfTitle` 跳过自标题
			
 
				+- 组排序：组内最大 `total_rov` DESC
			
 
				+- 组内排序：`score` DESC，去重，每组最多 3 条
			
 
				+
			
 
				+A 段 + B 段 顺序拼接 → 跨段 `videoId` 去重 → 截 10000。
			
 
				+
			
 
				+最终顺序：**A 段(绝对高效率) 在前；段内组按总 ROV，组内按 score。**
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3.4 `fetchHotCandidates`（热门池）
			
 
				+
			
 
				+复用现有 `planMapperExt.getVideoList(...)`：
			
 
				+- `dt = videoMaxDt`，`datastatDt = videoDatastatMaxDt`
			
 
				+- `sort/type/channel/strategy` 由请求和 `param.getSort()` 派生
			
 
				+- 一次性查前 10000 条，**未分组、未二次过滤**
			
 
				+- 顺序由 SQL 决定（一般是 `fission_rate DESC` 或 `score DESC`）
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 4. 「全部」模式：四路随机穿插（`getInterleavedPage`）
			
 
				+
			
 
				+```
			
 
				+priorScene → 标 source='prior'
			
 
				+prior      → 标 source='prior'
			
 
				+posterior  → 标 source='posterior'
			
 
				+hot        → 标 source='hot'
			
 
				+```
			
 
				+
			
 
				+> priorScene 和 prior **对外都是 `source='prior'`**（前端"粉丝喜欢"统一标签）；浮层细节通过 `demandStrategy` 字段区分场景。
			
 
				+
			
 
				+### 算法
			
 
				+
			
 
				+1. 维护 4 个池的 `pointer[i]` 和 `exhausted[i]`，以及全局 `emittedIds`。
			
 
				+2. 种子：`seed = userId ^ LocalDate.now().toString().hashCode()`
			
 
				+   - 同一用户同一天翻页/刷新得到的顺序一致
			
 
				+   - 跨天/跨用户顺序变化
			
 
				+3. 循环直到 4 池全空：
			
 
				+   1. 在未耗尽池中等概率随机选一个
			
 
				+   2. 跳过该池里已被 `emittedIds` 包含的视频
			
 
				+   3. 取出第一条未发的，加入 `merged`、记入 `emittedIds`
			
 
				+   4. 若该池指针越界，标为 exhausted
			
 
				+4. `paginateCandidates`：`totalSize = merged.size()`，按 `pageNum/pageSize` 内存切片返回。
			
 
				+
			
 
				+### 时间复杂度
			
 
				+- 每池至少 1 次 DB Query；后验池 A、B 两段各查 1 次（共 2 次）；`ghName` 退化触发时，对应查询会去掉 `ghName` 再重查一次
			
 
				+- 主循环 O(总池容量)，每个池最大 10000 → 上界约 4 万次操作
			
 
				+- 单页响应 = 1 次 maxDt + 至少 5 次 SELECT（场景 1 + 先验 1 + 后验 A/B 2 + 热门 1，`ghName` 退化时更多）+ 内存穿插
			
 
				+
			
 
				+### 排序稳定性
			
 
				+- 同一用户同一天，所有分页之间顺序一致
			
 
				+- `priorScene` / `prior` / `posterior` 内部相对顺序保留（场景 → 总 ROV → score），随机只影响"哪一池先出"
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 5. 「粉丝喜欢」单源（`source=prior`）
			
 
				+
			
 
				+```java
			
 
				+scene = fetchPriorSceneCandidates(...)
			
 
				+prior = fetchPriorCandidates(...)
			
 
				+list  = interleavePriorWithScene(scene, prior)  // 严格 1:1
			
 
				+```
			
 
				+
			
 
				+`interleavePriorWithScene`：
			
 
				+- 单次循环：先从 `scene` 取一条未发的 → 再从 `prior` 取一条未发的，**严格 1:1 交替**
			
 
				+- 用 `seen<videoId>` 跨池去重，场景优先（先到先得）
			
 
				+- 一侧用完后，另一侧剩余按原顺序追加
			
 
				+
			
 
				+每条 VO 设 `source='prior'`，然后 `paginateCandidates` 切片。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 6. 「已发优质相似」单源（`source=posterior`）
			
 
				+
			
 
				+```java
			
 
				+list = fetchPosteriorCandidates(...)   // 顺序 = 绝对高效率段 → 相对裂变率段
			
 
				+```
			
 
				+
			
 
				+每条 VO 设 `source='posterior'`，`paginateCandidates` 切片。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 7. 「全局热门」单源（`source=hot`）
			
 
				+
			
 
				+不走候选池：复用原 `planMapperExt.getVideoCount + getVideoList` 真分页链路，DB 端 LIMIT/OFFSET。VO `source='hot'`。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 8. 字段全景：每条 VO 必有 `source` 字段
			
 
				+
			
 
				+| `source` | 含义 | 数据来源 | 浮层 demand 字段 |
			
 
				+|---|---|---|---|
			
 
				+| `prior` | 粉丝喜欢（场景 + 先验「传播的头部」） | `content_platform_demand_video` | 有完整字段，可看 `demandStrategy` 区分 |
			
 
				+| `posterior` | 已发优质相似 | `content_platform_demand_video` | 有完整字段 |
			
 
				+| `hot` | 全局热门 | `content_platform_video` | 只有基础字段（demand 相关字段为空） |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 9. 一图总览
			
 
				+
			
 
				+```
			
 
				+        ┌────────────────────────────────────────────────────────┐
			
 
				+        │ 1. 拉 4 个候选池 (每个池都自己分阶段、分组、去重、排序) │
			
 
				+        └────────────────────────────────────────────────────────┘
			
 
				+                              │
			
 
				+   ┌───────────────────────┬──┴──┬─────────────────────────────┐
			
 
				+   ▼                       ▼     ▼                             ▼
			
 
				+priorScene(10000)    prior(10000)  posterior(10000)         hot(10000)
			
 
				+  total_rov DESC,    仅: 传播的头部   A: 绝对高效率 + 昨日       SQL 默认
			
 
				+  score DESC         (单段查询)      B: 相对裂变率 + 昨日       (sort 决定)
			
 
				+                     prior:     组(point_type, standard_element) top3
			
 
				+                     posterior: 组(demand_content_id) top3
			
 
				+                              │
			
 
				+                              ▼
			
 
				+        ┌────────────────────────────────────────────────────────┐
			
 
				+        │ 2. 四路随机穿插                                          │
			
 
				+        │   - 种子 = userId ^ 今天                                  │
			
 
				+        │   - 跨池 videoId 去重                                     │
			
 
				+        │   - 池内顺序保留                                          │
			
 
				+        └────────────────────────────────────────────────────────┘
			
 
				+                              │
			
 
				+                              ▼
			
 
				+        ┌────────────────────────────────────────────────────────┐
			
 
				+        │ 3. paginateCandidates                                  │
			
 
				+        │   totalSize = 全量, 按 pageNum/pageSize 切片            │
			
 
				+        └────────────────────────────────────────────────────────┘
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 10. 关键代码位置
			
 
				+
			
 
				+| 内容 | 位置 |
			
 
				+|---|---|
			
 
				+| 主入口路由 | `ContentPlatformPlanServiceImpl.java:626` |
			
 
				+| 单源分页 | `ContentPlatformPlanServiceImpl.java:649` |
			
 
				+| 单源 hot | `ContentPlatformPlanServiceImpl.java:698` |
			
 
				+| 四路穿插 | `ContentPlatformPlanServiceImpl.java:743` |
			
 
				+| 场景池 fetcher | `ContentPlatformPlanServiceImpl.java:812` |
			
 
				+| 先验池 fetcher | `ContentPlatformPlanServiceImpl.java:840` |
			
 
				+| 后验池 fetcher | `ContentPlatformPlanServiceImpl.java:875` |
			
 
				+| `groupAndTopK` 通用排序 | `ContentPlatformPlanServiceImpl.java:912` |
			
 
				+| 段间拼接 + 去重 | `ContentPlatformPlanServiceImpl.java:956` |
			
 
				+| 热门池 fetcher | `ContentPlatformPlanServiceImpl.java:976` |
			
 
				+| Mapper SQL | `mapper/contentplatform/ext/ContentPlatformDemandVideoMapperExt.xml` (selectForRecommend) |