Просмотр исходного кода

Merge branch 'cooperation_video_candidate_pool_improved_lld_0509' of Server/growth-manager into master

liulidong 8 часов назад
Родитель
Коммит
bbdecd4c76

+ 133 - 50
api-module/src/main/java/com/tzld/piaoquan/api/service/contentplatform/impl/ContentPlatformPlanServiceImpl.java

@@ -623,6 +623,8 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
     private static final String DEMAND_STRATEGY_PRIOR = "人群需求";
     private static final String DEMAND_STRATEGY_POSTERIOR = "优质相似";
     private static final String PRIOR_PREMIUM_DIMENSION = "传播的头部";
+    /** 第三路池:dimension='增长的头部',与 PRIOR_PREMIUM_DIMENSION 语义不同(增长强度 vs 传播强度)。 */
+    private static final String GROWTH_PREMIUM_DIMENSION = "增长的头部";
     /** match_method 取值,priorScene 池识别用(0519+ 起替代旧 demand_strategy='人群需求-场景') */
     private static final String MATCH_METHOD_PRIOR_SCENE = "场景已看视频";
     /** match_method 取值,prior / posterior 池识别用 */
@@ -694,11 +696,17 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         }
         List<VideoContentItemVO> list;
         if (SOURCE_PRIOR.equals(source)) {
-            // 粉丝喜欢 = priorScene(场景已看视频) 与 prior(人群需求·票圈推荐库) 池间交替,块大小 1~2 随机,起始池由 seed 决定;
-            // K=2 保证两池 top-2 必在前 4 条,池间交替避免连续同源,seed=userId^date 同用户同日稳定
+            // 粉丝喜欢:
+            //   公众号入口(type∈{0,1,4}):3 池 — priorScene + prior(传播头部) + growth(增长头部),每位独立等概率抽 + seed=nanoTime
+            //   企微入口  (type∈{2,3}):2 池 — priorScene + prior(传播头部),严格 1:1 交替(无随机)
             List<VideoContentItemVO> scene = fetchPriorSceneCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
             List<VideoContentItemVO> prior = fetchPriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
-            list = interleavePriorWithScene(scene, prior, user);
+            if (isGzhEntryType(param.getType())) {
+                List<VideoContentItemVO> growth = fetchPriorGrowthCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
+                list = interleavePriorPoolsRandom(scene, prior, growth, user);
+            } else {
+                list = interleavePriorWithScene(scene, prior);
+            }
         } else {
             list = fetchPosteriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
         }
@@ -709,45 +717,92 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
     }
 
     /**
-     * priorScene 与 prior 池间交替混合输出:
-     *  - 块大小 1~2 随机(K=2),池间交替;起始池由 seed 决定
-     *  - seed = userId XOR LocalDate.toString().hashCode():同一用户同一天刷新顺序稳定,跨用户/跨日不同
-     *  - K=2 保证两池 top-2 必落在前 4 条(只是顺序按 seed 微调,不再机械 1:1)
+     * 3 池每位独立等概率抽样(公众号入口用):
+     *  - 池: [scene, prior(传播头部), growth(增长头部)]
+     *  - 每个输出位置在「未耗尽池」中等概率抽 1,从该池头部取下一条
+     *  - seed = nanoTime ^ userId:每次接口请求都换 seed,第一条来自哪一路每次都不同
+     *  - 跨池 video_id / 标题去重;翻页 P1/P2 不保证序列一致(刷新即换排)
+     */
+    private List<VideoContentItemVO> interleavePriorPoolsRandom(List<VideoContentItemVO> scene,
+                                                                List<VideoContentItemVO> prior,
+                                                                List<VideoContentItemVO> growth,
+                                                                ContentPlatformAccount user) {
+        long userSeed = (user == null || user.getId() == null) ? 0L : user.getId();
+        long seed = System.nanoTime() ^ userSeed;
+        return interleaveMultiPools(Arrays.asList(scene, prior, growth), new Random(seed), 1);
+    }
+
+    /**
+     * 企微入口用:priorScene 与 prior 池严格 1:1 交替输出(无随机):
+     *  - 起始池固定 scene,交替 1:1 各取 1 条
      *  - 跨池 video_id / 标题去重;一侧用完后,剩余按原顺序追加输出,不丢数据
      */
     private List<VideoContentItemVO> interleavePriorWithScene(List<VideoContentItemVO> scene,
-                                                              List<VideoContentItemVO> prior,
-                                                              ContentPlatformAccount user) {
+                                                              List<VideoContentItemVO> prior) {
         Set<Long> seenIds = new HashSet<>();
         Set<String> seenTitles = new HashSet<>();
         List<VideoContentItemVO> out = new ArrayList<>();
         int si = 0, pi = 0;
+        while (si < scene.size() || pi < prior.size()) {
+            while (si < scene.size()) {
+                VideoContentItemVO v = scene.get(si++);
+                if (tryEmit(v, seenIds, seenTitles, out)) break;
+            }
+            while (pi < prior.size()) {
+                VideoContentItemVO v = prior.get(pi++);
+                if (tryEmit(v, seenIds, seenTitles, out)) break;
+            }
+        }
+        return out;
+    }
 
-        long userSeed = (user == null || user.getId() == null) ? 0L : user.getId();
-        long seed = userSeed ^ LocalDate.now().toString().hashCode();
-        Random rng = new Random(seed);
-
-        boolean fromScene = rng.nextBoolean();
+    /**
+     * 通用 N 池随机穿插:
+     *  - maxBlockSize=1 → 每位独立等概率从所有未耗尽池抽 1(允许连续同源)
+     *  - maxBlockSize=K(>=2) → 块大小 1~K 随机,块间切「其他未耗尽池」(避免连续同源)
+     *  - 跨池 video_id / 标题去重;某池跳过去重后耗尽即标记 exhausted
+     */
+    private List<VideoContentItemVO> interleaveMultiPools(List<List<VideoContentItemVO>> pools,
+                                                          Random rng,
+                                                          int maxBlockSize) {
+        int n = pools.size();
+        int[] pointers = new int[n];
+        boolean[] exhausted = new boolean[n];
+        for (int i = 0; i < n; i++) {
+            if (pools.get(i) == null || pools.get(i).isEmpty()) exhausted[i] = true;
+        }
+        Set<Long> seenIds = new HashSet<>();
+        Set<String> seenTitles = new HashSet<>();
+        List<VideoContentItemVO> out = new ArrayList<>();
 
-        while (si < scene.size() || pi < prior.size()) {
-            // 当前选中的池已空 → 强制切到另一池
-            if (fromScene && si >= scene.size()) fromScene = false;
-            else if (!fromScene && pi >= prior.size()) fromScene = true;
+        int current = -1;
+        while (true) {
+            List<Integer> alive = new ArrayList<>(n);
+            for (int i = 0; i < n; i++) if (!exhausted[i]) alive.add(i);
+            if (alive.isEmpty()) break;
+
+            if (maxBlockSize <= 1) {
+                // K=1: 每位都从所有未耗尽池等概率抽,允许连续同源
+                current = alive.get(rng.nextInt(alive.size()));
+            } else if (current < 0 || exhausted[current]) {
+                // K>=2 首次或当前池耗尽: 从所有未耗尽池随机
+                current = alive.get(rng.nextInt(alive.size()));
+            } else if (alive.size() > 1) {
+                // K>=2 块切换: 从「其他未耗尽池」随机抽 1
+                List<Integer> others = new ArrayList<>(alive.size() - 1);
+                for (int i : alive) if (i != current) others.add(i);
+                current = others.get(rng.nextInt(others.size()));
+            }
+            // alive.size()==1 时 current 维持(只剩这一池,直到耗尽)
 
-            int blockSize = 1 + rng.nextInt(2); // 1 or 2
+            int blockSize = maxBlockSize <= 1 ? 1 : 1 + rng.nextInt(maxBlockSize);
             int emitted = 0;
-            if (fromScene) {
-                while (si < scene.size() && emitted < blockSize) {
-                    VideoContentItemVO v = scene.get(si++);
-                    if (tryEmit(v, seenIds, seenTitles, out)) emitted++;
-                }
-            } else {
-                while (pi < prior.size() && emitted < blockSize) {
-                    VideoContentItemVO v = prior.get(pi++);
-                    if (tryEmit(v, seenIds, seenTitles, out)) emitted++;
-                }
+            List<VideoContentItemVO> pool = pools.get(current);
+            while (emitted < blockSize && pointers[current] < pool.size()) {
+                VideoContentItemVO v = pool.get(pointers[current]++);
+                if (tryEmit(v, seenIds, seenTitles, out)) emitted++;
             }
-            fromScene = !fromScene;
+            if (pointers[current] >= pool.size()) exhausted[current] = true;
         }
         return out;
     }
@@ -806,8 +861,10 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
     }
 
     /**
-     * 四路随机穿插 + 跨路 video_id 去重。
-     * 4 池: priorScene / prior / posterior / hot,priorScene 与 prior 对外都标 source='prior'(粉丝喜欢)。
+     * 默认 tab 随机穿插 + 跨路 video_id 去重。
+     * 公众号入口 5 池: priorScene / prior(传播头部) / priorGrowth(增长头部) / posterior / hot
+     * 企微入口   4 池: priorScene / prior(传播头部) / posterior / hot(沿用旧逻辑,不加 growth)
+     * prior 类(scene/prior/growth)对外都标 source='prior'(粉丝喜欢)。
      * 每步在未耗尽的池中等概率随机选一个,从该池头部取下一条(池内顺序由 fetcher 决定)。
      * 用 (userId ^ 当天日期) 作为种子,保证同一用户当天翻页顺序一致、刷新一致。
      */
@@ -821,7 +878,14 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         for (VideoContentItemVO v : posterior) v.setSource(SOURCE_POSTERIOR);
         for (VideoContentItemVO v : hot) v.setSource(SOURCE_HOT);
 
-        List<List<VideoContentItemVO>> pools = Arrays.asList(priorScene, prior, posterior, hot);
+        List<List<VideoContentItemVO>> pools;
+        if (isGzhEntryType(param.getType())) {
+            List<VideoContentItemVO> priorGrowth = fetchPriorGrowthCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
+            for (VideoContentItemVO v : priorGrowth) v.setSource(SOURCE_PRIOR);
+            pools = Arrays.asList(priorScene, prior, priorGrowth, posterior, hot);
+        } else {
+            pools = Arrays.asList(priorScene, prior, posterior, hot);
+        }
         int N = pools.size();
         int[] pointers = new int[N];
         boolean[] exhausted = new boolean[N];
@@ -896,7 +960,7 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
      * 后处理:
      *   1. 同 video_id 仅保留 total_rov 最大的代表行(利用 SQL 已按 total_rov DESC, score DESC 排好,首次即最大)
      *   2. 过滤 rov 为 null 或 <=0(视频近 7 日无表现)
-     *   3. 输出顺序按 rov DESC,相同 rov 按 total_rov DESC 兜底
+     *   3. 输出顺序按 sceneSumRov DESC,相同再按 total_rov DESC 兜底
      */
     private List<VideoContentItemVO> fetchPriorSceneCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
         String channelName = resolveChannelName(param);
@@ -932,11 +996,11 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
             if (r.getRov() == null || r.getRov() <= 0) continue;
             filtered.add(r);
         }
-        // 3. 按 rov DESC 排序,次级 total_rov DESC
+        // 3. 按 sceneSumRov DESC 排序,次级 total_rov DESC
         filtered.sort((a, b) -> {
             int c = Double.compare(
-                    b.getRov() == null ? 0d : b.getRov(),
-                    a.getRov() == null ? 0d : a.getRov());
+                    b.getSceneSumRov() == null ? 0d : b.getSceneSumRov(),
+                    a.getSceneSumRov() == null ? 0d : a.getSceneSumRov());
             if (c != 0) return c;
             return Double.compare(
                     b.getTotalRov() == null ? 0d : b.getTotalRov(),
@@ -946,14 +1010,31 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
     }
 
     /**
-     * 人群需求池(prior):只取 dimension='传播的头部' 的行。
-     * total_rov 在 prior 池里代表「群体对(point_type, standard_element)这个特征的需求强度」,
-     * 不同渠道分布差异大,所以按 channel 内 total_rov 分位保留 top 50% 特征组,
-     * 过滤掉群体根本不爱的弱需求题材。
-     * 再按 (point_type, standard_element) 分组,组按 total_rov DESC、组内 score DESC 取前 K,
-     * 结果按 limit 截断。
+     * 人群需求池(prior):dimension='传播的头部'。
+     * 复用 fetchPriorDimensionCandidates 的 pipeline,仅 dimension 不同。
      */
     private List<VideoContentItemVO> fetchPriorCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
+        return fetchPriorDimensionCandidates(param, user, limit, PRIOR_PREMIUM_DIMENSION);
+    }
+
+    /**
+     * 人群需求池(priorGrowth):dimension='增长的头部'。
+     * 与 prior 池(传播头部)分开各算各的 top 50% 分位,二者跨池 video_id 去重在 interleave 阶段处理。
+     */
+    private List<VideoContentItemVO> fetchPriorGrowthCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
+        return fetchPriorDimensionCandidates(param, user, limit, GROWTH_PREMIUM_DIMENSION);
+    }
+
+    /**
+     * 人群需求池共用 pipeline,按 dimension 切分:
+     *   1. SQL: demand_strategy='人群需求' + dimension=<指定> + match_method='票圈推荐库'
+     *   2. 退化:ghName 无数据 → 去 ghName;crowd_segment 在对侧渠道 0 行 → 去 crowd_segment
+     *   3. 近 7 日 rov >= DEMAND_MIN_ROV 过滤
+     *   4. 按 (point_type, standard_element) 分组,按 total_rov 分位保留 top 50%
+     *   5. 组按 total_rov DESC、组内 score DESC,top K=3
+     *   6. 截断到 limit
+     */
+    private List<VideoContentItemVO> fetchPriorDimensionCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit, String dimension) {
         String channelName = resolveChannelName(param);
         String dt = demandVideoMapperExt.getMaxDt(channelName);
         if (!StringUtils.hasText(dt)) {
@@ -964,22 +1045,18 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
 
         String category = StringUtils.hasText(param.getCategory()) ? param.getCategory() : null;
-        // prior 池加 match_method='票圈推荐库' 过滤,与 priorScene 池('场景已看视频')互斥,避免在新数据格式下两池抓重
         List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
-                dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, ghName, null, category, MATCH_METHOD_PRIOR, fetchLimit, false);
+                dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, dimension, null, null, ghName, null, category, MATCH_METHOD_PRIOR, fetchLimit, false);
 
-        // 退化:该 ghName 无数据 → 退回渠道粒度
         if (ghName != null && rows.isEmpty()) {
             rows = demandVideoMapperExt.selectForRecommend(
-                    dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, category, MATCH_METHOD_PRIOR, fetchLimit, false);
+                    dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, dimension, null, null, null, null, category, MATCH_METHOD_PRIOR, fetchLimit, false);
         }
-        // 跨渠道退化:channel_name 命中但 crowd_segment 在对侧 0 行 → 去 crowd_segment 拉通用数据
         if (channelName != null && rows.isEmpty()) {
             rows = demandVideoMapperExt.selectForRecommend(
-                    dt, channelName, null, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, category, MATCH_METHOD_PRIOR, fetchLimit, false);
+                    dt, channelName, null, DEMAND_STRATEGY_PRIOR, dimension, null, null, null, null, category, MATCH_METHOD_PRIOR, fetchLimit, false);
         }
 
-        // 近 7 日 rov 下限,过滤掉低质量近期表现的视频(0513 验证 ≥0.02 保留 ~41%)
         rows = rows.stream()
                 .filter(r -> r.getRov() != null && r.getRov() >= DEMAND_MIN_ROV)
                 .collect(Collectors.toList());
@@ -1328,6 +1405,12 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         return result;
     }
 
+    /** type ∈ {0 自动回复, 1 服务号推送, 4 公众号推送} 为公众号入口;{2,3} 为企微。type=null/其他 视为非公众号(走企微稳定逻辑)。 */
+    private boolean isGzhEntryType(Integer type) {
+        if (type == null) return false;
+        return type == 0 || type == 1 || type == 4;
+    }
+
     private String getVideoContentListType(Integer type) {
         switch (type) {
             case 0:

+ 2 - 2
api-module/src/main/resources/mapper/contentplatform/ext/ContentPlatformPlanMapperExt.xml

@@ -101,7 +101,7 @@
     <select id="getVideoCount" resultType="java.lang.Integer">
         select count(1)
         from content_platform_video_agg
-        where dt = #{dt} and status = 1 and score > #{minScore}
+        where dt = #{dt} and status = 1 and score >= #{minScore}
         <if test="param.title!= null and param.title!= ''">
             and title like concat('%', #{param.title}, '%')
         </if>
@@ -116,7 +116,7 @@
         left join content_platform_video_datastat_agg datastat
             on datastat.dt = #{datastatDt} and datastat.type = #{type} and datastat.channel = #{channel}
                    and datastat.strategy = #{strategy} and datastat.video_id = video.video_id
-        where video.dt = #{dt} and video.status = 1 and video.score > #{minScore}
+        where video.dt = #{dt} and video.status = 1 and video.score >= #{minScore}
         <if test="param.title!= null and param.title!= ''">
             and video.title like concat('%', #{param.title}, '%')
         </if>