Просмотр исходного кода

推荐池改为需求强度优先 + 组内 score top3 + 两段拼接

先验池:dimension='传播头部' 段优先,其余 dimension 段次之;
段内按 (point_type, standard_element) 分组。
后验池:绝对高效率 段优先,相对裂变率 段次之;段内按 demand_content_id 分组。
两池均按 total_rov DESC 排序需求,组内取 score top 3,段间拼接 + video_id 去重。
单源 prior/posterior 路径与穿插共用候选构建,移除独立分页 mapper。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
刘立冬 1 день назад
Родитель
Commit
8ba7b1df44

+ 8 - 23
api-module/src/main/java/com/tzld/piaoquan/api/dao/mapper/contentplatform/ext/ContentPlatformDemandVideoMapperExt.java

@@ -28,36 +28,21 @@ public interface ContentPlatformDemandVideoMapperExt {
     String getMaxDt();
 
     /**
-     * 推荐场景候选池查询:按 demand_strategy 取指定 crowd_segment 的候选视频,按 score DESC 取前 N 行。
-     * 用于穿插模式(需要先 materialize 三池再跨池去重);不在 SQL 层做 video_id 去重,调用方在 Java 侧按 video_id 保留首条(即得分最高的一条)。
-     * excludeSelfTitle=true 时过滤掉 video.title == demand_content_title 的行(用于后验需求避免推种子视频自身或同标题重复视频)。
+     * 推荐场景候选池查询:按 demand_strategy 取指定 crowd_segment 的候选行,
+     * 按 total_rov DESC, score DESC 排,最多取 limit 行。
+     * dimension:等值过滤;dimensionExclude:排除该 dimension(包含 NULL 视为通过);
+     * demandFilterSortStrategyLike:对 demand_filter_sort_strategy 做 LIKE 过滤;
+     * excludeSelfTitle=true 时过滤掉 video.title == demand_content_title 的行。
      */
     List<ContentPlatformDemandVideo> selectForRecommend(@Param("dt") String dt,
                                                        @Param("crowdSegment") String crowdSegment,
                                                        @Param("demandStrategy") String demandStrategy,
+                                                       @Param("dimension") String dimension,
+                                                       @Param("dimensionExclude") String dimensionExclude,
+                                                       @Param("demandFilterSortStrategyLike") String demandFilterSortStrategyLike,
                                                        @Param("limit") int limit,
                                                        @Param("excludeSelfTitle") boolean excludeSelfTitle);
 
-    /**
-     * 单源真分页:按 demand_strategy 取指定 crowd_segment 的候选视频,按 score DESC 分页。
-     * 信任离线侧 (crowd_segment, strategy) 内 video_id 已去重的约定,不做 Java 端去重。
-     * excludeSelfTitle=true 时过滤掉 video.title == demand_content_title 的行。
-     */
-    List<ContentPlatformDemandVideo> selectForRecommendPaged(@Param("dt") String dt,
-                                                             @Param("crowdSegment") String crowdSegment,
-                                                             @Param("demandStrategy") String demandStrategy,
-                                                             @Param("offset") int offset,
-                                                             @Param("pageSize") int pageSize,
-                                                             @Param("excludeSelfTitle") boolean excludeSelfTitle);
-
-    /**
-     * 单源真分页对应的总数查询,用于 Page.totalSize;过滤条件需与 selectForRecommendPaged 保持一致。
-     */
-    int countForRecommend(@Param("dt") String dt,
-                          @Param("crowdSegment") String crowdSegment,
-                          @Param("demandStrategy") String demandStrategy,
-                          @Param("excludeSelfTitle") boolean excludeSelfTitle);
-
     List<ContentPlatformDemandVideo> selectActiveVideos(@Param("dt") String dt);
 
     int updateStatusByVideoId(@Param("videoId") Long videoId, @Param("dt") String dt, @Param("status") Integer status, @Param("updateTimestamp") Long updateTimestamp);

+ 112 - 60
api-module/src/main/java/com/tzld/piaoquan/api/service/contentplatform/impl/ContentPlatformPlanServiceImpl.java

@@ -609,8 +609,12 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
 
     private static final int DEMAND_CANDIDATE_LIMIT = 10000;
     private static final int HOT_CANDIDATE_LIMIT = 10000;
+    private static final int TOP_K_PER_DEMAND = 3;
     private static final String DEMAND_STRATEGY_PRIOR = "先验需求";
     private static final String DEMAND_STRATEGY_POSTERIOR = "后验需求";
+    private static final String PRIOR_PREMIUM_DIMENSION = "传播头部";
+    private static final String POSTERIOR_FILTER_ABS_LIKE = "绝对高效率%";
+    private static final String POSTERIOR_FILTER_REL_LIKE = "相对裂变率%";
     private static final String SOURCE_PRIOR = "prior";
     private static final String SOURCE_POSTERIOR = "posterior";
     private static final String SOURCE_HOT = "hot";
@@ -636,54 +640,20 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
     }
 
     /**
-     * 单一来源分页:从对应候选池取最多 N 条,按候选池顺序分页。
+     * 单一来源分页:与穿插使用同一套候选构建逻辑(先验/后验各 2 阶段、组内 score top K),
+     * 再按 pageNum/pageSize 在内存中分页。totalSize = 去重后总数。
      */
     private Page<VideoContentItemVO> getSingleSourcePage(VideoContentListParam param, ContentPlatformAccount user, String source) {
         if (SOURCE_HOT.equals(source)) {
             return getHotSourcePaged(param, user);
         }
-        String demandStrategy = SOURCE_PRIOR.equals(source) ? DEMAND_STRATEGY_PRIOR : DEMAND_STRATEGY_POSTERIOR;
-        return getDemandSourcePaged(param, user, source, demandStrategy);
-    }
-
-    /**
-     * 单源 prior/posterior:SQL OFFSET/LIMIT 真分页,totalSize 来自 COUNT(*)。
-     * 信任离线侧 (crowd_segment, strategy) 内 video_id 已去重的约定。
-     */
-    private Page<VideoContentItemVO> getDemandSourcePaged(VideoContentListParam param, ContentPlatformAccount user,
-                                                          String source, String demandStrategy) {
-        int pageSize = param.getPageSize();
-        int pageNum = param.getPageNum();
-        Page<VideoContentItemVO> result = new Page<>(pageNum, pageSize);
-        String dt = demandVideoMapperExt.getMaxDt();
-        if (!StringUtils.hasText(dt)) {
-            result.setTotalSize(0);
-            result.setObjs(new ArrayList<>());
-            return result;
-        }
-        String crowdSegment = user.getChannel();
-        boolean excludeSelfTitle = DEMAND_STRATEGY_POSTERIOR.equals(demandStrategy);
-        int count = demandVideoMapperExt.countForRecommend(dt, crowdSegment, demandStrategy, excludeSelfTitle);
-        result.setTotalSize(count);
-        if (count == 0) {
-            result.setObjs(new ArrayList<>());
-            return result;
-        }
-        int offset = (pageNum - 1) * pageSize;
-        if (offset >= count) {
-            result.setObjs(new ArrayList<>());
-            return result;
-        }
-        List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommendPaged(dt, crowdSegment, demandStrategy, offset, pageSize, excludeSelfTitle);
-        if (excludeSelfTitle) {
-            rows.removeIf(r -> TitleNormalizer.isSelfTitle(r.getTitle(), r.getDemandContentTitle()));
-        }
-        List<VideoContentItemVO> list = buildDemandVideoContentItemVOList(rows);
+        List<VideoContentItemVO> list = SOURCE_PRIOR.equals(source)
+                ? fetchPriorCandidates(user, DEMAND_CANDIDATE_LIMIT)
+                : fetchPosteriorCandidates(user, DEMAND_CANDIDATE_LIMIT);
         for (VideoContentItemVO v : list) {
             v.setSource(source);
         }
-        result.setObjs(list);
-        return result;
+        return paginateCandidates(param, list);
     }
 
     /**
@@ -783,42 +753,124 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         return result;
     }
 
+    /**
+     * 先验池:A 段 dimension='传播头部' → B 段 其余 dimension。
+     * 每段按 (point_type, standard_element) 分组,组按 total_rov DESC、组内 score DESC 取前 K;段间拼接 + video_id 去重。
+     */
     private List<VideoContentItemVO> fetchPriorCandidates(ContentPlatformAccount user, int limit) {
-        return fetchDemandCandidates(user, DEMAND_STRATEGY_PRIOR, limit);
-    }
+        String dt = demandVideoMapperExt.getMaxDt();
+        if (!StringUtils.hasText(dt)) {
+            return new ArrayList<>();
+        }
+        String crowdSegment = user.getChannel();
+        int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
 
-    private List<VideoContentItemVO> fetchPosteriorCandidates(ContentPlatformAccount user, int limit) {
-        return fetchDemandCandidates(user, DEMAND_STRATEGY_POSTERIOR, limit);
+        List<ContentPlatformDemandVideo> stageA = demandVideoMapperExt.selectForRecommend(
+                dt, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, fetchLimit, false);
+        List<ContentPlatformDemandVideo> stageB = demandVideoMapperExt.selectForRecommend(
+                dt, crowdSegment, DEMAND_STRATEGY_PRIOR, null, PRIOR_PREMIUM_DIMENSION, null, fetchLimit, false);
+
+        Function<ContentPlatformDemandVideo, String> keyFn = r ->
+                (r.getPointType() == null ? "" : r.getPointType()) + "\u0001"
+                        + (r.getStandardElement() == null ? "" : r.getStandardElement());
+
+        List<VideoContentItemVO> outA = groupAndTopK(stageA, keyFn, TOP_K_PER_DEMAND, false);
+        List<VideoContentItemVO> outB = groupAndTopK(stageB, keyFn, TOP_K_PER_DEMAND, false);
+        return concatDedup(outA, outB, limit);
     }
 
-    private List<VideoContentItemVO> fetchDemandCandidates(ContentPlatformAccount user, String demandStrategy, int limit) {
+    /**
+     * 后验池:A 段 "绝对高效率" → B 段 "相对裂变率"。
+     * 每段按 demand_content_id 分组,组按 total_rov DESC、组内 score DESC 取前 K;段间拼接 + video_id 去重。
+     */
+    private List<VideoContentItemVO> fetchPosteriorCandidates(ContentPlatformAccount user, int limit) {
         String dt = demandVideoMapperExt.getMaxDt();
         if (!StringUtils.hasText(dt)) {
             return new ArrayList<>();
         }
         String crowdSegment = user.getChannel();
-        boolean excludeSelfTitle = DEMAND_STRATEGY_POSTERIOR.equals(demandStrategy);
-        // 超量拉取,再按 video_id 去重保留首条(即得分最高的一条)
-        List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(dt, crowdSegment, demandStrategy, limit * 3, excludeSelfTitle);
+        int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
+
+        List<ContentPlatformDemandVideo> stageAbs = demandVideoMapperExt.selectForRecommend(
+                dt, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_ABS_LIKE, fetchLimit, true);
+        List<ContentPlatformDemandVideo> stageRel = demandVideoMapperExt.selectForRecommend(
+                dt, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, POSTERIOR_FILTER_REL_LIKE, fetchLimit, true);
+
+        Function<ContentPlatformDemandVideo, String> keyFn = r ->
+                r.getDemandContentId() == null ? "" : r.getDemandContentId();
+
+        List<VideoContentItemVO> outAbs = groupAndTopK(stageAbs, keyFn, TOP_K_PER_DEMAND, true);
+        List<VideoContentItemVO> outRel = groupAndTopK(stageRel, keyFn, TOP_K_PER_DEMAND, true);
+        return concatDedup(outAbs, outRel, limit);
+    }
+
+    /**
+     * 行级按 keyFn 分组:
+     *   1. 组按"组内最大 total_rov" 倒序
+     *   2. 组内按 score 倒序,组内 video_id 去重,最多取 topK 条
+     *   3. excludeSelfTitle=true 时先在 Java 端用 TitleNormalizer 过滤自标题行
+     */
+    private List<VideoContentItemVO> groupAndTopK(List<ContentPlatformDemandVideo> rows,
+                                                  Function<ContentPlatformDemandVideo, String> keyFn,
+                                                  int topK,
+                                                  boolean excludeSelfTitle) {
         if (CollectionUtils.isEmpty(rows)) {
             return new ArrayList<>();
         }
-        LinkedHashMap<Long, ContentPlatformDemandVideo> distinct = new LinkedHashMap<>();
-        for (ContentPlatformDemandVideo row : rows) {
-            if (row.getVideoId() == null) {
-                continue;
+        LinkedHashMap<String, List<ContentPlatformDemandVideo>> groups = new LinkedHashMap<>();
+        Map<String, Double> groupRov = new HashMap<>();
+        for (ContentPlatformDemandVideo r : rows) {
+            if (r.getVideoId() == null) continue;
+            if (excludeSelfTitle && TitleNormalizer.isSelfTitle(r.getTitle(), r.getDemandContentTitle())) continue;
+            String k = keyFn.apply(r);
+            if (k == null) continue;
+            groups.computeIfAbsent(k, x -> new ArrayList<>()).add(r);
+            double tr = r.getTotalRov() == null ? 0d : r.getTotalRov();
+            Double prev = groupRov.get(k);
+            if (prev == null || tr > prev) {
+                groupRov.put(k, tr);
             }
-            if (excludeSelfTitle && TitleNormalizer.isSelfTitle(row.getTitle(), row.getDemandContentTitle())) {
-                continue;
+        }
+        List<Map.Entry<String, List<ContentPlatformDemandVideo>>> sortedGroups = new ArrayList<>(groups.entrySet());
+        sortedGroups.sort((a, b) -> Double.compare(
+                groupRov.getOrDefault(b.getKey(), 0d),
+                groupRov.getOrDefault(a.getKey(), 0d)));
+
+        List<ContentPlatformDemandVideo> out = new ArrayList<>();
+        for (Map.Entry<String, List<ContentPlatformDemandVideo>> e : sortedGroups) {
+            List<ContentPlatformDemandVideo> g = new ArrayList<>(e.getValue());
+            g.sort((a, b) -> Double.compare(
+                    b.getScore() == null ? 0d : b.getScore(),
+                    a.getScore() == null ? 0d : a.getScore()));
+            Set<Long> seen = new HashSet<>();
+            int taken = 0;
+            for (ContentPlatformDemandVideo r : g) {
+                if (!seen.add(r.getVideoId())) continue;
+                out.add(r);
+                if (++taken >= topK) break;
             }
-            if (!distinct.containsKey(row.getVideoId())) {
-                distinct.put(row.getVideoId(), row);
-                if (distinct.size() >= limit) {
-                    break;
-                }
+        }
+        return buildDemandVideoContentItemVOList(out);
+    }
+
+    private List<VideoContentItemVO> concatDedup(List<VideoContentItemVO> a, List<VideoContentItemVO> b, int limit) {
+        Set<Long> seen = new HashSet<>();
+        List<VideoContentItemVO> out = new ArrayList<>();
+        for (VideoContentItemVO v : a) {
+            if (v.getVideoId() == null) continue;
+            if (seen.add(v.getVideoId())) {
+                out.add(v);
+                if (out.size() >= limit) return out;
+            }
+        }
+        for (VideoContentItemVO v : b) {
+            if (v.getVideoId() == null) continue;
+            if (seen.add(v.getVideoId())) {
+                out.add(v);
+                if (out.size() >= limit) return out;
             }
         }
-        return buildDemandVideoContentItemVOList(new ArrayList<>(distinct.values()));
+        return out;
     }
 
     private List<VideoContentItemVO> fetchHotCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {

+ 10 - 39
api-module/src/main/resources/mapper/contentplatform/ext/ContentPlatformDemandVideoMapperExt.xml

@@ -90,55 +90,26 @@
         <if test="demandStrategy != null and demandStrategy != ''">
             AND demand_strategy = #{demandStrategy}
         </if>
-        <if test="excludeSelfTitle">
-            AND (title IS NULL OR demand_content_title IS NULL OR title &lt;&gt; demand_content_title)
+        <if test="dimension != null and dimension != ''">
+            AND dimension = #{dimension}
         </if>
-        ORDER BY score DESC
-        LIMIT #{limit}
-    </select>
-
-    <select id="selectActiveVideos" resultType="com.tzld.piaoquan.api.model.po.contentplatform.ContentPlatformDemandVideo">
-        SELECT DISTINCT video_id
-        FROM content_platform_demand_video
-        WHERE dt = #{dt} AND status = 1
-    </select>
-
-    <select id="selectForRecommendPaged" resultType="com.tzld.piaoquan.api.model.po.contentplatform.ContentPlatformDemandVideo">
-        SELECT id, dt, channel_name, channel_level3, crowd_segment, dimension, point_type, standard_element,
-               category_name, demand_id, crowd_package, conversion_target, partner, account, scene_value,
-               demand_strategy, drive_dimension_time, demand_filter_sort_strategy, demand_type,
-               demand_content_id, demand_content_title, demand_content_topic,
-               crowd_count, video_count, visit_uv, uv_ratio, total_rov, online_action, match_experiment_id,
-               video_id, config_code, score, sim, rov,
-               match_text, title, cover, video, experiment_id, status, create_timestamp, update_timestamp
-        FROM content_platform_demand_video
-        WHERE dt = #{dt} AND status = 1
-        <if test="crowdSegment != null and crowdSegment != ''">
-            AND crowd_segment = #{crowdSegment}
+        <if test="dimensionExclude != null and dimensionExclude != ''">
+            AND (dimension IS NULL OR dimension &lt;&gt; #{dimensionExclude})
         </if>
-        <if test="demandStrategy != null and demandStrategy != ''">
-            AND demand_strategy = #{demandStrategy}
+        <if test="demandFilterSortStrategyLike != null and demandFilterSortStrategyLike != ''">
+            AND demand_filter_sort_strategy LIKE #{demandFilterSortStrategyLike}
         </if>
         <if test="excludeSelfTitle">
             AND (title IS NULL OR demand_content_title IS NULL OR title &lt;&gt; demand_content_title)
         </if>
-        ORDER BY score DESC
-        LIMIT #{offset}, #{pageSize}
+        ORDER BY total_rov DESC, score DESC
+        LIMIT #{limit}
     </select>
 
-    <select id="countForRecommend" resultType="int">
-        SELECT COUNT(*)
+    <select id="selectActiveVideos" resultType="com.tzld.piaoquan.api.model.po.contentplatform.ContentPlatformDemandVideo">
+        SELECT DISTINCT video_id
         FROM content_platform_demand_video
         WHERE dt = #{dt} AND status = 1
-        <if test="crowdSegment != null and crowdSegment != ''">
-            AND crowd_segment = #{crowdSegment}
-        </if>
-        <if test="demandStrategy != null and demandStrategy != ''">
-            AND demand_strategy = #{demandStrategy}
-        </if>
-        <if test="excludeSelfTitle">
-            AND (title IS NULL OR demand_content_title IS NULL OR title &lt;&gt; demand_content_title)
-        </if>
     </select>
 
     <update id="updateStatusByVideoId">