Просмотр исходного кода

Merge branch 'cooperation_video_candidate_pool_improved_lld_0509' of Server/growth-manager into master

liulidong 13 часов назад
Родитель
Сommit
ef1cb9178a

+ 3 - 1
.gitignore

@@ -39,4 +39,6 @@ xxl-job
 
 .DS_Store
 
-*.log
+*.log
+
+scripts/

+ 20 - 0
api-module/src/main/java/com/tzld/piaoquan/api/dao/mapper/contentplatform/ext/ContentPlatformDemandVideoMapperExt.java

@@ -29,6 +29,26 @@ public interface ContentPlatformDemandVideoMapperExt {
 
     String getMaxDt();
 
+    /**
+     * Candidate-pool query for the recommendation scenario: fetches the candidate rows of the
+     * given crowd_segment for one demand_strategy, ordered by total_rov DESC, score DESC, and
+     * returns at most {@code limit} rows.
+     * <p>
+     * The SQL itself lives in the mapper XML, which is not visible from this interface; the
+     * filter semantics below restate the original author's notes:
+     * <ul>
+     *   <li>dimension: equality filter;</li>
+     *   <li>dimensionExclude: excludes rows with that dimension (rows whose dimension is NULL
+     *       are treated as passing);</li>
+     *   <li>demandFilterSortStrategyLike: LIKE filter on demand_filter_sort_strategy;</li>
+     *   <li>excludeSelfTitle=true: drops rows where video.title == demand_content_title.</li>
+     * </ul>
+     * NOTE(review): callers in this change pass {@code null} for filters they do not want
+     * applied (e.g. channelName, crowdSegment, channelLevel3, driveDimensionTime, category) -
+     * presumably a null argument disables that filter; confirm against the mapper XML.
+     */
+    List<ContentPlatformDemandVideo> selectForRecommend(@Param("dt") String dt,
+                                                       @Param("channelName") String channelName,
+                                                       @Param("crowdSegment") String crowdSegment,
+                                                       @Param("demandStrategy") String demandStrategy,
+                                                       @Param("dimension") String dimension,
+                                                       @Param("dimensionExclude") String dimensionExclude,
+                                                       @Param("demandFilterSortStrategyLike") String demandFilterSortStrategyLike,
+                                                       @Param("channelLevel3") String channelLevel3,
+                                                       @Param("driveDimensionTime") String driveDimensionTime,
+                                                       @Param("category") String category,
+                                                       @Param("limit") int limit,
+                                                       @Param("excludeSelfTitle") boolean excludeSelfTitle);
+
     List<ContentPlatformDemandVideo> selectActiveVideos(@Param("dt") String dt);
 
     int updateStatusByVideoId(@Param("videoId") Long videoId, @Param("dt") String dt, @Param("status") Integer status, @Param("updateTimestamp") Long updateTimestamp);

+ 51 - 0
api-module/src/main/java/com/tzld/piaoquan/api/job/contentplatform/ContentPlatformDemandVideoJob.java

@@ -3,9 +3,11 @@ package com.tzld.piaoquan.api.job.contentplatform;
 import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
 import com.google.common.collect.Lists;
+import com.aliyun.odps.data.Record;
 import com.tzld.piaoquan.api.dao.mapper.contentplatform.ext.ContentPlatformDemandVideoMapperExt;
 import com.tzld.piaoquan.api.model.po.contentplatform.ContentPlatformDemandVideo;
 import com.tzld.piaoquan.growth.common.component.HttpPoolClient;
+import com.tzld.piaoquan.growth.common.component.ODPSManager;
 import com.tzld.piaoquan.growth.common.model.bo.VideoDetail;
 import com.tzld.piaoquan.growth.common.service.MessageAttachmentService;
 import com.tzld.piaoquan.growth.common.utils.DateUtil;
@@ -36,6 +38,11 @@ public class ContentPlatformDemandVideoJob {
     @Autowired
     private MessageAttachmentService messageAttachmentService;
 
+    @Autowired
+    private ODPSManager odpsManager;
+
+    private static final int ODPS_CATEGORY_CHUNK = 5000;
+
     @ApolloJsonValue("${sync.channel.names:[\"公众号合作-即转-稳定\",\"群/企微合作-稳定\",\"公众号合作-Daily-自选\"]}")
     private List<String> syncChannelNames;
 
@@ -222,6 +229,16 @@ public class ContentPlatformDemandVideoJob {
                 .filter(v -> StringUtils.hasText(v.getTitle()))
                 .collect(Collectors.toList());
 
+        // 从 ODPS video_merge_tag 拉取 merge_leve2(二级品类),填到 category 列,供下发侧黑名单过滤使用
+        Map<Long, String> videoIdToCategory = fetchVideoCategoryMap(
+                saveList.stream().map(ContentPlatformDemandVideo::getVideoId).distinct().collect(Collectors.toList()));
+        for (ContentPlatformDemandVideo demandVideo : saveList) {
+            String cat = videoIdToCategory.get(demandVideo.getVideoId());
+            if (StringUtils.hasText(cat)) {
+                demandVideo.setCategory(cat);
+            }
+        }
+
 //        // 按crowd_segment粒度去重,相同videoId保留分数最高的一条
 //        saveList = saveList.stream()
 //                .collect(Collectors.groupingBy(v -> v.getCrowdSegment() + "_"
@@ -242,6 +259,40 @@ public class ContentPlatformDemandVideoJob {
         log.info("syncByChannel success, dt={}, channelName={}, count={}", dt, syncChannelName, saveList.size());
     }
 
+    /**
+     * Batch-queries loghubods.video_merge_tag to build a videoid -> merge_leve2
+     * (second-level category) map.
+     * <p>
+     * Per the original note the table has no partitions, so ids are fetched in chunks of
+     * ODPS_CATEGORY_CHUNK via {@code videoid IN (...)}. This is best-effort: a chunk whose
+     * query throws is logged and skipped, rows with a null videoid/merge_leve2 or a
+     * non-numeric videoid are ignored, and the affected ids are simply absent from the
+     * returned map, so callers must treat a missing key as "no category".
+     *
+     * @param videoIds video ids to look up; an empty or null list yields an empty map
+     * @return mutable map from video id to category, containing only the ids that were resolved
+     */
+    private Map<Long, String> fetchVideoCategoryMap(List<Long> videoIds) {
+        Map<Long, String> result = new HashMap<>();
+        if (CollectionUtils.isEmpty(videoIds)) {
+            return result;
+        }
+        for (List<Long> partition : Lists.partition(videoIds, ODPS_CATEGORY_CHUNK)) {
+            String inClause = partition.stream().map(String::valueOf).collect(Collectors.joining(","));
+            String sql = "SELECT videoid, merge_leve2 FROM loghubods.video_merge_tag WHERE videoid IN (" + inClause + ")";
+            try {
+                List<Record> records = odpsManager.query(sql);
+                if (CollectionUtils.isEmpty(records)) {
+                    continue;
+                }
+                for (Record r : records) {
+                    Object vid = r.get("videoid");
+                    Object cat = r.get("merge_leve2");
+                    if (vid == null || cat == null) continue;
+                    try {
+                        result.put(Long.parseLong(vid.toString()), cat.toString());
+                    } catch (NumberFormatException ignore) {
+                    }
+                }
+            } catch (Exception e) {
+                log.error("fetchVideoCategoryMap error, chunkSize={}", partition.size(), e);
+            }
+        }
+        log.info("fetchVideoCategoryMap done, input={}, mapped={}", videoIds.size(), result.size());
+        return result;
+    }
+
     @XxlJob("checkContentPlatformDemandVideoStatusJob")
     public ReturnT<String> checkContentPlatformDemandVideoStatusJob(String param) {
         String dt = demandVideoMapperExt.getMaxDt();

+ 6 - 0
api-module/src/main/java/com/tzld/piaoquan/api/model/param/contentplatform/VideoContentListParam.java

@@ -18,4 +18,10 @@ public class VideoContentListParam extends PageParam {
 
     @ApiModelProperty(value = "排序 0-平台推荐 1-行业裂变率 2-本渠道裂变率 3-推荐指数")
     private Integer sort = 0;
+
+    @ApiModelProperty(value = "数据来源: prior-人群需求 / posterior-优质相似 / hot-全局热门 / 空-全部穿插")
+    private String source;
+
+    @ApiModelProperty(value = "公众号名称(对应 demand.channel_level3),仅 prior/posterior 路使用,无数据时退化为渠道粒度")
+    private String ghName;
 }

+ 21 - 10
api-module/src/main/java/com/tzld/piaoquan/api/model/po/contentplatform/ContentPlatformDemandVideo.java

@@ -9,10 +9,10 @@ public class ContentPlatformDemandVideo {
 
     private String channelName;
 
-    private String crowdSegment;
-
     private String channelLevel3;
 
+    private String crowdSegment;
+
     private String demandId;
 
     private String crowdPackage;
@@ -49,6 +49,8 @@ public class ContentPlatformDemandVideo {
 
     private String categoryName;
 
+    private String category;
+
     private Integer crowdCount;
 
     private Integer videoCount;
@@ -119,14 +121,6 @@ public class ContentPlatformDemandVideo {
         this.channelName = channelName;
     }
 
-    public String getCrowdSegment() {
-        return crowdSegment;
-    }
-
-    public void setCrowdSegment(String crowdSegment) {
-        this.crowdSegment = crowdSegment;
-    }
-
     public String getChannelLevel3() {
         return channelLevel3;
     }
@@ -135,6 +129,14 @@ public class ContentPlatformDemandVideo {
         this.channelLevel3 = channelLevel3;
     }
 
+    public String getCrowdSegment() {
+        return crowdSegment;
+    }
+
+    public void setCrowdSegment(String crowdSegment) {
+        this.crowdSegment = crowdSegment;
+    }
+
     public String getDemandId() {
         return demandId;
     }
@@ -279,6 +281,14 @@ public class ContentPlatformDemandVideo {
         this.categoryName = categoryName;
     }
 
+    public String getCategory() {
+        return category;
+    }
+
+    public void setCategory(String category) {
+        this.category = category;
+    }
+
     public Integer getCrowdCount() {
         return crowdCount;
     }
@@ -461,6 +471,7 @@ public class ContentPlatformDemandVideo {
         sb.append(", standardElement=").append(standardElement);
         sb.append(", elementDimension=").append(elementDimension);
         sb.append(", categoryName=").append(categoryName);
+        sb.append(", category=").append(category);
         sb.append(", crowdCount=").append(crowdCount);
         sb.append(", videoCount=").append(videoCount);
         sb.append(", visitUv=").append(visitUv);

+ 24 - 0
api-module/src/main/java/com/tzld/piaoquan/api/model/vo/contentplatform/VideoContentItemVO.java

@@ -87,9 +87,33 @@ public class VideoContentItemVO {
     @ApiModelProperty(value = "实验id")
     private String experimentId;
 
+    @ApiModelProperty(value = "数据来源: prior / posterior / hot")
+    private String source;
+
+    @ApiModelProperty(value = "相似度")
+    private Double sim;
+
+    @ApiModelProperty(value = "rov")
+    private Double rov;
+
     @ApiModelProperty(value = "搜索来源 vector-向量搜索 manager-管理后台搜索")
     private String searchSource;
 
+    @ApiModelProperty(value = "渠道类名称")
+    private String channelName;
+
     @ApiModelProperty(value = "三级渠道")
     private String channelLevel3;
+
+    @ApiModelProperty(value = "需求侧视频数量")
+    private Integer videoCount;
+
+    @ApiModelProperty(value = "需求侧访问UV")
+    private Long visitUv;
+
+    @ApiModelProperty(value = "需求侧UV占比")
+    private Double uvRatio;
+
+    @ApiModelProperty(value = "需求侧总ROV")
+    private Double totalRov;
 }

+ 501 - 3
api-module/src/main/java/com/tzld/piaoquan/api/service/contentplatform/impl/ContentPlatformPlanServiceImpl.java

@@ -27,6 +27,7 @@ import com.tzld.piaoquan.api.service.VideoMultiService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformAccountService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformCooperateAccountService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformPlanService;
+import com.tzld.piaoquan.api.util.TitleNormalizer;
 import com.tzld.piaoquan.growth.common.common.enums.GhTypeEnum;
 import com.tzld.piaoquan.growth.common.common.enums.StrategyStatusEnum;
 import com.tzld.piaoquan.growth.common.dao.mapper.ext.CgiReplyBucketDataMapperExt;
@@ -46,6 +47,7 @@ import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
 import org.springframework.util.StringUtils;
 
+import java.time.LocalDate;
 import java.util.*;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -606,6 +608,50 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         return gzhPlanMapper.selectByExample(example);
     }
 
+    private static final int DEMAND_CANDIDATE_LIMIT = 10000;
+    private static final int HOT_CANDIDATE_LIMIT = 10000;
+    private static final int TOP_K_PER_DEMAND = 3;
+    private static final String DEMAND_STRATEGY_PRIOR = "人群需求";
+    private static final String DEMAND_STRATEGY_PRIOR_SCENE = "人群需求-场景";
+    private static final String DEMAND_STRATEGY_POSTERIOR = "优质相似";
+    private static final String PRIOR_PREMIUM_DIMENSION = "传播的头部";
+    /** prior/posterior 池视频近 7 日 rov 下限(质量过滤):0.02 在 prior 池 0513 全量保留 ~41%,
+     * cdjh 优质相似 0514 验证保留 ~54%,量大有意义;
+     * priorScene 池基数小(单 channel 几十~百条),仍保持 >0(在 groupAndTopK 内兜底)。 */
+    private static final double DEMAND_MIN_ROV = 0.02;
+    /** type → channel_name 映射(强过滤):同 crowd_segment 跨渠道客户(如 gzyhc/wxm)按入口平台切数据源 */
+    private static final String CHANNEL_NAME_GZH = "公众号合作-即转-稳定";
+    /** 服务号推送 / 公众号推送 走 Daily 人群_渠道,与即转稳定数据隔离 */
+    private static final String CHANNEL_NAME_GZH_DAILY = "公众号合作-Daily-自选";
+    private static final String CHANNEL_NAME_QW  = "群/企微合作-稳定";
+    private static final double PRIOR_GROUP_KEEP_RATIO = 0.5;
+    /** posterior 按 demand_content_id 分组后保留 total_rov 排名前 50% 的需求组,
+     * 砍掉群体表现弱的需求,避免低 total_rov 的 demand 带回来的相似变体稀释结果。 */
+    private static final double POSTERIOR_GROUP_KEEP_RATIO = 0.5;
+    private static final String SOURCE_PRIOR = "prior";
+    private static final String SOURCE_POSTERIOR = "posterior";
+    private static final String SOURCE_HOT = "hot";
+
+    /**
+     * Derives the channel_name (crowd/channel) used as a hard filter on the demand pools.
+     * <p>
+     * Signal priority:
+     * <ol>
+     *   <li>An explicit type wins: 0 maps to CHANNEL_NAME_GZH, 1/4 map to
+     *       CHANNEL_NAME_GZH_DAILY, 2/3 map to CHANNEL_NAME_QW.</li>
+     *   <li>Any other type (including null and 999) with a non-blank ghName is treated as an
+     *       official-account entry and maps to CHANNEL_NAME_GZH.</li>
+     *   <li>Otherwise null is returned, meaning channel_name is not restricted.</li>
+     * </ol>
+     * Type values, per the original note: 0 auto-reply (official-account entry), 1 service-account
+     * push, 2 WeCom community, 3 WeCom auto-reply, 4 official-account push, 999 unrestricted.
+     * The filter exists so that a crowd_segment shared across channels is not polluted by data
+     * from the other entry platform.
+     *
+     * @param param list request carrying the optional type and ghName
+     * @return the channel_name to filter by, or null for no channel restriction
+     */
+    private String resolveChannelName(VideoContentListParam param) {
+        Integer type = param.getType();
+        if (type != null) {
+            switch (type) {
+                case 0:
+                    return CHANNEL_NAME_GZH;
+                case 1:
+                case 4:
+                    return CHANNEL_NAME_GZH_DAILY;
+                case 2:
+                case 3:
+                    return CHANNEL_NAME_QW;
+                default:
+                    break;
+            }
+        }
+        return StringUtils.hasText(param.getGhName()) ? CHANNEL_NAME_GZH : null;
+    }
+
     @Override
     public Page<VideoContentItemVO> getVideoContentList(VideoContentListParam param) {
         ContentPlatformAccount user = LoginUserContext.getUser();
@@ -613,27 +659,472 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         if (StringUtils.hasText(param.getTitle())) {
             return getVideoContentListByTitle(param);
         }
-        Page<VideoContentItemVO> result = new Page<>(param.getPageNum(), param.getPageSize());
-        int offset = (param.getPageNum() - 1) * param.getPageSize();
+        String source = param.getSource();
+        if (SOURCE_PRIOR.equalsIgnoreCase(source)) {
+            return getSingleSourcePage(param, user, SOURCE_PRIOR);
+        }
+        if (SOURCE_POSTERIOR.equalsIgnoreCase(source)) {
+            return getSingleSourcePage(param, user, SOURCE_POSTERIOR);
+        }
+        if (SOURCE_HOT.equalsIgnoreCase(source)) {
+            return getSingleSourcePage(param, user, SOURCE_HOT);
+        }
+        return getInterleavedPage(param, user);
+    }
+
+    /**
+     * Builds one page for a single source.
+     * <p>
+     * hot is delegated to getHotSourcePaged. prior and posterior build their complete candidate
+     * list with the same fetchers the interleaved feed uses, tag every item with {@code source},
+     * and are then paged in memory by pageNum/pageSize, so totalSize is the size of the final
+     * (already de-duplicated) list.
+     *
+     * @param source one of SOURCE_PRIOR / SOURCE_POSTERIOR / SOURCE_HOT; anything other than
+     *               hot or prior falls into the posterior branch
+     */
+    private Page<VideoContentItemVO> getSingleSourcePage(VideoContentListParam param, ContentPlatformAccount user, String source) {
+        if (SOURCE_HOT.equals(source)) {
+            return getHotSourcePaged(param, user);
+        }
+        List<VideoContentItemVO> list;
+        if (SOURCE_PRIOR.equals(source)) {
+            // "Fans like" = crowd-demand-scene and crowd-demand interleaved strictly 1:1, scene first; once one side runs out the other fills the rest in order
+            List<VideoContentItemVO> scene = fetchPriorSceneCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
+            List<VideoContentItemVO> prior = fetchPriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
+            list = interleavePriorWithScene(scene, prior);
+        } else {
+            list = fetchPosteriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
+        }
+        for (VideoContentItemVO v : list) {
+            v.setSource(source);
+        }
+        return paginateCandidates(param, list);
+    }
+
+    /**
+     * Interleaves the priorScene and prior pools strictly 1:1, de-duplicating across both pools
+     * by video_id and normalized title; the scene pool is visited first in every round, so it
+     * wins when both pools carry the same video.
+     * <p>
+     * In each round the scene cursor advances until one item is accepted by tryEmit (or the pool
+     * ends), then the prior cursor does the same. When one side is used up, the remaining items
+     * of the other side are appended in their original order.
+     *
+     * @param scene scene-pool candidates, already ordered
+     * @param prior prior-pool candidates, already ordered
+     * @return a new list containing the merged, de-duplicated sequence
+     */
+    private List<VideoContentItemVO> interleavePriorWithScene(List<VideoContentItemVO> scene, List<VideoContentItemVO> prior) {
+        Set<Long> seenIds = new HashSet<>();
+        Set<String> seenTitles = new HashSet<>();
+        List<VideoContentItemVO> out = new ArrayList<>();
+        int si = 0, pi = 0;
+        while (si < scene.size() || pi < prior.size()) {
+            while (si < scene.size()) {
+                VideoContentItemVO v = scene.get(si++);
+                if (tryEmit(v, seenIds, seenTitles, out)) break;
+            }
+            while (pi < prior.size()) {
+                VideoContentItemVO v = prior.get(pi++);
+                if (tryEmit(v, seenIds, seenTitles, out)) break;
+            }
+        }
+        return out;
+    }
+
+    /**
+     * Appends {@code v} to {@code out} unless it duplicates something already emitted.
+     * <p>
+     * An item is rejected when its video id is null, when its video id is already in
+     * {@code seenIds}, or when its normalized title is non-empty and already in
+     * {@code seenTitles}. On acceptance the id (and the non-empty normalized title) are recorded.
+     *
+     * @return true if the item was appended, false if it was rejected
+     */
+    private boolean tryEmit(VideoContentItemVO v, Set<Long> seenIds, Set<String> seenTitles, List<VideoContentItemVO> out) {
+        Long videoId = v.getVideoId();
+        if (videoId == null || seenIds.contains(videoId)) {
+            return false;
+        }
+        String normalizedTitle = TitleNormalizer.normalize(v.getTitle());
+        boolean hasTitle = !normalizedTitle.isEmpty();
+        if (hasTitle && seenTitles.contains(normalizedTitle)) {
+            return false;
+        }
+        seenIds.add(videoId);
+        if (hasTitle) {
+            seenTitles.add(normalizedTitle);
+        }
+        out.add(v);
+        return true;
+    }
+
+    /**
+     * Hot source only: reuses the pre-existing planMapperExt.getVideoCount + getVideoList chain,
+     * passing offset/pageSize to the query instead of paging in memory.
+     * <p>
+     * Returns an empty page when no video dt is available, when the count is 0, or when the
+     * requested offset lies beyond the count; every returned item is tagged with SOURCE_HOT.
+     */
+    private Page<VideoContentItemVO> getHotSourcePaged(VideoContentListParam param, ContentPlatformAccount user) {
+        int pageSize = param.getPageSize();
+        int pageNum = param.getPageNum();
+        Page<VideoContentItemVO> result = new Page<>(pageNum, pageSize);
         String dt = planMapperExt.getVideoMaxDt();
         String datastatDt = planMapperExt.getVideoDatastatMaxDt();
+        if (!StringUtils.hasText(dt)) {
+            result.setTotalSize(0);
+            result.setObjs(new ArrayList<>());
+            return result;
+        }
         int count = planMapperExt.getVideoCount(param, dt, videoMinScore);
         result.setTotalSize(count);
         if (count == 0) {
             result.setObjs(new ArrayList<>());
             return result;
         }
+        int offset = (pageNum - 1) * pageSize;
+        if (offset >= count) {
+            result.setObjs(new ArrayList<>());
+            return result;
+        }
         String sort = getVideoContentListSort(param.getSort());
         String type = getVideoContentListType(param.getType());
         String channel = getVideoContentListChannel(param.getSort(), user.getChannel());
         String strategy = param.getSort() == 3 ? "recommend" : "normal";
         List<ContentPlatformVideo> videoList = planMapperExt.getVideoList(param, dt, datastatDt, type, channel, strategy,
-                videoMinScore, offset, param.getPageSize(), sort);
+                videoMinScore, offset, pageSize, sort);
         List<VideoContentItemVO> list = buildVideoContentItemVOList(videoList, type, "sum", user.getChannel(), datastatDt);
+        if (list == null) {
+            list = new ArrayList<>();
+        }
+        for (VideoContentItemVO v : list) {
+            v.setSource(SOURCE_HOT);
+        }
         result.setObjs(list);
         return result;
     }
 
+    /**
+     * Randomly interleaves four candidate pools, de-duplicating across pools by video_id and
+     * normalized title, then pages the merged list in memory.
+     * <p>
+     * Pools: priorScene / prior / posterior / hot. priorScene and prior are both exposed with
+     * source = SOURCE_PRIOR ("fans like"). At every step one of the not-yet-exhausted pools is
+     * chosen uniformly at random; duplicates at its head are skipped and its next item is
+     * emitted, or the pool is marked exhausted if nothing is left (that step emits nothing).
+     * Order inside a pool is whatever its fetcher produced.
+     * <p>
+     * The RNG seed is (user id XOR hash of today's ISO date string), which keeps the order
+     * identical for the same user across page turns and refreshes within one day. A null user id
+     * is treated as 0.
+     */
+    private Page<VideoContentItemVO> getInterleavedPage(VideoContentListParam param, ContentPlatformAccount user) {
+        List<VideoContentItemVO> priorScene = fetchPriorSceneCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
+        List<VideoContentItemVO> prior = fetchPriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
+        List<VideoContentItemVO> posterior = fetchPosteriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
+        List<VideoContentItemVO> hot = fetchHotCandidates(param, user, HOT_CANDIDATE_LIMIT);
+        for (VideoContentItemVO v : priorScene) v.setSource(SOURCE_PRIOR);
+        for (VideoContentItemVO v : prior) v.setSource(SOURCE_PRIOR);
+        for (VideoContentItemVO v : posterior) v.setSource(SOURCE_POSTERIOR);
+        for (VideoContentItemVO v : hot) v.setSource(SOURCE_HOT);
+
+        List<List<VideoContentItemVO>> pools = Arrays.asList(priorScene, prior, posterior, hot);
+        int N = pools.size();
+        int[] pointers = new int[N];
+        boolean[] exhausted = new boolean[N];
+        Set<Long> emittedIds = new HashSet<>();
+        Set<String> emittedTitles = new HashSet<>();
+        List<VideoContentItemVO> merged = new ArrayList<>();
+
+        long userSeed = user.getId() == null ? 0L : user.getId();
+        long seed = userSeed ^ LocalDate.now().toString().hashCode();
+        Random rng = new Random(seed);
+
+        while (true) {
+            boolean allExhausted = true;
+            for (boolean e : exhausted) {
+                if (!e) { allExhausted = false; break; }
+            }
+            if (allExhausted) break;
+
+            List<Integer> alive = new ArrayList<>(N);
+            for (int i = 0; i < N; i++) {
+                if (!exhausted[i]) alive.add(i);
+            }
+            int cur = alive.get(rng.nextInt(alive.size()));
+
+            List<VideoContentItemVO> pool = pools.get(cur);
+            while (pointers[cur] < pool.size() && shouldSkipForDedup(pool.get(pointers[cur]), emittedIds, emittedTitles)) {
+                pointers[cur]++;
+            }
+            if (pointers[cur] < pool.size()) {
+                VideoContentItemVO item = pool.get(pointers[cur]++);
+                emittedIds.add(item.getVideoId());
+                String nt = TitleNormalizer.normalize(item.getTitle());
+                if (!nt.isEmpty()) emittedTitles.add(nt);
+                merged.add(item);
+            } else {
+                exhausted[cur] = true;
+            }
+        }
+        return paginateCandidates(param, merged);
+    }
+
+    /**
+     * De-duplication check used while interleaving: an item is skipped when its (non-null)
+     * video_id has already been emitted, or when its normalized title is non-empty and has
+     * already been emitted.
+     * <p>
+     * Titles are normalized with TitleNormalizer (per the original note: emoji, whitespace and
+     * full-/half-width differences are removed) to catch the same content uploaded under
+     * several video_ids.
+     *
+     * @return true if the item duplicates an already emitted one and must be skipped
+     */
+    private boolean shouldSkipForDedup(VideoContentItemVO item, Set<Long> emittedIds, Set<String> emittedTitles) {
+        Long videoId = item.getVideoId();
+        if (videoId != null && emittedIds.contains(videoId)) {
+            return true;
+        }
+        String normalizedTitle = TitleNormalizer.normalize(item.getTitle());
+        if (normalizedTitle.isEmpty()) {
+            return false;
+        }
+        return emittedTitles.contains(normalizedTitle);
+    }
+
+    /**
+     * Pages an already built candidate list in memory.
+     * <p>
+     * totalSize is always the size of {@code all}. A page that starts at or beyond the end of
+     * the list yields an empty result. Invalid paging input (pageNum &lt; 1 or pageSize &lt; 1)
+     * also yields an empty result instead of letting a negative index reach
+     * {@link List#subList(int, int)}, which would throw. The offset is computed in {@code long}
+     * so that very large pageNum/pageSize values cannot overflow into a negative index.
+     *
+     * @param param request carrying pageNum (1-based) and pageSize
+     * @param all   full, ordered candidate list
+     * @return the requested page; its items are copied into a new list
+     */
+    private Page<VideoContentItemVO> paginateCandidates(VideoContentListParam param, List<VideoContentItemVO> all) {
+        int pageSize = param.getPageSize();
+        int pageNum = param.getPageNum();
+        Page<VideoContentItemVO> result = new Page<>(pageNum, pageSize);
+        result.setTotalSize(all.size());
+        if (all.isEmpty() || pageNum < 1 || pageSize < 1) {
+            result.setObjs(new ArrayList<>());
+            return result;
+        }
+        // long arithmetic: (pageNum - 1) * pageSize may exceed Integer.MAX_VALUE
+        long start = (long) (pageNum - 1) * pageSize;
+        int from = (int) Math.min(start, (long) all.size());
+        int to = (int) Math.min(start + pageSize, (long) all.size());
+        result.setObjs(new ArrayList<>(all.subList(from, to)));
+        return result;
+    }
+
+    /**
+     * Crowd-demand scene pool: rows with demand_strategy = DEMAND_STRATEGY_PRIOR_SCENE.
+     * <p>
+     * Fallbacks, applied in order while the result is still empty:
+     * ghName given but no rows -> retry at channel granularity (channel_level3 not restricted);
+     * channel_name resolved but no rows -> retry without crowd_segment.
+     * <p>
+     * Post-processing:
+     * <ol>
+     *   <li>Per video_id keep only the first row returned; this relies on the query ordering
+     *       (total_rov DESC, score DESC per the mapper's documentation) so the first row is the
+     *       one with the largest total_rov.</li>
+     *   <li>Drop rows whose rov is null or &lt;= 0 (no performance in the last 7 days).</li>
+     *   <li>Order the output by rov DESC, ties broken by total_rov DESC.</li>
+     * </ol>
+     *
+     * @param limit maximum number of rows requested from each query
+     * @return candidate VOs; an empty list when no demand dt is available
+     */
+    private List<VideoContentItemVO> fetchPriorSceneCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
+        String dt = demandVideoMapperExt.getMaxDt();
+        if (!StringUtils.hasText(dt)) {
+            return new ArrayList<>();
+        }
+        String crowdSegment = user.getChannel();
+        String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
+        String channelName = resolveChannelName(param);
+
+        String category = StringUtils.hasText(param.getCategory()) ? param.getCategory() : null;
+        List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
+                dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, ghName, null, category, limit, false);
+        if (ghName != null && rows.isEmpty()) {
+            rows = demandVideoMapperExt.selectForRecommend(
+                    dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, null, null, category, limit, false);
+        }
+        // Cross-channel fallback: channel_name matched but crowd_segment has 0 rows under that channel (e.g. an official-account user entering via WeCom) -> drop crowd_segment and fetch generic data by channel_name only
+        if (channelName != null && rows.isEmpty()) {
+            rows = demandVideoMapperExt.selectForRecommend(
+                    dt, channelName, null, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, null, null, category, limit, false);
+        }
+        // 1. Per video_id keep the row with the largest total_rov (SQL is already sorted; putIfAbsent keeps the first occurrence)
+        LinkedHashMap<Long, ContentPlatformDemandVideo> bestPerVideo = new LinkedHashMap<>();
+        for (ContentPlatformDemandVideo r : rows) {
+            if (r.getVideoId() == null) continue;
+            bestPerVideo.putIfAbsent(r.getVideoId(), r);
+        }
+        // 2. Drop rows whose rov is null or <= 0
+        List<ContentPlatformDemandVideo> filtered = new ArrayList<>(bestPerVideo.size());
+        for (ContentPlatformDemandVideo r : bestPerVideo.values()) {
+            if (r.getRov() == null || r.getRov() <= 0) continue;
+            filtered.add(r);
+        }
+        // 3. Sort by rov DESC, then by total_rov DESC
+        filtered.sort((a, b) -> {
+            int c = Double.compare(
+                    b.getRov() == null ? 0d : b.getRov(),
+                    a.getRov() == null ? 0d : a.getRov());
+            if (c != 0) return c;
+            return Double.compare(
+                    b.getTotalRov() == null ? 0d : b.getTotalRov(),
+                    a.getTotalRov() == null ? 0d : a.getTotalRov());
+        });
+        return buildDemandVideoContentItemVOList(filtered);
+    }
+
+    /**
+     * Crowd-demand pool (prior): only rows with dimension = PRIOR_PREMIUM_DIMENSION.
+     * <p>
+     * Per the original note, total_rov in this pool expresses how strongly the crowd wants the
+     * feature (point_type, standard_element), and its distribution differs a lot between
+     * channels. Therefore feature groups are ranked by total_rov within the fetched rows and
+     * only the top PRIOR_GROUP_KEEP_RATIO share is kept, removing weak-demand topics.
+     * <p>
+     * Pipeline: query (with the ghName and crowd_segment fallbacks) -> keep rows with
+     * rov &gt;= DEMAND_MIN_ROV -> keep the top groups by total_rov -> groupAndTopK by
+     * (point_type, standard_element) with TOP_K_PER_DEMAND rows per group -> truncate to
+     * {@code limit}.
+     *
+     * @param limit maximum number of candidates returned; the query itself fetches
+     *              max(limit * 3, DEMAND_CANDIDATE_LIMIT) rows
+     * @return candidate VOs; an empty list when no demand dt is available
+     */
+    private List<VideoContentItemVO> fetchPriorCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
+        String dt = demandVideoMapperExt.getMaxDt();
+        if (!StringUtils.hasText(dt)) {
+            return new ArrayList<>();
+        }
+        String crowdSegment = user.getChannel();
+        String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
+        String channelName = resolveChannelName(param);
+        int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
+
+        String category = StringUtils.hasText(param.getCategory()) ? param.getCategory() : null;
+        List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
+                dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, ghName, null, category, fetchLimit, false);
+
+        // Fallback: no data for this ghName -> retry at channel granularity
+        if (ghName != null && rows.isEmpty()) {
+            rows = demandVideoMapperExt.selectForRecommend(
+                    dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, category, fetchLimit, false);
+        }
+        // Cross-channel fallback: channel_name matched but crowd_segment has 0 rows on that side -> drop crowd_segment and fetch generic data
+        if (channelName != null && rows.isEmpty()) {
+            rows = demandVideoMapperExt.selectForRecommend(
+                    dt, channelName, null, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, category, fetchLimit, false);
+        }
+
+        // Lower bound on the 7-day rov, filtering out videos with weak recent performance (validated on 0513: >= 0.02 keeps ~41%)
+        rows = rows.stream()
+                .filter(r -> r.getRov() != null && r.getRov() >= DEMAND_MIN_ROV)
+                .collect(Collectors.toList());
+
+        Function<ContentPlatformDemandVideo, String> keyFn = r ->
+                (r.getPointType() == null ? "" : r.getPointType()) + "\u0001"
+                        + (r.getStandardElement() == null ? "" : r.getStandardElement());
+
+        rows = retainTopGroupsByTotalRov(rows, keyFn, PRIOR_GROUP_KEEP_RATIO);
+
+        List<VideoContentItemVO> out = groupAndTopK(rows, keyFn, TOP_K_PER_DEMAND, false);
+        if (out.size() > limit) {
+            out = new ArrayList<>(out.subList(0, limit));
+        }
+        return out;
+    }
+
+    /**
+     * Quantile filter over "feature groups" (as defined by {@code keyFn}): keeps only the rows
+     * that belong to the top {@code keepRatio} share of groups ranked by total_rov.
+     * <p>
+     * Each group is represented by the maximum total_rov among its rows (a null total_rov counts
+     * as 0); per the original note the value should be constant inside a group after the SQL
+     * dimension filter, and taking the max guards against dirty data. The number of groups kept
+     * is ceil(groupCount * keepRatio), at least 1. With fewer than two groups, or when all groups
+     * would be kept, the input list is returned unchanged. Row order is preserved.
+     *
+     * @param rows      candidate rows; null/empty is returned as is
+     * @param keyFn     maps a row to its group key
+     * @param keepRatio share of groups to keep, e.g. 0.5 for the top half
+     * @return the input list itself when nothing is cut, otherwise a new filtered list
+     */
+    private List<ContentPlatformDemandVideo> retainTopGroupsByTotalRov(
+            List<ContentPlatformDemandVideo> rows,
+            Function<ContentPlatformDemandVideo, String> keyFn,
+            double keepRatio) {
+        if (CollectionUtils.isEmpty(rows)) return rows;
+        LinkedHashMap<String, Double> groupMax = new LinkedHashMap<>();
+        for (ContentPlatformDemandVideo r : rows) {
+            String key = keyFn.apply(r);
+            double tr = r.getTotalRov() == null ? 0d : r.getTotalRov();
+            groupMax.merge(key, tr, Math::max);
+        }
+        int total = groupMax.size();
+        if (total < 2) return rows;
+        int keep = Math.max(1, (int) Math.ceil(total * keepRatio));
+        if (keep >= total) return rows;
+
+        List<Map.Entry<String, Double>> sorted = new ArrayList<>(groupMax.entrySet());
+        sorted.sort((a, b) -> Double.compare(b.getValue(), a.getValue()));
+        Set<String> keepKeys = new HashSet<>();
+        for (int i = 0; i < keep; i++) keepKeys.add(sorted.get(i).getKey());
+
+        List<ContentPlatformDemandVideo> out = new ArrayList<>(rows.size());
+        for (ContentPlatformDemandVideo r : rows) {
+            if (keepKeys.contains(keyFn.apply(r))) out.add(r);
+        }
+        return out;
+    }
+
+    /**
+     * High-quality-similar pool (posterior): a single query, no longer split by
+     * demand_filter_sort_strategy sub-category.
+     * <p>
+     * Pipeline: query with excludeSelfTitle=true (with the ghName and crowd_segment fallbacks)
+     * -> keep rows with rov &gt;= DEMAND_MIN_ROV -> keep the top POSTERIOR_GROUP_KEEP_RATIO
+     * share of demand_content_id groups by total_rov -> groupAndTopK by demand_content_id with
+     * TOP_K_PER_DEMAND rows per group -> concatDedup with {@code limit}.
+     * NOTE(review): per the original note concatDedup de-duplicates across groups by video_id
+     * and normalized title and truncates to limit; its body is not visible here.
+     *
+     * @param limit maximum number of candidates returned; the query itself fetches
+     *              max(limit * 3, DEMAND_CANDIDATE_LIMIT) rows
+     * @return candidate VOs; an empty list when no demand dt is available
+     */
+    private List<VideoContentItemVO> fetchPosteriorCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
+        String dt = demandVideoMapperExt.getMaxDt();
+        if (!StringUtils.hasText(dt)) {
+            return new ArrayList<>();
+        }
+        String crowdSegment = user.getChannel();
+        String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
+        String channelName = resolveChannelName(param);
+        int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
+
+        String category = StringUtils.hasText(param.getCategory()) ? param.getCategory() : null;
+        // Posterior pool: drive_dimension_time is never restricted (main query and fallbacks alike), to avoid too little recall from a "yesterday"-only window.
+        List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
+                dt, channelName, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, null, ghName, null, category, fetchLimit, true);
+
+        // Fallback: no data for this ghName -> retry at channel granularity
+        if (ghName != null && rows.isEmpty()) {
+            rows = demandVideoMapperExt.selectForRecommend(
+                    dt, channelName, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, null, null, null, category, fetchLimit, true);
+        }
+        // Cross-channel fallback: channel_name matched but crowd_segment has 0 rows on that side -> drop crowd_segment and fetch generic data
+        if (channelName != null && rows.isEmpty()) {
+            rows = demandVideoMapperExt.selectForRecommend(
+                    dt, channelName, null, DEMAND_STRATEGY_POSTERIOR, null, null, null, null, null, category, fetchLimit, true);
+        }
+
+        // Lower bound on the 7-day rov, same as the prior pool (validated on cdjh 0514: >= 0.02 keeps ~54%)
+        rows = rows.stream()
+                .filter(r -> r.getRov() != null && r.getRov() >= DEMAND_MIN_ROV)
+                .collect(Collectors.toList());
+
+        Function<ContentPlatformDemandVideo, String> keyFn = r ->
+                r.getDemandContentId() == null ? "" : r.getDemandContentId();
+
+        // Filter demand_content_id groups by total_rov: keep the top half (ceil(groups * 0.5)) of the demand groups,
+        // cutting demands with weak crowd performance so that similar variants of low-total_rov demands do not dilute the result.
+        rows = retainTopGroupsByTotalRov(rows, keyFn, POSTERIOR_GROUP_KEEP_RATIO);
+
+        List<VideoContentItemVO> out = groupAndTopK(rows, keyFn, TOP_K_PER_DEMAND, true);
+        // Even a single segment needs normalized-title dedup (operators may upload the same content under several video_ids)
+        return concatDedup(out, Collections.emptyList(), limit);
+    }
+
+    /**
+     * Groups rows by {@code keyFn} and keeps the best rows of every group.
+     * <ol>
+     *   <li>Rows without a video id, rows whose rov is null or &lt;= 0, rows whose group key is null and,
+     *       when {@code excludeSelfTitle} is true, rows TitleNormalizer reports as self-titled are dropped.</li>
+     *   <li>Groups are ordered by their largest total_rov, descending (null total_rov counts as 0).</li>
+     *   <li>Inside a group rows are ordered by score, descending (null score counts as 0), de-duplicated by
+     *       video id, and collection stops once {@code topK} rows have been taken.</li>
+     * </ol>
+     *
+     * @param rows             candidate rows; may be null or empty
+     * @param keyFn            maps a row to its group key
+     * @param topK             number of rows after which a group stops contributing
+     * @param excludeSelfTitle whether self-titled rows are filtered out
+     * @return the selected rows converted by buildDemandVideoContentItemVOList
+     */
+    private List<VideoContentItemVO> groupAndTopK(List<ContentPlatformDemandVideo> rows,
+                                                  Function<ContentPlatformDemandVideo, String> keyFn,
+                                                  int topK,
+                                                  boolean excludeSelfTitle) {
+        if (CollectionUtils.isEmpty(rows)) {
+            return new ArrayList<>();
+        }
+
+        // Insertion-ordered buckets: ties between groups keep first-seen order after the stable sort below.
+        LinkedHashMap<String, List<ContentPlatformDemandVideo>> buckets = new LinkedHashMap<>();
+        Map<String, Double> bucketMaxTotalRov = new HashMap<>();
+        for (ContentPlatformDemandVideo row : rows) {
+            boolean unusable = row.getVideoId() == null
+                    || row.getRov() == null
+                    || row.getRov() <= 0
+                    || (excludeSelfTitle && TitleNormalizer.isSelfTitle(row.getTitle(), row.getDemandContentTitle()));
+            if (unusable) {
+                continue;
+            }
+            String groupKey = keyFn.apply(row);
+            if (groupKey == null) {
+                continue;
+            }
+            List<ContentPlatformDemandVideo> bucket = buckets.get(groupKey);
+            if (bucket == null) {
+                bucket = new ArrayList<>();
+                buckets.put(groupKey, bucket);
+            }
+            bucket.add(row);
+
+            double totalRov = row.getTotalRov() == null ? 0d : row.getTotalRov();
+            Double bestSoFar = bucketMaxTotalRov.get(groupKey);
+            if (bestSoFar == null || totalRov > bestSoFar) {
+                bucketMaxTotalRov.put(groupKey, totalRov);
+            }
+        }
+
+        // Order the group keys by the group's largest total_rov, strongest first.
+        List<String> orderedKeys = new ArrayList<>(buckets.keySet());
+        orderedKeys.sort((left, right) -> Double.compare(
+                bucketMaxTotalRov.getOrDefault(right, 0d),
+                bucketMaxTotalRov.getOrDefault(left, 0d)));
+
+        List<ContentPlatformDemandVideo> picked = new ArrayList<>();
+        for (String groupKey : orderedKeys) {
+            List<ContentPlatformDemandVideo> members = new ArrayList<>(buckets.get(groupKey));
+            members.sort((left, right) -> {
+                double leftScore = left.getScore() == null ? 0d : left.getScore();
+                double rightScore = right.getScore() == null ? 0d : right.getScore();
+                return Double.compare(rightScore, leftScore);
+            });
+            // The size of this set equals the number of rows taken from the current group.
+            Set<Long> takenIds = new HashSet<>();
+            for (ContentPlatformDemandVideo member : members) {
+                if (takenIds.add(member.getVideoId())) {
+                    picked.add(member);
+                    if (takenIds.size() >= topK) {
+                        break;
+                    }
+                }
+            }
+        }
+        return buildDemandVideoContentItemVOList(picked);
+    }
+
+    /**
+     * Concatenates two segments ({@code a} first, then {@code b}) and cuts the result to {@code limit}.
+     * Each item goes through tryEmit together with the shared seen-id and seen-title sets; presumably
+     * tryEmit appends the item and returns true only when neither its video id nor its normalized title
+     * was seen before (confirm against tryEmit). This also removes same-title duplicates inside a single
+     * segment, e.g. when the same content was uploaded under several video ids.
+     *
+     * @param a     first segment, emitted first
+     * @param b     second segment; may be an empty list
+     * @param limit maximum number of items to return; a non-positive value yields an empty list
+     * @return at most {@code limit} items
+     */
+    private List<VideoContentItemVO> concatDedup(List<VideoContentItemVO> a, List<VideoContentItemVO> b, int limit) {
+        List<VideoContentItemVO> out = new ArrayList<>();
+        // The size check below only runs after a successful emit, so without this guard a non-positive
+        // limit would still let one item through.
+        if (limit <= 0) {
+            return out;
+        }
+        Set<Long> seenIds = new HashSet<>();
+        Set<String> seenTitles = new HashSet<>();
+        for (VideoContentItemVO v : a) {
+            if (tryEmit(v, seenIds, seenTitles, out) && out.size() >= limit) return out;
+        }
+        for (VideoContentItemVO v : b) {
+            if (tryEmit(v, seenIds, seenTitles, out) && out.size() >= limit) return out;
+        }
+        return out;
+    }
+
+    /**
+     * Loads the "hot" candidate pool: the first {@code limit} rows returned by planMapperExt.getVideoList
+     * for the latest video dt, converted to VOs.
+     *
+     * @param param request parameters; sort and type drive the derived sort/type/channel/strategy values
+     * @param user  current account; its channel is used for channel resolution and VO building
+     * @param limit maximum number of rows to query
+     * @return the candidate list; empty when no video dt is available or the VO builder returns null
+     */
+    private List<VideoContentItemVO> fetchHotCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
+        String dt = planMapperExt.getVideoMaxDt();
+        if (!StringUtils.hasText(dt)) {
+            return new ArrayList<>();
+        }
+        // Queried only after the dt guard, so a missing video dt costs one lookup instead of two.
+        String datastatDt = planMapperExt.getVideoDatastatMaxDt();
+        String sort = getVideoContentListSort(param.getSort());
+        String type = getVideoContentListType(param.getType());
+        String channel = getVideoContentListChannel(param.getSort(), user.getChannel());
+        // NOTE(review): if getSort() returns a boxed Integer, a null sort would throw here; confirm the type.
+        String strategy = param.getSort() == 3 ? "recommend" : "normal";
+        List<ContentPlatformVideo> videoList = planMapperExt.getVideoList(param, dt, datastatDt, type, channel, strategy,
+                videoMinScore, 0, limit, sort);
+        List<VideoContentItemVO> result = buildVideoContentItemVOList(videoList, type, "sum", user.getChannel(), datastatDt);
+        return result == null ? new ArrayList<>() : result;
+    }
+
     /**
      * 按标题通过 manager 平台接口查询视频列表,支持最大查询条数限制
      */
@@ -1330,7 +1821,14 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
             item.setStandardElement(video.getStandardElement());
             item.setCategoryName(video.getCategoryName());
             item.setExperimentId(video.getExperimentId());
+            item.setSim(video.getSim());
+            item.setRov(video.getRov());
+            item.setChannelName(video.getChannelName());
             item.setChannelLevel3(video.getChannelLevel3());
+            item.setVideoCount(video.getVideoCount());
+            item.setVisitUv(video.getVisitUv());
+            item.setUvRatio(video.getUvRatio());
+            item.setTotalRov(video.getTotalRov());
             result.add(item);
         }
         return result;

+ 44 - 0
api-module/src/main/java/com/tzld/piaoquan/api/util/TitleNormalizer.java

@@ -0,0 +1,44 @@
+package com.tzld.piaoquan.api.util;
+
+import java.text.Normalizer;
+import java.util.regex.Pattern;
+
+/**
+ * Title normalization for the fuzzy "video title == demand seed title" comparison.
+ * Removes differences that dirty data introduces through emoji, whitespace and
+ * full-width / half-width character forms.
+ */
+public final class TitleNormalizer {
+
+    private TitleNormalizer() {
+    }
+
+    // Common emoji ranges: symbols, unassigned code points, the supplementary emoji blocks,
+    // miscellaneous symbols / dingbats, variation selectors and the zero-width joiner.
+    // The supplementary blocks are written as a code-point range because java.util.regex matches by
+    // code point: a character class made of lone surrogates never matches a well-formed surrogate
+    // pair, so emoji outside category So (e.g. skin-tone modifiers U+1F3FB..U+1F3FF) were not removed.
+    private static final Pattern EMOJI = Pattern.compile(
+            "[\\p{So}\\p{Cn}]" +
+                    "|[\\x{1F000}-\\x{1FAFF}]" +
+                    "|[\u2600-\u27BF]" +
+                    "|[\uFE00-\uFE0F]" +
+                    "|[\u200D]"
+    );
+
+    private static final Pattern WHITESPACE = Pattern.compile("\\s+");
+
+    /**
+     * Normalizes a title: NFKC normalization, then emoji removal, then whitespace removal.
+     *
+     * @param s the raw title; may be null
+     * @return the normalized title, or an empty string for null / empty input
+     */
+    public static String normalize(String s) {
+        if (s == null || s.isEmpty()) {
+            return "";
+        }
+        // NFKC first, so full-width forms and compatibility spaces become their plain equivalents.
+        String n = Normalizer.normalize(s, Normalizer.Form.NFKC);
+        n = EMOJI.matcher(n).replaceAll("");
+        n = WHITESPACE.matcher(n).replaceAll("");
+        return n;
+    }
+
+    /**
+     * Tells whether a video title equals its demand seed title after normalization.
+     *
+     * @param title              the video title; may be null
+     * @param demandContentTitle the demand seed title; may be null
+     * @return true only when both titles are non-null and normalize to the same non-empty string
+     */
+    public static boolean isSelfTitle(String title, String demandContentTitle) {
+        if (title == null || demandContentTitle == null) {
+            return false;
+        }
+        String a = normalize(title);
+        String b = normalize(demandContentTitle);
+        // Titles that normalize to nothing (e.g. emoji only) are never treated as equal.
+        return !a.isEmpty() && a.equals(b);
+    }
+}

+ 1 - 0
api-module/src/main/resources/mapper/contentplatform/ContentPlatformDemandVideoMapper.xml

@@ -26,6 +26,7 @@
     <result column="standard_element" jdbcType="VARCHAR" property="standardElement" />
     <result column="element_dimension" jdbcType="VARCHAR" property="elementDimension" />
     <result column="category_name" jdbcType="VARCHAR" property="categoryName" />
+    <result column="category" jdbcType="VARCHAR" property="category" />
     <result column="crowd_count" jdbcType="INTEGER" property="crowdCount" />
     <result column="video_count" jdbcType="INTEGER" property="videoCount" />
     <result column="visit_uv" jdbcType="BIGINT" property="visitUv" />

+ 51 - 4
api-module/src/main/resources/mapper/contentplatform/ext/ContentPlatformDemandVideoMapperExt.xml

@@ -5,7 +5,7 @@
     <insert id="batchInsert" parameterType="java.util.List">
         INSERT INTO content_platform_demand_video
         (dt, channel_name, crowd_segment, dimension, point_type, standard_element, element_dimension,
-         category_name, demand_id, crowd_package, conversion_target, partner, account, scene_value,
+         category_name, category, demand_id, crowd_package, conversion_target, partner, account, scene_value,
          demand_strategy, drive_dimension_time, demand_filter_sort_strategy, demand_type,
          demand_content_id, demand_content_title, demand_content_topic,
          crowd_count, video_count, visit_uv, uv_ratio, total_rov, online_action, match_experiment_id,
@@ -14,7 +14,7 @@
         VALUES
         <foreach collection="list" item="item" separator=",">
             (#{item.dt}, #{item.channelName}, #{item.crowdSegment}, #{item.dimension}, #{item.pointType}, #{item.standardElement}, #{item.elementDimension},
-             #{item.categoryName}, #{item.demandId}, #{item.crowdPackage}, #{item.conversionTarget}, #{item.partner}, #{item.account}, #{item.sceneValue},
+             #{item.categoryName}, #{item.category}, #{item.demandId}, #{item.crowdPackage}, #{item.conversionTarget}, #{item.partner}, #{item.account}, #{item.sceneValue},
              #{item.demandStrategy}, #{item.driveDimensionTime}, #{item.demandFilterSortStrategy}, #{item.demandType},
              #{item.demandContentId}, #{item.demandContentTitle}, #{item.demandContentTopic},
              #{item.crowdCount}, #{item.videoCount}, #{item.visitUv}, #{item.uvRatio}, #{item.totalRov}, #{item.onlineAction}, #{item.matchExperimentId},
@@ -32,8 +32,8 @@
     </delete>
 
     <select id="selectByCondition" resultType="com.tzld.piaoquan.api.model.po.contentplatform.ContentPlatformDemandVideo">
-        SELECT id, dt, channel_name, crowd_segment, dimension, point_type, standard_element, element_dimension,
-               category_name, demand_id, crowd_package, conversion_target, partner, account, scene_value,
+        SELECT id, dt, channel_name, channel_level3, crowd_segment, dimension, point_type, standard_element, element_dimension,
+               category_name, category, demand_id, crowd_package, conversion_target, partner, account, scene_value,
                demand_strategy, drive_dimension_time, demand_filter_sort_strategy, demand_type,
                demand_content_id, demand_content_title, demand_content_topic,
                crowd_count, video_count, visit_uv, uv_ratio, total_rov, online_action, match_experiment_id,
@@ -78,6 +78,53 @@
         SELECT MAX(dt) FROM content_platform_demand_video WHERE status = 1
     </select>
 
+    <!--
+         Candidate-pool query for the recommend scenario. dt and status = 1 are always applied; every
+         other condition is added only when its parameter is non-null and non-empty.
+         The category blacklist ('早中晚好','节日祝福') is applied unless channelName equals
+         '公众号合作-Daily-自选'. The dimensionExclude and excludeSelfTitle conditions let NULL column
+         values pass. Rows are ordered by total_rov DESC, score DESC and capped at the limit parameter.
+         Note: element_dimension is not part of this select list.
+    -->
+    <select id="selectForRecommend" resultType="com.tzld.piaoquan.api.model.po.contentplatform.ContentPlatformDemandVideo">
+        SELECT id, dt, channel_name, channel_level3, crowd_segment, dimension, point_type, standard_element,
+               category_name, category, demand_id, crowd_package, conversion_target, partner, account, scene_value,
+               demand_strategy, drive_dimension_time, demand_filter_sort_strategy, demand_type,
+               demand_content_id, demand_content_title, demand_content_topic,
+               crowd_count, video_count, visit_uv, uv_ratio, total_rov, online_action, match_experiment_id,
+               video_id, config_code, score, sim, rov,
+               match_text, title, cover, video, experiment_id, status, create_timestamp, update_timestamp
+        FROM content_platform_demand_video
+        WHERE dt = #{dt} AND status = 1
+        <if test="channelName == null || channelName != '公众号合作-Daily-自选'">
+        AND (category IS NULL OR category NOT IN ('早中晚好','节日祝福'))
+        </if>
+        <if test="channelName != null and channelName != ''">
+            AND channel_name = #{channelName}
+        </if>
+        <if test="crowdSegment != null and crowdSegment != ''">
+            AND crowd_segment = #{crowdSegment}
+        </if>
+        <if test="demandStrategy != null and demandStrategy != ''">
+            AND demand_strategy = #{demandStrategy}
+        </if>
+        <if test="dimension != null and dimension != ''">
+            AND dimension = #{dimension}
+        </if>
+        <if test="dimensionExclude != null and dimensionExclude != ''">
+            AND (dimension IS NULL OR dimension &lt;&gt; #{dimensionExclude})
+        </if>
+        <if test="demandFilterSortStrategyLike != null and demandFilterSortStrategyLike != ''">
+            AND demand_filter_sort_strategy LIKE #{demandFilterSortStrategyLike}
+        </if>
+        <if test="channelLevel3 != null and channelLevel3 != ''">
+            AND channel_level3 = #{channelLevel3}
+        </if>
+        <if test="driveDimensionTime != null and driveDimensionTime != ''">
+            AND drive_dimension_time = #{driveDimensionTime}
+        </if>
+        <if test="category != null and category != ''">
+            AND category = #{category}
+        </if>
+        <if test="excludeSelfTitle">
+            AND (title IS NULL OR demand_content_title IS NULL OR title &lt;&gt; demand_content_title)
+        </if>
+        ORDER BY total_rov DESC, score DESC
+        LIMIT #{limit}
+    </select>
+
     <select id="selectActiveVideos" resultType="com.tzld.piaoquan.api.model.po.contentplatform.ContentPlatformDemandVideo">
         SELECT DISTINCT video_id
         FROM content_platform_demand_video

+ 1 - 1
api-module/src/main/resources/mapper/contentplatform/ext/ContentPlatformPlanMapperExt.xml

@@ -264,7 +264,7 @@
         FROM (
         SELECT video_id, category, title, cover, video,
             SUM(IFNULL(rovn, 0) * (1 + LN(GREATEST(IFNULL(exposure, 0) / 5000.0, 1.0)))
-                * POWER(0.9, DATEDIFF(STR_TO_DATE(#{aggDt}, '%Y%m%d'), STR_TO_DATE(dt, '%Y%m%d'))))
+                * POWER(0.8, DATEDIFF(STR_TO_DATE(#{aggDt}, '%Y%m%d'), STR_TO_DATE(dt, '%Y%m%d'))))
                 OVER (PARTITION BY video_id) AS weighted_score,
             ROW_NUMBER() OVER (PARTITION BY video_id ORDER BY dt DESC) AS rn
         FROM content_platform_video

+ 315 - 0
docs/recommend-ordering.md

@@ -0,0 +1,315 @@
+# `videoContentList` 推荐列表排序逻辑
+
+> 接口:`POST /contentPlatform/plan/videoContentList`
+> 入口:`ContentPlatformPlanServiceImpl.getVideoContentList`
+> 数据源:`content_platform_demand_video`(人群需求/优质相似/人群需求-场景)+ `content_platform_video`(全局热门)
+>
+> 数据组重命名:旧值「先验需求/后验需求/先验需求-场景」→ 新值「人群需求/优质相似/人群需求-场景」(2026-05 改名,本文档已对齐新值)。
+>
+> 适用:当前分支 `cooperation_video_candidate_pool_improved_lld_0509`(含 commit `2860bdce`)。
+
+---
+
+## 1. 入参 → 路径分发
+
+```
+title 非空  ───────────►  全站搜索 (getVideoContentListByTitle)
+source=prior  ───────────►  单源「粉丝喜欢」(getSingleSourcePage / prior)
+source=posterior  ───────►  单源「已发优质相似」(getSingleSourcePage / posterior)
+source=hot   ───────────►  单源「全局热门」(getHotSourcePaged)
+source 空(默认)  ──────►  四路随机穿插 (getInterleavedPage)
+```
+
+> 当前前端:`prior` 和 `posterior` 两个 tab 已 disabled(提示"功能正在完善中"),生产实际只走「全部」(四路穿插)和「全局热门」单源。
+
+---
+
+## 2. 关键常量
+
+| 常量 | 值 | 含义 |
+|---|---|---|
+| `DEMAND_CANDIDATE_LIMIT` | 10000 | 每个 demand 池最大候选条数 |
+| `HOT_CANDIDATE_LIMIT` | 10000 | hot 池候选条数 |
+| `TOP_K_PER_DEMAND` | 3 | demand 池组内取前 K |
+| `DEMAND_STRATEGY_PRIOR` | `"人群需求"` | 人群需求池过滤值(prior,旧名「先验需求」) |
+| `DEMAND_STRATEGY_PRIOR_SCENE` | `"人群需求-场景"` | 场景池过滤值(旧名「先验需求-场景」) |
+| `DEMAND_STRATEGY_POSTERIOR` | `"优质相似"` | 优质相似池过滤值(posterior,旧名「后验需求」) |
+| `PRIOR_PREMIUM_DIMENSION` | `"传播的头部"` | prior 池 dimension 强过滤 |
+| `DEMAND_MIN_ROV` | `0.02` | prior / posterior 池近 7 日 rov 下限(priorScene 池基数小,仍 >0) |
+| `CHANNEL_NAME_GZH` | `"公众号合作-即转-稳定"` | `type=0` 自动回复 / ghName fallback |
+| `CHANNEL_NAME_GZH_DAILY` | `"公众号合作-Daily-自选"` | `type=1` 服务号推送 / `type=4` 公众号推送 |
+| `CHANNEL_NAME_QW`  | `"群/企微合作-稳定"`     | 企微入口对应 `channel_name` 强过滤值 |
+| `PRIOR_GROUP_KEEP_RATIO` | `0.5` | prior 池"特征组"按 total_rov 分位保留比例 |
+| `POSTERIOR_GROUP_KEEP_RATIO` | `0.5` | posterior 池 demand_content_id 组按 total_rov 中位数及以上保留 |
+
+> 优质相似池 SQL **不**加 `drive_dimension_time` 条件(主查与 ghName/跨渠道退化路径一致),可包含「昨日」以外驱动时间档位的数据。
+
+公共强过滤(所有 demand 池 SQL):`dt = max(dt)` AND `status = 1` AND `crowd_segment = user.channel`(跨渠道退化查询时该条件会被去掉,见各池「退化阶梯」)AND `channel_name = resolveChannelName(param)`(可空)。  
+例外:`channel_name = '公众号合作-Daily-自选'` 时不加「早中晚好 / 节日祝福」`category` 黑名单条件(其它渠道仍过滤)。
+
+### 2.1 `resolveChannelName(param)` — 入口 → `channel_name` 映射
+
+为避免同一 `crowd_segment` 在公众号系与企微系都存在(已确认 `gzyhc` / `wxm` 双渠道并存)时,选企微入口却拉到公众号数据,**3 个 demand 池 SQL 在源头加 `channel_name` 强过滤**。映射依据 `VideoContentListParam`:
+
+| `param.type` | 含义 | 映射 `channel_name` |
+|---|---|---|
+| `0` 自动回复 | 公众号系-即转 | `CHANNEL_NAME_GZH` |
+| `1` 服务号推送 / `4` 公众号推送 | 公众号系-Daily | `CHANNEL_NAME_GZH_DAILY` |
+| `2` 企微-社群 / `3` 企微-自动回复 | 企微系 | `CHANNEL_NAME_QW` |
+| `999` / 其它 | 不限平台 | 进入 fallback |
+
+**Fallback**:`type` 空或不在上表时,若 `param.ghName` 非空(只有公众号入口会带 ghName)→ `CHANNEL_NAME_GZH`;否则返回 `null`(不加 channel_name 过滤,保留旧行为)。
+
+> 设计动机:`crowd_segment` 是按客户(广告主)切的,而同一客户可能同时投公众号 + 企微;数据落库时按"人群_渠道"打 `channel_name`。前端切换入口后 `type` 一定会变,所以让 `type` 当一级路由信号最稳。
+
+---
+
+## 3. 四个候选池的构造顺序
+
+### 3.1 `fetchPriorSceneCandidates`(场景池)
+
+**目的:用户所属 channel 在"场景"维度命中的人群需求-场景行,按视频近 7 日表现(rov)排序。**
+
+```sql
+SELECT ... FROM content_platform_demand_video
+WHERE dt=:maxDt AND status=1
+  AND crowd_segment=:userChannel
+  AND channel_name=:resolvedChannelName  -- 若 resolveChannelName 命中(公众号/企微)
+  AND demand_strategy='人群需求-场景'
+  AND channel_level3=:ghName             -- 若传入
+ORDER BY total_rov DESC, score DESC
+LIMIT 10000
+```
+
+退化阶梯(细→粗):
+1. `ghName` 非空但查 0 条 → **去掉 `ghName` 再查一次**(拿全 channel 兜底)
+2. 仍为 0 且 `channel_name` 非空 → **再去掉 `crowd_segment`**,只按 `channel_name` 拉(跨渠道兜底,如公众号账号切到企微入口)
+3. `channel_name` 在所有退化层**始终保留**(强过滤,跨渠道污染必须先挡掉)
+
+后处理(顺序):
+1. 同 `videoId` 去重,保留首次出现(SQL 已按 `total_rov DESC, score DESC` 排序 → 首次 = 该视频的"最强代表需求"行)
+2. **过滤** `rov` 为 null 或 ≤0 的视频
+3. 重排序:`rov DESC` 主键,`total_rov DESC` 次级 tiebreaker
+
+输出顺序:**视频按 rov(近 7 日表现)DESC**;同 rov 时按代表需求的 total_rov DESC。
+
+> 设计动机:场景需求里同一视频会对应多个特征点,但前端只下发一条;用 total_rov 选"最强代表需求",再以视频自身的 rov 决定整体排序,避免按需求维度排序导致同 dimension/标准要素的视频在列表里扎堆。
+
+---
+
+### 3.2 `fetchPriorCandidates`(人群需求池,prior)
+
+**目的:人群需求里,只取 `dimension='传播的头部'` 维度,并按 channel 内"特征需求强度"分位裁掉弱题材。**
+
+单段查询:
+
+```sql
+SELECT ... WHERE ... AND channel_name=:resolvedChannelName(可空)
+  AND demand_strategy='人群需求' AND dimension='传播的头部' ...
+ORDER BY total_rov DESC, score DESC LIMIT 30000
+```
+
+退化阶梯:`ghName` 非空且 0 条 → 去 `ghName` 重查;仍 0 且 `channel_name` 非空 → 再去 `crowd_segment` 重查(跨渠道兜底)。`channel_name` 始终保留。
+
+**[新] 特征组分位裁剪 (`retainTopGroupsByTotalRov`,`keepRatio=0.5`)**:
+- 按 `(point_type, standard_element)` 分组,取每组 `max(total_rov)`(即该特征的人群需求强度)
+- 按组 total_rov DESC 排,**保留 top ⌈N×50%⌉ 个特征组**
+- 各渠道 total_rov 分布差异大(cdjh 0.003~0.057,xycsd17 0.014~0.037),用分位避免绝对阈值伤弱渠道
+
+> 设计动机:`total_rov` 在 prior 池 = 群体对(point_type, standard_element)特征的需求强度。低 total_rov 说明群体不爱这个题材,把对应视频堆在列表底部没意义,直接剪掉。
+
+进 `groupAndTopK`:
+- 分组键:`(point_type, standard_element)`
+- **过滤** `rov <= 0` 或 null(近 7 日无表现)
+- 不做 selfTitle 过滤
+- 组排序:组内最大 `total_rov` DESC
+- 组内排序:`score` DESC,组内 `videoId` 去重,每组最多 3 条
+
+最后按 `limit=10000` 截断。
+
+最终顺序:**保留 top 50% 特征组内,组按总 ROV,组内按 score。**
+
+---
+
+### 3.3 `fetchPosteriorCandidates`(优质相似池,posterior)
+
+**目的:取所有"优质相似"行(不再按 `demand_filter_sort_strategy` 子分类),按需求组的 total_rov 中位数砍掉群体表现弱的需求,组内取 top K。**
+
+> 与旧版差异:旧版"绝对高效率%"(A 段)无条件优先于"相对裂变率%"(B 段)。新版统一按 `total_rov DESC, score DESC` 排,段间优先级取消;由于 B 段(相对裂变率)群体强度普遍低于 A 段,中位数裁剪后大概率被砍掉,这是已知且接受的行为。
+
+单段查询:
+
+```sql
+SELECT ... WHERE ... AND channel_name=:resolvedChannelName(可空)
+  AND demand_strategy='优质相似'
+  -- 不加 drive_dimension_time 条件
+  AND (title IS NULL OR demand_content_title IS NULL OR title <> demand_content_title)
+ORDER BY total_rov DESC, score DESC LIMIT 30000
+```
+
+**`drive_dimension_time`**: 优质相似池**一律不限制**(主查与退化路径均不传该维度),与公众号/渠道粒度无关。
+
+退化阶梯:主查询空且 `ghName` 非空 → 去 `ghName` 重查;仍空且 `channel_name` 非空 → 再去 `crowd_segment` 重查(跨渠道兜底)。`channel_name` 强过滤始终保留。
+
+**[新] 近 7 日 rov 下限(`DEMAND_MIN_ROV=0.02`)**:
+- 与 prior 池一致,过滤掉视频 rov < 0.02 的低质量近期表现
+- cdjh 优质相似 0514 全量验证保留 ~54%,priorScene 池基数小不加该过滤
+
+**[新] 需求组分位裁剪 (`retainTopGroupsByTotalRov`,`keepRatio=POSTERIOR_GROUP_KEEP_RATIO=0.5`)**:
+- 按 `demand_content_id` 分组,取每组 `max(total_rov)`(需求组的群体强度)
+- 按组 total_rov DESC 排,**保留 top ⌈N×50%⌉ 个需求组**(即中位数及以上)
+- 组数 < 2 时全部保留,避免空/单组数据被意外裁掉
+
+> 设计动机:同一 `demand_strategy='优质相似'` 里,低 total_rov 的 demand 会带回很多 sim=1.0 的克隆视频稀释推荐,先在需求组层面砍一刀比在视频层面砍更稳定。
+
+进 `groupAndTopK`:
+- 分组键:`demand_content_id`
+- **过滤** `rov <= 0` 或 null
+- **过滤** `excludeSelfTitle=true` → 用 `TitleNormalizer.isSelfTitle` 跳过自标题
+- 组排序:组内最大 `total_rov` DESC
+- 组内排序:`score` DESC,去重,每组最多 3 条
+
+最后 `concatDedup(out, [], limit)` 用 `videoId` + `TitleNormalizer.normalize(title)` 双键去重,截到 10000。
+
+最终顺序:**保留 top 50% 需求组内,组按总 ROV,组内按 score。**
+
+---
+
+### 3.4 `fetchHotCandidates`(热门池)
+
+复用现有 `planMapperExt.getVideoList(...)`:
+- `dt = videoMaxDt`,`datastatDt = videoDatastatMaxDt`
+- `sort/type/channel/strategy` 由请求和 `param.getSort()` 派生
+- 一次性查前 10000 条,**未分组、未二次过滤**
+- 顺序由 SQL 决定(一般是 `fission_rate DESC` 或 `score DESC`)
+
+---
+
+## 4. 「全部」模式:四路随机穿插(`getInterleavedPage`)
+
+```
+priorScene → 标 source='prior'
+prior      → 标 source='prior'
+posterior  → 标 source='posterior'
+hot        → 标 source='hot'
+```
+
+> priorScene 和 prior **对外都是 `source='prior'`**(前端"粉丝喜欢"统一标签);浮层细节通过 `demandStrategy` 字段区分场景。
+
+### 算法
+
+1. 维护 4 个池的 `pointer[i]` 和 `exhausted[i]`,以及全局 `emittedIds` + `emittedTitles`。
+2. 种子:`seed = userId ^ LocalDate.now().toString().hashCode()`
+   - 同一用户同一天翻页/刷新得到的顺序一致
+   - 跨天/跨用户顺序变化
+3. 循环直到 4 池全空:
+   1. 在未耗尽池中等概率随机选一个
+   2. 跳过该池里 **video_id 已发** 或 **标题(归一化后)已发** 的候选(`shouldSkipForDedup`)
+   3. 取出第一条未发的,加入 `merged`、记入 `emittedIds` 和 `emittedTitles`
+   4. 若该池指针越界,标为 exhausted
+4. `paginateCandidates`:`totalSize = merged.size()`,按 `pageNum/pageSize` 内存切片返回。
+
+> 标题去重用 `TitleNormalizer.normalize`(去 emoji/空白/全半角),应对运营把同段内容重复上传成多个 video_id(如 `🔴她走了,台湾再无洪秀柱!` 对应 67396144 / 67812469 两条)。
+> 单源 `source=prior` 模式的 `interleavePriorWithScene` 也用同一套(video_id + 标题)去重。
+
+### 时间复杂度
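+- 每池 1 次主查询;demand 池命中退化阶梯时最多再追加 2 次(去 `ghName`、去 `crowd_segment`)
+- 主循环 O(总池容量),每个池最大 10000 → 上界约 4 万次操作
+- 单页响应 = 各池各自的 maxDt 查询(demand 池 `getMaxDt`;hot 池 `getVideoMaxDt` + `getVideoDatastatMaxDt`)+ 4 次主 SELECT(不含退化重查)+ 内存穿插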
+
+### 排序稳定性
+- 同一用户同一天,所有分页之间顺序一致
+- `priorScene` / `prior` / `posterior` 内部相对顺序保留(场景按视频 rov;prior/posterior 按组 total_rov + 组内 score),随机只影响"哪一池先出"
+
+---
+
+## 5. 「粉丝喜欢」单源(`source=prior`)
+
+```java
+scene = fetchPriorSceneCandidates(...)
+prior = fetchPriorCandidates(...)
+list  = interleavePriorWithScene(scene, prior)  // 严格 1:1
+```
+
+`interleavePriorWithScene`:
+- 单次循环:先从 `scene` 取一条未发的 → 再从 `prior` 取一条未发的,**严格 1:1 交替**
+- 用 `video_id` + 归一化标题(`TitleNormalizer.normalize`)跨池去重(与第 4 节四路穿插同一套规则),场景优先(先到先得)
+- 一侧用完后,另一侧剩余按原顺序追加
+
+每条 VO 设 `source='prior'`,然后 `paginateCandidates` 切片。
+
+---
+
+## 6. 「已发优质相似」单源(`source=posterior`)
+
+```java
+list = fetchPosteriorCandidates(...)   // 顺序 = 组按 max(total_rov) DESC,组内 score DESC(单段,每组最多 TOP_K_PER_DEMAND 条)
+```
+
+每条 VO 设 `source='posterior'`,`paginateCandidates` 切片。
+
+---
+
+## 7. 「全局热门」单源(`source=hot`)
+
+不走候选池:复用原 `planMapperExt.getVideoCount + getVideoList` 真分页链路,DB 端 LIMIT/OFFSET。VO `source='hot'`。
+
+---
+
+## 8. 字段全景:每条 VO 必有 `source` 字段
+
+| `source` | 含义 | 数据来源 | 浮层 demand 字段 |
+|---|---|---|---|
+| `prior` | 粉丝喜欢(人群需求-场景 + 人群需求-头部) | `content_platform_demand_video` | 有完整字段,可看 `demandStrategy` 区分 |
+| `posterior` | 已发优质相似 | `content_platform_demand_video` | 有完整字段 |
+| `hot` | 全局热门 | `content_platform_video` | 只有基础字段(demand 相关字段为空) |
+
+---
+
+## 9. 一图总览
+
+```
+        ┌────────────────────────────────────────────────────────┐
+        │ 1. 拉 4 个候选池 (每个池都自己分阶段、分组、去重、排序) │
+        └────────────────────────────────────────────────────────┘
+                              │
+   ┌───────────────────────┬──┴──┬─────────────────────────────┐
+   ▼                       ▼     ▼                             ▼
+priorScene(10000)    prior(10000)  posterior(10000)         hot(10000)
+  视频维度 rov DESC   传播头部       优质相似(不按 drive_dimension_time 过滤) SQL 默认
+  (代表需求 total_rov 选最大)        组 total_rov 中位数及以上            (sort 决定)
+                     组(point_type, standard_element) top3
+                     组(demand_content_id) top3
+                              │
+                              ▼
+        ┌────────────────────────────────────────────────────────┐
+        │ 2. 四路随机穿插                                          │
+        │   - 种子 = userId ^ 今天                                  │
+        │   - 跨池 videoId 去重                                     │
+        │   - 池内顺序保留                                          │
+        └────────────────────────────────────────────────────────┘
+                              │
+                              ▼
+        ┌────────────────────────────────────────────────────────┐
+        │ 3. paginateCandidates                                  │
+        │   totalSize = 全量, 按 pageNum/pageSize 切片            │
+        └────────────────────────────────────────────────────────┘
+```
+
+---
+
+## 10. 关键代码位置
+
+| 内容 | 位置 |
+|---|---|
+| 主入口路由 | `ContentPlatformPlanServiceImpl.java:626` |
+| 单源分页 | `ContentPlatformPlanServiceImpl.java:649` |
+| 单源 hot | `ContentPlatformPlanServiceImpl.java:698` |
+| 四路穿插 | `ContentPlatformPlanServiceImpl.java:743` |
+| 场景池 fetcher | `ContentPlatformPlanServiceImpl.java:812` |
+| 人群需求池 fetcher (prior) | `ContentPlatformPlanServiceImpl.java:840` |
+| 优质相似池 fetcher (posterior) | `ContentPlatformPlanServiceImpl.java:875` |
+| `groupAndTopK` 通用排序 | `ContentPlatformPlanServiceImpl.java:912` |
+| 段间拼接 + 去重 | `ContentPlatformPlanServiceImpl.java:956` |
+| 热门池 fetcher | `ContentPlatformPlanServiceImpl.java:976` |
+| Mapper SQL | `mapper/contentplatform/ext/ContentPlatformDemandVideoMapperExt.xml` (selectForRecommend) |