|
@@ -27,6 +27,7 @@ import com.tzld.piaoquan.api.service.VideoMultiService;
|
|
|
import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformAccountService;
|
|
import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformAccountService;
|
|
|
import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformCooperateAccountService;
|
|
import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformCooperateAccountService;
|
|
|
import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformPlanService;
|
|
import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformPlanService;
|
|
|
|
|
+import com.tzld.piaoquan.api.util.TitleNormalizer;
|
|
|
import com.tzld.piaoquan.growth.common.common.enums.GhTypeEnum;
|
|
import com.tzld.piaoquan.growth.common.common.enums.GhTypeEnum;
|
|
|
import com.tzld.piaoquan.growth.common.common.enums.StrategyStatusEnum;
|
|
import com.tzld.piaoquan.growth.common.common.enums.StrategyStatusEnum;
|
|
|
import com.tzld.piaoquan.growth.common.dao.mapper.ext.CgiReplyBucketDataMapperExt;
|
|
import com.tzld.piaoquan.growth.common.dao.mapper.ext.CgiReplyBucketDataMapperExt;
|
|
@@ -46,6 +47,7 @@ import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.stereotype.Service;
|
|
|
import org.springframework.util.StringUtils;
|
|
import org.springframework.util.StringUtils;
|
|
|
|
|
|
|
|
|
|
+import java.time.LocalDate;
|
|
|
import java.util.*;
|
|
import java.util.*;
|
|
|
import java.util.concurrent.ExecutorService;
|
|
import java.util.concurrent.ExecutorService;
|
|
|
import java.util.concurrent.Executors;
|
|
import java.util.concurrent.Executors;
|
|
@@ -606,6 +608,50 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
return gzhPlanMapper.selectByExample(example);
|
|
return gzhPlanMapper.selectByExample(example);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ private static final int DEMAND_CANDIDATE_LIMIT = 10000;
|
|
|
|
|
+ private static final int HOT_CANDIDATE_LIMIT = 10000;
|
|
|
|
|
+ private static final int TOP_K_PER_DEMAND = 3;
|
|
|
|
|
+ private static final String DEMAND_STRATEGY_PRIOR = "人群需求";
|
|
|
|
|
+ private static final String DEMAND_STRATEGY_PRIOR_SCENE = "人群需求-场景";
|
|
|
|
|
+ private static final String DEMAND_STRATEGY_POSTERIOR = "优质相似";
|
|
|
|
|
+ private static final String PRIOR_PREMIUM_DIMENSION = "传播的头部";
|
|
|
|
|
+ /** prior/posterior 池视频近 7 日 rov 下限(质量过滤):0.02 在 prior 池 0513 全量保留 ~41%,
|
|
|
|
|
+ * cdjh 优质相似 0514 验证保留 ~54%,量大有意义;
|
|
|
|
|
+ * priorScene 池基数小(单 channel 几十~百条),仍保持 >0(在 groupAndTopK 内兜底)。 */
|
|
|
|
|
+ private static final double DEMAND_MIN_ROV = 0.02;
|
|
|
|
|
+ /** type → channel_name 映射(强过滤):同 crowd_segment 跨渠道客户(如 gzyhc/wxm)按入口平台切数据源 */
|
|
|
|
|
+ private static final String CHANNEL_NAME_GZH = "公众号合作-即转-稳定";
|
|
|
|
|
+ /** 服务号推送 / 公众号推送 走 Daily 人群_渠道,与即转稳定数据隔离 */
|
|
|
|
|
+ private static final String CHANNEL_NAME_GZH_DAILY = "公众号合作-Daily-自选";
|
|
|
|
|
+ private static final String CHANNEL_NAME_QW = "群/企微合作-稳定";
|
|
|
|
|
+ private static final double PRIOR_GROUP_KEEP_RATIO = 0.5;
|
|
|
|
|
+ /** posterior 按 demand_content_id 分组后保留 total_rov 排名前 50% 的需求组,
|
|
|
|
|
+ * 砍掉群体表现弱的需求,避免低 total_rov 的 demand 带回来的相似变体稀释结果。 */
|
|
|
|
|
+ private static final double POSTERIOR_GROUP_KEEP_RATIO = 0.5;
|
|
|
|
|
+ private static final String SOURCE_PRIOR = "prior";
|
|
|
|
|
+ private static final String SOURCE_POSTERIOR = "posterior";
|
|
|
|
|
+ private static final String SOURCE_HOT = "hot";
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 推导 channel_name(人群_渠道) 作为 demand 池强过滤。
|
|
|
|
|
+ * 信号优先级:
|
|
|
|
|
+ * 1. type 明确时按 type 映射 — 0→公众号合作-即转-稳定;1/4→公众号合作-Daily-自选;2/3→群/企微合作-稳定
|
|
|
|
|
+ * 2. type=999/null 但带 ghName(公众号参数)→ 公众号入口,映射即转稳定(与历史一致)
|
|
|
|
|
+ * 3. 否则 null,不限 channel_name(保留原行为)
|
|
|
|
|
+ * type 取值: 0-自动回复(公众号入口) / 1-服务号推送 / 2-企微-社群 / 3-企微-自动回复 / 4-公众号推送 / 999-不限。
|
|
|
|
|
+ * 作用:解决 crowd_segment 跨渠道客户(如 gzyhc/wxm)在企微/公众号入口下被对侧数据污染的问题。
|
|
|
|
|
+ */
|
|
|
|
|
+ private String resolveChannelName(VideoContentListParam param) {
|
|
|
|
|
+ Integer type = param.getType();
|
|
|
|
|
+ if (type != null) {
|
|
|
|
|
+ if (type == 2 || type == 3) return CHANNEL_NAME_QW;
|
|
|
|
|
+ if (type == 1 || type == 4) return CHANNEL_NAME_GZH_DAILY;
|
|
|
|
|
+ if (type == 0) return CHANNEL_NAME_GZH;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (StringUtils.hasText(param.getGhName())) return CHANNEL_NAME_GZH;
|
|
|
|
|
+ return null;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
@Override
|
|
@Override
|
|
|
public Page<VideoContentItemVO> getVideoContentList(VideoContentListParam param) {
|
|
public Page<VideoContentItemVO> getVideoContentList(VideoContentListParam param) {
|
|
|
ContentPlatformAccount user = LoginUserContext.getUser();
|
|
ContentPlatformAccount user = LoginUserContext.getUser();
|
|
@@ -613,27 +659,472 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
if (StringUtils.hasText(param.getTitle())) {
|
|
if (StringUtils.hasText(param.getTitle())) {
|
|
|
return getVideoContentListByTitle(param);
|
|
return getVideoContentListByTitle(param);
|
|
|
}
|
|
}
|
|
|
- Page<VideoContentItemVO> result = new Page<>(param.getPageNum(), param.getPageSize());
|
|
|
|
|
- int offset = (param.getPageNum() - 1) * param.getPageSize();
|
|
|
|
|
|
|
+ String source = param.getSource();
|
|
|
|
|
+ if (SOURCE_PRIOR.equalsIgnoreCase(source)) {
|
|
|
|
|
+ return getSingleSourcePage(param, user, SOURCE_PRIOR);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (SOURCE_POSTERIOR.equalsIgnoreCase(source)) {
|
|
|
|
|
+ return getSingleSourcePage(param, user, SOURCE_POSTERIOR);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (SOURCE_HOT.equalsIgnoreCase(source)) {
|
|
|
|
|
+ return getSingleSourcePage(param, user, SOURCE_HOT);
|
|
|
|
|
+ }
|
|
|
|
|
+ return getInterleavedPage(param, user);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 单一来源分页:与穿插使用同一套候选构建逻辑(人群需求/优质相似各 2 阶段、组内 score top K),
|
|
|
|
|
+ * 再按 pageNum/pageSize 在内存中分页。totalSize = 去重后总数。
|
|
|
|
|
+ */
|
|
|
|
|
+ private Page<VideoContentItemVO> getSingleSourcePage(VideoContentListParam param, ContentPlatformAccount user, String source) {
|
|
|
|
|
+ if (SOURCE_HOT.equals(source)) {
|
|
|
|
|
+ return getHotSourcePaged(param, user);
|
|
|
|
|
+ }
|
|
|
|
|
+ List<VideoContentItemVO> list;
|
|
|
|
|
+ if (SOURCE_PRIOR.equals(source)) {
|
|
|
|
|
+ // 粉丝喜欢 = 人群需求-场景 与 人群需求 严格 1:1 穿插,场景先出,prior 用完顺位补齐
|
|
|
|
|
+ List<VideoContentItemVO> scene = fetchPriorSceneCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
|
|
|
|
|
+ List<VideoContentItemVO> prior = fetchPriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
|
|
|
|
|
+ list = interleavePriorWithScene(scene, prior);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ list = fetchPosteriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
|
|
|
|
|
+ }
|
|
|
|
|
+ for (VideoContentItemVO v : list) {
|
|
|
|
|
+ v.setSource(source);
|
|
|
|
|
+ }
|
|
|
|
|
+ return paginateCandidates(param, list);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * priorScene 与 prior 严格 1:1 穿插 + 跨池 video_id / 标题 去重(priorScene 优先到达)。
|
|
|
|
|
+ * 一侧用完后,另一侧剩余按原顺序追加。
|
|
|
|
|
+ */
|
|
|
|
|
+ private List<VideoContentItemVO> interleavePriorWithScene(List<VideoContentItemVO> scene, List<VideoContentItemVO> prior) {
|
|
|
|
|
+ Set<Long> seenIds = new HashSet<>();
|
|
|
|
|
+ Set<String> seenTitles = new HashSet<>();
|
|
|
|
|
+ List<VideoContentItemVO> out = new ArrayList<>();
|
|
|
|
|
+ int si = 0, pi = 0;
|
|
|
|
|
+ while (si < scene.size() || pi < prior.size()) {
|
|
|
|
|
+ while (si < scene.size()) {
|
|
|
|
|
+ VideoContentItemVO v = scene.get(si++);
|
|
|
|
|
+ if (tryEmit(v, seenIds, seenTitles, out)) break;
|
|
|
|
|
+ }
|
|
|
|
|
+ while (pi < prior.size()) {
|
|
|
|
|
+ VideoContentItemVO v = prior.get(pi++);
|
|
|
|
|
+ if (tryEmit(v, seenIds, seenTitles, out)) break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return out;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private boolean tryEmit(VideoContentItemVO v, Set<Long> seenIds, Set<String> seenTitles, List<VideoContentItemVO> out) {
|
|
|
|
|
+ if (v.getVideoId() == null) return false;
|
|
|
|
|
+ if (seenIds.contains(v.getVideoId())) return false;
|
|
|
|
|
+ String nt = TitleNormalizer.normalize(v.getTitle());
|
|
|
|
|
+ if (!nt.isEmpty() && seenTitles.contains(nt)) return false;
|
|
|
|
|
+ seenIds.add(v.getVideoId());
|
|
|
|
|
+ if (!nt.isEmpty()) seenTitles.add(nt);
|
|
|
|
|
+ out.add(v);
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 单源 hot:复用原 planMapperExt.getVideoCount + getVideoList 真分页链路。
|
|
|
|
|
+ */
|
|
|
|
|
+ private Page<VideoContentItemVO> getHotSourcePaged(VideoContentListParam param, ContentPlatformAccount user) {
|
|
|
|
|
+ int pageSize = param.getPageSize();
|
|
|
|
|
+ int pageNum = param.getPageNum();
|
|
|
|
|
+ Page<VideoContentItemVO> result = new Page<>(pageNum, pageSize);
|
|
|
String dt = planMapperExt.getVideoMaxDt();
|
|
String dt = planMapperExt.getVideoMaxDt();
|
|
|
String datastatDt = planMapperExt.getVideoDatastatMaxDt();
|
|
String datastatDt = planMapperExt.getVideoDatastatMaxDt();
|
|
|
|
|
+ if (!StringUtils.hasText(dt)) {
|
|
|
|
|
+ result.setTotalSize(0);
|
|
|
|
|
+ result.setObjs(new ArrayList<>());
|
|
|
|
|
+ return result;
|
|
|
|
|
+ }
|
|
|
int count = planMapperExt.getVideoCount(param, dt, videoMinScore);
|
|
int count = planMapperExt.getVideoCount(param, dt, videoMinScore);
|
|
|
result.setTotalSize(count);
|
|
result.setTotalSize(count);
|
|
|
if (count == 0) {
|
|
if (count == 0) {
|
|
|
result.setObjs(new ArrayList<>());
|
|
result.setObjs(new ArrayList<>());
|
|
|
return result;
|
|
return result;
|
|
|
}
|
|
}
|
|
|
|
|
+ int offset = (pageNum - 1) * pageSize;
|
|
|
|
|
+ if (offset >= count) {
|
|
|
|
|
+ result.setObjs(new ArrayList<>());
|
|
|
|
|
+ return result;
|
|
|
|
|
+ }
|
|
|
String sort = getVideoContentListSort(param.getSort());
|
|
String sort = getVideoContentListSort(param.getSort());
|
|
|
String type = getVideoContentListType(param.getType());
|
|
String type = getVideoContentListType(param.getType());
|
|
|
String channel = getVideoContentListChannel(param.getSort(), user.getChannel());
|
|
String channel = getVideoContentListChannel(param.getSort(), user.getChannel());
|
|
|
String strategy = param.getSort() == 3 ? "recommend" : "normal";
|
|
String strategy = param.getSort() == 3 ? "recommend" : "normal";
|
|
|
List<ContentPlatformVideo> videoList = planMapperExt.getVideoList(param, dt, datastatDt, type, channel, strategy,
|
|
List<ContentPlatformVideo> videoList = planMapperExt.getVideoList(param, dt, datastatDt, type, channel, strategy,
|
|
|
- videoMinScore, offset, param.getPageSize(), sort);
|
|
|
|
|
|
|
+ videoMinScore, offset, pageSize, sort);
|
|
|
List<VideoContentItemVO> list = buildVideoContentItemVOList(videoList, type, "sum", user.getChannel(), datastatDt);
|
|
List<VideoContentItemVO> list = buildVideoContentItemVOList(videoList, type, "sum", user.getChannel(), datastatDt);
|
|
|
|
|
+ if (list == null) {
|
|
|
|
|
+ list = new ArrayList<>();
|
|
|
|
|
+ }
|
|
|
|
|
+ for (VideoContentItemVO v : list) {
|
|
|
|
|
+ v.setSource(SOURCE_HOT);
|
|
|
|
|
+ }
|
|
|
result.setObjs(list);
|
|
result.setObjs(list);
|
|
|
return result;
|
|
return result;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 四路随机穿插 + 跨路 video_id 去重。
|
|
|
|
|
+ * 4 池: priorScene / prior / posterior / hot,priorScene 与 prior 对外都标 source='prior'(粉丝喜欢)。
|
|
|
|
|
+ * 每步在未耗尽的池中等概率随机选一个,从该池头部取下一条(池内顺序由 fetcher 决定)。
|
|
|
|
|
+ * 用 (userId ^ 当天日期) 作为种子,保证同一用户当天翻页顺序一致、刷新一致。
|
|
|
|
|
+ */
|
|
|
|
|
+ private Page<VideoContentItemVO> getInterleavedPage(VideoContentListParam param, ContentPlatformAccount user) {
|
|
|
|
|
+ List<VideoContentItemVO> priorScene = fetchPriorSceneCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
|
|
|
|
|
+ List<VideoContentItemVO> prior = fetchPriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
|
|
|
|
|
+ List<VideoContentItemVO> posterior = fetchPosteriorCandidates(param, user, DEMAND_CANDIDATE_LIMIT);
|
|
|
|
|
+ List<VideoContentItemVO> hot = fetchHotCandidates(param, user, HOT_CANDIDATE_LIMIT);
|
|
|
|
|
+ for (VideoContentItemVO v : priorScene) v.setSource(SOURCE_PRIOR);
|
|
|
|
|
+ for (VideoContentItemVO v : prior) v.setSource(SOURCE_PRIOR);
|
|
|
|
|
+ for (VideoContentItemVO v : posterior) v.setSource(SOURCE_POSTERIOR);
|
|
|
|
|
+ for (VideoContentItemVO v : hot) v.setSource(SOURCE_HOT);
|
|
|
|
|
+
|
|
|
|
|
+ List<List<VideoContentItemVO>> pools = Arrays.asList(priorScene, prior, posterior, hot);
|
|
|
|
|
+ int N = pools.size();
|
|
|
|
|
+ int[] pointers = new int[N];
|
|
|
|
|
+ boolean[] exhausted = new boolean[N];
|
|
|
|
|
+ Set<Long> emittedIds = new HashSet<>();
|
|
|
|
|
+ Set<String> emittedTitles = new HashSet<>();
|
|
|
|
|
+ List<VideoContentItemVO> merged = new ArrayList<>();
|
|
|
|
|
+
|
|
|
|
|
+ long userSeed = user.getId() == null ? 0L : user.getId();
|
|
|
|
|
+ long seed = userSeed ^ LocalDate.now().toString().hashCode();
|
|
|
|
|
+ Random rng = new Random(seed);
|
|
|
|
|
+
|
|
|
|
|
+ while (true) {
|
|
|
|
|
+ boolean allExhausted = true;
|
|
|
|
|
+ for (boolean e : exhausted) {
|
|
|
|
|
+ if (!e) { allExhausted = false; break; }
|
|
|
|
|
+ }
|
|
|
|
|
+ if (allExhausted) break;
|
|
|
|
|
+
|
|
|
|
|
+ List<Integer> alive = new ArrayList<>(N);
|
|
|
|
|
+ for (int i = 0; i < N; i++) {
|
|
|
|
|
+ if (!exhausted[i]) alive.add(i);
|
|
|
|
|
+ }
|
|
|
|
|
+ int cur = alive.get(rng.nextInt(alive.size()));
|
|
|
|
|
+
|
|
|
|
|
+ List<VideoContentItemVO> pool = pools.get(cur);
|
|
|
|
|
+ while (pointers[cur] < pool.size() && shouldSkipForDedup(pool.get(pointers[cur]), emittedIds, emittedTitles)) {
|
|
|
|
|
+ pointers[cur]++;
|
|
|
|
|
+ }
|
|
|
|
|
+ if (pointers[cur] < pool.size()) {
|
|
|
|
|
+ VideoContentItemVO item = pool.get(pointers[cur]++);
|
|
|
|
|
+ emittedIds.add(item.getVideoId());
|
|
|
|
|
+ String nt = TitleNormalizer.normalize(item.getTitle());
|
|
|
|
|
+ if (!nt.isEmpty()) emittedTitles.add(nt);
|
|
|
|
|
+ merged.add(item);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ exhausted[cur] = true;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return paginateCandidates(param, merged);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 穿插去重判断:同 video_id 已出过 → 跳;同标题(归一化后)已出过 → 跳。
|
|
|
|
|
+ * 标题归一化用 TitleNormalizer(去 emoji/空白/全半角),应对运营把同段内容重复上传成多个 video_id 的情况。
|
|
|
|
|
+ */
|
|
|
|
|
+ private boolean shouldSkipForDedup(VideoContentItemVO item, Set<Long> emittedIds, Set<String> emittedTitles) {
|
|
|
|
|
+ if (item.getVideoId() != null && emittedIds.contains(item.getVideoId())) {
|
|
|
|
|
+ return true;
|
|
|
|
|
+ }
|
|
|
|
|
+ String nt = TitleNormalizer.normalize(item.getTitle());
|
|
|
|
|
+ return !nt.isEmpty() && emittedTitles.contains(nt);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private Page<VideoContentItemVO> paginateCandidates(VideoContentListParam param, List<VideoContentItemVO> all) {
|
|
|
|
|
+ int pageSize = param.getPageSize();
|
|
|
|
|
+ int pageNum = param.getPageNum();
|
|
|
|
|
+ Page<VideoContentItemVO> result = new Page<>(pageNum, pageSize);
|
|
|
|
|
+ result.setTotalSize(all.size());
|
|
|
|
|
+ if (all.isEmpty()) {
|
|
|
|
|
+ result.setObjs(new ArrayList<>());
|
|
|
|
|
+ return result;
|
|
|
|
|
+ }
|
|
|
|
|
+ int from = Math.min((pageNum - 1) * pageSize, all.size());
|
|
|
|
|
+ int to = Math.min(pageNum * pageSize, all.size());
|
|
|
|
|
+ result.setObjs(new ArrayList<>(all.subList(from, to)));
|
|
|
|
|
+ return result;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 人群需求-场景池: demand_strategy='人群需求-场景'。
|
|
|
|
|
+ * 退化策略: ghName 非空且查不到数据 → 退回渠道粒度(不限 channel_level3)。
|
|
|
|
|
+ * 后处理:
|
|
|
|
|
+ * 1. 同 video_id 仅保留 total_rov 最大的代表行(利用 SQL 已按 total_rov DESC, score DESC 排好,首次即最大)
|
|
|
|
|
+ * 2. 过滤 rov 为 null 或 <=0(视频近 7 日无表现)
|
|
|
|
|
+ * 3. 输出顺序按 rov DESC,相同 rov 按 total_rov DESC 兜底
|
|
|
|
|
+ */
|
|
|
|
|
+ private List<VideoContentItemVO> fetchPriorSceneCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
|
|
|
|
|
+ String dt = demandVideoMapperExt.getMaxDt();
|
|
|
|
|
+ if (!StringUtils.hasText(dt)) {
|
|
|
|
|
+ return new ArrayList<>();
|
|
|
|
|
+ }
|
|
|
|
|
+ String crowdSegment = user.getChannel();
|
|
|
|
|
+ String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
|
|
|
|
|
+ String channelName = resolveChannelName(param);
|
|
|
|
|
+
|
|
|
|
|
+ String category = StringUtils.hasText(param.getCategory()) ? param.getCategory() : null;
|
|
|
|
|
+ List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, ghName, null, category, limit, false);
|
|
|
|
|
+ if (ghName != null && rows.isEmpty()) {
|
|
|
|
|
+ rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, null, null, category, limit, false);
|
|
|
|
|
+ }
|
|
|
|
|
+ // 跨渠道退化:channel_name 命中但 crowd_segment 在对侧渠道下 0 行(如公众号账号切到企微入口)→ 去 crowd_segment,只按 channel_name 拉通用数据
|
|
|
|
|
+ if (channelName != null && rows.isEmpty()) {
|
|
|
|
|
+ rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, null, DEMAND_STRATEGY_PRIOR_SCENE, null, null, null, null, null, category, limit, false);
|
|
|
|
|
+ }
|
|
|
|
|
+ // 1. 同 video_id 取 total_rov 最大的代表行(SQL 已排序,putIfAbsent 保留首次)
|
|
|
|
|
+ LinkedHashMap<Long, ContentPlatformDemandVideo> bestPerVideo = new LinkedHashMap<>();
|
|
|
|
|
+ for (ContentPlatformDemandVideo r : rows) {
|
|
|
|
|
+ if (r.getVideoId() == null) continue;
|
|
|
|
|
+ bestPerVideo.putIfAbsent(r.getVideoId(), r);
|
|
|
|
|
+ }
|
|
|
|
|
+ // 2. 过滤 rov<=0/null
|
|
|
|
|
+ List<ContentPlatformDemandVideo> filtered = new ArrayList<>(bestPerVideo.size());
|
|
|
|
|
+ for (ContentPlatformDemandVideo r : bestPerVideo.values()) {
|
|
|
|
|
+ if (r.getRov() == null || r.getRov() <= 0) continue;
|
|
|
|
|
+ filtered.add(r);
|
|
|
|
|
+ }
|
|
|
|
|
+ // 3. 按 rov DESC 排序,次级 total_rov DESC
|
|
|
|
|
+ filtered.sort((a, b) -> {
|
|
|
|
|
+ int c = Double.compare(
|
|
|
|
|
+ b.getRov() == null ? 0d : b.getRov(),
|
|
|
|
|
+ a.getRov() == null ? 0d : a.getRov());
|
|
|
|
|
+ if (c != 0) return c;
|
|
|
|
|
+ return Double.compare(
|
|
|
|
|
+ b.getTotalRov() == null ? 0d : b.getTotalRov(),
|
|
|
|
|
+ a.getTotalRov() == null ? 0d : a.getTotalRov());
|
|
|
|
|
+ });
|
|
|
|
|
+ return buildDemandVideoContentItemVOList(filtered);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 人群需求池(prior):只取 dimension='传播的头部' 的行。
|
|
|
|
|
+ * total_rov 在 prior 池里代表「群体对(point_type, standard_element)这个特征的需求强度」,
|
|
|
|
|
+ * 不同渠道分布差异大,所以按 channel 内 total_rov 分位保留 top 50% 特征组,
|
|
|
|
|
+ * 过滤掉群体根本不爱的弱需求题材。
|
|
|
|
|
+ * 再按 (point_type, standard_element) 分组,组按 total_rov DESC、组内 score DESC 取前 K,
|
|
|
|
|
+ * 结果按 limit 截断。
|
|
|
|
|
+ */
|
|
|
|
|
+ private List<VideoContentItemVO> fetchPriorCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
|
|
|
|
|
+ String dt = demandVideoMapperExt.getMaxDt();
|
|
|
|
|
+ if (!StringUtils.hasText(dt)) {
|
|
|
|
|
+ return new ArrayList<>();
|
|
|
|
|
+ }
|
|
|
|
|
+ String crowdSegment = user.getChannel();
|
|
|
|
|
+ String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
|
|
|
|
|
+ String channelName = resolveChannelName(param);
|
|
|
|
|
+ int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
|
|
|
|
|
+
|
|
|
|
|
+ String category = StringUtils.hasText(param.getCategory()) ? param.getCategory() : null;
|
|
|
|
|
+ List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, ghName, null, category, fetchLimit, false);
|
|
|
|
|
+
|
|
|
|
|
+ // 退化:该 ghName 无数据 → 退回渠道粒度
|
|
|
|
|
+ if (ghName != null && rows.isEmpty()) {
|
|
|
|
|
+ rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, crowdSegment, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, category, fetchLimit, false);
|
|
|
|
|
+ }
|
|
|
|
|
+ // 跨渠道退化:channel_name 命中但 crowd_segment 在对侧 0 行 → 去 crowd_segment 拉通用数据
|
|
|
|
|
+ if (channelName != null && rows.isEmpty()) {
|
|
|
|
|
+ rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, null, DEMAND_STRATEGY_PRIOR, PRIOR_PREMIUM_DIMENSION, null, null, null, null, category, fetchLimit, false);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 近 7 日 rov 下限,过滤掉低质量近期表现的视频(0513 验证 ≥0.02 保留 ~41%)
|
|
|
|
|
+ rows = rows.stream()
|
|
|
|
|
+ .filter(r -> r.getRov() != null && r.getRov() >= DEMAND_MIN_ROV)
|
|
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
+
|
|
|
|
|
+ Function<ContentPlatformDemandVideo, String> keyFn = r ->
|
|
|
|
|
+ (r.getPointType() == null ? "" : r.getPointType()) + "\u0001"
|
|
|
|
|
+ + (r.getStandardElement() == null ? "" : r.getStandardElement());
|
|
|
|
|
+
|
|
|
|
|
+ rows = retainTopGroupsByTotalRov(rows, keyFn, PRIOR_GROUP_KEEP_RATIO);
|
|
|
|
|
+
|
|
|
|
|
+ List<VideoContentItemVO> out = groupAndTopK(rows, keyFn, TOP_K_PER_DEMAND, false);
|
|
|
|
|
+ if (out.size() > limit) {
|
|
|
|
|
+ out = new ArrayList<>(out.subList(0, limit));
|
|
|
|
|
+ }
|
|
|
|
|
+ return out;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 按 channel 内"特征组(由 keyFn 定义)"分位过滤:保留 total_rov 排名 top (keepRatio*100%) 的组。
|
|
|
|
|
+ * 同组内的 total_rov 在 SQL dimension 过滤后应该是常量,这里取 max 作为组代表,以应对脏数据。
|
|
|
|
|
+ * 组数不足 2 时全部保留,避免对空/单组数据产生意外裁剪。
|
|
|
|
|
+ */
|
|
|
|
|
+ private List<ContentPlatformDemandVideo> retainTopGroupsByTotalRov(
|
|
|
|
|
+ List<ContentPlatformDemandVideo> rows,
|
|
|
|
|
+ Function<ContentPlatformDemandVideo, String> keyFn,
|
|
|
|
|
+ double keepRatio) {
|
|
|
|
|
+ if (CollectionUtils.isEmpty(rows)) return rows;
|
|
|
|
|
+ LinkedHashMap<String, Double> groupMax = new LinkedHashMap<>();
|
|
|
|
|
+ for (ContentPlatformDemandVideo r : rows) {
|
|
|
|
|
+ String key = keyFn.apply(r);
|
|
|
|
|
+ double tr = r.getTotalRov() == null ? 0d : r.getTotalRov();
|
|
|
|
|
+ groupMax.merge(key, tr, Math::max);
|
|
|
|
|
+ }
|
|
|
|
|
+ int total = groupMax.size();
|
|
|
|
|
+ if (total < 2) return rows;
|
|
|
|
|
+ int keep = Math.max(1, (int) Math.ceil(total * keepRatio));
|
|
|
|
|
+ if (keep >= total) return rows;
|
|
|
|
|
+
|
|
|
|
|
+ List<Map.Entry<String, Double>> sorted = new ArrayList<>(groupMax.entrySet());
|
|
|
|
|
+ sorted.sort((a, b) -> Double.compare(b.getValue(), a.getValue()));
|
|
|
|
|
+ Set<String> keepKeys = new HashSet<>();
|
|
|
|
|
+ for (int i = 0; i < keep; i++) keepKeys.add(sorted.get(i).getKey());
|
|
|
|
|
+
|
|
|
|
|
+ List<ContentPlatformDemandVideo> out = new ArrayList<>(rows.size());
|
|
|
|
|
+ for (ContentPlatformDemandVideo r : rows) {
|
|
|
|
|
+ if (keepKeys.contains(keyFn.apply(r))) out.add(r);
|
|
|
|
|
+ }
|
|
|
|
|
+ return out;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 优质相似池(posterior):一段查询,不再按 demand_filter_sort_strategy 子分类。
|
|
|
|
|
+ * 按 demand_content_id 分组,组按 total_rov DESC、组内 score DESC 取前 K;
|
|
|
|
|
+ * 跨组用 video_id + 归一化标题去重,截到 limit。
|
|
|
|
|
+ */
|
|
|
|
|
+ private List<VideoContentItemVO> fetchPosteriorCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
|
|
|
|
|
+ String dt = demandVideoMapperExt.getMaxDt();
|
|
|
|
|
+ if (!StringUtils.hasText(dt)) {
|
|
|
|
|
+ return new ArrayList<>();
|
|
|
|
|
+ }
|
|
|
|
|
+ String crowdSegment = user.getChannel();
|
|
|
|
|
+ String ghName = StringUtils.hasText(param.getGhName()) ? param.getGhName() : null;
|
|
|
|
|
+ String channelName = resolveChannelName(param);
|
|
|
|
|
+ int fetchLimit = Math.max(limit * 3, DEMAND_CANDIDATE_LIMIT);
|
|
|
|
|
+
|
|
|
|
|
+ String category = StringUtils.hasText(param.getCategory()) ? param.getCategory() : null;
|
|
|
|
|
+ // 优质相似池:drive_dimension_time 一律不限制(含主查与退化路径),避免仅「昨日」窗口召回过少。
|
|
|
|
|
+ List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, null, ghName, null, category, fetchLimit, true);
|
|
|
|
|
+
|
|
|
|
|
+ // 退化:该 ghName 无数据 → 退回渠道粒度
|
|
|
|
|
+ if (ghName != null && rows.isEmpty()) {
|
|
|
|
|
+ rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, crowdSegment, DEMAND_STRATEGY_POSTERIOR, null, null, null, null, null, category, fetchLimit, true);
|
|
|
|
|
+ }
|
|
|
|
|
+ // 跨渠道退化:channel_name 命中但 crowd_segment 在对侧 0 行 → 去 crowd_segment 拉通用数据
|
|
|
|
|
+ if (channelName != null && rows.isEmpty()) {
|
|
|
|
|
+ rows = demandVideoMapperExt.selectForRecommend(
|
|
|
|
|
+ dt, channelName, null, DEMAND_STRATEGY_POSTERIOR, null, null, null, null, null, category, fetchLimit, true);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 近 7 日 rov 下限,与 prior 池一致(cdjh 0514 验证 ≥0.02 保留 ~54%)
|
|
|
|
|
+ rows = rows.stream()
|
|
|
|
|
+ .filter(r -> r.getRov() != null && r.getRov() >= DEMAND_MIN_ROV)
|
|
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
+
|
|
|
|
|
+ Function<ContentPlatformDemandVideo, String> keyFn = r ->
|
|
|
|
|
+ r.getDemandContentId() == null ? "" : r.getDemandContentId();
|
|
|
|
|
+
|
|
|
|
|
+ // 按 demand_content_id 的 total_rov 中位数过滤:保留中位数及以上(top 50%)的 demand 组,
|
|
|
|
|
+ // 砍掉群体表现弱的需求,避免低 total_rov 的 demand 带回来的相似变体稀释结果。
|
|
|
|
|
+ rows = retainTopGroupsByTotalRov(rows, keyFn, POSTERIOR_GROUP_KEEP_RATIO);
|
|
|
|
|
+
|
|
|
|
|
+ List<VideoContentItemVO> out = groupAndTopK(rows, keyFn, TOP_K_PER_DEMAND, true);
|
|
|
|
|
+ // 单段也要去归一化标题重复(同段内运营把同内容上传成多 video_id 的情况)
|
|
|
|
|
+ return concatDedup(out, Collections.emptyList(), limit);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 行级按 keyFn 分组:
|
|
|
|
|
+ * 1. 组按"组内最大 total_rov" 倒序
|
|
|
|
|
+ * 2. 组内按 score 倒序,组内 video_id 去重,最多取 topK 条
|
|
|
|
|
+ * 3. excludeSelfTitle=true 时先在 Java 端用 TitleNormalizer 过滤自标题行
|
|
|
|
|
+ * 4. rov 为 null 或 <=0 的行视为"近 7 日无表现",直接丢弃不入池
|
|
|
|
|
+ */
|
|
|
|
|
+ private List<VideoContentItemVO> groupAndTopK(List<ContentPlatformDemandVideo> rows,
|
|
|
|
|
+ Function<ContentPlatformDemandVideo, String> keyFn,
|
|
|
|
|
+ int topK,
|
|
|
|
|
+ boolean excludeSelfTitle) {
|
|
|
|
|
+ if (CollectionUtils.isEmpty(rows)) {
|
|
|
|
|
+ return new ArrayList<>();
|
|
|
|
|
+ }
|
|
|
|
|
+ LinkedHashMap<String, List<ContentPlatformDemandVideo>> groups = new LinkedHashMap<>();
|
|
|
|
|
+ Map<String, Double> groupRov = new HashMap<>();
|
|
|
|
|
+ for (ContentPlatformDemandVideo r : rows) {
|
|
|
|
|
+ if (r.getVideoId() == null) continue;
|
|
|
|
|
+ if (r.getRov() == null || r.getRov() <= 0) continue;
|
|
|
|
|
+ if (excludeSelfTitle && TitleNormalizer.isSelfTitle(r.getTitle(), r.getDemandContentTitle())) continue;
|
|
|
|
|
+ String k = keyFn.apply(r);
|
|
|
|
|
+ if (k == null) continue;
|
|
|
|
|
+ groups.computeIfAbsent(k, x -> new ArrayList<>()).add(r);
|
|
|
|
|
+ double tr = r.getTotalRov() == null ? 0d : r.getTotalRov();
|
|
|
|
|
+ Double prev = groupRov.get(k);
|
|
|
|
|
+ if (prev == null || tr > prev) {
|
|
|
|
|
+ groupRov.put(k, tr);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ List<Map.Entry<String, List<ContentPlatformDemandVideo>>> sortedGroups = new ArrayList<>(groups.entrySet());
|
|
|
|
|
+ sortedGroups.sort((a, b) -> Double.compare(
|
|
|
|
|
+ groupRov.getOrDefault(b.getKey(), 0d),
|
|
|
|
|
+ groupRov.getOrDefault(a.getKey(), 0d)));
|
|
|
|
|
+
|
|
|
|
|
+ List<ContentPlatformDemandVideo> out = new ArrayList<>();
|
|
|
|
|
+ for (Map.Entry<String, List<ContentPlatformDemandVideo>> e : sortedGroups) {
|
|
|
|
|
+ List<ContentPlatformDemandVideo> g = new ArrayList<>(e.getValue());
|
|
|
|
|
+ g.sort((a, b) -> Double.compare(
|
|
|
|
|
+ b.getScore() == null ? 0d : b.getScore(),
|
|
|
|
|
+ a.getScore() == null ? 0d : a.getScore()));
|
|
|
|
|
+ Set<Long> seen = new HashSet<>();
|
|
|
|
|
+ int taken = 0;
|
|
|
|
|
+ for (ContentPlatformDemandVideo r : g) {
|
|
|
|
|
+ if (!seen.add(r.getVideoId())) continue;
|
|
|
|
|
+ out.add(r);
|
|
|
|
|
+ if (++taken >= topK) break;
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return buildDemandVideoContentItemVOList(out);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ /**
|
|
|
|
|
+ * 段间拼接 + video_id + 归一化标题 去重。标题归一化用 TitleNormalizer,
|
|
|
|
|
+ * 应对运营把同段内容重复上传成多个 video_id 的情况(单源 posterior 也能去掉同标题不同 id 的重复)。
|
|
|
|
|
+ */
|
|
|
|
|
+ private List<VideoContentItemVO> concatDedup(List<VideoContentItemVO> a, List<VideoContentItemVO> b, int limit) {
|
|
|
|
|
+ Set<Long> seenIds = new HashSet<>();
|
|
|
|
|
+ Set<String> seenTitles = new HashSet<>();
|
|
|
|
|
+ List<VideoContentItemVO> out = new ArrayList<>();
|
|
|
|
|
+ for (VideoContentItemVO v : a) {
|
|
|
|
|
+ if (tryEmit(v, seenIds, seenTitles, out) && out.size() >= limit) return out;
|
|
|
|
|
+ }
|
|
|
|
|
+ for (VideoContentItemVO v : b) {
|
|
|
|
|
+ if (tryEmit(v, seenIds, seenTitles, out) && out.size() >= limit) return out;
|
|
|
|
|
+ }
|
|
|
|
|
+ return out;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private List<VideoContentItemVO> fetchHotCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
|
|
|
|
|
+ String dt = planMapperExt.getVideoMaxDt();
|
|
|
|
|
+ String datastatDt = planMapperExt.getVideoDatastatMaxDt();
|
|
|
|
|
+ if (!StringUtils.hasText(dt)) {
|
|
|
|
|
+ return new ArrayList<>();
|
|
|
|
|
+ }
|
|
|
|
|
+ String sort = getVideoContentListSort(param.getSort());
|
|
|
|
|
+ String type = getVideoContentListType(param.getType());
|
|
|
|
|
+ String channel = getVideoContentListChannel(param.getSort(), user.getChannel());
|
|
|
|
|
+ String strategy = param.getSort() == 3 ? "recommend" : "normal";
|
|
|
|
|
+ List<ContentPlatformVideo> videoList = planMapperExt.getVideoList(param, dt, datastatDt, type, channel, strategy,
|
|
|
|
|
+ videoMinScore, 0, limit, sort);
|
|
|
|
|
+ List<VideoContentItemVO> result = buildVideoContentItemVOList(videoList, type, "sum", user.getChannel(), datastatDt);
|
|
|
|
|
+ return result == null ? new ArrayList<>() : result;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
/**
|
|
/**
|
|
|
* 按标题通过 manager 平台接口查询视频列表,支持最大查询条数限制
|
|
* 按标题通过 manager 平台接口查询视频列表,支持最大查询条数限制
|
|
|
*/
|
|
*/
|
|
@@ -1330,7 +1821,14 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
item.setStandardElement(video.getStandardElement());
|
|
item.setStandardElement(video.getStandardElement());
|
|
|
item.setCategoryName(video.getCategoryName());
|
|
item.setCategoryName(video.getCategoryName());
|
|
|
item.setExperimentId(video.getExperimentId());
|
|
item.setExperimentId(video.getExperimentId());
|
|
|
|
|
+ item.setSim(video.getSim());
|
|
|
|
|
+ item.setRov(video.getRov());
|
|
|
|
|
+ item.setChannelName(video.getChannelName());
|
|
|
item.setChannelLevel3(video.getChannelLevel3());
|
|
item.setChannelLevel3(video.getChannelLevel3());
|
|
|
|
|
+ item.setVideoCount(video.getVideoCount());
|
|
|
|
|
+ item.setVisitUv(video.getVisitUv());
|
|
|
|
|
+ item.setUvRatio(video.getUvRatio());
|
|
|
|
|
+ item.setTotalRov(video.getTotalRov());
|
|
|
result.add(item);
|
|
result.add(item);
|
|
|
}
|
|
}
|
|
|
return result;
|
|
return result;
|