|
|
@@ -615,6 +615,7 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
private static final String DEMAND_STRATEGY_PRIOR_SCENE = "先验需求-场景";
|
|
|
private static final String DEMAND_STRATEGY_POSTERIOR = "后验需求";
|
|
|
private static final String PRIOR_PREMIUM_DIMENSION = "传播的头部";
|
|
|
+ private static final double PRIOR_GROUP_KEEP_RATIO = 0.5;
|
|
|
private static final String POSTERIOR_FILTER_ABS_LIKE = "绝对高效率%";
|
|
|
private static final String POSTERIOR_FILTER_REL_LIKE = "相对裂变率%";
|
|
|
private static final String POSTERIOR_DRIVE_DIMENSION_TIME = "昨日";
|
|
|
@@ -666,32 +667,38 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * priorScene 与 prior 严格 1:1 穿插 + 跨池 video_id 去重(priorScene 优先到达)。
|
|
|
+ * priorScene 与 prior 严格 1:1 穿插 + 跨池 video_id / 标题 去重(priorScene 优先到达)。
|
|
|
* 一侧用完后,另一侧剩余按原顺序追加。
|
|
|
*/
|
|
|
private List<VideoContentItemVO> interleavePriorWithScene(List<VideoContentItemVO> scene, List<VideoContentItemVO> prior) {
|
|
|
- Set<Long> seen = new HashSet<>();
|
|
|
+ Set<Long> seenIds = new HashSet<>();
|
|
|
+ Set<String> seenTitles = new HashSet<>();
|
|
|
List<VideoContentItemVO> out = new ArrayList<>();
|
|
|
int si = 0, pi = 0;
|
|
|
while (si < scene.size() || pi < prior.size()) {
|
|
|
while (si < scene.size()) {
|
|
|
VideoContentItemVO v = scene.get(si++);
|
|
|
- if (v.getVideoId() != null && seen.add(v.getVideoId())) {
|
|
|
- out.add(v);
|
|
|
- break;
|
|
|
- }
|
|
|
+ if (tryEmit(v, seenIds, seenTitles, out)) break;
|
|
|
}
|
|
|
while (pi < prior.size()) {
|
|
|
VideoContentItemVO v = prior.get(pi++);
|
|
|
- if (v.getVideoId() != null && seen.add(v.getVideoId())) {
|
|
|
- out.add(v);
|
|
|
- break;
|
|
|
- }
|
|
|
+ if (tryEmit(v, seenIds, seenTitles, out)) break;
|
|
|
}
|
|
|
}
|
|
|
return out;
|
|
|
}
|
|
|
|
|
|
+ private boolean tryEmit(VideoContentItemVO v, Set<Long> seenIds, Set<String> seenTitles, List<VideoContentItemVO> out) {
|
|
|
+ if (v.getVideoId() == null) return false;
|
|
|
+ if (seenIds.contains(v.getVideoId())) return false;
|
|
|
+ String nt = TitleNormalizer.normalize(v.getTitle());
|
|
|
+ if (!nt.isEmpty() && seenTitles.contains(nt)) return false;
|
|
|
+ seenIds.add(v.getVideoId());
|
|
|
+ if (!nt.isEmpty()) seenTitles.add(nt);
|
|
|
+ out.add(v);
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* 单源 hot:复用原 planMapperExt.getVideoCount + getVideoList 真分页链路。
|
|
|
*/
|
|
|
@@ -755,6 +762,7 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
int[] pointers = new int[N];
|
|
|
boolean[] exhausted = new boolean[N];
|
|
|
Set<Long> emittedIds = new HashSet<>();
|
|
|
+ Set<String> emittedTitles = new HashSet<>();
|
|
|
List<VideoContentItemVO> merged = new ArrayList<>();
|
|
|
|
|
|
long userSeed = user.getId() == null ? 0L : user.getId();
|
|
|
@@ -775,12 +783,14 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
int cur = alive.get(rng.nextInt(alive.size()));
|
|
|
|
|
|
List<VideoContentItemVO> pool = pools.get(cur);
|
|
|
- while (pointers[cur] < pool.size() && emittedIds.contains(pool.get(pointers[cur]).getVideoId())) {
|
|
|
+ while (pointers[cur] < pool.size() && shouldSkipForDedup(pool.get(pointers[cur]), emittedIds, emittedTitles)) {
|
|
|
pointers[cur]++;
|
|
|
}
|
|
|
if (pointers[cur] < pool.size()) {
|
|
|
VideoContentItemVO item = pool.get(pointers[cur]++);
|
|
|
emittedIds.add(item.getVideoId());
|
|
|
+ String nt = TitleNormalizer.normalize(item.getTitle());
|
|
|
+ if (!nt.isEmpty()) emittedTitles.add(nt);
|
|
|
merged.add(item);
|
|
|
} else {
|
|
|
exhausted[cur] = true;
|
|
|
@@ -789,6 +799,18 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
return paginateCandidates(param, merged);
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * 穿插去重判断:同 video_id 已出过 → 跳;同标题(归一化后)已出过 → 跳。
|
|
|
+ * 标题归一化用 TitleNormalizer(去 emoji/空白/全半角),应对运营把同段内容重复上传成多个 video_id 的情况。
|
|
|
+ */
|
|
|
+ private boolean shouldSkipForDedup(VideoContentItemVO item, Set<Long> emittedIds, Set<String> emittedTitles) {
|
|
|
+ if (item.getVideoId() != null && emittedIds.contains(item.getVideoId())) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ String nt = TitleNormalizer.normalize(item.getTitle());
|
|
|
+ return !nt.isEmpty() && emittedTitles.contains(nt);
|
|
|
+ }
|
|
|
+
|
|
|
private Page<VideoContentItemVO> paginateCandidates(VideoContentListParam param, List<VideoContentItemVO> all) {
|
|
|
int pageSize = param.getPageSize();
|
|
|
int pageNum = param.getPageNum();
|
|
|
@@ -853,7 +875,11 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
|
|
|
/**
|
|
|
* 先验池:只取 dimension='传播的头部' 的行。
|
|
|
- * 按 (point_type, standard_element) 分组,组按 total_rov DESC、组内 score DESC 取前 K,结果按 limit 截断。
|
|
|
+ * total_rov 在 prior 池里代表「群体对(point_type, standard_element)这个特征的需求强度」,
|
|
|
+ * 不同渠道分布差异大,所以按 channel 内 total_rov 分位保留 top 50% 特征组,
|
|
|
+ * 过滤掉群体根本不爱的弱需求题材。
|
|
|
+ * 再按 (point_type, standard_element) 分组,组按 total_rov DESC、组内 score DESC 取前 K,
|
|
|
+ * 结果按 limit 截断。
|
|
|
*/
|
|
|
private List<VideoContentItemVO> fetchPriorCandidates(VideoContentListParam param, ContentPlatformAccount user, int limit) {
|
|
|
String dt = demandVideoMapperExt.getMaxDt();
|
|
|
@@ -877,6 +903,8 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
(r.getPointType() == null ? "" : r.getPointType()) + "\u0001"
|
|
|
+ (r.getStandardElement() == null ? "" : r.getStandardElement());
|
|
|
|
|
|
+ rows = retainTopGroupsByTotalRov(rows, keyFn, PRIOR_GROUP_KEEP_RATIO);
|
|
|
+
|
|
|
List<VideoContentItemVO> out = groupAndTopK(rows, keyFn, TOP_K_PER_DEMAND, false);
|
|
|
if (out.size() > limit) {
|
|
|
out = new ArrayList<>(out.subList(0, limit));
|
|
|
@@ -884,6 +912,39 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
|
|
|
return out;
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * 按 channel 内"特征组(由 keyFn 定义)"分位过滤:保留 total_rov 排名 top (keepRatio*100%) 的组。
|
|
|
+ * 同组内的 total_rov 在 SQL dimension 过滤后应该是常量,这里取 max 作为组代表,以应对脏数据。
|
|
|
+ * 组数不足 2 时全部保留,避免对空/单组数据产生意外裁剪。
|
|
|
+ */
|
|
|
+ private List<ContentPlatformDemandVideo> retainTopGroupsByTotalRov(
|
|
|
+ List<ContentPlatformDemandVideo> rows,
|
|
|
+ Function<ContentPlatformDemandVideo, String> keyFn,
|
|
|
+ double keepRatio) {
|
|
|
+ if (CollectionUtils.isEmpty(rows)) return rows;
|
|
|
+ LinkedHashMap<String, Double> groupMax = new LinkedHashMap<>();
|
|
|
+ for (ContentPlatformDemandVideo r : rows) {
|
|
|
+ String key = keyFn.apply(r);
|
|
|
+ double tr = r.getTotalRov() == null ? 0d : r.getTotalRov();
|
|
|
+ groupMax.merge(key, tr, Math::max);
|
|
|
+ }
|
|
|
+ int total = groupMax.size();
|
|
|
+ if (total < 2) return rows;
|
|
|
+ int keep = Math.max(1, (int) Math.ceil(total * keepRatio));
|
|
|
+ if (keep >= total) return rows;
|
|
|
+
|
|
|
+ List<Map.Entry<String, Double>> sorted = new ArrayList<>(groupMax.entrySet());
|
|
|
+ sorted.sort((a, b) -> Double.compare(b.getValue(), a.getValue()));
|
|
|
+ Set<String> keepKeys = new HashSet<>();
|
|
|
+ for (int i = 0; i < keep; i++) keepKeys.add(sorted.get(i).getKey());
|
|
|
+
|
|
|
+ List<ContentPlatformDemandVideo> out = new ArrayList<>(rows.size());
|
|
|
+ for (ContentPlatformDemandVideo r : rows) {
|
|
|
+ if (keepKeys.contains(keyFn.apply(r))) out.add(r);
|
|
|
+ }
|
|
|
+ return out;
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* 后验池:A 段 "绝对高效率" → B 段 "相对裂变率"。
|
|
|
* 每段按 demand_content_id 分组,组按 total_rov DESC、组内 score DESC 取前 K;段间拼接 + video_id 去重。
|