Просмотр исходного кода

Merge cooperation_video_candidate_pool_improved_lld_0509 into test: posterior 单源标题去重

刘立冬 1 день назад
Родитель
Сommit
72d71afda8

+ 8 - 11
api-module/src/main/java/com/tzld/piaoquan/api/service/contentplatform/impl/ContentPlatformPlanServiceImpl.java

@@ -1029,22 +1029,19 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
         return buildDemandVideoContentItemVOList(out);
     }
 
+    /**
+     * Concatenates list a then list b, de-duplicating by video_id and normalized title, and returns as soon as the output holds limit items.
+     * Per the original note, titles are normalized via TitleNormalizer (inside tryEmit, not shown here) so that the same content re-uploaded by operators under several video_ids is dropped, even within a single posterior source.
+     */
     private List<VideoContentItemVO> concatDedup(List<VideoContentItemVO> a, List<VideoContentItemVO> b, int limit) {
-        Set<Long> seen = new HashSet<>();
+        Set<Long> seenIds = new HashSet<>();
+        Set<String> seenTitles = new HashSet<>();
         List<VideoContentItemVO> out = new ArrayList<>();
         for (VideoContentItemVO v : a) {
-            if (v.getVideoId() == null) continue;
-            if (seen.add(v.getVideoId())) {
-                out.add(v);
-                if (out.size() >= limit) return out;
-            }
+            if (tryEmit(v, seenIds, seenTitles, out) && out.size() >= limit) return out;
         }
         for (VideoContentItemVO v : b) {
-            if (v.getVideoId() == null) continue;
-            if (seen.add(v.getVideoId())) {
-                out.add(v);
-                if (out.size() >= limit) return out;
-            }
+            if (tryEmit(v, seenIds, seenTitles, out) && out.size() >= limit) return out;
         }
         return out;
     }