Просмотр исходного кода

Merge branch 'cooperation_video_candidate_pool_improved_lld_0509' into test

刘立冬 4 дней назад
Родитель
Коммит
27792c1ce7

+ 7 - 0
api-module/src/main/java/com/tzld/piaoquan/api/service/contentplatform/impl/ContentPlatformPlanServiceImpl.java

@@ -27,6 +27,7 @@ import com.tzld.piaoquan.api.service.VideoMultiService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformAccountService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformAccountService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformCooperateAccountService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformCooperateAccountService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformPlanService;
 import com.tzld.piaoquan.api.service.contentplatform.ContentPlatformPlanService;
+import com.tzld.piaoquan.api.util.TitleNormalizer;
 import com.tzld.piaoquan.growth.common.common.enums.GhTypeEnum;
 import com.tzld.piaoquan.growth.common.common.enums.GhTypeEnum;
 import com.tzld.piaoquan.growth.common.common.enums.StrategyStatusEnum;
 import com.tzld.piaoquan.growth.common.common.enums.StrategyStatusEnum;
 import com.tzld.piaoquan.growth.common.dao.mapper.ext.CgiReplyBucketDataMapperExt;
 import com.tzld.piaoquan.growth.common.dao.mapper.ext.CgiReplyBucketDataMapperExt;
@@ -672,6 +673,9 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
             return result;
             return result;
         }
         }
         List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommendPaged(dt, crowdSegment, demandStrategy, offset, pageSize, excludeSelfTitle);
         List<ContentPlatformDemandVideo> rows = demandVideoMapperExt.selectForRecommendPaged(dt, crowdSegment, demandStrategy, offset, pageSize, excludeSelfTitle);
+        if (excludeSelfTitle) {
+            rows.removeIf(r -> TitleNormalizer.isSelfTitle(r.getTitle(), r.getDemandContentTitle()));
+        }
         List<VideoContentItemVO> list = buildDemandVideoContentItemVOList(rows);
         List<VideoContentItemVO> list = buildDemandVideoContentItemVOList(rows);
         for (VideoContentItemVO v : list) {
         for (VideoContentItemVO v : list) {
             v.setSource(source);
             v.setSource(source);
@@ -802,6 +806,9 @@ public class ContentPlatformPlanServiceImpl implements ContentPlatformPlanServic
             if (row.getVideoId() == null) {
             if (row.getVideoId() == null) {
                 continue;
                 continue;
             }
             }
+            if (excludeSelfTitle && TitleNormalizer.isSelfTitle(row.getTitle(), row.getDemandContentTitle())) {
+                continue;
+            }
             if (!distinct.containsKey(row.getVideoId())) {
             if (!distinct.containsKey(row.getVideoId())) {
                 distinct.put(row.getVideoId(), row);
                 distinct.put(row.getVideoId(), row);
                 if (distinct.size() >= limit) {
                 if (distinct.size() >= limit) {

+ 44 - 0
api-module/src/main/java/com/tzld/piaoquan/api/util/TitleNormalizer.java

@@ -0,0 +1,44 @@
+package com.tzld.piaoquan.api.util;
+
+import java.text.Normalizer;
+import java.util.regex.Pattern;
+
/**
 * Title normalization used for the fuzzy "video title == demand seed title" comparison.
 *
 * <p>Normalization hides differences that commonly show up in dirty data: emoji,
 * whitespace, invisible format characters, and full-width vs. half-width forms.
 * The class is stateless; both patterns are compiled once and shared.
 */
public final class TitleNormalizer {

    /**
     * Characters treated as emoji or emoji glue.
     *
     * <ul>
     *   <li>{@code \p{So}} and {@code \p{Cn}}: "other symbol" and unassigned code points,
     *       which covers most pictographs on any runtime Unicode version;</li>
     *   <li>U+1F000..U+1FAFF: the supplementary emoji/pictograph blocks, listed as an explicit
     *       code point range so that members that are not {@code So} (for example the
     *       skin-tone modifiers U+1F3FB..U+1F3FF) are removed as well;</li>
     *   <li>U+2600..U+27BF: Miscellaneous Symbols and Dingbats;</li>
     *   <li>U+FE00..U+FE0F: variation selectors;</li>
     *   <li>U+200D: zero width joiner used inside emoji sequences.</li>
     * </ul>
     *
     * <p>The range is written with code point escapes on purpose. The regex engine matches by
     * code point, so a class made of lone surrogate halves never matches a well-formed pair.
     * The range also deliberately stops at U+1FAFF: going higher would start deleting
     * supplementary-plane CJK ideographs, which are real title text.
     */
    private static final Pattern EMOJI = Pattern.compile(
            "[\\p{So}\\p{Cn}"
                    + "\\x{1F000}-\\x{1FAFF}"
                    + "\\x{2600}-\\x{27BF}"
                    + "\\x{FE00}-\\x{FE0F}"
                    + "\\x{200D}]");

    /**
     * Whitespace and invisible characters. Plain {@code \s} is ASCII-only in Java, so the
     * Unicode separator category ({@code \p{Z}}) and format category ({@code \p{Cf}}, e.g.
     * zero width space, byte order mark, word joiner) are included explicitly.
     */
    private static final Pattern INVISIBLE = Pattern.compile("[\\s\\p{Z}\\p{Cf}]+");

    private TitleNormalizer() {
        // static utility, not instantiable
    }

    /**
     * Returns the canonical comparison form of a title.
     *
     * <p>Steps, in order: NFKC normalization (folds full-width/half-width and other
     * compatibility forms), removal of emoji, removal of whitespace and invisible characters.
     *
     * @param s raw title, may be {@code null}
     * @return the normalized title; the empty string when {@code s} is {@code null} or empty
     */
    public static String normalize(String s) {
        if (s == null || s.isEmpty()) {
            return "";
        }
        String folded = Normalizer.normalize(s, Normalizer.Form.NFKC);
        String withoutEmoji = EMOJI.matcher(folded).replaceAll("");
        return INVISIBLE.matcher(withoutEmoji).replaceAll("");
    }

    /**
     * Tells whether a video title is the same as the demand seed title after normalization.
     *
     * @param title              the video title, may be {@code null}
     * @param demandContentTitle the demand seed title, may be {@code null}
     * @return {@code true} only when both arguments are non-null, the normalized title is
     *         non-empty, and both normalized forms are equal
     */
    public static boolean isSelfTitle(String title, String demandContentTitle) {
        if (title == null || demandContentTitle == null) {
            return false;
        }
        String normalizedTitle = normalize(title);
        // Two titles that both normalize to nothing (e.g. emoji only) carry no comparable text.
        if (normalizedTitle.isEmpty()) {
            return false;
        }
        return normalizedTitle.equals(normalize(demandContentTitle));
    }
}