Browse Source

TitleSimilarCheck

wangyunpeng 11 months ago
parent
commit
29b9edd59a

+ 0 - 4
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/filter/strategy/CategoryStrategy.java

@@ -1,6 +1,5 @@
 package com.tzld.longarticle.recommend.server.service.filter.strategy;
 
-import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.longarticle.recommend.server.service.filter.FilterParam;
 import com.tzld.longarticle.recommend.server.service.filter.FilterStrategy;
 import lombok.extern.slf4j.Slf4j;
@@ -8,7 +7,6 @@ import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 
 import java.util.List;
-import java.util.Map;
 
 /**
  * @author dyp
@@ -18,8 +16,6 @@ import java.util.Map;
 public class CategoryStrategy implements FilterStrategy {
     @Value("${aaa:528}")
     private String aaa;
-    @ApolloJsonValue("${bbb:{}}")
-    protected Map<String, String> bbb;
 
     @Override
     public List<Long> filter(FilterParam param) {

+ 0 - 51
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/rank/RankService.java

@@ -1,25 +1,18 @@
 package com.tzld.longarticle.recommend.server.service.rank;
 
 
-import com.tzld.longarticle.recommend.server.model.Content;
 import com.tzld.longarticle.recommend.server.service.score.ScoreParam;
 import com.tzld.longarticle.recommend.server.service.score.ScoreResult;
 import com.tzld.longarticle.recommend.server.service.score.ScoreService;
 import com.tzld.longarticle.recommend.server.service.score.ScoreStrategy;
 import com.tzld.longarticle.recommend.server.service.score.strategy.ContentPoolStrategy;
 import com.tzld.longarticle.recommend.server.service.score.strategy.SimilarityStrategy;
-import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 
-<<<<<<< HEAD
-import java.util.*;
-=======
 import java.util.Collections;
-import java.util.List;
 import java.util.Map;
->>>>>>> 432e804 (init)
 
 /**
  * @author dyp
@@ -80,48 +73,4 @@ public class RankService {
     }
 
 
-    private List<Content> removeDuplicateContent(List<Content> contentList, Set<String> existsContentTitle) {
-        List<Content> result = new ArrayList<>();
-        for (Content content : contentList) {
-            if (existsContentTitle.contains(content.getTitle())
-                    || isDuplicateContent(content.getTitle(), existsContentTitle)) {
-                continue;
-            }
-            result.add(content);
-            existsContentTitle.add(content.getTitle());
-        }
-        return result;
-    }
-
-    private static final double SIMILARITY_THRESHOLD = 0.8; // 相似度阈值
-
-    private boolean  isDuplicateContent(String title, Set<String> existsContentTitle) {
-        boolean result = false;
-        for (String existsTitle : existsContentTitle) {
-            if (isSimilar(title, existsTitle, SIMILARITY_THRESHOLD)) {
-                return true;
-            }
-        }
-        return result;
-    }
-
-    private boolean isSimilar(String titleA, String titleB, double threshold) {
-        if (titleA.isEmpty() || titleB.isEmpty()) {
-            return false;
-        }
-        Set<Character> setA = new HashSet<>();
-        for (char c : titleA.toCharArray()) {
-            setA.add(c);
-        }
-        Set<Character> setB = new HashSet<>();
-        for (char c : titleB.toCharArray()) {
-            setB.add(c);
-        }
-        Set<Character> setCross = new HashSet<>(setA);
-        setCross.retainAll(setB);
-        int minLen = Math.max(Math.min(setA.size(), setB.size()), 1);
-        double rate = (double) setCross.size() / minLen;
-        return rate >= threshold;
-    }
-
 }

+ 49 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/TitleSimilarCheckUtil.java

@@ -0,0 +1,49 @@
+package com.tzld.longarticle.recommend.server.util;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+public class TitleSimilarCheckUtil { // static helpers that flag a title as a near-duplicate of already-seen titles
+    // Minimum ratio (shared distinct chars / smaller distinct-char set) at which two titles count as similar.
+    private static final double SIMILARITY_THRESHOLD = 0.8; // similarity threshold
+    /** Returns true if {@code title} is similar to at least one entry of {@code existsContentTitle}; false otherwise (also for an empty set). */
+    public static boolean isDuplicateContent(String title, Set<String> existsContentTitle) {
+        boolean result = false; // never reassigned; only returned when the loop finds no similar title
+        for (String existsTitle : existsContentTitle) {
+            if (isSimilar(title, existsTitle, SIMILARITY_THRESHOLD)) {
+                return true; // the first similar title is enough, stop scanning
+            }
+        }
+        return result;
+    }
+    /** Compares the distinct-char sets of both titles: similar when (shared chars) / (size of the smaller set) >= {@code threshold}. */
+    private static boolean isSimilar(String titleA, String titleB, double threshold) {
+        if (titleA.isEmpty() || titleB.isEmpty()) { // NOTE(review): a null title throws NullPointerException here; no null check exists
+            return false; // an empty title is never treated as similar
+        }
+        Set<Character> setA = new HashSet<>(); // distinct chars of titleA (order and repetition are discarded)
+        for (char c : titleA.toCharArray()) {
+            setA.add(c);
+        }
+        Set<Character> setB = new HashSet<>(); // distinct chars of titleB
+        for (char c : titleB.toCharArray()) {
+            setB.add(c);
+        }
+        Set<Character> setCross = new HashSet<>(setA); // copy first so retainAll does not mutate setA
+        setCross.retainAll(setB); // setCross now holds the chars present in both titles
+        int minLen = Math.max(Math.min(setA.size(), setB.size()), 1); // smaller set size, floored at 1 so the divisor is never 0
+        double rate = (double) setCross.size() / minLen; // cast keeps this a floating-point division
+        return rate >= threshold;
+    }
+    /** Manual smoke check: prints whether the sample title is flagged as a duplicate of any title in the sample set. */
+    public static void main(String[] args) {
+        String title = "多子女家庭,老人大概率过得比独子家庭的要幸福";
+        Set<String> existsContentTitle = new HashSet<>(Arrays.asList("以后买房,请记住7字真言:“买旧、买大、不买三!”",
+                "人到晚年才明白:多子女家庭,老人大概率过得比独子家庭的要幸福", // contains the sample title verbatim, so the ratio is 1.0
+                "可供中国使用3800年?山东意外发现巨大宝藏,西方当场酸了!",
+                "陕西女孩去医院体检后,发现左肾不见了,意外牵出8年前手术疑云"));
+        boolean result = isDuplicateContent(title, existsContentTitle);
+        System.out.println(result); // prints true for the sample data above
+    }
+}