Bladeren bron

相似度判断优化

wangyunpeng 3 maanden geleden
bovenliggende
commit
66d036b9b8

+ 36 - 12
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/SensitiveStrategy.java

@@ -19,8 +19,10 @@ import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Component;
 
+import javax.annotation.PostConstruct;
 import java.util.*;
 import java.util.concurrent.*;
 import java.util.stream.Collectors;
@@ -37,6 +39,28 @@ public class SensitiveStrategy implements FilterStrategy {
     @ApolloJsonValue("${UnSafeTitles:[]}")
     private static List<String> UnSafeTitles;
 
+    private volatile List<Set<Character>> unsafeTitleCache = Collections.emptyList();
+
+    @PostConstruct
+    public void init() {
+        refreshUnsafeTitles(); // build unsafeTitleCache once when this bean is initialised (@PostConstruct)
+    }
+
+    // Rebuilds unsafeTitleCache from the configured titles plus DB rows with status ONE; scheduled every 30 minutes.
+    @Scheduled(fixedRate = 30 * 60 * 1000)
+    public void refreshUnsafeTitles() {
+        // Work on a copy: appending to the shared static config list would re-add every DB title on each refresh.
+        List<String> allUnSafeTitles = UnSafeTitles == null ? new ArrayList<>() : new ArrayList<>(UnSafeTitles);
+        long t1 = System.currentTimeMillis();
+        List<ArticleUnsafeTitle> articleUnsafeTitleList = articleUnsafeTitleRepository.getByStatus(StatusEnum.ONE.getCode());
+        long t2 = System.currentTimeMillis();
+        log.info("SensitiveStrategy get unsafe title DBList cost time:{}", t2 - t1);
+        if (CollectionUtils.isNotEmpty(articleUnsafeTitleList)) {
+            articleUnsafeTitleList.forEach(item -> allUnSafeTitles.add(item.getTitle()));
+        }
+        unsafeTitleCache = TitleSimilarCheckUtil.makeCache(allUnSafeTitles); // single volatile write publishes the new cache
+    }
+
     private final static ExecutorService pool = new CommonThreadPoolExecutor(
             5,
             5,
@@ -66,19 +90,19 @@ public class SensitiveStrategy implements FilterStrategy {
 //        if (CollectionUtil.isNotEmpty(articleSensitiveList)) {
 //            articleSensitiveMap = articleSensitiveList.stream().collect(Collectors.toMap(ArticleSensitive::getMd5, o -> o));
 //        }
-        List<String> allUnSafeTitles = UnSafeTitles;
-        long t1 = System.currentTimeMillis();
-        List<ArticleUnsafeTitle> articleUnsafeTitleList = articleUnsafeTitleRepository.getByStatus(StatusEnum.ONE.getCode());
-        long t2 = System.currentTimeMillis();
-        log.info("SensitiveStrategy get unsafe title DBList cost time:{}", t2 - t1);
-        if (CollectionUtils.isNotEmpty(articleUnsafeTitleList)) {
-            for (ArticleUnsafeTitle articleUnsafeTitle : articleUnsafeTitleList) {
-                allUnSafeTitles.add(articleUnsafeTitle.getTitle());
-            }
-        }
-        List<Set<Character>> unsafeTitleCache = TitleSimilarCheckUtil.makeCache(allUnSafeTitles);
+//        List<String> allUnSafeTitles = UnSafeTitles;
+//        long t1 = System.currentTimeMillis();
+//        List<ArticleUnsafeTitle> articleUnsafeTitleList = articleUnsafeTitleRepository.getByStatus(StatusEnum.ONE.getCode());
+//        long t2 = System.currentTimeMillis();
+//        log.info("SensitiveStrategy get unsafe title DBList cost time:{}", t2 - t1);
+//        if (CollectionUtils.isNotEmpty(articleUnsafeTitleList)) {
+//            for (ArticleUnsafeTitle articleUnsafeTitle : articleUnsafeTitleList) {
+//                allUnSafeTitles.add(articleUnsafeTitle.getTitle());
+//            }
+//        }
+//        List<Set<Character>> unsafeTitleCache = TitleSimilarCheckUtil.makeCache(allUnSafeTitles);
         long t3 = System.currentTimeMillis();
-        log.info("SensitiveStrategy make unsafe title cache cost time:{}", t3 - t2);
+//        log.info("SensitiveStrategy make unsafe title cache cost time:{}", t3 - t2);
         List<String> allTitles = param.getContents().stream().map(Content::getTitle).collect(Collectors.toList());
         Map<String, Boolean> similarityMap = new HashMap<>(similarityCache.getAllPresent(allTitles));
         long t4 = System.currentTimeMillis();

+ 2 - 6
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/TitleSimilarCheckUtil.java

@@ -31,12 +31,8 @@ public class TitleSimilarCheckUtil {
             return false;
         }
         Set<Character> titleCache = makeCache(title);
-        for (Set<Character> existTitleCache : existsContentCache) {
-            if (isSimilar(titleCache, existTitleCache, threshold)) {
-                return true;
-            }
-        }
-        return false;
+        return existsContentCache.parallelStream()
+              .anyMatch(existTitleCache -> isSimilar(titleCache, existTitleCache, threshold));
     }
 
     public static boolean isDuplicateContent(String title, List<String> existsContentTitle, double threshold) {