Explorar el Código

Merge branch 'feature/20241202-improve-perf' of Server/long-article-recommend into master

fengzhoutian hace 7 meses
padre
commit
9823610f2e

+ 17 - 44
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/HistoryTitleStrategy.java

@@ -15,10 +15,7 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;
 import org.springframework.util.StringUtils;
 
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Objects;
+import java.util.*;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
@@ -43,9 +40,8 @@ public class HistoryTitleStrategy implements FilterStrategy {
 
     @Override
     public FilterResult filter(FilterParam param) {
-        long start = System.currentTimeMillis();
         FilterResult filterResult = new FilterResult();
-        List<String> result = new ArrayList<>();
+        List<String> result = new ArrayList<>(param.getContents().size());
         List<Content> filterContents = new ArrayList<>();
         List<Article> allArticleList = articleListRemoteService.articleList(param.getGhId(), allIndex, param.getType());
         List<String> allTitleList = allArticleList.stream().map(Article::getTitle).distinct().collect(Collectors.toList());
@@ -58,45 +54,22 @@ public class HistoryTitleStrategy implements FilterStrategy {
         if (Objects.nonNull(contentPoolConfig)) {
             firstSecondContentPool.addAll(Arrays.asList(contentPoolConfig[0], contentPoolConfig[1]));
         }
-        List<Future<Content>> futures = new ArrayList<>();
-        CountDownLatch cdl = new CountDownLatch(param.getContents().size());
+        List<Set<Character>> firstSecondTitleCache = TitleSimilarCheckUtil.makeCache(firstSecondTitleList);
+        List<Set<Character>> allTitleCache = TitleSimilarCheckUtil.makeCache(allTitleList);
+        // TODO: batching for parallelism
         for (Content content : param.getContents()) {
-            Future<Content> future = pool.submit(() -> {
-                try {
-                    boolean isDuplicate;
-                    if (CollectionUtils.isNotEmpty(firstSecondContentPool) && firstSecondContentPool.contains(content.getContentPoolType())) {
-                        // 四个内容池 配置 判断头条,次头条
-                        isDuplicate = TitleSimilarCheckUtil.isDuplicateContent(content.getTitle(), firstSecondTitleList, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD);
-                    } else {
-                        isDuplicate = TitleSimilarCheckUtil.isDuplicateContent(content.getTitle(), allTitleList, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD);
-                    }
-                    if (isDuplicate) {
-                        content.setFilterReason("历史已发布文章");
-                    }
-                    return content;
-                } finally {
-                    cdl.countDown();
-                }
-            });
-            futures.add(future);
-        }
-        try {
-            cdl.await();
-        } catch (InterruptedException e) {
-            log.error("filter error", e);
-            return null;
-        }
-
-        for (Future<Content> f : futures) {
-            try {
-                Content content = f.get();
-                if (StringUtils.hasText(content.getFilterReason())) {
-                    filterContents.add(content);
-                } else {
-                    result.add(content.getId());
-                }
-            } catch (Exception e) {
-                log.error("future get error ", e);
+            boolean isDuplicate;
+            if (CollectionUtils.isNotEmpty(firstSecondContentPool) && firstSecondContentPool.contains(content.getContentPoolType())) {
+                // 四个内容池 配置 判断头条,次头条
+                isDuplicate = TitleSimilarCheckUtil.isDuplicateContentByCache(content.getTitle(), firstSecondTitleCache, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD);
+            } else {
+                isDuplicate = TitleSimilarCheckUtil.isDuplicateContentByCache(content.getTitle(), allTitleCache, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD);
+            }
+            if (isDuplicate) {
+                content.setFilterReason("历史已发布文章");
+                filterContents.add(content);
+            } else {
+                result.add(content.getId());
             }
         }
         filterResult.setContentIds(result);