Bladeren bron

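Update DeDuplicationStrategy: fix perf (use a HashSet for title lookups; run groupDeduplication as a sequential loop instead of submitting one pool task per item)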

StrayWarrior 4 maanden geleden
bovenliggende
commit
3f961bae0b

+ 11 - 26
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/DeDuplicationStrategy.java

@@ -10,9 +10,7 @@ import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.stereotype.Component;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
 import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.ExecutorService;
@@ -51,7 +49,7 @@ public class DeDuplicationStrategy implements FilterStrategy {
             Future<List<Content>> future = pool.submit(() -> {
                 try {
                     List<Content> res = new ArrayList<>();
-                    List<String> titles = new ArrayList<>();
+                    Set<String> titles = new HashSet<>();
                     if (CollectionUtils.isEmpty(contents)) {
                         return new ArrayList<>();
                     }
@@ -89,8 +87,8 @@ public class DeDuplicationStrategy implements FilterStrategy {
     }
 
     private List<String> groupDeduplication(List<Content> contentList, List<Content> filterContents) {
-        List<String> result = new CopyOnWriteArrayList<>();
-        List<String> titles = new CopyOnWriteArrayList<>();
+        List<String> result = new ArrayList<>();
+        Set<String> titles = new HashSet<>();
         Map<String, List<Content>> contentMap = contentList.stream().collect(Collectors.groupingBy(Content::getContentPoolType));
 
         List<String> contentPoolList = ContentPoolEnum.getOrderContentPool();
@@ -104,27 +102,14 @@ public class DeDuplicationStrategy implements FilterStrategy {
             if (CollectionUtils.isEmpty(contents)) {
                 continue;
             }
-            CountDownLatch cdl = new CountDownLatch(contents.size());
             for (Content content : contents) {
-                pool.submit(() -> {
-                    try {
-                        if (titles.contains(content.getTitle())) {
-                            content.setFilterReason("重复文章");
-                            filterContents.add(content);
-                        } else {
-                            result.add(content.getId());
-                            titles.add(content.getTitle());
-                        }
-                    } finally {
-                        cdl.countDown();
-                    }
-                });
-            }
-            try {
-                cdl.await();
-            } catch (InterruptedException e) {
-                log.error("filter error", e);
-                return null;
+                if (titles.contains(content.getTitle())) {
+                    content.setFilterReason("重复文章");
+                    filterContents.add(content);
+                } else {
+                    result.add(content.getId());
+                    titles.add(content.getTitle());
+                }
             }
         }
         return result;