|
@@ -10,9 +10,7 @@ import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.collections4.CollectionUtils;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
|
|
|
-import java.util.ArrayList;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
+import java.util.*;
|
|
|
import java.util.concurrent.CopyOnWriteArrayList;
|
|
|
import java.util.concurrent.CountDownLatch;
|
|
|
import java.util.concurrent.ExecutorService;
|
|
@@ -51,7 +49,7 @@ public class DeDuplicationStrategy implements FilterStrategy {
|
|
|
Future<List<Content>> future = pool.submit(() -> {
|
|
|
try {
|
|
|
List<Content> res = new ArrayList<>();
|
|
|
- List<String> titles = new ArrayList<>();
|
|
|
+ Set<String> titles = new HashSet<>();
|
|
|
if (CollectionUtils.isEmpty(contents)) {
|
|
|
return new ArrayList<>();
|
|
|
}
|
|
@@ -89,8 +87,8 @@ public class DeDuplicationStrategy implements FilterStrategy {
|
|
|
}
|
|
|
|
|
|
private List<String> groupDeduplication(List<Content> contentList, List<Content> filterContents) {
|
|
|
- List<String> result = new CopyOnWriteArrayList<>();
|
|
|
- List<String> titles = new CopyOnWriteArrayList<>();
|
|
|
+ List<String> result = new ArrayList<>();
|
|
|
+ Set<String> titles = new HashSet<>();
|
|
|
Map<String, List<Content>> contentMap = contentList.stream().collect(Collectors.groupingBy(Content::getContentPoolType));
|
|
|
|
|
|
List<String> contentPoolList = ContentPoolEnum.getOrderContentPool();
|
|
@@ -104,27 +102,14 @@ public class DeDuplicationStrategy implements FilterStrategy {
|
|
|
if (CollectionUtils.isEmpty(contents)) {
|
|
|
continue;
|
|
|
}
|
|
|
- CountDownLatch cdl = new CountDownLatch(contents.size());
|
|
|
for (Content content : contents) {
|
|
|
- pool.submit(() -> {
|
|
|
- try {
|
|
|
- if (titles.contains(content.getTitle())) {
|
|
|
- content.setFilterReason("重复文章");
|
|
|
- filterContents.add(content);
|
|
|
- } else {
|
|
|
- result.add(content.getId());
|
|
|
- titles.add(content.getTitle());
|
|
|
- }
|
|
|
- } finally {
|
|
|
- cdl.countDown();
|
|
|
- }
|
|
|
- });
|
|
|
- }
|
|
|
- try {
|
|
|
- cdl.await();
|
|
|
- } catch (InterruptedException e) {
|
|
|
- log.error("filter error", e);
|
|
|
- return null;
|
|
|
+ if (titles.contains(content.getTitle())) {
|
|
|
+ content.setFilterReason("重复文章");
|
|
|
+ filterContents.add(content);
|
|
|
+ } else {
|
|
|
+ result.add(content.getId());
|
|
|
+ titles.add(content.getTitle());
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
return result;
|