|
@@ -0,0 +1,91 @@
|
|
|
+package com.tzld.longarticle.recommend.server.service.filter.strategy;
|
|
|
+
|
|
|
+import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.ArticleTypeEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.model.dto.Content;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.ArticleListRemoteService;
|
|
|
+import com.tzld.longarticle.recommend.server.service.filter.FilterParam;
|
|
|
+import com.tzld.longarticle.recommend.server.service.filter.FilterResult;
|
|
|
+import com.tzld.longarticle.recommend.server.service.filter.FilterStrategy;
|
|
|
+import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.stereotype.Component;
|
|
|
+import org.springframework.util.StringUtils;
|
|
|
+
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.List;
|
|
|
+import java.util.concurrent.CountDownLatch;
|
|
|
+import java.util.concurrent.ExecutorService;
|
|
|
+import java.util.concurrent.Future;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+@Component
|
|
|
+@Slf4j
|
|
|
+public class InfiniteHisTitleStrategy implements FilterStrategy {
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ private ArticleListRemoteService articleListRemoteService;
|
|
|
+
|
|
|
+ private static final List<Integer> firstSecondIndex = Arrays.asList(1, 2);
|
|
|
+ private static final List<Integer> allIndex = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8);
|
|
|
+
|
|
|
+ private final ExecutorService pool = ThreadPoolFactory.deDuplicatePool();
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public FilterResult filter(FilterParam param) {
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
+ FilterResult filterResult = new FilterResult();
|
|
|
+ List<String> result = new ArrayList<>();
|
|
|
+ List<Content> filterContents = new ArrayList<>();
|
|
|
+ List<Article> allArticleList = articleListRemoteService.articleList(param.getGhId(), allIndex, param.getType());
|
|
|
+ List<String> allTitleList = allArticleList.stream().map(Article::getTitle).distinct().collect(Collectors.toList());
|
|
|
+ List<Article> qunfaArticleList = articleListRemoteService.articleList(param.getGhId(), firstSecondIndex, ArticleTypeEnum.QUNFA.getVal());
|
|
|
+ List<String> qunfaTitleList = qunfaArticleList.stream().map(Article::getTitle).distinct().collect(Collectors.toList());
|
|
|
+ List<Future<Content>> futures = new ArrayList<>();
|
|
|
+ CountDownLatch cdl = new CountDownLatch(param.getContents().size());
|
|
|
+ for (Content content : param.getContents()) {
|
|
|
+ Future<Content> future = pool.submit(() -> {
|
|
|
+ try {
|
|
|
+ boolean isDuplicate = TitleSimilarCheckUtil.isDuplicateContent(content.getTitle(), qunfaTitleList);
|
|
|
+ if (!isDuplicate) {
|
|
|
+ isDuplicate = TitleSimilarCheckUtil.isDuplicateContent(content.getTitle(), allTitleList);
|
|
|
+ }
|
|
|
+ if (isDuplicate) {
|
|
|
+ content.setFilterReason("历史已发布文章");
|
|
|
+ }
|
|
|
+ return content;
|
|
|
+ } finally {
|
|
|
+ cdl.countDown();
|
|
|
+ }
|
|
|
+ });
|
|
|
+ futures.add(future);
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ cdl.await();
|
|
|
+ } catch (InterruptedException e) {
|
|
|
+ log.error("filter error", e);
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (Future<Content> f : futures) {
|
|
|
+ try {
|
|
|
+ Content content = f.get();
|
|
|
+ if (StringUtils.hasText(content.getFilterReason())) {
|
|
|
+ filterContents.add(content);
|
|
|
+ } else {
|
|
|
+ result.add(content.getId());
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("future get error ", e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ filterResult.setContentIds(result);
|
|
|
+ filterResult.setFilterContent(filterContents);
|
|
|
+ log.info("InfiniteHisTitleStrategy cost:{}", System.currentTimeMillis() - start);
|
|
|
+ return filterResult;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|