|
@@ -15,16 +15,11 @@ import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
-import org.springframework.util.StringUtils;
|
|
|
|
|
|
-import java.util.ArrayList;
|
|
|
-import java.util.HashMap;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
+import java.util.*;
|
|
|
import java.util.concurrent.CountDownLatch;
|
|
|
import java.util.concurrent.ExecutorService;
|
|
|
import java.util.concurrent.Future;
|
|
|
-import java.util.concurrent.TimeUnit;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
@Component
|
|
@@ -43,63 +38,35 @@ public class SensitiveStrategy implements FilterStrategy {
|
|
|
|
|
|
@Override
|
|
|
public FilterResult filter(FilterParam param) {
|
|
|
- long start = System.currentTimeMillis();
|
|
|
FilterResult filterResult = new FilterResult();
|
|
|
- List<String> result = new ArrayList<>();
|
|
|
+ List<String> result = new ArrayList<>(param.getContents().size());
|
|
|
List<Content> filterContents = new ArrayList<>();
|
|
|
|
|
|
- CountDownLatch cdl = new CountDownLatch(param.getContents().size());
|
|
|
- List<Future<Content>> futures = new ArrayList<>();
|
|
|
- Map<String, String> titleMd5Map = new HashMap<>();
|
|
|
- Map<String, ArticleSensitive> articleSensitiveMap = new HashMap<>();
|
|
|
- List<String> md5List = new ArrayList<>();
|
|
|
- for (Content content : param.getContents()) {
|
|
|
- String md5 = Md5Util.encoderByMd5(content.getTitle());
|
|
|
- md5List.add(md5);
|
|
|
- titleMd5Map.put(content.getTitle(), md5);
|
|
|
- }
|
|
|
- List<ArticleSensitive> articleSensitiveList = getArticleSensitive(md5List);
|
|
|
- if (CollectionUtil.isNotEmpty(articleSensitiveList)) {
|
|
|
- articleSensitiveMap = articleSensitiveList.stream().collect(Collectors.toMap(ArticleSensitive::getMd5, o -> o));
|
|
|
- }
|
|
|
+// Map<String, String> titleMd5Map = new HashMap<>();
|
|
|
+// Map<String, ArticleSensitive> articleSensitiveMap = new HashMap<>();
|
|
|
+// List<String> md5List = new ArrayList<>();
|
|
|
+// for (Content content : param.getContents()) {
|
|
|
+// String md5 = Md5Util.encoderByMd5(content.getTitle());
|
|
|
+// md5List.add(md5);
|
|
|
+// titleMd5Map.put(content.getTitle(), md5);
|
|
|
+// }
|
|
|
+// List<ArticleSensitive> articleSensitiveList = getArticleSensitive(md5List);
|
|
|
+// if (CollectionUtil.isNotEmpty(articleSensitiveList)) {
|
|
|
+// articleSensitiveMap = articleSensitiveList.stream().collect(Collectors.toMap(ArticleSensitive::getMd5, o -> o));
|
|
|
+// }
|
|
|
|
|
|
+ List<Set<Character>> unsafeTitleCache = TitleSimilarCheckUtil.makeCache(UnSafeTitles);
|
|
|
for (Content content : param.getContents()) {
|
|
|
- Map<String, ArticleSensitive> finalArticleSensitiveMap = articleSensitiveMap;
|
|
|
- Future<Content> future = pool.submit(() -> {
|
|
|
- try {
|
|
|
-// boolean isSensitive = articleSensitiveRemoteService.articleSensitive(content.getTitle(),
|
|
|
-// titleMd5Map,
|
|
|
-// finalArticleSensitiveMap);
|
|
|
-// if (isSensitive) {
|
|
|
-// content.setFilterReason("安全违规");
|
|
|
-// } else
|
|
|
- if (TitleSimilarCheckUtil.isDuplicateContent(content.getTitle(), UnSafeTitles, TitleSimilarCheckUtil.SIMILARITY_THRESHOLD)) {
|
|
|
- content.setFilterReason("安全违规");
|
|
|
- }
|
|
|
- return content;
|
|
|
- } finally {
|
|
|
- cdl.countDown();
|
|
|
- }
|
|
|
- });
|
|
|
- futures.add(future);
|
|
|
- }
|
|
|
- try {
|
|
|
- cdl.await(5000, TimeUnit.MILLISECONDS);
|
|
|
- } catch (InterruptedException e) {
|
|
|
- log.error("filter error", e);
|
|
|
- return null;
|
|
|
- }
|
|
|
-
|
|
|
- for (Future<Content> f : futures) {
|
|
|
try {
|
|
|
- Content content = f.get();
|
|
|
- if (StringUtils.hasText(content.getFilterReason())) {
|
|
|
+ if (TitleSimilarCheckUtil.isDuplicateContentByCache(content.getTitle(), unsafeTitleCache,
|
|
|
+ TitleSimilarCheckUtil.SIMILARITY_THRESHOLD)) {
|
|
|
+ content.setFilterReason("安全违规");
|
|
|
filterContents.add(content);
|
|
|
} else {
|
|
|
result.add(content.getId());
|
|
|
}
|
|
|
} catch (Exception e) {
|
|
|
- log.error("future get error ", e);
|
|
|
+ log.error("similar check error ", e);
|
|
|
}
|
|
|
}
|
|
|
filterResult.setContentIds(result);
|