Просмотр исходного кода

Merge branch 'wyp/0213-videoAuditFilter' into test

wangyunpeng 10 месяцев назад
Родитель
Сommit
452e85d5ef

+ 4 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/FilterService.java

@@ -72,6 +72,9 @@ public class FilterService {
                     continue;
                 }
                 FilterResult filterResult = f.get();
+                if (Objects.isNull(filterResult)) {
+                    continue;
+                }
                 contentIdsList.add(filterResult.getContentIds());
                 if (CollectionUtils.isNotEmpty(filterResult.getFilterContent())) {
                     for (Content content : filterResult.getFilterContent()) {
@@ -106,6 +109,7 @@ public class FilterService {
         strategies.add(ServiceBeanFactory.getBean(KeywordStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(ArticlePromotionStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(VideoPoolBadAuditStrategy.class));
+        strategies.add(ServiceBeanFactory.getBean(VideoAuditStrategy.class));
         if (param.getScene().equals(FWH_COLD_START)) {
             strategies.add(ServiceBeanFactory.getBean(HistoryTitleForFwhColdStartStrategy.class));
         } else {

+ 36 - 12
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/SensitiveStrategy.java

@@ -19,8 +19,10 @@ import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Component;
 
+import javax.annotation.PostConstruct;
 import java.util.*;
 import java.util.concurrent.*;
 import java.util.stream.Collectors;
@@ -37,6 +39,28 @@ public class SensitiveStrategy implements FilterStrategy {
     @ApolloJsonValue("${UnSafeTitles:[]}")
     private static List<String> UnSafeTitles;
 
+    private volatile List<Set<Character>> unsafeTitleCache = Collections.emptyList();
+
+    /**
+     * {@code @PostConstruct} callback that fills the unsafe-title cache once by
+     * delegating to {@link #refreshUnsafeTitles()}.
+     */
+    @PostConstruct
+    public void init() {
+        this.refreshUnsafeTitles();
+    }
+
+    /**
+     * Rebuilds {@code unsafeTitleCache} from the configured {@code UnSafeTitles} plus the titles
+     * returned by {@code articleUnsafeTitleRepository.getByStatus(StatusEnum.ONE.getCode())}.
+     * <p>
+     * The merged list is a fresh copy on every run, so the shared configuration list is never
+     * modified and the cache does not accumulate duplicate entries across refreshes.
+     */
+    @Scheduled(fixedRate = 30 * 60 * 1000) // refresh every 30 minutes
+    public void refreshUnsafeTitles() {
+        // Copy instead of aliasing: appending DB titles to the shared static list would make it
+        // grow on every scheduled run (and fail outright if the list is null or unmodifiable).
+        List<String> allUnSafeTitles = new ArrayList<>();
+        if (CollectionUtils.isNotEmpty(UnSafeTitles)) {
+            allUnSafeTitles.addAll(UnSafeTitles);
+        }
+        long t1 = System.currentTimeMillis();
+        List<ArticleUnsafeTitle> articleUnsafeTitleList = articleUnsafeTitleRepository.getByStatus(StatusEnum.ONE.getCode());
+        long t2 = System.currentTimeMillis();
+        log.info("SensitiveStrategy get unsafe title DBList cost time:{}", t2 - t1);
+        if (CollectionUtils.isNotEmpty(articleUnsafeTitleList)) {
+            for (ArticleUnsafeTitle articleUnsafeTitle : articleUnsafeTitleList) {
+                allUnSafeTitles.add(articleUnsafeTitle.getTitle());
+            }
+        }
+        // Single volatile write publishes the fully built cache to readers.
+        unsafeTitleCache = TitleSimilarCheckUtil.makeCache(allUnSafeTitles);
+    }
+
     private final static ExecutorService pool = new CommonThreadPoolExecutor(
             5,
             5,
@@ -66,19 +90,19 @@ public class SensitiveStrategy implements FilterStrategy {
 //        if (CollectionUtil.isNotEmpty(articleSensitiveList)) {
 //            articleSensitiveMap = articleSensitiveList.stream().collect(Collectors.toMap(ArticleSensitive::getMd5, o -> o));
 //        }
-        List<String> allUnSafeTitles = UnSafeTitles;
-        long t1 = System.currentTimeMillis();
-        List<ArticleUnsafeTitle> articleUnsafeTitleList = articleUnsafeTitleRepository.getByStatus(StatusEnum.ONE.getCode());
-        long t2 = System.currentTimeMillis();
-        log.info("SensitiveStrategy get unsafe title DBList cost time:{}", t2 - t1);
-        if (CollectionUtils.isNotEmpty(articleUnsafeTitleList)) {
-            for (ArticleUnsafeTitle articleUnsafeTitle : articleUnsafeTitleList) {
-                allUnSafeTitles.add(articleUnsafeTitle.getTitle());
-            }
-        }
-        List<Set<Character>> unsafeTitleCache = TitleSimilarCheckUtil.makeCache(allUnSafeTitles);
+//        List<String> allUnSafeTitles = UnSafeTitles;
+//        long t1 = System.currentTimeMillis();
+//        List<ArticleUnsafeTitle> articleUnsafeTitleList = articleUnsafeTitleRepository.getByStatus(StatusEnum.ONE.getCode());
+//        long t2 = System.currentTimeMillis();
+//        log.info("SensitiveStrategy get unsafe title DBList cost time:{}", t2 - t1);
+//        if (CollectionUtils.isNotEmpty(articleUnsafeTitleList)) {
+//            for (ArticleUnsafeTitle articleUnsafeTitle : articleUnsafeTitleList) {
+//                allUnSafeTitles.add(articleUnsafeTitle.getTitle());
+//            }
+//        }
+//        List<Set<Character>> unsafeTitleCache = TitleSimilarCheckUtil.makeCache(allUnSafeTitles);
         long t3 = System.currentTimeMillis();
-        log.info("SensitiveStrategy make unsafe title cache cost time:{}", t3 - t2);
+//        log.info("SensitiveStrategy make unsafe title cache cost time:{}", t3 - t2);
         List<String> allTitles = param.getContents().stream().map(Content::getTitle).collect(Collectors.toList());
         Map<String, Boolean> similarityMap = new HashMap<>(similarityCache.getAllPresent(allTitles));
         long t4 = System.currentTimeMillis();

+ 61 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/strategy/VideoAuditStrategy.java

@@ -0,0 +1,61 @@
+package com.tzld.longarticle.recommend.server.service.recommend.filter.strategy;
+
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishPlanInputSourceTypesEnum;
+import com.tzld.longarticle.recommend.server.common.enums.longArticle.ArticleVideoAuditStatusEnum;
+import com.tzld.longarticle.recommend.server.model.dto.Content;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticleTitleAudit;
+import com.tzld.longarticle.recommend.server.repository.longArticle.LongArticleTitleAuditRepository;
+import com.tzld.longarticle.recommend.server.service.recommend.filter.FilterParam;
+import com.tzld.longarticle.recommend.server.service.recommend.filter.FilterResult;
+import com.tzld.longarticle.recommend.server.service.recommend.filter.FilterStrategy;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+@Component
+@Slf4j
+public class VideoAuditStrategy implements FilterStrategy {
+
+    @Autowired
+    LongArticleTitleAuditRepository titleAuditRepository;
+
+    @ApolloJsonValue("${whiteAccountList:[]}")
+    private List<String> whiteAccountList;
+
+    /**
+     * 保留视频内容池和视频审核通过的内容
+     * 不对其他内容做废弃操作
+     */
+    @Override
+    public FilterResult filter(FilterParam param) {
+        FilterResult filterResult = new FilterResult();
+        List<String> result = new ArrayList<>(param.getContents().size());
+        // 仅白名单账号执行下方过滤,其他账号直接返回
+        if (!whiteAccountList.contains(param.getAccountName())) {
+            filterResult.setContentIds(param.getContents().stream().map(Content::getId).collect(Collectors.toList()));
+            return filterResult;
+        }
+        List<String> sourceIds = param.getContents().stream().map(Content::getSourceId).collect(Collectors.toList());
+        List<LongArticleTitleAudit> titleAuditList = titleAuditRepository.getByContentIdIn(sourceIds);
+        Map<String, Integer> titleAuditMap = titleAuditList.stream()
+                .collect(Collectors.toMap(LongArticleTitleAudit::getContentId, LongArticleTitleAudit::getStatus));
+        for (Content content : param.getContents()) {
+            // 视频内容池 或 视频审核通过
+            if (Objects.equals(content.getSourceType(), PublishPlanInputSourceTypesEnum.longArticleVideoPoolSource.getVal())
+                    || (Objects.nonNull(titleAuditMap.get(content.getSourceId()))
+                    && titleAuditMap.get(content.getSourceId()) == ArticleVideoAuditStatusEnum.PASS.getCode())) {
+                result.add(content.getId());
+            }
+        }
+        filterResult.setContentIds(result);
+        return filterResult;
+    }
+
+}

+ 5 - 7
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -193,13 +193,11 @@ public class RecallService implements ApplicationContextAware {
         long t2 = System.currentTimeMillis();
         CostMonitor.logCost("Recall", "GetAllContents", t2 - t1);
         ContentCountMonitor.logCount("Recall", "GetAllContents", Objects.isNull(content) ? 0 : content.size());
-        if (whiteAccountList.contains(param.getAccountName())) {
-//            // 临时过滤文章视频不匹配content
-//            filterNotMatchContent(content);
-            // 过滤仅保留审核通过content
-            filterAuditPassContent(content);
-        }
-        ContentCountMonitor.logCount("Recall", "Filter", Objects.isNull(content) ? 0 : content.size());
+//        if (whiteAccountList.contains(param.getAccountName())) {
+//            // 过滤仅保留审核通过content
+//            filterAuditPassContent(content);
+//            ContentCountMonitor.logCount("Recall", "Filter", Objects.isNull(content) ? 0 : content.size());
+//        }
         if (CollectionUtils.isEmpty(content)) {
             FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
                     "内容召回失败\n"

+ 2 - 6
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/util/TitleSimilarCheckUtil.java

@@ -31,12 +31,8 @@ public class TitleSimilarCheckUtil {
             return false;
         }
         Set<Character> titleCache = makeCache(title);
-        for (Set<Character> existTitleCache : existsContentCache) {
-            if (isSimilar(titleCache, existTitleCache, threshold)) {
-                return true;
-            }
-        }
-        return false;
+        return existsContentCache.parallelStream()
+              .anyMatch(existTitleCache -> isSimilar(titleCache, existTitleCache, threshold));
     }
 
     public static boolean isDuplicateContent(String title, List<String> existsContentTitle, double threshold) {