Browse Source

待发布内容预过滤

wangyunpeng 4 tháng trước cách đây
mục cha
commit
6589bb34b0

+ 3 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -1,5 +1,6 @@
 package com.tzld.longarticle.recommend.server.mapper.longArticle;
 
+import com.tzld.longarticle.recommend.server.model.dto.Content;
 import com.tzld.longarticle.recommend.server.model.dto.GetOffVideos;
 import com.tzld.longarticle.recommend.server.model.dto.LongArticlesCrawlerVideos;
 import com.tzld.longarticle.recommend.server.model.dto.LongArticlesMatchVideos;
@@ -114,4 +115,6 @@ public interface LongArticleBaseMapper {
     List<PublishPlanAccountDTO> getGroupPublishPlanAccounts();
 
     Long getMinGzhWaitingPublishContent(String planId, String accountId);
+
+    List<Content> getPublishContentGzhWaiting(String planId, String accountId);
 }

+ 83 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/aigc/PublishPlanSetting.java

@@ -0,0 +1,83 @@
+package com.tzld.longarticle.recommend.server.model.entity.aigc;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.Table;
+
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "publish_plan_setting")
+public class PublishPlanSetting {
+
+    @Id
+    @Column(name = "plan_id")
+    private String planId;
+    @Column(name = "content_allocation_rules")
+    private Integer contentAllocationRules;
+    @Column(name = "content_sorting_rules")
+    private Integer contentSortingRules;
+    @Column(name = "action_content_allocation_rule")
+    private Integer actionContentAllocationRule;
+    @Column(name = "publish_rate")
+    private Integer publishRate;
+    @Column(name = "publish_date")
+    private String publishDate;
+    @Column(name = "publish_window_start")
+    private String publishWindowStart;
+    @Column(name = "publish_window_end")
+    private String publishWindowEnd;
+    @Column(name = "publish_time_interval")
+    private Integer publishTimeInterval;
+    @Column(name = "publish_num")
+    private Integer publishNum;
+    @Column(name = "publish_per_num")
+    private Integer publishPerNum;
+    @Column(name = "publish_per_min_num")
+    private Integer publishPerMinNum;
+    @Column(name = "publish_bgm_type")
+    private Integer publishBgmType;
+    @Column(name = "publish_push_time")
+    private String publishPushTime;
+    @Column(name = "push_type")
+    private Integer pushType;
+    @Column(name = "trigger_event")
+    private Integer triggerEvent;
+    @Column(name = "push_content_sorting_rules")
+    private Integer pushContentSortingRules;
+    @Column(name = "bili_district")
+    private String biliDistrict;
+    @Column(name = "score_judge_flag")
+    private Integer scoreJudgeFlag;
+    @Column(name = "first_item_score_judge_task_id")
+    private String firstItemScoreJudgeTaskId;
+    @Column(name = "second_item_score_judge_task_id")
+    private String secondItemScoreJudgeTaskId;
+    @Column(name = "other_item_score_judge_task_id")
+    private String otherItemScoreJudgeTaskId;
+    @Column(name = "miniprogram_insert_flag")
+    private Integer miniprogramInsertFlag;
+    @Column(name = "gzh_article_sort_flag")
+    private Integer gzhArticleSortFlag;
+    @Column(name = "gzh_trigger_sync_frequency")
+    private Integer gzhTriggerSyncFrequency;
+    @Column(name = "gzh_trigger_send_content_type")
+    private Integer gzhTriggerSendContentType;
+    @Column(name = "gzh_only_miniprogram_flag")
+    private Integer gzhOnlyMiniprogramFlag;
+    @Column(name = "gzh_max_miniprogram_num")
+    private Integer gzhMaxMiniprogramNum;
+    @Column(name = "gzh_auto_reply_delay_flag")
+    private Integer gzhAutoReplyDelayFlag;
+    @Column(name = "gzh_auto_reply_delay_millisecond")
+    private Integer gzhAutoReplyDelayMillisecond;
+    @Column(name = "statement")
+    private String statement;
+
+}

+ 12 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/aigc/PublishPlanSettingRepository.java

@@ -0,0 +1,12 @@
+package com.tzld.longarticle.recommend.server.repository.aigc;
+
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishPlanSetting;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+@Repository
+public interface PublishPlanSettingRepository extends JpaRepository<PublishPlanSetting, String> {
+
+    PublishPlanSetting getByPlanId(String planId);
+
+}

+ 101 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -9,12 +9,15 @@ import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import com.tzld.longarticle.recommend.server.common.CommonThreadPoolExecutor;
 import com.tzld.longarticle.recommend.server.common.HttpPoolFactory;
+import com.tzld.longarticle.recommend.server.common.constant.SceneConstants;
 import com.tzld.longarticle.recommend.server.common.enums.FieshuTableColumnDataTypeEnum;
 import com.tzld.longarticle.recommend.server.common.enums.aigc.ChannelEnum;
 import com.tzld.longarticle.recommend.server.common.enums.aigc.MiniprogramUseTypeEnum;
 import com.tzld.longarticle.recommend.server.common.enums.aigc.ProduceContentAuditStatusEnum;
+import com.tzld.longarticle.recommend.server.common.enums.aigc.PushTypeEnum;
 import com.tzld.longarticle.recommend.server.common.enums.longArticle.ArticleVideoAuditStatusEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.AccountBusinessTypeEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.ContentPoolEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
 import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
@@ -31,13 +34,17 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.GetOffVideoCra
 import com.tzld.longarticle.recommend.server.model.entity.crawler.LongArticlesVideo;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
 import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
+import com.tzld.longarticle.recommend.server.model.param.RecommendParam;
+import com.tzld.longarticle.recommend.server.model.param.RecommendRequest;
 import com.tzld.longarticle.recommend.server.model.param.TitleHisCacheParam;
+import com.tzld.longarticle.recommend.server.model.vo.ArticleSortResponseDataItem;
 import com.tzld.longarticle.recommend.server.model.vo.FeishuTableDTO;
 import com.tzld.longarticle.recommend.server.remote.ODPSManager;
 import com.tzld.longarticle.recommend.server.remote.aigc.AIGCProduceContentAuditService;
 import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
 import com.tzld.longarticle.recommend.server.repository.aigc.ProducePlanRepository;
 import com.tzld.longarticle.recommend.server.repository.aigc.PublishPlanRepository;
+import com.tzld.longarticle.recommend.server.repository.aigc.PublishPlanSettingRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.GetOffVideoCrawlerRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.LongArticlesVideoRepository;
 import com.tzld.longarticle.recommend.server.repository.longArticle.*;
@@ -45,6 +52,10 @@ import com.tzld.longarticle.recommend.server.repository.model.PushMessageCallbac
 import com.tzld.longarticle.recommend.server.service.recommend.ArticleCategoryService;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticlePromotionService;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticleService;
+import com.tzld.longarticle.recommend.server.service.recommend.RecommendService;
+import com.tzld.longarticle.recommend.server.service.recommend.filter.FilterParam;
+import com.tzld.longarticle.recommend.server.service.recommend.filter.FilterResult;
+import com.tzld.longarticle.recommend.server.service.recommend.filter.FilterService;
 import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallService;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.LarkRobotUtil;
@@ -60,11 +71,13 @@ import org.apache.http.client.methods.CloseableHttpResponse;
 import org.apache.http.client.methods.HttpGet;
 import org.apache.http.client.utils.URIBuilder;
 import org.apache.http.impl.client.CloseableHttpClient;
+import org.springframework.beans.BeanUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.dao.DuplicateKeyException;
 import org.springframework.data.domain.Page;
 import org.springframework.data.domain.PageRequest;
+import org.springframework.data.redis.core.RedisTemplate;
 import org.springframework.stereotype.Service;
 import org.springframework.util.StringUtils;
 
@@ -131,6 +144,15 @@ public class XxlJobService {
     private PublishContentGzhWaitingRepository publishContentGzhWaitingRepository;
     @Autowired
     private LongArticleTitleAuditRepository titleAuditRepository;
+    @Autowired
+    private FilterService filterService;
+    @Autowired
+    private RecommendService recommendService;
+    @Autowired
+    private PublishPlanSettingRepository publishPlanSettingRepository;
+
+    @Autowired
+    private RedisTemplate<String, String> redisTemplate;
 
     ExecutorService thread = new CommonThreadPoolExecutor(
             5,
@@ -1095,4 +1117,83 @@ public class XxlJobService {
         log.info("syncGzhWaitingPublishContent success planId: {} accountId: {}", planId, accountId);
     }
 
+    @XxlJob("ContentPreFilterJob")
+    public ReturnT<String> gzhWaitingPublishContentPreFilter(String param) {
+        List<PublishPlanAccountDTO> planAccountList = longArticleBaseMapper.getGroupPublishPlanAccounts();
+        ExecutorService thread = new CommonThreadPoolExecutor(
+                syncPublishContentThreadPoolSize, syncPublishContentThreadPoolSize, 0L, TimeUnit.SECONDS,
+                new LinkedBlockingQueue<>(),
+                new ThreadFactoryBuilder().setNameFormat("ContentPreFilter-%d").build(),
+                new ThreadPoolExecutor.AbortPolicy());
+        CountDownLatch cdl = new CountDownLatch(planAccountList.size());
+        String dateStr = DateUtils.getCurrentDateStr("yyyyMMdd");
+        for (PublishPlanAccountDTO item : planAccountList) {
+            String redisKey = "ContentPreFilterJob:" + dateStr + ":" + item.getPlanId() + "-" + item.getAccountId();
+            thread.submit(() -> {
+                try {
+                    String value = redisTemplate.opsForValue().get(redisKey);
+                    if (StringUtils.hasText(value)) {
+                        return;
+                    }
+                    Account account = accountRepository.getById(item.getAccountId());
+                    List<Content> contentList = longArticleBaseMapper.getPublishContentGzhWaiting(item.getPlanId(), item.getAccountId());
+                    // 预处理过滤
+                    if (Objects.isNull(account) || CollectionUtil.isEmpty(contentList)) {
+                        return;
+                    }
+                    FilterParam filterParam = buildFilterParam(item.getPlanId(), account, contentList);
+                    FilterResult filterResult = filterService.filter(filterParam, true);
+                    if (CollectionUtil.isNotEmpty(filterResult.getFilterContent())) {
+                        List<ArticleSortResponseDataItem> filterContentList = new ArrayList<>();
+                        for (Content filterContent : filterResult.getFilterContent()) {
+                            ArticleSortResponseDataItem saveItem = new ArticleSortResponseDataItem();
+                            BeanUtils.copyProperties(filterContent, saveItem);
+                            filterContentList.add(saveItem);
+                        }
+                        recommendService.updateWaitingContentFilter(filterContentList);
+                    }
+                } finally {
+                    redisTemplate.opsForValue().set(redisKey, "1", 1, TimeUnit.DAYS);
+                    cdl.countDown();
+                }
+            });
+        }
+        try {
+            cdl.await();
+        } catch (InterruptedException e) {
+            log.error("gzhWaitingPublishContentPreFilter error", e);
+        }
+        return ReturnT.SUCCESS;
+    }
+
+    private FilterParam buildFilterParam(String planId, Account account, List<Content> contentList) {
+        PublishPlanSetting publishPlanSetting = publishPlanSettingRepository.getByPlanId(planId);
+        String type = ArticleTypeEnum.QUNFA.getVal();
+        if (Objects.equals(publishPlanSetting.getPushType(), PushTypeEnum.AUTO_PUBLISH.getVal())
+                || Objects.equals(publishPlanSetting.getPushType(), PushTypeEnum.ROBOPOST.getVal())) {
+            type = ArticleTypeEnum.WUXIANLIU.getVal();
+        }
+        FilterParam filterParam = new FilterParam();
+        filterParam.setPlanId(planId);
+        filterParam.setAccountId(account.getId());
+        filterParam.setAccountName(account.getName());
+        filterParam.setGhId(account.getGhId());
+        filterParam.setType(type);
+        filterParam.setStrategy(getStrategy(planId, account.getGhId(), account.getName(), publishPlanSetting.getPushType()));
+        filterParam.setScene(SceneConstants.DEFAULT);
+        filterParam.setContents(contentList);
+        return filterParam;
+    }
+
+    private String getStrategy(String planId, String ghId, String accountName, Integer pushType) {
+        RecommendRequest request = new RecommendRequest();
+        request.setGhId(ghId);
+        request.setAccountName(accountName);
+        request.setPushType(pushType);
+        RecommendParam param = new RecommendParam();
+        param.setPlanId(planId);
+        recommendService.setStrategy(request, param);
+        return param.getStrategy();
+    }
+
 }

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/RecommendService.java

@@ -129,7 +129,7 @@ public class RecommendService {
         }
     }
 
-    private void setStrategy(RecommendRequest request, RecommendParam param) {
+    public void setStrategy(RecommendRequest request, RecommendParam param) {
         int historyCount = articleRepository.countByGhIdAndTypeAndItemIndex(request.getGhId(),
                 ArticleTypeEnum.QUNFA.getVal(), 1);
         if (historyCount < 10) {

+ 61 - 6
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/filter/FilterService.java

@@ -4,14 +4,19 @@ import com.tzld.longarticle.recommend.server.common.ContentCountMonitor;
 import com.tzld.longarticle.recommend.server.common.CostMonitor;
 import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.RankStrategyEnum;
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.Content;
 import com.tzld.longarticle.recommend.server.service.ServiceBeanFactory;
 import com.tzld.longarticle.recommend.server.service.recommend.filter.strategy.*;
 import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
+import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.JSONUtils;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.data.redis.core.RedisTemplate;
 import org.springframework.stereotype.Service;
+import org.springframework.util.StringUtils;
 
 import java.util.*;
 import java.util.concurrent.CountDownLatch;
@@ -27,15 +32,37 @@ import static com.tzld.longarticle.recommend.server.common.constant.SceneConstan
 public class FilterService {
     private final ExecutorService pool = ThreadPoolFactory.filterPool();
 
-    public FilterResult filter(FilterParam param) {
-        return contentFilter(param);
+    @Autowired
+    LongArticleBaseMapper longArticleBaseMapper;
+    @Autowired
+    private RedisTemplate<String, String> redisTemplate;
+
+
+    public FilterResult filter(FilterParam param, Boolean preFilter) {
+        return contentFilter(param, preFilter);
     }
 
-    private FilterResult contentFilter(FilterParam param) {
+    private FilterResult contentFilter(FilterParam param, Boolean preFilter) {
         long start = System.currentTimeMillis();
         FilterResult result = new FilterResult();
 
-        List<FilterStrategy> strategies = getStrategies(param);
+        List<FilterStrategy> strategies = getAllStrategies(param);
+
+        // 已同步待发布内容 预过滤
+        if (!preFilter) {
+            String dateStr = DateUtils.getCurrentDateStr("yyyyMMdd");
+            String redisKey = "ContentPreFilterJob:" + dateStr + ":" + param.getPlanId() + "-" + param.getAccountId();
+            String value = redisTemplate.opsForValue().get(redisKey);
+            if (StringUtils.hasText(value)) {
+                Long minTime = longArticleBaseMapper.getMinGzhWaitingPublishContent(param.getPlanId(), param.getAccountId());
+                Long todayMinTime = DateUtils.getTodayStart();
+                if (Objects.nonNull(minTime) && minTime < todayMinTime) {
+                    strategies = getUnPreFilterStrategies(param);
+                }
+            }
+        } else {
+            strategies = getPreFilterStrategies(param);
+        }
         log.info("FilterStrategy {}", JSONUtils.toJson(CommonCollectionUtils.toList(strategies,
                 s -> s.getClass().getSimpleName())));
         CountDownLatch cdl = new CountDownLatch(strategies.size());
@@ -98,14 +125,14 @@ public class FilterService {
         return result;
     }
 
-    private List<FilterStrategy> getStrategies(FilterParam param) {
+    private List<FilterStrategy> getAllStrategies(FilterParam param) {
         List<FilterStrategy> strategies = new ArrayList<>();
         strategies.add(ServiceBeanFactory.getBean(BadStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(SensitiveStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(DeDuplicationStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(KeywordStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(ArticlePromotionStrategy.class));
-        strategies.add(ServiceBeanFactory.getBean(VideoPoolBadAuditStrategy.class));
+//        strategies.add(ServiceBeanFactory.getBean(VideoPoolBadAuditStrategy.class));
         if (param.getScene().equals(FWH_COLD_START)) {
             strategies.add(ServiceBeanFactory.getBean(HistoryTitleForFwhColdStartStrategy.class));
         } else {
@@ -118,4 +145,32 @@ public class FilterService {
         }
         return strategies;
     }
+
+    private List<FilterStrategy> getPreFilterStrategies(FilterParam param) {
+        List<FilterStrategy> strategies = new ArrayList<>();
+        strategies.add(ServiceBeanFactory.getBean(BadStrategy.class));
+        strategies.add(ServiceBeanFactory.getBean(SensitiveStrategy.class));
+        strategies.add(ServiceBeanFactory.getBean(DeDuplicationStrategy.class));
+        strategies.add(ServiceBeanFactory.getBean(KeywordStrategy.class));
+        strategies.add(ServiceBeanFactory.getBean(ArticlePromotionStrategy.class));
+        if (param.getScene().equals(FWH_COLD_START)) {
+            strategies.add(ServiceBeanFactory.getBean(HistoryTitleForFwhColdStartStrategy.class));
+        } else {
+            if (Objects.equals(param.getStrategy(), RankStrategyEnum.INFINITE_STRATEGY.getStrategy())) {
+                strategies.add(ServiceBeanFactory.getBean(InfiniteHisTitleStrategy.class));
+            } else {
+                strategies.add(ServiceBeanFactory.getBean(HistoryTitleStrategy.class));
+            }
+        }
+        return strategies;
+    }
+
+    private List<FilterStrategy> getUnPreFilterStrategies(FilterParam param) {
+        List<FilterStrategy> strategies = new ArrayList<>();
+        if (!param.getScene().equals(FWH_COLD_START)
+                && !Objects.equals(param.getStrategy(), RankStrategyEnum.INFINITE_STRATEGY.getStrategy())) {
+            strategies.add(ServiceBeanFactory.getBean(LowScoreStrategy.class));
+        }
+        return strategies;
+    }
 }

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/strategy/ColdStartBackupRecallStrategy.java

@@ -30,7 +30,7 @@ public class ColdStartBackupRecallStrategy implements RecallStrategy {
         // 处理 content
         FilterParam filterParam = FilterParamFactory.create(param, content);
         filterParam.setBackup(true);
-        FilterResult filterResult = filterService.filter(filterParam);
+        FilterResult filterResult = filterService.filter(filterParam, false);
         // 处理 content
         RecallResult.RecallData result = new RecallResult.RecallData();
         result.setContents(content.stream().filter(o -> filterResult.getContentIds().contains(o.getId()))

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/strategy/DefaultRecallStrategy.java

@@ -29,7 +29,7 @@ public class DefaultRecallStrategy implements RecallStrategy {
         List<Content> content = param.getContent();
         // 处理 content
         FilterParam filterParam = FilterParamFactory.create(param, content);
-        FilterResult filterResult = filterService.filter(filterParam);
+        FilterResult filterResult = filterService.filter(filterParam, false);
         long t2 = System.currentTimeMillis();
         CostMonitor.logCost("Recall", "ProcessFilter", t2 - t1);
         // 处理 content

+ 5 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/web/XxlJobController.java

@@ -127,4 +127,9 @@ public class XxlJobController {
         service.syncGzhWaitingPublishContent(null);
     }
 
+    @GetMapping("/gzhWaitingPublishContentPreFilter")
+    public void gzhWaitingPublishContentPreFilter() {
+        service.gzhWaitingPublishContentPreFilter(null);
+    }
+
 }

+ 9 - 0
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -473,4 +473,13 @@
         and status = 1
     </select>
 
+    <select id="getPublishContentGzhWaiting"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.Content">
+        select id, source_type, source_id, title, content_pool_type
+        from publish_content_gzh_waiting
+        where plan_id = #{planId}
+          and publish_account_id = #{accountId}
+          and status = 1
+    </select>
+
 </mapper>