Bladeren bron

topContentReSendJob

wangyunpeng 2 weken geleden
bovenliggende
commit
36021a6a58

+ 3 - 4
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -1,9 +1,6 @@
 package com.tzld.longarticle.recommend.server.mapper.longArticle;
 
-import com.tzld.longarticle.recommend.server.model.dto.Content;
-import com.tzld.longarticle.recommend.server.model.dto.GetOffVideos;
-import com.tzld.longarticle.recommend.server.model.dto.LongArticlesCrawlerVideos;
-import com.tzld.longarticle.recommend.server.model.dto.LongArticlesMatchVideos;
+import com.tzld.longarticle.recommend.server.model.dto.*;
 import com.tzld.longarticle.recommend.server.model.dto.aigc.PublishPlanAccountDTO;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
 import com.tzld.longarticle.recommend.server.model.param.ArticleVideoPoolSourceParam;
@@ -121,4 +118,6 @@ public interface LongArticleBaseMapper {
     void updateContentStatusBySourceId(String sourceId);
 
     void deleteGzhWaitingByPlanIdAccountId(String planId, String accountId);
+
+    /**
+     * Returns per-title publish statistics, backed by the {@code getPublishedTopContent}
+     * select in {@code LongArticleBaseMapper.xml}.
+     *
+     * <p>That statement reads {@code datastat_sort_strategy} rows whose
+     * {@code produce_plan_name} is {@code 'TOP100'} and whose {@code date_str} is strictly
+     * greater than the argument, then groups them by title.
+     *
+     * @param lastPublishDateStr exclusive lower bound compared against {@code date_str};
+     *                           the caller in {@code ArticlePromotionService} passes a
+     *                           {@code yyyyMMdd} formatted string
+     * @return one {@link PublishedTopContentDTO} per distinct title
+     */
+    List<PublishedTopContentDTO> getPublishedTopContent(String lastPublishDateStr);
 }

+ 10 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/PublishedTopContentDTO.java

@@ -0,0 +1,10 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+/**
+ * Result row of the {@code getPublishedTopContent} mapper query: aggregated publish
+ * statistics for a single article title.
+ */
+@Data
+public class PublishedTopContentDTO {
+    /** Article title; the mapper query groups by this column. */
+    private String title;
+    /** {@code round(avg(read_rate), 4)} over the rows matched for this title. */
+    // NOTE(review): presumably null when every matched read_rate is NULL - confirm before unboxing.
+    private Double readRate;
+    /** {@code count(1)} of the rows matched for this title. */
+    private Long publishNum;
+}

+ 48 - 14
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticlePromotionService.java

@@ -15,6 +15,7 @@ import com.tzld.longarticle.recommend.server.mapper.aigc.PublishContentMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
 import com.tzld.longarticle.recommend.server.model.dto.PublishContentDTO;
+import com.tzld.longarticle.recommend.server.model.dto.PublishedTopContentDTO;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.PublishSortLog;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
@@ -50,6 +51,7 @@ import org.springframework.util.StringUtils;
 
 import java.net.URLDecoder;
 import java.util.*;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 
 @Service
@@ -504,7 +506,8 @@ public class ArticlePromotionService {
     }
 
     private void articleAddDependPlan(String produceId, ProducePlanDetailVO detail, String inputSourceValue,
-                                      String inputSourceLabel, Integer inputSourceType, List<ArticleUnsafeTitle> unsafeTitleList) {
+                                      String inputSourceLabel, Integer inputSourceType, List<ArticleUnsafeTitle> unsafeTitleList,
+                                      Map<String, PublishedTopContentDTO> lastTopContentMap) {
         try {
             // 获取生产计划的详细信息
             if (detail == null) {
@@ -520,16 +523,32 @@ public class ArticlePromotionService {
             List<ProducePlanInputSourceParam> inputSources = detail.getInputSourceGroups().get(0).getInputSources();
             List<String> dependValues = new ArrayList<>();
             Iterator<ProducePlanInputSourceParam> iterator = inputSources.iterator();
+            Double filterReadRate = 0.85;
+            Integer filterPublishNum = 20;
+            Integer inputSourceSize = inputSources.size();
             while (iterator.hasNext()) {
                 ProducePlanInputSourceParam inputSource = iterator.next();
                 dependValues.add(inputSource.getInputSourceValue());
                 // 违规文章移除
                 if (unsafeTitleList.stream().anyMatch(unsafeTitle -> inputSource.getInputSourceLabel().contains(unsafeTitle.getTitle()))) {
                     iterator.remove();
+                    continue;
+                }
+                // TOP内容表现低于阈值移除
+                for (Map.Entry<String, PublishedTopContentDTO> entry : lastTopContentMap.entrySet()) {
+                    String title = entry.getKey();
+                    PublishedTopContentDTO topContent = entry.getValue();
+                    // 过滤掉标题包含TOP内容的文章
+                    if (inputSource.getInputSourceLabel().contains(title)) {
+                        if (topContent.getReadRate() < filterReadRate && topContent.getPublishNum() >= filterPublishNum) {
+                            iterator.remove();
+                            break;
+                        }
+                    }
                 }
             }
             // 如果计划 ID 已存在,直接返回
-            if (dependValues.contains(inputSourceValue)) {
+            if (dependValues.contains(inputSourceValue) && inputSources.size() == inputSourceSize) {
                 log.info("depend_value exist: {}", inputSourceValue);
                 return;
             }
@@ -654,22 +673,17 @@ public class ArticlePromotionService {
         List<CrawlerContent> crawlerContentList = aigcBaseMapper.getCrawlerContentByChannelContentIdIn(channelContentIds);
         Map<String, Long> crawlerContentMap = crawlerContentList.stream().collect(Collectors.toMap(
                 CrawlerContent::getChannelContentId, CrawlerContent::getId));
-        ProducePlanDetailVO detail = aigcProducePlanDetailService.articleGetProducePlanDetail(topProducePlanId);
+        ProducePlanDetailVO detail = getProducePlanDetail(topProducePlanId);
         List<ArticleUnsafeTitle> unsafeTitleList = articleUnsafeTitleRepository.getByStatus(1);
-        int retryTimes = 5;
-        while (Objects.isNull(detail) && retryTimes > 0) {
-            detail = aigcProducePlanDetailService.articleGetProducePlanDetail(topProducePlanId);
-            retryTimes--;
-            try {
-                Thread.sleep(1000);
-            } catch (InterruptedException e) {
-                log.error("topContentReSendJob detail sleep error: ", e);
-            }
-        }
         if (Objects.isNull(detail)) {
             log.error("topContentReSendJob detail is null");
             return ReturnT.FAIL;
         }
+        // 获取TOP内容最近发布表现
+        String lastPublishDateStr = DateUtils.getBeforeDaysDateStr("yyyyMMdd", 100);
+        List<PublishedTopContentDTO> lastTopContent = longArticleBaseMapper.getPublishedTopContent(lastPublishDateStr);
+        Map<String, PublishedTopContentDTO> lastTopContentMap = lastTopContent.stream().collect(Collectors.toMap(
+                PublishedTopContentDTO::getTitle, Function.identity()));
         for (PublishContentDTO item : publishContentList) {
             if (TitleSimilarCheckUtil.isDuplicateContent(item.getTitle(), existsTitles, TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
                 continue;
@@ -681,8 +695,28 @@ public class ArticlePromotionService {
                 continue;
             }
             articleAddDependPlan(topProducePlanId, detail, String.valueOf(crawlerContentId), inputSourceLabel,
-                    ProducePlanInputSourceTypeEnum.contentID.getVal(), unsafeTitleList);
+                    ProducePlanInputSourceTypeEnum.contentID.getVal(), unsafeTitleList, lastTopContentMap);
         }
         return ReturnT.SUCCESS;
     }
+
+    /**
+     * Loads the produce plan detail, retrying while the service returns {@code null}.
+     *
+     * <p>One initial attempt is made, followed by at most five retries. Each retry is
+     * preceded by a one second pause, so no time is spent sleeping once a detail has
+     * been obtained.
+     *
+     * @param planId id of the produce plan to load
+     * @return the plan detail, or {@code null} when every attempt returned {@code null}
+     *         or the waiting thread was interrupted
+     */
+    private ProducePlanDetailVO getProducePlanDetail(String planId) {
+        ProducePlanDetailVO detail = aigcProducePlanDetailService.articleGetProducePlanDetail(planId);
+        int retryTimes = 5;
+        while (Objects.isNull(detail) && retryTimes > 0) {
+            retryTimes--;
+            try {
+                // Wait before the retry rather than after it.
+                Thread.sleep(1000);
+            } catch (InterruptedException e) {
+                log.error("getProducePlanDetail sleep error: ", e);
+                // Restore the interrupt flag for callers and stop retrying; any further
+                // sleep would fail immediately anyway.
+                Thread.currentThread().interrupt();
+                break;
+            }
+            detail = aigcProducePlanDetailService.articleGetProducePlanDetail(planId);
+        }
+        if (Objects.isNull(detail)) {
+            log.error("getProducePlanDetail detail is null");
+        }
+        return detail;
+    }
+
 }

+ 29 - 11
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -342,17 +342,17 @@
             resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy">
         select dss.*
         from datastat_sort_strategy dss
-                 join (select title, max(read_rate) as read_rate
-                       from datastat_sort_strategy
-                       where date_str &lt; #{dateStr}
-                         and type = 9
-                         and read_rate > 1
-                         and view_count > 10000
-                         and position in (1, 2)
-                         and title not in (select title from article_unsafe_title where status = 1)
-                         and first_level is not null
-                         and strategy is not null
-                       GROUP BY title) top on dss.title = top.title and dss.read_rate = top.read_rate
+             join (select title, max(read_rate) as read_rate
+                   from datastat_sort_strategy
+                   where date_str &lt; #{dateStr}
+                     and type = 9
+                     and read_rate > 1.3
+                     and view_count > 10000
+                     and position = 1
+                     and title not in (select title from article_unsafe_title where status = 1)
+                     and first_level is not null
+                     and strategy is not null
+                   GROUP BY title) top on dss.title = top.title and dss.read_rate = top.read_rate
     </select>
 
     <select id="getVideoEndScreenTransformationTask"
@@ -487,6 +487,24 @@
           and status = 1
     </select>
 
+    <!--
+      Per-title average read_rate (rounded to 4 decimals) and row count for TOP100 rows
+      whose date_str is strictly greater than lastPublishDateStr.
+      The earlier form ranked rows per title through MySQL user variables, but the rank was
+      never filtered on, so it had no effect on the grouped result; this statement
+      aggregates the same rows directly.
+      NOTE(review): if only the most recent N publishes per title were meant to be
+      averaged, that limit still has to be added - confirm the intent.
+    -->
+    <select id="getPublishedTopContent"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.PublishedTopContentDTO">
+        select title, round(avg(read_rate), 4) as readRate, count(1) as publishNum
+        from datastat_sort_strategy
+        where date_str > #{lastPublishDateStr}
+          and produce_plan_name = 'TOP100'
+        group by title
+    </select>
+
     <update id="updateContentStatusBySourceId">
         update publish_content_gzh_waiting set status = 0 where source_id = #{sourceId}
     </update>