Przeglądaj źródła

Merge branch 'wyp/1122-daysDecreaseTime' of Server/long-article-recommend into master

wangyunpeng 7 miesięcy temu
rodzic
commit
af794eec02

+ 3 - 6
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/aigc/AigcBaseMapper.java

@@ -1,9 +1,6 @@
 package com.tzld.longarticle.recommend.server.mapper.aigc;
 
-import com.tzld.longarticle.recommend.server.model.dto.AccountTypeFansDTO;
-import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
-import com.tzld.longarticle.recommend.server.model.dto.NotPublishPlan;
-import com.tzld.longarticle.recommend.server.model.dto.ProduceContentDTO;
+import com.tzld.longarticle.recommend.server.model.dto.*;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.*;
 import com.tzld.longarticle.recommend.server.model.param.MiniprogramTaskParam;
 import com.tzld.longarticle.recommend.server.model.param.PublishContentParam;
@@ -14,7 +11,7 @@ import java.util.List;
 @Mapper
 public interface AigcBaseMapper {
 
-    List<PublishContent> getPublishContentByTitle(List<PublishContentParam> list);
+    List<PublishContentDTO> getPublishContentByTitle(List<PublishContentParam> list);
 
     List<PublishPlanMiniprogramTask> getMiniProgramTask(List<MiniprogramTaskParam> miniprogramTaskParamList);
 
@@ -32,7 +29,7 @@ public interface AigcBaseMapper {
 
     List<AccountTypeFansDTO> getAccountTypeFans();
 
-    List<PublishContent> getHisPublishByTitles(List<String> titleList);
+    List<PublishContentDTO> getHisPublishByTitles(List<String> titleList);
 
     List<PublishContent> getNearestPublishContent(String publishAccountId, Long publishTimestamp, Integer size);
 

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/Content.java

@@ -30,6 +30,7 @@ public class Content {
     private String crawlerCoverUrl;
     private Integer crawlerViewCount;
     private Long crawlerTimestamp;
+    private Long rootPublishTimestamp; // publishTimestamp of the root PublishContent; RecallService sets it only when a matching record is found, so it may be null (CrawlerDaysDecreaseStrategy then falls back to crawlerTimestamp)
 //    private Integer crawlerLikeCount;
 //    private Long crawlerPublishTimestamp;
 //    private String crawlerAccountName;

+ 17 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/PublishContentDTO.java

@@ -0,0 +1,17 @@
+package com.tzld.longarticle.recommend.server.model.dto;
+
+import lombok.Data;
+
+@Data
+public class PublishContentDTO { // result type of the AigcBaseMapper queries getPublishContentByTitle and getHisPublishByTitles
+
+    private String id; // DataDashboardService uses it to look up publish content layouts
+    private String planId; // DataDashboardService uses it to look up publish plans
+    private Integer sourceType; // DataDashboardService keeps only values 1 and 2 before collecting sourceId
+    private String sourceId;
+    private String crawlerChannelContentId;
+    private String publishAccountId;
+    private Long publishTimestamp; // NOTE(review): callers divide by 1000 before date formatting, so presumably epoch milliseconds — confirm
+    private String title; // filled from the `output.output as title` column alias in the mapper SQL
+}
+

+ 0 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/aigc/PublishContent.java

@@ -179,7 +179,5 @@ public class PublishContent {
 
     @Column(name = "update_timestamp")
     private Long updateTimestamp;
-
-    private String title;
 }
 

+ 3 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/aigc/PublishContentRepository.java

@@ -4,7 +4,10 @@ import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent;
 import org.springframework.data.jpa.repository.JpaRepository;
 import org.springframework.stereotype.Repository;
 
+import java.util.List;
+
 @Repository
 public interface PublishContentRepository extends JpaRepository<PublishContent, String> {
 
+    List<PublishContent> getByIdIn(List<String> publishContentIds); // called by RecallService with root publish content ids; NOTE(review): no implementation is visible here — presumably a Spring Data derived query matching id IN (...), confirm
 }

+ 21 - 20
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/DataDashboardService.java

@@ -11,6 +11,7 @@ import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeE
 import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.ProduceContentDTO;
+import com.tzld.longarticle.recommend.server.model.dto.PublishContentDTO;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.*;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
@@ -193,20 +194,20 @@ public class DataDashboardService {
             }
             return null;
         }).filter(Objects::nonNull).collect(Collectors.toList());
-        List<PublishContent> publishContents = new ArrayList<>();
+        List<PublishContentDTO> publishContents = new ArrayList<>();
         for (List<PublishContentParam> partitions : Lists.partition(publishContentParamList, 100)) {
             publishContents.addAll(aigcBaseMapper.getPublishContentByTitle(partitions));
         }
         log.info("newSortStrategyData publishContents finish");
-        Map<String, Map<String, Map<Long, PublishContent>>> publishContentMap = publishContents.stream()
+        Map<String, Map<String, Map<Long, PublishContentDTO>>> publishContentMap = publishContents.stream()
                 .filter(o -> Objects.nonNull(o.getPublishTimestamp()))
-                .sorted(Comparator.comparingLong(PublishContent::getPublishTimestamp)).collect(
-                        Collectors.groupingBy(PublishContent::getPublishAccountId,
-                                Collectors.groupingBy(PublishContent::getTitle,
-                                        Collectors.toMap(PublishContent::getPublishTimestamp, o -> o,
+                .sorted(Comparator.comparingLong(PublishContentDTO::getPublishTimestamp)).collect(
+                        Collectors.groupingBy(PublishContentDTO::getPublishAccountId,
+                                Collectors.groupingBy(PublishContentDTO::getTitle,
+                                        Collectors.toMap(PublishContentDTO::getPublishTimestamp, o -> o,
                                                 (existing, replacement) -> replacement))));
         // 获取发布内容排版
-        List<String> publishContentIds = publishContents.stream().map(PublishContent::getId).collect(Collectors.toList());
+        List<String> publishContentIds = publishContents.stream().map(PublishContentDTO::getId).collect(Collectors.toList());
         List<PublishContentLayout> publishContentLayoutList = new ArrayList<>();
         for (List<String> partitions : Lists.partition(publishContentIds, 1000)) {
             publishContentLayoutList.addAll(publishContentLayOutRepository.findByPublishContentIdIn(partitions));
@@ -216,7 +217,7 @@ public class DataDashboardService {
                 .collect(Collectors.toMap(PublishContentLayout::getPublishContentId, o -> o,
                         (existing, replacement) -> replacement));
         //获取发布计划
-        List<String> publishPlanIds = publishContents.stream().map(PublishContent::getPlanId).distinct()
+        List<String> publishPlanIds = publishContents.stream().map(PublishContentDTO::getPlanId).distinct()
                 .collect(Collectors.toList());
         List<PublishPlan> publishPlanList = publishPlanRepository.findByIdIn(publishPlanIds);
         log.info("newSortStrategyData publishPlanList finish");
@@ -225,7 +226,7 @@ public class DataDashboardService {
         // 获取生成记录
         List<String> contentSourceIds = publishContents.stream()
                 .filter(o -> Arrays.asList(1, 2).contains(o.getSourceType()))
-                .map(PublishContent::getSourceId).distinct().collect(Collectors.toList());
+                .map(PublishContentDTO::getSourceId).distinct().collect(Collectors.toList());
         List<ProducePlanExeRecord> planExeRecordList = new ArrayList<>();
         for (List<String> partitions : Lists.partition(contentSourceIds, 1000)) {
             planExeRecordList.addAll(producePlanExeRecordRepository.findByPlanExeIdIn(partitions));
@@ -246,7 +247,7 @@ public class DataDashboardService {
         Map<String, List<ProducePlanInputSource>> inputSourceMap = inputSourceList.stream()
                 .collect(Collectors.groupingBy(ProducePlanInputSource::getPlanId));
         // 获取抓取内容关联
-        List<String> crawlerChannelContentIds = publishContents.stream().map(PublishContent::getCrawlerChannelContentId)
+        List<String> crawlerChannelContentIds = publishContents.stream().map(PublishContentDTO::getCrawlerChannelContentId)
                 .distinct().collect(Collectors.toList());
         List<CrawlerPlanResultRel> resultRelList = aigcBaseMapper.getCrawlerPlanRelByChannelContentIds(crawlerChannelContentIds);
         log.info("newSortStrategyData resultRelList finish");
@@ -262,7 +263,7 @@ public class DataDashboardService {
         // 获取小程序任务
         List<MiniprogramTaskParam> miniprogramTaskParamList = new ArrayList<>();
         Set<String> distinct = new HashSet<>();
-        for (PublishContent publishContent : publishContents) {
+        for (PublishContentDTO publishContent : publishContents) {
             String key = publishContent.getPlanId() + publishContent.getPublishAccountId();
             if (distinct.contains(key)) {
                 continue;
@@ -360,7 +361,7 @@ public class DataDashboardService {
 
     private void setObjAigcInfo(Article article, NewSortStrategyExport obj, String date,
                                 Map<String, PublishAccount> publishAccountMap,
-                                Map<String, Map<String, Map<Long, PublishContent>>> publishContentMap,
+                                Map<String, Map<String, Map<Long, PublishContentDTO>>> publishContentMap,
                                 Map<String, PublishContentLayout> publishContentLayoutMap,
                                 Map<String, PublishPlan> publishPlanMap,
                                 Map<String, List<PublishPlanMiniprogramTask>> miniprogramTaskMap,
@@ -371,19 +372,19 @@ public class DataDashboardService {
                                 Map<String, CrawlerPlan> crawlerPlanMap,
                                 Map<String, ProducePlan> sourceTitlePlanMap) {
         PublishAccount publishAccount = publishAccountMap.get(article.getGhId());
-        Map<String, Map<Long, PublishContent>> titleContentMap = publishContentMap.get(publishAccount.getId());
+        Map<String, Map<Long, PublishContentDTO>> titleContentMap = publishContentMap.get(publishAccount.getId());
         if (Objects.isNull(titleContentMap)) {
             return;
         }
-        Map<Long, PublishContent> publishTimeContentMap = titleContentMap.get(article.getTitle());
+        Map<Long, PublishContentDTO> publishTimeContentMap = titleContentMap.get(article.getTitle());
         if (Objects.isNull(publishTimeContentMap) || publishTimeContentMap.isEmpty()) {
             return;
         }
-        PublishContent publishContent = null;
+        PublishContentDTO publishContent = null;
         List<String> hisPublishTimeStrList = publishTimeContentMap.keySet().stream()
                 .map(o -> DateUtils.timestampToYMDStr(o / 1000, "yyyyMMdd")).collect(Collectors.toList());
         String publishTime = DateUtils.findNearestDate(hisPublishTimeStrList, date, "yyyyMMdd");
-        for (Map.Entry<Long, PublishContent> entry : publishTimeContentMap.entrySet()) {
+        for (Map.Entry<Long, PublishContentDTO> entry : publishTimeContentMap.entrySet()) {
             String str = DateUtils.timestampToYMDStr(entry.getKey() / 1000, "yyyyMMdd");
             if (publishTime.equals(str)) {
                 publishContent = entry.getValue();
@@ -1404,11 +1405,11 @@ public class DataDashboardService {
                         Collectors.toMap(Article::getItemIndex, o -> o,
                                 (existing, replacement) -> replacement))));
         List<String> titleList = articleList.stream().map(Article::getTitle).distinct().collect(Collectors.toList());
-        List<PublishContent> hisPublishList = new ArrayList<>();
+        List<PublishContentDTO> hisPublishList = new ArrayList<>();
         for (List<String> partitions : Lists.partition(new ArrayList<>(titleList), 100)) {
             hisPublishList.addAll(aigcBaseMapper.getHisPublishByTitles(partitions));
         }
-        Map<String, List<PublishContent>> hisPublishMap = hisPublishList.stream().collect(Collectors.groupingBy(PublishContent::getTitle));
+        Map<String, List<PublishContentDTO>> hisPublishMap = hisPublishList.stream().collect(Collectors.groupingBy(PublishContentDTO::getTitle));
         String ymd = DateUtils.timestampToYMDStr(minTimestamp - 86400 * 7, "yyyy-MM-dd");
         List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByUpdateTimeGreaterThanEqual(ymd);
         Map<String, Map<String, Map<String, AccountAvgInfo>>> accountAvgInfoMap = accountAvgInfoList.stream()
@@ -1475,10 +1476,10 @@ public class DataDashboardService {
                     }
                 }
             }
-            List<PublishContent> hisPublish = hisPublishMap.get(article.getTitle());
+            List<PublishContentDTO> hisPublish = hisPublishMap.get(article.getTitle());
             if (CollectionUtils.isNotEmpty(hisPublish)) {
                 long hisMinDate = hisPublish.stream().filter(o -> Objects.nonNull(o.getPublishTimestamp()))
-                        .mapToLong(PublishContent::getPublishTimestamp).min().orElse(0);
+                        .mapToLong(PublishContentDTO::getPublishTimestamp).min().orElse(0);
                 int explorationInterval = (int) ((article.getPublishTimestamp() - (hisMinDate / 1000)) / 86400);
                 item.setFirstExplorationIntervalAvg(explorationInterval);
             }

+ 11 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -10,6 +10,7 @@ import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.Content;
 import com.tzld.longarticle.recommend.server.model.dto.ContentHisPublishArticle;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.CrawlerMetaArticle;
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
@@ -19,6 +20,7 @@ import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCat
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
 import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
 import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
+import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.AccountCorrelationRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
@@ -82,6 +84,8 @@ public class RecallService implements ApplicationContextAware {
     @Autowired
     ArticleCategoryRepository articleCategoryRepository;
     @Autowired
+    PublishContentRepository publishContentRepository;
+    @Autowired
     AccountCategoryRepository accountCategoryRepository;
 
     private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
@@ -178,6 +182,9 @@ public class RecallService implements ApplicationContextAware {
         // 查询晋升rootProduceContentId
         List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository.getByChannelContentIdIn(channelContentIds);
         Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream().collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
+        List<String> publishContentIds = sourceList.stream().map(ArticlePoolPromotionSource::getRootPublishContentId).collect(Collectors.toList());
+        List<PublishContent> publishContentList = publishContentRepository.getByIdIn(publishContentIds);
+        Map<String, PublishContent> publishContentMap = publishContentList.stream().collect(Collectors.toMap(PublishContent::getId, Function.identity()));
         // 根据produceContentId查询category
         List<ArticleCategory> articleCategoryList = articleCategoryRepository.findAll();
         Map<String, ArticleCategory> categoryMap = articleCategoryList.stream().collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
@@ -188,6 +195,10 @@ public class RecallService implements ApplicationContextAware {
             ArticleCategory category = null;
             if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
                 category = categoryMap.get(source.getRootProduceContentId());
+                PublishContent publishContent = publishContentMap.get(source.getRootPublishContentId());
+                if (Objects.nonNull(publishContent)) {
+                    content.setRootPublishTimestamp(publishContent.getPublishTimestamp());
+                }
             }
             if (Objects.isNull(category)) {
                 category = coldStartCategoryMap.get(content.getCrawlerChannelContentId());

+ 3 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/score/strategy/CrawlerDaysDecreaseStrategy.java

@@ -13,6 +13,7 @@ import org.springframework.util.CollectionUtils;
 
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
 
 /**
  * 抓取时间超过60天的文章,分数减少
@@ -49,7 +50,8 @@ public class CrawlerDaysDecreaseStrategy implements ScoreStrategy {
         double maxReleaseScore = -0.15;
         double minScoreDays = 7;
         double maxScoreDays = 60.0;
-        int days = (int) ((now - content.getCrawlerTimestamp()) / 86400000);
+        long timestamp = Optional.ofNullable(content.getRootPublishTimestamp()).orElse(content.getCrawlerTimestamp());
+        int days = (int) ((now - timestamp) / 86400000);
         if (days < minScoreDays) {
             return 0.0;
         }

+ 2 - 2
long-article-recommend-service/src/main/resources/mapper/aigc/AigcBaseMapper.xml

@@ -8,7 +8,7 @@
     </select>
 
     <select id="getPublishContentByTitle"
-            resultType="com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent">
+            resultType="com.tzld.longarticle.recommend.server.model.dto.PublishContentDTO">
         select content.*, output.output as title
         from publish_content content
         join publish_content_output output
@@ -132,7 +132,7 @@
     </select>
 
     <select id="getHisPublishByTitles"
-            resultType="com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent">
+            resultType="com.tzld.longarticle.recommend.server.model.dto.PublishContentDTO">
         select content.*, output.output as title
         from publish_content content
         join publish_content_output output