wangyunpeng 11 months ago
parent
commit
429827dfa6

+ 4 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/ContentPoolEnum.java

@@ -10,7 +10,8 @@ import java.util.Objects;
 public enum ContentPoolEnum {
     autoArticlePoolLevel1("autoArticlePoolLevel1", "内容池1层"),
     autoArticlePoolLevel2("autoArticlePoolLevel2", "内容池2层"),
-    autoArticlePoolLevel3("autoArticlePoolLevel3", "冷启层"),
+    autoArticlePoolLevel3("autoArticlePoolLevel3", "内容池3层"),
+    autoArticlePoolLevel4("autoArticlePoolLevel4", "冷启层"),
 
     ;
 
@@ -29,7 +30,7 @@ public enum ContentPoolEnum {
                 return poolEnum;
             }
         }
-        return autoArticlePoolLevel3;
+        return autoArticlePoolLevel4;
     }
 
     public static List<String> getOrderContentPool() {
@@ -37,6 +38,7 @@ public enum ContentPoolEnum {
         result.add(autoArticlePoolLevel1.getContentPool());
         result.add(autoArticlePoolLevel2.getContentPool());
         result.add(autoArticlePoolLevel3.getContentPool());
+        result.add(autoArticlePoolLevel4.getContentPool());
         return result;
     }
 }

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/RankStrategyEnum.java

@@ -9,6 +9,7 @@ public enum RankStrategyEnum {
     ArticleRankV2("ArticleRankV2", "ArticleRankV2", "rankV2Strategy"),
     ArticleRankV3("ArticleRankV3", "ArticleRankV3", "rankV3Strategy"),
     ArticleRankV4("ArticleRankV4", "ArticleRankV4", "rankV4Strategy"),
+    ArticleRankV5("ArticleRankV5", "ArticleRankV5", "rankV5Strategy"),
 
     default_strategy("ArticleRankV1", "默认策略", "defaultRankStrategy"),
     ;

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/PublishSortLogRepository.java

@@ -9,6 +9,8 @@ import java.util.List;
 @Repository
 public interface PublishSortLogRepository extends JpaRepository<PublishSortLog, Long> {
 
+    List<PublishSortLog> findByDateStr(String dateStr);
+
     List<PublishSortLog> findByDateStrAndTitleIn(String dateStr, List<String> titles);
 
     List<PublishSortLog> findByCrawlerChannelContentIdIn(List<String> crawlerChannelContentIds);

+ 15 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/AccountContentPoolConfigService.java

@@ -5,11 +5,15 @@ import com.ctrip.framework.apollo.spring.annotation.ApolloConfigChangeListener;
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.google.common.collect.BiMap;
 import com.google.common.collect.HashBiMap;
+import com.tzld.longarticle.recommend.server.service.score.AccountIndexReplacePoolConfig;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.stereotype.Component;
 
 import javax.annotation.PostConstruct;
+import java.util.Arrays;
+import java.util.HashMap;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 /**
  * @author dyp
@@ -21,6 +25,9 @@ public class AccountContentPoolConfigService {
     @ApolloJsonValue("${accountContentPoolConfig:{}}")
     private Map<String, String[]> accountContentPoolMap;
 
+    @ApolloJsonValue("${accountIndexReplacePoolConfig:{}}")
+    private Map<String, AccountIndexReplacePoolConfig[]> accountIndexReplacePoolConfigMap;
+
     private BiMap<String, Integer> contentPoolLevelMap;
 
     @ApolloJsonValue("${contentPoolLevelConfig:{}}")
@@ -44,6 +51,14 @@ public class AccountContentPoolConfigService {
         return accountContentPoolMap.get("default");
     }
 
+    public Map<Integer, AccountIndexReplacePoolConfig> getContentReplacePools(String accountName) {
+        if (accountIndexReplacePoolConfigMap.containsKey(accountName)) {
+            return Arrays.asList(accountIndexReplacePoolConfigMap.get(accountName)).stream()
+                    .collect(Collectors.toMap(AccountIndexReplacePoolConfig::getIndex, o -> o));
+        }
+        return new HashMap<>();
+    }
+
     public String getContentPoolByLevel(int level) {
 
         return contentPoolLevelMap.inverse().getOrDefault(level, "");

+ 159 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/rank/strategy/RankV5Strategy.java

@@ -0,0 +1,159 @@
+package com.tzld.longarticle.recommend.server.service.rank.strategy;
+
+
+import com.tzld.longarticle.recommend.server.common.enums.ContentPoolEnum;
+import com.tzld.longarticle.recommend.server.model.Content;
+import com.tzld.longarticle.recommend.server.service.AccountContentPoolConfigService;
+import com.tzld.longarticle.recommend.server.service.rank.RankItem;
+import com.tzld.longarticle.recommend.server.service.rank.RankParam;
+import com.tzld.longarticle.recommend.server.service.rank.RankResult;
+import com.tzld.longarticle.recommend.server.service.rank.RankStrategy;
+import com.tzld.longarticle.recommend.server.service.score.AccountIndexReplacePoolConfig;
+import com.tzld.longarticle.recommend.server.service.score.ScoreParam;
+import com.tzld.longarticle.recommend.server.service.score.ScoreResult;
+import com.tzld.longarticle.recommend.server.service.score.ScoreService;
+import com.tzld.longarticle.recommend.server.service.score.strategy.*;
+import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
+import com.tzld.longarticle.recommend.server.util.JSONUtils;
+import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Service;
+
+import java.util.*;
+
+/**
+ * @author dyp
+ */
+/**
+ * Rank strategy V5: scores every candidate, sorts by score, removes similar titles, and then
+ * fills the publish slots from the account's configured content pools. When the pool of the
+ * head or second slot has no candidate, the substitute pool configured for that slot
+ * (slot index 1 or 2) is used instead.
+ */
+@Service
+@Slf4j
+public class RankV5Strategy implements RankStrategy {
+
+    @Autowired
+    private ScoreService scoreService;
+    @Autowired
+    private AccountContentPoolConfigService accountContentPoolConfigService;
+
+    /**
+     * Ranks the candidate contents of {@code param} and selects the ones to publish.
+     *
+     * <p>The first configured pool feeds the head slot, the second pool the second slot and the
+     * third pool the remaining slots, up to {@code param.getSize()} items in total. An article is
+     * never selected twice, even when a substitute pool coincides with another slot's pool.
+     *
+     * @param param ranking request (account, candidate contents, strategy name, result size)
+     * @return the selected contents in publish order
+     */
+    public RankResult rank(RankParam param) {
+
+        log.info("RankParam {}", JSONUtils.toJson(param));
+        ScoreResult scoreResult = scoreService.score(convertToScoreParam(param));
+        log.info("ScoreResult {}", JSONUtils.toJson(scoreResult));
+
+        Map<String, Map<String, Double>> scoreMap = scoreResult.getScoreMap();
+        String[] contentPools = accountContentPoolConfigService.getContentPools(param.getAccountName());
+        Map<Integer, AccountIndexReplacePoolConfig> indexReplacePoolConfigMap = accountContentPoolConfigService.getContentReplacePools(param.getAccountName());
+        // Resolve the three pools once; a missing entry yields null instead of an
+        // ArrayIndexOutOfBoundsException when fewer than three pools are configured.
+        String headPool = poolAt(contentPools, 0);
+        String secondPool = poolAt(contentPools, 1);
+        String tailPool = poolAt(contentPools, 2);
+
+        List<RankItem> items = CommonCollectionUtils.toList(param.getContents(), c -> {
+            RankItem item = new RankItem();
+            item.setContent(c);
+            item.setScoreMap(scoreMap.get(c.getId()));
+            String poolType = c.getContentPoolType();
+            boolean headOrSecond = (headPool != null && headPool.equals(poolType))
+                    || (secondPool != null && secondPool.equals(poolType));
+            // Head/second pools use the view-count-rate score, all other pools use the
+            // account pre-distribute score; the remaining components are shared.
+            String poolSpecificStrategy = headOrSecond
+                    ? ViewCountRateStrategy.class.getSimpleName()
+                    : AccountPreDistributeStrategy.class.getSimpleName();
+            double score = item.getScore(SimilarityStrategy.class.getSimpleName())
+                    + item.getScore(CategoryStrategy.class.getSimpleName())
+                    + item.getScore(poolSpecificStrategy)
+                    + item.getScore(PublishTimesStrategy.class.getSimpleName())
+                    + item.getScore(FlowCtlDecreaseStrategy.class.getSimpleName());
+            item.setScore(score);
+            return item;
+        });
+
+        // 1. Sort by score, highest first.
+        items.sort((o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
+        log.info("SortResult {}", JSONUtils.toJson(items));
+        // 2. Remove contents with similar titles.
+        List<Content> contents = CommonCollectionUtils.toList(items, RankItem::getContent);
+        contents = deduplication(contents);
+        log.info("Deduplication {}", JSONUtils.toJson(contents));
+
+        // 3. Group the contents by content pool (order inside each group is preserved).
+        Map<String, List<Content>> contentMap = new HashMap<>();
+        for (Content c : contents) {
+            List<Content> data = contentMap.computeIfAbsent(c.getContentPoolType(), k -> new ArrayList<>());
+            data.add(c);
+        }
+        log.info("ContentMap {}", JSONUtils.toJson(contentMap));
+        // 4. Select the contents.
+        List<Content> result = new ArrayList<>();
+
+        // Head slot: best content of the head pool, otherwise the substitute pool of slot 1.
+        if (!addFirstUnselected(result, poolContents(contentMap, headPool))) {
+            addFirstUnselected(result, replacementContents(contentMap, indexReplacePoolConfigMap.get(1)));
+        }
+        // Second slot: best content of the second pool, otherwise the substitute pool of slot 2.
+        List<Content> secondCandidates = poolContents(contentMap, secondPool);
+        if (addFirstUnselected(result, secondCandidates)) {
+            // Nothing was found for the head slot: fill it with one more second-pool content.
+            if (result.size() == 1) {
+                addFirstUnselected(result, secondCandidates);
+            }
+        } else {
+            addFirstUnselected(result, replacementContents(contentMap, indexReplacePoolConfigMap.get(2)));
+        }
+
+        // Remaining slots (3-8): fill from the third pool. The remaining count is clamped at
+        // zero so a requested size smaller than the already selected items cannot throw.
+        int remaining = Math.max(0, param.getSize() - result.size());
+        for (Content c : poolContents(contentMap, tailPool)) {
+            if (remaining <= 0) {
+                break;
+            }
+            if (!containsSame(result, c)) {
+                result.add(c);
+                remaining--;
+            }
+        }
+
+        return new RankResult(result);
+    }
+
+    /** Returns the pool name at {@code idx}, or {@code null} when the array is null or too short. */
+    private static String poolAt(String[] contentPools, int idx) {
+        return (contentPools != null && idx < contentPools.length) ? contentPools[idx] : null;
+    }
+
+    /** Returns the grouped contents of {@code poolName}, never {@code null}. */
+    private static List<Content> poolContents(Map<String, List<Content>> contentMap, String poolName) {
+        if (poolName == null) {
+            return Collections.emptyList();
+        }
+        List<Content> pool = contentMap.get(poolName);
+        return pool == null ? Collections.emptyList() : pool;
+    }
+
+    /** Returns the contents of the substitute pool named by {@code config}; empty when there is no config. */
+    private static List<Content> replacementContents(Map<String, List<Content>> contentMap,
+                                                     AccountIndexReplacePoolConfig config) {
+        if (config == null) {
+            return Collections.emptyList();
+        }
+        return poolContents(contentMap, config.getContentPool());
+    }
+
+    /**
+     * Appends the first element of {@code candidates} that is not yet in {@code selected}.
+     *
+     * @return {@code true} when an element was appended
+     */
+    private static boolean addFirstUnselected(List<Content> selected, List<Content> candidates) {
+        for (Content candidate : candidates) {
+            if (!containsSame(selected, candidate)) {
+                selected.add(candidate);
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Identity-based membership test: all lists share the same {@link Content} instances. */
+    private static boolean containsSame(List<Content> selected, Content candidate) {
+        for (Content c : selected) {
+            if (c == candidate) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /** Copies the fields of the rank request that {@link ScoreService#score} is given. */
+    private ScoreParam convertToScoreParam(RankParam param) {
+        ScoreParam scoreParam = new ScoreParam();
+        scoreParam.setGhId(param.getGhId());
+        scoreParam.setAccountName(param.getAccountName());
+        scoreParam.setContents(param.getContents());
+        scoreParam.setStrategy(param.getStrategy());
+        return scoreParam;
+    }
+
+    /**
+     * Removes contents whose title {@link TitleSimilarCheckUtil#isDuplicateContent} reports as a
+     * duplicate of an already kept title.
+     *
+     * <p>Pools are visited in the order of {@link ContentPoolEnum#getOrderContentPool()}, so a
+     * content of an earlier pool is kept in preference to a similar one of a later pool. Within
+     * a pool the incoming order is preserved. Contents whose pool type is not in that list are
+     * dropped.
+     */
+    private List<Content> deduplication(List<Content> contents) {
+        List<String> titles = new ArrayList<>();
+        List<Content> result = new ArrayList<>();
+        // Walk the contents once per pool, in pool priority order.
+        for (String contentPool : ContentPoolEnum.getOrderContentPool()) {
+            for (Content c : contents) {
+                if (!contentPool.equals(c.getContentPoolType())) {
+                    continue;
+                }
+                if (!TitleSimilarCheckUtil.isDuplicateContent(c.getTitle(), titles)) {
+                    result.add(c);
+                    titles.add(c.getTitle());
+                }
+            }
+        }
+
+        return result;
+    }
+
+}

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/score/AccountIndexReplacePoolConfig.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.service.score;
+
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * One substitute-pool entry of an account: which content pool to fall back to for a given
+ * publish slot. Instances are bound from the {@code accountIndexReplacePoolConfig} setting in
+ * {@code AccountContentPoolConfigService}.
+ */
+public class AccountIndexReplacePoolConfig {
+
+    /** Publish slot this entry applies to; {@code RankV5Strategy} looks up slots 1 and 2. */
+    @Getter
+    @Setter
+    private Integer index;
+
+    /** Name of the content pool used as the substitute for that slot. */
+    @Getter
+    @Setter
+    private String contentPool;
+
+}

+ 13 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/score/AccountPublishTimesConfig.java

@@ -0,0 +1,13 @@
+package com.tzld.longarticle.recommend.server.service.score;
+
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * One publish-times rule of an account, bound from the {@code accountPublishTimesConfig}
+ * setting in {@code PublishTimesStrategy}.
+ */
+public class AccountPublishTimesConfig {
+
+    /** Key under which {@code PublishTimesStrategy} looks the rule up. */
+    @Getter
+    @Setter
+    private Integer index;
+
+    /** Number of matching publish logs at which the rule applies. */
+    @Getter
+    @Setter
+    private Integer times;
+
+    /** Value added to the content's score once {@link #times} is reached. */
+    @Getter
+    @Setter
+    private Integer weight;
+
+}

+ 3 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/score/ScoreService.java

@@ -93,11 +93,13 @@ public class ScoreService implements ApplicationContextAware {
             strategies.add(strategyMap.get(FlowCtlDecreaseStrategy.class.getSimpleName()));
             strategies.add(strategyMap.get(ViewCountRateStrategy.class.getSimpleName()));
         }
-        if (StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV4.getStrategy())) {
+        if (StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV4.getStrategy())
+                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV5.getStrategy())) {
             strategies.add(strategyMap.get(CategoryStrategy.class.getSimpleName()));
             strategies.add(strategyMap.get(AccountPreDistributeStrategy.class.getSimpleName()));
             strategies.add(strategyMap.get(FlowCtlDecreaseStrategy.class.getSimpleName()));
             strategies.add(strategyMap.get(ViewCountRateStrategy.class.getSimpleName()));
+            strategies.add(strategyMap.get(PublishTimesStrategy.class.getSimpleName()));
         }
 
         return strategies;

+ 79 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/score/strategy/PublishTimesStrategy.java

@@ -0,0 +1,79 @@
+package com.tzld.longarticle.recommend.server.service.score.strategy;
+
+import cn.hutool.core.collection.CollectionUtil;
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.tzld.longarticle.recommend.server.model.Content;
+import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleRepository;
+import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository;
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.PublishSortLog;
+import com.tzld.longarticle.recommend.server.service.AccountContentPoolConfigService;
+import com.tzld.longarticle.recommend.server.service.score.*;
+import com.tzld.longarticle.recommend.server.util.DateUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+import org.springframework.util.CollectionUtils;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Score strategy that adjusts a content's score by a configured weight once its title has been
+ * published today, at the slot belonging to its content pool, at least a configured number of
+ * times.
+ *
+ * <p>Rules come from the {@code accountPublishTimesConfig} setting, per account, keyed by
+ * 1-based slot number: 1 = head, 2 = second, 3 = every slot from the third on.
+ */
+@Component
+@Slf4j
+public class PublishTimesStrategy implements ScoreStrategy {
+
+    @Autowired
+    CrawlerMetaArticleRepository crawlerMetaArticleRepository;
+    @Autowired
+    AccountContentPoolConfigService accountContentPoolConfigService;
+    @Autowired
+    private PublishSortLogRepository publishSortLogRepository;
+
+    @ApolloJsonValue("${accountPublishTimesConfig:{}}")
+    private Map<String, AccountPublishTimesConfig[]> accountPublishTimesConfigMap;
+
+
+    /**
+     * Produces one {@link Score} per content of {@code param}.
+     *
+     * @param param scoring request (account name and candidate contents)
+     * @return an empty list when there are no contents or the account has no rules; otherwise
+     *         one score per content, changed only for contents whose rule is satisfied
+     */
+    @Override
+    public List<Score> score(ScoreParam param) {
+        List<Score> scores = new ArrayList<>();
+        if (CollectionUtils.isEmpty(param.getContents())) {
+            return scores;
+        }
+        AccountPublishTimesConfig[] indexPublishTimesArr = accountPublishTimesConfigMap.get(param.getAccountName());
+        if (Objects.isNull(indexPublishTimesArr) || indexPublishTimesArr.length == 0) {
+            return scores;
+        }
+        // Load the contents already published today, grouped by title.
+        // NOTE(review): findByDateStr does not appear to filter by account, so publishes of
+        // every account are counted - confirm this is intended.
+        String dateStr = DateUtils.getCurrentDateStr("yyyy-MM-dd");
+        List<PublishSortLog> hisPublishContentList = publishSortLogRepository.findByDateStr(dateStr);
+        Map<String, List<PublishSortLog>> hisPublishedContentMap = hisPublishContentList.stream()
+                // groupingBy rejects null keys, so logs without a title are skipped.
+                .filter(o -> Objects.nonNull(o.getTitle()))
+                .collect(Collectors.groupingBy(PublishSortLog::getTitle));
+        // The first rule wins when an index is configured twice (the two-argument toMap would throw).
+        Map<Integer, AccountPublishTimesConfig> indexPublishTimesMap = Arrays.stream(indexPublishTimesArr)
+                .filter(o -> Objects.nonNull(o) && Objects.nonNull(o.getIndex()))
+                .collect(Collectors.toMap(AccountPublishTimesConfig::getIndex, o -> o, (first, second) -> first));
+        String[] configuredPools = accountContentPoolConfigService.getContentPools(param.getAccountName());
+        String[] contentPools = Objects.isNull(configuredPools) ? new String[0] : configuredPools;
+        for (Content content : param.getContents()) {
+            Score score = new Score();
+            score.setStrategy(this);
+            score.setContentId(content.getId());
+            List<PublishSortLog> publishedToday = hisPublishedContentMap.get(content.getTitle());
+            if (!CollectionUtils.isEmpty(publishedToday)) {
+                for (int poolIdx = 0; poolIdx < contentPools.length; poolIdx++) {
+                    if (Objects.isNull(contentPools[poolIdx])
+                            || !contentPools[poolIdx].equals(content.getContentPoolType())) {
+                        continue;
+                    }
+                    // Slots are 1-based while the pool array is 0-based: pool 0 feeds slot 1, etc.
+                    // Using the raw array index here could never match a published slot.
+                    int slot = poolIdx + 1;
+                    AccountPublishTimesConfig config = indexPublishTimesMap.get(slot);
+                    if (Objects.isNull(config) || Objects.isNull(config.getTimes())
+                            || Objects.isNull(config.getWeight())) {
+                        continue;
+                    }
+                    long publishedAtSlot = publishedToday.stream()
+                            .filter(o -> Objects.nonNull(o.getIndex()))
+                            .filter(o -> (slot == 1 && o.getIndex() == 1)
+                                    || (slot == 2 && o.getIndex() == 2)
+                                    || (slot == 3 && o.getIndex() >= 3))
+                            .count();
+                    // The rule only applies when the title was actually published at this slot.
+                    if (publishedAtSlot > 0 && publishedAtSlot >= config.getTimes()) {
+                        score.setScore(score.getScore() + config.getWeight());
+                    }
+                }
+            }
+            scores.add(score);
+        }
+
+        return scores;
+    }
+
+}