浏览代码

2025-04-15 冷启动晋级优化

luojunhui 2 月之前
父节点
当前提交
95c516f8f1

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/recommend/RankStrategyEnum.java

@@ -23,6 +23,7 @@ public enum RankStrategyEnum {
     ArticleRankV15("ArticleRankV15", "ArticleRankV15", "rankV15Strategy"),
     ArticleRankV16("ArticleRankV16", "ArticleRankV16", "rankV16Strategy"),
     ArticleRankV17("ArticleRankV17", "ArticleRankV17", "rankV17Strategy"),
+    ArticleRankV18("ArticleRankV18", "ArticleRankV18", "rankV18Strategy"),
 
     HIS_JUMP_STRATEGY("ArticleRankHisJump", "历史表现跳过相似度策略", "hisJumpRankStrategy"),
     INFINITE_STRATEGY("ArticleRankInfinite", "无限发表", "infiniteRankStrategy"),

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/recommend/ScoreStrategyEnum.java

@@ -16,6 +16,7 @@ public enum ScoreStrategyEnum {
     VIEW_COUNT_RATE_CORRELATION("ViewCountRateCorrelationStrategy"),
     VIEW_COUNT_RATE("ViewCountRateStrategy"),
     VIEW_COUNT_RATE_V2("ViewCountRateV2Strategy"),
+    VIEW_COUNT_RATE_V3("ViewCountRateV3Strategy"),
     VIEW_COUNT("ViewCountStrategy"),
     VIEW_MULTIPLIER("ViewMultiplierStrategy"),
     CRAWLER_DAYS_DECREASE_STRATEGY("CrawlerDaysDecreaseStrategy"),

+ 172 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/rank/strategy/RankV18Strategy.java

@@ -0,0 +1,172 @@
+package com.tzld.longarticle.recommend.server.service.recommend.rank.strategy;
+
+
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishPlanInputSourceTypesEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ScoreStrategyEnum;
+import com.tzld.longarticle.recommend.server.model.dto.Content;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
+import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountContentPoolConfigService;
+import com.tzld.longarticle.recommend.server.service.recommend.config.StrategyIndexScoreWeightService;
+import com.tzld.longarticle.recommend.server.service.recommend.rank.*;
+import com.tzld.longarticle.recommend.server.service.recommend.score.AccountIndexReplacePoolConfig;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreResult;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreService;
+import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.RandomUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * @author dyp
+ */
+@Service
+@Slf4j
+public class RankV18Strategy implements RankStrategy {
+
+    @Autowired
+    private ScoreService scoreService;
+    @Autowired
+    private AccountContentPoolConfigService accountContentPoolConfigService;
+    @Autowired
+    private ArticleRepository articleRepository;
+    @Autowired
+    private StrategyIndexScoreWeightService weightService;
+
+    @ApolloJsonValue("${touliu.account.ghIds:[\"gh_93e00e187787\", \"gh_ac43e43b253b\", \"gh_68e7fdc09fe4\",\"gh_77f36c109fb1\", \"gh_b181786a6c8c\", \"gh_1ee2e1b39ccf\"]}")
+    private List<String> touliuAccountGhIds;
+    @Value("${topProducePlanId:}")
+    private String topProducePlanId;
+
+    public RankResult rank(RankParam param) {
+        List<Content> result = new ArrayList<>();
+
+        ScoreResult scoreResult = scoreService.score(RankStrategy.convertToScoreParam(param));
+
+        Map<String, Map<String, Double>> scoreMap = scoreResult.getScoreMap();
+        String[] contentPools = accountContentPoolConfigService.getContentPools(param.getAccountName());
+        Map<Integer, AccountIndexReplacePoolConfig> indexReplacePoolConfigMap = accountContentPoolConfigService.getContentReplacePools(param.getAccountName());
+
+        List<RankItem> items = CommonCollectionUtils.toList(param.getContents(), c -> {
+            RankItem item = new RankItem();
+            item.setContent(c);
+            c.setScoreMap(scoreMap.get(c.getId()));
+            item.setScoreMap(scoreMap.get(c.getId()));
+            double score;
+            int index = weightService.getIndex(item.getContent().getContentPoolType(), contentPools);
+            if (contentPools[0].equals(item.getContent().getContentPoolType())
+                    || contentPools[1].equals(item.getContent().getContentPoolType())) {
+                score = item.getScore(ScoreStrategyEnum.SIMILARITY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.SIMILARITY.value())
+                        + item.getScore(ScoreStrategyEnum.CATEGORY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.CATEGORY.value())
+                        + item.getScore(ScoreStrategyEnum.FLOW_CTL_DECREASE.value())
+                        + item.getScore(ScoreStrategyEnum.CRAWLER_DAYS_DECREASE_STRATEGY.value());
+                if (item.getScore(ScoreStrategyEnum.PUBLISH_TIMES.value()) >= 0) {
+                    score += item.getScore(ScoreStrategyEnum.VIEW_COUNT_RATE_V2.value())
+                            * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                            ScoreStrategyEnum.VIEW_COUNT_RATE_V2.value());
+                }
+            } else {
+                score = item.getScore(ScoreStrategyEnum.SIMILARITY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.SIMILARITY.value())
+                        + item.getScore(ScoreStrategyEnum.CATEGORY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.CATEGORY.value())
+                        + item.getScore(ScoreStrategyEnum.ACCOUNT_PRE_DISTRIBUTE.value())
+                        + item.getScore(ScoreStrategyEnum.PUBLISH_TIMES.value())
+                        + item.getScore(ScoreStrategyEnum.CRAWLER_DAYS_DECREASE_STRATEGY.value())
+                        + item.getScore(ScoreStrategyEnum.FLOW_CTL_DECREASE.value());
+            }
+            c.setScore(score);
+            item.setScore(score);
+            return item;
+        });
+        // 相似度评分为0 报警返回
+        List<Article> hisPublishFirstArticleList = articleRepository.getByGhIdAndItemIndexAndTypeEqualsAndStatusEquals(
+                param.getGhId(), 1, param.getType(), 1);
+        if (RankStrategy.SimilarityScoreZero(items, param, hisPublishFirstArticleList)) {
+            return new RankResult(result);
+        }
+        // 安全分降权
+        RankService.safeScoreDecrease(items);
+
+        // 1 排序
+        Collections.sort(items, (o1, o2) -> -Double.compare(o1.getScore(), o2.getScore()));
+        // 2 相似去重
+        List<Content> contents = CommonCollectionUtils.toList(items, RankItem::getContent);
+
+        // 3 文章按照内容池分组
+        Map<String, List<Content>> contentMap = new HashMap<>();
+        for (Content c : contents) {
+            List<Content> data = contentMap.computeIfAbsent(c.getContentPoolType(), k -> new ArrayList<>());
+            data.add(c);
+        }
+        // 4 选文章
+        String[] publishPool = Arrays.copyOf(contentPools, contentPools.length);
+
+        // 头
+        List<Content> pool1 = contentMap.get(contentPools[0]);
+        if (CollectionUtils.isNotEmpty(pool1)) {
+            pool1 = RankService.contentSourceTypeFilter(param.getStrategy(), pool1, 1);
+        }
+        RankService.printSortLog(param.getStrategy(), param.getAccountName(), "头条", pool1);
+        if (CollectionUtils.isNotEmpty(pool1)) {
+            if (topProducePlanId.equals(pool1.get(0).getProducePlanId())) {
+                int i = RandomUtils.nextInt(0, 2);
+                if (i == 0) {
+                    for (Content content : pool1) {
+                        if (!topProducePlanId.equals(content.getProducePlanId())) {
+                            result.add(content);
+                            break;
+                        }
+                    }
+                }
+            }
+            if (CollectionUtils.isEmpty(result)) {
+                result.add(pool1.get(0));
+            }
+        } else {
+            RankStrategy.sendFeishuFirstPoolEmpty(param, contentPools[0]);
+            return new RankResult(result);
+        }
+
+        // 次
+        RankService.commonAddSecondContent(param, result, publishPool, contentPools, contentMap,
+                indexReplacePoolConfigMap, param.getStrategy());
+
+        // 3-8
+        // RankService.commonAdd38Content(param, result, contentPools, contentMap, param.getStrategy());
+        List<Content> pool = contentMap.get(contentPools[2]);
+        if (CollectionUtils.isNotEmpty(pool)) {
+            Integer videoSourceType = PublishPlanInputSourceTypesEnum.longArticleVideoPoolSource.getVal();
+            Queue<Content> videoPoolQueue = pool.stream().filter(o -> Objects.equals(o.getSourceType(), videoSourceType))
+                    .collect(Collectors.toCollection(LinkedList::new));
+            Queue<Content> otherPoolQueue = pool.stream().filter(o -> !Objects.equals(o.getSourceType(), videoSourceType))
+                    .collect(Collectors.toCollection(LinkedList::new));
+            for (int i = 3; i < param.getSize() + 1; i++) {
+                Integer sourceType = RankService.getStrategyPoolSourceType(param.getStrategy(), i);
+                if (Objects.equals(sourceType, videoSourceType) && !videoPoolQueue.isEmpty()) {
+                    result.add(videoPoolQueue.poll());
+                } else if (!otherPoolQueue.isEmpty()) {
+                    result.add(otherPoolQueue.poll());
+                }
+            }
+        }
+
+        RankStrategy.deduplication(result, contentMap, publishPool);
+
+        return new RankResult(result);
+    }
+
+}

+ 3 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/score/ScoreService.java

@@ -129,12 +129,14 @@ public class ScoreService implements ApplicationContextAware {
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV14.getStrategy())
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV15.getStrategy())
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV16.getStrategy())
-                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV17.getStrategy())) {
+                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV17.getStrategy())
+                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV18.getStrategy())){
             strategies.add(strategyMap.get(ScoreStrategyEnum.CATEGORY.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.ACCOUNT_PRE_DISTRIBUTE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.FLOW_CTL_DECREASE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE_V2.value()));
+            strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE_V3.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE_CORRELATION.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.PUBLISH_TIMES.value()));
 //            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_FANS_RATE_RATE.value()));

+ 157 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/score/strategy/ViewCountRateV3Strategy.java

@@ -0,0 +1,157 @@
+package com.tzld.longarticle.recommend.server.service.recommend.score.strategy;
+
+import com.tzld.longarticle.recommend.server.model.dto.Content;
+import com.tzld.longarticle.recommend.server.model.dto.ContentHisPublishArticle;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
+import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountContentPoolConfigService;
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
+import com.tzld.longarticle.recommend.server.service.recommend.score.Score;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreParam;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
+import com.tzld.longarticle.recommend.server.util.MathUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+@Component
+@Slf4j
+public class ViewCountRateV3Strategy implements ScoreStrategy {
+
+    @Autowired
+    AccountIndexAvgViewCountService accountIndexAvgViewCountService;
+    @Autowired
+    AccountContentPoolConfigService accountContentPoolConfigService;
+    @Autowired
+    AccountAvgInfoRepository accountAvgInfoRepository;
+
+    @Override
+    public List<Score> score(ScoreParam param) {
+        long start = System.currentTimeMillis();
+        List<Score> scores = new ArrayList<>();
+        String[] contentPools = accountContentPoolConfigService.getContentPools(param.getAccountName());
+        List<AccountAvgInfo> avgInfoList = accountAvgInfoRepository.getAllByGhIdEqualsAndStatusEquals(param.getGhId(), 1);
+        double avgViewCountFirst = accountIndexAvgViewCountService.getAvgReadCountByDB(avgInfoList, param.getGhId(), 1);
+        // 缺省头条均值设置为2w,次条为1w
+        if (avgViewCountFirst < 10) {
+            avgViewCountFirst = 20000D;
+        }
+        for (Content content : param.getContents()) {
+            for (int i = 0; i < contentPools.length; i++) {
+                if (!contentPools[i].equals(content.getContentPoolType())) {
+                    continue;
+                }
+                double avgViewCountPos = accountIndexAvgViewCountService.getAvgReadCountByDB(avgInfoList, param.getGhId(), i + 1);
+                // 缺省头条均值设置为2w,次条为1w
+                if (avgViewCountPos < 10) {
+                    if (i == 0) {
+                        avgViewCountPos = 20000D;
+                    } else if (i == 1) {
+                        avgViewCountPos = 10000D;
+                    } else {
+                        avgViewCountPos = 400D;
+                    }
+                }
+                // 阅读量之和
+                double showViewCountSum = 0D;
+                // 阅读均值置信区间上限之和
+                double readAvgCiUpperSum = 0D;
+                // 头条阅读量之和
+                double showViewCountSumFirst = 0D;
+                // 头条阅读均值置信区间上限之和
+                double readAvgCiUpperSumFirst = 0D;
+                // 次条阅读量之和
+                double showViewCountSumSecond = 0D;
+                // 次条阅读均值置信区间上限之和
+                double readAvgCiUpperSumSecond = 0D;
+                // 最大阅读均值置信区间上限
+                double maxReadAvgCiUpper = 0D;
+                if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
+                    continue;
+                }
+                for (ContentHisPublishArticle hisItem : content.getHisPublishArticleList()) {
+                    // 过滤掉发布时间晚于19点数据
+                    if (ScoreStrategy.hisContentLateFilter(hisItem.getPublishTimestamp())) {
+                        continue;
+                    }
+                    if (hisItem.isInnerAccount() && Objects.nonNull(hisItem.getViewCount())
+                            && hisItem.getViewCount() > 0 && Objects.nonNull(hisItem.getReadAvgCiUpper())
+                            && hisItem.getReadAvgCiUpper() > 0) {
+                        maxReadAvgCiUpper = Math.max(maxReadAvgCiUpper, hisItem.getReadAvgCiUpper());
+                        if (hisItem.getItemIndex() == 1) {
+                            showViewCountSumFirst += hisItem.getViewCount();
+                            readAvgCiUpperSumFirst += hisItem.getReadAvgCiUpper();
+                        } else if (hisItem.getItemIndex() == 2) {
+                            if (Objects.nonNull(hisItem.getFirstViewCount()) &&  hisItem.getFirstViewCount() > 0 &&
+                                    Objects.nonNull(hisItem.getFirstViewCountRate()) && hisItem.getFirstViewCountRate() > 0) {
+                                showViewCountSumSecond += hisItem.getViewCount();
+                                if (hisItem.getFirstViewCountRate() > 1) {
+                                    // 对于头条均值倍数大于1的情况,次条均值线性增加,用于debias;
+                                    // TODO: 对于小于1的情况,是否要减去?
+                                    readAvgCiUpperSumSecond += hisItem.getReadAvgCiUpper() * hisItem.getFirstViewCountRate();
+                                } else {
+                                    readAvgCiUpperSumSecond += hisItem.getReadAvgCiUpper();
+                                }
+                            }
+                        } else {
+                            if (Objects.nonNull(hisItem.getFirstViewCount()) && hisItem.getFirstViewCount() > 0
+                                    && Objects.nonNull(hisItem.getFirstViewCountRate()) && hisItem.getFirstViewCountRate() > 0) {
+                                showViewCountSum += hisItem.getViewCount();
+                                if (hisItem.getFirstViewCountRate() > 1) {
+                                    // 对于头条均值倍数大于1的情况,次条均值线性增加,用于debias;
+                                    // TODO: 对于小于1的情况,是否要减去?
+                                    readAvgCiUpperSum += hisItem.getReadAvgCiUpper() * hisItem.getFirstViewCountRate();
+                                } else {
+                                    readAvgCiUpperSum += hisItem.getReadAvgCiUpper();
+                                }
+                            }
+                        }
+                    }
+                }
+                double viewCountRate = 0D; // 设置默认值
+                double bigRateW = 1D;
+                // 头条表现选头条 + 次条
+                if (showViewCountSumFirst > 0) {
+                    showViewCountSum = showViewCountSumFirst + showViewCountSumSecond;
+                    readAvgCiUpperSum = readAvgCiUpperSumFirst + readAvgCiUpperSumSecond;
+                } else if (showViewCountSumSecond > 0) {
+                    showViewCountSum = showViewCountSumSecond;
+                    readAvgCiUpperSum = readAvgCiUpperSumSecond;
+                    // 如果是大号头条,则降权
+                    if (avgViewCountFirst >= 3000 && i == 0) {
+                        bigRateW = 0.001D;
+                    }
+                }
+                // 均值倍数
+                if (readAvgCiUpperSum > 0) {
+                    viewCountRate = showViewCountSum / readAvgCiUpperSum;
+                }
+                // 置信度
+                double viewCountRateW = MathUtils.sigmoid(readAvgCiUpperSum, 0.0002, avgViewCountPos);
+                double viewCountRateScore = 0;
+
+                if (viewCountRate > 0) {
+                    // 最终分数 = 置信度 * 均值倍数
+                    if (viewCountRate > 1 && bigRateW < 1) {
+                        // 如果是大号头条,则降权
+                        viewCountRateScore = viewCountRateW * ((viewCountRate - 1) * bigRateW + 1);
+                    } else {
+                        viewCountRateScore = viewCountRateW * viewCountRate;
+                    }
+                }
+                Score score = new Score();
+                score.setStrategy(this);
+                score.setContentId(content.getId());
+                score.setScore(viewCountRateScore);
+                scores.add(score);
+                break;
+            }
+        }
+        return scores;
+    }
+}

+ 5 - 5
long-article-recommend-service/src/main/resources/application-dev.yml

@@ -15,13 +15,13 @@ spring:
         min-idle: 0
   datasource:
     crawler:
-#      jdbc-url: jdbc:mysql://rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com:3306/piaoquan-crawler?useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull&useSSL=false
-#      username: crawler
-#      password: crawler123456@
-      # 测试库
-      jdbc-url: jdbc:mysql://rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com:3306/piaoquan-crawler?useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull&useSSL=false
+      jdbc-url: jdbc:mysql://rm-bp1159bu17li9hi94.mysql.rds.aliyuncs.com:3306/piaoquan-crawler?useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull&useSSL=false
       username: crawler
       password: crawler123456@
+      # 测试库
+#      jdbc-url: jdbc:mysql://rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com:3306/piaoquan-crawler?useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull&useSSL=false
+#      username: crawler
+#      password: crawler123456@
       driver-class-name: com.mysql.jdbc.Driver
       hikari:
         connection-timeout: 30000