wangyunpeng 2 дней назад
Родитель
Сommit
a9279497aa

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/recommend/RankStrategyEnum.java

@@ -22,6 +22,7 @@ public enum RankStrategyEnum {
 
     ArticleRankV15("ArticleRankV15", "ArticleRankV15", "rankV15Strategy"),
     ArticleRankV16("ArticleRankV16", "ArticleRankV16", "rankV16Strategy"),
+    ArticleRankV17("ArticleRankV17", "ArticleRankV17", "rankV17Strategy"),
 
     HIS_JUMP_STRATEGY("ArticleRankHisJump", "历史表现跳过相似度策略", "hisJumpRankStrategy"),
     INFINITE_STRATEGY("ArticleRankInfinite", "无限发表", "infiniteRankStrategy"),

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/recommend/ScoreStrategyEnum.java

@@ -15,6 +15,7 @@ public enum ScoreStrategyEnum {
     SIMILARITY("SimilarityStrategy"),
     VIEW_COUNT_RATE_CORRELATION("ViewCountRateCorrelationStrategy"),
     VIEW_COUNT_RATE("ViewCountRateStrategy"),
+    VIEW_COUNT_RATE_V2("ViewCountRateV2Strategy"),
     VIEW_COUNT("ViewCountStrategy"),
     VIEW_MULTIPLIER("ViewMultiplierStrategy"),
     CRAWLER_DAYS_DECREASE_STRATEGY("CrawlerDaysDecreaseStrategy"),

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/ContentHisPublishArticle.java

@@ -20,6 +20,7 @@ public class ContentHisPublishArticle {
     private Long updateTime;
     private Long publishTimestamp;
     private Integer avgViewCount;
+    private Double readAvgCiUpper;
     private Double viewCountRate;
     private Integer firstViewCount;
     private Double firstViewCountRate;

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/crawler/AccountAvgInfo.java

@@ -46,6 +46,8 @@ public class AccountAvgInfo implements Serializable {
     private Integer businessType; // 1 表示长文,2 表示投流,3 表示企微
     @Column(name = "read_rate_avg")
     private Double readRateAvg;
+    @Column(name = "read_avg_ci_upper")
+    private Double readAvgCiUpper;
 
     @Data
     public static class PK implements Serializable {

+ 172 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/rank/strategy/RankV17Strategy.java

@@ -0,0 +1,172 @@
+package com.tzld.longarticle.recommend.server.service.recommend.rank.strategy;
+
+
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishPlanInputSourceTypesEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ScoreStrategyEnum;
+import com.tzld.longarticle.recommend.server.model.dto.Content;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
+import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountContentPoolConfigService;
+import com.tzld.longarticle.recommend.server.service.recommend.config.StrategyIndexScoreWeightService;
+import com.tzld.longarticle.recommend.server.service.recommend.rank.*;
+import com.tzld.longarticle.recommend.server.service.recommend.score.AccountIndexReplacePoolConfig;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreResult;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreService;
+import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.RandomUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Rank strategy V17: scores every candidate content, then assembles the publish list slot by slot.
+ *
+ * <p>Flow of {@link #rank(RankParam)}:
+ * <ol>
+ *   <li>obtain per-strategy scores from {@link ScoreService} and fold them into one composite score;</li>
+ *   <li>abort with an empty result when {@code RankStrategy.SimilarityScoreZero} reports a problem;</li>
+ *   <li>apply {@code RankService.safeScoreDecrease}, sort by score descending and group by content pool;</li>
+ *   <li>pick the headline from pool 0, the second slot through
+ *       {@code RankService.commonAddSecondContent}, and slots 3..size from pool 2;</li>
+ *   <li>run {@code RankStrategy.deduplication} on the assembled list.</li>
+ * </ol>
+ *
+ * <p>The configured content pool array is indexed at 0, 1 and 2 unconditionally, so the account
+ * must have at least three pools configured.
+ *
+ * @author dyp
+ */
+@Service
+@Slf4j
+public class RankV17Strategy implements RankStrategy {
+
+    @Autowired
+    private ScoreService scoreService;
+    @Autowired
+    private AccountContentPoolConfigService accountContentPoolConfigService;
+    @Autowired
+    private ArticleRepository articleRepository;
+    @Autowired
+    private StrategyIndexScoreWeightService weightService;
+
+    @ApolloJsonValue("${touliu.account.ghIds:[\"gh_93e00e187787\", \"gh_ac43e43b253b\", \"gh_68e7fdc09fe4\",\"gh_77f36c109fb1\", \"gh_b181786a6c8c\", \"gh_1ee2e1b39ccf\"]}")
+    private List<String> touliuAccountGhIds;
+    /** Produce-plan id that gets the randomized headline treatment in {@link #pickHeadline(List)}; empty when unset. */
+    @Value("${topProducePlanId:}")
+    private String topProducePlanId;
+
+    /**
+     * Ranks the candidate contents of {@code param} and selects the ones to publish.
+     *
+     * @param param ranking request (account, strategy name, candidate contents, requested size)
+     * @return the selected contents in slot order; empty when the similarity check fails or
+     *         the headline pool has no usable content
+     */
+    @Override
+    public RankResult rank(RankParam param) {
+        List<Content> result = new ArrayList<>();
+
+        ScoreResult scoreResult = scoreService.score(RankStrategy.convertToScoreParam(param));
+
+        Map<String, Map<String, Double>> scoreMap = scoreResult.getScoreMap();
+        String[] contentPools = accountContentPoolConfigService.getContentPools(param.getAccountName());
+        Map<Integer, AccountIndexReplacePoolConfig> indexReplacePoolConfigMap = accountContentPoolConfigService.getContentReplacePools(param.getAccountName());
+
+        List<RankItem> items = CommonCollectionUtils.toList(param.getContents(),
+                c -> toRankItem(c, scoreMap, contentPools, param));
+
+        // Similarity score is zero: the helper raises the alert, we return nothing.
+        List<Article> hisPublishFirstArticleList = articleRepository.getByGhIdAndItemIndexAndTypeEqualsAndStatusEquals(
+                param.getGhId(), 1, param.getType(), 1);
+        if (RankStrategy.SimilarityScoreZero(items, param, hisPublishFirstArticleList)) {
+            return new RankResult(result);
+        }
+        // Safety-score down-weighting.
+        RankService.safeScoreDecrease(items);
+
+        // 1. Sort by composite score, highest first.
+        items.sort((left, right) -> Double.compare(right.getScore(), left.getScore()));
+        // 2. Unwrap the sorted items back into contents (order is kept).
+        List<Content> contents = CommonCollectionUtils.toList(items, RankItem::getContent);
+
+        // 3. Group contents by content pool; each group keeps the sorted order.
+        Map<String, List<Content>> contentMap = new HashMap<>();
+        for (Content c : contents) {
+            contentMap.computeIfAbsent(c.getContentPoolType(), k -> new ArrayList<>()).add(c);
+        }
+        // 4. Select contents slot by slot.
+        String[] publishPool = Arrays.copyOf(contentPools, contentPools.length);
+
+        // Headline slot.
+        List<Content> pool1 = contentMap.get(contentPools[0]);
+        if (CollectionUtils.isNotEmpty(pool1)) {
+            pool1 = RankService.contentSourceTypeFilter(param.getStrategy(), pool1, 1);
+        }
+        RankService.printSortLog(param.getStrategy(), param.getAccountName(), "头条", pool1);
+        if (CollectionUtils.isEmpty(pool1)) {
+            RankStrategy.sendFeishuFirstPoolEmpty(param, contentPools[0]);
+            return new RankResult(result);
+        }
+        result.add(pickHeadline(pool1));
+
+        // Second slot.
+        RankService.commonAddSecondContent(param, result, publishPool, contentPools, contentMap,
+                indexReplacePoolConfigMap, param.getStrategy());
+
+        // Slots 3..size.
+        fillRemainingSlots(param, result, contentMap.get(contentPools[2]));
+
+        RankStrategy.deduplication(result, contentMap, publishPool);
+
+        return new RankResult(result);
+    }
+
+    /**
+     * Wraps a content in a {@link RankItem} and stores the composite score on both objects.
+     *
+     * <p>Contents of pool 0 or pool 1 use similarity + category (weighted), flow-control and
+     * crawler-days terms, plus the weighted view-count-rate V2 term when the publish-times score
+     * is non-negative. Every other pool uses similarity + category (weighted) plus the
+     * pre-distribute, publish-times, crawler-days and flow-control terms.
+     */
+    private RankItem toRankItem(Content c, Map<String, Map<String, Double>> scoreMap,
+                                String[] contentPools, RankParam param) {
+        RankItem item = new RankItem();
+        item.setContent(c);
+        Map<String, Double> contentScores = scoreMap.get(c.getId());
+        c.setScoreMap(contentScores);
+        item.setScoreMap(contentScores);
+
+        String poolType = c.getContentPoolType();
+        int index = weightService.getIndex(poolType, contentPools);
+        double base = weightedScore(item, param, index, ScoreStrategyEnum.SIMILARITY)
+                + weightedScore(item, param, index, ScoreStrategyEnum.CATEGORY);
+
+        double score;
+        if (contentPools[0].equals(poolType) || contentPools[1].equals(poolType)) {
+            score = base
+                    + item.getScore(ScoreStrategyEnum.FLOW_CTL_DECREASE.value())
+                    + item.getScore(ScoreStrategyEnum.CRAWLER_DAYS_DECREASE_STRATEGY.value());
+            if (item.getScore(ScoreStrategyEnum.PUBLISH_TIMES.value()) >= 0) {
+                score += weightedScore(item, param, index, ScoreStrategyEnum.VIEW_COUNT_RATE_V2);
+            }
+        } else {
+            score = base
+                    + item.getScore(ScoreStrategyEnum.ACCOUNT_PRE_DISTRIBUTE.value())
+                    + item.getScore(ScoreStrategyEnum.PUBLISH_TIMES.value())
+                    + item.getScore(ScoreStrategyEnum.CRAWLER_DAYS_DECREASE_STRATEGY.value())
+                    + item.getScore(ScoreStrategyEnum.FLOW_CTL_DECREASE.value());
+        }
+        c.setScore(score);
+        item.setScore(score);
+        return item;
+    }
+
+    /** Returns the item's score for {@code strategy} multiplied by the weight configured for this strategy/account/slot. */
+    private double weightedScore(RankItem item, RankParam param, int index, ScoreStrategyEnum strategy) {
+        return item.getScore(strategy.value())
+                * weightService.getWeight(param.getStrategy(), param.getGhId(), index, strategy.value());
+    }
+
+    /**
+     * Chooses the headline from a non-empty, score-sorted pool.
+     *
+     * <p>Normally the top item wins. When the top item belongs to {@code topProducePlanId}, a coin
+     * flip ({@code RandomUtils.nextInt(0, 2) == 0}) hands the slot to the best item from any other
+     * produce plan instead, if one exists.
+     */
+    private Content pickHeadline(List<Content> headPool) {
+        Content best = headPool.get(0);
+        if (topProducePlanId.equals(best.getProducePlanId()) && RandomUtils.nextInt(0, 2) == 0) {
+            for (Content candidate : headPool) {
+                if (!topProducePlanId.equals(candidate.getProducePlanId())) {
+                    return candidate;
+                }
+            }
+        }
+        return best;
+    }
+
+    /**
+     * Appends contents for slots 3..{@code param.getSize()} taken from {@code pool}.
+     *
+     * <p>The pool is split into video-sourced and other contents, each keeping its order. A slot
+     * whose configured source type is the video type takes the next video content when one is
+     * left; in every other case the next non-video content is taken. A slot stays unfilled when
+     * the queue it falls back to is empty.
+     */
+    private static void fillRemainingSlots(RankParam param, List<Content> result, List<Content> pool) {
+        if (CollectionUtils.isEmpty(pool)) {
+            return;
+        }
+        Integer videoSourceType = PublishPlanInputSourceTypesEnum.longArticleVideoPoolSource.getVal();
+        Queue<Content> videoPoolQueue = new ArrayDeque<>();
+        Queue<Content> otherPoolQueue = new ArrayDeque<>();
+        for (Content candidate : pool) {
+            if (Objects.equals(candidate.getSourceType(), videoSourceType)) {
+                videoPoolQueue.add(candidate);
+            } else {
+                otherPoolQueue.add(candidate);
+            }
+        }
+        for (int slot = 3; slot < param.getSize() + 1; slot++) {
+            Integer sourceType = RankService.getStrategyPoolSourceType(param.getStrategy(), slot);
+            if (Objects.equals(sourceType, videoSourceType) && !videoPoolQueue.isEmpty()) {
+                result.add(videoPoolQueue.poll());
+            } else if (!otherPoolQueue.isEmpty()) {
+                result.add(otherPoolQueue.poll());
+            }
+        }
+    }
+
+}

+ 4 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -593,6 +593,7 @@ public class RecallService implements ApplicationContextAware {
                 article.setArticleDetailInfoList(articleDetailInfoMap.get(hisArticle.getWxSn()));
                 // 设置账号位置阅读均值
                 int avgViewCount = 0;
+                double readAvgCiUpper = 0.0;
                 Map<String, Map<String, AccountAvgInfo>> dateAvgMap = accountAvgInfoIndexMap.get(hisArticle.getGhId());
                 String hisPublishDate = DateUtils.timestampToYMDStr(article.getPublishTimestamp(), "yyyy-MM-dd");
                 if (Objects.nonNull(dateAvgMap)) {
@@ -603,6 +604,8 @@ public class RecallService implements ApplicationContextAware {
                         article.setInnerAccount(true);
                         avgViewCount = Optional.ofNullable(indexMap.get(hisArticle.getItemIndex().toString()).getReadAvg())
                                 .orElse(0.0).intValue();
+                        // readAvgCiUpper is a double: keep the fractional part instead of truncating through intValue().
+                        readAvgCiUpper = Optional.ofNullable(indexMap.get(hisArticle.getItemIndex().toString()).getReadAvgCiUpper())
+                                .orElse(0.0);
                     } else {
                         if (ArticleTypeEnum.QUNFA.getVal().equals(type)) {
                             log.error("历史表现阅读均值获取失败 ghId:{} accountName:{} date:{} index:{}",
@@ -618,6 +621,7 @@ public class RecallService implements ApplicationContextAware {
                     }
                 }
                 article.setAvgViewCount(avgViewCount);
+                article.setReadAvgCiUpper(readAvgCiUpper);
                 if (Objects.nonNull(article.getAvgViewCount()) && article.getAvgViewCount() > 0
                         && Objects.nonNull(article.getViewCount())) {
                     article.setViewCountRate((article.getViewCount() * 1.0) / article.getAvgViewCount());

+ 3 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/score/ScoreService.java

@@ -128,11 +128,13 @@ public class ScoreService implements ApplicationContextAware {
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV13.getStrategy())
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV14.getStrategy())
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV15.getStrategy())
-                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV16.getStrategy())) {
+                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV16.getStrategy())
+                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV17.getStrategy())) {
             strategies.add(strategyMap.get(ScoreStrategyEnum.CATEGORY.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.ACCOUNT_PRE_DISTRIBUTE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.FLOW_CTL_DECREASE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE.value()));
+            strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE_V2.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE_CORRELATION.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.PUBLISH_TIMES.value()));
 //            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_FANS_RATE_RATE.value()));

+ 154 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/score/strategy/ViewCountRateV2Strategy.java

@@ -0,0 +1,154 @@
+package com.tzld.longarticle.recommend.server.service.recommend.score.strategy;
+
+import com.tzld.longarticle.recommend.server.model.dto.Content;
+import com.tzld.longarticle.recommend.server.model.dto.ContentHisPublishArticle;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
+import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountContentPoolConfigService;
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
+import com.tzld.longarticle.recommend.server.service.recommend.score.Score;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreParam;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
+import com.tzld.longarticle.recommend.server.util.MathUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+@Component
+@Slf4j
+public class ViewCountRateV2Strategy implements ScoreStrategy {
+
+    @Autowired
+    AccountIndexAvgViewCountService accountIndexAvgViewCountService;
+    @Autowired
+    AccountContentPoolConfigService accountContentPoolConfigService;
+    @Autowired
+    AccountAvgInfoRepository accountAvgInfoRepository;
+
+    @Override
+    public List<Score> score(ScoreParam param) {
+        long start = System.currentTimeMillis();
+        List<Score> scores = new ArrayList<>();
+        String[] contentPools = accountContentPoolConfigService.getContentPools(param.getAccountName());
+        List<AccountAvgInfo> avgInfoList = accountAvgInfoRepository.getAllByGhIdEqualsAndStatusEquals(param.getGhId(), 1);
+        double avgViewCountFirst = accountIndexAvgViewCountService.getAvgReadCountByDB(avgInfoList, param.getGhId(), 1);
+        double avgViewCountSecond = accountIndexAvgViewCountService.getAvgReadCountByDB(avgInfoList, param.getGhId(), 2);
+        double avgViewCountThird = accountIndexAvgViewCountService.getAvgReadCountByDB(avgInfoList, param.getGhId(), 3);
+        // 缺省头条均值设置为2w,次条为1w
+        if (avgViewCountFirst < 10) {
+            avgViewCountFirst = 20000D;
+            avgViewCountSecond = 10000D;
+            avgViewCountThird = 400D;
+        }
+        for (Content content : param.getContents()) {
+            for (int i = 0; i < contentPools.length; i++) {
+                if (!contentPools[i].equals(content.getContentPoolType())) {
+                    continue;
+                }
+                double avgViewCountPos = accountIndexAvgViewCountService.getAvgReadCountByDB(avgInfoList, param.getGhId(), i + 1);
+                // 缺省头条均值设置为2w,次条为1w
+                if (avgViewCountPos < 10) {
+                    if (i == 0) {
+                        avgViewCountPos = 20000D;
+                    } else if (i == 1) {
+                        avgViewCountPos = 10000D;
+                    } else {
+                        avgViewCountPos = 400D;
+                    }
+                }
+                double showViewCountSum = 0D;
+                double avgViewCountSum = 0D;
+                double showViewCountSumFirst = 0D;
+                double avgViewCountSumFirst = 0D;
+                double showViewCountSumSecond = 0D;
+                double avgViewCountSumSecond = 0D;
+                double maxAvgViewCount = 0D;
+                if (CollectionUtils.isEmpty(content.getHisPublishArticleList())) {
+                    continue;
+                }
+                for (ContentHisPublishArticle hisItem : content.getHisPublishArticleList()) {
+                    // 过滤掉发布时间晚于19点数据
+                    if (ScoreStrategy.hisContentLateFilter(hisItem.getPublishTimestamp())) {
+                        continue;
+                    }
+                    if (hisItem.isInnerAccount() && Objects.nonNull(hisItem.getViewCount())
+                            && hisItem.getViewCount() > 0 && Objects.nonNull(hisItem.getReadAvgCiUpper())
+                            && hisItem.getReadAvgCiUpper() > 0) {
+                        maxAvgViewCount = Math.max(maxAvgViewCount, hisItem.getReadAvgCiUpper());
+                        if (hisItem.getItemIndex() == 1) {
+                            showViewCountSumFirst += hisItem.getViewCount();
+                            avgViewCountSumFirst += hisItem.getReadAvgCiUpper();
+                        } else if (hisItem.getItemIndex() == 2) {
+                            if (Objects.nonNull(hisItem.getFirstViewCount()) &&  hisItem.getFirstViewCount() > 0 &&
+                                    Objects.nonNull(hisItem.getFirstViewCountRate()) && hisItem.getFirstViewCountRate() > 0) {
+                                showViewCountSumSecond += hisItem.getViewCount();
+                                if (hisItem.getFirstViewCountRate() > 1) {
+                                    // 对于头条均值倍数大于1的情况,次条均值线性增加,用于debias;
+                                    // TODO: 对于小于1的情况,是否要减去?
+                                    avgViewCountSumSecond += hisItem.getReadAvgCiUpper() * hisItem.getFirstViewCountRate();
+                                } else {
+                                    avgViewCountSumSecond += hisItem.getReadAvgCiUpper();
+                                }
+                            }
+                        } else {
+                            if (Objects.nonNull(hisItem.getFirstViewCount()) && hisItem.getFirstViewCount() > 0
+                                    && Objects.nonNull(hisItem.getFirstViewCountRate()) && hisItem.getFirstViewCountRate() > 0) {
+                                showViewCountSum += hisItem.getViewCount();
+                                if (hisItem.getFirstViewCountRate() > 1) {
+                                    // 对于头条均值倍数大于1的情况,次条均值线性增加,用于debias;
+                                    // TODO: 对于小于1的情况,是否要减去?
+                                    avgViewCountSum += hisItem.getReadAvgCiUpper() * hisItem.getFirstViewCountRate();
+                                } else {
+                                    avgViewCountSum += hisItem.getReadAvgCiUpper();
+                                }
+                            }
+                        }
+                    }
+                }
+                double viewCountRate = 0D; // 设置默认值
+                double bigRateW = 1D;
+                // 如果有头条反馈数据,优先选取头条反馈数据;
+                if (showViewCountSumFirst > 0) {
+                    showViewCountSum = showViewCountSumFirst;
+                    avgViewCountSum = avgViewCountSumFirst;
+                } else if (showViewCountSumSecond > 0) {
+                    showViewCountSum = showViewCountSumSecond;
+                    avgViewCountSum = avgViewCountSumSecond;
+                    // 如果是大号头条,则降权
+                    if (avgViewCountFirst >= 3000 && i == 0) {
+                        bigRateW = 0.001D;
+                    }
+                }
+                // 均值倍数
+                if (avgViewCountSum > 0) {
+                    viewCountRate = showViewCountSum / avgViewCountSum;
+                }
+                // 置信度
+                double viewCountRateW = MathUtils.sigmoid(avgViewCountSum, 0.0002, avgViewCountPos);
+                double viewCountRateScore = 0;
+
+                if (viewCountRate > 0) {
+                    // 最终分数 = 置信度 * 均值倍数
+                    if (viewCountRate > 1 && bigRateW < 1) {
+                        // 如果是大号头条,则降权
+                        viewCountRateScore = viewCountRateW * ((viewCountRate - 1) * bigRateW + 1);
+                    } else {
+                        viewCountRateScore = viewCountRateW * viewCountRate;
+                    }
+                }
+                Score score = new Score();
+                score.setStrategy(this);
+                score.setContentId(content.getId());
+                score.setScore(viewCountRateScore);
+                scores.add(score);
+                break;
+            }
+        }
+        return scores;
+    }
+}