Explorar o código

Merge branch 'wyp/20260211-I2IRecommendStrategy' of Server/long-article-recommend into master

wangyunpeng hai 3 semanas
pai
achega
1f495c79a6

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/recommend/RankStrategyEnum.java

@@ -25,6 +25,7 @@ public enum RankStrategyEnum {
     ArticleRankV17("ArticleRankV17", "ArticleRankV17", "rankV17Strategy"),
     ArticleRankV18("ArticleRankV18", "ArticleRankV18", "rankV18Strategy"),
     ArticleRankV19("ArticleRankV19", "ArticleRankV19", "rankV19Strategy"),
+    ArticleRankV20("ArticleRankV20", "ArticleRankV20", "rankV20Strategy"),
 
     HIS_JUMP_STRATEGY("ArticleRankHisJump", "历史表现跳过相似度策略", "hisJumpRankStrategy"),
     INFINITE_STRATEGY("ArticleRankInfinite", "无限发表", "infiniteRankStrategy"),

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/recommend/ScoreStrategyEnum.java

@@ -20,6 +20,7 @@ public enum ScoreStrategyEnum {
     VIEW_COUNT("ViewCountStrategy"),
     VIEW_MULTIPLIER("ViewMultiplierStrategy"),
     CRAWLER_DAYS_DECREASE_STRATEGY("CrawlerDaysDecreaseStrategy"),
+    I2I_RECOMMEND_STRATEGY("I2IRecommendStrategy"),
     ;
 
     private final String value;

+ 44 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/I2IRecommend.java

@@ -0,0 +1,44 @@
+package com.tzld.longarticle.recommend.server.model.entity.longArticle;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.*;
+
+/**
+ * JPA entity mapped to the {@code i2i_recommend} table.
+ *
+ * <p>Each row carries a recommended title and a recommendation score for one account
+ * (identified by {@code gh_id}). Lombok generates accessors, {@code equals}/{@code hashCode}/
+ * {@code toString}, a no-args constructor and an all-args constructor whose parameter order
+ * follows the field declaration order below.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "i2i_recommend")
+public class I2IRecommend {
+
+    // Primary key, generated by the database (IDENTITY strategy).
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    private Integer id;
+
+    // Account name (column account_name).
+    @Column(name = "account_name")
+    private String accountName;
+
+    // Account gh_id; used as a lookup key by I2IRecommendRepository#findByGhIdAndStatus.
+    @Column(name = "gh_id")
+    private String ghId;
+
+    // Recommended title; I2IRecommendStrategy compares it against candidate content titles.
+    @Column(name = "recommend_title")
+    private String recommendTitle;
+
+    // Raw recommendation score; nullable (boxed Double). I2IRecommendStrategy rescales it.
+    @Column(name = "recommend_score")
+    private Double recommendScore;
+
+    // NOTE(review): meaning of base_cnt is not visible in this change — confirm with the data producer.
+    @Column(name = "base_cnt")
+    private Integer baseCnt;
+
+    // NOTE(review): meaning of collinear_cnt is not visible in this change — confirm with the data producer.
+    @Column(name = "collinear_cnt")
+    private Integer collinearCnt;
+
+    // Row status; I2IRecommendStrategy only loads rows with status = 1 (presumably "enabled" — confirm).
+    @Column(name = "status")
+    private Integer status;
+
+    // Plain string column; this is not a JPA @Version optimistic-locking field.
+    @Column(name = "version")
+    private String version;
+
+}

+ 14 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/I2IRecommendRepository.java

@@ -0,0 +1,14 @@
+package com.tzld.longarticle.recommend.server.repository.longArticle;
+
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.I2IRecommend;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Spring Data JPA repository for {@link I2IRecommend} rows (primary key type {@code Integer}).
+ */
+@Repository
+public interface I2IRecommendRepository extends JpaRepository<I2IRecommend, Integer> {
+
+    /**
+     * Derived query: returns all rows whose {@code ghId} and {@code status} both equal the given values.
+     * The query is derived from the method name, so the name must not be changed casually.
+     *
+     * @param ghId   account gh_id to match
+     * @param status status value to match
+     * @return matching rows
+     */
+    List<I2IRecommend> findByGhIdAndStatus(String ghId, Integer status);
+
+}

+ 205 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/rank/strategy/RankV20Strategy.java

@@ -0,0 +1,205 @@
+package com.tzld.longarticle.recommend.server.service.recommend.rank.strategy;
+
+
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishPlanInputSourceTypesEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ScoreStrategyEnum;
+import com.tzld.longarticle.recommend.server.model.dto.Content;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
+import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
+import com.tzld.longarticle.recommend.server.service.recommend.config.AccountContentPoolConfigService;
+import com.tzld.longarticle.recommend.server.service.recommend.config.StrategyIndexScoreWeightService;
+import com.tzld.longarticle.recommend.server.service.recommend.rank.*;
+import com.tzld.longarticle.recommend.server.service.recommend.score.AccountIndexReplacePoolConfig;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreResult;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreService;
+import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.lang3.RandomUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Rank strategy V20.
+ *
+ * <p>Scores all candidate contents through {@link ScoreService}, folds the per-strategy scores into
+ * a single score per content (the formula depends on which content pool the content belongs to;
+ * only the first pool includes the I2I recommend term), sorts by that score and then fills the
+ * publish slots: head slot from the first pool, second slot via
+ * {@code RankService.commonAddSecondContent}, and slots 3..size from the third pool.
+ *
+ * <p>NOTE(review): the code indexes {@code contentPools[0]}, {@code [1]} and {@code [2]} without a
+ * length check, so it relies on the account configuration providing at least three pools.
+ */
+@Service
+@Slf4j
+public class RankV20Strategy implements RankStrategy {
+
+    @Autowired
+    private ScoreService scoreService;
+    @Autowired
+    private RankService rankService;
+    @Autowired
+    private AccountContentPoolConfigService accountContentPoolConfigService;
+    @Autowired
+    private ArticleRepository articleRepository;
+    @Autowired
+    private StrategyIndexScoreWeightService weightService;
+
+    // NOTE(review): this field is injected but not referenced anywhere in this class.
+    @ApolloJsonValue("${touliu.account.ghIds:[\"gh_93e00e187787\", \"gh_ac43e43b253b\", \"gh_68e7fdc09fe4\",\"gh_77f36c109fb1\", \"gh_b181786a6c8c\", \"gh_1ee2e1b39ccf\"]}")
+    private List<String> touliuAccountGhIds;
+    // Produce plan id that gets special handling in the head slot; defaults to the empty string.
+    @Value("${topProducePlanId:}")
+    private String topProducePlanId;
+    // Substitute value used when a content's HIS_FISSION_OPEN_RATE score is exactly 0.
+    @Value("${hisFissionOpenRateMissingBaseline:0.32}")
+    private double hisFissionOpenRateMissingBaseline;
+
+    /**
+     * Ranks the candidate contents of {@code param} and selects the contents to publish.
+     *
+     * @param param rank request (account, gh_id, strategy name, candidate contents, slot count)
+     * @return the selected contents in slot order; empty when the similarity check trips or the
+     *         first pool has no candidates
+     */
+    public RankResult rank(RankParam param) {
+        List<Content> result = new ArrayList<>();
+
+        ScoreResult scoreResult = scoreService.score(RankStrategy.convertToScoreParam(param));
+
+        Map<String, Map<String, Double>> scoreMap = scoreResult.getScoreMap();
+        String[] contentPools = accountContentPoolConfigService.getContentPools(param.getAccountName());
+        Map<Integer, AccountIndexReplacePoolConfig> indexReplacePoolConfigMap = accountContentPoolConfigService.getContentReplacePools(param.getAccountName());
+
+        // Build one RankItem per content and compute its combined score.
+        List<RankItem> items = CommonCollectionUtils.toList(param.getContents(), c -> {
+            RankItem item = new RankItem();
+            item.setContent(c);
+            c.setScoreMap(scoreMap.get(c.getId()));
+            item.setScoreMap(scoreMap.get(c.getId()));
+            double score;
+            int index = weightService.getIndex(item.getContent().getContentPoolType(), contentPools);
+            // First pool: weighted similarity + category + fission open rate + I2I recommend,
+            // plus the unweighted flow-control and crawler-days decrease terms.
+            if (contentPools[0].equals(item.getContent().getContentPoolType())) {
+                score = item.getScore(ScoreStrategyEnum.SIMILARITY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.SIMILARITY.value())
+                        + item.getScore(ScoreStrategyEnum.CATEGORY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.CATEGORY.value())
+                        + item.getScore(ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value())
+                        + item.getScore(ScoreStrategyEnum.I2I_RECOMMEND_STRATEGY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.I2I_RECOMMEND_STRATEGY.value())
+                        + item.getScore(ScoreStrategyEnum.FLOW_CTL_DECREASE.value())
+                        + item.getScore(ScoreStrategyEnum.CRAWLER_DAYS_DECREASE_STRATEGY.value());
+                // The view-count-rate term is only added when the publish-times score is non-negative.
+                if (item.getScore(ScoreStrategyEnum.PUBLISH_TIMES.value()) >= 0) {
+                    score += item.getScore(ScoreStrategyEnum.VIEW_COUNT_RATE.value())
+                            * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                            ScoreStrategyEnum.VIEW_COUNT_RATE.value());
+                }
+                // A fission-open-rate score of exactly 0 is replaced by the configured baseline
+                // (presumably 0 means "no history" — confirm).
+                if (item.getScore(ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value()) == 0) {
+                    score += hisFissionOpenRateMissingBaseline * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                            ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value());
+                }
+            // Second pool: same formula as the first pool but without the I2I recommend term.
+            } else if (contentPools[1].equals(item.getContent().getContentPoolType())) {
+                score = item.getScore(ScoreStrategyEnum.SIMILARITY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.SIMILARITY.value())
+                        + item.getScore(ScoreStrategyEnum.CATEGORY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.CATEGORY.value())
+                        + item.getScore(ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value())
+                        + item.getScore(ScoreStrategyEnum.FLOW_CTL_DECREASE.value())
+                        + item.getScore(ScoreStrategyEnum.CRAWLER_DAYS_DECREASE_STRATEGY.value());
+                if (item.getScore(ScoreStrategyEnum.PUBLISH_TIMES.value()) >= 0) {
+                    score += item.getScore(ScoreStrategyEnum.VIEW_COUNT_RATE.value())
+                            * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                            ScoreStrategyEnum.VIEW_COUNT_RATE.value());
+                }
+                if (item.getScore(ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value()) == 0) {
+                    score += hisFissionOpenRateMissingBaseline * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                            ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value());
+                }
+            // Any other pool: weighted similarity + category, plus unweighted pre-distribute,
+            // publish-times, crawler-days decrease and flow-control terms.
+            // NOTE(review): the ScoreService hunk in this same change comments out the
+            // ACCOUNT_PRE_DISTRIBUTE registration in the branch that covers this strategy — confirm
+            // that this term is still populated from somewhere else.
+            } else {
+                score = item.getScore(ScoreStrategyEnum.SIMILARITY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.SIMILARITY.value())
+                        + item.getScore(ScoreStrategyEnum.CATEGORY.value())
+                        * weightService.getWeight(param.getStrategy(), param.getGhId(), index,
+                        ScoreStrategyEnum.CATEGORY.value())
+                        + item.getScore(ScoreStrategyEnum.ACCOUNT_PRE_DISTRIBUTE.value())
+                        + item.getScore(ScoreStrategyEnum.PUBLISH_TIMES.value())
+                        + item.getScore(ScoreStrategyEnum.CRAWLER_DAYS_DECREASE_STRATEGY.value())
+                        + item.getScore(ScoreStrategyEnum.FLOW_CTL_DECREASE.value());
+            }
+            c.setScore(score);
+            item.setScore(score);
+            return item;
+        });
+        // Similarity score is 0: alert and return (the result is still empty at this point)
+        List<Article> hisPublishFirstArticleList = articleRepository.getByGhIdAndItemIndexAndTypeEqualsAndStatusEquals(
+                param.getGhId(), 1, param.getType(), 1);
+        if (RankStrategy.SimilarityScoreZero(items, param, hisPublishFirstArticleList)) {
+            return new RankResult(result);
+        }
+        // Down-weight by safety score
+        RankService.safeScoreDecrease(items);
+
+        // 1 Sort (descending by score: the comparator negates Double.compare)
+        Collections.sort(items, (o1, o2) -> -Double.compare(o1.getScore(), o2.getScore()));
+        // 2 Similarity de-duplication
+        // NOTE(review): no de-duplication happens here; the items are only unwrapped to their
+        // Content. RankStrategy.deduplication is called at the end of this method.
+        List<Content> contents = CommonCollectionUtils.toList(items, RankItem::getContent);
+
+        // 3 Group articles by content pool (each group keeps the sorted order)
+        Map<String, List<Content>> contentMap = new HashMap<>();
+        for (Content c : contents) {
+            List<Content> data = contentMap.computeIfAbsent(c.getContentPoolType(), k -> new ArrayList<>());
+            data.add(c);
+        }
+        // 4 Select articles
+        String[] publishPool = Arrays.copyOf(contentPools, contentPools.length);
+
+        // Head slot
+        List<Content> pool1 = contentMap.get(contentPools[0]);
+        if (CollectionUtils.isNotEmpty(pool1)) {
+            pool1 = RankService.contentSourceTypeFilter(param.getStrategy(), pool1, 1);
+        }
+        RankService.printSortLog(param.getStrategy(), param.getAccountName(), "头条", pool1);
+        if (CollectionUtils.isNotEmpty(pool1)) {
+            // When the best candidate comes from topProducePlanId, RandomUtils.nextInt(0, 2)
+            // (0 or 1) decides whether to pick the best candidate from a different plan instead.
+            if (topProducePlanId.equals(pool1.get(0).getProducePlanId())) {
+                int i = RandomUtils.nextInt(0, 2);
+                if (i == 0) {
+                    for (Content content : pool1) {
+                        if (!topProducePlanId.equals(content.getProducePlanId())) {
+                            result.add(content);
+                            break;
+                        }
+                    }
+                }
+            }
+            // Fallback: nothing was picked above, so take the top-scored candidate.
+            if (CollectionUtils.isEmpty(result)) {
+                result.add(pool1.get(0));
+            }
+        } else {
+            // No head candidate: notify via Feishu and return the empty result.
+            RankStrategy.sendFeishuFirstPoolEmpty(param, contentPools[0]);
+            return new RankResult(result);
+        }
+
+        // Second slot
+        RankService.commonAddSecondContent(param, result, publishPool, contentPools, contentMap,
+                indexReplacePoolConfigMap, param.getStrategy());
+
+        // 3-8
+        // RankService.commonAdd38Content(param, result, contentPools, contentMap, param.getStrategy());
+        List<Content> pool = contentMap.get(contentPools[2]);
+        RankService.printSortLog(param.getStrategy(), param.getAccountName(), "3-8", pool);
+        if (CollectionUtils.isNotEmpty(pool)) {
+            // Split the third pool into video-sourced and other contents, preserving sorted order.
+            Integer videoSourceType = PublishPlanInputSourceTypesEnum.longArticleVideoPoolSource.getVal();
+            Queue<Content> videoPoolQueue = pool.stream().filter(o -> Objects.equals(o.getSourceType(), videoSourceType))
+                    .collect(Collectors.toCollection(LinkedList::new));
+            Queue<Content> otherPoolQueue = pool.stream().filter(o -> !Objects.equals(o.getSourceType(), videoSourceType))
+                    .collect(Collectors.toCollection(LinkedList::new));
+            // Fill slots 3..size. A video slot falls back to the other queue when no video is
+            // left; a non-video slot is left unfilled when the other queue is empty, even if
+            // videos remain.
+            for (int i = 3; i < param.getSize() + 1; i++) {
+                Integer sourceType = RankService.getStrategyPoolSourceType(param.getStrategy(), i);
+                if (Objects.equals(sourceType, videoSourceType) && !videoPoolQueue.isEmpty()) {
+                    result.add(videoPoolQueue.poll());
+                } else if (!otherPoolQueue.isEmpty()) {
+                    result.add(otherPoolQueue.poll());
+                }
+            }
+        }
+
+        rankService.checkPublishContentStatus(result, contentMap, publishPool);
+        RankStrategy.deduplication(result, contentMap, publishPool);
+
+        return new RankResult(result);
+    }
+
+}

+ 11 - 7
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/score/ScoreService.java

@@ -131,22 +131,26 @@ public class ScoreService implements ApplicationContextAware {
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV16.getStrategy())
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV17.getStrategy())
                 || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV18.getStrategy())
-                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV19.getStrategy())) {
+                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV19.getStrategy())
+                || StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV20.getStrategy())) {
             strategies.add(strategyMap.get(ScoreStrategyEnum.CATEGORY.value()));
-            strategies.add(strategyMap.get(ScoreStrategyEnum.ACCOUNT_PRE_DISTRIBUTE.value()));
+            //strategies.add(strategyMap.get(ScoreStrategyEnum.ACCOUNT_PRE_DISTRIBUTE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.FLOW_CTL_DECREASE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE_V2.value()));
-            strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE_CORRELATION.value()));
+            //strategies.add(strategyMap.get(ScoreStrategyEnum.VIEW_COUNT_RATE_CORRELATION.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.PUBLISH_TIMES.value()));
 //            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_FANS_RATE_RATE.value()));
 //            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_FANS_SUM_RATE.value()));
-            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_AVG_READ_RATE_RATE.value()));
-            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_AVG_READ_RATE_CORRELATION_RATE.value()));
-            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_AVG_READ_SUM_RATE.value()));
-            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_DE_WEIGHT_AVG_READ_SUM_RATE.value()));
+//            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_AVG_READ_RATE_RATE.value()));
+//            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_AVG_READ_RATE_CORRELATION_RATE.value()));
+//            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_AVG_READ_SUM_RATE.value()));
+//            strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_DE_WEIGHT_AVG_READ_SUM_RATE.value()));
             strategies.add(strategyMap.get(ScoreStrategyEnum.HIS_FISSION_OPEN_RATE.value()));
         }
+        if (StringUtils.equals(param.getStrategy(), RankStrategyEnum.ArticleRankV20.getStrategy())) {
+            strategies.add(strategyMap.get(ScoreStrategyEnum.I2I_RECOMMEND_STRATEGY.value()));
+        }
 
         return strategies;
     }

+ 74 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/score/strategy/I2IRecommendStrategy.java

@@ -0,0 +1,74 @@
+package com.tzld.longarticle.recommend.server.service.recommend.score.strategy;
+
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ContentPoolEnum;
+import com.tzld.longarticle.recommend.server.model.dto.Content;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.I2IRecommend;
+import com.tzld.longarticle.recommend.server.repository.longArticle.I2IRecommendRepository;
+import com.tzld.longarticle.recommend.server.service.recommend.score.Score;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreParam;
+import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
+import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Score strategy that boosts head-pool contents whose title matches an item-to-item (I2I)
+ * recommendation stored for the account.
+ *
+ * <p>For every content in the {@code autoArticlePoolLevel1} pool, the first recommendation row with
+ * a usable score and a similar title (per {@link TitleSimilarCheckUtil#isSimilar}) wins; its raw
+ * score is rescaled into the range {@code [2.0, 4.0]}. Contents without a match get no
+ * {@link Score} entry from this strategy.
+ *
+ * @author dyp
+ */
+@Component
+@Slf4j
+public class I2IRecommendStrategy implements ScoreStrategy {
+
+    /** Status of the recommendation rows to load (presumably "enabled" — confirm). */
+    private static final int RECOMMEND_STATUS = 1;
+    /** Keeps {@code Math.log} finite when the raw score is 0. */
+    private static final double EPSILON = 1e-6;
+    /** Lower bound of the log-normalisation window, taken from the observed score distribution. */
+    private static final double LOG_MIN = Math.log(0.0001);
+    /** Upper bound of the log-normalisation window, taken from the observed score distribution. */
+    private static final double LOG_MAX = Math.log(0.18);
+    /** Steepness of the exponential stretch that separates the top scores. */
+    private static final double ALPHA = 3.5;
+    /** Exponent of the final power stretch. */
+    private static final double POWER = 2.2;
+    /** Width of the amplified part of the score (stretched value is mapped to 0..2). */
+    private static final double AMPLIFIED_RANGE = 2.0;
+    /** Bonus every matched content receives before amplification. */
+    private static final double BASE_SCORE = 2.0;
+
+    @Autowired
+    private I2IRecommendRepository i2iRecommendRepository;
+
+    /**
+     * Produces one {@link Score} per head-pool content whose title matches a recommendation.
+     *
+     * @param param score request; its gh_id selects the recommendation rows
+     * @return scores for matched contents only; empty when there are no contents or no recommendations
+     */
+    @Override
+    public List<Score> score(ScoreParam param) {
+        List<Score> scores = new ArrayList<>();
+        List<Content> contents = param.getContents();
+        // Nothing to score: skip the repository round trip entirely.
+        if (CollectionUtils.isEmpty(contents)) {
+            return scores;
+        }
+        List<I2IRecommend> i2IRecommendList =
+                i2iRecommendRepository.findByGhIdAndStatus(param.getGhId(), RECOMMEND_STATUS);
+        if (CollectionUtils.isEmpty(i2IRecommendList)) {
+            return scores;
+        }
+        String headPool = ContentPoolEnum.autoArticlePoolLevel1.getContentPool();
+        for (Content content : contents) {
+            // Only head-pool contents are affected by this strategy.
+            if (!headPool.equals(content.getContentPoolType())) {
+                continue;
+            }
+            Double recommendScore = findRecommendScore(content.getTitle(), i2IRecommendList);
+            if (recommendScore == null) {
+                continue;
+            }
+            Score score = new Score();
+            score.setStrategy(this);
+            score.setContentId(content.getId());
+            score.setScore(amplify(recommendScore));
+            scores.add(score);
+        }
+        return scores;
+    }
+
+    /**
+     * Returns the raw score of the first recommendation that has a usable score and a title similar
+     * to {@code title}, or {@code null} when there is none.
+     *
+     * <p>Rows with a null, NaN or negative score are ignored: a null used to throw on unboxing and
+     * a negative/NaN value used to yield a NaN score, which sorts ahead of every real score in a
+     * descending {@code Double.compare} sort.
+     */
+    private static Double findRecommendScore(String title, List<I2IRecommend> recommends) {
+        for (I2IRecommend recommend : recommends) {
+            Double rawScore = recommend.getRecommendScore();
+            // Checked before the similarity test so unusable rows cost no title comparison.
+            if (rawScore == null || rawScore.isNaN() || rawScore < 0.0) {
+                continue;
+            }
+            if (TitleSimilarCheckUtil.isSimilar(title, recommend.getRecommendTitle(),
+                    TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
+                return rawScore;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Rescales a non-negative raw recommendation score into {@code [2.0, 4.0]}.
+     */
+    private static double amplify(double recommendScore) {
+        // 1. Log compression to tame the extreme long tail.
+        double logScore = Math.log(recommendScore + EPSILON);
+        // 2. Normalise over the log window of the observed distribution and clamp to [0, 1].
+        double normalized = (logScore - LOG_MIN) / (LOG_MAX - LOG_MIN);
+        normalized = Math.max(0.0, Math.min(1.0, normalized));
+        // 3. Exponential stretch to pull the top scores apart.
+        double expScore = (Math.exp(ALPHA * normalized) - 1.0) / (Math.exp(ALPHA) - 1.0);
+        // 4. Power stretch for additional separation at the top.
+        double stretched = Math.pow(expScore, POWER);
+        // 5. Map to 0..2 and add the base bonus.
+        return BASE_SCORE + stretched * AMPLIFIED_RANGE;
+    }
+
+}