소스 검색

冷启层内容 增加发布权重

wangyunpeng 10 달 전
부모
커밋
b27338e007

+ 15 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/PublishSortLogRepository.java

@@ -0,0 +1,15 @@
+package com.tzld.longarticle.recommend.server.repository.crawler;
+
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.PublishSortLog;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Spring Data JPA repository for {@link PublishSortLog} entities
+ * (mapped to the {@code publish_sort_log} table).
+ */
+@Repository
+public interface PublishSortLogRepository extends JpaRepository<PublishSortLog, Long> {
+
+    /**
+     * Derived query: selects the log rows whose {@code dateStr} equals the argument.
+     *
+     * @param dateStr date string to match; rows saved by RecommendService are
+     *                formatted as {@code yyyy-MM-dd}
+     * @return the matching rows
+     */
+    List<PublishSortLog> findByDateStr(String dateStr);
+
+    /**
+     * Derived query: selects the log rows whose {@code crawlerChannelContentId}
+     * is contained in the given list.
+     *
+     * @param crawlerChannelContentIds content ids to look up
+     *                                 (NOTE(review): behaviour for an empty list depends on the
+     *                                 JPA provider - confirm before passing one)
+     * @return the matching rows; a content id can match several rows
+     */
+    List<PublishSortLog> findByCrawlerChannelContentIdIn(List<String> crawlerChannelContentIds);
+}

+ 40 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/entity/crawler/PublishSortLog.java

@@ -0,0 +1,40 @@
+package com.tzld.longarticle.recommend.server.repository.entity.crawler;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.*;
+
+/**
+ * JPA entity mapped to the {@code publish_sort_log} table.
+ * <p>
+ * RecommendService saves one row per ranked content from position 3 onwards,
+ * and ColdStartDecreaseStrategy reads the rows back by content id.
+ */
+@Data
+@AllArgsConstructor
+@NoArgsConstructor
+@Entity
+@Table(name = "publish_sort_log")
+public class PublishSortLog {
+
+    /** Primary key, generated by the database (IDENTITY strategy). */
+    @Id
+    @GeneratedValue(strategy = GenerationType.IDENTITY)
+    private Long id;
+    /** Date the row was written; RecommendService fills it with a {@code yyyy-MM-dd} string. */
+    @Column(name = "date_str")
+    private String dateStr;
+    /** Account gh id taken from the recommend request. */
+    @Column(name = "gh_id")
+    private String ghId;
+    /** Account name taken from the recommend request. */
+    @Column(name = "account_name")
+    private String accountName;
+    /** Crawler channel content id of the ranked content. */
+    @Column(name = "crawler_channel_content_id")
+    private String crawlerChannelContentId;
+    /** Title of the content; RecommendService stores the crawler title here. */
+    @Column(name = "title")
+    private String title;
+    /**
+     * 1-based position of the content in the rank result.
+     * The column name is backtick-quoted - presumably because {@code index} is a
+     * reserved SQL keyword; confirm against the schema.
+     */
+    @Column(name = "`index`")
+    private Integer index;
+    /** Average read count of the account at this position, as returned by AccountIndexAvgViewCountService. */
+    @Column(name = "index_avg_count")
+    private Double indexAvgCount;
+    /** Category of the content. */
+    @Column(name = "category")
+    private String category;
+    /** Strategy name taken from the recommend request. */
+    @Column(name = "strategy")
+    private String strategy;
+    /** Creation time in epoch milliseconds ({@code System.currentTimeMillis()}). */
+    @Column(name = "create_timestamp")
+    private Long createTimestamp;
+
+}

+ 33 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/AccountIndexAvgViewCountService.java

@@ -0,0 +1,33 @@
+package com.tzld.longarticle.recommend.server.service;
+
+import cn.hutool.core.io.resource.ResourceUtil;
+import com.alibaba.fastjson.JSONObject;
+import com.tzld.longarticle.recommend.server.service.score.AvgReadDTO;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import javax.annotation.PostConstruct;
+
+/**
+ * Serves the average read statistics parsed from the bundled
+ * {@code file/AccountInfo.json} resource.
+ * <p>
+ * Entries are looked up by a string key; {@link #getAvgReadCount(String, Integer)}
+ * builds that key as {@code ghId + "_" + index}.
+ */
+@Component
+@Slf4j
+public class AccountIndexAvgViewCountService {
+
+    /** Parsed content of the JSON resource; assigned in {@link #init()}. */
+    private JSONObject accountInfo;
+
+    /** Loads and parses the JSON resource once the bean has been constructed. */
+    @PostConstruct
+    public void init() {
+        accountInfo = JSONObject.parseObject(ResourceUtil.readUtf8Str("file/AccountInfo.json"));
+    }
+
+    /**
+     * Looks up the statistics entry stored under the given key.
+     *
+     * @param key entry key
+     * @return the entry converted to {@link AvgReadDTO}, or {@code null} when the key is absent
+     */
+    public AvgReadDTO getAvgReadDto(String key) {
+        AvgReadDTO entry = accountInfo.getObject(key, AvgReadDTO.class);
+        return entry;
+    }
+
+    /**
+     * Returns the average read count configured for an account at a given position.
+     *
+     * @param ghId  account gh id
+     * @param index position of the article
+     * @return the configured average, or {@code 1.0} when no entry exists for the pair
+     */
+    public double getAvgReadCount(String ghId, Integer index) {
+        String lookupKey = ghId + "_" + index;
+        AvgReadDTO entry = getAvgReadDto(lookupKey);
+        if (entry == null) {
+            return 1.0;
+        }
+        return entry.getReadAvg();
+    }
+
+}

+ 26 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/RecommendService.java

@@ -4,13 +4,16 @@ import com.alibaba.fastjson.JSONObject;
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.longarticle.recommend.server.model.*;
 import com.tzld.longarticle.recommend.server.repository.crawler.PublishContentSortLogRepository;
+import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository;
 import com.tzld.longarticle.recommend.server.repository.entity.crawler.PublishContentSortLog;
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.PublishSortLog;
 import com.tzld.longarticle.recommend.server.service.rank.RankParam;
 import com.tzld.longarticle.recommend.server.service.rank.RankResult;
 import com.tzld.longarticle.recommend.server.service.rank.RankService;
 import com.tzld.longarticle.recommend.server.service.recall.RecallParam;
 import com.tzld.longarticle.recommend.server.service.recall.RecallResult;
 import com.tzld.longarticle.recommend.server.service.recall.RecallService;
+import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.JSONUtils;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
@@ -39,6 +42,10 @@ public class RecommendService {
     private RankService rankService;
     @Autowired
     private PublishContentSortLogRepository publishContentSortLogRepository;
+    @Autowired
+    private PublishSortLogRepository publishSortLogRepository;
+    @Autowired
+    AccountIndexAvgViewCountService accountIndexAvgViewCountService;
 
     @ApolloJsonValue("${accountStrategyConfig:{}}")
     private Map<String, String> accountStrategyConfigMap;
@@ -152,6 +159,25 @@ public class RecommendService {
         log.setPublishContentId(JSONObject.toJSONString(publishContentIds));
         log.setCreateTimestamp(System.currentTimeMillis());
         publishContentSortLogRepository.save(log);
+        // 仅记录3-8条 冷启层内容
+        List<PublishSortLog> publishSortLogSaveList = new ArrayList<>();
+        for (int i = 3; i < rankResult.getContents().size() + 1; i++) {
+            Content content = rankResult.getContents().get(i - 1);
+
+            PublishSortLog sortLog = new PublishSortLog();
+            sortLog.setDateStr(DateUtils.getCurrentDateStr("yyyy-MM-dd"));
+            sortLog.setGhId(param.getGhId());
+            sortLog.setAccountName(param.getAccountName());
+            sortLog.setCrawlerChannelContentId(content.getCrawlerChannelContentId());
+            sortLog.setTitle(content.getCrawlerTitle());
+            sortLog.setIndex(i);
+            sortLog.setIndexAvgCount(accountIndexAvgViewCountService.getAvgReadCount(param.getGhId(), i));
+            sortLog.setCategory(content.getCategory());
+            sortLog.setStrategy(param.getStrategy());
+            sortLog.setCreateTimestamp(System.currentTimeMillis());
+            publishSortLogSaveList.add(sortLog);
+        }
+        publishSortLogRepository.saveAll(publishSortLogSaveList);
     }
 
 }

+ 3 - 5
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/rank/strategy/RankV3Strategy.java

@@ -10,10 +10,7 @@ import com.tzld.longarticle.recommend.server.service.rank.RankStrategy;
 import com.tzld.longarticle.recommend.server.service.score.ScoreParam;
 import com.tzld.longarticle.recommend.server.service.score.ScoreResult;
 import com.tzld.longarticle.recommend.server.service.score.ScoreService;
-import com.tzld.longarticle.recommend.server.service.score.strategy.AccountPreDistributeStrategy;
-import com.tzld.longarticle.recommend.server.service.score.strategy.CategoryStrategy;
-import com.tzld.longarticle.recommend.server.service.score.strategy.SimilarityStrategy;
-import com.tzld.longarticle.recommend.server.service.score.strategy.ViewMultiplierStrategy;
+import com.tzld.longarticle.recommend.server.service.score.strategy.*;
 import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
 import com.tzld.longarticle.recommend.server.util.JSONUtils;
 import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
@@ -52,7 +49,8 @@ public class RankV3Strategy implements RankStrategy {
             double score = 2 * item.getScore(SimilarityStrategy.class.getSimpleName())
                     + item.getScore(ViewMultiplierStrategy.class.getSimpleName())
                     + item.getScore(CategoryStrategy.class.getSimpleName())
-                    + item.getScore(AccountPreDistributeStrategy.class.getSimpleName());
+                    + item.getScore(AccountPreDistributeStrategy.class.getSimpleName())
+                    + item.getScore(ColdStartDecreaseStrategy.class.getSimpleName());
             item.setScore(score);
             return item;
         });

+ 6 - 6
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recall/RecallService.java

@@ -1,7 +1,5 @@
 package com.tzld.longarticle.recommend.server.service.recall;
 
-import cn.hutool.core.io.resource.ResourceUtil;
-import com.alibaba.fastjson.JSONObject;
 import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
 import com.tzld.longarticle.recommend.server.model.Content;
 import com.tzld.longarticle.recommend.server.model.ContentHisPublishArticle;
@@ -10,6 +8,7 @@ import com.tzld.longarticle.recommend.server.repository.aigc.CrawlerMetaArticleR
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
 import com.tzld.longarticle.recommend.server.repository.entity.aigc.CrawlerMetaArticle;
 import com.tzld.longarticle.recommend.server.repository.entity.crawler.Article;
+import com.tzld.longarticle.recommend.server.service.AccountIndexAvgViewCountService;
 import com.tzld.longarticle.recommend.server.service.recall.strategy.ColdStartBackupRecallStrategy;
 import com.tzld.longarticle.recommend.server.service.recall.strategy.DefaultRecallStrategy;
 import com.tzld.longarticle.recommend.server.service.score.AvgReadDTO;
@@ -45,12 +44,15 @@ public class RecallService implements ApplicationContextAware {
     @Autowired
     CrawlerMetaArticleRepository crawlerMetaArticleRepository;
     @Autowired
-    private AIGCRemoteService aigcRemoteService;
+    AIGCRemoteService aigcRemoteService;
+    @Autowired
+    AccountIndexAvgViewCountService accountIndexAvgViewCountService;
 
     private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
     private ApplicationContext applicationContext;
     private final ExecutorService pool = ThreadPoolFactory.recallPool();
 
+
     @PostConstruct
     public void init() {
         Map<String, RecallStrategy> type = applicationContext.getBeansOfType(RecallStrategy.class);
@@ -176,8 +178,6 @@ public class RecallService implements ApplicationContextAware {
 
     public void setTitleAvgViewCount(List<Content> contentList) {
         long start = System.currentTimeMillis();
-        String cardJSON = ResourceUtil.readUtf8Str("file/AccountInfo.json");
-        JSONObject jsonObject = JSONObject.parseObject(cardJSON);
 
         List<String> titleList = contentList.stream().map(Content::getTitle).collect(Collectors.toList());
         List<String> crawlerTitleList = contentList.stream().map(Content::getCrawlerTitle).collect(Collectors.toList());
@@ -214,7 +214,7 @@ public class RecallService implements ApplicationContextAware {
             for (Article hisArticle : hisArticles) {
                 ContentHisPublishArticle article = new ContentHisPublishArticle();
                 BeanUtils.copyProperties(hisArticle, article);
-                AvgReadDTO dto = jsonObject.getObject(hisArticle.getGhId() + "_" + hisArticle.getItemIndex(), AvgReadDTO.class);
+                AvgReadDTO dto = accountIndexAvgViewCountService.getAvgReadDto(hisArticle.getGhId() + "_" + hisArticle.getItemIndex());
                 int avgViewCount = 0;
                 if (Objects.nonNull(dto)) {
                     article.setInnerAccount(true);

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/score/ScoreService.java

@@ -90,6 +90,7 @@ public class ScoreService implements ApplicationContextAware {
             strategies.add(strategyMap.get(ViewMultiplierStrategy.class.getSimpleName()));
             strategies.add(strategyMap.get(CategoryStrategy.class.getSimpleName()));
             strategies.add(strategyMap.get(AccountPreDistributeStrategy.class.getSimpleName()));
+            strategies.add(strategyMap.get(ColdStartDecreaseStrategy.class.getSimpleName()));
         }
 
         return strategies;

+ 96 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/score/strategy/ColdStartDecreaseStrategy.java

@@ -0,0 +1,96 @@
+package com.tzld.longarticle.recommend.server.service.score.strategy;
+
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.tzld.longarticle.recommend.server.model.Content;
+import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository;
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.PublishSortLog;
+import com.tzld.longarticle.recommend.server.service.AccountContentPoolConfigService;
+import com.tzld.longarticle.recommend.server.service.score.Score;
+import com.tzld.longarticle.recommend.server.service.score.ScoreParam;
+import com.tzld.longarticle.recommend.server.service.score.ScoreStrategy;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.stereotype.Component;
+import org.springframework.util.CollectionUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+
+/**
+ * Score strategy that adjusts the score of cold-start contents which have
+ * already accumulated enough historical exposure.
+ * <p>
+ * For every content of the account's cold-start pool (publish slots 3-8) the
+ * {@code indexAvgCount} values of its {@code publish_sort_log} rows are summed.
+ * When the sum exceeds the account's read-count base, the account's configured
+ * weight (negative by default) is emitted as the score; RankV3Strategy adds
+ * that score to the content's total.
+ */
+@Component
+@Slf4j
+public class ColdStartDecreaseStrategy implements ScoreStrategy {
+
+    /** Key holding the fallback value in both Apollo maps. */
+    private static final String DEFAULT_KEY = "default";
+    /** Position of the cold-start pool in the account's content-pool array. */
+    private static final int COLD_START_POOL_INDEX = 2;
+    /** Used when the Apollo weight config lacks both the account and the default key. */
+    private static final int FALLBACK_SCORE_DECREASE = -100;
+    /** Used when the Apollo base config lacks both the account and the default key. */
+    private static final int FALLBACK_AVG_READ_COUNT_BASE = 100;
+
+    @Autowired
+    private AccountContentPoolConfigService accountContentPoolConfigService;
+    @Autowired
+    private PublishSortLogRepository publishSortLogRepository;
+
+    // Per-account score adjustment (account name -> weight, "default" as fallback).
+    @ApolloJsonValue("${accountColdStartScoreDecreaseConfig:{\"default\":-100}}")
+    private Map<String, Integer> accountColdStartScoreDecreaseMap;
+
+    // Per-account threshold for the summed average read count ("default" as fallback).
+    @ApolloJsonValue("${totalAvgReadCountBase:{\"default\":100}}")
+    private Map<String, Integer> totalAvgReadCountBaseMap;
+
+    /**
+     * Produces one score per cold-start content whose summed historical
+     * {@code indexAvgCount} exceeds the account's base.
+     *
+     * @param param scoring input (account name and candidate contents)
+     * @return the scores to apply; empty when there is nothing to adjust or the
+     *         account has no cold-start pool configured
+     */
+    @Override
+    public List<Score> score(ScoreParam param) {
+        List<Score> scores = new ArrayList<>();
+        if (CollectionUtils.isEmpty(param.getContents())) {
+            return scores;
+        }
+        String accountName = param.getAccountName();
+        String[] contentPools = accountContentPoolConfigService.getContentPools(accountName);
+        // Without a cold-start pool there is nothing to judge; skip instead of failing the whole scoring.
+        if (contentPools == null
+                || contentPools.length <= COLD_START_POOL_INDEX
+                || contentPools[COLD_START_POOL_INDEX] == null) {
+            log.warn("ColdStartDecreaseStrategy skipped, cold start pool not configured, accountName={}", accountName);
+            return scores;
+        }
+        String coldStartPool = contentPools[COLD_START_POOL_INDEX];
+        // Only cold-start contents (slots 3-8) are evaluated.
+        List<Content> coldStartContents = param.getContents().stream()
+                .filter(content -> coldStartPool.equals(content.getContentPoolType()))
+                .collect(Collectors.toList());
+        if (coldStartContents.isEmpty()) {
+            return scores;
+        }
+        Integer weight = getColdStartScoreDecreaseWeight(accountName);
+        if (weight == 0) {
+            // A zero weight never produces a score, so the history lookup can be skipped.
+            return scores;
+        }
+        Integer totalAvgReadCountBase = getAvgReadCountBase(accountName);
+
+        List<String> crawlerChannelContentIds = coldStartContents.stream()
+                .map(Content::getCrawlerChannelContentId)
+                .filter(id -> id != null)
+                .distinct()
+                .collect(Collectors.toList());
+        if (crawlerChannelContentIds.isEmpty()) {
+            return scores;
+        }
+        // Historical publish records of the cold-start candidates, grouped by content id.
+        List<PublishSortLog> hisPublishContentList =
+                publishSortLogRepository.findByCrawlerChannelContentIdIn(crawlerChannelContentIds);
+        if (CollectionUtils.isEmpty(hisPublishContentList)) {
+            return scores;
+        }
+        Map<String, List<PublishSortLog>> hisPublishedContentMap = hisPublishContentList.stream()
+                .collect(Collectors.groupingBy(PublishSortLog::getCrawlerChannelContentId));
+
+        for (Content content : coldStartContents) {
+            List<PublishSortLog> publishContents = hisPublishedContentMap.get(content.getCrawlerChannelContentId());
+            if (sumIndexAvgCount(publishContents) > totalAvgReadCountBase) {
+                Score score = new Score();
+                score.setStrategy(this);
+                score.setContentId(content.getId());
+                score.setScore(weight);
+                scores.add(score);
+            }
+        }
+
+        return scores;
+    }
+
+    /** Sums the non-null {@code indexAvgCount} values of the given rows; 0 when there are none. */
+    private double sumIndexAvgCount(List<PublishSortLog> publishContents) {
+        if (CollectionUtils.isEmpty(publishContents)) {
+            return 0;
+        }
+        return publishContents.stream()
+                .map(PublishSortLog::getIndexAvgCount)
+                .filter(avgCount -> avgCount != null)
+                .mapToDouble(Double::doubleValue)
+                .sum();
+    }
+
+    /**
+     * Resolves the score adjustment for an account.
+     *
+     * @param accountName account name
+     * @return the account-specific weight, else the configured default, else -100; never {@code null}
+     */
+    public Integer getColdStartScoreDecreaseWeight(String accountName) {
+        Integer weight = accountColdStartScoreDecreaseMap.get(accountName);
+        if (weight == null) {
+            weight = accountColdStartScoreDecreaseMap.get(DEFAULT_KEY);
+        }
+        return weight == null ? Integer.valueOf(FALLBACK_SCORE_DECREASE) : weight;
+    }
+
+    /**
+     * Resolves the summed-average-read threshold for an account.
+     *
+     * @param accountName account name
+     * @return the account-specific base, else the configured default, else 100; never {@code null}
+     */
+    public Integer getAvgReadCountBase(String accountName) {
+        Integer base = totalAvgReadCountBaseMap.get(accountName);
+        if (base == null) {
+            base = totalAvgReadCountBaseMap.get(DEFAULT_KEY);
+        }
+        return base == null ? Integer.valueOf(FALLBACK_AVG_READ_COUNT_BASE) : base;
+    }
+}

+ 7 - 17
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/score/strategy/ViewMultiplierStrategy.java

@@ -1,20 +1,20 @@
 package com.tzld.longarticle.recommend.server.service.score.strategy;
 
-import cn.hutool.core.io.resource.ResourceUtil;
-import com.alibaba.fastjson.JSONObject;
 import com.tzld.longarticle.recommend.server.model.Content;
 import com.tzld.longarticle.recommend.server.repository.adplatform.ChangwenArticleDatastatRepository;
 import com.tzld.longarticle.recommend.server.repository.adplatform.ChangwenArticleRepository;
 import com.tzld.longarticle.recommend.server.repository.entity.adplatform.ChangwenArticle;
 import com.tzld.longarticle.recommend.server.repository.entity.adplatform.ChangwenArticleDatastat;
-import com.tzld.longarticle.recommend.server.service.score.*;
+import com.tzld.longarticle.recommend.server.service.AccountIndexAvgViewCountService;
+import com.tzld.longarticle.recommend.server.service.score.Score;
+import com.tzld.longarticle.recommend.server.service.score.ScoreParam;
+import com.tzld.longarticle.recommend.server.service.score.ScoreStrategy;
 import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
 import com.tzld.longarticle.recommend.server.util.NormalizationUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;
 import org.springframework.util.CollectionUtils;
 
-import javax.annotation.PostConstruct;
 import java.util.*;
 import java.util.stream.Collectors;
 
@@ -28,14 +28,8 @@ public class ViewMultiplierStrategy implements ScoreStrategy {
     ChangwenArticleRepository changwenArticleRepository;
     @Autowired
     ChangwenArticleDatastatRepository changwenArticleDatastatRepository;
-
-    private JSONObject jsonObject;
-
-    @PostConstruct
-    public void init() {
-        String cardJSON = ResourceUtil.readUtf8Str("file/AccountInfo.json");
-        jsonObject = JSONObject.parseObject(cardJSON);
-    }
+    @Autowired
+    AccountIndexAvgViewCountService accountIndexAvgViewCountService;
 
     @Override
     public List<Score> score(ScoreParam param) {
@@ -54,7 +48,7 @@ public class ViewMultiplierStrategy implements ScoreStrategy {
             ChangwenArticleDatastat datastat = changwenArticleDatastatDTOMap.get(c.getCrawlerChannelContentId());
 
             if (Objects.nonNull(article) && Objects.nonNull(datastat)) {
-                double avgReadCount = getAvgReadCount(article.getAccountId(), article.getItemIndex());
+                double avgReadCount = accountIndexAvgViewCountService.getAvgReadCount(article.getAccountId(), article.getItemIndex());
                 Integer readCount = datastat.getReadCount();
                 score.setScore(NormalizationUtils.min(Math.max(readCount, 0) / avgReadCount - 1));
             } else {
@@ -88,8 +82,4 @@ public class ViewMultiplierStrategy implements ScoreStrategy {
         return result.stream().collect(Collectors.toMap(ChangwenArticleDatastat::getArticleId, o -> o));
     }
 
-    private double getAvgReadCount(String ghId, Integer index) {
-        AvgReadDTO dto = jsonObject.getObject(ghId + "_" + index, AvgReadDTO.class);
-        return dto == null ? 1.0 : dto.getReadAvg();
-    }
 }