Browse Source

Merge branch 'wyp/1122-hisArticleCategoryFilter' of Server/long-article-recommend into master

wangyunpeng 7 months ago
parent
commit
f0618309f1

+ 4 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/AccountCategoryRepository.java

@@ -4,9 +4,13 @@ import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCat
 import org.springframework.data.jpa.repository.JpaRepository;
 import org.springframework.stereotype.Repository;
 
+import java.util.List;
+
 @Repository // Spring stereotype: JPA repository for AccountCategory entities, keyed by AccountCategory.PK
 public interface AccountCategoryRepository extends JpaRepository<AccountCategory, AccountCategory.PK> {
     // Query derived from the method name: the AccountCategory matching both the given ghId and status.
     AccountCategory getByGhIdAndStatus(String ghId, Integer status);
     // Query derived from the method name: all AccountCategory rows having the given status.
+    List<AccountCategory> getByStatus(Integer status);
+
 }

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -197,6 +197,7 @@ public class XxlJobService {
             if (count > 0) {
                 return;
             }
+            saveList = saveList.stream().filter(item -> StringUtils.hasText(item.getRootSourceId())).collect(Collectors.toList());
             List<String> rootSourceIds = saveList.stream().map(LongArticlesRootSourceId::getRootSourceId).distinct().collect(Collectors.toList());
             List<LongArticlesRootSourceId> existList = longArticlesRootSourceIdRepository.getByRootSourceIdIn(rootSourceIds);
             for (LongArticlesRootSourceId existItem : existList) {

+ 23 - 20
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -1,7 +1,9 @@
 package com.tzld.longarticle.recommend.server.service.recommend.recall;
 
+import com.alibaba.fastjson.JSONObject;
 import com.google.common.collect.Lists;
 import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
+import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleTypeEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
 import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
@@ -12,6 +14,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.AccountCategory;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
 import com.tzld.longarticle.recommend.server.remote.aigc.AIGCWaitingPublishContentService;
@@ -20,6 +23,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRe
 import com.tzld.longarticle.recommend.server.repository.crawler.AccountCorrelationRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.AccountCategoryRepository;
 import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
 import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
 import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
@@ -39,6 +43,7 @@ import org.springframework.beans.factory.annotation.Value;
 import org.springframework.context.ApplicationContext;
 import org.springframework.context.ApplicationContextAware;
 import org.springframework.stereotype.Service;
+import org.springframework.util.StringUtils;
 
 import javax.annotation.PostConstruct;
 import java.util.*;
@@ -76,6 +81,8 @@ public class RecallService implements ApplicationContextAware {
     ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
     @Autowired
     ArticleCategoryRepository articleCategoryRepository;
+    @Autowired
+    AccountCategoryRepository accountCategoryRepository;
 
     private final Map<String, RecallStrategy> strategyMap = new HashMap<>();
     private ApplicationContext applicationContext;
@@ -159,36 +166,18 @@ public class RecallService implements ApplicationContextAware {
                             + "账号名称: " + param.getAccountName());
             return content;
         }
-        // 标题历史均值
-        setTitleAvgViewCount(content, param.getGhId(), param.getType());
         // category 查询
         setContentCategory(content);
+        // 标题历史均值
+        setTitleAvgViewCount(content, param.getGhId(), param.getType());
         return content;
     }
 
     public void setContentCategory(List<Content> contentList) {
-//        Map<String, String> articleMd5Map = new HashMap<>();
-//        List<String> md5List = new ArrayList<>();
-//        for (Content content : contentList) {
-//            String md5 = generateArticleUniqueMd5(content.getCrawlerLink());
-//            md5List.add(md5);
-//            articleMd5Map.put(content.getId(), md5);
-//        }
-//        List<CrawlerMetaArticle> categoryList = getByUniqueIndexIn(md5List);
-//        if (CollectionUtils.isEmpty(categoryList)) {
-//            return;
-//        }
-//        Map<String, List<String>> categoryMap = categoryList.stream().collect(Collectors.groupingBy(CrawlerMetaArticle::getUniqueIndex,
-//                Collectors.mapping(CrawlerMetaArticle::getCategory, Collectors.toList())));
-//        for (Content content : contentList) {
-//            String md5 = articleMd5Map.get(content.getId());
-//            content.setCategory(categoryMap.get(md5));
-//        }
         List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId).collect(Collectors.toList());
         // 查询晋升rootProduceContentId
         List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository.getByChannelContentIdIn(channelContentIds);
         Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream().collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
-        List<String> produceContentIds = sourceMap.values().stream().map(ArticlePoolPromotionSource::getRootProduceContentId).collect(Collectors.toList());
         // 根据produceContentId查询category
         List<ArticleCategory> articleCategoryList = articleCategoryRepository.findAll();
         Map<String, ArticleCategory> categoryMap = articleCategoryList.stream().collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
@@ -291,6 +280,9 @@ public class RecallService implements ApplicationContextAware {
                 .filter(o -> "1".equals(o.getPosition()))
                 .filter(o -> Objects.nonNull(o.getReadAvg()) && o.getReadAvg() > 0 && o.getFans() > 1000)
                 .collect(Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
+        List<AccountCategory> accountCategoryList = accountCategoryRepository.getByStatus(StatusEnum.ONE.getCode());
+        Map<String, JSONObject> accountCategoryMap = accountCategoryList.stream().filter(o -> StringUtils.hasText(o.getCategoryMap()))
+                .collect(Collectors.toMap(AccountCategory::getGhId, o -> JSONObject.parseObject(o.getCategoryMap())));
         for (Content content : contentList) {
             List<Article> hisArticles = new ArrayList<>();
             Map<Integer, List<Article>> indexArticleMap = map.get(content.getTitle());
@@ -321,6 +313,17 @@ public class RecallService implements ApplicationContextAware {
                 if (ScoreStrategy.hisContentLateFilter(hisArticle.getPublishTimestamp())) {
                     continue;
                 }
+                // 历史表现 文章品类如果与历史发布账号负相关 则过滤,不计算该历史发布表现
+                JSONObject categoryWeightMap = accountCategoryMap.get(hisArticle.getGhId());
+                if (Objects.nonNull(categoryWeightMap) && CollectionUtils.isNotEmpty(content.getCategory())) {
+                    String category = content.getCategory().get(0);
+                    if (categoryWeightMap.containsKey(category)) {
+                        double weight = categoryWeightMap.getDoubleValue(category);
+                        if (weight < 0) {
+                            continue;
+                        }
+                    }
+                }
                 ContentHisPublishArticle article = new ContentHisPublishArticle();
                 BeanUtils.copyProperties(hisArticle, article);
                 article.setViewCount(hisArticle.getShowViewCount());