瀏覽代碼

历史发布阅读均值倍数低过滤

wangyunpeng 10 月之前
父節點
當前提交
5646d667fd

+ 12 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/ThreadPoolFactory.java

@@ -15,7 +15,7 @@ public final class ThreadPoolFactory {
             32,
             128,
             0L, TimeUnit.SECONDS,
-            new LinkedBlockingQueue<>(10000),
+            new LinkedBlockingQueue<>(1000),
             new ThreadFactoryBuilder().setNameFormat("DEFAULT-%d").build(),
             new ThreadPoolExecutor.AbortPolicy());
     public final static ExecutorService RECALL = new CommonThreadPoolExecutor(
@@ -39,6 +39,13 @@ public final class ThreadPoolFactory {
             new LinkedBlockingQueue<>(1000),
             new ThreadFactoryBuilder().setNameFormat("ScoreService-%d").build(),
             new ThreadPoolExecutor.AbortPolicy());
+    // Executor returned by deDuplicatePool(); its worker threads are named "DeDuplicate-%d".
+    // NOTE(review): CommonThreadPoolExecutor is project-local; the arguments presumably mirror
+    // ThreadPoolExecutor(corePoolSize, maximumPoolSize, keepAliveTime, unit, workQueue,
+    // threadFactory, handler) - confirm against that class.
+    // NOTE(review): the sibling pools in this class are named in upper case (DEFAULT, RECALL,
+    // SCORE); consider renaming this field to DE_DUPLICATE for consistency.
+    private final static ExecutorService DeDuplicate = new CommonThreadPoolExecutor(
+            128,
+            128,
+            0L, TimeUnit.SECONDS,
+            new LinkedBlockingQueue<>(10000),
+            new ThreadFactoryBuilder().setNameFormat("DeDuplicate-%d").build(),
+            new ThreadPoolExecutor.AbortPolicy());
 
     public static ExecutorService defaultPool() {
         return DEFAULT;
@@ -56,5 +63,9 @@ public final class ThreadPoolFactory {
         return SCORE;
     }
 
+    /**
+     * Returns the shared executor held in the {@code DeDuplicate} field.
+     *
+     * @return the single, statically created de-duplication executor
+     */
+    public static ExecutorService deDuplicatePool() {
+        return DeDuplicate;
+    }
+
 
 }

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/ArticleRepository.java

@@ -16,4 +16,6 @@ public interface ArticleRepository extends JpaRepository<Article, String> {
 
     List<Article> getByGhIdInAndAppMsgIdInAndItemIndex(Set<String> ghIds, Set<String> appMsgIds, Integer itemIndex);
 
+    /**
+     * Finds articles by account id and update time.
+     *
+     * <p>NOTE(review): no query is declared here, so this is presumably resolved by Spring Data's
+     * method-name query derivation, i.e. {@code ghId IN (:ghIds) AND updateTime > :updateTime}
+     * with an exclusive lower bound - confirm against the Article entity mapping.
+     *
+     * @param ghIds      account ids to match
+     * @param updateTime lower bound for the article update time
+     * @return the matching articles
+     */
+    List<Article> getByGhIdInAndUpdateTimeGreaterThan(Set<String> ghIds, Long updateTime);
+
 }

+ 2 - 4
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/filter/FilterService.java

@@ -3,10 +3,7 @@ package com.tzld.longarticle.recommend.server.service.filter;
 import com.tzld.longarticle.recommend.server.common.ThreadPoolFactory;
 import com.tzld.longarticle.recommend.server.model.Content;
 import com.tzld.longarticle.recommend.server.service.ServiceBeanFactory;
-import com.tzld.longarticle.recommend.server.service.filter.strategy.BadStrategy;
-import com.tzld.longarticle.recommend.server.service.filter.strategy.HistoryTitleForFwhColdStartStrategy;
-import com.tzld.longarticle.recommend.server.service.filter.strategy.HistoryTitleStrategy;
-import com.tzld.longarticle.recommend.server.service.filter.strategy.SensitiveStrategy;
+import com.tzld.longarticle.recommend.server.service.filter.strategy.*;
 import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
 import com.tzld.longarticle.recommend.server.util.JSONUtils;
 import lombok.extern.slf4j.Slf4j;
@@ -102,6 +99,7 @@ public class FilterService {
                 strategies.add(ServiceBeanFactory.getBean(HistoryTitleStrategy.class));
                 strategies.add(ServiceBeanFactory.getBean(BadStrategy.class));
                 strategies.add(ServiceBeanFactory.getBean(SensitiveStrategy.class));
+                strategies.add(ServiceBeanFactory.getBean(LowScoreStrategy.class));
                 break;
         }
         return strategies;

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/filter/strategy/HistoryTitleForFwhColdStartStrategy.java

@@ -49,7 +49,7 @@ public class HistoryTitleForFwhColdStartStrategy implements FilterStrategy {
         }
         filterResult.setContentIds(result);
         filterResult.setFilterContent(filterContents);
-        log.info("HistoryTitleStrategy cost:{}", System.currentTimeMillis() - start);
+        log.info("HistoryTitleForFwhColdStartStrategy cost:{}", System.currentTimeMillis() - start);
         return filterResult;
     }
 

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/filter/strategy/HistoryTitleStrategy.java

@@ -40,7 +40,7 @@ public class HistoryTitleStrategy implements FilterStrategy {
     private static final List<Integer> firstSecondIndex = Arrays.asList(1, 2);
     private static final List<Integer> allIndex = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8);
 
-    private final ExecutorService pool = ThreadPoolFactory.defaultPool();
+    private final ExecutorService pool = ThreadPoolFactory.deDuplicatePool();
 
     @Override
     public FilterResult filter(FilterParam param) {

+ 15 - 46
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/filter/strategy/LowScoreStrategy.java

@@ -7,18 +7,12 @@ import com.tzld.longarticle.recommend.server.service.AccountIndexAvgViewCountSer
 import com.tzld.longarticle.recommend.server.service.filter.FilterParam;
 import com.tzld.longarticle.recommend.server.service.filter.FilterResult;
 import com.tzld.longarticle.recommend.server.service.filter.FilterStrategy;
-import com.tzld.longarticle.recommend.server.service.score.Score;
-import com.tzld.longarticle.recommend.server.util.MathUtils;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;
 
 import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Objects;
-import java.util.stream.Collectors;
 
 @Component
 @Slf4j
@@ -33,13 +27,8 @@ public class LowScoreStrategy implements FilterStrategy {
     public FilterResult filter(FilterParam param) {
         long start = System.currentTimeMillis();
         FilterResult filterResult = new FilterResult();
-        if (param.getContents().size() <= 1000) {
-            filterResult.setContentIds(param.getContents().stream().map(Content::getId).collect(Collectors.toList()));
-            return filterResult;
-        }
         filterResult.setContentIds(new ArrayList<>());
-        List<Score> scores = new ArrayList<>();
-        Map<String, Content> contentMap = param.getContents().stream().collect(Collectors.toMap(Content::getId, c -> c));
+        filterResult.setFilterContent(new ArrayList<>());
         for (Content content : param.getContents()) {
             String[] contentPools = accountContentPoolConfigService.getContentPools(param.getAccountName());
             if (!contentPools[2].equals(content.getContentPoolType())) {
@@ -50,42 +39,22 @@ public class LowScoreStrategy implements FilterStrategy {
                 filterResult.getContentIds().add(content.getId());
                 continue;
             }
-            double avgViewCountPos = accountIndexAvgViewCountService.getAvgReadCount(param.getGhId(), 3);
-            double showViewCountSum = 0D;
-            double avgViewCountSum = 0D;
-            for (ContentHisPublishArticle hisItem : content.getHisPublishArticleList()) {
-                if (hisItem.isInnerAccount() && Objects.nonNull(hisItem.getViewCount())
-                        && hisItem.getViewCount() > 0 && Objects.nonNull(hisItem.getAvgViewCount())
-                        && hisItem.getAvgViewCount() > 0) {
-                    if (!(hisItem.getItemIndex() == 1) && !(hisItem.getItemIndex() == 2)) {
-                        showViewCountSum += hisItem.getViewCount();
-                        avgViewCountSum += hisItem.getAvgViewCount();
-                    }
-                }
-            }
-            double viewCountRate = 0D; // 设置默认值
-            double minRate = 5D;
-            if (avgViewCountSum > 0) {
-                viewCountRate = showViewCountSum / avgViewCountSum;
+            long publishCount = content.getHisPublishArticleList().stream().filter(ContentHisPublishArticle::isInnerAccount).count();
+            if (publishCount == 0) {
+                continue;
             }
-            double viewCountRateW = MathUtils.sigmoid(avgViewCountSum, 0.0005, avgViewCountPos);
-            double viewCountRateScore = 0;
-            if (viewCountRate > 0) {
-                viewCountRateScore = (Math.min(viewCountRate, minRate) - 1D) * viewCountRateW;
+            int hisViewCount = content.getHisPublishArticleList().stream().filter(ContentHisPublishArticle::isInnerAccount)
+                    .mapToInt(ContentHisPublishArticle::getViewCount).sum();
+            int hisAvgViewCount = content.getHisPublishArticleList().stream().filter(ContentHisPublishArticle::isInnerAccount)
+                    .mapToInt(ContentHisPublishArticle::getAvgViewCount).sum();
+            double rate = (hisViewCount * 1.0) / hisAvgViewCount;
+            if ((publishCount >= 2 && rate < 0.8)
+                    || (publishCount == 1 && rate < 0.5)) {
+                content.setFilterReason("低评分内容");
+                filterResult.getFilterContent().add(content);
+            } else {
+                filterResult.getContentIds().add(content.getId());
             }
-            Score score = new Score();
-            score.setContentId(content.getId());
-            score.setScore(viewCountRateScore);
-            scores.add(score);
-        }
-        scores.sort((o1, o2) -> -(o1.getScore().compareTo(o2.getScore())));
-        filterResult.getContentIds().addAll(scores.subList(0, Math.min(100, scores.size())).stream()
-                .map(Score::getContentId).collect(Collectors.toList()));
-        if (scores.size() > 100) {
-            List<Content> filterContent = scores.subList(100, scores.size() - 1).stream()
-                    .map(o -> contentMap.get(o.getContentId())).collect(Collectors.toList());
-            filterContent.forEach(o -> o.setFilterReason("低评分内容"));
-            filterResult.setFilterContent(filterContent);
         }
         log.info("LowScoreStrategy cost:{}", System.currentTimeMillis() - start);
         return filterResult;

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/filter/strategy/SensitiveStrategy.java

@@ -39,7 +39,7 @@ public class SensitiveStrategy implements FilterStrategy {
     @ApolloJsonValue("${UnSafeTitles:[]}")
     private static List<String> UnSafeTitles;
 
-    private final ExecutorService pool = ThreadPoolFactory.defaultPool();
+    private final ExecutorService pool = ThreadPoolFactory.deDuplicatePool();
 
     @Override
     public FilterResult filter(FilterParam param) {

+ 84 - 9
long-article-recommend-service/src/test/java/com/tzld/longarticle/recommend/server/RecommendTest.java

@@ -1,31 +1,106 @@
 package com.tzld.longarticle.recommend.server;
 
+import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
+import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
+import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
+import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.AccountAvgInfo;
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.Article;
+import com.tzld.longarticle.recommend.server.repository.entity.crawler.ArticleDetailInfo;
 import com.tzld.longarticle.recommend.server.service.RecommendService;
 import com.tzld.longarticle.recommend.server.service.recall.RecallParam;
 import com.tzld.longarticle.recommend.server.service.recall.RecallResult;
 import com.tzld.longarticle.recommend.server.service.recall.RecallService;
+import org.apache.commons.collections4.CollectionUtils;
 import org.junit.jupiter.api.Test;
 import org.springframework.boot.test.context.SpringBootTest;
 
 import javax.annotation.Resource;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import java.util.stream.Collectors;
 
-@SpringBootTest(classes = RecommendTest.class)
+@SpringBootTest(classes = Application.class)
 public class RecommendTest {
 
     @Resource
     private RecommendService recommendService;
     @Resource
     private RecallService recallService;
+    @Resource
+    private ArticleRepository articleRepository;
+    @Resource
+    private ArticleDetailInfoRepository articleDetailInfoRepository;
+    @Resource
+    private AccountAvgInfoRepository accountAvgInfoRepository;
+
+    @Test
+    void recall() {
+        RecallParam param = new RecallParam();
+        param.setAccountId("20231213123536190184852");
+        param.setPlanId("20240718181730864154902");
+        RecallResult recallResult = recallService.recall(param);
+        System.out.println(JSONObject.toJSONString(recallResult));
+    }
+
+    /**
+     * Ad-hoc export: loads the articles of a fixed set of accounts updated after
+     * 1722441600 (2024-07-31T16:00:00Z; update times are handled as epoch seconds), joins them
+     * with their detail rows and the per-position account averages, and prints one JSON object
+     * per article. Articles without any detail row are skipped. There are no assertions; the
+     * output is meant for manual inspection.
+     */
+    @Test
+    void exportData() {
+        Set<String> ghIds = new HashSet<>(Arrays.asList("gh_adca24a8f429", "gh_e0eb490115f5", "gh_51e4ad40466d", "gh_95ed5ecf9363"));
+        List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThan(ghIds, 1722441600L);
 
+        Set<String> snList = articleList.stream().map(Article::getWxSn).collect(Collectors.toSet());
+        List<ArticleDetailInfo> articleDetailInfoList = articleDetailInfoRepository.getAllByWxSnIn(snList);
+        Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
+                .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
 
-//    @Test
-//    void recall() {
-//        RecallParam param = new RecallParam();
-//        param.setAccountId("20231213123536190184852");
-//        param.setPlanId("20240718181730864154902");
-//        RecallResult recallResult = recallService.recall(param);
-//        System.out.println(JSONObject.toJSONString(recallResult));
-//    }
+        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(ghIds);
+        Map<String, Map<String, AccountAvgInfo>> accountAvgInfoIndexMap = accountAvgInfoList.stream().collect(
+                Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.toMap(AccountAvgInfo::getPosition, o -> o)));
+        // Built once for the whole export instead of once per article.
+        SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
+        JSONArray jsonArray = new JSONArray();
+        for (Article article : articleList) {
+            List<ArticleDetailInfo> articleDetailInfos = articleDetailInfoMap.get(article.getWxSn());
+            if (CollectionUtils.isEmpty(articleDetailInfos)) {
+                continue;
+            }
+            // Only the rows of the earliest recall date are summed; rows without a date are ignored.
+            Date minDate = articleDetailInfos.stream().map(ArticleDetailInfo::getRecallDt)
+                    .filter(Objects::nonNull).min(Date::compareTo).orElse(new Date());
+            int firstLevel = 0;
+            int sumFission0 = 0;
+            int sumFission1 = 0;
+            int sumFission2 = 0;
+            for (ArticleDetailInfo articleDetailInfo : articleDetailInfos) {
+                if (minDate.equals(articleDetailInfo.getRecallDt())) {
+                    firstLevel += Optional.ofNullable(articleDetailInfo.getFirstLevel()).orElse(0);
+                    sumFission0 += Optional.ofNullable(articleDetailInfo.getFission0()).orElse(0);
+                    sumFission1 += Optional.ofNullable(articleDetailInfo.getFission1()).orElse(0);
+                    sumFission2 += Optional.ofNullable(articleDetailInfo.getFission2()).orElse(0);
+                }
+            }
+            // An account may have no average rows at all; fall back to an empty map so the
+            // article is still exported, just without the average-based fields.
+            Map<String, AccountAvgInfo> accountAvgInfoMap =
+                    accountAvgInfoIndexMap.getOrDefault(article.getGhId(), Collections.emptyMap());
+            AccountAvgInfo avgInfo = accountAvgInfoMap.get(String.valueOf(article.getItemIndex()));
+            // 1000L forces long arithmetic when converting seconds to milliseconds.
+            String date = sdf.format(new Date(article.getUpdateTime() * 1000L));
+            JSONObject obj = new JSONObject();
+            obj.put("ghId", article.getGhId());
+            obj.put("accountName", article.getAccountName());
+            obj.put("title", article.getTitle());
+            obj.put("index", article.getItemIndex());
+            obj.put("viewCount", article.getShowViewCount());
+            obj.put("time", date);
+            if (Objects.nonNull(avgInfo)) {
+                obj.put("fans", avgInfo.getFans());
+                obj.put("avgViewCount", avgInfo.getReadAvg());
+                obj.put("viewCountRate", (article.getShowViewCount() * 1.0) / avgInfo.getReadAvg());
+            }
+            // Previously this key was filled with sumFission0, leaving firstLevel unused.
+            obj.put("firstLevel", firstLevel);
+            obj.put("fission0", sumFission0);
+            obj.put("fission1", sumFission1);
+            obj.put("fission2", sumFission2);
+            jsonArray.add(obj);
+        }
+        System.out.println(jsonArray.toJSONString());
+    }
 
 }