Bladeren bron

Merge branch '2025-04-15-luojunhui-cold-start-level-up-improve' of Server/long-article-recommend into master

luojunhui 1 week geleden
bovenliggende
commit
a0820b3e0e

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -32,6 +32,8 @@ public interface LongArticleBaseMapper {
     List<DatastatSortStrategy> getArticlePromotion(Integer viewCount, Double viewCountRate,
                                                    Integer fans, String dateStr, List<Integer> positions);
 
+    List<DatastatSortStrategy> getArticlePromotionCandidates(Integer fans, String dateStr, List<Integer> positions); // backed by the <select id="getArticlePromotionCandidates"> statement in LongArticleBaseMapper.xml; the three arguments are bound there as #{fans}, #{dateStr} and the "positions" foreach collection
+
     void batchInsertLongArticlesRootSourceId(List<LongArticlesRootSourceId> list);
 
     void batchInsertGetOffVideos(List<GetOffVideos> list);

+ 73 - 3
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticlePromotionService.java

@@ -44,6 +44,7 @@ import com.xxl.job.core.biz.model.ReturnT;
 import com.xxl.job.core.handler.annotation.XxlJob;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.math3.distribution.NormalDistribution;
 import org.springframework.beans.BeanUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
@@ -52,7 +53,9 @@ import org.springframework.util.StringUtils;
 
 import java.net.URLDecoder;
 import java.util.*;
+import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 @Service
 @Slf4j
@@ -88,7 +91,6 @@ public class ArticlePromotionService {
     AigcBaseMapper aigcBaseMapper;
     @Autowired
     VideoTitleReWriteRepository videoTitleReWriteRepository;
-
     @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
     private Map<String, Map<String, Map<String, String>>> produceConfig;
     @Value("${topProducePlanId:}")
@@ -96,13 +98,81 @@ public class ArticlePromotionService {
 
     private final List<String> contentPoolType = Arrays.asList("autoArticlePoolLevel1", "autoArticlePoolLevel3", "autoArticlePoolLevel4");
 
+    /**
+     * Decides whether the experiment group's rate (viewCount / fansB) is above the control group's rate (readAvg / fansA) at the given confidence level.
+     * @param readAvg control-group numerator; readAvg / fansA must be greater than zero, otherwise IllegalArgumentException is thrown
+     * @param fansA control-group denominator (used as the sample size); must be greater than zero, otherwise IllegalArgumentException is thrown
+     * @param viewCount experiment-group numerator
+     * @param fansB experiment-group denominator (used as the sample size); must be greater than zero, otherwise IllegalArgumentException is thrown
+     * @param confidence confidence level; must lie strictly between 0 and 1, otherwise IllegalArgumentException is thrown
+     * @return true when the lower confidence bound of (pB - pA) / pA is greater than zero, false otherwise
+     */
+    public static boolean isExperimentGroupStatisticallySuperior(double readAvg, int fansA, double viewCount, int fansB, double confidence) {
+
+        if (fansA <= 0 || fansB <= 0) {
+            throw new IllegalArgumentException("样本量必须大于零"); // message: "sample size must be greater than zero"
+        }
+        if (confidence <= 0 || confidence >= 1) {
+            throw new IllegalArgumentException("置信水平必须在 (0,1) 区间"); // message: "confidence level must be within the (0,1) interval"
+        }
+        double pA = readAvg / fansA; // control-group rate
+        double pB = viewCount / fansB; // experiment-group rate
+
+        if (pA <= 0) { // pA is the divisor of the relative lift below, so it has to be positive
+            throw new IllegalArgumentException("A组比例必须大于零"); // message: "group A's ratio must be greater than zero"
+        }
+        double varA = pA * (1 - pA) / fansA; // variance of a proportion: p * (1 - p) / n
+        double varB = pB * (1 - pB) / fansB;
+        double se = Math.sqrt(varA + varB); // standard error of (pB - pA); NOTE(review): a rate above 1 makes its variance term negative, and if the sum is negative se is NaN and the method returns false - confirm that is intended
+
+        NormalDistribution normal = new NormalDistribution(); // no-arg constructor: presumably the standard normal (mean 0, sd 1) - see commons-math3 docs
+        double alpha = (1 - confidence) / 2; // the tail probability is halved, i.e. a two-sided threshold
+        double zThreshold = normal.inverseCumulativeProbability(1 - alpha); // quantile at 1 - alpha
+        double ciLow = (pB - pA - zThreshold * se) / pA; // lower confidence bound of the difference, expressed relative to pA
+        return ciLow > 0; // any comparison with NaN is false, so a NaN bound yields false
+    }
+
     public void articlePromotion(String pos, String way, String accountNickName, String tag,
                                  Integer viewCountFilter, Double viewCountRateFilter, List<Integer> positionFilter) {
         String today = DateUtils.getCurrentDateStr("yyyyMMdd");
         String dateStrFilter = DateUtils.getBeforeDaysDateStr("yyyyMMdd", 10);
         // 获取内部表现
-        List<DatastatSortStrategy> list = longArticleBaseMapper.getArticlePromotion(viewCountFilter, viewCountRateFilter,
-                10000, dateStrFilter, positionFilter);
+        List<DatastatSortStrategy> list;
+        if (pos.equals("【2】")) {
+            list = longArticleBaseMapper.getArticlePromotionCandidates(10000, dateStrFilter, positionFilter);
+
+            // 使用阅读均值倍数+阅读量晋级
+            List<DatastatSortStrategy> listStrategy1 = list.stream()
+                    .filter(o -> o.getReadRate() >= 1.33 && o.getViewCount() >= 100)
+                    .collect(Collectors.toList());
+
+            // 使用显著性检验晋级
+            List<DatastatSortStrategy> listStrategy2 = list.stream()
+                    .filter(o -> {
+                        try {
+                            return isExperimentGroupStatisticallySuperior(o.getAvgViewCount() * 1.1 * 30, o.getFans() * 30,
+                                    o.getViewCount(), o.getFans(), 0.95);
+                        }
+                        catch (Exception e) {
+                            log.error("显著性检验, 出现异常: {}", e.getMessage());
+                            return false;
+                        }
+                    })
+                    .collect(Collectors.toList());
+
+            list = Stream.concat(listStrategy1.stream(), listStrategy2.stream())
+                    .collect(Collectors.collectingAndThen(
+                            Collectors.toMap(
+                                    DatastatSortStrategy::getWxSn,
+                                    Function.identity(),
+                                    (existing, replacement) -> existing
+                            ),
+                            map -> new ArrayList<>(map.values())
+                    ));
+        } else {
+            list = longArticleBaseMapper.getArticlePromotion(viewCountFilter, viewCountRateFilter,
+                    10000, dateStrFilter, positionFilter);
+        }
         list = filterEarlyContent(list, true);
         log.info("优质{}文章数量: {}", accountNickName, list.size());
         List<DatastatSortStrategy> distinct = filterSameTitle(list);

+ 13 - 0
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -111,6 +111,19 @@
         </foreach>
     </select>
 
+    <select id="getArticlePromotionCandidates"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy"> <!-- Returns every datastat_sort_strategy row with type = 9, fans above #{fans}, date_str after #{dateStr} and position in the supplied list; no view-count filter is applied here. NOTE(review): an empty positions list would leave "position in" without a valid value list; confirm callers never pass one. -->
+        select *
+        from datastat_sort_strategy
+        where type = 9
+        and fans > #{fans}
+        and date_str > #{dateStr}
+        and position in
+        <foreach collection="positions" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+
     <insert id="batchInsertLongArticlesRootSourceId">
         INSERT INTO long_articles_root_source_id (root_source_id, account_name, gh_id, article_title, request_time,
         trace_id, push_type, video_id)