瀏覽代碼

2025-04-15 冷启动晋级优化

luojunhui 2 月之前
父節點
當前提交
1d2a40f9c7

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -32,6 +32,8 @@ public interface LongArticleBaseMapper {
     List<DatastatSortStrategy> getArticlePromotion(Integer viewCount, Double viewCountRate,
                                                    Integer fans, String dateStr, List<Integer> positions);
 
+    /**
+     * Loads promotion candidates from {@code datastat_sort_strategy} (rows with type = 9)
+     * without applying any view-count threshold in SQL.
+     *
+     * @param fans      exclusive lower bound on the row's {@code fans} value
+     * @param dateStr   exclusive lower bound on the row's {@code date_str} value
+     * @param positions allowed {@code position} values
+     *                  (NOTE(review): presumably must be non-empty - confirm against callers)
+     * @return the matching rows
+     */
+    List<DatastatSortStrategy> getArticlePromotionV2(Integer fans, String dateStr, List<Integer> positions);
+
     void batchInsertLongArticlesRootSourceId(List<LongArticlesRootSourceId> list);
 
     void batchInsertGetOffVideos(List<GetOffVideos> list);

+ 73 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticlePromotionService.java

@@ -44,6 +44,7 @@ import com.xxl.job.core.biz.model.ReturnT;
 import com.xxl.job.core.handler.annotation.XxlJob;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.math3.distribution.NormalDistribution;
 import org.springframework.beans.BeanUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
@@ -52,7 +53,9 @@ import org.springframework.util.StringUtils;
 
 import java.net.URLDecoder;
 import java.util.*;
+import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 @Service
 @Slf4j
@@ -94,6 +97,41 @@ public class ArticlePromotionService {
     @Value("${topProducePlanId:}")
     private String topProducePlanId;
 
+    /**
+     * Decides whether group B's rate is significantly higher than group A's baseline rate.
+     *
+     * <p>The rates are {@code pA = readAvg / fansA} and {@code pB = viewCount / fansB}. The
+     * standard error is the unpooled two-proportion form
+     * {@code sqrt(pA(1-pA)/fansA + pB(1-pB)/fansB)}, and the critical value is the two-sided
+     * standard-normal quantile for {@code confidence}. The method returns {@code true} when
+     * the lower bound of the confidence interval of the relative uplift
+     * {@code (pB - pA) / pA} is strictly above zero.
+     *
+     * <p>A rate outside {@code [0, 1]} (for example more views than fans) makes
+     * {@code p * (1 - p)} negative. Each variance term is therefore clamped at zero so the
+     * square root stays defined; without the clamp the resulting NaN silently turns the
+     * answer into {@code false} no matter how large the uplift is.
+     *
+     * @param readAvg    numerator of group A's rate
+     * @param fansA      sample size of group A; must be positive
+     * @param viewCount  numerator of group B's rate
+     * @param fansB      sample size of group B; must be positive
+     * @param confidence two-sided confidence level, strictly between 0 and 1
+     * @return {@code true} if the lower confidence bound of the relative uplift is positive
+     * @throws IllegalArgumentException if a sample size is not positive, {@code confidence}
+     *                                  is outside (0, 1), or group A's rate is not positive
+     */
+    public static boolean abZTestCorrected(double readAvg, int fansA, double viewCount, int fansB, double confidence) {
+        // Validate the basic arguments.
+        if (fansA <= 0 || fansB <= 0) {
+            throw new IllegalArgumentException("样本量必须大于零");
+        }
+        if (confidence <= 0 || confidence >= 1) {
+            throw new IllegalArgumentException("置信水平必须在 (0,1) 区间");
+        }
+
+        // Rates of both groups (operands are already double, so this is floating-point division).
+        double pA = readAvg / fansA;
+        double pB = viewCount / fansB;
+
+        // pA is the divisor of the relative uplift below, so it has to be positive.
+        if (pA <= 0) {
+            throw new IllegalArgumentException("A组比例必须大于零");
+        }
+
+        // Unpooled variance of the difference. p * (1 - p) goes negative once a rate leaves
+        // [0, 1]; clamp it so Math.sqrt never receives a negative sum and yields NaN.
+        double varA = Math.max(0.0, pA * (1 - pA)) / fansA;
+        double varB = Math.max(0.0, pB * (1 - pB)) / fansB;
+        double se = Math.sqrt(varA + varB);
+
+        // Critical z value for a two-sided interval at the requested confidence level.
+        NormalDistribution normal = new NormalDistribution();
+        double alpha = (1 - confidence) / 2;
+        double zThreshold = normal.inverseCumulativeProbability(1 - alpha);
+
+        // Lower bound of the confidence interval, expressed relative to the baseline rate.
+        double ciLow = (pB - pA - zThreshold * se) / pA;
+
+        // B is accepted only when even the lower bound shows a positive uplift.
+        return ciLow > 0;
+    }
+    
     private final List<String> contentPoolType = Arrays.asList("autoArticlePoolLevel1", "autoArticlePoolLevel3", "autoArticlePoolLevel4");
 
     public void articlePromotion(String pos, String way, String accountNickName, String tag,
@@ -101,8 +139,41 @@ public class ArticlePromotionService {
         String today = DateUtils.getCurrentDateStr("yyyyMMdd");
         String dateStrFilter = DateUtils.getBeforeDaysDateStr("yyyyMMdd", 10);
         // 获取内部表现
-        List<DatastatSortStrategy> list = longArticleBaseMapper.getArticlePromotion(viewCountFilter, viewCountRateFilter,
-                10000, dateStrFilter, positionFilter);
+        List<DatastatSortStrategy> list;
+        if (pos.equals("【2】")) {
+            list = longArticleBaseMapper.getArticlePromotionV2(10000, dateStrFilter, positionFilter);
+
+            // 使用阅读均值倍数+阅读量晋级
+            List<DatastatSortStrategy> listStrategy1 = list.stream()
+                    .filter(o -> o.getReadRate() >= 1.33 && o.getViewCount() >= 100)
+                    .collect(Collectors.toList());
+
+            // 使用显著性检验晋级
+            List<DatastatSortStrategy> listStrategy2 = list.stream()
+                    .filter(o -> abZTestCorrected(
+                            o.getAvgViewCount() * 1.1 * 30, o.getFans() * 30, o.getViewCount(), o.getFans(), 0.95))
+                    .collect(Collectors.toList());
+
+            // merge 两个 list, 通过wx_sn去重
+            // 临时用 Map 存储去重(保留第一个出现的元素)
+            // 冲突时保留已有元素
+            // 从 Map 的 values 中提取结果
+            list = Stream.concat(listStrategy1.stream(), listStrategy2.stream())
+                    // Deduplicate by wx_sn (the toMap key below), not by title
+                    .collect(Collectors.collectingAndThen(
+                            // 临时用 Map 存储去重(保留第一个出现的元素)
+                            Collectors.toMap(
+                                    DatastatSortStrategy::getWxSn,
+                                    Function.identity(),
+                                    (existing, replacement) -> existing // 冲突时保留已有元素
+                            ),
+                            // 从 Map 的 values 中提取结果
+                            map -> new ArrayList<>(map.values())
+                    ));
+        } else {
+            list = longArticleBaseMapper.getArticlePromotion(viewCountFilter, viewCountRateFilter,
+                    10000, dateStrFilter, positionFilter);
+        }
         list = filterEarlyContent(list, true);
         log.info("优质{}文章数量: {}", accountNickName, list.size());
         List<DatastatSortStrategy> distinct = filterSameTitle(list);

+ 13 - 0
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -111,6 +111,19 @@
         </foreach>
     </select>
 
+    <!--
+      Returns every datastat_sort_strategy row with type = 9 whose fans and date_str are
+      strictly greater than the given bounds and whose position is in the given list.
+      NOTE(review): an empty positions list would not produce a valid IN clause; confirm
+      that callers always pass at least one position.
+    -->
+    <select id="getArticlePromotionV2"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy">
+        select *
+        from datastat_sort_strategy
+        where type = 9
+        and fans > #{fans}
+        and date_str > #{dateStr}
+        and position in
+        <foreach collection="positions" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+
     <insert id="batchInsertLongArticlesRootSourceId">
         INSERT INTO long_articles_root_source_id (root_source_id, account_name, gh_id, article_title, request_time,
         trace_id, push_type, video_id)