Explorar o código

长文增加表现

luojunhui hai 2 días
pai
achega
4a97c42654

+ 22 - 0
core/src/main/java/com/tzld/videoVector/dao/mapper/pgVector/ext/ArticleQualityMapperExt.java

@@ -0,0 +1,22 @@
+package com.tzld.videoVector.dao.mapper.pgVector.ext;
+
+import com.tzld.videoVector.model.po.pgVector.ArticleQuality;
+import org.apache.ibatis.annotations.Param;
+
+import java.util.List;
+
+/**
+ * Custom MyBatis mapper for the {@code article_quality} table.
+ */
+public interface ArticleQualityMapperExt {
+
+    /**
+     * Batch upsert (multi-row INSERT with ON CONFLICT DO UPDATE).
+     *
+     * @param list rows to write; the mapped statement iterates this list to build the VALUES
+     *             tuples, so callers are expected to pass a non-empty list
+     * @return the affected-row count reported by the statement
+     */
+    int batchUpsert(@Param("list") List<ArticleQuality> list);
+
+    /**
+     * Batch lookup of quality rows by content id, keeping the row with the latest {@code dt}
+     * for each id.
+     *
+     * @param contentIds content ids to look up; the mapped statement renders them into an
+     *                   IN (...) clause, so callers are expected to pass a non-empty list
+     * @return matching rows, at most one per content id
+     */
+    List<ArticleQuality> selectByContentIds(@Param("contentIds") List<String> contentIds);
+}

+ 352 - 0
core/src/main/java/com/tzld/videoVector/job/ArticleQualitySyncJob.java

@@ -0,0 +1,352 @@
+package com.tzld.videoVector.job;
+
+import com.alibaba.fastjson.JSON;
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+import com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleQualityMapperExt;
+import com.tzld.videoVector.model.po.pgVector.ArticleQuality;
+import com.tzld.videoVector.util.ArticleQualityCalculator;
+import com.tzld.videoVector.util.OdpsUtil;
+import com.xxl.job.core.biz.model.ReturnT;
+import com.xxl.job.core.handler.annotation.XxlJob;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Component;
+
+import javax.annotation.Resource;
+import java.time.LocalDate;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * 文章质量评分同步 Job
+ * 从 ODPS loghubods.article_title_his_cache 拉取发布表现数据,
+ * 解析 his_publish_article_list JSON,聚合计算综合质量分,写入 pgVector article_quality 表。
+ */
+@Component
+public class ArticleQualitySyncJob {
+
+    private static final Logger log = LoggerFactory.getLogger(ArticleQualitySyncJob.class);
+
+    private static final DateTimeFormatter DT_FMT = DateTimeFormatter.ofPattern("yyyyMMdd");
+    private static final int DB_BATCH_SIZE = 200;
+
+    @Resource
+    private ArticleQualityMapperExt articleQualityMapperExt;
+
+    /**
+     * 同步文章质量评分
+     *
+     * 可选参数(逗号分隔):
+     * wRead=0.4      — 阅读维度权重,默认 0.4
+     * wOpen=0.3      — 打开率维度权重,默认 0.3
+     * wFission=0.3   — 裂变率维度权重,默认 0.3
+     * confidenceThreshold=3 — 置信度发文次数阈值,默认 3
+     * dryRun=true    — 仅打印不写库
+     * dt=20260607    — ODPS 分区日期,默认昨天
+     * maxRows=10000  — 最多读取行数,默认不限(全量)
+     */
+    @XxlJob("articleQualityJob")
+    public ReturnT<String> articleQualityJob(String param) {
+        log.info("===== articleQualityJob 开始, param: {} =====", param);
+
+        double wRead = parseParamDouble(param, "wRead", 0.4);
+        double wOpen = parseParamDouble(param, "wOpen", 0.3);
+        double wFission = parseParamDouble(param, "wFission", 0.3);
+        int confidenceThreshold = (int) parseParamDouble(param, "confidenceThreshold", 3);
+        boolean dryRun = parseParamBool(param, "dryRun");
+        String odpsDt = parseParamString(param, "dt", LocalDate.now().minusDays(1).format(DT_FMT));
+        int maxRows = (int) parseParamDouble(param, "maxRows", 0);
+
+        String dt = LocalDate.now().format(DT_FMT);
+        log.info("权重: r={} o={} f={}, 置信度阈值: {}, ODPS分区dt={}, 写入dt={}",
+                wRead, wOpen, wFission, confidenceThreshold, odpsDt, dt);
+
+        // Step 0: 探针——确认分区有数据
+        String probeSql = "SELECT COUNT(*) AS cnt FROM loghubods.article_title_his_cache WHERE dt = '" + odpsDt + "' AND type = '9'";
+        log.info("探针 SQL: {}", probeSql);
+        try {
+            long[] rowCount = {0};
+            OdpsUtil.getOdpsDataStream(probeSql, record -> {
+                rowCount[0] = record.getBigint("cnt") != null ? record.getBigint("cnt") : 0;
+            });
+            log.info("探针: dt={} 总行数={}", odpsDt, rowCount[0]);
+            if (rowCount[0] == 0) {
+                log.warn("分区 dt={} 无数据,请确认分区值是否正确", odpsDt);
+                return ReturnT.FAIL;
+            }
+        } catch (Exception e) {
+            log.error("探针查询失败: {}", e.getMessage(), e);
+            return ReturnT.FAIL;
+        }
+
+        // Step 1: 从 ODPS 一次流式读取(OdpsUtil 底层已是流式,不会 OOM)
+        List<ArticleQuality> rawList = new ArrayList<>();
+        long[] totalRows = {0};
+        long[] parseFailCount = {0};
+        long[] emptyDataCount = {0};
+        long[] validCount = {0};
+
+        String sql = maxRows > 0
+                ? "SELECT source_id, his_publish_article_list "
+                    + "FROM loghubods.article_title_his_cache "
+                    + "WHERE dt = '" + odpsDt + "' "
+                    + "AND type = '9' "
+                    + "AND his_publish_article_list IS NOT NULL "
+                    + "LIMIT " + maxRows
+                : "SELECT source_id, his_publish_article_list "
+                    + "FROM loghubods.article_title_his_cache "
+                    + "WHERE dt = '" + odpsDt + "' "
+                    + "AND type = '9' "
+                    + "AND his_publish_article_list IS NOT NULL";
+
+        log.info("ODPS SQL: {}", sql);
+        try {
+            OdpsUtil.getOdpsDataStream(sql, record -> {
+                String contentId = record.getString("source_id");
+                String hisPublishStr = record.getString("his_publish_article_list");
+
+                if (contentId == null || contentId.isEmpty()) return;
+                if (hisPublishStr == null || hisPublishStr.isEmpty()) return;
+
+                // 采样前 3 条原始数据
+                if (totalRows[0] < 3) {
+                    String preview = hisPublishStr.length() > 500
+                            ? hisPublishStr.substring(0, 500) + "..."
+                            : hisPublishStr;
+                    System.out.println("[采样" + totalRows[0] + "] contentId=" + contentId + ", json=" + preview);
+                }
+
+                ArticleQuality aq = aggregateFromHisPublishList(contentId, hisPublishStr);
+                if (aq != null) {
+                    synchronized (rawList) { rawList.add(aq); }
+                    validCount[0]++;
+                } else {
+                    // 区分解析失败 vs 空数据
+                    if (isJsonParseFail(hisPublishStr)) {
+                        parseFailCount[0]++;
+                        if (parseFailCount[0] <= 5) {
+                            String preview = hisPublishStr.length() > 300
+                                    ? hisPublishStr.substring(0, 300) + "..."
+                                    : hisPublishStr;
+                            System.out.println("[解析失败#" + parseFailCount[0] + "] contentId=" + contentId + ", json=" + preview);
+                        }
+                    } else {
+                        emptyDataCount[0]++;
+                    }
+                }
+                totalRows[0]++;
+                if (totalRows[0] % 10000 == 0) {
+                    System.out.println("[进度] " + totalRows[0] + " 行, 有效=" + validCount[0] + ", 解析失败=" + parseFailCount[0] + ", 无数据=" + emptyDataCount[0]);
+                }
+            });
+        } catch (Exception e) {
+            log.error("ODPS 查询异常: {}", e.getMessage(), e);
+            return ReturnT.FAIL;
+        }
+
+        System.out.println("[完成] 总行数=" + totalRows[0] + ", 有效=" + validCount[0] + ", 解析失败=" + parseFailCount[0] + ", 无数据=" + emptyDataCount[0]);
+        if (rawList.isEmpty()) {
+            log.warn("无有效文章表现数据");
+            return ReturnT.FAIL;
+        }
+
+        // Step 2: 计算质量分
+        ArticleQualityCalculator.calculateAll(rawList, wRead, wOpen, wFission, confidenceThreshold);
+
+        if (dryRun) {
+            log.info("===== DRY RUN 模式, 不写入DB =====");
+            printTopBottom(rawList);
+            return ReturnT.SUCCESS;
+        }
+
+        // Step 2.5: 按 contentId 去重(ODPS 同 contentId 可能多行)
+        Map<String, ArticleQuality> deduped = new LinkedHashMap<>();
+        for (ArticleQuality aq : rawList) {
+            deduped.putIfAbsent(aq.getContentId(), aq);
+        }
+        List<ArticleQuality> list = new ArrayList<>(deduped.values());
+        System.out.println("[去重] " + rawList.size() + " → " + list.size() + " 条");
+
+        // Step 3: 分批写入
+        System.out.println("[写入DB] 开始, 共 " + list.size() + " 条, dt=" + dt);
+        // 采样第一条数据
+        if (!list.isEmpty()) {
+            ArticleQuality sample = list.get(0);
+            System.out.println("[写入DB 采样] contentId=" + sample.getContentId()
+                    + ", qualityScore=" + round2(sample.getQualityScore())
+                    + ", dt=" + dt);
+        }
+        int totalUpserted = 0;
+        for (int i = 0; i < list.size(); i += DB_BATCH_SIZE) {
+            int end = Math.min(i + DB_BATCH_SIZE, list.size());
+            List<ArticleQuality> batch = list.subList(i, end);
+            // 设置 dt
+            for (ArticleQuality aq : batch) {
+                aq.setDt(dt);
+            }
+            int n = articleQualityMapperExt.batchUpsert(batch);
+            totalUpserted += n;
+            if ((i / DB_BATCH_SIZE) % 50 == 0) {
+                System.out.println("[写入DB 进度] " + end + "/" + rawList.size() + ", 本批" + n + "条, 累计" + totalUpserted + "条");
+            }
+        }
+        System.out.println("[写入DB 完成] upserted=" + totalUpserted);
+        return ReturnT.SUCCESS;
+    }
+
+    /**
+     * Parses a {@code his_publish_article_list} JSON array and aggregates it into a single
+     * {@link ArticleQuality}.
+     *
+     * <p>Only the raw metrics are populated here (total read, avg read, fans, publish count,
+     * open rate, fission rate); score fields are left unset.
+     *
+     * @param contentId          content id to stamp on the result
+     * @param hisPublishListJson JSON array text, one element per publication
+     * @return the aggregated row, or {@code null} when the JSON cannot be parsed, the array is
+     *         null/empty, or both the summed {@code viewCount} and the summed
+     *         {@code avgViewCount} are not positive
+     */
+    private ArticleQuality aggregateFromHisPublishList(String contentId, String hisPublishListJson) {
+        JSONArray publishList;
+        try {
+            publishList = JSON.parseArray(hisPublishListJson);
+        } catch (Exception e) {
+            log.info("contentId={} his_publish_article_list JSON 解析失败: {}", contentId, e.getMessage());
+            return null;
+        }
+        if (publishList == null || publishList.isEmpty()) {
+            return null;
+        }
+
+        long totalRead = 0;
+        double totalAvgRead = 0;
+        long totalFans = 0;
+        int maxItemIndex = -1;
+        double totalFirstLevel = 0;
+        double totalFission = 0;
+
+        for (int i = 0; i < publishList.size(); i++) {
+            JSONObject pub = publishList.getJSONObject(i);
+            if (pub == null) continue;
+
+            long viewCount = pub.getLongValue("viewCount");
+            totalRead += viewCount;
+
+            Double avgView = pub.getDouble("avgViewCount");
+            if (avgView != null) {
+                totalAvgRead += avgView;
+            }
+
+            // Keep the fan count of the entry with the largest itemIndex (the original comment
+            // treats that entry as the most recent publication).
+            int itemIndex = pub.getIntValue("itemIndex");
+            if (itemIndex > maxItemIndex) {
+                maxItemIndex = itemIndex;
+                totalFans = pub.getLongValue("fans");
+            }
+
+            // First-level and fission counts, summed over the publication's detail entries.
+            JSONArray fissionList = pub.getJSONArray("articleDetailInfoList");
+            if (fissionList != null) {
+                double pubFirstLevel = 0;
+                double pubFission = 0;
+                for (int j = 0; j < fissionList.size(); j++) {
+                    JSONObject fi = fissionList.getJSONObject(j);
+                    if (fi == null) continue;
+                    pubFirstLevel += fi.getDoubleValue("firstLevel");
+                    pubFission += fi.getDoubleValue("fission0")
+                            + fi.getDoubleValue("fission1")
+                            + fi.getDoubleValue("fission2");
+                }
+                totalFirstLevel += pubFirstLevel;
+                totalFission += pubFission;
+            }
+        }
+
+        // Nothing usable: neither reads nor average reads were recorded.
+        if (totalRead <= 0 && totalAvgRead <= 0) {
+            return null;
+        }
+
+        ArticleQuality aq = new ArticleQuality();
+        aq.setContentId(contentId);
+        aq.setTotalRead(totalRead);
+        // NOTE(review): this stores the SUM of the per-publication avgViewCount values, not
+        // their mean - confirm whether a division by the publication count was intended.
+        aq.setAvgRead(totalAvgRead);
+        aq.setTotalFans(totalFans);
+        // Counts every array element, including null entries skipped by the loop above.
+        aq.setPublishCount(publishList.size());
+        // Open rate = summed firstLevel / summed viewCount; 0 when there are no reads.
+        aq.setOpenRate(totalRead > 0 ? totalFirstLevel / totalRead : 0);
+        // Fission rate = summed fission0..2 / summed firstLevel; 0 when firstLevel is 0.
+        aq.setFissionRate(totalFirstLevel > 0 ? totalFission / totalFirstLevel : 0);
+        return aq;
+    }
+
+    /**
+     * Reports whether {@code json} fails to parse as a JSON array.
+     *
+     * @param json text to check
+     * @return {@code true} if {@code JSON.parseArray} throws, {@code false} otherwise
+     */
+    private static boolean isJsonParseFail(String json) {
+        boolean failed = false;
+        try {
+            JSON.parseArray(json);
+        } catch (Exception e) {
+            failed = true;
+        }
+        return failed;
+    }
+
+    // ===== 辅助方法 =====
+
+    /**
+     * Looks up {@code key} in a comma-separated {@code key=value} string and parses the value
+     * as a double.
+     *
+     * @param param        raw parameter string; may be null or empty
+     * @param key          parameter name to look for
+     * @param defaultValue value returned when the key is absent or no matching entry parses
+     * @return the first parseable value for {@code key}, otherwise {@code defaultValue}
+     */
+    private static double parseParamDouble(String param, String key, double defaultValue) {
+        if (param == null || param.isEmpty()) {
+            return defaultValue;
+        }
+        for (String rawEntry : param.split(",")) {
+            String entry = rawEntry.trim();
+            int eq = entry.indexOf('=');
+            if (eq < 0) {
+                continue;
+            }
+            if (!entry.substring(0, eq).trim().equals(key)) {
+                continue;
+            }
+            try {
+                return Double.parseDouble(entry.substring(eq + 1).trim());
+            } catch (NumberFormatException ignored) {
+                // Unparseable value: keep scanning later entries, then fall back to the default.
+            }
+        }
+        return defaultValue;
+    }
+
+    /**
+     * Looks up {@code key} in a comma-separated {@code key=value} string and returns its
+     * trimmed value.
+     *
+     * @param param        raw parameter string; may be null or empty
+     * @param key          parameter name to look for
+     * @param defaultValue value returned when the key is absent
+     * @return the trimmed value of the first entry named {@code key}, otherwise {@code defaultValue}
+     */
+    private static String parseParamString(String param, String key, String defaultValue) {
+        if (param == null || param.isEmpty()) {
+            return defaultValue;
+        }
+        for (String rawEntry : param.split(",")) {
+            String entry = rawEntry.trim();
+            int eq = entry.indexOf('=');
+            boolean matches = eq >= 0 && entry.substring(0, eq).trim().equals(key);
+            if (matches) {
+                return entry.substring(eq + 1).trim();
+            }
+        }
+        return defaultValue;
+    }
+
+    /**
+     * Looks up {@code key} in a comma-separated {@code key=value} string and reads it as a flag.
+     *
+     * @param param raw parameter string; may be null or empty
+     * @param key   parameter name to look for
+     * @return {@code true} only when the first entry named {@code key} has the value "true"
+     *         (case-insensitive); {@code false} when it has any other value or is absent
+     */
+    private static boolean parseParamBool(String param, String key) {
+        if (param == null || param.isEmpty()) {
+            return false;
+        }
+        for (String rawEntry : param.split(",")) {
+            String entry = rawEntry.trim();
+            int eq = entry.indexOf('=');
+            if (eq < 0 || !entry.substring(0, eq).trim().equals(key)) {
+                continue;
+            }
+            String flag = entry.substring(eq + 1).trim();
+            return "true".equalsIgnoreCase(flag);
+        }
+        return false;
+    }
+
+    /**
+     * Prints the highest- and lowest-scoring articles (up to 5 each) to standard output.
+     * The input list is not modified; a null quality score is ordered as 0.
+     *
+     * @param list scored articles
+     */
+    private static void printTopBottom(List<ArticleQuality> list) {
+        List<ArticleQuality> ranked = new ArrayList<>(list);
+        // Descending by quality score.
+        ranked.sort((left, right) -> {
+            double leftScore = left.getQualityScore() == null ? 0 : left.getQualityScore();
+            double rightScore = right.getQualityScore() == null ? 0 : right.getQualityScore();
+            return Double.compare(rightScore, leftScore);
+        });
+
+        int total = ranked.size();
+        int show = Math.min(5, total);
+
+        System.out.println("===== Top " + show + " 高质量文章 =====");
+        for (int rank = 1; rank <= show; rank++) {
+            ArticleQuality aq = ranked.get(rank - 1);
+            String details = "contentId=" + aq.getContentId()
+                    + ", qualityScore=" + round2(aq.getQualityScore())
+                    + ", readScore=" + round2(aq.getReadScore())
+                    + ", openScore=" + round2(aq.getOpenScore())
+                    + ", fissionScore=" + round2(aq.getFissionScore())
+                    + ", conf=" + round2(aq.getConfidence());
+            System.out.println("[" + rank + "] " + details);
+        }
+
+        System.out.println("===== Bottom " + show + " 低质量文章 =====");
+        // Walk from the worst entry upwards; rank 1 is the lowest score.
+        for (int rank = 1; rank <= show; rank++) {
+            ArticleQuality aq = ranked.get(total - rank);
+            String details = "contentId=" + aq.getContentId()
+                    + ", qualityScore=" + round2(aq.getQualityScore())
+                    + ", readScore=" + round2(aq.getReadScore())
+                    + ", openScore=" + round2(aq.getOpenScore())
+                    + ", fissionScore=" + round2(aq.getFissionScore())
+                    + ", conf=" + round2(aq.getConfidence());
+            System.out.println("[" + rank + "] " + details);
+        }
+    }
+
+    /**
+     * Rounds to two decimal places, mapping {@code null} to 0.
+     */
+    private static double round2(Double v) {
+        return v == null ? 0 : Math.round(v * 100.0) / 100.0;
+    }
+}

+ 67 - 0
core/src/main/java/com/tzld/videoVector/model/po/pgVector/ArticleQuality.java

@@ -0,0 +1,67 @@
+package com.tzld.videoVector.model.po.pgVector;
+
+import java.util.Date;
+
+/**
+ * Article quality score row (maps to the {@code article_quality} table in the pgVector database).
+ * Data source: ODPS loghubods.article_title_his_cache.his_publish_article_list
+ */
+public class ArticleQuality {
+
+    private Long id;
+    private String contentId;
+
+    // Raw aggregated metrics
+    private Long totalRead;
+    private Double avgRead;
+    private Long totalFans;
+    private Integer publishCount;
+    private Double openRate;
+    private Double fissionRate;
+
+    // Per-dimension scores (percentile-normalized)
+    private Double readScore;
+    private Double openScore;
+    private Double fissionScore;
+
+    // Composite score and the confidence applied to it
+    private Double qualityScore;
+    private Double confidence;
+
+    // Date key stored alongside the row, plus row timestamps
+    private String dt;
+    private Date createTime;
+    private Date updateTime;
+
+    // Plain accessors; no validation or derived values.
+    public Long getId() { return id; }
+    public void setId(Long id) { this.id = id; }
+    public String getContentId() { return contentId; }
+    public void setContentId(String contentId) { this.contentId = contentId; }
+    public Long getTotalRead() { return totalRead; }
+    public void setTotalRead(Long totalRead) { this.totalRead = totalRead; }
+    public Double getAvgRead() { return avgRead; }
+    public void setAvgRead(Double avgRead) { this.avgRead = avgRead; }
+    public Long getTotalFans() { return totalFans; }
+    public void setTotalFans(Long totalFans) { this.totalFans = totalFans; }
+    public Integer getPublishCount() { return publishCount; }
+    public void setPublishCount(Integer publishCount) { this.publishCount = publishCount; }
+    public Double getOpenRate() { return openRate; }
+    public void setOpenRate(Double openRate) { this.openRate = openRate; }
+    public Double getFissionRate() { return fissionRate; }
+    public void setFissionRate(Double fissionRate) { this.fissionRate = fissionRate; }
+    public Double getReadScore() { return readScore; }
+    public void setReadScore(Double readScore) { this.readScore = readScore; }
+    public Double getOpenScore() { return openScore; }
+    public void setOpenScore(Double openScore) { this.openScore = openScore; }
+    public Double getFissionScore() { return fissionScore; }
+    public void setFissionScore(Double fissionScore) { this.fissionScore = fissionScore; }
+    public Double getQualityScore() { return qualityScore; }
+    public void setQualityScore(Double qualityScore) { this.qualityScore = qualityScore; }
+    public Double getConfidence() { return confidence; }
+    public void setConfidence(Double confidence) { this.confidence = confidence; }
+    public String getDt() { return dt; }
+    public void setDt(String dt) { this.dt = dt; }
+    public Date getCreateTime() { return createTime; }
+    public void setCreateTime(Date createTime) { this.createTime = createTime; }
+    public Date getUpdateTime() { return updateTime; }
+    public void setUpdateTime(Date updateTime) { this.updateTime = updateTime; }
+}

+ 15 - 3
core/src/main/java/com/tzld/videoVector/model/vo/recall/RecallSignalsVO.java

@@ -27,9 +27,21 @@ public class RecallSignalsVO {
     @Data
     public static class QualitySignal {
         private boolean hasData;
-        private Double ctr;   // conversionEfficiencyScore
-        private Double viral; // viralScore
-        private Double roi;   // revenueScore
+        private Double ctr;   // conversionEfficiencyScore(素材)
+        private Double viral; // viralScore(素材)
+        private Double roi;   // revenueScore(素材)
+
+        // 文章质量维度分(ARTICLE 模态专用)
+        private Double readScore;    // read_score
+        private Double openScore;    // open_score
+        private Double fissionScore; // fission_score
+
+        // 文章原始指标(ARTICLE 模态专用)
+        private Long totalRead;      // 总阅读
+        private Double avgRead;      // 阅读均值
+        private Double openRate;     // 总打开率
+        private Double fissionRate;  // 总裂变率
+        private Integer publishCount; // 发文次数
     }
 
     @Data

+ 18 - 0
core/src/main/java/com/tzld/videoVector/service/rank/RankServiceImpl.java

@@ -106,6 +106,24 @@ public class RankServiceImpl implements RankService {
                 : params.getDeconstructBoost();
         if (codeBoost == null) codeBoost = params.getDeconstructBoost();
 
+        // ARTICLE 模态:优先用质量分(read/open/fission),无质量数据时退化为纯 sim
+        if (modality == Modality.ARTICLE) {
+            QualitySignal qs = signals.getQuality();
+            if (qs != null && qs.isHasData()
+                    && qs.getReadScore() != null && qs.getOpenScore() != null && qs.getFissionScore() != null) {
+                double qualTotalW = params.getWRead() + params.getWOpen() + params.getWFission();
+                if (qualTotalW <= 0) qualTotalW = 1;
+                double qualityScore = (params.getWRead() * qs.getReadScore()
+                        + params.getWOpen() * qs.getOpenScore()
+                        + params.getWFission() * qs.getFissionScore()) / qualTotalW;
+                double composite = params.getAlpha() * codeBoost * simNorm + (1 - params.getAlpha()) * qualityScore;
+                return ScoreBreakdown.of(composite, simNorm, 0, codeBoost, lowerBound, passesThreshold);
+            }
+            // 无质量数据 → 纯 sim
+            double composite = codeBoost * params.getAlpha() * simNorm;
+            return ScoreBreakdown.of(composite, simNorm, 0, codeBoost, lowerBound, passesThreshold);
+        }
+
         boolean hasRov = rov != null && Double.isFinite(rov);
 
         if (!hasRov) {

+ 9 - 0
core/src/main/java/com/tzld/videoVector/service/rank/RankingParams.java

@@ -64,6 +64,15 @@ public class RankingParams {
     private Double priorViral;
     private Double priorRoi;
 
+    /** 文章质量子维度权重——阅读,默认 0.4 */
+    private double wRead = 0.4;
+
+    /** 文章质量子维度权重——打开率,默认 0.3 */
+    private double wOpen = 0.3;
+
+    /** 文章质量子维度权重——裂变率,默认 0.3 */
+    private double wFission = 0.3;
+
     /**
      * 返回全局默认 RankingParams(与前端 DEFAULT_RANKING_PARAMS 一致)。
      */

+ 61 - 0
core/src/main/java/com/tzld/videoVector/service/recall/impl/VectorRecallTestServiceImpl.java

@@ -10,6 +10,7 @@ import com.tzld.videoVector.common.constant.VectorConstants;
 import com.tzld.videoVector.common.enums.Modality;
 import com.tzld.videoVector.dao.mapper.pgVector.DeconstructVectorConfigMapper;
 import com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleDeconstructResultMapperExt;
+import com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleQualityMapperExt;
 import com.tzld.videoVector.dao.mapper.pgVector.ext.MaterialDeconstructResultMapperExt;
 import com.tzld.videoVector.dao.mapper.pgVector.ext.MaterialQualityMapperExt;
 import com.tzld.videoVector.model.entity.ArticleMatch;
@@ -23,6 +24,7 @@ import com.tzld.videoVector.model.param.recall.MatchByMaterialIdParam;
 import com.tzld.videoVector.model.param.recall.MatchByTextParam;
 import com.tzld.videoVector.model.param.recall.MatchByVideoIdParam;
 import com.tzld.videoVector.model.po.pgVector.ArticleDeconstructResult;
+import com.tzld.videoVector.model.po.pgVector.ArticleQuality;
 import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfig;
 import com.tzld.videoVector.model.po.pgVector.DeconstructVectorConfigExample;
 import com.tzld.videoVector.model.po.pgVector.MaterialDeconstructResult;
@@ -112,6 +114,9 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
     @Autowired
     private ArticleDeconstructResultMapperExt articleDeconstructResultMapperExt;
 
+    @Autowired(required = false)
+    private ArticleQualityMapperExt articleQualityMapperExt;
+
     @Autowired
     private DeconstructVectorConfigMapper deconstructVectorConfigMapper;
 
@@ -1082,6 +1087,7 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
                 .filter(java.util.Objects::nonNull)
                 .collect(Collectors.toList());
         Map<String, ArticleDeconstructResult> rowByArticleId = loadArticleDeconstructRows(articleIds);
+        Map<String, ArticleQuality> qualityByArticleId = loadArticleQualityRows(articleIds);
 
         List<VideoMatchEnrichedVO> items = new ArrayList<>(matches.size());
         for (ArticleMatch m : matches) {
@@ -1125,6 +1131,23 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
 
             applyCompatibilityFields(vo);
             applySignals(vo, requestConfigCode, "ann");
+
+            // 文章质量分 enrich
+            ArticleQuality aq = qualityByArticleId.get(m.getArticleId());
+            if (aq != null && vo.getSignals() != null) {
+                RecallSignalsVO.QualitySignal qs = new RecallSignalsVO.QualitySignal();
+                qs.setHasData(true);
+                qs.setReadScore(aq.getReadScore());
+                qs.setOpenScore(aq.getOpenScore());
+                qs.setFissionScore(aq.getFissionScore());
+                qs.setTotalRead(aq.getTotalRead());
+                qs.setAvgRead(aq.getAvgRead());
+                qs.setOpenRate(aq.getOpenRate());
+                qs.setFissionRate(aq.getFissionRate());
+                qs.setPublishCount(aq.getPublishCount());
+                vo.getSignals().setQuality(qs);
+            }
+
             items.add(vo);
         }
         return items;
@@ -1153,6 +1176,31 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
         return result;
     }
 
+    /**
+     * Loads article quality rows for the given ids, keyed by content id.
+     * Best effort: a missing mapper, an empty id list, or a query failure (which is logged)
+     * all result in a map without entries rather than an exception.
+     *
+     * @param articleIds article ids to look up
+     * @return content id to quality row; the first row returned for an id wins
+     */
+    private Map<String, ArticleQuality> loadArticleQualityRows(List<String> articleIds) {
+        if (CollectionUtils.isEmpty(articleIds) || articleQualityMapperExt == null) {
+            return Collections.emptyMap();
+        }
+        Map<String, ArticleQuality> qualityByContentId = new HashMap<>();
+        try {
+            List<ArticleQuality> fetched = articleQualityMapperExt.selectByContentIds(articleIds);
+            if (!CollectionUtils.isEmpty(fetched)) {
+                for (ArticleQuality candidate : fetched) {
+                    // Skip null rows and rows without a usable content id.
+                    if (candidate != null && StringUtils.hasText(candidate.getContentId())) {
+                        qualityByContentId.putIfAbsent(candidate.getContentId(), candidate);
+                    }
+                }
+            }
+        } catch (Exception e) {
+            log.error("批量加载 article_quality 失败: {}", e.getMessage(), e);
+        }
+        return qualityByContentId;
+    }
+
     private JSONObject parseArticleResultJson(ArticleDeconstructResult row) {
         if (row == null || !StringUtils.hasText(row.getResult())) {
             return null;
@@ -2422,6 +2470,19 @@ public class VectorRecallTestServiceImpl implements VectorRecallTestService {
 
         applyCompatibilityFields(vo);
         applySignals(vo, configCode, "self");
+
+        // 文章质量分 enrich
+        Map<String, ArticleQuality> selfQualityMap = loadArticleQualityRows(Collections.singletonList(articleId));
+        ArticleQuality aq = selfQualityMap.get(articleId);
+        if (aq != null && vo.getSignals() != null) {
+            RecallSignalsVO.QualitySignal qs = new RecallSignalsVO.QualitySignal();
+            qs.setHasData(true);
+            qs.setReadScore(aq.getReadScore());
+            qs.setOpenScore(aq.getOpenScore());
+            qs.setFissionScore(aq.getFissionScore());
+            vo.getSignals().setQuality(qs);
+        }
+
         return vo;
     }
 

+ 147 - 0
core/src/main/java/com/tzld/videoVector/util/ArticleQualityCalculator.java

@@ -0,0 +1,147 @@
+package com.tzld.videoVector.util;
+
+import com.tzld.videoVector.model.po.pgVector.ArticleQuality;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * 文章质量分计算工具
+ * 基于文章发布表现数据(阅读、打开、裂变)计算综合质量分
+ */
+public class ArticleQualityCalculator {
+
+    private static final Logger log = LoggerFactory.getLogger(ArticleQualityCalculator.class);
+
+    private static final double DEFAULT_PRIOR = 0.5;
+
+    /**
+     * Scores every article in {@code list} in place.
+     *
+     * <p>Each of total read, open rate and fission rate is converted to a percentile rank
+     * within the list. The ranks are combined as a weighted average, and that average is pulled
+     * towards {@code DEFAULT_PRIOR} in proportion to how far the publish count falls short of
+     * {@code confidenceThreshold}. All values written back are rounded to two decimals.
+     *
+     * @param list                 rows carrying raw aggregated metrics; a null or empty list is a no-op
+     * @param wRead                weight of the read dimension
+     * @param wOpen                weight of the open-rate dimension
+     * @param wFission             weight of the fission-rate dimension
+     * @param confidenceThreshold  publish count at which confidence reaches 1.0
+     */
+    public static void calculateAll(List<ArticleQuality> list,
+                                    double wRead, double wOpen, double wFission,
+                                    int confidenceThreshold) {
+        if (list == null || list.isEmpty()) {
+            return;
+        }
+
+        double weightSum = wRead + wOpen + wFission;
+        // Guard the division below against a non-positive weight total.
+        double qualTotalW = weightSum <= 0 ? 1 : weightSum;
+
+        int totalCount = list.size();
+        log.info("开始计算文章质量分, 总数: {}, 权重: r={} o={} f={}, 置信度阈值: {}",
+                totalCount, wRead, wOpen, wFission, confidenceThreshold);
+
+        // Step 1: snapshot the raw dimension values, treating missing values as 0.
+        List<DimValues> dims = new ArrayList<>(totalCount);
+        for (ArticleQuality source : list) {
+            DimValues snapshot = new DimValues();
+            snapshot.totalRead = nullToZero(source.getTotalRead());
+            snapshot.openRate = nullToZero(source.getOpenRate());
+            snapshot.fissionRate = nullToZero(source.getFissionRate());
+            Integer published = source.getPublishCount();
+            snapshot.publishCount = published != null ? published : 0;
+            dims.add(snapshot);
+        }
+
+        // Step 2: percentile rank per dimension.
+        computePercentileRanks(dims, d -> d.totalRead, (d, pct) -> d.readPct = pct);
+        computePercentileRanks(dims, d -> d.openRate, (d, pct) -> d.openPct = pct);
+        computePercentileRanks(dims, d -> d.fissionRate, (d, pct) -> d.fissionPct = pct);
+
+        // Step 3: weighted composite score with confidence shrinkage.
+        int lowConfCount = 0;
+        int noDataCount = 0;
+        int position = 0;
+
+        for (ArticleQuality target : list) {
+            DimValues d = dims.get(position++);
+
+            double confidence;
+            if (d.publishCount >= confidenceThreshold) {
+                confidence = 1.0;
+            } else {
+                confidence = (double) d.publishCount / confidenceThreshold;
+            }
+
+            if (d.publishCount <= 0) {
+                noDataCount++;
+            } else if (confidence < 1.0) {
+                lowConfCount++;
+            }
+
+            double rawScore = (wRead * d.readPct + wOpen * d.openPct + wFission * d.fissionPct) / qualTotalW;
+            double qualityScore = confidence * rawScore + (1 - confidence) * DEFAULT_PRIOR;
+
+            target.setReadScore(round2(d.readPct));
+            target.setOpenScore(round2(d.openPct));
+            target.setFissionScore(round2(d.fissionPct));
+            target.setQualityScore(round2(qualityScore));
+            target.setConfidence(round2(confidence));
+        }
+
+        log.info("文章质量分计算完成, 总数: {}, 无发文: {}, 低于置信度阈值: {}",
+                totalCount, noDataCount, lowConfCount);
+    }
+
+    // ===== Percentile rank computation (modeled on MaterialQualityCalculator) =====
+
+    /** Reads the raw value of one dimension from a {@link DimValues}. */
+    @FunctionalInterface
+    private interface ValueExtractor {
+        double extract(DimValues dv);
+    }
+
+    /** Stores the computed percentile rank of one dimension into a {@link DimValues}. */
+    @FunctionalInterface
+    private interface RankSetter {
+        void set(DimValues dv, double rank);
+    }
+
+    /**
+     * Assigns each element a percentile rank in [0, 1] for one dimension.
+     *
+     * <p>Elements are sorted ascending by the extracted value. Elements with equal values share
+     * the mean of their zero-based positions, divided by {@code n - 1}. A single-element list
+     * gets 0.5.
+     *
+     * @param list   elements to rank; not reordered (a sorted copy is used)
+     * @param getter reads the dimension value to rank by
+     * @param setter receives the percentile for each element
+     */
+    private static void computePercentileRanks(List<DimValues> list,
+                                               ValueExtractor getter,
+                                               RankSetter setter) {
+        int n = list.size();
+        List<DimValues> sorted = list.stream()
+                .sorted(Comparator.comparingDouble(getter::extract))
+                .collect(Collectors.toList());
+
+        // i marks the start of a tie group; the loop advances group by group (i = j below).
+        for (int i = 0; i < n; ) {
+            double val = getter.extract(sorted.get(i));
+            int j = i;
+            // Extend j to one past the last element whose value equals val.
+            while (j < n && Double.compare(getter.extract(sorted.get(j)), val) == 0) {
+                j++;
+            }
+            // Mean of the zero-based positions i..j-1 occupied by this tie group.
+            double avgRank = (i + j - 1) / 2.0;
+            double pct = (n > 1) ? avgRank / (n - 1) : 0.5;
+            for (int k = i; k < j; k++) {
+                setter.set(sorted.get(k), pct);
+            }
+            i = j;
+        }
+    }
+
+    // ===== Helpers =====
+
+    /** Converts a nullable {@code Long} to a double, mapping {@code null} to 0. */
+    private static double nullToZero(Long v) { return v == null ? 0 : (double) v; }
+    /** Unboxes a nullable {@code Double}, mapping {@code null} to 0. */
+    private static double nullToZero(Double v) { return v == null ? 0 : v; }
+
+    /** Rounds to two decimal places. */
+    private static double round2(double v) {
+        return Math.round(v * 100.0) / 100.0;
+    }
+
+    /**
+     * Per-article working record: the raw dimension values plus the percentile ranks
+     * computed from them.
+     */
+    private static class DimValues {
+        // Raw inputs copied from ArticleQuality (null mapped to 0).
+        double totalRead;
+        double openRate;
+        double fissionRate;
+        int publishCount;
+
+        // Percentile ranks; 0.5 until overwritten by computePercentileRanks.
+        double readPct = 0.5;
+        double openPct = 0.5;
+        double fissionPct = 0.5;
+    }
+}

+ 71 - 0
core/src/main/resources/mapper/pgVector/ext/ArticleQualityMapperExt.xml

@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.tzld.videoVector.dao.mapper.pgVector.ext.ArticleQualityMapperExt">
+
+    <!-- Maps article_quality columns (snake_case) onto ArticleQuality properties (camelCase). -->
+    <resultMap id="BaseResultMap" type="com.tzld.videoVector.model.po.pgVector.ArticleQuality">
+        <id column="id" property="id"/>
+        <result column="content_id" property="contentId"/>
+        <result column="total_read" property="totalRead"/>
+        <result column="avg_read" property="avgRead"/>
+        <result column="total_fans" property="totalFans"/>
+        <result column="publish_count" property="publishCount"/>
+        <result column="open_rate" property="openRate"/>
+        <result column="fission_rate" property="fissionRate"/>
+        <result column="read_score" property="readScore"/>
+        <result column="open_score" property="openScore"/>
+        <result column="fission_score" property="fissionScore"/>
+        <result column="quality_score" property="qualityScore"/>
+        <result column="confidence" property="confidence"/>
+        <result column="dt" property="dt"/>
+        <result column="create_time" property="createTime"/>
+        <result column="update_time" property="updateTime"/>
+    </resultMap>
+
+    <!--
+        Multi-row upsert keyed on (content_id, dt).
+        New pairs are inserted with create_time/update_time = NOW(). For an existing pair the
+        metric and score columns are overwritten and update_time is refreshed; create_time is
+        not in the update set. "dt = EXCLUDED.dt" is a no-op because dt is part of the
+        conflict key.
+        NOTE(review): an empty list produces no VALUES tuples, so callers should not pass one.
+    -->
+    <insert id="batchUpsert">
+        INSERT INTO article_quality (
+            content_id,
+            total_read, avg_read, total_fans, publish_count,
+            open_rate, fission_rate,
+            read_score, open_score, fission_score,
+            quality_score, confidence,
+            dt, create_time, update_time
+        )
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+        (
+            #{item.contentId},
+            #{item.totalRead}, #{item.avgRead}, #{item.totalFans}, #{item.publishCount},
+            #{item.openRate}, #{item.fissionRate},
+            #{item.readScore}, #{item.openScore}, #{item.fissionScore},
+            #{item.qualityScore}, #{item.confidence},
+            #{item.dt}, NOW(), NOW()
+        )
+        </foreach>
+        ON CONFLICT (content_id, dt)
+        DO UPDATE SET
+            total_read = EXCLUDED.total_read,
+            avg_read = EXCLUDED.avg_read,
+            total_fans = EXCLUDED.total_fans,
+            publish_count = EXCLUDED.publish_count,
+            open_rate = EXCLUDED.open_rate,
+            fission_rate = EXCLUDED.fission_rate,
+            read_score = EXCLUDED.read_score,
+            open_score = EXCLUDED.open_score,
+            fission_score = EXCLUDED.fission_score,
+            quality_score = EXCLUDED.quality_score,
+            confidence = EXCLUDED.confidence,
+            dt = EXCLUDED.dt,
+            update_time = NOW()
+    </insert>
+
+    <!--
+        Returns at most one row per content_id: DISTINCT ON (content_id) combined with
+        ORDER BY content_id, dt DESC keeps the row with the greatest dt for each id.
+        NOTE(review): an empty contentIds list renders "IN ()", so callers should not pass one.
+    -->
+    <select id="selectByContentIds" resultMap="BaseResultMap">
+        SELECT DISTINCT ON (content_id) *
+        FROM article_quality
+        WHERE content_id IN
+        <foreach collection="contentIds" item="id" open="(" separator="," close=")">
+            #{id}
+        </foreach>
+        ORDER BY content_id, dt DESC
+    </select>
+
+</mapper>