Bläddra i källkod

Merge branch 'feature_20260616_v839_year_valid_play' of algorithm/recommend-server into master

yangxiaohui 2 veckor sedan
förälder
incheckning
63bbef6d60

+ 5 - 1
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/RecommendService.java

@@ -561,8 +561,12 @@ public class RecommendService {
         Map<String, String> userNetworkSeqFeature = unionIdFeature.getOrDefault("alg_user_network_seq_feature", new HashMap<>());
         List<String> actVidSeq = FeatureUtils.extractVidsFromUserNetworkSeqFeature(userNetworkSeqFeature, "a_v_s");
         List<String> netVidSeq = FeatureUtils.extractVidsFromUserNetworkSeqFeature(userNetworkSeqFeature, "n_v_s");
+        // rp_vid: 用户近期有效播放 vid 序列, YearValidPlayDkElementsRecallStrategy 需要拿到这批 vid
+        // 的 dk_elements -> 必须进 userNetworkSeqVideoInfoMap, 否则 dk_elements lookup 全 miss.
+        // 上游未填充时返回空, 对其他实验零影响.
+        List<String> rpVidSeq = FeatureUtils.extractVidsFromUserNetworkSeqFeature(userNetworkSeqFeature, "rp_vid");
 
-        List<String> allVids = Stream.of(actVidSeq, netVidSeq)
+        List<String> allVids = Stream.of(actVidSeq, netVidSeq, rpVidSeq)
                 .flatMap(Collection::stream)
                 .distinct()
                 .filter(StringUtils::isNotBlank)

+ 297 - 93
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV839.java

@@ -7,7 +7,9 @@ import com.tzld.piaoquan.recommend.server.common.base.RankItem;
 import com.tzld.piaoquan.recommend.server.model.MachineInfo;
 import com.tzld.piaoquan.recommend.server.model.Video;
 import com.tzld.piaoquan.recommend.server.service.FeatureService;
+import com.tzld.piaoquan.recommend.server.service.funnel.FunnelContext;
 import com.tzld.piaoquan.recommend.server.service.rank.RankParam;
+import com.tzld.piaoquan.recommend.server.service.rank.RankResult;
 import com.tzld.piaoquan.recommend.server.service.rank.bo.UserShareReturnProfile;
 import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractVideoMergeCate;
 import com.tzld.piaoquan.recommend.server.service.rank.tansform.FeatureV6;
@@ -15,7 +17,9 @@ import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
 import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
 import com.tzld.piaoquan.recommend.server.util.*;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.collections4.MapUtils;
+import org.apache.commons.lang3.RandomUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
@@ -23,6 +27,8 @@ import org.springframework.stereotype.Service;
 import java.util.*;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 @Service
 @Slf4j
@@ -33,6 +39,62 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
     @Autowired
     private FeatureService featureService;
 
+    /**
+     * V839 personalized-recall whitelist (7 routes: the 6 base routes of V566 plus 1 experimental
+     * valid_play dk_elements behavior route). Per the author's note, the recall keys of these
+     * routes contain mid/uid and therefore depend on the behavior signals of the requesting user.
+     * The only difference between V839 and V562: YearShareDkElements (share behavior) is replaced by
+     * YearValidPlayDkElements (valid-play behavior), giving a clean A/B comparison of
+     * "share vs. valid play" as the behavior source of the dk_elements recall.
+     * Note: YearReturnCate2 was moved to the non-personalized whitelist on 2026-06-04 because of
+     * poor online performance.
+     */
+    private static final Set<String> PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
+            UserCate1RecallStrategy.PUSH_FORM,
+            UserCate2RecallStrategy.PUSH_FORM,
+            Return1Cate2RosRecallStrategy.PUSH_FORM,
+            Return1Cate2StrRecallStrategy.PUSH_FORM,
+            YearShareCate1RecallStrategy.PUSH_FROM,
+            YearShareCate2RecallStrategy.PUSH_FROM,
+            YearValidPlayDkElementsRecallStrategy.PUSH_FROM
+    ));
+
+    /**
+     * V839 non-personalized-recall whitelist (17 routes). Per the author's note these routes depend
+     * only on headVid plus region / category / similarity (vid-vid CF is also counted here).
+     * Contents: the 5 legacy region routes, the new region route, city, head province/cate,
+     * priori province, return-similar, scene CF, and YearReturnCate2.
+     */
+    private static final Set<String> NON_PERSONAL_RECALL_PUSH_FROMS = new HashSet<>(Arrays.asList(
+            RegionHRecallStrategy.PUSH_FORM,
+            RegionHDupRecallStrategy.PUSH_FORM,
+            Region24HRecallStrategy.PUSH_FORM,
+            RegionRelative24HRecallStrategy.PUSH_FORM,
+            RegionRelative24HDupRecallStrategy.PUSH_FORM,
+            RegionRealtimeRecallStrategyV1.PUSH_FORM,
+            CityRovnRecallStrategy.PUSH_FROM,
+            HeadProvinceCate1RecallStrategy.PUSH_FORM,
+            HeadProvinceCate2RecallStrategy.PUSH_FORM,
+            HeadCate2RovRecallStrategy.PUSH_FROM,
+            PrioriProvinceRovnRecallStrategy.PUSH_FROM,
+            PrioriProvinceStrRecallStrategy.PUSH_FROM,
+            PrioriProvinceRosRecallStrategy.PUSH_FROM,
+            ReturnVideoRecallStrategy.PUSH_FORM,
+            SceneCFRovnRecallStrategy.PUSH_FORM,
+            SceneCFRosnRecallStrategy.PUSH_FORM,
+            YearReturnCate2RecallStrategy.PUSH_FROM
+    ));
+
+    /**
+     * Unmodifiable union of PERSONAL_RECALL_PUSH_FROMS and NON_PERSONAL_RECALL_PUSH_FROMS
+     * (7 + 17 = 24 listed entries; the previous comment said 23). Used by fetchCoarseRankScores to
+     * skip vids from routes that do not take part in the truncation, such as the flow pools.
+     */
+    private static final Set<String> ALL_ROV_PUSH_FROMS;
+    static {
+        Set<String> all = new HashSet<>(PERSONAL_RECALL_PUSH_FROMS);
+        all.addAll(NON_PERSONAL_RECALL_PUSH_FROMS);
+        ALL_ROV_PUSH_FROMS = Collections.unmodifiableSet(all);
+    }
+
+    /*
+     * 设计要点:
+     *   - fail-closed 白名单:RecallService 未来加新路不会自动进 V839,避免污染 vs V562 AB 对比
+     *   - 流量池 3 路 (flow_pool / quick_flow_pool / recall_strategy_hotspot) 不在任何名单——独立通道
+     *   - 调用顺序 = 个性化优先:同 vid 双类命中时归个性化,保护用户兴趣信号
+     */
+
     @Override
     public List<Video> mergeAndRankRovRecall(RankParam param) {
         Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : new HashMap<>(0);
@@ -46,44 +108,30 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
         Set<Long> setVideo = new HashSet<>();
         setVideo.add(param.getHeadVid());
         List<Video> rovRecallRank = new ArrayList<>();
-        // -------------------5路特殊旧召回------------------
-        RecallUtils.extractOldSpecialRecall(mergeWeight.getOrDefault("oldSpecialN", (double) param.getSize()).intValue(), param, setVideo, rovRecallRank);
-        //-------------------return相似召回------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("v6", 5.0).intValue(), param, ReturnVideoRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------新地域召回------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("v1", 5.0).intValue(), param, RegionRealtimeRecallStrategyV1.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------scene cf rovn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("sceneCFRovn", 5.0).intValue(), param, SceneCFRovnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------scene cf rosn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("sceneCFRosn", 5.0).intValue(), param, SceneCFRosnRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------user cate1------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("cate1RecallN", 5.0).intValue(), param, UserCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------user cate2------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("cate2RecallN", 5.0).intValue(), param, UserCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------head province cate1------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate1RecallN", 3.0).intValue(), param, HeadProvinceCate1RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        // -------------------head province cate2------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate2RecallN", 3.0).intValue(), param, HeadProvinceCate2RecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------head cate2 of rovn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("headCate2Rov", 5.0).intValue(), param, HeadCate2RovRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------city rovn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("cityRov", 5.0).intValue(), param, CityRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province rovn------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceRov", 3.0).intValue(), param, PrioriProvinceRovnRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province str------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceStr", 1.0).intValue(), param, PrioriProvinceStrRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------priori province ros------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("prioriProvinceRos", 1.0).intValue(), param, PrioriProvinceRosRecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        //-------------------return1 cate2 ros------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("return1Cate2Ros", 5.0).intValue(), param, Return1Cate2RosRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //-------------------return1 cate2 str------------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("return1Cate2Str", 5.0).intValue(), param, Return1Cate2StrRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-        //--------------deconstruction keywords ros-------------
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("deconstructionKeywordsRos", 5.0).intValue(), param, UserDeconstructionKeywordsRecallStrategy.PUSH_FORM, setVideo, rovRecallRank);
-
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("yearShareCate1", 5.0).intValue(), param, YearShareCate1RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("yearShareCate2", 5.0).intValue(), param, YearShareCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
-        RecallUtils.extractRecall(mergeWeight.getOrDefault("yearReturnCate2", 5.0).intValue(), param, YearReturnCate2RecallStrategy.PUSH_FROM, setVideo, rovRecallRank);
+
+        // ============================================================
+        // V839 实验:统一粗排分截断 (个性化 / 非个性化 两配额, 动态补足)
+        // 总配额 coarseRankTopN,个性化占 personalRatio。先个性化按上限抢位,
+        // 个性化不足时剩余名额转给非个性化,保证精排算力满载。
+        //
+        // 粗排分 = 由 alg_vid_recommend_exp_feature_20250212 的原子字段 (return_n_uv_*, exp_*) 经 plusSmooth 算出 rovn_1h / rovn_24h 后加权平均
+        // ============================================================
+        int totalTopN = mergeWeight.getOrDefault("coarseRankTopN", 80.0).intValue();
+        double personalRatio = mergeWeight.getOrDefault("personalRatio", 0.4);
+        int personalTopN = (int) Math.round(totalTopN * personalRatio);
+        Map<Long, Double> coarseRankMap = fetchCoarseRankScores(param);
+
+        int personalCandidates = RecallUtils.countDistinctCandidates(param, setVideo, PERSONAL_RECALL_PUSH_FROMS);
+        int sizeBeforePersonal = rovRecallRank.size();
+        RecallUtils.extractAllAndTruncateByCoarseRank(personalTopN, param, setVideo, rovRecallRank, coarseRankMap, PERSONAL_RECALL_PUSH_FROMS);
+        int personalActual = rovRecallRank.size() - sizeBeforePersonal;
+        int nonPersonalBudget = totalTopN - personalActual;  // 个性化不足时, 名额转给非个性化
+        int nonPersonalCandidates = RecallUtils.countDistinctCandidates(param, setVideo, NON_PERSONAL_RECALL_PUSH_FROMS);
+        int sizeBeforeNonPersonal = rovRecallRank.size();
+        RecallUtils.extractAllAndTruncateByCoarseRank(nonPersonalBudget, param, setVideo, rovRecallRank, coarseRankMap, NON_PERSONAL_RECALL_PUSH_FROMS);
+        int nonPersonalActual = rovRecallRank.size() - sizeBeforeNonPersonal;
+        log.info("coarse_rank_summary exp=839 quota={} pc={} ps={} nc={} ns={}",
+                totalTopN, personalCandidates, personalActual, nonPersonalCandidates, nonPersonalActual);
 
         // 记录召回源中的视频
         this.rankBeforePostProcessor(rovRecallRank);
@@ -97,8 +145,22 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
         // 1. 批量获取特征  省份参数要对齐  headvid  要传递过来!
         // k1:视频、k2:表、k3:特征、v:特征值
         Map<String, String> headVideoInfo = param.getHeadInfo();
+
+        // 用户的序列特征
+        Map<String, Map<String, String>> unionIdFeature = featureService.getUnionIdFeature(param.getUnionId());
+        Map<String, String> userNetworkSeqFeature = unionIdFeature.getOrDefault("alg_user_network_seq_feature", new HashMap<>());
+        List<String> actVidSeq = FeatureUtils.extractVidsFromUserNetworkSeqFeature(userNetworkSeqFeature, "a_v_s");
+        List<String> netVidSeq = FeatureUtils.extractVidsFromUserNetworkSeqFeature(userNetworkSeqFeature, "n_v_s");
+
         List<String> vids = CommonCollectionUtils.toListDistinct(rovRecallRank, v -> String.valueOf(v.getVideoId()));
-        Map<String, Map<String, Map<String, String>>> videoBaseInfoMap = featureService.getVideoBaseInfo("", vids);
+
+        List<String> allVids = Stream.of(actVidSeq, netVidSeq, vids)
+                .flatMap(Collection::stream)
+                .distinct()
+                .filter(StringUtils::isNotBlank)
+                .collect(Collectors.toList());
+
+        Map<String, Map<String, Map<String, String>>> videoBaseInfoMap = featureService.getVideoBaseInfo("", allVids);
         Map<String, Map<String, Map<String, String>>> videoBCData = featureService.getVideoStatistics(vids);
 
         FeatureService.Feature feature = featureService.getFeatureV4(param, headVideoInfo, videoBaseInfoMap, vids);
@@ -112,15 +174,20 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
         Map<String, Map<String, String>> userBehaviorVideoMap = param.getBehaviorVideos();
         Map<String, String> creativeInfo = param.getCreativeInfoFeature();
 
+        Map<String, String> featureMapToString = new HashMap<>();
+        FeatureV6.parseStringFeatureMap(featureMapToString, param);
+        FeatureV6.putVideoStringFeatures("h", headVideoInfo, featureMapToString);
+
         // 3. 特征处理
         List<RankItem> rankItems = CommonCollectionUtils.toList(rovRecallRank, RankItem::new);
         Map<String, Float> userFeatureMap = getUserFeature(currentMs, param, creativeInfo, headVideoInfo, userProfile, featureOriginUser);
         batchGetVideoFeature(currentMs, userProfile, creativeInfo, headVideoInfo, videoBaseInfoMap,
-                newC7Map, newC8Map, featureOriginUser, userBehaviorVideoMap, featureOriginVideo, rankItems);
+                newC7Map, newC8Map, featureOriginUser, userBehaviorVideoMap, featureOriginVideo, featureMapToString, userFeatureMap, rankItems);
+
 
         // 4. 排序模型计算
         Map<String, Float> sceneFeatureMap = new HashMap<>(0);
-        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_fm_xgb_20250729.conf").scoring(sceneFeatureMap, userFeatureMap, userFeatureMap, rankItems);
+        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_dnn_20260407.conf").scoring(sceneFeatureMap, userFeatureMap, rankItems);
 
         // 5. 排序公式特征
         double xgbRovNegRate = mergeWeight.getOrDefault("xgbRovNegRate", 0.059);
@@ -142,9 +209,15 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
         double b0Ror1hW = mergeWeight.getOrDefault("b0_ror_1h_w", 0d);
         double b0Ror24hW = mergeWeight.getOrDefault("b0_ror_24h_w", 0d);
 
+        double cnRovn1hW = mergeWeight.getOrDefault("cn_rovn_1h_w", 0d);
+        double cnRovn24hW = mergeWeight.getOrDefault("cn_rovn_24h_w", 0d);
+
+        double dnRovn1hW = mergeWeight.getOrDefault("dn_rovn_1h_w", 0d);
+        double dnRovn24hW = mergeWeight.getOrDefault("dn_rovn_24h_w", 0d);
+
         Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_vor:");
 
-        // 获取权重
+
         Map<String, String> contextInfo = getContextInfo(param);
 
         List<Video> result = new ArrayList<>();
@@ -158,8 +231,9 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
             double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("rov", "0"));
             item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
 
-            double norXGBScore = item.getScoresMap().getOrDefault("NorXGBScore", 0d);
-            double newNorXGBScore = norPowerCalibration(xgbNorPowerWeight, xgbNorPowerExp, norXGBScore);
+            double norDNNScore = item.getScoresMap().getOrDefault("NorDNNScore", 0d);
+            double newNorDNNScore = norPowerCalibration(xgbNorPowerWeight, xgbNorPowerExp, norDNNScore);
+            item.getScoresMap().put("newNorDNNScore", newNorDNNScore);
             item.getScoresMap().put("rosAdd", rosAdd);
             item.getScoresMap().put("rosW", rosW);
 
@@ -169,6 +243,7 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
             item.getScoresMap().put("vorW", vorW);
 
             Map<String, String> bcData = videoBCData.getOrDefault(String.valueOf(item.getVideoId()), new HashMap<>()).getOrDefault("alg_vid_feature_b_c_data", new HashMap<>());
+            Map<String, String> cdNData = videoBCData.getOrDefault(String.valueOf(item.getVideoId()), new HashMap<>()).getOrDefault("alg_vid_feature_cn_dn_data", new HashMap<>());
 
             double c1Rovn1h = Double.parseDouble(bcData.getOrDefault("c1_rovn_1h", "0"));
             double c1Rovn24h = Double.parseDouble(bcData.getOrDefault("c1_rovn_24h", "0"));
@@ -198,7 +273,25 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
             item.getScoresMap().put("b0Ror24hW", b0Ror24hW);
             item.getScoresMap().put("b0Ror24h", b0Ror24h);
 
-            score = fmRov * (rosAdd + rosW * newNorXGBScore) * (vorAdd + vorW * vor) + c1RovnScore + b0StrScore + b0RorScore;
+            double cnRovn1h = Double.parseDouble(cdNData.getOrDefault("cn_rovn_1h", "0"));
+            double cnRovn24h = Double.parseDouble(cdNData.getOrDefault("cn_rovn_24h", "0"));
+            double cnRovnScore = cnRovn1hW * cnRovn1h + cnRovn24hW * cnRovn24h;
+            item.getScoresMap().put("cnRovnScore", cnRovnScore);
+            item.getScoresMap().put("cnRovn1hW", cnRovn1hW);
+            item.getScoresMap().put("cnRovn1h", cnRovn1h);
+            item.getScoresMap().put("cnRovn24hW", cnRovn24hW);
+            item.getScoresMap().put("cnRovn24h", cnRovn24h);
+
+            double dnRovn1h = Double.parseDouble(cdNData.getOrDefault("dn_rovn_1h", "0"));
+            double dnRovn24h = Double.parseDouble(cdNData.getOrDefault("dn_rovn_24h", "0"));
+            double dnRovnScore = dnRovn1hW * dnRovn1h + dnRovn24hW * dnRovn24h;
+            item.getScoresMap().put("dnRovnScore", dnRovnScore);
+            item.getScoresMap().put("dnRovn1hW", dnRovn1hW);
+            item.getScoresMap().put("dnRovn1h", dnRovn1h);
+            item.getScoresMap().put("dnRovn24hW", dnRovn24hW);
+            item.getScoresMap().put("dnRovn24h", dnRovn24h);
+
+            score = fmRov * (rosAdd + rosW * newNorDNNScore) * (vorAdd + vorW * vor) + c1RovnScore + b0StrScore + b0RorScore + cnRovnScore + dnRovnScore;
 
             Video video = item.getVideo();
             video.setScore(score);
@@ -232,7 +325,7 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
             if (MapUtils.isNotEmpty(contextInfo)) {
                 video.getMetaFeatureMap().put("context", contextInfo);
             }
-            if (Objects.nonNull(video.getRankVideoInfoMap()) && video.getRankVideoInfoMap().containsKey(video.getVideoId())){
+            if (Objects.nonNull(video.getRankVideoInfoMap()) && video.getRankVideoInfoMap().containsKey(video.getVideoId())) {
                 video.getRankVideoInfoMap().get(video.getVideoId()).setScore(score);
                 video.getRankVideoInfoMap().get(video.getVideoId()).setScoresMap(video.getScoresMap());
             }
@@ -243,6 +336,148 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
         return result;
     }
 
+    /**
+     * Minimal fusion of the ranked rov list with the flow-pool lists (per the original note, the
+     * same as V567). Only the flow-pool and fallback logic is kept:
+     *   1. Empty-rov fallback: when rovVideos is empty the flow-pool videos are returned directly,
+     *      capped at param.getSize() (segment 1 of Basic).
+     *   7. Probabilistic flow-pool insertion: the first topK slots are taken from rovVideos; each
+     *      remaining slot draws a random number and takes the next flowVideos entry when the draw is
+     *      below flowPoolP, otherwise the next douHotFlowPoolVideos entry (when
+     *      isInsertDouHotFlowPoolVideo() is true) or the next rov video. When one side is used up
+     *      the other side backfills up to param.getSize() (segment 7 of Basic).
+     *
+     * Removed relative to Basic (per the original note): tag filter / rov boost / forced insertion /
+     * category down-weighting / festival down-weighting / density control.
+     *
+     * NOTE(review): the original note also mentions a newFlowPoolSelectRate probability gate, but no
+     * such value is read in this method — confirm whether it was dropped on purpose.
+     *
+     * @param param                request parameters; size, topK and the funnel context are read here
+     * @param rovVideos            ranked rov candidates
+     * @param flowVideos           flow-pool candidates
+     * @param douHotFlowPoolVideos douHot flow-pool candidates, read only when isInsertDouHotFlowPoolVideo() is true
+     * @return the merged list wrapped in a RankResult
+     */
+    @Override
+    public RankResult mergeAndSort(RankParam param, List<Video> rovVideos, List<Video> flowVideos, List<Video> douHotFlowPoolVideos) {
+
+        // 1. Fallback: when the rov pool is empty, fill from the cold-start (flow) pool and return immediately.
+        if (CollectionUtils.isEmpty(rovVideos)) {
+            if (param.getSize() < flowVideos.size()) {
+                return new RankResult(flowVideos.subList(0, param.getSize()));
+            } else {
+                return new RankResult(flowVideos);
+            }
+        }
+
+        // 7. Probabilistic flow-pool insertion.
+        FunnelContext funnelCtx = param.getFunnelContext();
+        List<Video> result = new ArrayList<>();
+        for (int i = 0; i < param.getTopK() && i < rovVideos.size(); i++) { // head: the first topK slots come from rov only
+            result.add(rovVideos.get(i));
+        }
+        double flowPoolP = getFlowPoolP(param);
+        int flowPoolIndex = 0;
+        int rovPoolIndex = param.getTopK(); // rov entries before topK were consumed by the head loop above
+        for (int i = 0; i < param.getSize() - param.getTopK(); i++) {
+            double rand = RandomUtils.nextDouble(0, 1);
+            if (rand < flowPoolP) {
+                if (flowPoolIndex < flowVideos.size()) {
+                    Video v = flowVideos.get(flowPoolIndex++);
+                    result.add(v);
+                    markColdStartInserted(funnelCtx, v);
+                } else {
+                    break;
+                }
+            } else if (this.isInsertDouHotFlowPoolVideo()) { // NOTE(review): when this is true the rov branch below is never reached inside the loop — confirm intended
+                if (flowPoolIndex < douHotFlowPoolVideos.size()) { // NOTE(review): flowPoolIndex is shared with flowVideos — confirm a separate cursor is not needed
+                    Video v = douHotFlowPoolVideos.get(flowPoolIndex++);
+                    result.add(v);
+                    markColdStartInserted(funnelCtx, v);
+                } else {
+                    break;
+                }
+            } else {
+                if (rovPoolIndex < rovVideos.size()) {
+                    result.add(rovVideos.get(rovPoolIndex++));
+                } else {
+                    break;
+                }
+            }
+        }
+        if (rovPoolIndex >= rovVideos.size()) { // rov side used up: backfill from the flow pool
+            for (int i = flowPoolIndex; i < flowVideos.size() && result.size() < param.getSize(); i++) {
+                Video v = flowVideos.get(i);
+                result.add(v);
+                markColdStartInserted(funnelCtx, v);
+            }
+        }
+        if (flowPoolIndex >= flowVideos.size()) { // flow side used up: backfill from the remaining rov videos
+            for (int i = rovPoolIndex; i < rovVideos.size() && result.size() < param.getSize(); i++) {
+                result.add(rovVideos.get(i));
+            }
+        }
+
+        return new RankResult(result);
+    }
+
+    /**
+     * V839 experiment: fetches the coarse-rank score of every whitelisted recall candidate and
+     * returns it as a vid-to-score map.
+     *
+     * Data source: table alg_vid_recommend_exp_feature_20250212, read through
+     * featureService.getVideoCoarseRankFeature. The table has no ready-made rovn field, so rovn is
+     * computed from the atomic fields return_n_uv_{period} and exp_{period} via plusSmooth (see
+     * computeRovn). Per the original note, the formula and the default plus of 30 are aligned with
+     * FeatureV6.oneTypeStatFeature / FeatureV6.largerSmoothPlus so that the A/B comparison is not
+     * skewed by a different definition (not verifiable from this file).
+     *
+     * Tunable keys read from mergeWeight (per the original note, configured through Apollo):
+     *   - coarseRovn1hW / coarseRovn24hW: weights of the 1h and 24h values (default 0.5 / 0.5)
+     *   - coarseRovn1hSmoothPlus / coarseRovn24hSmoothPlus: smoothing constants (default 30 / 30)
+     *
+     * Missing values are renormalized: when only one period is available it carries the whole
+     * weight; when neither is available (or the usable weight sum is not positive) the vid is left
+     * out of the result. Per the original note the caller then falls back to RovScore.
+     *
+     * @param param rank request; only its recall result is read
+     * @return map from video id to coarse-rank score; empty when there is nothing to score
+     */
+    private Map<Long, Double> fetchCoarseRankScores(RankParam param) {
+        if (param == null || param.getRecallResult() == null
+                || CollectionUtils.isEmpty(param.getRecallResult().getData())) {
+            return Collections.emptyMap();
+        }
+        Map<String, Double> mergeWeight = this.mergeWeight != null ? this.mergeWeight : Collections.emptyMap();
+        double w1h = mergeWeight.getOrDefault("coarseRovn1hW", 0.5);
+        double w24h = mergeWeight.getOrDefault("coarseRovn24hW", 0.5);
+        double plus1h = mergeWeight.getOrDefault("coarseRovn1hSmoothPlus", 30.0);
+        double plus24h = mergeWeight.getOrDefault("coarseRovn24hSmoothPlus", 30.0);
+        // Only vids from routes in ALL_ROV_PUSH_FROMS are scored; the flow-pool routes are skipped
+        // (per the original note, to save proto and RPC latency).
+        List<String> vids = param.getRecallResult().getData().stream()
+                .filter(d -> d != null && CollectionUtils.isNotEmpty(d.getVideos()))
+                .filter(d -> ALL_ROV_PUSH_FROMS.contains(d.getPushFrom()))
+                .flatMap(d -> d.getVideos().stream())
+                .map(v -> String.valueOf(v.getVideoId()))
+                .distinct()
+                .collect(Collectors.toList());
+        if (vids.isEmpty()) return Collections.emptyMap();
+
+        Map<String, Map<String, Map<String, String>>> feats = featureService.getVideoCoarseRankFeature(vids); // NOTE(review): assumes a non-null map is returned — confirm
+        Map<Long, Double> result = new HashMap<>(vids.size());
+        for (String vid : vids) {
+            Map<String, String> row = feats.getOrDefault(vid, Collections.emptyMap())
+                    .getOrDefault("alg_vid_recommend_exp_feature_20250212", Collections.emptyMap());
+            Double rovn1h = computeRovn(row, "1h", plus1h);
+            Double rovn24h = computeRovn(row, "24h", plus24h);
+            // Weighted average; a missing period drops out of both numerator and denominator.
+            double sumW = (rovn1h != null ? w1h : 0) + (rovn24h != null ? w24h : 0);
+            if (sumW <= 0) continue;
+            double sumWS = (rovn1h != null ? rovn1h * w1h : 0) + (rovn24h != null ? rovn24h * w24h : 0);
+            try {
+                result.put(Long.parseLong(vid), sumWS / sumW);
+            } catch (NumberFormatException ignore) { } // a vid that is not a valid long is skipped and gets no score
+        }
+        return result;
+    }
+
+    /**
+     * Computes the smoothed rovn of one period as
+     * FeatureUtils.plusSmooth(return_n_uv, exp, smoothPlus, 1). Per the original note this is the
+     * same definition as FeatureV6.oneTypeStatFeature (not verifiable from this file).
+     *
+     * Field semantics (0 and null are deliberately distinguished):
+     *   - exp_{period} anchors the validity of the period: when it is missing, unparsable or not
+     *     greater than 0 the whole period is invalid and null is returned.
+     *   - return_n_uv_{period}, when missing or unparsable, is treated as 0 (a real "no return
+     *     visits" signal); per the original note this gives rovn = 0, which still takes part in the
+     *     weighted average instead of letting the other period stand in.
+     *
+     * @param row        feature row holding the exp_{period} and return_n_uv_{period} strings
+     * @param period     period suffix appended to the field names; callers in this file pass "1h" or "24h"
+     * @param smoothPlus smoothing constant forwarded to plusSmooth
+     * @return the smoothed value, or null when the period is invalid
+     */
+    private static Double computeRovn(Map<String, String> row, String period, double smoothPlus) {
+        Double exp = parseDoubleOrNull(row.get("exp_" + period));
+        if (exp == null || exp <= 0) return null;
+        Double returnNuv = parseDoubleOrNull(row.get("return_n_uv_" + period));
+        return FeatureUtils.plusSmooth(returnNuv != null ? returnNuv : 0, exp, smoothPlus, 1);
+    }
+
+    private static Double parseDoubleOrNull(String s) { // lenient parse: yields null instead of throwing
+        if (StringUtils.isBlank(s)) return null; // null, empty or whitespace-only input
+        try { return Double.parseDouble(s); } catch (NumberFormatException e) { return null; } // non-numeric text is treated as missing
+    }
+
     private UserShareReturnProfile parseUserProfile(Map<String, Map<String, String>> userOriginInfo) {
         if (null != userOriginInfo) {
             Map<String, String> c9 = userOriginInfo.get("alg_recsys_feature_user_share_return_stat");
@@ -319,15 +554,28 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
                                       Map<String, Map<String, String>> userOriginInfo,
                                       Map<String, Map<String, String>> historyVideoMap,
                                       Map<String, Map<String, Map<String, String>>> videoOriginInfo,
+                                      Map<String, String> featureMapToString,
+                                      Map<String, Float> userFeatureMap,
                                       List<RankItem> rankItems) {
-        if (null != rankItems && !rankItems.isEmpty()) {
+        if (CollectionUtils.isNotEmpty(rankItems)) {
             List<Future<Integer>> futures = new ArrayList<>();
             for (RankItem item : rankItems) {
                 String vid = item.getVideoId() + "";
                 Map<String, String> rankInfo = videoBaseInfoMap.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
                 Future<Integer> future = ThreadPoolFactory.defaultPool().submit(() -> {
-                    item.featureMap = getVideoFeature(currentMs, vid, userProfile, creativeInfo, headInfo, rankInfo, c7Map, c8Map, userOriginInfo, historyVideoMap, videoOriginInfo);
-                    item.norFeatureMap = item.featureMap;
+                    Map<String, Float> featureMap = new HashMap<>(userFeatureMap);
+                    Map<String, Float> videoFeature = getVideoFeature(currentMs, vid, userProfile, creativeInfo, headInfo, rankInfo, c7Map, c8Map, userOriginInfo, historyVideoMap, videoOriginInfo);
+                    featureMap.putAll(videoFeature);
+                    item.featureMap = featureMap;
+
+                    Map<String, String> userNetworkSeqFeature = userOriginInfo.getOrDefault("alg_user_network_seq_feature", new HashMap<>());
+
+                    Map<String, String> featureMapString = new HashMap<>(featureMapToString);
+                    FeatureV6.putVideoStringFeatures("r", rankInfo, featureMapString);
+                    featureMapString.put("r@vid", "r_vid_" + vid);
+                    FeatureV6.putProfileVideoCrossStringFeature(currentMs, userProfile, historyVideoMap, featureMapString);
+                    FeatureV6.putUserNetworkSeqFeature(featureMapString, userNetworkSeqFeature, videoBaseInfoMap);
+                    item.featureMapString = featureMapString;
                     return 1;
                 });
                 futures.add(future);
@@ -399,48 +647,4 @@ public class RankStrategy4RegionMergeModelV839 extends RankStrategy4RegionMergeM
         }
         return newScore;
     }
-
-    private Map<String, Double> findSimCateScore(String headCate2, int length) {
-        if (StringUtils.isBlank(headCate2)) {
-            return new HashMap<>();
-        }
-
-        String redisKey = String.format("alg_recsys_good_cate_pair_list:%s", headCate2);
-        String cate2Value = redisTemplate.opsForValue().get(redisKey);
-        if (StringUtils.isEmpty(cate2Value)) {
-            return new HashMap<>();
-        }
-
-        return this.parsePair(cate2Value, length);
-    }
-
-    private Map<String, Double> parsePair(String value, int length) {
-        if (StringUtils.isBlank(value)) {
-            return new HashMap<>();
-        }
-
-        String[] split = value.split("\t");
-        if (split.length != 2) {
-            return new HashMap<>();
-        }
-
-        String[] valueList = split[0].trim().split(",");
-        String[] scoreList = split[1].trim().split(",");
-        if (valueList.length != scoreList.length) {
-            return new HashMap<>();
-        }
-
-        int minLength = Math.min(length, valueList.length);
-        Map<String, Double> resultMap = new HashMap<>();
-        for (int i = 0; i < minLength; i++) {
-            resultMap.put(valueList[i].trim(), Double.parseDouble(scoreList[i].trim()));
-        }
-
-        return resultMap;
-    }
-
-    private String findVideoMergeCate2(Map<String, Map<String, Map<String, String>>> featureOriginVideo, String vid) {
-        Map<String, String> videoInfo = featureOriginVideo.getOrDefault(vid, new HashMap<>()).getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
-        return videoInfo.get("merge_second_level_cate");
-    }
 }

+ 6 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/recall/RecallService.java

@@ -182,13 +182,19 @@ public class RecallService implements ApplicationContextAware {
         // V536/V562/V565/V569 各承载一个 dk_elements 实验, 互相隔离便于独立归因:
         //   V562 → YearShareDkElements:    用户近期 share 行为 join dk_elements -> elements_rovn_recall 倒排
         //   V536 → YearShareDkElements:    同 V562, 但在 V536 rank 类中归非个性化白名单 (配额归属差异)
+        //   V839 → YearValidPlayDkElements: V839 用 valid_play 行为源替换 V562 的 share 行为源,
+        //                                    形成"分享 vs 有效播放"双行为 dk_elements 召回 AB 对照.
         //   V565 → UserProfileDkElements:  用户元素画像 (s_z_y_s/zt_gyf)         -> elements_rovn_recall 倒排
         //   V569 → YearReturnDkElements:   用户近期 click 回流行为 join dk_elements -> elements_rovn_recall 倒排
         boolean isHit562Exp = experimentService.judgeHitAlgoExp(param.getAppType(), param.getRootSessionId(), abExpCodes, "562");
         boolean isHit536Exp = experimentService.judgeHitAlgoExp(param.getAppType(), param.getRootSessionId(), abExpCodes, "536");
+        boolean isHit839Exp = experimentService.judgeHitAlgoExp(param.getAppType(), param.getRootSessionId(), abExpCodes, "839");
         if (isHit562Exp || isHit536Exp) {
             strategies.add(strategyMap.get(YearShareDkElementsRecallStrategy.class.getSimpleName()));
         }
+        if (isHit839Exp) {
+            strategies.add(strategyMap.get(YearValidPlayDkElementsRecallStrategy.class.getSimpleName()));
+        }
         boolean isHit565Exp = experimentService.judgeHitAlgoExp(param.getAppType(), param.getRootSessionId(), abExpCodes, "565");
         if (isHit565Exp) {
             strategies.add(strategyMap.get(UserProfileDkElementsRecallStrategy.class.getSimpleName()));

+ 198 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/recall/strategy/YearValidPlayDkElementsRecallStrategy.java

@@ -0,0 +1,198 @@
+package com.tzld.piaoquan.recommend.server.service.recall.strategy;
+
+import com.tzld.piaoquan.recommend.server.model.Video;
+import com.tzld.piaoquan.recommend.server.service.filter.FilterParam;
+import com.tzld.piaoquan.recommend.server.service.filter.FilterResult;
+import com.tzld.piaoquan.recommend.server.service.filter.FilterService;
+import com.tzld.piaoquan.recommend.server.service.recall.FilterParamFactory;
+import com.tzld.piaoquan.recommend.server.service.recall.RecallParam;
+import com.tzld.piaoquan.recommend.server.service.recall.RecallStrategy;
+import com.tzld.piaoquan.recommend.server.util.DkElementsUtils;
+import com.tzld.piaoquan.recommend.server.util.FeatureUtils;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.collections4.MapUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.commons.lang3.tuple.Pair;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.data.redis.core.RedisTemplate;
+import org.springframework.stereotype.Component;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * Video-deconstruction "substantive element" rovn recall, driven by the user's recent
+ * valid-play behaviour (valid-play vids -> dk_elements -> inverted index).
+ *
+ * <p>Compared with YearShareDkElementsRecallStrategy only the trigger source changes (valid plays
+ * instead of shares); per the original author the Redis inverted index and the parameters are
+ * identical.
+ *
+ * <p>Data-channel difference: share/click use the paired a_v_s + a_t_s sequences plus a type
+ * filter, whereas valid play uses the dedicated rp_vid sequence. Upstream already restricts that
+ * sequence to valid-play vids, so no type filter is applied here.
+ *
+ * <p>Keyword selection: walk rp_vid in sequence order, flatten every vid into its elements, keep
+ * the first topN (30) distinct elements, then issue one multiGet against
+ * elements_rovn_recall:{kw}.
+ *
+ * <p>Upstream ODPS table: alg_recsys_recall_elements_rovn (raw element -> top-50 vids + rovn scores)<br>
+ * Redis key: elements_rovn_recall:{raw element}<br>
+ * Redis value: vid1,vid2,...\tscore1,score2,...
+ */
+@Slf4j
+@Component
+public class YearValidPlayDkElementsRecallStrategy implements RecallStrategy {
+
+    /** String-to-string Redis client used for the single multiGet on the inverted index. */
+    @Autowired
+    @Qualifier("redisTemplate")
+    private RedisTemplate<String, String> redisTemplate;
+
+    /** Filter applied to the recalled vids before they are returned. */
+    @Autowired
+    private FilterService filterService;
+
+    private final String CLASS_NAME = this.getClass().getSimpleName();
+
+    public static final String PUSH_FROM = "recall_user_year_valid_play_dk_elements";
+    public static final String redisKeyPrefix = "elements_rovn_recall";
+
+    /** Feature key of the user's recent valid-play vid sequence; upstream keeps valid plays only, so no type filter is needed. */
+    public static final String VID_SEQ_KEY = "rp_vid";
+
+    /** Contribution-score threshold handed to the dk_elements parser (elements with c < 0.8 are dropped so noisy elements never enter recall). */
+    public static final double MIN_CONTRIB_SCORE = 0.8;
+    /** After flattening, the first N distinct elements (in sequence order) are queried in Redis. */
+    public static final int topN = 30;
+
+    /**
+     * Recalls videos for the elements of the user's recent valid-play videos.
+     *
+     * <p>Steps: flatten valid-play vids into elements, keep the first {@link #topN} distinct ones,
+     * multiGet their inverted lists, merge them into a vid -> score map, order by score
+     * descending, run the filter service, and wrap the survivors as {@link Video} objects.
+     *
+     * @param param recall request carrying the user sequence feature, the per-vid info map and the head video id
+     * @return recalled videos tagged with {@link #pushFrom()}; empty (never {@code null}) when
+     *         nothing is recalled or any exception is raised (the exception is logged, not rethrown)
+     */
+    @Override
+    public List<Video> recall(RecallParam param) {
+
+        List<Video> videosResult = new ArrayList<>();
+        try {
+
+            if (MapUtils.isEmpty(param.getUserNetworkSeqVideoInfoMap())) {
+                return videosResult;
+            }
+
+            List<Pair<Long, String>> userValidPlayVideoElement = this.parseUserValidPlayVideoAndElements(
+                    param.getUserNetworkSeqFeature(), param.getUserNetworkSeqVideoInfoMap());
+            if (CollectionUtils.isEmpty(userValidPlayVideoElement)) {
+                return videosResult;
+            }
+            // Walk the pairs in rp_vid sequence order and keep the first topN distinct elements.
+            // The contribution-score filter (c >= MIN_CONTRIB_SCORE) was already applied while parsing.
+            List<String> allElements = userValidPlayVideoElement.stream()
+                    .map(Pair::getValue)
+                    .filter(StringUtils::isNotBlank)
+                    .distinct()
+                    .limit(topN)
+                    .collect(Collectors.toList());
+            if (allElements.isEmpty()) {
+                // Nothing to look up: skip the Redis round trip and the filter call.
+                return videosResult;
+            }
+
+            List<String> keys = this.getRedisKey(allElements);
+            List<String> values = redisTemplate.opsForValue().multiGet(keys);
+
+            // Keep the real rovn score from the inverted index rather than a positional score: it is
+            // written to Video.rovScore below. Per the original author, coarse-ranking truncation
+            // falls back to Video.rovScore for vids missing from coarseMap, so the real score
+            // carries more signal.
+            Map<Long, Double> scoresMap = recall(param.getVideoId(), values);
+            if (scoresMap.isEmpty()) {
+                // Every key missed (or every list was malformed): there is nothing to filter.
+                return videosResult;
+            }
+            List<Long> ids = scoresMap.entrySet().stream()
+                    .sorted(Comparator.comparingDouble((Map.Entry<Long, Double> e) -> e.getValue()).reversed())
+                    .map(Map.Entry::getKey)
+                    .collect(Collectors.toList());
+
+            FilterParam filterParam = FilterParamFactory.create(param, ids, pushFrom(), scoresMap);
+            FilterResult filterResult = filterService.filter(filterParam);
+            if (filterResult != null && CollectionUtils.isNotEmpty(filterResult.getVideoIds())) {
+                for (Long vid : filterResult.getVideoIds()) {
+                    Video video = new Video();
+                    video.setVideoId(vid);
+                    video.setRovScore(scoresMap.getOrDefault(vid, 0.0));
+                    video.setPushFrom(pushFrom());
+                    videosResult.add(video);
+                }
+            }
+        } catch (Exception e) {
+            // SLF4J consumes a trailing Throwable as the exception (stack trace included), so it
+            // must not have its own "{}" placeholder; the old "error={}" was logged literally.
+            log.error("recall is wrong in {}", CLASS_NAME, e);
+        }
+
+        return videosResult;
+    }
+
+    /**
+     * Flattens the valid-play sequence into (vid, element) pairs, preserving sequence order.
+     *
+     * <p>A valid-play vid usually carries several dk_elements, so one vid can yield several pairs.
+     * The rp_vid sequence is already restricted upstream to valid plays, hence no type filter.
+     * Vids that do not parse to a positive long, have no entry in the info map, or have a blank
+     * {@code dk_elements} field are skipped.
+     *
+     * @param userNetworkSeqFeature      raw user sequence feature from which the rp_vid sequence is extracted
+     * @param userNetworkSeqVideoInfoMap per-vid info map; its {@code dk_elements} field is parsed
+     * @return the (vid, element) pairs in sequence order; empty when nothing qualifies
+     */
+    private List<Pair<Long, String>> parseUserValidPlayVideoAndElements(Map<String, String> userNetworkSeqFeature, Map<Long, Map<String, String>> userNetworkSeqVideoInfoMap) {
+        List<Pair<Long, String>> result = new ArrayList<>();
+        List<String> rpVidSeq = FeatureUtils.extractVidsFromUserNetworkSeqFeature(userNetworkSeqFeature, VID_SEQ_KEY);
+        if (CollectionUtils.isEmpty(rpVidSeq)) {
+            return result;
+        }
+
+        for (String vidStr : rpVidSeq) {
+            long videoIdL = NumberUtils.toLong(vidStr, -1);
+            if (videoIdL <= 0) {
+                continue;
+            }
+
+            // Plain get + null check: avoids allocating a throw-away map on every miss and also
+            // tolerates a key that is present but mapped to null.
+            Map<String, String> videoBaseInfo = userNetworkSeqVideoInfoMap.get(videoIdL);
+            if (videoBaseInfo == null) {
+                continue;
+            }
+            String dkElementsStr = videoBaseInfo.get("dk_elements");
+            if (StringUtils.isBlank(dkElementsStr)) {
+                continue;
+            }
+            List<String> kws = DkElementsUtils.parseElementKws(dkElementsStr, MIN_CONTRIB_SCORE);
+            if (CollectionUtils.isEmpty(kws)) {
+                continue;
+            }
+            for (String kw : kws) {
+                result.add(Pair.of(videoIdL, kw));
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Builds the Redis keys {@code elements_rovn_recall:<element>} for the given elements.
+     *
+     * @param elementList elements to look up
+     * @return one key per element, in the same order
+     */
+    private List<String> getRedisKey(List<String> elementList) {
+        List<String> keys = new ArrayList<>(elementList.size());
+        for (String element : elementList) {
+            keys.add(redisKeyPrefix + ":" + element);
+        }
+        return keys;
+    }
+
+    /**
+     * Merges the Redis values returned by multiGet into a vid -> score map.
+     *
+     * <p>Value format: {@code vid1,vid2,...\tscore1,score2,...} (real rovn scores). When a vid
+     * appears in several inverted lists the maximum score wins (per the original author this
+     * matches AbstractRedisRecallStrategy). Blank values, values that do not split into exactly
+     * two tab-separated parts, lists with unparsable tokens and lists whose vid/score counts
+     * differ are skipped as a whole.
+     *
+     * @param headVid vid to exclude from the result; {@code null} disables the exclusion
+     * @param values  raw Redis values, may be {@code null} or contain {@code null}/blank entries
+     * @return the merged vid -> score map; empty when nothing could be parsed
+     */
+    private Map<Long, Double> recall(Long headVid, List<String> values) {
+        Map<Long, Double> scoresMap = new HashMap<>();
+        if (CollectionUtils.isEmpty(values)) {
+            return scoresMap;
+        }
+        for (String value : values) {
+            if (StringUtils.isBlank(value)) {
+                continue;
+            }
+            String[] cells = value.split("\t");
+            if (cells.length != 2) {
+                continue;
+            }
+            List<Long> ids;
+            List<Double> scores;
+            try {
+                // Long.valueOf rejects surrounding whitespace (Double.valueOf ignores it), so trim
+                // the vid tokens to avoid discarding a whole list because of one stray space.
+                ids = Arrays.stream(cells[0].split(",")).map(String::trim).map(Long::valueOf).collect(Collectors.toList());
+                scores = Arrays.stream(cells[1].split(",")).map(Double::valueOf).collect(Collectors.toList());
+            } catch (NumberFormatException nfe) {
+                // Malformed list: positions can no longer be trusted, so drop it entirely.
+                continue;
+            }
+            if (ids.isEmpty() || ids.size() != scores.size()) {
+                continue;
+            }
+            for (int i = 0; i < ids.size(); i++) {
+                long id = ids.get(i);
+                if (headVid != null && headVid.longValue() == id) {
+                    continue;
+                }
+                scoresMap.merge(id, scores.get(i), Math::max);
+            }
+        }
+        return scoresMap;
+    }
+
+    /**
+     * @return the push-from tag attached to every video recalled by this strategy
+     */
+    @Override
+    public String pushFrom() {
+        return PUSH_FROM;
+    }
+}