Browse Source

feat:修改565实验

zhaohaipeng 1 month ago
parent
commit
5a062830a3

+ 108 - 14
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV565.java

@@ -183,13 +183,22 @@ public class RankStrategy4RegionMergeModelV565 extends RankStrategy4RegionMergeM
             Map<String, Double> featureMapDouble = item.featureMapDouble;
             item.featureMap = FeatureBucketUtils.bucketFeatureV2("20250218_bucket_322.txt", featureMapDouble);
         }
-        // 4 排序模型计算
-        double xgbRovNegRate = mergeWeight.getOrDefault("xgbRovNegRate", 0.05);
-        double calcVorMode = mergeWeight.getOrDefault("calc_vor_mode", 1d);
 
+
+        // 4.1 排序模型计算 - str
         Map<String, String> sceneFeatureMap = new HashMap<>(0);
         List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_xgb_str_20250228.conf").scoring(sceneFeatureMap, userFeatureMap, rankItems);
+
+        // 4.2 排序模型计算 - ros
+        this.addRosScore(rankItems, userFeatureMapDouble, sceneFeatureMap);
+
         // 5 排序公式特征
+        double xgbRovNegRate = mergeWeight.getOrDefault("xgbRovNegRate", 0.05);
+        double calcVorMode = mergeWeight.getOrDefault("calcVorMode", 1d);
+        double calcRosMode = mergeWeight.getOrDefault("calcRosMode", 1d);
+        double rosAdd = mergeWeight.getOrDefault("ros_add", 0.00001);
+        double vorAdd = mergeWeight.getOrDefault("vor_add", 0.1);
+
         Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_vor_4share:");
         List<Video> result = new ArrayList<>();
         for (RankItem item : items) {
@@ -202,26 +211,34 @@ public class RankStrategy4RegionMergeModelV565 extends RankStrategy4RegionMergeM
 
 
             Map<String, String> vidFeatureMap = vid2MapFeature.getOrDefault(String.valueOf(item.getVideoId()), new HashMap<>());
-            double ros24h = Double.parseDouble(vidFeatureMap.getOrDefault("ros_24h", "0"));
+
             double vor24h = Double.parseDouble(vidFeatureMap.getOrDefault("vor_24h", "0"));
-            if (calcVorMode == 1d) {
-                vor24h = ExtractorUtils.calLog(vor24h);
-            } else if (calcVorMode == 2d) {
-                double vorCoefficient = mergeWeight.getOrDefault("vor_coefficient", 1d);
-                vor24h = vorCoefficient * vor24h;
-            }
+            double vor = this.handleVor(vor24h, calcVorMode, item, mergeWeight);
 
-            item.getScoresMap().put("hasReturnRovScore", ros24h);
-            item.getScoresMap().put("vor", vor24h);
+            item.getScoresMap().put("originVor", vor24h);
+            item.getScoresMap().put("vor", vor);
             item.getScoresMap().put("calcVorMode", calcVorMode);
-            score = fmRov * (0.1 + ros24h) * (0.1 + vor24h);
 
+            double originScoreRos = item.getScoreRos();
+            double ros = this.handleRos(originScoreRos, calcRosMode, item, mergeWeight);
+
+            item.getScoresMap().put("RosXGBScore", originScoreRos);
+            item.getScoresMap().put("hasReturnRovScore", ros);
+            item.getScoresMap().put("calcRosMode", calcRosMode);
+
+            item.getScoresMap().put("rosAdd", rosAdd);
+            item.getScoresMap().put("vorAdd", vorAdd);
+            score = fmRov * (rosAdd + ros) * (vorAdd + vor);
 
             Video video = item.getVideo();
             video.setScore(score);
             video.setSortScore(score);
             video.setScoresMap(item.getScoresMap());
-            video.setAllFeatureMap(item.getAllFeatureMap());
+
+            Map<String, String> allFeatureMap = new HashMap<>();
+            item.getFeatureMapDouble().forEach((key, value) -> allFeatureMap.put(key, String.valueOf(value)));
+            video.setAllFeatureMap(allFeatureMap);
+
             if (MapUtils.isNotEmpty(feature.getVideoFeature()) && MapUtils.isNotEmpty(feature.getVideoFeature().get(item.getVideoId() + ""))) {
                 video.getMetaFeatureMap().putAll(feature.getVideoFeature().get(item.getVideoId() + ""));
             }
@@ -234,6 +251,10 @@ public class RankStrategy4RegionMergeModelV565 extends RankStrategy4RegionMergeM
             if (MapUtils.isNotEmpty(feature.getUserFeature())) {
                 video.getMetaFeatureMap().putAll(feature.getUserFeature());
             }
+
+            // 将空的特征表过滤
+            video.getMetaFeatureMap().entrySet().removeIf(e -> MapUtils.isEmpty(e.getValue()));
+
             result.add(video);
         }
         result.sort(Comparator.comparingDouble(o -> -o.getSortScore()));
@@ -243,4 +264,77 @@ public class RankStrategy4RegionMergeModelV565 extends RankStrategy4RegionMergeM
         return result;
     }
 
+    /**
+     * Converts the raw ROS model score of an item into the ROS term used by the ranking formula.
+     *
+     * <p>A raw score of exactly 0 is returned as 0 without further processing. Otherwise the raw
+     * score is passed through {@code ExtractorUtils.inverseLog} and then adjusted by mode:
+     * <ul>
+     *   <li>mode 1: raised to a power read from "le_ros_power" (default 5); when the inverse-log
+     *       value is greater than 1 the power is read from "gt_1_ros_power" instead (default 1.5)</li>
+     *   <li>mode 2: replaced by {@code inverseLog(raw * "model_ros_coefficient")} (default 8)</li>
+     *   <li>mode 3: raised to the power read from "ros_power" (default 5)</li>
+     *   <li>any other mode: the inverse-log value is returned unchanged</li>
+     * </ul>
+     * The power or coefficient that was applied is recorded in the item's scores map
+     * ("rosPower" or "modelRosCoefficient").
+     *
+     * @param originScoreRos raw ROS score produced by the model
+     * @param calcRosMode    transformation mode selector (compared against 1, 2 and 3)
+     * @param item           rank item whose scores map receives the applied parameter
+     * @param mergeWeight    experiment parameters; missing keys fall back to the defaults above
+     * @return the transformed ROS value
+     */
+    private double handleRos(double originScoreRos, double calcRosMode, RankItem item, Map<String, Double> mergeWeight) {
+        if (originScoreRos == 0) {
+            return 0;
+        }
+
+        double scoreRos = ExtractorUtils.inverseLog(originScoreRos);
+        if (calcRosMode == 1d) {
+            double rosPower = mergeWeight.getOrDefault("le_ros_power", 5d);
+            if (scoreRos > 1) {
+                // "gt_1_ros_poewr" is the original, misspelled key. Prefer the correctly spelled
+                // key, but keep honoring the misspelled one so existing configuration still applies.
+                rosPower = mergeWeight.getOrDefault("gt_1_ros_power", mergeWeight.getOrDefault("gt_1_ros_poewr", 1.5d));
+            }
+            item.getScoresMap().put("rosPower", rosPower);
+            scoreRos = Math.pow(scoreRos, rosPower);
+        } else if (calcRosMode == 2d) {
+            double modelRosCoefficient = mergeWeight.getOrDefault("model_ros_coefficient", 8d);
+            item.getScoresMap().put("modelRosCoefficient", modelRosCoefficient);
+            // Mode 2 starts over from the scaled raw score; the value computed above is discarded.
+            scoreRos = ExtractorUtils.inverseLog(originScoreRos * modelRosCoefficient);
+        } else if (calcRosMode == 3d) {
+            double rosPower = mergeWeight.getOrDefault("ros_power", 5d);
+            item.getScoresMap().put("rosPower", rosPower);
+            scoreRos = Math.pow(scoreRos, rosPower);
+        }
+
+        return scoreRos;
+    }
+
+    /**
+     * Converts the raw VOR feature value of an item into the VOR term used by the ranking formula.
+     *
+     * <p>A raw value of exactly 0 yields 0. Otherwise the result depends on the mode:
+     * <ul>
+     *   <li>mode 1: {@code ExtractorUtils.calLog(originVor)}</li>
+     *   <li>mode 2: {@code originVor} multiplied by "vor_coefficient" (default 1)</li>
+     *   <li>mode 3: {@code originVor} raised to the power "vor_power" (default 1)</li>
+     *   <li>any other mode: {@code originVor} unchanged</li>
+     * </ul>
+     * For modes 2 and 3 the applied parameter is recorded in the item's scores map
+     * ("vorCoefficient" or "vorPower").
+     *
+     * @param originVor   raw VOR value
+     * @param calcVorMode transformation mode selector (compared against 1, 2 and 3)
+     * @param item        rank item whose scores map receives the applied parameter
+     * @param mergeWeight experiment parameters; missing keys fall back to the defaults above
+     * @return the transformed VOR value
+     */
+    private double handleVor(double originVor, double calcVorMode, RankItem item, Map<String, Double> mergeWeight) {
+        if (originVor == 0) {
+            return 0;
+        }
+
+        if (calcVorMode == 1d) {
+            return ExtractorUtils.calLog(originVor);
+        }
+
+        if (calcVorMode == 2d) {
+            double coefficient = mergeWeight.getOrDefault("vor_coefficient", 1d);
+            item.getScoresMap().put("vorCoefficient", coefficient);
+            return coefficient * originVor;
+        }
+
+        if (calcVorMode == 3d) {
+            double exponent = mergeWeight.getOrDefault("vor_power", 1d);
+            item.getScoresMap().put("vorPower", exponent);
+            return Math.pow(originVor, exponent);
+        }
+
+        // Unrecognized mode: pass the raw value through untouched.
+        return originVor;
+    }
+
+    /**
+     * ros模型打分
+     */
+    private void addRosScore(List<RankItem> rankItems, Map<String, Double> userFeatureMapDouble, Map<String, String> sceneFeatureMap) {
+        List<RankItem> rosRankItems = new ArrayList<>(rankItems.size());
+        for (RankItem rankItem : rankItems) {
+            RankItem rosRankItem = new RankItem(rankItem.getVideo());
+            rosRankItem.rosFeatureMap = FeatureBucketUtils.bucketFeatureV2("20250306_ros_bucket_229.txt", rankItem.featureMapDouble);
+            rosRankItems.add(rosRankItem);
+        }
+        Map<String, String> userFeatureMap = FeatureBucketUtils.bucketFeatureV2("20250306_ros_bucket_229.txt", userFeatureMapDouble);
+        ScorerUtils.getScorerPipeline("feeds_score_config_xgb_ros_binary_20250319.conf").scoring(sceneFeatureMap, userFeatureMap, rosRankItems);
+
+        // 将ros分数补充到之前的列表中
+        Map<Long, RankItem> vidRosMap = new HashMap<>(rosRankItems.size());
+        for (RankItem rosRankItem : rosRankItems) {
+            vidRosMap.put(rosRankItem.getVideoId(), rosRankItem);
+        }
+
+        for (RankItem rankItem : rankItems) {
+            if (vidRosMap.containsKey(rankItem.getVideoId())) {
+                RankItem rosRankItem = vidRosMap.get(rankItem.getVideoId());
+                rankItem.setScoreRos(rosRankItem.getScoreRos());
+                rankItem.getScoresMap().put("RosXGBScore", rosRankItem.getScoreRos());
+            }
+        }
+    }
 }

+ 1 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/score/ScorerUtils.java

@@ -41,6 +41,7 @@ public final class ScorerUtils {
         ScorerUtils.init("feeds_score_config_xgb_rov_20250109.conf");
         ScorerUtils.init("feeds_score_config_xgb_str_20250228.conf");
         ScorerUtils.init("feeds_score_config_xgb_ros_20250311.conf");
+        ScorerUtils.init("feeds_score_config_xgb_ros_binary_20250319.conf");
         ScorerUtils.init4Recall("feeds_recall_config_region_v1.conf");
         ScorerUtils.init4Recall("feeds_recall_config_region_ros.conf");
         ScorerUtils.init4Recall("feeds_score_config_bless.conf");

+ 244 - 0
recommend-server-service/src/main/resources/feeds_score_config_xgb_ros_binary_20250319.conf

@@ -0,0 +1,244 @@
+scorer-config = {
+  rov-score-config = {
+    scorer-name = "com.tzld.piaoquan.recommend.server.service.score.XGBoostScorer"
+    scorer-priority = 99
+    model-path = "zhangbo/model_xgb_for_ros_binary_v1.tar.gz"
+    param = {
+      localDir = "xgboost/model_xgb_for_ros_binary_v1"
+      features = [
+            "b1_return_1_uv_24h"
+            "b1_return_1_uv_3h"
+            "b1_ros_24h"
+            "b1_ros_3h"
+            "b1_ros_minus_24h"
+            "b1_ros_minus_3h"
+            "b1_ros_one_24h"
+            "b1_ros_one_3h"
+            "b1_rovn_24h"
+            "b1_rovn_3h"
+            "b1_str_one_24h"
+            "b1_str_one_3h"
+            "b2_return_n_uv_24h"
+            "b2_return_n_uv_3h"
+            "b2_ros_24h"
+            "b2_ros_3h"
+            "b2_ros_minus_24h"
+            "b2_ros_minus_3h"
+            "b2_ros_one_24h"
+            "b2_ros_one_3h"
+            "b2_rovn_24h"
+            "b2_rovn_3h"
+            "b2_str_one_24h"
+            "b2_str_one_3h"
+            "b3_return_n_uv_168h"
+            "b3_return_n_uv_24h"
+            "b3_ros_168h"
+            "b3_ros_24h"
+            "b3_ros_minus_168h"
+            "b3_ros_minus_24h"
+            "b3_ros_one_168h"
+            "b3_ros_one_24h"
+            "b3_rovn_168h"
+            "b3_rovn_24h"
+            "b3_str_one_168h"
+            "b3_str_one_24h"
+            "b5_return_n_uv_24h"
+            "b5_ros_24h"
+            "b5_ros_minus_24h"
+            "b5_ros_one_24h"
+            "b5_rovn_24h"
+            "b5_str_one_24h"
+            "b6_return_n_uv_24h"
+            "b6_ros_24h"
+            "b6_ros_minus_24h"
+            "b6_ros_one_24h"
+            "b6_rovn_24h"
+            "b6_str_one_24h"
+            "b7_return_n_uv_24h"
+            "b7_ros_24h"
+            "b7_ros_minus_24h"
+            "b7_ros_one_24h"
+            "b7_rovn_24h"
+            "b7_str_one_24h"
+            "b8_return_n_uv_24h"
+            "b8_ros_24h"
+            "b8_ros_minus_24h"
+            "b8_ros_one_24h"
+            "b8_rovn_24h"
+            "b8_str_one_24h"
+            "b9_return_n_uv_24h"
+            "b9_ros_24h"
+            "b9_ros_minus_24h"
+            "b9_ros_one_24h"
+            "b9_rovn_24h"
+            "b9_str_one_24h"
+            "b11_return_n_uv_24h"
+            "b11_ros_24h"
+            "b11_ros_minus_24h"
+            "b11_ros_one_24h"
+            "b11_rovn_24h"
+            "b11_str_one_24h"
+            "b12_return_n_uv_14d"
+            "b12_ros_14d"
+            "b12_ros_minus_14d"
+            "b12_ros_one_14d"
+            "b12_rovn_14d"
+            "b12_str_one_14d"
+            "b13_return_n_uv_24h"
+            "b13_return_n_uv_3h"
+            "b13_ros_24h"
+            "b13_ros_3h"
+            "b13_ros_minus_24h"
+            "b13_ros_minus_3h"
+            "b13_ros_one_24h"
+            "b13_ros_one_3h"
+            "b13_rovn_24h"
+            "b13_rovn_3h"
+            "b13_str_one_24h"
+            "b13_str_one_3h"
+            "c1_click_12h"
+            "c1_click_168h"
+            "c1_click_24h"
+            "c1_click_72h"
+            "c1_return_1_uv_12h"
+            "c1_return_1_uv_168h"
+            "c1_return_1_uv_24h"
+            "c1_return_1_uv_72h"
+            "c1_ros_12h"
+            "c1_ros_168h"
+            "c1_ros_24h"
+            "c1_ros_72h"
+            "c1_ros_minus_12h"
+            "c1_ros_minus_168h"
+            "c1_ros_minus_24h"
+            "c1_ros_minus_72h"
+            "c1_ros_one_12h"
+            "c1_ros_one_168h"
+            "c1_ros_one_24h"
+            "c1_ros_one_72h"
+            "c1_rovn_12h"
+            "c1_rovn_168h"
+            "c1_rovn_24h"
+            "c1_rovn_72h"
+            "c1_str_one_12h"
+            "c1_str_one_168h"
+            "c1_str_one_24h"
+            "c1_str_one_72h"
+            "c2_click_12h"
+            "c2_click_168h"
+            "c2_click_24h"
+            "c2_click_72h"
+            "c2_is_return_1_12h"
+            "c2_is_return_1_168h"
+            "c2_is_return_1_24h"
+            "c2_is_return_1_72h"
+            "c2_is_share_12h"
+            "c2_is_share_168h"
+            "c2_is_share_24h"
+            "c2_is_share_72h"
+            "c2_return_n_uv_12h"
+            "c2_return_n_uv_168h"
+            "c2_return_n_uv_24h"
+            "c2_return_n_uv_72h"
+            "c2_share_cnt_12h"
+            "c2_share_cnt_168h"
+            "c2_share_cnt_24h"
+            "c2_share_cnt_72h"
+            "c3_click_12h"
+            "c3_click_168h"
+            "c3_click_24h"
+            "c3_click_72h"
+            "c3_is_return_1_12h"
+            "c3_is_return_1_168h"
+            "c3_is_return_1_24h"
+            "c3_is_return_1_72h"
+            "c3_is_share_12h"
+            "c3_is_share_168h"
+            "c3_is_share_24h"
+            "c3_is_share_72h"
+            "c3_return_n_uv_12h"
+            "c3_return_n_uv_168h"
+            "c3_return_n_uv_24h"
+            "c3_return_n_uv_72h"
+            "c3_share_cnt_12h"
+            "c3_share_cnt_168h"
+            "c3_share_cnt_24h"
+            "c3_share_cnt_72h"
+            "c4_avg_ros_168h"
+            "c4_avg_ros_24h"
+            "c4_avg_ros_72h"
+            "c4_avg_ros_minus_168h"
+            "c4_avg_ros_minus_24h"
+            "c4_avg_ros_minus_72h"
+            "c4_avg_ros_one_168h"
+            "c4_avg_ros_one_24h"
+            "c4_avg_ros_one_72h"
+            "c4_avg_rovn_168h"
+            "c4_avg_rovn_24h"
+            "c4_avg_rovn_72h"
+            "c4_avg_str_one_168h"
+            "c4_avg_str_one_24h"
+            "c4_avg_str_one_72h"
+            "c4_diff_ros_168h"
+            "c4_diff_ros_24h"
+            "c4_diff_ros_72h"
+            "c4_diff_ros_minus_168h"
+            "c4_diff_ros_minus_24h"
+            "c4_diff_ros_minus_72h"
+            "c4_diff_ros_one_168h"
+            "c4_diff_ros_one_24h"
+            "c4_diff_ros_one_72h"
+            "c4_diff_rovn_168h"
+            "c4_diff_rovn_24h"
+            "c4_diff_rovn_72h"
+            "c4_diff_str_one_168h"
+            "c4_diff_str_one_24h"
+            "c4_diff_str_one_72h"
+            "c5_avgscore_tags_1d"
+            "c5_avgscore_tags_3d"
+            "c5_avgscore_tags_7d"
+            "c5_matchnum_tags_1d"
+            "c5_matchnum_tags_3d"
+            "c5_matchnum_tags_7d"
+            "c5_maxscore_tags_1d"
+            "c5_maxscore_tags_3d"
+            "c5_maxscore_tags_7d"
+            "c6_avgscore_tags_1d"
+            "c6_avgscore_tags_3d"
+            "c6_avgscore_tags_7d"
+            "c6_matchnum_tags_1d"
+            "c6_matchnum_tags_3d"
+            "c6_matchnum_tags_7d"
+            "c6_maxscore_tags_1d"
+            "c6_maxscore_tags_3d"
+            "c6_maxscore_tags_7d"
+            "d1_ros_cf_rank"
+            "d1_ros_cf_score"
+            "d1_rov_cf_rank"
+            "d1_rov_cf_score"
+            "d2_rank"
+            "d2_score"
+            "d3_exp"
+            "d3_return_n"
+            "d3_rovn"
+            "festive_sim"
+            "head_title_festive_sim"
+            "head_title_merge1_sim"
+            "head_title_merge2_sim"
+            "merge1_sim"
+            "merge2_sim"
+            "title_sim"
+            "day_of_week"
+            "hour"
+            "create_ts_diff"
+            "is_greeting"
+            "total_time"
+            "width"
+            "height"
+            "width/height"
+            "size"
+            "bit_rate"
+      ]
+    }
+  }
+}