Kaynağa Gözat

ros-添加时间特征&特征平滑&扩大召回&vor值域压缩

jch 2 ay önce
ebeveyn
işleme
9518d9172c

+ 19 - 49
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV564.java

@@ -39,7 +39,6 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
     private static final List<String> cfRovList = Collections.singletonList("rovn");
     private static final List<String> videoSimAttrs = Arrays.asList("cate1_list", "cate2", "cate2_list",
             "keywords", "style", "theme", "title", "topic", "user_value");
-    private static final double smoothPlus = 5.0;
 
     @Override
     public List<Video> mergeAndRankRovRecall(RankParam param) {
@@ -120,9 +119,6 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
         Map<String, String> c8 = featureOriginUser.getOrDefault("alg_mid_feature_sharecf", new HashMap<>());
         Map<String, String> c9 = featureOriginUser.getOrDefault("alg_mid_feature_returncf", new HashMap<>());
 
-        // time feature
-        addTimeFeature(userFeatureMapDouble);
-
         if (!c1.isEmpty()) {
             userFeatureMapDouble.put("playcnt_6h", Double.parseDouble(c1.getOrDefault("playcnt_6h", "0")));
             userFeatureMapDouble.put("playcnt_1d", Double.parseDouble(c1.getOrDefault("playcnt_1d", "0")));
@@ -219,7 +215,6 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
                     double share = tuple4.second.isEmpty() ? 0 : Double.parseDouble(tuple4.second.getOrDefault("share_pv_" + prefix2, "0.0"));
                     double returns = tuple4.third.isEmpty() ? 0 : Double.parseDouble(tuple4.third.getOrDefault("return_uv_" + prefix2, "0.0"));
 
-                    double f0 = ExtractorUtils.calLog(exp);
                     double f1 = ExtractorUtils.calDiv(share, exp);
                     double f2 = ExtractorUtils.calLog(share);
                     double f3 = ExtractorUtils.calDiv(returns, exp);
@@ -227,7 +222,6 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
                     double f5 = f3 * f4;
                     double f6 = ExtractorUtils.calDiv(returns, share);
 
-                    String key0 = tuple4.name + "_" + prefix2 + "_" + "EXP";
                     String key1 = tuple4.name + "_" + prefix2 + "_" + "STR";
                     String key2 = tuple4.name + "_" + prefix2 + "_" + "log(share)";
                     String key3 = tuple4.name + "_" + prefix2 + "_" + "ROV";
@@ -235,7 +229,6 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
                     String key5 = tuple4.name + "_" + prefix2 + "_" + "ROV*log(return)";
                     String key6 = tuple4.name + "_" + prefix2 + "_" + "ROS";
 
-                    featureMap.put(key0, f0);
                     featureMap.put(key1, f1);
                     featureMap.put(key2, f2);
                     featureMap.put(key3, f3);
@@ -310,7 +303,7 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
         // 3 连续值特征分桶
         readBucketFile();
         Map<String, String> userFeatureMap = new HashMap<>(userFeatureMapDouble.size());
-        Map<String, String> norUserFeatureMap = FeatureBucketUtils.noBucketFeature(userFeatureMapDouble);
+        Map<String, String> norUserFeatureMap = FeatureBucketUtils.bucketFeature("20241209_nor_bucket.txt", userFeatureMapDouble);
         for (Map.Entry<String, Double> entry : userFeatureMapDouble.entrySet()) {
             String name = entry.getKey();
             Double score = entry.getValue();
@@ -338,7 +331,7 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
                 }
             }
             item.featureMap = featureMap;
-            item.norFeatureMap = FeatureBucketUtils.noBucketFeature(featureMapDouble);
+            item.norFeatureMap = FeatureBucketUtils.bucketFeature("20241209_nor_bucket.txt", featureMapDouble);
         }
         // 4 排序模型计算
         double fmRovLogBase = mergeWeight.getOrDefault("fmRovLogBase", 3.5);
@@ -347,8 +340,10 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
         double xgbNorWeight = mergeWeight.getOrDefault("xgbNorWeight", 1.8968);
         double xgbNorPowerWeight = mergeWeight.getOrDefault("xgbNorPowerWeight", 1.2216);
         double xgbNorPowerExp = mergeWeight.getOrDefault("xgbNorPowerExp", 1.3217);
+        double vorLogWeight = mergeWeight.getOrDefault("vorLogWeight", 10.0);
+        double vorLogBase = mergeWeight.getOrDefault("vorLogBase", 2.6);
         Map<String, String> sceneFeatureMap = new HashMap<>(0);
-        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_fm_xgb_20250208.conf").scoring(sceneFeatureMap, userFeatureMap, norUserFeatureMap, rankItems);
+        List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_fm_xgb_20241209.conf").scoring(sceneFeatureMap, userFeatureMap, norUserFeatureMap, rankItems);
         // 5 排序公式特征
         Map<String, Map<String, String>> vid2MapFeature = this.getVideoRedisFeature(vids, "redis:vid_hasreturn_vor:");
         List<Video> result = new ArrayList<>();
@@ -358,14 +353,15 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
             item.getScoresMap().put("fmRovOrigin", fmRovOrigin);
             double fmRov = restoreScore(fmRovOrigin);
             item.getScoresMap().put("fmRov", fmRov);
-            double newFmRov = log(1 + fmRov, fmRovLogBase);
+            double newFmRov = log(1.0, 1 + fmRov, fmRovLogBase);
             double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("rov", "0"));
             item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
             double norXGBScore = item.getScoresMap().getOrDefault("NorXGBScore", 0d);
             double newNorXGBScore = norCalibration(xgbNorScaleType, xgbNorBias, xgbNorWeight, xgbNorPowerWeight, xgbNorPowerExp, norXGBScore);
             double vor = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("vor", "0"));
+            double newVor = log(vorLogWeight, 1.0 + vor / 10.0, vorLogBase);
             item.getScoresMap().put("vor", vor);
-            score = newFmRov * (0.1 + newNorXGBScore) * (0.1 + vor);
+            score = newFmRov * (0.1 + newNorXGBScore) * (0.1 + newVor);
             Video video = item.getVideo();
             video.setScore(score);
             video.setSortScore(score);
@@ -423,19 +419,17 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
             List<String> infoPeriod = tuple3.second;
             Map<String, String> infoMap = tuple3.third;
             for (String period : infoPeriod) {
-                double view = getVideoOneInfo("view_" + period, infoMap);
                 double share = getVideoOneInfo("share_" + period, infoMap);
                 double return_ = getVideoOneInfo("return_" + period, infoMap);
                 double view_hasreturn = getVideoOneInfo("view_hasreturn_" + period, infoMap);
                 double share_hasreturn = getVideoOneInfo("share_hasreturn_" + period, infoMap);
-                double ros = ExtractorUtils.smoothDiv(return_, share, smoothPlus);
-                double rov = ExtractorUtils.smoothDiv(return_, view, smoothPlus);
-                double r_cnt = ExtractorUtils.smoothDiv(return_, view_hasreturn, smoothPlus);
-                double r_rate = ExtractorUtils.smoothDiv(view_hasreturn, view, smoothPlus);
-                double r_cnt4s = ExtractorUtils.smoothDiv(return_, share_hasreturn, smoothPlus);
-                double str = ExtractorUtils.smoothDiv(share, view, smoothPlus);
-
-                featureMap.put(infoType + "_" + period + "_" + "view", ExtractorUtils.calLog(view));
+                double ros = getVideoOneInfo("ros_" + period, infoMap);
+                double rov = getVideoOneInfo("rov_" + period, infoMap);
+                double r_cnt = getVideoOneInfo("r_cnt_" + period, infoMap);
+                double r_rate = getVideoOneInfo("r_rate_" + period, infoMap);
+                double r_cnt4s = getVideoOneInfo("r_cnt4s_" + period, infoMap);
+                double str = getVideoOneInfo("str_" + period, infoMap);
+
                 featureMap.put(infoType + "_" + period + "_" + "share", ExtractorUtils.calLog(share));
                 featureMap.put(infoType + "_" + period + "_" + "return", ExtractorUtils.calLog(return_));
                 featureMap.put(infoType + "_" + period + "_" + "view_hasreturn", ExtractorUtils.calLog(view_hasreturn));
@@ -502,32 +496,6 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
         }
     }
 
-    private void addTimeFeature(Map<String, Double> featureMap) {
-        Calendar calendar = Calendar.getInstance();
-        calendar.setTimeInMillis(System.currentTimeMillis());
-
-        int week = calendar.get(Calendar.DAY_OF_WEEK);
-        int hour = calendar.get(Calendar.HOUR_OF_DAY) + 1;
-        featureMap.put("week", week * 1.0);
-        featureMap.put("hour", hour * 1.0);
-    }
-
-    private double rovCalibration(double bias, double weight, double squareWeight, double cubeWeight, double score) {
-        double newScore = bias + weight * score;
-        if (Math.abs(squareWeight) > 1E-8) {
-            newScore += squareWeight * Math.pow(score, 2);
-        }
-        if (Math.abs(cubeWeight) > 1E-8) {
-            newScore += cubeWeight * Math.pow(score, 3);
-        }
-        if (newScore < 1E-8) {
-            newScore = score;
-        } else if (newScore > 0.9) {
-            newScore = 0.9;
-        }
-        return newScore;
-    }
-
     private double norCalibration(double scaleType, double polyBias, double polyWeight, double powerWeight, double powerExp, double score) {
         if (scaleType < 1) {
             return norPolyCalibration(polyBias, polyWeight, score);
@@ -548,13 +516,15 @@ public class RankStrategy4RegionMergeModelV564 extends RankStrategy4RegionMergeM
         double newScore = weight * Math.pow(score, exp);
         if (newScore > 100) {
             newScore = 100;
+        } else if (newScore < score) {
+            newScore = score;
         }
         return newScore;
     }
 
-    private double log(double x, double base) {
+    private double log(double weight, double x, double base) {
         if (base > 1) {
-            return Math.log(x) / Math.log(base);
+            return weight * Math.log(x) / Math.log(base);
         } else {
             return x;
         }

+ 11 - 6
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV567.java

@@ -56,7 +56,7 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
         oldRovs.addAll(extractAndSort(param, RegionRelative24HRecallStrategy.PUSH_FORM));
         oldRovs.addAll(extractAndSort(param, RegionRelative24HDupRecallStrategy.PUSH_FORM));
         removeDuplicate(oldRovs);
-        int sizeReturn = param.getSize();
+        int sizeReturn = param.getSize() + 2;
         List<Video> v0 = oldRovs.size() <= sizeReturn
                 ? oldRovs
                 : oldRovs.subList(0, sizeReturn);
@@ -73,7 +73,7 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
         //-------------------新地域召回------------------
         List<Video> v1 = extractAndSort(param, RegionRealtimeRecallStrategyV1.PUSH_FORM);
         v1 = v1.stream().filter(r -> !setVideo.contains(r.getVideoId())).collect(Collectors.toList());
-        v1 = v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 5.0).intValue(), v1.size()));
+        v1 = v1.subList(0, Math.min(mergeWeight.getOrDefault("v1", 10.0).intValue(), v1.size()));
         rovRecallRank.addAll(v1);
         setVideo.addAll(v1.stream().map(Video::getVideoId).collect(Collectors.toSet()));
         //-------------------scene cf rovn------------------
@@ -347,6 +347,8 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
         double xgbNorWeight = mergeWeight.getOrDefault("xgbNorWeight", 1.8968);
         double xgbNorPowerWeight = mergeWeight.getOrDefault("xgbNorPowerWeight", 1.2216);
         double xgbNorPowerExp = mergeWeight.getOrDefault("xgbNorPowerExp", 1.3217);
+        double vorLogWeight = mergeWeight.getOrDefault("vorLogWeight", 10.0);
+        double vorLogBase = mergeWeight.getOrDefault("vorLogBase", 2.6);
         Map<String, String> sceneFeatureMap = new HashMap<>(0);
         List<RankItem> items = ScorerUtils.getScorerPipeline("feeds_score_config_fm_xgb_20250208.conf").scoring(sceneFeatureMap, userFeatureMap, norUserFeatureMap, rankItems);
         // 5 排序公式特征
@@ -358,14 +360,15 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
             item.getScoresMap().put("fmRovOrigin", fmRovOrigin);
             double fmRov = restoreScore(fmRovOrigin);
             item.getScoresMap().put("fmRov", fmRov);
-            double newFmRov = log(1 + fmRov, fmRovLogBase);
+            double newFmRov = log(1.0, 1 + fmRov, fmRovLogBase);
             double hasReturnRovScore = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("rov", "0"));
             item.getScoresMap().put("hasReturnRovScore", hasReturnRovScore);
             double norXGBScore = item.getScoresMap().getOrDefault("NorXGBScore", 0d);
             double newNorXGBScore = norCalibration(xgbNorScaleType, xgbNorBias, xgbNorWeight, xgbNorPowerWeight, xgbNorPowerExp, norXGBScore);
             double vor = Double.parseDouble(vid2MapFeature.getOrDefault(item.getVideoId() + "", new HashMap<>()).getOrDefault("vor", "0"));
+            double newVor = log(vorLogWeight, 1.0 + vor / 10.0, vorLogBase);
             item.getScoresMap().put("vor", vor);
-            score = newFmRov * (0.1 + newNorXGBScore) * (0.1 + vor);
+            score = newFmRov * (0.1 + newNorXGBScore) * (0.1 + newVor);
             Video video = item.getVideo();
             video.setScore(score);
             video.setSortScore(score);
@@ -532,13 +535,15 @@ public class RankStrategy4RegionMergeModelV567 extends RankStrategy4RegionMergeM
         double newScore = weight * Math.pow(score, exp);
         if (newScore > 100) {
             newScore = 100;
+        } else if (newScore < score) {
+            newScore = score;
         }
         return newScore;
     }
 
-    private double log(double x, double base) {
+    private double log(double weight, double x, double base) {
         if (base > 1) {
-            return Math.log(x) / Math.log(base);
+            return weight * Math.log(x) / Math.log(base);
         } else {
             return x;
         }

+ 2 - 2
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/score/NorXGBRegressionScorer.java

@@ -147,8 +147,8 @@ public class NorXGBRegressionScorer extends XGBRegressionModelScorer {
         }
         if (pro < 1E-8) {
             pro = 0;
-        } else if (pro > 50) {
-            pro = 50;
+        } else if (pro > 80) {
+            pro = 80;
         }
         item.getScoresMap().put("NorXGBScore", pro);
         return pro;

+ 1 - 1
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/score/ScorerPipeline.java

@@ -201,7 +201,7 @@ public class ScorerPipeline {
             if (!scorer.isEnable()) {
                 continue;
             }
-            if (scorer.scorerConfigInfo.getModelPath().equals("zhangbo/model_xgb_for_recsys_nor.tar.gz")) {
+            if (scorer.scorerConfigInfo.getModelPath().contains("_nor.tar.gz")) {
                 userFeatMap = norUserFeatureMap;
             }
 

+ 2 - 2
recommend-server-service/src/main/resources/feeds_score_config_fm_xgb_20250208.conf

@@ -7,9 +7,9 @@ scorer-config = {
   nor-score-config = {
     scorer-name = "com.tzld.piaoquan.recommend.server.service.score.NorXGBRegressionScorer"
     scorer-priority = 97
-    model-path = "zhangbo/model_xgb_for_recsys_nor_v2.tar.gz"
+    model-path = "zhangbo/model_xgb_for_recsys_v2_nor.tar.gz"
     param = {
-      localDir = "xgboost/recsys_nor_v2"
+      localDir = "xgboost/recsys_v2_nor"
       features = [
       "b111213_1d_ROS",
       "b111213_1d_ROV*log(return)",