فهرست منبع

Merge branch 'master' into dev-xym-add-PAI

xueyiming 8 ماه پیش
والد
کامیت
5d36ed3b51

تفاوت فایلی نمایش داده نمی‌شود زیرا این فایل بسیار بزرگ است
+ 4 - 0
ad-engine-server/src/main/resources/20250217_ad_bucket_688.txt


+ 2 - 2
ad-engine-server/src/main/resources/ad_score_config_xgboost_683.conf

@@ -2,6 +2,6 @@ scorer-config = {
   xgb-score-config = {
     scorer-name = "com.tzld.piaoquan.ad.engine.service.score.scorer.XGBoostScorer683"
     scorer-priority = 99
-    model-path = "zhangbo/model_xgb_351_1000_v2.tar.gz"
+    model-path = "fengzhoutian/model_xgb_351_1000_14d_v1.tar.gz"
   }
-}
+}

+ 50 - 20
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/strategy/RankStrategyBy680.java

@@ -17,6 +17,7 @@ import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 import org.xm.Similarity;
 
+import javax.annotation.PostConstruct;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
@@ -28,6 +29,8 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
+import static com.tzld.piaoquan.ad.engine.commons.math.Const.*;
+
 @Slf4j
 @Component
 public class RankStrategyBy680 extends RankStrategyBasic {
@@ -42,6 +45,14 @@ public class RankStrategyBy680 extends RankStrategyBasic {
     @ApolloJsonValue("${rank.score.weight.680:{}}")
     private Map<String, Double> weightMap;
 
+    @ApolloJsonValue("${rank.score.neg_sample_rate:0.01}")
+    Double negSampleRate;
+
+    @PostConstruct
+    public void afterInit() {
+        this.readBucketFile();
+    }
+
     @Override
     public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
 
@@ -179,7 +190,6 @@ public class RankStrategyBy680 extends RankStrategyBasic {
 
         long time3 = System.currentTimeMillis();
         // 分桶
-        this.readBucketFile();
         userFeatureMap = this.featureBucket(userFeatureMap);
         CountDownLatch cdl4 = new CountDownLatch(adRankItems.size());
         for (AdRankItem adRankItem : adRankItems) {
@@ -202,6 +212,18 @@ public class RankStrategyBy680 extends RankStrategyBasic {
         // getScorerPipeline
         List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.XGBOOST_SCORE_CONF_20240909).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
         long time5 = System.currentTimeMillis();
+
+        // calibrate score for negative sampling
+        /* 02-11 update: 因模型换回基线无采样模型,取消校准
+        for (AdRankItem item : result) {
+            double originalScore = item.getLrScore();
+            double calibratedScore = originalScore / (originalScore + (1 - originalScore) / negSampleRate);
+            item.setLrScore(calibratedScore);
+            item.getScoreMap().put("originCtcvrScore", originalScore);
+            item.getScoreMap().put("ctcvrScore", calibratedScore);
+        }
+        */
+
         // loop
         double cpmCoefficient = weightParam.getOrDefault("cpmCoefficient", 0.9);
 
@@ -308,17 +330,19 @@ public class RankStrategyBy680 extends RankStrategyBasic {
                 double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
                 double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
                 double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
-                double f2 = NumUtil.div(conver, view);
-                double ecpm = NumUtil.div(income * 1000, view);
-                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
-                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
-                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+                double cpc = NumUtil.div(income, click);
+                double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+                double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
+                double ecpm = ctr * cpc * 1000;
+                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(ctr));
+                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(ctcvr));
+                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
                 cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
                 cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
 
                 cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
                 cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
-                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
+                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * ctcvr));
             }
         }
 
@@ -341,17 +365,19 @@ public class RankStrategyBy680 extends RankStrategyBasic {
                 double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
                 double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
                 double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
-                double f2 = NumUtil.div(conver, view);
-                double ecpm = NumUtil.div(income * 1000, view);
-                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
-                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
-                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+                double cpc = NumUtil.div(income, click);
+                double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+                double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
+                double ecpm = ctr * cpc * 1000;
+                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(ctr));
+                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(ctcvr));
+                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
                 cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
                 cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
 
                 cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
                 cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
-                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
+                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * ctcvr));
             }
         }
 
@@ -435,11 +461,13 @@ public class RankStrategyBy680 extends RankStrategyBasic {
             double click = Double.parseDouble(d1Feature.getOrDefault("ad_click_" + prefix, "0"));
             double conver = Double.parseDouble(d1Feature.getOrDefault("ad_conversion_" + prefix, "0"));
             double income = Double.parseDouble(d1Feature.getOrDefault("ad_income_" + prefix, "0"));
-            featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(NumUtil.div(click, view)));
-            featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.div(conver, view)));
-            featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+            double cpc = NumUtil.div(income, click);
+            double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+            featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(ctr));
+            featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)));
+            featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
             featureMap.put("d1_feature_" + prefix + "_conver", String.valueOf(conver));
-            featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
+            featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(ctr * cpc * 1000));
         }
     }
 
@@ -582,7 +610,8 @@ public class RankStrategyBy680 extends RankStrategyBasic {
             return;
         }
         synchronized (this) {
-            InputStream resourceStream = RankStrategyBy680.class.getClassLoader().getResourceAsStream("20240718_ad_bucket_688.txt");
+            String bucketFile = "20240718_ad_bucket_688.txt";
+            InputStream resourceStream = RankStrategyBy680.class.getClassLoader().getResourceAsStream(bucketFile);
             if (resourceStream != null) {
                 try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) {
                     Map<String, double[]> bucketsMap = new HashMap<>();
@@ -605,8 +634,9 @@ public class RankStrategyBy680 extends RankStrategyBasic {
                     this.bucketsMap = bucketsMap;
                     this.bucketsLen = bucketsLen;
                 } catch (IOException e) {
-                    log.error("something is wrong in parse bucket file:", e);
+                    log.error("something is wrong in parse bucket file: ", e);
                 }
+                log.info("load bucket file success: {}", bucketFile);
             } else {
                 log.error("no bucket file");
             }
@@ -634,4 +664,4 @@ public class RankStrategyBy680 extends RankStrategyBasic {
         return newFeatureMap;
     }
 
-}
+}

+ 2 - 4
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/strategy/RankStrategyBy683.java

@@ -215,7 +215,6 @@ public class RankStrategyBy683 extends RankStrategyBasic {
         long time5 = System.currentTimeMillis();
 
         // calibrate score for negative sampling
-        /* 02-11 update: 因模型换回基线无采样模型,取消校准
         for (AdRankItem item : result) {
             double originalScore = item.getLrScore();
             double calibratedScore = originalScore / (originalScore + (1 - originalScore) / negSampleRate);
@@ -223,7 +222,6 @@ public class RankStrategyBy683 extends RankStrategyBasic {
             item.getScoreMap().put("originCtcvrScore", originalScore);
             item.getScoreMap().put("ctcvrScore", calibratedScore);
         }
-        */
 
         // loop
         double cpmCoefficient = weightParam.getOrDefault("cpmCoefficient", 0.9);
@@ -611,7 +609,7 @@ public class RankStrategyBy683 extends RankStrategyBasic {
             return;
         }
         synchronized (this) {
-            String bucketFile = "20240718_ad_bucket_688.txt";
+            String bucketFile = "20250217_ad_bucket_688.txt";
             InputStream resourceStream = RankStrategyBy683.class.getClassLoader().getResourceAsStream(bucketFile);
             if (resourceStream != null) {
                 try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) {
@@ -665,4 +663,4 @@ public class RankStrategyBy683 extends RankStrategyBasic {
         return newFeatureMap;
     }
 
-}
+}

برخی فایل‌ها در این مقایسه diff نمایش داده نمی‌شوند زیرا تعداد فایل‌ها بسیار زیاد است