Prechádzať zdrojové kódy

Update RankStrategyBy683: sync 680 and change model to model_xgb_351_1000_30d_v1

StrayWarrior 9 mesiacov pred
rodič
commit
93eb4db4b5

+ 3 - 4
ad-engine-server/src/main/resources/ad_score_config_xgboost_683.conf

@@ -1,8 +1,7 @@
 scorer-config = {
-  lr-rov-score-config = {
+  xgb-score-config = {
     scorer-name = "com.tzld.piaoquan.ad.engine.service.score.scorer.XGBoostScorer683"
     scorer-priority = 99
-    model-path = "zhangbo/model_xgb_351_1000.tar.gz"
+    model-path = "fengzhoutian/model_xgb_351_1000_30d_v1.tar.gz"
   }
-
-}
+}

+ 94 - 48
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/strategy/RankStrategyBy683.java

@@ -1,5 +1,6 @@
 package com.tzld.piaoquan.ad.engine.service.score.strategy;
 
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
 import com.tzld.piaoquan.ad.engine.commons.score.ScorerUtils;
 import com.tzld.piaoquan.ad.engine.commons.thread.ThreadPoolFactory;
@@ -9,8 +10,10 @@ import com.tzld.piaoquan.ad.engine.commons.dto.AdPlatformCreativeDTO;
 import com.tzld.piaoquan.ad.engine.commons.param.RankRecommendRequestParam;
 import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.collections4.MapUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 import org.xm.Similarity;
 
@@ -33,12 +36,31 @@ public class RankStrategyBy683 extends RankStrategyBasic {
 
     private Map<String, Double> bucketsLen = new HashMap<>();
 
+    @Value("${word2vec.exp:694}")
+    private String word2vecExp;
+
+    // FIXME(zhoutian): 可能需要独立配置
+    @ApolloJsonValue("${rank.score.weight.680:{}}")
+    private Map<String, Double> weightMap;
+
+    final static double negSampleRate = 0.01;
+
     @Override
     public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
+
+        Map<String, Double> weightParam = ObjUtil.nullOrDefault(weightMap, new HashMap<>());
+
+
+        Map<Long, Double> creativeScoreCoefficient = getCreativeScoreCoefficient();
         Set<String> noApiAdVerIds = getNoApiAdVerIds();
 
         long ts = System.currentTimeMillis() / 1000;
 
+        String brand = scoreParam.getRequestContext().getMachineinfoBrand();
+        if (StringUtils.isNotEmpty(brand)) {
+            scoreParam.getRequestContext().setMachineinfoBrand(brand + "-n");
+        }
+
         long start = System.currentTimeMillis();
         // 特征处理
         // feature1
@@ -67,7 +89,7 @@ public class RankStrategyBy683 extends RankStrategyBasic {
         Map<String, String> sceneFeatureMap = this.handleSceneFeature(ts);
         long time1 = System.currentTimeMillis();
 
-        List<AdRankItem> adRankItems = new ArrayList<>(request.getAdIdList().size());
+        List<AdRankItem> adRankItems = new ArrayList<>();
         Random random = new Random();
         List<Future<AdRankItem>> futures = new ArrayList<>();
         CountDownLatch cdl1 = new CountDownLatch(request.getAdIdList().size());
@@ -89,6 +111,7 @@ public class RankStrategyBy683 extends RankStrategyBasic {
                     } else {
                         adRankItem.getExt().put("isApi", "1");
                     }
+
                     adRankItem.getExt().put("recallsources", dto.getRecallSources());
 
                     String cidStr = dto.getCreativeId().toString();
@@ -138,27 +161,25 @@ public class RankStrategyBy683 extends RankStrategyBasic {
             String title = b1Feature.getOrDefault("cidtitle", "");
             ThreadPoolFactory.defaultPool().submit(() -> {
                 try {
-                    this.handleE1AndE2Feature(e1Feature, e2Feature, title, item.getFeatureMap());
+                    this.handleE1AndE2Feature(e1Feature, e2Feature, title, item.getFeatureMap(), scoreParam);
                 } finally {
                     cdl2.countDown();
                 }
             });
             ThreadPoolFactory.defaultPool().submit(() -> {
                 try {
-                    this.handleD3AndB1Feature(d3Feature, title, item.getFeatureMap());
+                    this.handleD3AndB1Feature(d3Feature, title, item.getFeatureMap(), scoreParam);
                 } finally {
                     cdl2.countDown();
                 }
             });
         }
-        long time31 = System.currentTimeMillis();
         try {
             cdl2.await(150, TimeUnit.MILLISECONDS);
         } catch (Exception e) {
             log.error("handleE1AndE2Feature and handleD3AndB1Feature wait timeout", e);
         }
 
-        // feature4
         long time3 = System.currentTimeMillis();
         // 分桶
         this.readBucketFile();
@@ -184,52 +205,73 @@ public class RankStrategyBy683 extends RankStrategyBasic {
         // getScorerPipeline
         List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.XGBOOST_SCORE_CONF_683).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
         long time5 = System.currentTimeMillis();
+
+        // calibrate score for negative sampling
+        for (AdRankItem item : result) {
+            double originalScore = item.getLrScore();
+            double calibratedScore = originalScore / (originalScore + (1 - originalScore) / negSampleRate);
+            item.setLrScore(calibratedScore);
+            item.getScoreMap().put("originCtcvrScore", originalScore);
+            item.getScoreMap().put("ctcvrScore", calibratedScore);
+        }
+
         // loop
+        double cpmCoefficient = weightParam.getOrDefault("cpmCoefficient", 0.9);
+
         for (AdRankItem item : result) {
-            item.setScore(item.getLrScore() * item.getCpa());
+
+            double scoreCoefficient = creativeScoreCoefficient.getOrDefault(item.getAdId(), 1d);
+            item.setScore(item.getLrScore() * scoreCoefficient * item.getCpa());
+
             item.getScoreMap().put("cpa", item.getCpa());
             item.getScoreMap().put("cpm", item.getCpm());
+            item.getScoreMap().put("cpmCoefficient", cpmCoefficient);
+            item.getScoreMap().put("scoreCoefficient", scoreCoefficient);
             item.getFeatureMap().putAll(userFeatureMap);
             item.getFeatureMap().putAll(sceneFeatureMap);
 
             // 没有转化回传的广告主,使用后台配置的CPM
             if (noApiAdVerIds.contains(item.getAdVerId())) {
-                item.setScore(item.getCpm() / 1000);
+                item.setScore(item.getCpm() * cpmCoefficient / 1000);
             }
+        }
+
+
+        result.sort(ComparatorUtil.equalsRandomComparator());
 
+        if (CollectionUtils.isNotEmpty(result)) {
+            AdRankItem top1Item = result.get(0);
             for (Map.Entry<String, Map<String, String>> entry : videoFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
             for (Map.Entry<String, Map<String, String>> entry : userFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
-            Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(item.getAdVerId(), new HashMap<>());
+            Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(top1Item.getAdVerId(), new HashMap<>());
             for (Map.Entry<String, Map<String, String>> entry : adVerFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
-            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(item.getAdId()), new HashMap<>());
+            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(top1Item.getAdId()), new HashMap<>());
             for (Map.Entry<String, Map<String, String>> entry : cidFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
         }
-
-        log.info("cost={}, feature1={}, feature2={}, feature31={}, feature32={}, feature4={}, getScorerPipeline={}, " +
-                        "adIdSize={}, adRankItemsSize={}",
-                time5 - start, time1 - start, time2 - time1, time31 - time2, time3 - time31, time4 - time3,
-                time5 - time4, request.getAdIdList().size(), adRankItems.size());
-
-        result.sort(ComparatorUtil.equalsRandomComparator());
+        long time6 = System.currentTimeMillis();
+        log.info("cost={}, getFeature={}, handleFeature={},  similar={}, bucketFeature={}, getScorerPipeline={}, " +
+                        "other={}, adIdSize={}, adRankItemsSize={}",
+                time6 - start, time1 - start, time2 - time1, time3 - time2, time4 - time3,
+                time5 - time4, time6 - time5, request.getAdIdList().size(), adRankItems.size());
 
         return result;
     }
@@ -278,11 +320,14 @@ public class RankStrategyBy683 extends RankStrategyBasic {
                 double view = Double.parseDouble(feature.getOrDefault("ad_view_" + time, "0"));
                 double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
                 double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
+                double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
                 double f2 = NumUtil.div(conver, view);
+                double ecpm = NumUtil.div(income * 1000, view);
                 cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
                 cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
                 cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
                 cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
+                cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
 
                 cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
                 cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
@@ -310,11 +355,12 @@ public class RankStrategyBy683 extends RankStrategyBasic {
                 double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
                 double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
                 double f2 = NumUtil.div(conver, view);
+                double ecpm = NumUtil.div(income * 1000, view);
                 cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
                 cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
                 cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
                 cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
-                // cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
+                cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
 
                 cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
                 cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
@@ -352,7 +398,7 @@ public class RankStrategyBy683 extends RankStrategyBasic {
         featureMap.put("ctr_all", String.valueOf(NumUtil.div(clickAll, viewAll)));
         featureMap.put("ctcvr_all", String.valueOf(NumUtil.div(converAll, viewAll)));
         featureMap.put("cvr_all", String.valueOf(NumUtil.div(clickAll, converAll)));
-        // featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
+        featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
 
         return midActionList;
     }
@@ -387,17 +433,11 @@ public class RankStrategyBy683 extends RankStrategyBasic {
             featureMap.put("actionstatic_ctr", String.valueOf(ctr));
         }
         if (midActionStatic.containsKey("actionstatic_view_" + cid) && midActionStatic.containsKey("actionstatic_conver_" + cid)) {
-            double ctcvr = NumUtil.div(
-                    midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0),
-                    midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)
-            );
+            double ctcvr = NumUtil.div(midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0), midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0));
             featureMap.put("actionstatic_ctcvr", String.valueOf(ctcvr));
         }
         if (midActionStatic.containsKey("actionstatic_conver_" + cid) && midActionStatic.containsKey("actionstatic_click_" + cid)) {
-            double cvr = NumUtil.div(
-                    midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0),
-                    midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)
-            );
+            double cvr = NumUtil.div(midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0), midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0));
             featureMap.put("actionstatic_cvr", String.valueOf(cvr));
         }
     }
@@ -412,7 +452,7 @@ public class RankStrategyBy683 extends RankStrategyBasic {
             featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.div(conver, view)));
             featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.div(conver, click)));
             featureMap.put("d1_feature_" + prefix + "_conver", String.valueOf(conver));
-            // featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
+            featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
         }
     }
 
@@ -421,8 +461,8 @@ public class RankStrategyBy683 extends RankStrategyBasic {
             return;
         }
 
-        // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
-        List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
+        List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
+        // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
         List<String> prefixes2 = Arrays.asList("1d", "3d", "7d", "14d");
 
         for (String prefix1 : prefixes1) {
@@ -438,24 +478,28 @@ public class RankStrategyBy683 extends RankStrategyBasic {
         }
     }
 
-    private void handleD3AndB1Feature(Map<String, String> d3Feature, String cTitle, Map<String, String> featureMap) {
+    private void handleD3AndB1Feature(Map<String, String> d3Feature, String cTitle, Map<String, String> featureMap,
+                                      ScoreParam scoreParam) {
         if (MapUtils.isEmpty(d3Feature) || !d3Feature.containsKey("title") || StringUtils.isEmpty(cTitle)) {
             return;
         }
         String vTitle = d3Feature.get("title");
-        double score = Similarity.conceptSimilarity(cTitle, vTitle);
+        double score;
+        if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
+            score = SimilarityUtils.word2VecSimilarity(cTitle, vTitle);
+        } else {
+            score = Similarity.conceptSimilarity(cTitle, vTitle);
+        }
         featureMap.put("ctitle_vtitle_similarity", String.valueOf(score));
     }
 
-    private void handleE1AndE2Feature(Map<String, String> e1Feature, Map<String, String> e2Feature, String title, Map<String, String> featureMap) {
+    private void handleE1AndE2Feature(Map<String, String> e1Feature, Map<String, String> e2Feature, String title,
+                                      Map<String, String> featureMap, ScoreParam scoreParam) {
         if (StringUtils.isEmpty(title)) {
             return;
         }
 
-        List<Tuple2<Map<String, String>, String>> tuple2List = Arrays.asList(
-                new Tuple2<>(e1Feature, "e1"),
-                new Tuple2<>(e2Feature, "e2")
-        );
+        List<Tuple2<Map<String, String>, String>> tuple2List = Arrays.asList(new Tuple2<>(e1Feature, "e1"), new Tuple2<>(e2Feature, "e2"));
 
         List<String> tagsFieldList = Arrays.asList("tags_3d", "tags_7d", "tags_14d");
         for (Tuple2<Map<String, String>, String> tuple2 : tuple2List) {
@@ -468,7 +512,13 @@ public class RankStrategyBy683 extends RankStrategyBasic {
             for (String tagsField : tagsFieldList) {
                 if (StringUtils.isNotEmpty(feature.get(tagsField))) {
                     String tags = feature.get(tagsField);
-                    Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                    // Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                    Double[] doubles;
+                    if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
+                        doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
+                    } else {
+                        doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                    }
                     featureMap.put(prefix + "_" + tagsField + "_matchnum", String.valueOf(doubles[0]));
                     featureMap.put(prefix + "_" + tagsField + "_maxscore", String.valueOf(doubles[1]));
                     featureMap.put(prefix + "_" + tagsField + "_avgscore", String.valueOf(doubles[2]));
@@ -527,9 +577,7 @@ public class RankStrategyBy683 extends RankStrategyBasic {
         for (Map.Entry<String, String> entry : d2Feature.entrySet()) {
             String key = entry.getKey();
             String value = entry.getValue();
-            Map<String, Double> valueMap = Arrays.stream(value.split(","))
-                    .map(r -> r.split(":"))
-                    .collect(Collectors.toMap(rList -> rList[0], rList -> Double.parseDouble(rList[2])));
+            Map<String, Double> valueMap = Arrays.stream(value.split(",")).map(r -> r.split(":")).collect(Collectors.toMap(rList -> rList[0], rList -> Double.parseDouble(rList[2])));
             vidRankMaps.put(key, valueMap);
         }
         return vidRankMaps;
@@ -562,9 +610,7 @@ public class RankStrategyBy683 extends RankStrategyBasic {
                                 String key = rList[0];
                                 double value1 = Double.parseDouble(rList[1]);
                                 bucketsLen.put(key, value1);
-                                double[] value2 = Arrays.stream(rList[2].split(","))
-                                        .mapToDouble(Double::valueOf)
-                                        .toArray();
+                                double[] value2 = Arrays.stream(rList[2].split(",")).mapToDouble(Double::valueOf).toArray();
                                 bucketsMap.put(key, value2);
                             }
                         }
@@ -601,4 +647,4 @@ public class RankStrategyBy683 extends RankStrategyBasic {
         return newFeatureMap;
     }
 
-}
+}