Bladeren bron

Update PAIModelV1 and RankStrategyBy688

StrayWarrior 1 maand geleden
bovenliggende
commit
1533536d53

File diff suppressed because it is too large
+ 1 - 1
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/score/model/PAIModelV1.java


+ 148 - 44
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/strategy/RankStrategyBy688.java

@@ -1,5 +1,6 @@
 package com.tzld.piaoquan.ad.engine.service.score.strategy;
 
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
 import com.tzld.piaoquan.ad.engine.commons.score.ScorerUtils;
 import com.tzld.piaoquan.ad.engine.commons.thread.ThreadPoolFactory;
@@ -9,12 +10,14 @@ import com.tzld.piaoquan.ad.engine.commons.dto.AdPlatformCreativeDTO;
 import com.tzld.piaoquan.ad.engine.commons.param.RankRecommendRequestParam;
 import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.collections4.MapUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 import org.xm.Similarity;
 
+import javax.annotation.PostConstruct;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
@@ -26,11 +29,12 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
+import static com.tzld.piaoquan.ad.engine.commons.math.Const.*;
+
 @Slf4j
 @Component
 public class RankStrategyBy688 extends RankStrategyBasic {
 
-
     private Map<String, double[]> bucketsMap = new HashMap<>();
 
     private Map<String, Double> bucketsLen = new HashMap<>();
@@ -38,9 +42,28 @@ public class RankStrategyBy688 extends RankStrategyBasic {
     @Value("${word2vec.exp:694}")
     private String word2vecExp;
 
+    // FIXME(zhoutian): 可能需要独立配置
+    @ApolloJsonValue("${rank.score.weight.680:{}}")
+    private Map<String, Double> weightMap;
+
+    @ApolloJsonValue("${rank.score.neg_sample_rate:0.01}")
+    Double negSampleRate;
+
+    Set<String> sparseFeatureSet;
+
+    @PostConstruct
+    public void afterInit() {
+        this.readBucketFile();
+        this.initSparseFeatureNames();
+    }
+
     @Override
     public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
 
+        Map<String, Double> weightParam = ObjUtil.nullOrDefault(weightMap, new HashMap<>());
+
+
+        Map<Long, Double> creativeScoreCoefficient = getCreativeScoreCoefficient();
         Set<String> noApiAdVerIds = getNoApiAdVerIds();
 
         long ts = System.currentTimeMillis() / 1000;
@@ -75,6 +98,18 @@ public class RankStrategyBy688 extends RankStrategyBasic {
         Map<String, String> e1Feature = userFeature.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
         Map<String, String> e2Feature = userFeature.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
 
+        userFeatureMap.put("brand", request.getMachineInfo().getBrand().toUpperCase());
+        userFeatureMap.put("region", request.getRegion());
+        userFeatureMap.put("city", request.getCity());
+        userFeatureMap.put("vid", String.valueOf(request.getVideoId()));
+        userFeatureMap.put("cate1", d3Feature.get("merge_first_level_cate"));
+        userFeatureMap.put("cate2", d3Feature.get("merge_second_level_cate"));
+        userFeatureMap.put("user_vid_return_tags_2h", e1Feature.getOrDefault("tags_2h", null));
+        userFeatureMap.put("user_vid_return_tags_1d", e1Feature.getOrDefault("tags_1d", null));
+        userFeatureMap.put("user_vid_return_tags_3d", e1Feature.getOrDefault("tags_3d", null));
+        userFeatureMap.put("user_vid_return_tags_7d", e1Feature.getOrDefault("tags_7d", null));
+        userFeatureMap.put("user_vid_return_tags_14d", e1Feature.getOrDefault("tags_14d", null));
+
         Map<String, String> sceneFeatureMap = this.handleSceneFeature(ts);
         long time1 = System.currentTimeMillis();
 
@@ -100,6 +135,7 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                     } else {
                         adRankItem.getExt().put("isApi", "1");
                     }
+
                     adRankItem.getExt().put("recallsources", dto.getRecallSources());
 
                     String cidStr = dto.getCreativeId().toString();
@@ -117,6 +153,10 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                     this.handleC1UIFeature(midTimeDiffMap, actionStaticMap, cidFeatureMap, cidStr);
                     this.handleD1Feature(d1Feature, cidFeatureMap);
                     this.handleD2Feature(vidRankMaps, cidFeatureMap, cidStr);
+
+                    cidFeatureMap.put("cid", dto.getCreativeId() != null ? String.valueOf(dto.getCreativeId()) : null);
+                    cidFeatureMap.put("adid", dto.getAdId() != null ? String.valueOf(dto.getAdId()) : null);
+                    cidFeatureMap.put("adverid", dto.getAdVerId());
                     return adRankItem;
                 } finally {
                     cdl1.countDown();
@@ -162,17 +202,14 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                 }
             });
         }
-        long time31 = System.currentTimeMillis();
         try {
             cdl2.await(150, TimeUnit.MILLISECONDS);
         } catch (Exception e) {
             log.error("handleE1AndE2Feature and handleD3AndB1Feature wait timeout", e);
         }
 
-        // feature4
         long time3 = System.currentTimeMillis();
         // 分桶
-        this.readBucketFile();
         userFeatureMap = this.featureBucket(userFeatureMap);
         CountDownLatch cdl4 = new CountDownLatch(adRankItems.size());
         for (AdRankItem adRankItem : adRankItems) {
@@ -193,54 +230,75 @@ public class RankStrategyBy688 extends RankStrategyBasic {
         long time4 = System.currentTimeMillis();
         // 打分排序
         // getScorerPipeline
-        List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.XGBOOST_SCORE_CONF_20240909).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
+        List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.PAI_SCORE_CONF_20250214).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
         long time5 = System.currentTimeMillis();
+
+        // calibrate score for negative sampling
+        for (AdRankItem item : result) {
+            double originalScore = item.getLrScore();
+            double calibratedScore = originalScore / (originalScore + (1 - originalScore) / negSampleRate);
+            item.setLrScore(calibratedScore);
+            item.getScoreMap().put("originCtcvrScore", originalScore);
+            item.getScoreMap().put("ctcvrScore", calibratedScore);
+        }
+
         // loop
+        double cpmCoefficient = weightParam.getOrDefault("cpmCoefficient", 0.9);
+
         for (AdRankItem item : result) {
-            item.setScore(item.getLrScore() * item.getCpa());
+
+            double scoreCoefficient = creativeScoreCoefficient.getOrDefault(item.getAdId(), 1d);
+            item.setScore(item.getLrScore() * scoreCoefficient * item.getCpa());
+
             item.getScoreMap().put("cpa", item.getCpa());
             item.getScoreMap().put("cpm", item.getCpm());
+            item.getScoreMap().put("cpmCoefficient", cpmCoefficient);
+            item.getScoreMap().put("scoreCoefficient", scoreCoefficient);
             item.getFeatureMap().putAll(userFeatureMap);
             item.getFeatureMap().putAll(sceneFeatureMap);
 
             // 没有转化回传的广告主,使用后台配置的CPM
             if (noApiAdVerIds.contains(item.getAdVerId())) {
-                item.setScore(item.getCpm() / 1000);
+                item.setScore(item.getCpm() * cpmCoefficient / 1000);
             }
+        }
+
+
+        result.sort(ComparatorUtil.equalsRandomComparator());
 
+        if (CollectionUtils.isNotEmpty(result)) {
+            AdRankItem top1Item = result.get(0);
             for (Map.Entry<String, Map<String, String>> entry : videoFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
             for (Map.Entry<String, Map<String, String>> entry : userFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
-            Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(item.getAdVerId(), new HashMap<>());
+            Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(top1Item.getAdVerId(), new HashMap<>());
             for (Map.Entry<String, Map<String, String>> entry : adVerFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
-            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(item.getAdId()), new HashMap<>());
+            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(top1Item.getAdId()), new HashMap<>());
             for (Map.Entry<String, Map<String, String>> entry : cidFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
         }
-
-        log.info("cost={}, feature1={}, feature2={}, feature31={}, feature32={}, feature4={}, getScorerPipeline={}, " +
-                        "adIdSize={}, adRankItemsSize={}",
-                time5 - start, time1 - start, time2 - time1, time31 - time2, time3 - time31, time4 - time3,
-                time5 - time4, request.getAdIdList().size(), adRankItems.size());
-
-        result.sort(ComparatorUtil.equalsRandomComparator());
+        long time6 = System.currentTimeMillis();
+        log.info("cost={}, getFeature={}, handleFeature={},  similar={}, bucketFeature={}, getScorerPipeline={}, " +
+                        "other={}, adIdSize={}, adRankItemsSize={}",
+                time6 - start, time1 - start, time2 - time1, time3 - time2, time4 - time3,
+                time5 - time4, time6 - time5, request.getAdIdList().size(), adRankItems.size());
 
         return result;
     }
@@ -289,15 +347,20 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                 double view = Double.parseDouble(feature.getOrDefault("ad_view_" + time, "0"));
                 double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
                 double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
-                double f2 = NumUtil.div(conver, view);
-                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
-                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
-                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+                double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
+                double cpc = NumUtil.div(income, click);
+                double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+                double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
+                double ecpm = ctr * cpc * 1000;
+                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(ctr));
+                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(ctcvr));
+                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
                 cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
+                cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
 
                 cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
                 cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
-                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
+                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * ctcvr));
             }
         }
 
@@ -320,16 +383,19 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                 double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
                 double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
                 double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
-                double f2 = NumUtil.div(conver, view);
-                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
-                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
-                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+                double cpc = NumUtil.div(income, click);
+                double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+                double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
+                double ecpm = ctr * cpc * 1000;
+                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(ctr));
+                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(ctcvr));
+                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
                 cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
-                // cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
+                cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
 
                 cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
                 cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
-                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
+                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * ctcvr));
             }
         }
 
@@ -363,8 +429,19 @@ public class RankStrategyBy688 extends RankStrategyBasic {
         featureMap.put("ctr_all", String.valueOf(NumUtil.div(clickAll, viewAll)));
         featureMap.put("ctcvr_all", String.valueOf(NumUtil.div(converAll, viewAll)));
         featureMap.put("cvr_all", String.valueOf(NumUtil.div(clickAll, converAll)));
-        // featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
-
+        featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
+        if (CollectionUtils.isNotEmpty(midActionList)) {
+            List<String> cidList = new ArrayList<>();
+            List<String> converList = new ArrayList<>();
+            for (TupleMapEntry<Tuple5> tupleMapEntry : midActionList) {
+                String cid = tupleMapEntry.key;
+                String conver = tupleMapEntry.value.f3;
+                cidList.add(cid);
+                converList.add(conver);
+            }
+            featureMap.put("user_cid_click_list", String.join(",", cidList));
+            featureMap.put("user_cid_conver_list", String.join(",", converList));
+        }
         return midActionList;
     }
 
@@ -413,11 +490,13 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             double click = Double.parseDouble(d1Feature.getOrDefault("ad_click_" + prefix, "0"));
             double conver = Double.parseDouble(d1Feature.getOrDefault("ad_conversion_" + prefix, "0"));
             double income = Double.parseDouble(d1Feature.getOrDefault("ad_income_" + prefix, "0"));
-            featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(NumUtil.div(click, view)));
-            featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.div(conver, view)));
-            featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+            double cpc = NumUtil.div(income, click);
+            double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+            featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(ctr));
+            featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)));
+            featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
             featureMap.put("d1_feature_" + prefix + "_conver", String.valueOf(conver));
-            // featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
+            featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(ctr * cpc * 1000));
         }
     }
 
@@ -426,8 +505,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             return;
         }
 
-        // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
-        List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
+        List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
+        // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
         List<String> prefixes2 = Arrays.asList("1d", "3d", "7d", "14d");
 
         for (String prefix1 : prefixes1) {
@@ -477,7 +556,7 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             for (String tagsField : tagsFieldList) {
                 if (StringUtils.isNotEmpty(feature.get(tagsField))) {
                     String tags = feature.get(tagsField);
-                    //Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                    // Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
                     Double[] doubles;
                     if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
                         doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
@@ -560,7 +639,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             return;
         }
         synchronized (this) {
-            InputStream resourceStream = RankStrategyBy688.class.getClassLoader().getResourceAsStream("20240718_ad_bucket_688.txt");
+            String bucketFile = "20250217_ad_bucket_688.txt";
+            InputStream resourceStream = this.getClass().getClassLoader().getResourceAsStream(bucketFile);
             if (resourceStream != null) {
                 try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) {
                     Map<String, double[]> bucketsMap = new HashMap<>();
@@ -583,18 +663,44 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                     this.bucketsMap = bucketsMap;
                     this.bucketsLen = bucketsLen;
                 } catch (IOException e) {
-                    log.error("something is wrong in parse bucket file:", e);
+                    log.error("something is wrong in parse bucket file: ", e);
                 }
+                log.info("load bucket file success: {}", bucketFile);
             } else {
                 log.error("no bucket file");
             }
         }
     }
 
+    private void initSparseFeatureNames() {
+        this.sparseFeatureSet = new HashSet<String>() {{
+            add("brand");
+            add("region");
+            add("city");
+            add("vid");
+            add("cate1");
+            add("cate2");
+            add("cid");
+            add("adid");
+            add("adverid");
+            add("user_cid_click_list");
+            add("user_cid_conver_list");
+            add("user_vid_return_tags_2h");
+            add("user_vid_return_tags_1d");
+            add("user_vid_return_tags_3d");
+            add("user_vid_return_tags_7d");
+            add("user_vid_return_tags_14d");
+        }};
+    }
+
     private Map<String, String> featureBucket(Map<String, String> featureMap) {
         Map<String, String> newFeatureMap = new ConcurrentHashMap<>(featureMap.size());
         for (Map.Entry<String, String> entry : featureMap.entrySet()) {
             String name = entry.getKey();
+            if (this.sparseFeatureSet.contains(name)) {
+                newFeatureMap.put(name, entry.getValue());
+                continue;
+            }
             double score = Double.parseDouble(entry.getValue());
             // 注意:0值、不在分桶文件中的特征,会被过滤掉。
             if (score > 1E-8) {
@@ -608,8 +714,6 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                 }
             }
         }
-
         return newFeatureMap;
     }
-
 }

Some files were not shown because too many files changed in this diff