Bläddra i källkod

Merge branch 'feature/20250312-pai-dnn-v11' of algorithm/ad-engine into master

fengzhoutian 1 månad sedan
förälder
incheckning
380b0dbea2

+ 2 - 0
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/score/ScorerUtils.java

@@ -29,6 +29,7 @@ public final class ScorerUtils {
     public static String XGBOOST_SCORE_CONF_683 = "ad_score_config_xgboost_683.conf";
     public static String XGBOOST_SCORE_CONF_20240909 = "ad_score_config_xgboost_20240909.conf";
     public static String XGBOOST_SCORE_CONF_20241105 = "ad_score_config_xgboost_20241105.conf";
+    public static String PAI_SCORE_CONF_20250214 = "ad_score_config_pai_20250214.conf";
 
     public static void warmUp() {
         log.info("scorer warm up ");
@@ -39,6 +40,7 @@ public final class ScorerUtils {
         ScorerUtils.init(XGBOOST_SCORE_CONF);
         ScorerUtils.init(XGBOOST_SCORE_CONF_20240909);
         ScorerUtils.init(XGBOOST_SCORE_CONF_20241105);
+        ScorerUtils.init(PAI_SCORE_CONF_20250214);
     }
 
     private ScorerUtils() {

+ 176 - 0
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/score/model/PAIModelV1.java

@@ -0,0 +1,176 @@
+package com.tzld.piaoquan.ad.engine.commons.score.model;
+
+import com.aliyun.openservices.eas.predict.http.HttpConfig;
+import com.aliyun.openservices.eas.predict.http.PredictClient;
+import com.aliyun.openservices.eas.predict.request.TFDataType;
+import com.aliyun.openservices.eas.predict.request.TFRequest;
+import com.aliyun.openservices.eas.predict.response.TFResponse;
+import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
+import org.apache.commons.collections4.MapUtils;
+import org.apache.commons.lang.math.NumberUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.util.CollectionUtils;
+
+import java.util.*;
+
+
+public class PAIModelV1 {
+
+    private static final Logger LOGGER = LoggerFactory.getLogger(PAIModelV1.class);
+
+    private PAIModelV1() {
+    }
+
+    private static final PAIModelV1 model;
+
+    public static PAIModelV1 getModel() {
+        return model;
+    }
+
+    private static final PredictClient client;
+
+    // Builds the singleton instance and the shared PredictClient once, when the class is initialised.
+    // NOTE(review): endpoint, token and model name are hard-coded here. The token is a
+    // credential committed to source control — consider loading it from external
+    // configuration and rotating the value that was checked in.
+    static {
+        model = new PAIModelV1();
+        client = new PredictClient(new HttpConfig());
+        client.setEndpoint("1894469520484605.cn-hangzhou.pai-eas.aliyuncs.com");
+        client.setToken("ODI1MmUxODgzZDc3ODM0ZmQwZWU0YTVjZjdlOWVlMGFlZGJjNTlkYQ==");
+        client.setModelName("ad_rank_dnn_v11_easyrec");
+    }
+
+    private static final String[] sparseStrFeatures = {
+            "brand", "region", "city", "cate1", "cate2", "user_cid_click_list", "user_cid_conver_list", "user_vid_return_tags_2h", "user_vid_return_tags_1d", "user_vid_return_tags_3d", "user_vid_return_tags_7d", "user_vid_return_tags_14d"
+    };
+
+    private static final String[] sparseLongFeatures = {
+            "vid"
+    };
+
+    private static final String[] sparseAdLongFeatures = {
+            "cid", "adid", "adverid"
+    };
+
+    private final String[] userFeatures = {
+            "viewAll", "clickAll", "converAll", "incomeAll", "ctr_all", "ctcvr_all", "cvr_all", "ecpm_all"
+    };
+
+    private final String[] itemFeatures = {
+            "actionstatic_click", "actionstatic_ctcvr", "actionstatic_ctr", "actionstatic_view", "b2_12h_click", "b2_12h_conver",
+            "b2_12h_conver_x_ctcvr", "b2_12h_conver_x_log_view", "b2_12h_ctcvr", "b2_12h_ctr", "b2_12h_cvr", "b2_12h_ecpm",
+            "b2_1d_click", "b2_1d_conver", "b2_1d_conver_x_ctcvr", "b2_1d_conver_x_log_view", "b2_1d_ctcvr", "b2_1d_ctr",
+            "b2_1d_cvr", "b2_1d_ecpm", "b2_3d_click", "b2_3d_conver", "b2_3d_conver_x_ctcvr", "b2_3d_conver_x_log_view",
+            "b2_3d_ctcvr", "b2_3d_ctr", "b2_3d_cvr", "b2_3d_ecpm", "b2_3h_click", "b2_3h_conver", "b2_3h_conver_x_ctcvr",
+            "b2_3h_conver_x_log_view", "b2_3h_ctcvr", "b2_3h_ctr", "b2_3h_cvr", "b2_3h_ecpm", "b2_6h_click", "b2_6h_conver",
+            "b2_6h_conver_x_ctcvr", "b2_6h_conver_x_log_view", "b2_6h_ctcvr", "b2_6h_ctr", "b2_6h_cvr", "b2_6h_ecpm", "b2_7d_click",
+            "b2_7d_conver", "b2_7d_conver_x_ctcvr", "b2_7d_conver_x_log_view", "b2_7d_ctcvr", "b2_7d_ctr", "b2_7d_cvr",
+            "b2_7d_ecpm", "b3_12h_click", "b3_12h_conver", "b3_12h_conver_x_ctcvr", "b3_12h_ctcvr", "b3_12h_ctr", "b3_12h_cvr",
+            "b3_12h_ecpm", "b3_1d_click", "b3_1d_conver", "b3_1d_conver_x_ctcvr", "b3_1d_conver_x_log_view", "b3_1d_ctcvr",
+            "b3_1d_ctr", "b3_1d_cvr", "b3_1d_ecpm", "b3_3d_click", "b3_3d_conver", "b3_3d_conver_x_ctcvr",
+            "b3_3d_conver_x_log_view", "b3_3d_ctcvr", "b3_3d_ctr", "b3_3d_cvr", "b3_3d_ecpm", "b3_3h_click", "b3_3h_conver",
+            "b3_3h_conver_x_ctcvr", "b3_3h_ctcvr", "b3_3h_ctr", "b3_3h_cvr", "b3_3h_ecpm", "b3_6h_click", "b3_6h_conver_x_ctcvr",
+            "b3_6h_ctcvr", "b3_6h_ctr", "b3_6h_cvr", "b3_6h_ecpm", "b3_7d_click", "b3_7d_conver", "b3_7d_conver_x_ctcvr",
+            "b3_7d_conver_x_log_view", "b3_7d_ctcvr", "b3_7d_ctr", "b3_7d_cvr", "b3_7d_ecpm", "b4_12h_click",
+            "b4_12h_conver_x_ctcvr", "b4_12h_conver_x_log_view", "b4_12h_ctcvr", "b4_12h_ctr", "b4_12h_cvr", "b4_12h_ecpm",
+            "b4_1d_click", "b4_1d_conver_x_ctcvr", "b4_1d_conver_x_log_view", "b4_1d_ctcvr", "b4_1d_ctr", "b4_1d_cvr", "b4_1d_ecpm",
+            "b4_3d_click", "b4_3d_conver_x_ctcvr", "b4_3d_conver_x_log_view", "b4_3d_ctcvr", "b4_3d_ctr", "b4_3d_cvr", "b4_3d_ecpm",
+            "b4_3h_click", "b4_3h_conver_x_ctcvr", "b4_3h_conver_x_log_view", "b4_3h_ctcvr", "b4_3h_ctr", "b4_3h_cvr", "b4_3h_ecpm",
+            "b4_6h_click", "b4_6h_conver_x_ctcvr", "b4_6h_conver_x_log_view", "b4_6h_ctcvr", "b4_6h_ctr", "b4_6h_cvr", "b4_6h_ecpm",
+            "b4_7d_click", "b4_7d_conver", "b4_7d_conver_x_ctcvr", "b4_7d_conver_x_log_view", "b4_7d_ctcvr", "b4_7d_ctr",
+            "b4_7d_cvr", "b4_7d_ecpm", "b5_12h_click", "b5_12h_conver", "b5_12h_conver_x_ctcvr", "b5_12h_ctcvr", "b5_12h_ctr",
+            "b5_12h_cvr", "b5_12h_ecpm", "b5_1d_click", "b5_1d_conver", "b5_1d_conver_x_ctcvr", "b5_1d_conver_x_log_view",
+            "b5_1d_ctcvr", "b5_1d_ctr", "b5_1d_cvr", "b5_1d_ecpm", "b5_3d_click", "b5_3d_conver", "b5_3d_conver_x_ctcvr",
+            "b5_3d_conver_x_log_view", "b5_3d_ctcvr", "b5_3d_ctr", "b5_3d_cvr", "b5_3d_ecpm", "b5_3h_click", "b5_3h_conver_x_ctcvr",
+            "b5_3h_conver_x_log_view", "b5_3h_ctcvr", "b5_3h_ctr", "b5_3h_cvr", "b5_3h_ecpm", "b5_6h_click",
+            "b5_6h_conver_x_log_view", "b5_6h_ctcvr", "b5_6h_ctr", "b5_6h_cvr", "b5_6h_ecpm", "b5_7d_click", "b5_7d_conver",
+            "b5_7d_conver_x_ctcvr", "b5_7d_conver_x_log_view", "b5_7d_ctcvr", "b5_7d_ctr", "b5_7d_cvr", "b5_7d_ecpm", "b6_7d_click",
+            "b6_7d_conver", "b6_7d_conver_x_log_view", "b6_7d_ctcvr", "b6_7d_ctr", "b6_7d_cvr", "b6_7d_ecpm", "b7_14d_ctr",
+            "b7_7d_click", "b7_7d_conver_x_ctcvr", "b7_7d_conver_x_log_view", "b7_7d_ctcvr", "b7_7d_ctr", "b7_7d_cvr", "b7_7d_ecpm",
+            "b8_12h_click", "b8_12h_conver_x_ctcvr", "b8_12h_ctcvr", "b8_12h_ctr", "b8_12h_cvr", "b8_12h_ecpm", "b8_1d_click",
+            "b8_1d_conver", "b8_1d_conver_x_ctcvr", "b8_1d_conver_x_log_view", "b8_1d_ctcvr", "b8_1d_ctr", "b8_1d_cvr",
+            "b8_1d_ecpm", "b8_3d_click", "b8_3d_conver", "b8_3d_conver_x_ctcvr", "b8_3d_ctcvr", "b8_3d_ctr", "b8_3d_cvr",
+            "b8_3d_ecpm", "b8_3h_click", "b8_3h_conver_x_ctcvr", "b8_3h_ctcvr", "b8_3h_ctr", "b8_3h_cvr", "b8_3h_ecpm",
+            "b8_6h_click", "b8_6h_conver_x_ctcvr", "b8_6h_ctcvr", "b8_6h_ctr", "b8_6h_cvr", "b8_6h_ecpm", "b8_7d_click",
+            "b8_7d_conver_x_ctcvr", "b8_7d_conver_x_log_view", "b8_7d_ctcvr", "b8_7d_ctr", "b8_7d_cvr", "b8_7d_ecpm", "cpa",
+            "d1_feature_12h_conver", "d1_feature_12h_ctcvr", "d1_feature_12h_ctr", "d1_feature_12h_cvr", "d1_feature_12h_ecpm",
+            "d1_feature_1d_conver", "d1_feature_1d_ctcvr", "d1_feature_1d_ctr", "d1_feature_1d_cvr", "d1_feature_1d_ecpm",
+            "d1_feature_3d_conver", "d1_feature_3d_ctcvr", "d1_feature_3d_ctr", "d1_feature_3d_ecpm", "d1_feature_3h_conver",
+            "d1_feature_3h_ctcvr", "d1_feature_3h_ctr", "d1_feature_3h_cvr", "d1_feature_3h_ecpm", "d1_feature_6h_ctcvr",
+            "d1_feature_6h_ctr", "d1_feature_6h_ecpm", "d1_feature_7d_conver", "d1_feature_7d_ctcvr", "d1_feature_7d_ctr",
+            "d1_feature_7d_cvr", "d1_feature_7d_ecpm", "e1_tags_14d_avgscore", "e1_tags_14d_maxscore", "e1_tags_3d_avgscore",
+            "e1_tags_3d_maxscore", "e1_tags_7d_avgscore", "e1_tags_7d_maxscore", "e2_tags_14d_avgscore", "e2_tags_14d_maxscore",
+            "e2_tags_3d_avgscore", "e2_tags_3d_maxscore", "e2_tags_7d_avgscore", "e2_tags_7d_maxscore", "timediff_conver",
+            "timediff_view", "vid_rank_ctcvr_14d", "vid_rank_ctcvr_1d", "vid_rank_ctcvr_3d", "vid_rank_ctcvr_7d",
+            "vid_rank_ctr_14d", "vid_rank_ctr_1d", "vid_rank_ctr_3d", "vid_rank_ctr_7d", "vid_rank_ecpm_14d", "vid_rank_ecpm_1d",
+            "vid_rank_ecpm_3d", "vid_rank_ecpm_7d"
+    };
+
+
+    /**
+     * Scores a batch of ad candidates with the remote model and returns one
+     * probability per item, in the same order as {@code items}.
+     *
+     * <p>User-level features are sent with shape {@code [1]}; item-level features are
+     * sent with shape {@code [items.size()]}. Missing or unparsable values are sent as
+     * {@code ""} (string features) or {@code 0} (numeric features).
+     *
+     * <p>Any failure (exception, empty response, or a response whose length differs
+     * from the batch size) is logged and answered with a list of {@code 0.0f} scores,
+     * so callers can always index the result by item position.
+     *
+     * @param items           candidates to score; {@code null} or empty yields an empty list
+     * @param userFeatureMap  user-level features keyed by source feature name; may contain
+     *                        null values, which are treated as missing
+     * @param sceneFeatureMap scene-level features; currently not read by this method
+     * @return a list with exactly {@code items.size()} scores
+     */
+    public List<Float> score(final List<AdRankItem> items,
+                             final Map<String, String> userFeatureMap,
+                             final Map<String, String> sceneFeatureMap) {
+        if (items == null || items.isEmpty()) {
+            // Nothing to score: skip the remote call entirely.
+            return new ArrayList<>();
+        }
+        final int batchSize = items.size();
+        try {
+            final Map<String, String> userFeatures0 =
+                    userFeatureMap != null ? userFeatureMap : Collections.<String, String>emptyMap();
+            TFRequest request = new TFRequest();
+
+            for (String feature : sparseStrFeatures) {
+                // Map.getOrDefault returns null when the key is present with a null value,
+                // so normalise explicitly: a null string must never reach the request.
+                String v = userFeatures0.get(toSourceKey(feature));
+                request.addFeed(feature, TFDataType.DT_STRING, new long[]{1}, new String[]{v == null ? "" : v});
+            }
+
+            for (String feature : sparseLongFeatures) {
+                long v = NumberUtils.toLong(userFeatures0.get(toSourceKey(feature)), 0L);
+                request.addFeed(feature, TFDataType.DT_INT64, new long[]{1}, new long[]{v});
+            }
+
+            for (String feature : userFeatures) {
+                double v = NumberUtils.toDouble(userFeatures0.get(toSourceKey(feature)), 0.0);
+                // Locale.ROOT keeps the feed name stable regardless of the JVM default locale.
+                request.addFeed(feature.toLowerCase(Locale.ROOT), TFDataType.DT_DOUBLE, new long[]{1}, new double[]{v});
+            }
+
+            // Resolve each item's feature map once instead of once per feature.
+            List<Map<String, String>> itemFeatureMaps = new ArrayList<>(batchSize);
+            for (AdRankItem item : items) {
+                itemFeatureMaps.add(item.getFeatureMap());
+            }
+
+            for (String feature : itemFeatures) {
+                String key = toSourceKey(feature);
+                double[] values = new double[batchSize];
+                for (int i = 0; i < batchSize; i++) {
+                    Map<String, String> featureMap = itemFeatureMaps.get(i);
+                    values[i] = featureMap == null ? 0.0 : NumberUtils.toDouble(featureMap.get(key), 0.0);
+                }
+                request.addFeed(feature, TFDataType.DT_DOUBLE, new long[]{batchSize}, values);
+            }
+
+            for (String feature : sparseAdLongFeatures) {
+                String key = toSourceKey(feature);
+                long[] values = new long[batchSize];
+                for (int i = 0; i < batchSize; i++) {
+                    Map<String, String> featureMap = itemFeatureMaps.get(i);
+                    values[i] = featureMap == null ? 0L : NumberUtils.toLong(featureMap.get(key), 0L);
+                }
+                request.addFeed(feature, TFDataType.DT_INT64, new long[]{batchSize}, values);
+            }
+
+            request.addFetch("probs");
+            TFResponse response = client.predict(request);
+            List<Float> result = response.getFloatVals("probs");
+            if (result != null && result.size() == batchSize) {
+                return result;
+            }
+            // A short or empty answer would make positional lookups by the caller fail.
+            LOGGER.error("PAIModel score size mismatch, expected={}, actual={}",
+                    batchSize, result == null ? 0 : result.size());
+        } catch (Exception e) {
+            LOGGER.error("PAIModel score error", e);
+        }
+        return new ArrayList<>(Collections.nCopies(batchSize, 0.0f));
+    }
+
+    /**
+     * Maps a model input name to the key used in the feature maps:
+     * {@code "_x_"} becomes {@code "*"} and {@code "_view"} becomes {@code "(view)"}.
+     *
+     * <p>NOTE(review): this also rewrites {@code actionstatic_view} and
+     * {@code timediff_view} to {@code actionstatic(view)} / {@code timediff(view)};
+     * confirm the upstream feature maps really use those keys, otherwise these two
+     * inputs are always sent as 0.
+     */
+    private static String toSourceKey(String feature) {
+        return feature.replace("_x_", "*").replace("_view", "(view)");
+    }
+
+}

+ 6 - 0
ad-engine-server/src/main/resources/ad_score_config_pai_20250214.conf

@@ -0,0 +1,6 @@
+# Scorer configuration for the PAI model; this file name is the one passed to
+# ScorerUtils.init(PAI_SCORE_CONF_20250214) during warm-up.
+scorer-config = {
+  pai-score-config = {
+    # Fully-qualified class name of the scorer for this entry.
+    scorer-name = "com.tzld.piaoquan.ad.engine.service.score.scorer.PAIScorer"
+    # NOTE(review): presumably orders scorers within the pipeline — confirm against ScorerUtils.
+    scorer-priority = 99
+  }
+}

+ 94 - 0
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/scorer/PAIScorer.java

@@ -0,0 +1,94 @@
+package com.tzld.piaoquan.ad.engine.service.score.scorer;
+
+
+import com.google.common.collect.Lists;
+import com.tzld.piaoquan.ad.engine.commons.score.AbstractScorer;
+import com.tzld.piaoquan.ad.engine.commons.score.BaseXGBoostModelScorer;
+import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
+import com.tzld.piaoquan.ad.engine.commons.score.ScorerConfigInfo;
+import com.tzld.piaoquan.ad.engine.commons.score.model.PAIModelV1;
+import com.tzld.piaoquan.ad.engine.commons.score.model.XGBoostModel683;
+import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
+import com.tzld.piaoquan.recommend.feature.domain.ad.base.UserAdFeature;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.collections4.MapUtils;
+import org.apache.commons.lang.exception.ExceptionUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Component;
+
+import java.util.*;
+import java.util.concurrent.*;
+
+public class PAIScorer extends AbstractScorer {
+
+    private final static Logger LOGGER = LoggerFactory.getLogger(PAIScorer.class);
+
+
+    public PAIScorer(ScorerConfigInfo configInfo) {
+        super(configInfo);
+    }
+
+    @Override
+    public List<AdRankItem> scoring(final ScoreParam param,
+                                    final UserAdFeature userAdFeature,
+                                    final List<AdRankItem> rankItems) {
+        throw new NoSuchMethodError();
+    }
+
+    public List<AdRankItem> scoring(final Map<String, String> sceneFeatureMap,
+                                    final Map<String, String> userFeatureMap,
+                                    final List<AdRankItem> rankItems) {
+        if (CollectionUtils.isEmpty(rankItems)) {
+            return rankItems;
+        }
+
+        long startTime = System.currentTimeMillis();
+
+        List<AdRankItem> result = rankByJava(sceneFeatureMap, userFeatureMap, rankItems);
+
+        LOGGER.debug("ctr ranker time java items size={}, time={} ", result != null ? result.size() : 0,
+                System.currentTimeMillis() - startTime);
+
+        return result;
+    }
+
+    private List<AdRankItem> rankByJava(final Map<String, String> sceneFeatureMap,
+                                        final Map<String, String> userFeatureMap,
+                                        final List<AdRankItem> items) {
+        long startTime = System.currentTimeMillis();
+        PAIModelV1 model = PAIModelV1.getModel();
+        // 所有都参与打分,按照ctr排序
+        multipleCtrScore(items, userFeatureMap, sceneFeatureMap, model);
+
+        // debug log
+        if (LOGGER.isDebugEnabled()) {
+            for (int i = 0; i < items.size(); i++) {
+                LOGGER.debug("before enter feeds model predict ctr score [{}] [{}]", items.get(i), items.get(i));
+            }
+        }
+
+        Collections.sort(items);
+
+        LOGGER.debug("ctr ranker java execute time: [{}]", System.currentTimeMillis() - startTime);
+        LOGGER.debug("[ctr ranker time java] items size={}, cost={} ", items != null ? items.size() : 0,
+                System.currentTimeMillis() - startTime);
+        return items;
+    }
+
+    private void multipleCtrScore(final List<AdRankItem> items,
+                                  final Map<String, String> userFeatureMap,
+                                  final Map<String, String> sceneFeatureMap,
+                                  final PAIModelV1 model) {
+
+        List<Float> score = model.score(items, userFeatureMap, sceneFeatureMap);
+        LOGGER.debug("PAIScorer score={}", score);
+        for (int i = 0; i < items.size(); i++) {
+            Double pro = Double.valueOf(score.get(i));
+            items.get(i).setLrScore(pro);
+            items.get(i).getScoreMap().put("ctcvrScore", pro);
+        }
+    }
+
+
+}

+ 150 - 44
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/strategy/RankStrategyBy688.java

@@ -1,5 +1,6 @@
 package com.tzld.piaoquan.ad.engine.service.score.strategy;
 
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
 import com.tzld.piaoquan.ad.engine.commons.score.ScorerUtils;
 import com.tzld.piaoquan.ad.engine.commons.thread.ThreadPoolFactory;
@@ -9,12 +10,14 @@ import com.tzld.piaoquan.ad.engine.commons.dto.AdPlatformCreativeDTO;
 import com.tzld.piaoquan.ad.engine.commons.param.RankRecommendRequestParam;
 import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.collections4.MapUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 import org.xm.Similarity;
 
+import javax.annotation.PostConstruct;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
@@ -26,11 +29,12 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
 
+import static com.tzld.piaoquan.ad.engine.commons.math.Const.*;
+
 @Slf4j
 @Component
 public class RankStrategyBy688 extends RankStrategyBasic {
 
-
     private Map<String, double[]> bucketsMap = new HashMap<>();
 
     private Map<String, Double> bucketsLen = new HashMap<>();
@@ -38,9 +42,28 @@ public class RankStrategyBy688 extends RankStrategyBasic {
     @Value("${word2vec.exp:694}")
     private String word2vecExp;
 
+    // FIXME(zhoutian): 可能需要独立配置
+    @ApolloJsonValue("${rank.score.weight.680:{}}")
+    private Map<String, Double> weightMap;
+
+    @ApolloJsonValue("${rank.score.neg_sample_rate:0.01}")
+    Double negSampleRate;
+
+    Set<String> sparseFeatureSet;
+
+    @PostConstruct
+    public void afterInit() {
+        this.readBucketFile();
+        this.initSparseFeatureNames();
+    }
+
     @Override
     public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
 
+        Map<String, Double> weightParam = ObjUtil.nullOrDefault(weightMap, new HashMap<>());
+
+
+        Map<Long, Double> creativeScoreCoefficient = getCreativeScoreCoefficient();
         Set<String> noApiAdVerIds = getNoApiAdVerIds();
 
         long ts = System.currentTimeMillis() / 1000;
@@ -75,6 +98,18 @@ public class RankStrategyBy688 extends RankStrategyBasic {
         Map<String, String> e1Feature = userFeature.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
         Map<String, String> e2Feature = userFeature.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
 
+        userFeatureMap.put("brand", request.getMachineInfo().getBrand().toUpperCase());
+        userFeatureMap.put("region", request.getRegion());
+        userFeatureMap.put("city", request.getCity());
+        userFeatureMap.put("vid", String.valueOf(request.getVideoId()));
+        userFeatureMap.put("cate1", d3Feature.get("merge_first_level_cate"));
+        userFeatureMap.put("cate2", d3Feature.get("merge_second_level_cate"));
+        userFeatureMap.put("user_vid_return_tags_2h", e1Feature.getOrDefault("tags_2h", null));
+        userFeatureMap.put("user_vid_return_tags_1d", e1Feature.getOrDefault("tags_1d", null));
+        userFeatureMap.put("user_vid_return_tags_3d", e1Feature.getOrDefault("tags_3d", null));
+        userFeatureMap.put("user_vid_return_tags_7d", e1Feature.getOrDefault("tags_7d", null));
+        userFeatureMap.put("user_vid_return_tags_14d", e1Feature.getOrDefault("tags_14d", null));
+
         Map<String, String> sceneFeatureMap = this.handleSceneFeature(ts);
         long time1 = System.currentTimeMillis();
 
@@ -100,6 +135,7 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                     } else {
                         adRankItem.getExt().put("isApi", "1");
                     }
+
                     adRankItem.getExt().put("recallsources", dto.getRecallSources());
 
                     String cidStr = dto.getCreativeId().toString();
@@ -117,6 +153,10 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                     this.handleC1UIFeature(midTimeDiffMap, actionStaticMap, cidFeatureMap, cidStr);
                     this.handleD1Feature(d1Feature, cidFeatureMap);
                     this.handleD2Feature(vidRankMaps, cidFeatureMap, cidStr);
+
+                    cidFeatureMap.put("cid", dto.getCreativeId() != null ? String.valueOf(dto.getCreativeId()) : null);
+                    cidFeatureMap.put("adid", dto.getAdId() != null ? String.valueOf(dto.getAdId()) : null);
+                    cidFeatureMap.put("adverid", dto.getAdVerId());
                     return adRankItem;
                 } finally {
                     cdl1.countDown();
@@ -162,17 +202,14 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                 }
             });
         }
-        long time31 = System.currentTimeMillis();
         try {
             cdl2.await(150, TimeUnit.MILLISECONDS);
         } catch (Exception e) {
             log.error("handleE1AndE2Feature and handleD3AndB1Feature wait timeout", e);
         }
 
-        // feature4
         long time3 = System.currentTimeMillis();
         // 分桶
-        this.readBucketFile();
         userFeatureMap = this.featureBucket(userFeatureMap);
         CountDownLatch cdl4 = new CountDownLatch(adRankItems.size());
         for (AdRankItem adRankItem : adRankItems) {
@@ -193,54 +230,75 @@ public class RankStrategyBy688 extends RankStrategyBasic {
         long time4 = System.currentTimeMillis();
         // 打分排序
         // getScorerPipeline
-        List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.XGBOOST_SCORE_CONF_20240909).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
+        List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.PAI_SCORE_CONF_20250214).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
         long time5 = System.currentTimeMillis();
+
+        // calibrate score for negative sampling
+        for (AdRankItem item : result) {
+            double originalScore = item.getLrScore();
+            double calibratedScore = originalScore / (originalScore + (1 - originalScore) / negSampleRate);
+            item.setLrScore(calibratedScore);
+            item.getScoreMap().put("originCtcvrScore", originalScore);
+            item.getScoreMap().put("ctcvrScore", calibratedScore);
+        }
+
         // loop
+        double cpmCoefficient = weightParam.getOrDefault("cpmCoefficient", 0.9);
+
         for (AdRankItem item : result) {
-            item.setScore(item.getLrScore() * item.getCpa());
+
+            double scoreCoefficient = creativeScoreCoefficient.getOrDefault(item.getAdId(), 1d);
+            item.setScore(item.getLrScore() * scoreCoefficient * item.getCpa());
+
             item.getScoreMap().put("cpa", item.getCpa());
             item.getScoreMap().put("cpm", item.getCpm());
+            item.getScoreMap().put("cpmCoefficient", cpmCoefficient);
+            item.getScoreMap().put("scoreCoefficient", scoreCoefficient);
             item.getFeatureMap().putAll(userFeatureMap);
             item.getFeatureMap().putAll(sceneFeatureMap);
 
             // 没有转化回传的广告主,使用后台配置的CPM
             if (noApiAdVerIds.contains(item.getAdVerId())) {
-                item.setScore(item.getCpm() / 1000);
+                item.setScore(item.getCpm() * cpmCoefficient / 1000);
             }
+        }
+
+
+        result.sort(ComparatorUtil.equalsRandomComparator());
 
+        if (CollectionUtils.isNotEmpty(result)) {
+            AdRankItem top1Item = result.get(0);
             for (Map.Entry<String, Map<String, String>> entry : videoFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
             for (Map.Entry<String, Map<String, String>> entry : userFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
-            Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(item.getAdVerId(), new HashMap<>());
+            Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(top1Item.getAdVerId(), new HashMap<>());
             for (Map.Entry<String, Map<String, String>> entry : adVerFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
 
-            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(item.getAdId()), new HashMap<>());
+            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(top1Item.getAdId()), new HashMap<>());
             for (Map.Entry<String, Map<String, String>> entry : cidFeature.entrySet()) {
                 if (MapUtils.isNotEmpty(entry.getValue())) {
-                    item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
+                    top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
                 }
             }
         }
-
-        log.info("cost={}, feature1={}, feature2={}, feature31={}, feature32={}, feature4={}, getScorerPipeline={}, " +
-                        "adIdSize={}, adRankItemsSize={}",
-                time5 - start, time1 - start, time2 - time1, time31 - time2, time3 - time31, time4 - time3,
-                time5 - time4, request.getAdIdList().size(), adRankItems.size());
-
-        result.sort(ComparatorUtil.equalsRandomComparator());
+        long time6 = System.currentTimeMillis();
+        log.info("cost={}, getFeature={}, handleFeature={},  similar={}, bucketFeature={}, getScorerPipeline={}, " +
+                        "other={}, adIdSize={}, adRankItemsSize={}",
+                time6 - start, time1 - start, time2 - time1, time3 - time2, time4 - time3,
+                time5 - time4, time6 - time5, request.getAdIdList().size(), adRankItems.size());
 
         return result;
     }
@@ -289,15 +347,20 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                 double view = Double.parseDouble(feature.getOrDefault("ad_view_" + time, "0"));
                 double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
                 double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
-                double f2 = NumUtil.div(conver, view);
-                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
-                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
-                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+                double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
+                double cpc = NumUtil.div(income, click);
+                double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+                double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
+                double ecpm = ctr * cpc * 1000;
+                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(ctr));
+                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(ctcvr));
+                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
                 cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
+                cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
 
                 cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
                 cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
-                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
+                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * ctcvr));
             }
         }
 
@@ -320,16 +383,19 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                 double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
                 double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
                 double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
-                double f2 = NumUtil.div(conver, view);
-                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
-                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
-                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+                double cpc = NumUtil.div(income, click);
+                double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+                double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
+                double ecpm = ctr * cpc * 1000;
+                cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(ctr));
+                cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(ctcvr));
+                cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
                 cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
-                // cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
+                cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
 
                 cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
                 cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
-                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
+                cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * ctcvr));
             }
         }
 
@@ -363,8 +429,19 @@ public class RankStrategyBy688 extends RankStrategyBasic {
         featureMap.put("ctr_all", String.valueOf(NumUtil.div(clickAll, viewAll)));
         featureMap.put("ctcvr_all", String.valueOf(NumUtil.div(converAll, viewAll)));
         featureMap.put("cvr_all", String.valueOf(NumUtil.div(clickAll, converAll)));
-        // featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
-
+        featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
+        if (CollectionUtils.isNotEmpty(midActionList)) {
+            List<String> cidList = new ArrayList<>();
+            List<String> converList = new ArrayList<>();
+            for (TupleMapEntry<Tuple5> tupleMapEntry : midActionList) {
+                String cid = tupleMapEntry.key;
+                String conver = tupleMapEntry.value.f3;
+                cidList.add(cid);
+                converList.add(conver);
+            }
+            featureMap.put("user_cid_click_list", String.join(",", cidList));
+            featureMap.put("user_cid_conver_list", String.join(",", converList));
+        }
         return midActionList;
     }
 
@@ -413,11 +490,13 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             double click = Double.parseDouble(d1Feature.getOrDefault("ad_click_" + prefix, "0"));
             double conver = Double.parseDouble(d1Feature.getOrDefault("ad_conversion_" + prefix, "0"));
             double income = Double.parseDouble(d1Feature.getOrDefault("ad_income_" + prefix, "0"));
-            featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(NumUtil.div(click, view)));
-            featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.div(conver, view)));
-            featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.div(conver, click)));
+            double cpc = NumUtil.div(income, click);
+            double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+            featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(ctr));
+            featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)));
+            featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
             featureMap.put("d1_feature_" + prefix + "_conver", String.valueOf(conver));
-            // featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
+            featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(ctr * cpc * 1000));
         }
     }
 
@@ -426,8 +505,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             return;
         }
 
-        // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
-        List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
+        List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
+        // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
         List<String> prefixes2 = Arrays.asList("1d", "3d", "7d", "14d");
 
         for (String prefix1 : prefixes1) {
@@ -477,7 +556,7 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             for (String tagsField : tagsFieldList) {
                 if (StringUtils.isNotEmpty(feature.get(tagsField))) {
                     String tags = feature.get(tagsField);
-                    //Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                    // Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
                     Double[] doubles;
                     if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
                         doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
@@ -560,7 +639,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             return;
         }
         synchronized (this) {
-            InputStream resourceStream = RankStrategyBy688.class.getClassLoader().getResourceAsStream("20240718_ad_bucket_688.txt");
+            String bucketFile = "20250217_ad_bucket_688.txt";
+            InputStream resourceStream = this.getClass().getClassLoader().getResourceAsStream(bucketFile);
             if (resourceStream != null) {
                 try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) {
                     Map<String, double[]> bucketsMap = new HashMap<>();
@@ -583,18 +663,46 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                     this.bucketsMap = bucketsMap;
                     this.bucketsLen = bucketsLen;
                 } catch (IOException e) {
-                    log.error("something is wrong in parse bucket file:", e);
+                    log.error("something is wrong in parse bucket file: ", e);
                 }
+                log.info("load bucket file success: {}", bucketFile);
             } else {
                 log.error("no bucket file");
             }
         }
     }
 
+    /**
+     * Populates {@code sparseFeatureSet} with the names of the features that
+     * {@code featureBucket} copies through as raw strings instead of parsing
+     * them as doubles and mapping them to a bucket.
+     */
+    private void initSparseFeatureNames() {
+        // Built as a plain HashSet rather than with double-brace initialization:
+        // the latter creates an anonymous HashSet subclass that keeps a hidden
+        // reference to the enclosing strategy instance.
+        this.sparseFeatureSet = new HashSet<>(Arrays.asList(
+                "brand",
+                "region",
+                "city",
+                "vid",
+                "cate1",
+                "cate2",
+                "cid",
+                "adid",
+                "adverid",
+                "user_cid_click_list",
+                "user_cid_conver_list",
+                "user_vid_return_tags_2h",
+                "user_vid_return_tags_1d",
+                "user_vid_return_tags_3d",
+                "user_vid_return_tags_7d",
+                "user_vid_return_tags_14d"
+        ));
+    }
+
     private Map<String, String> featureBucket(Map<String, String> featureMap) {
         Map<String, String> newFeatureMap = new ConcurrentHashMap<>(featureMap.size());
         for (Map.Entry<String, String> entry : featureMap.entrySet()) {
             String name = entry.getKey();
+            if (this.sparseFeatureSet.contains(name)) {
+                if (entry.getValue() != null) {
+                    newFeatureMap.put(name, entry.getValue());
+                }
+                continue;
+            }
             double score = Double.parseDouble(entry.getValue());
             // 注意:0值、不在分桶文件中的特征,会被过滤掉。
             if (score > 1E-8) {
@@ -608,8 +716,6 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                 }
             }
         }
-
         return newFeatureMap;
     }
-
 }

+ 10 - 0
pom.xml

@@ -322,6 +322,16 @@
             <groupId>org.springframework.cloud</groupId>
             <artifactId>spring-cloud-starter-openfeign</artifactId>
         </dependency>
+
+        <!-- https://mvnrepository.com/artifact/com.aliyun.openservices.eas/eas-sdk -->
+        <dependency>
+            <groupId>com.aliyun.openservices.eas</groupId>
+            <artifactId>eas-sdk</artifactId>
+            <version>2.0.23</version>
+        </dependency>
+
+
+
         <!--easyexcel-->
     </dependencies>