zhaohaipeng vor 8 Monaten
Ursprung
Commit
ea79de61cd

+ 419 - 0
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/score/model/XGBoost351Model.java

@@ -0,0 +1,419 @@
+package com.tzld.piaoquan.ad.engine.commons.score.model;
+
+
+import com.tzld.piaoquan.ad.engine.commons.util.CompressUtil;
+import com.tzld.piaoquan.ad.engine.commons.util.PropertiesUtil;
+import ml.dmlc.xgboost4j.scala.DMatrix;
+import ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel;
+import org.apache.commons.lang.math.NumberUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Map;
+
+
+public class XGBoost351Model extends Model {
+    private static final Logger LOGGER = LoggerFactory.getLogger(XGBoost351Model.class);
+    private XGBoostClassificationModel model;
+
+    private String[] features = {
+            "cpa",
+            "b2_3h_ctr",
+            "b2_3h_ctcvr",
+            "b2_3h_cvr",
+            "b2_3h_conver",
+            "b2_3h_ecpm",
+            "b2_3h_click",
+            "b2_3h_conver*log(view)",
+            "b2_3h_conver*ctcvr",
+            "b2_6h_ctr",
+            "b2_6h_ctcvr",
+            "b2_6h_cvr",
+            "b2_6h_conver",
+            "b2_6h_ecpm",
+            "b2_6h_click",
+            "b2_6h_conver*log(view)",
+            "b2_6h_conver*ctcvr",
+            "b2_12h_ctr",
+            "b2_12h_ctcvr",
+            "b2_12h_cvr",
+            "b2_12h_conver",
+            "b2_12h_ecpm",
+            "b2_12h_click",
+            "b2_12h_conver*log(view)",
+            "b2_12h_conver*ctcvr",
+            "b2_1d_ctr",
+            "b2_1d_ctcvr",
+            "b2_1d_cvr",
+            "b2_1d_conver",
+            "b2_1d_ecpm",
+            "b2_1d_click",
+            "b2_1d_conver*log(view)",
+            "b2_1d_conver*ctcvr",
+            "b2_3d_ctr",
+            "b2_3d_ctcvr",
+            "b2_3d_cvr",
+            "b2_3d_conver",
+            "b2_3d_ecpm",
+            "b2_3d_click",
+            "b2_3d_conver*log(view)",
+            "b2_3d_conver*ctcvr",
+            "b2_7d_ctr",
+            "b2_7d_ctcvr",
+            "b2_7d_cvr",
+            "b2_7d_conver",
+            "b2_7d_ecpm",
+            "b2_7d_click",
+            "b2_7d_conver*log(view)",
+            "b2_7d_conver*ctcvr",
+            "b3_3h_ctr",
+            "b3_3h_ctcvr",
+            "b3_3h_cvr",
+            "b3_3h_conver",
+            "b3_3h_ecpm",
+            "b3_3h_click",
+            "b3_3h_conver*log(view)",
+            "b3_3h_conver*ctcvr",
+            "b3_6h_ctr",
+            "b3_6h_ctcvr",
+            "b3_6h_cvr",
+            "b3_6h_conver",
+            "b3_6h_ecpm",
+            "b3_6h_click",
+            "b3_6h_conver*log(view)",
+            "b3_6h_conver*ctcvr",
+            "b3_12h_ctr",
+            "b3_12h_ctcvr",
+            "b3_12h_cvr",
+            "b3_12h_conver",
+            "b3_12h_ecpm",
+            "b3_12h_click",
+            "b3_12h_conver*log(view)",
+            "b3_12h_conver*ctcvr",
+            "b3_1d_ctr",
+            "b3_1d_ctcvr",
+            "b3_1d_cvr",
+            "b3_1d_conver",
+            "b3_1d_ecpm",
+            "b3_1d_click",
+            "b3_1d_conver*log(view)",
+            "b3_1d_conver*ctcvr",
+            "b3_3d_ctr",
+            "b3_3d_ctcvr",
+            "b3_3d_cvr",
+            "b3_3d_conver",
+            "b3_3d_ecpm",
+            "b3_3d_click",
+            "b3_3d_conver*log(view)",
+            "b3_3d_conver*ctcvr",
+            "b3_7d_ctr",
+            "b3_7d_ctcvr",
+            "b3_7d_cvr",
+            "b3_7d_conver",
+            "b3_7d_ecpm",
+            "b3_7d_click",
+            "b3_7d_conver*log(view)",
+            "b3_7d_conver*ctcvr",
+            "b4_3h_ctr",
+            "b4_3h_ctcvr",
+            "b4_3h_cvr",
+            "b4_3h_conver",
+            "b4_3h_ecpm",
+            "b4_3h_click",
+            "b4_3h_conver*log(view)",
+            "b4_3h_conver*ctcvr",
+            "b4_6h_ctr",
+            "b4_6h_ctcvr",
+            "b4_6h_cvr",
+            "b4_6h_conver",
+            "b4_6h_ecpm",
+            "b4_6h_click",
+            "b4_6h_conver*log(view)",
+            "b4_6h_conver*ctcvr",
+            "b4_12h_ctr",
+            "b4_12h_ctcvr",
+            "b4_12h_cvr",
+            "b4_12h_conver",
+            "b4_12h_ecpm",
+            "b4_12h_click",
+            "b4_12h_conver*log(view)",
+            "b4_12h_conver*ctcvr",
+            "b4_1d_ctr",
+            "b4_1d_ctcvr",
+            "b4_1d_cvr",
+            "b4_1d_conver",
+            "b4_1d_ecpm",
+            "b4_1d_click",
+            "b4_1d_conver*log(view)",
+            "b4_1d_conver*ctcvr",
+            "b4_3d_ctr",
+            "b4_3d_ctcvr",
+            "b4_3d_cvr",
+            "b4_3d_conver",
+            "b4_3d_ecpm",
+            "b4_3d_click",
+            "b4_3d_conver*log(view)",
+            "b4_3d_conver*ctcvr",
+            "b4_7d_ctr",
+            "b4_7d_ctcvr",
+            "b4_7d_cvr",
+            "b4_7d_conver",
+            "b4_7d_ecpm",
+            "b4_7d_click",
+            "b4_7d_conver*log(view)",
+            "b4_7d_conver*ctcvr",
+            "b5_3h_ctr",
+            "b5_3h_ctcvr",
+            "b5_3h_cvr",
+            "b5_3h_conver",
+            "b5_3h_ecpm",
+            "b5_3h_click",
+            "b5_3h_conver*log(view)",
+            "b5_3h_conver*ctcvr",
+            "b5_6h_ctr",
+            "b5_6h_ctcvr",
+            "b5_6h_cvr",
+            "b5_6h_conver",
+            "b5_6h_ecpm",
+            "b5_6h_click",
+            "b5_6h_conver*log(view)",
+            "b5_6h_conver*ctcvr",
+            "b5_12h_ctr",
+            "b5_12h_ctcvr",
+            "b5_12h_cvr",
+            "b5_12h_conver",
+            "b5_12h_ecpm",
+            "b5_12h_click",
+            "b5_12h_conver*log(view)",
+            "b5_12h_conver*ctcvr",
+            "b5_1d_ctr",
+            "b5_1d_ctcvr",
+            "b5_1d_cvr",
+            "b5_1d_conver",
+            "b5_1d_ecpm",
+            "b5_1d_click",
+            "b5_1d_conver*log(view)",
+            "b5_1d_conver*ctcvr",
+            "b5_3d_ctr",
+            "b5_3d_ctcvr",
+            "b5_3d_cvr",
+            "b5_3d_conver",
+            "b5_3d_ecpm",
+            "b5_3d_click",
+            "b5_3d_conver*log(view)",
+            "b5_3d_conver*ctcvr",
+            "b5_7d_ctr",
+            "b5_7d_ctcvr",
+            "b5_7d_cvr",
+            "b5_7d_conver",
+            "b5_7d_ecpm",
+            "b5_7d_click",
+            "b5_7d_conver*log(view)",
+            "b5_7d_conver*ctcvr",
+            "b8_3h_ctr",
+            "b8_3h_ctcvr",
+            "b8_3h_cvr",
+            "b8_3h_conver",
+            "b8_3h_ecpm",
+            "b8_3h_click",
+            "b8_3h_conver*log(view)",
+            "b8_3h_conver*ctcvr",
+            "b8_6h_ctr",
+            "b8_6h_ctcvr",
+            "b8_6h_cvr",
+            "b8_6h_conver",
+            "b8_6h_ecpm",
+            "b8_6h_click",
+            "b8_6h_conver*log(view)",
+            "b8_6h_conver*ctcvr",
+            "b8_12h_ctr",
+            "b8_12h_ctcvr",
+            "b8_12h_cvr",
+            "b8_12h_conver",
+            "b8_12h_ecpm",
+            "b8_12h_click",
+            "b8_12h_conver*log(view)",
+            "b8_12h_conver*ctcvr",
+            "b8_1d_ctr",
+            "b8_1d_ctcvr",
+            "b8_1d_cvr",
+            "b8_1d_conver",
+            "b8_1d_ecpm",
+            "b8_1d_click",
+            "b8_1d_conver*log(view)",
+            "b8_1d_conver*ctcvr",
+            "b8_3d_ctr",
+            "b8_3d_ctcvr",
+            "b8_3d_cvr",
+            "b8_3d_conver",
+            "b8_3d_ecpm",
+            "b8_3d_click",
+            "b8_3d_conver*log(view)",
+            "b8_3d_conver*ctcvr",
+            "b8_7d_ctr",
+            "b8_7d_ctcvr",
+            "b8_7d_cvr",
+            "b8_7d_conver",
+            "b8_7d_ecpm",
+            "b8_7d_click",
+            "b8_7d_conver*log(view)",
+            "b8_7d_conver*ctcvr",
+            "b6_7d_ctr",
+            "b6_7d_ctcvr",
+            "b6_7d_cvr",
+            "b6_7d_conver",
+            "b6_7d_ecpm",
+            "b6_7d_click",
+            "b6_7d_conver*log(view)",
+            "b6_7d_conver*ctcvr",
+            "b6_14d_ctr",
+            "b6_14d_ctcvr",
+            "b6_14d_cvr",
+            "b6_14d_conver",
+            "b6_14d_ecpm",
+            "b6_14d_click",
+            "b6_14d_conver*log(view)",
+            "b6_14d_conver*ctcvr",
+            "b7_7d_ctr",
+            "b7_7d_ctcvr",
+            "b7_7d_cvr",
+            "b7_7d_conver",
+            "b7_7d_ecpm",
+            "b7_7d_click",
+            "b7_7d_conver*log(view)",
+            "b7_7d_conver*ctcvr",
+            "b7_14d_ctr",
+            "b7_14d_ctcvr",
+            "b7_14d_cvr",
+            "b7_14d_conver",
+            "b7_14d_ecpm",
+            "b7_14d_click",
+            "b7_14d_conver*log(view)",
+            "b7_14d_conver*ctcvr",
+            "viewAll",
+            "clickAll",
+            "converAll",
+            "incomeAll",
+            "ctr_all",
+            "ctcvr_all",
+            "cvr_all",
+            "ecpm_all",
+            "timediff_view",
+            "timediff_click",
+            "timediff_conver",
+            "actionstatic_view",
+            "actionstatic_click",
+            "actionstatic_conver",
+            "actionstatic_income",
+            "actionstatic_ctr",
+            "actionstatic_ctcvr",
+            "actionstatic_cvr",
+            "e1_tags_3d_matchnum",
+            "e1_tags_3d_maxscore",
+            "e1_tags_3d_avgscore",
+            "e1_tags_7d_matchnum",
+            "e1_tags_7d_maxscore",
+            "e1_tags_7d_avgscore",
+            "e1_tags_14d_matchnum",
+            "e1_tags_14d_maxscore",
+            "e1_tags_14d_avgscore",
+            "e2_tags_3d_matchnum",
+            "e2_tags_3d_maxscore",
+            "e2_tags_3d_avgscore",
+            "e2_tags_7d_matchnum",
+            "e2_tags_7d_maxscore",
+            "e2_tags_7d_avgscore",
+            "e2_tags_14d_matchnum",
+            "e2_tags_14d_maxscore",
+            "e2_tags_14d_avgscore",
+            "d1_feature_3h_ctr",
+            "d1_feature_3h_ctcvr",
+            "d1_feature_3h_cvr",
+            "d1_feature_3h_conver",
+            "d1_feature_3h_ecpm",
+            "d1_feature_6h_ctr",
+            "d1_feature_6h_ctcvr",
+            "d1_feature_6h_cvr",
+            "d1_feature_6h_conver",
+            "d1_feature_6h_ecpm",
+            "d1_feature_12h_ctr",
+            "d1_feature_12h_ctcvr",
+            "d1_feature_12h_cvr",
+            "d1_feature_12h_conver",
+            "d1_feature_12h_ecpm",
+            "d1_feature_1d_ctr",
+            "d1_feature_1d_ctcvr",
+            "d1_feature_1d_cvr",
+            "d1_feature_1d_conver",
+            "d1_feature_1d_ecpm",
+            "d1_feature_3d_ctr",
+            "d1_feature_3d_ctcvr",
+            "d1_feature_3d_cvr",
+            "d1_feature_3d_conver",
+            "d1_feature_3d_ecpm",
+            "d1_feature_7d_ctr",
+            "d1_feature_7d_ctcvr",
+            "d1_feature_7d_cvr",
+            "d1_feature_7d_conver",
+            "d1_feature_7d_ecpm",
+            "vid_rank_ctr_1d",
+            "vid_rank_ctr_3d",
+            "vid_rank_ctr_7d",
+            "vid_rank_ctr_14d",
+            "vid_rank_ctcvr_1d",
+            "vid_rank_ctcvr_3d",
+            "vid_rank_ctcvr_7d",
+            "vid_rank_ctcvr_14d",
+            "vid_rank_ecpm_1d",
+            "vid_rank_ecpm_3d",
+            "vid_rank_ecpm_7d",
+            "vid_rank_ecpm_14d"
+    };
+
+    /**
+     * Reports how many models this wrapper currently holds.
+     *
+     * @return {@code 1} when a model has been loaded, {@code 0} when none is set
+     */
+    @Override
+    public int getModelSize() {
+        return this.model != null ? 1 : 0;
+    }
+
+    /**
+     * Reader-based loading is not implemented for this model: the argument is
+     * ignored and the call always reports failure.
+     *
+     * @param in unused
+     * @return always {@code false}
+     */
+    @Override
+    public boolean loadFromStream(InputStreamReader in) throws Exception {
+        return false;
+    }
+
+    /** Drops the reference to the currently loaded model by setting the field to {@code null}. */
+    public void cleanModel() {
+        this.model = null;
+    }
+
+    /**
+     * Scores a single sample with the loaded booster.
+     *
+     * <p>The dense input row is built by looking up every name in {@code features},
+     * in array order, in {@code featureMap}. Missing or unparsable values become
+     * {@code 0.0f}, which is also the value passed to {@code DMatrix} as "missing".
+     *
+     * <p>Failures never propagate: a null feature map, an unloaded model or any
+     * exception during prediction is logged and yields {@code 0f}.
+     *
+     * @param featureMap feature name to string-encoded numeric value
+     * @return the first prediction of the single input row, or {@code 0f} on any failure
+     */
+    public Float score(Map<String, String> featureMap) {
+        if (featureMap == null) {
+            LOGGER.warn("xgboost351 score skipped: feature map is null");
+            return 0f;
+        }
+        // Read the field once so the null check and the prediction use the same instance.
+        final XGBoostClassificationModel current = this.model;
+        if (current == null) {
+            LOGGER.warn("xgboost351 score skipped: model is not loaded");
+            return 0f;
+        }
+
+        DMatrix dm = null;
+        try {
+            float[] values = new float[features.length];
+            for (int i = 0; i < features.length; i++) {
+                values[i] = NumberUtils.toFloat(featureMap.getOrDefault(features[i], "0.0"), 0.0f);
+            }
+            dm = new DMatrix(values, 1, features.length, 0.0f);
+            float[][] result = current._booster().predict(dm, false, 0);
+            return result[0][0];
+        } catch (Exception e) {
+            // Previously swallowed silently; keep the 0 fallback but make the failure visible.
+            LOGGER.error("xgboost351 score failed", e);
+            return 0f;
+        } finally {
+            if (dm != null) {
+                // DMatrix wraps native memory; release it explicitly instead of waiting for GC.
+                dm.delete();
+            }
+        }
+    }
+
+    /**
+     * Loads the classification model from a compressed stream.
+     *
+     * <p>The stream is handed to {@code CompressUtil.decompressGzFile} together with
+     * the directory configured under {@code model.xgboost.path351} (presumably the
+     * archive is unpacked there; verify against {@code CompressUtil}). The model is
+     * then loaded from that directory's absolute path via a {@code file://} URI,
+     * its missing value is set to {@code 0.0f}, and it replaces the current model.
+     *
+     * @param in compressed model archive
+     * @return {@code true} once the model has been loaded and installed
+     * @throws Exception if decompression or model loading fails
+     */
+    @Override
+    public boolean loadFromStream(InputStream in) throws Exception {
+        final String targetDir = PropertiesUtil.getString("model.xgboost.path351");
+        CompressUtil.decompressGzFile(in, targetDir);
+
+        final File extracted = new File(targetDir);
+        final String modelUri = "file://" + extracted.getAbsolutePath();
+
+        final XGBoostClassificationModel loaded = XGBoostClassificationModel.load(modelUri);
+        loaded.setMissing(0.0f);
+        this.model = loaded;
+        return true;
+    }
+
+}

+ 8 - 0
ad-engine-server/src/main/resources/ad_score_config_xgboost_351.conf

@@ -0,0 +1,8 @@
+scorer-config = {
+  lr-rov-score-config = {
+    # NOTE(review): this is the "351" config, yet scorer-name points at XGBoostScorer
+    # rather than XGBoost351Scorer — confirm which scorer class is intended.
+    scorer-name = "com.tzld.piaoquan.ad.engine.service.score.XGBoostScorer"
+    scorer-priority = 99
+    # NOTE(review): confirm this archive is the model trained on the 351-feature set.
+    model-path = "zhangbo/model_xgb_1000.tar.gz"
+  }
+
+}

+ 2 - 1
ad-engine-server/src/main/resources/application.yml

@@ -132,4 +132,5 @@ grpc:
       negotiationType: PLAINTEXT
 model:
   xgboost:
-    path: xgboost
+    path: xgboost
+    path351: xgboost351

+ 162 - 0
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/XGBoost351Scorer.java

@@ -0,0 +1,162 @@
+package com.tzld.piaoquan.ad.engine.service.score;
+
+
+import com.tzld.piaoquan.ad.engine.commons.score.BaseXGBoostModelScorer;
+import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
+import com.tzld.piaoquan.ad.engine.commons.score.ScorerConfigInfo;
+import com.tzld.piaoquan.ad.engine.commons.score.model.XGBoost351Model;
+import com.tzld.piaoquan.ad.engine.commons.score.model.XGBoostModel;
+import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
+import com.tzld.piaoquan.recommend.feature.domain.ad.base.UserAdFeature;
+import org.apache.commons.collections4.CollectionUtils;
+import org.apache.commons.collections4.MapUtils;
+import org.apache.commons.lang.exception.ExceptionUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.*;
+import java.util.concurrent.*;
+
+
+/**
+ * Scorer that ranks ad candidates with an {@link XGBoost351Model}.
+ *
+ * <p>Each candidate is scored as its own task on a shared fixed pool of 128
+ * threads. All tasks of one request share a {@value #LOCAL_TIME_OUT} ms budget;
+ * tasks that do not finish in time are cancelled. After scoring, the items are
+ * sorted with {@link Collections#sort(List)}.
+ */
+public class XGBoost351Scorer extends BaseXGBoostModelScorer {
+
+    /** Time budget, in milliseconds, for scoring all candidates of one request. */
+    private static final int LOCAL_TIME_OUT = 150;
+    private final static Logger LOGGER = LoggerFactory.getLogger(XGBoost351Scorer.class);
+    /** Pool shared by all instances on which per-item scoring tasks run. */
+    private static final ExecutorService executorService = Executors.newFixedThreadPool(128);
+
+
+    public XGBoost351Scorer(ScorerConfigInfo configInfo) {
+        super(configInfo);
+    }
+
+    /** Loads the model as an {@link XGBoost351Model}. */
+    @Override
+    public void loadModel() {
+        doLoadModel(XGBoost351Model.class);
+    }
+
+    /**
+     * Not supported by this scorer; use
+     * {@link #scoring(Map, Map, List)} instead.
+     *
+     * @throws NoSuchMethodError always
+     */
+    @Override
+    public List<AdRankItem> scoring(final ScoreParam param,
+                                    final UserAdFeature userAdFeature,
+                                    final List<AdRankItem> rankItems) {
+        throw new NoSuchMethodError();
+    }
+
+    /**
+     * Scores and sorts the given candidates.
+     *
+     * @param sceneFeatureMap scene-level features, merged last into each item's feature map
+     * @param userFeatureMap  user-level features, merged after the item's own features
+     * @param rankItems       candidates to score; returned as-is when null or empty
+     * @return the same list instance, scored and sorted in place
+     * @throws IllegalStateException if no {@link XGBoost351Model} is loaded
+     */
+    public List<AdRankItem> scoring(final Map<String, String> sceneFeatureMap,
+                                    final Map<String, String> userFeatureMap,
+                                    final List<AdRankItem> rankItems) {
+        if (CollectionUtils.isEmpty(rankItems)) {
+            return rankItems;
+        }
+
+        long startTime = System.currentTimeMillis();
+
+        List<AdRankItem> result = rankByJava(sceneFeatureMap, userFeatureMap, rankItems);
+
+        LOGGER.debug("ctr ranker time java items size={}, time={} ", result != null ? result.size() : 0,
+                System.currentTimeMillis() - startTime);
+
+        return result;
+    }
+
+    /** Scores every item with the loaded model, then sorts the list in place. */
+    private List<AdRankItem> rankByJava(final Map<String, String> sceneFeatureMap,
+                                      final Map<String, String> userFeatureMap,
+                                      final List<AdRankItem> items) {
+        long startTime = System.currentTimeMillis();
+
+        // loadModel() installs an XGBoost351Model, which is not an XGBoostModel; casting to
+        // XGBoostModel (as before) fails with ClassCastException on every request.
+        final Object loaded = this.getModel();
+        if (!(loaded instanceof XGBoost351Model)) {
+            throw new IllegalStateException("XGBoost351Model is not loaded, actual model: "
+                    + (loaded == null ? "null" : loaded.getClass().getName()));
+        }
+        final XGBoost351Model model = (XGBoost351Model) loaded;
+        LOGGER.debug("model size: [{}]", model.getModelSize());
+
+        // Every item takes part in scoring; ordering is decided by the sort below.
+        multipleCtrScore(items, userFeatureMap, sceneFeatureMap, model);
+
+        // debug log
+        if (LOGGER.isDebugEnabled()) {
+            for (AdRankItem item : items) {
+                LOGGER.debug("before enter feeds model predict ctr score [{}] [{}]", item, item);
+            }
+        }
+
+        Collections.sort(items);
+
+        LOGGER.debug("ctr ranker java execute time: [{}]", System.currentTimeMillis() - startTime);
+        LOGGER.debug("[ctr ranker time java] items size={}, cost={} ", items.size(),
+                System.currentTimeMillis() - startTime);
+        return items;
+    }
+
+    /**
+     * Scores all items concurrently within {@link #LOCAL_TIME_OUT} milliseconds.
+     * Items whose task is cancelled by the timeout keep whatever score they had.
+     */
+    private void multipleCtrScore(final List<AdRankItem> items,
+                                  final Map<String, String> userFeatureMap,
+                                  final Map<String, String> sceneFeatureMap,
+                                  final XGBoost351Model model) {
+
+        List<Callable<Object>> calls = new ArrayList<Callable<Object>>(items.size());
+        for (int index = 0; index < items.size(); index++) {
+            final int fIndex = index;
+            calls.add(new Callable<Object>() {
+                @Override
+                public Object call() throws Exception {
+                    try {
+                        calcScore(model, items.get(fIndex), userFeatureMap, sceneFeatureMap);
+                    } catch (Exception e) {
+                        LOGGER.error("ctr exception: [{}] [{}]", items.get(fIndex), ExceptionUtils.getFullStackTrace(e));
+                    }
+                    return new Object();
+                }
+            });
+        }
+
+        List<Future<Object>> futures = null;
+        try {
+            futures = executorService.invokeAll(calls, LOCAL_TIME_OUT, TimeUnit.MILLISECONDS);
+        } catch (InterruptedException e) {
+            // Restore the flag so callers up the stack can still observe the interruption.
+            Thread.currentThread().interrupt();
+            LOGGER.error("execute invoke fail: {}", ExceptionUtils.getFullStackTrace(e));
+        }
+
+        if (futures == null) {
+            return;
+        }
+
+        // invokeAll has already waited (up to the timeout); here we only count unfinished tasks.
+        int cancel = 0;
+        for (Future<Object> future : futures) {
+            try {
+                if (!future.isDone() || future.isCancelled() || future.get() == null) {
+                    cancel++;
+                }
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt();
+                LOGGER.error("InterruptedException {}", ExceptionUtils.getFullStackTrace(e));
+            } catch (ExecutionException e) {
+                LOGGER.error("ExecutionException {},{}", sceneFeatureMap == null ? 0 : sceneFeatureMap.size(),
+                        ExceptionUtils.getFullStackTrace(e));
+            }
+        }
+        if (cancel > 0) {
+            LOGGER.warn("ctr score not finished within {} ms: {} of {} tasks", LOCAL_TIME_OUT, cancel,
+                    futures.size());
+        }
+    }
+
+    /**
+     * Scores one item with an {@link XGBoost351Model} and stores the result on the item
+     * (as its LR score and under {@code "ctcvrScore"} in its score map).
+     *
+     * @param model           model used for prediction
+     * @param item            candidate to score; its own features are merged first
+     * @param userFeatureMap  user features, overriding item features on key clashes
+     * @param sceneFeatureMap scene features, overriding both on key clashes
+     * @return the score, or {@code 0.0} when there are no features or scoring fails
+     */
+    public double calcScore(final XGBoost351Model model,
+                            final AdRankItem item,
+                            final Map<String, String> userFeatureMap,
+                            final Map<String, String> sceneFeatureMap) {
+        Map<String, String> featureMap = mergeFeatures(item, userFeatureMap, sceneFeatureMap);
+
+        double pro = 0.0;
+        if (MapUtils.isNotEmpty(featureMap)) {
+            try {
+                pro = model.score(featureMap);
+            } catch (Exception e) {
+                LOGGER.error("score error for doc={} exception={}", item.getVideoId(), ExceptionUtils.getFullStackTrace(e));
+            }
+        }
+        recordScore(item, pro);
+        return pro;
+    }
+
+    /**
+     * Same as {@link #calcScore(XGBoost351Model, AdRankItem, Map, Map)} but for an
+     * {@link XGBoostModel}. Kept so existing callers of the original signature still compile;
+     * this scorer's own pipeline uses the {@link XGBoost351Model} overload.
+     */
+    public double calcScore(final XGBoostModel model,
+                            final AdRankItem item,
+                            final Map<String, String> userFeatureMap,
+                            final Map<String, String> sceneFeatureMap) {
+        Map<String, String> featureMap = mergeFeatures(item, userFeatureMap, sceneFeatureMap);
+
+        double pro = 0.0;
+        if (MapUtils.isNotEmpty(featureMap)) {
+            try {
+                pro = model.score(featureMap);
+            } catch (Exception e) {
+                LOGGER.error("score error for doc={} exception={}", item.getVideoId(), ExceptionUtils.getFullStackTrace(e));
+            }
+        }
+        recordScore(item, pro);
+        return pro;
+    }
+
+    /** Merges item, user and scene features into one map; later sources win on key clashes. */
+    private static Map<String, String> mergeFeatures(final AdRankItem item,
+                                                     final Map<String, String> userFeatureMap,
+                                                     final Map<String, String> sceneFeatureMap) {
+        Map<String, String> featureMap = new HashMap<>();
+        if (MapUtils.isNotEmpty(item.getFeatureMap())) {
+            featureMap.putAll(item.getFeatureMap());
+        }
+        if (MapUtils.isNotEmpty(userFeatureMap)) {
+            featureMap.putAll(userFeatureMap);
+        }
+        if (MapUtils.isNotEmpty(sceneFeatureMap)) {
+            featureMap.putAll(sceneFeatureMap);
+        }
+        return featureMap;
+    }
+
+    /** Writes the score onto the item as its LR score and as {@code "ctcvrScore"}. */
+    private static void recordScore(final AdRankItem item, final double pro) {
+        item.setLrScore(pro);
+        item.getScoreMap().put("ctcvrScore", pro);
+    }
+}