|
@@ -0,0 +1,985 @@
|
|
|
|
|
+package com.tzld.piaoquan.ad.engine.service.score.strategy;
|
|
|
|
|
+
|
|
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.dto.AdPlatformCreativeDTO;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.helper.DnnCidDataHelper;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.param.RankRecommendRequestParam;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.score.ScorerUtils;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.thread.ThreadPoolFactory;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.util.*;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.service.entity.CorrectCpaParam;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.service.entity.GuaranteeView;
|
|
|
|
|
+import com.tzld.piaoquan.ad.engine.service.feature.Feature;
|
|
|
|
|
+import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
|
|
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
+import org.apache.commons.collections4.CollectionUtils;
|
|
|
|
|
+import org.apache.commons.collections4.MapUtils;
|
|
|
|
|
+import org.apache.commons.lang.math.NumberUtils;
|
|
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
|
|
+import org.springframework.stereotype.Component;
|
|
|
|
|
+import org.xm.Similarity;
|
|
|
|
|
+
|
|
|
|
|
+import javax.annotation.PostConstruct;
|
|
|
|
|
+import java.io.BufferedReader;
|
|
|
|
|
+import java.io.IOException;
|
|
|
|
|
+import java.io.InputStream;
|
|
|
|
|
+import java.io.InputStreamReader;
|
|
|
|
|
+import java.util.*;
|
|
|
|
|
+import java.util.concurrent.ConcurrentHashMap;
|
|
|
|
|
+import java.util.concurrent.CountDownLatch;
|
|
|
|
|
+import java.util.concurrent.Future;
|
|
|
|
|
+import java.util.concurrent.TimeUnit;
|
|
|
|
|
+import java.util.stream.Collectors;
|
|
|
|
|
+
|
|
|
|
|
+import static com.tzld.piaoquan.ad.engine.commons.math.Const.*;
|
|
|
|
|
+
|
|
|
|
|
+@Slf4j
|
|
|
|
|
+@Component
|
|
|
|
|
+public class RankStrategyBy840 extends RankStrategyBasic {
|
|
|
|
|
+
|
|
|
|
|
+    /**
|
|
|
|
|
+     * Shared immutable empty maps, used as getOrDefault fallbacks so no new empty HashMap is allocated per lookup.
|
|
|
|
|
+     */
|
|
|
|
|
+    private static final Map<String, String> EMPTY_STRING_MAP = Collections.emptyMap();
|
|
|
|
|
+    private static final Map<String, Map<String, String>> EMPTY_NESTED_MAP = Collections.emptyMap();
|
|
|
|
|
+
|
|
|
|
|
+    private Map<String, double[]> bucketsMap = new HashMap<>(); // presumably filled by readBucketFile() -- not visible here, confirm
|
|
|
|
|
+
|
|
|
|
|
+    private Map<String, Double> bucketsLen = new HashMap<>(); // presumably filled by readBucketFile() -- not visible here, confirm
|
|
|
|
|
+
|
|
|
|
|
+    @Value("${word2vec.exp:694}")
|
|
|
|
|
+    private String word2vecExp;
|
|
|
|
|
+
|
|
|
|
|
+    @ApolloJsonValue("${rank.score.params.840:{}}")
|
|
|
|
|
+    private Map<String, String> paramsMap; // string-valued tuning parameters read by adItemRank (viewLimit, expOldKey, calibModelName, ...)
|
|
|
|
|
+
|
|
|
|
|
+    // FIXME(zhoutian): may need a dedicated config; this strategy currently reads the rank.score.weight.680 key
|
|
|
|
|
+    @ApolloJsonValue("${rank.score.weight.680:{}}")
|
|
|
|
|
+    private Map<String, Double> weightMap;
|
|
|
|
|
+
|
|
|
|
|
+    /**
|
|
|
|
|
+     * Weight per audience layer and creative.
|
|
|
|
|
+     * Format: {layer_creativeId: weight}
|
|
|
|
|
+     */
|
|
|
|
|
+    @ApolloJsonValue("${rank.score.weight.layer.and.creative:{}}")
|
|
|
|
|
+    private Map<String, Double> layerAndCreativeWeightMap;
|
|
|
|
|
+
|
|
|
|
|
+    @ApolloJsonValue("${rank.score.neg_sample_rate:0.01}")
|
|
|
|
|
+    Double negSampleRate; // divisor used by adItemRank when calibrating the raw model score
|
|
|
|
|
+
|
|
|
|
|
+    Set<String> sparseFeatureSet; // presumably populated by initSparseFeatureNames() -- not visible here, confirm
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+    /**
+     * Post-construction hook: runs the bucket-file loader first, then the
+     * sparse-feature-name initializer.
+     */
+    @PostConstruct
+    public void afterInit() {
+        readBucketFile();
+        initSparseFeatureNames();
+    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+    /**
+     * Ranks the candidate creatives of one request.
+     *
+     * <p>Stages, in order: filter the request's creatives, load features, build per-creative
+     * features on the feature thread pool, add the E1/E2 and D3/B1 features on the default pool,
+     * bucketize, score with the {@code PAI_SCORE_CONF_20250804} pipeline, calibrate the model score,
+     * compute the final score from the calibrated score, the bid and several weights, and sort.
+     *
+     * <p>Each parallel stage is bounded by a timeout (300ms / 150ms / 100ms). A creative whose
+     * first-stage task has not finished, or has failed, is left out of the ranking.
+     *
+     * @param request    rank request carrying the candidate creatives and the video id
+     * @param scoreParam scoring context; a non-empty machine brand in its request context gets a
+     *                   {@code "-n"} suffix appended as a side effect
+     * @return the scored items after sorting, or an empty list when no feature object is available
+     */
+    @Override
+    public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
+        Map<String, Double> weightParam = ObjUtil.nullOrDefault(weightMap, new HashMap<>());
+        // Same null guard as weightMap, so an unset config value cannot NPE the lookups below.
+        Map<String, String> params = ObjUtil.nullOrDefault(paramsMap, new HashMap<>());
+
+        Map<Long, Double> creativeScoreCoefficient = getCreativeScoreCoefficient();
+        Set<String> noApiAdVerIds = getNoApiAdVerIds();
+
+        long ts = System.currentTimeMillis() / 1000;
+
+        String brand = scoreParam.getRequestContext().getMachineinfoBrand();
+        if (StringUtils.isNotEmpty(brand)) {
+            scoreParam.getRequestContext().setMachineinfoBrand(brand + "-n");
+        }
+
+        long start = System.currentTimeMillis();
+        // Filter creatives.
+        filterRequestAdList(request, scoreParam);
+        // Feature processing.
+        // feature1
+        Feature feature = this.getFeature(scoreParam, request);
+        if (feature == null) {
+            log.warn("adItemRank: feature is null, skip processing. request={}", request);
+            return new ArrayList<>();
+        }
+
+        Map<String, Map<String, String>> userFeature = feature.getUserFeature();
+        Map<String, Map<String, String>> videoFeature = feature.getVideoFeature();
+        Map<String, Map<String, Map<String, String>>> allAdVerFeature = feature.getAdVerFeature();
+        Map<String, Map<String, Map<String, String>>> allCidFeature = feature.getCidFeature();
+        Map<String, Map<String, Map<String, String>>> allSkuFeature = feature.getSkuFeature();
+        Map<String, String> reqFeature = this.getReqFeature(scoreParam, request);
+
+        Map<String, String> userFeatureMap = new HashMap<>();
+        Map<String, String> c1Feature = userFeature.getOrDefault("alg_mid_feature_ad_action", EMPTY_STRING_MAP);
+        List<TupleMapEntry<Tuple5>> midActionList = this.handleC1Feature(c1Feature, userFeatureMap);
+
+        Map<String, Double> midTimeDiffMap = this.parseC1FeatureListToTimeDiffMap(midActionList, ts);
+        Map<String, Double> actionStaticMap = this.parseC1FeatureListToActionStaticMap(midActionList);
+
+        Map<String, String> d2Feature = videoFeature.getOrDefault("alg_cid_feature_vid_cf_rank", EMPTY_STRING_MAP);
+        Map<String, String> d3Feature = videoFeature.getOrDefault("alg_vid_feature_basic_info", EMPTY_STRING_MAP);
+
+        Map<String, Map<String, Double>> vidRankMaps = this.parseD2FeatureMap(d2Feature);
+
+        Map<String, String> e1Feature = userFeature.getOrDefault("alg_mid_feature_return_tags", EMPTY_STRING_MAP);
+        Map<String, String> e2Feature = userFeature.getOrDefault("alg_mid_feature_share_tags", EMPTY_STRING_MAP);
+
+        Map<String, String> g1Feature = userFeature.getOrDefault("mid_return_video_cate", EMPTY_STRING_MAP);
+        Map<String, String> g2Feature = userFeature.getOrDefault("mid_share_video_cate", EMPTY_STRING_MAP);
+
+        // Request-level values default to "" when absent.
+        userFeatureMap.put("brand", reqFeature.getOrDefault("brand", ""));
+        userFeatureMap.put("region", reqFeature.getOrDefault("region", ""));
+        userFeatureMap.put("city", reqFeature.getOrDefault("city", ""));
+        userFeatureMap.put("vid", reqFeature.getOrDefault("vid", ""));
+        userFeatureMap.put("apptype", reqFeature.getOrDefault("apptype", ""));
+        userFeatureMap.put("is_first_layer", reqFeature.getOrDefault("is_first_layer", ""));
+        userFeatureMap.put("root_source_scene", reqFeature.getOrDefault("root_source_scene", ""));
+        userFeatureMap.put("root_source_channel", reqFeature.getOrDefault("root_source_channel", ""));
+
+        // Video / user tag values are stored as null when absent.
+        userFeatureMap.put("cate1", d3Feature.get("merge_first_level_cate"));
+        userFeatureMap.put("cate2", d3Feature.get("merge_second_level_cate"));
+        userFeatureMap.put("user_vid_return_tags_2h", e1Feature.getOrDefault("tags_2h", null));
+        userFeatureMap.put("user_vid_return_tags_1d", e1Feature.getOrDefault("tags_1d", null));
+        userFeatureMap.put("user_vid_return_tags_3d", e1Feature.getOrDefault("tags_3d", null));
+        userFeatureMap.put("user_vid_return_tags_7d", e1Feature.getOrDefault("tags_7d", null));
+        userFeatureMap.put("user_vid_return_tags_14d", e1Feature.getOrDefault("tags_14d", null));
+        userFeatureMap.put("title_split", d3Feature.getOrDefault("title_split", null));
+        userFeatureMap.put("user_vid_share_tags_1d", e2Feature.getOrDefault("tags_1d", null));
+        userFeatureMap.put("user_vid_share_tags_14d", e2Feature.getOrDefault("tags_14d", null));
+        userFeatureMap.put("user_vid_return_cate1_14d", g1Feature.getOrDefault("cate1_14d", null));
+        userFeatureMap.put("user_vid_return_cate2_14d", g1Feature.getOrDefault("cate2_14d", null));
+        userFeatureMap.put("user_vid_share_cate1_14d", g2Feature.getOrDefault("cate1_14d", null));
+        userFeatureMap.put("user_vid_share_cate2_14d", g2Feature.getOrDefault("cate2_14d", null));
+
+        Map<String, String> sceneFeatureMap = this.handleSceneFeature(ts);
+        long time1 = System.currentTimeMillis();
+
+        boolean isGuaranteedFlow = getIsGuaranteedFlow(scoreParam);
+        Map<String, GuaranteeView> guaranteeViewMap = getGuaranteeViewMap(request, isGuaranteedFlow);
+        Map<Long, CorrectCpaParam> correctCpaMap = getCorrectCpaParamMap(request, scoreParam, reqFeature);
+        List<AdRankItem> adRankItems = new ArrayList<>();
+        Random random = new Random();
+        List<Future<AdRankItem>> futures = new ArrayList<>();
+        CountDownLatch cdl1 = new CountDownLatch(request.getAdIdList().size());
+        for (AdPlatformCreativeDTO dto : request.getAdIdList()) {
+            Future<AdRankItem> future = ThreadPoolFactory.feature().submit(() -> {
+                AdRankItem adRankItem = new AdRankItem();
+                try {
+                    adRankItem.setAdId(dto.getCreativeId());
+                    adRankItem.setCreativeCode(dto.getCreativeCode());
+                    adRankItem.setAdVerId(dto.getAdVerId());
+                    adRankItem.setVideoId(request.getVideoId());
+                    adRankItem.setCpa(dto.getCpa());
+                    adRankItem.setId(dto.getAdId());
+                    adRankItem.setCampaignId(dto.getCampaignId());
+                    adRankItem.setCpm(ObjUtil.nullOrDefault(dto.getCpm(), 90).doubleValue());
+                    adRankItem.setSkuId(dto.getSkuId());
+                    adRankItem.setCustomerId(dto.getCustomerId());
+                    adRankItem.setProfession(dto.getProfession());
+                    adRankItem.setRandom(random.nextInt(1000));
+                    if (noApiAdVerIds.contains(dto.getAdVerId())) {
+                        adRankItem.getExt().put("isApi", "0");
+                    } else {
+                        adRankItem.getExt().put("isApi", "1");
+                    }
+                    adRankItem.getExt().put("recallsources", dto.getRecallSources());
+                    // An ad without a CPA-correction entry used to NPE here, which silently dropped
+                    // the creative from ranking. Keep it and leave the factor unset instead; the
+                    // scoring loop below treats a missing factor as 1.0.
+                    CorrectCpaParam correctCpaParam = correctCpaMap.get(dto.getAdId());
+                    adRankItem.getExt().put("correctCpaMap", JSONObject.toJSONString(correctCpaParam));
+                    if (correctCpaParam != null) {
+                        adRankItem.getExt().put("correctionFactor", correctCpaParam.getCorrectionFactor());
+                    }
+                    setGuaranteeWeight(guaranteeViewMap, dto.getAdVerId(), adRankItem.getExt(), isGuaranteedFlow, reqFeature);
+                    String cidStr = dto.getCreativeId().toString();
+                    Map<String, String> cidFeatureMap = adRankItem.getFeatureMap();
+                    Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, EMPTY_NESTED_MAP);
+                    Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", EMPTY_STRING_MAP);
+
+                    Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(dto.getAdVerId(), EMPTY_NESTED_MAP);
+                    Map<String, Map<String, String>> skuFeature = allSkuFeature.getOrDefault(String.valueOf(dto.getSkuId()), EMPTY_NESTED_MAP);
+                    Map<String, String> d1Feature = cidFeature.getOrDefault("alg_cid_feature_vid_cf", EMPTY_STRING_MAP);
+
+                    this.handleB1Feature(b1Feature, cidFeatureMap, cidStr);
+                    this.handleB2ToB5AndB8ToB9Feature(cidFeature, adVerFeature, cidFeatureMap);
+                    this.handleB6ToB7Feature(cidFeature, cidFeatureMap);
+                    this.handleC1UIFeature(midTimeDiffMap, actionStaticMap, cidFeatureMap, cidStr);
+                    this.handleD1Feature(d1Feature, cidFeatureMap);
+                    this.handleD2Feature(vidRankMaps, cidFeatureMap, cidStr);
+                    this.handleH1AndH2Feature(skuFeature, adVerFeature, cidFeatureMap);
+                    cidFeatureMap.put("cid", dto.getCreativeId() != null ? String.valueOf(dto.getCreativeId()) : "");
+                    cidFeatureMap.put("adid", dto.getAdId() != null ? String.valueOf(dto.getAdId()) : "");
+                    cidFeatureMap.put("adverid", dto.getAdVerId() != null ? dto.getAdVerId() : "");
+                    cidFeatureMap.put("profession", dto.getProfession() != null ? dto.getProfession() : "");
+                    cidFeatureMap.put("category_name", dto.getCategoryName() != null ? dto.getCategoryName() : "");
+                    cidFeatureMap.put("material_md5", dto.getMaterialMd5() != null ? dto.getMaterialMd5() : "");
+                    // Only for cids the DNN model has not been trained on: blank out the ad-related sparse ids.
+                    if (CollectionUtils.isNotEmpty(DnnCidDataHelper.getCidSetV2()) && !DnnCidDataHelper.getCidSetV2().contains(adRankItem.getAdId())) {
+                        cidFeatureMap.put("cid", "");
+                        cidFeatureMap.put("adid", "");
+                        cidFeatureMap.put("adverid", "");
+                    }
+                    return adRankItem;
+                } finally {
+                    cdl1.countDown();
+                }
+            });
+            futures.add(future);
+        }
+        awaitFeatureStage(cdl1, 300, "per-creative feature build");
+        // Only tasks that finished inside the time budget contribute; failed tasks are logged and skipped.
+        for (Future<AdRankItem> future : futures) {
+            try {
+                if (future.isDone()) {
+                    adRankItems.add(future.get());
+                }
+            } catch (Exception e) {
+                log.error("Feature handle error", e);
+            }
+        }
+
+        long time2 = System.currentTimeMillis();
+        // feature3
+        // Request-scoped cache of tokenized tags, shared by all ads (the tags are the same for one user).
+        Map<String, List<String>> tagWordsCache = new ConcurrentHashMap<>();
+        CountDownLatch cdl2 = new CountDownLatch(adRankItems.size() * 2);
+        for (AdRankItem item : adRankItems) {
+            String cidStr = String.valueOf(item.getAdId());
+            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, EMPTY_NESTED_MAP);
+            Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", EMPTY_STRING_MAP);
+            String title = b1Feature.getOrDefault("cidtitle", "");
+            ThreadPoolFactory.defaultPool().submit(() -> {
+                try {
+                    this.handleE1AndE2Feature(e1Feature, e2Feature, title, item.getFeatureMap(), scoreParam, tagWordsCache);
+                } finally {
+                    cdl2.countDown();
+                }
+            });
+            ThreadPoolFactory.defaultPool().submit(() -> {
+                try {
+                    this.handleD3AndB1Feature(d3Feature, title, item.getFeatureMap(), scoreParam);
+                } finally {
+                    cdl2.countDown();
+                }
+            });
+        }
+        // NOTE(review): tasks still running after this timeout are not cancelled and keep a reference
+        // to item.getFeatureMap() while the stages below read it -- confirm the map tolerates that.
+        awaitFeatureStage(cdl2, 150, "handleE1AndE2Feature and handleD3AndB1Feature");
+
+        long time3 = System.currentTimeMillis();
+        // Bucketing.
+        userFeatureMap = this.featureBucket(userFeatureMap);
+        CountDownLatch cdl4 = new CountDownLatch(adRankItems.size());
+        for (AdRankItem adRankItem : adRankItems) {
+            ThreadPoolFactory.feature().submit(() -> {
+                try {
+                    Map<String, String> featureMap = adRankItem.getFeatureMap();
+                    adRankItem.setFeatureMap(this.featureBucket(featureMap));
+                } finally {
+                    cdl4.countDown();
+                }
+            });
+        }
+        awaitFeatureStage(cdl4, 100, "featureBucket");
+        long time4 = System.currentTimeMillis();
+        // Scoring and ranking.
+        // getScorerPipeline
+
+        if (CollectionUtils.isEmpty(adRankItems)) {
+            log.error("adRankItems is empty");
+        }
+        List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.PAI_SCORE_CONF_20250804).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
+        if (CollectionUtils.isEmpty(result)) {
+            log.error("scoring result is empty");
+        }
+        long time5 = System.currentTimeMillis();
+        int viewLimit = NumberUtils.toInt(params.getOrDefault("viewLimit", "3000"));
+        // calibrate score for negative sampling or cold start
+        for (AdRankItem item : result) {
+            double originalScore = item.getLrScore();
+            double calibratedScore = originalScore / (originalScore + (1 - originalScore) / negSampleRate);
+            // A creative the model has not been trained on (or with few 3-day views) has an unreliable score.
+            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(item.getAdId()), EMPTY_NESTED_MAP);
+            Map<String, String> b3Feature = cidFeature.getOrDefault("alg_cid_feature_cid_action", EMPTY_STRING_MAP);
+            double view3Day = Double.parseDouble(b3Feature.getOrDefault("ad_view_3d", "0"));
+            if ((CollectionUtils.isNotEmpty(DnnCidDataHelper.getCidSetV2()) && !DnnCidDataHelper.getCidSetV2().contains(item.getAdId()))
+                    || view3Day <= viewLimit) {
+                double view = Double.parseDouble(b3Feature.getOrDefault("ad_view_14d", "0"));
+                double conver = Double.parseDouble(b3Feature.getOrDefault("ad_conversion_14d", "0"));
+                double smoothCxr = NumUtil.divSmoothV1(conver, view, 1.64);
+                // Take the lower of the model score and the statistical estimate;
+                // cvcvrItemValue records which one won (1.0 = model, 2.0 = statistics).
+                item.getScoreMap().put("cvcvrItemValue", 1.0);
+                if (smoothCxr <= calibratedScore) {
+                    calibratedScore = smoothCxr;
+                    item.getScoreMap().put("cvcvrItemValue", 2.0);
+                }
+            }
+            item.setLrScore(calibratedScore);
+            item.getScoreMap().put("originCtcvrScore", originalScore);
+            item.getScoreMap().put("modelCtcvrScore", calibratedScore);
+            item.getScoreMap().put("ctcvrScore", calibratedScore);
+        }
+
+        String calibModelName = params.getOrDefault("calibModelName", "dnnV3");
+        calculateCtcvrScore(result, request, scoreParam, calibModelName, reqFeature);
+        if (CollectionUtils.isEmpty(result)) {
+            log.error("calculateCtcvrScore result is empty");
+        }
+        // loop
+        double cpmCoefficient = weightParam.getOrDefault("cpmCoefficient", 0.9);
+        boolean isGuaranteeType = false;
+        // Audience layer of the request, with the "-炸" suffix removed.
+        String peopleLayer = Optional.of(reqFeature)
+                .map(f -> f.get("layer"))
+                .map(s -> s.replace("-炸", ""))
+                .orElse(null);
+
+        // Exposure-control parameters.
+        String expOldKey = params.getOrDefault("expOldKey", "ad_view_yesterday");
+        double expOldThreshold = NumberUtils.toDouble(params.getOrDefault("expOldThreshold", "1000"));
+        String expNewKey = params.getOrDefault("expNewKey", "ad_view_today");
+        double expNewThreshold = NumberUtils.toDouble(params.getOrDefault("expNewThreshold", "3000"));
+        double expLowerWeight = NumberUtils.toDouble(params.getOrDefault("expLowerWeight", "0.2"));
+        double expUpperWeight = NumberUtils.toDouble(params.getOrDefault("expUpperWeight", "1.0"));
+        double expScale = NumberUtils.toDouble(params.getOrDefault("expScale", "10.0"));
+        for (AdRankItem item : result) {
+            double bid = item.getCpa();
+            if (scoreParam.getExpCodeSet().contains(correctCpaExp1) || scoreParam.getExpCodeSet().contains(correctCpaExp2)) {
+                // A missing or null factor means "no correction" (1.0) rather than an unboxing NPE.
+                Object rawFactor = item.getExt().get("correctionFactor");
+                double correctionFactor = rawFactor instanceof Number ? ((Number) rawFactor).doubleValue() : 1d;
+                item.getScoreMap().put("correctionFactor", correctionFactor);
+                bid = bid * correctionFactor;
+            }
+            item.getScoreMap().put("ecpm", item.getLrScore() * bid * 1000);
+            if (isGuaranteedFlow && item.getExt().get("isGuaranteed") != null && (boolean) item.getExt().get("isGuaranteed")) {
+                isGuaranteeType = true;
+            }
+
+            // Exposure-control weight.
+            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(item.getAdId()), EMPTY_NESTED_MAP);
+            Map<String, String> b3Feature = cidFeature.getOrDefault("alg_cid_feature_cid_action", EMPTY_STRING_MAP);
+            double expWeight = getExpWeight(b3Feature,
+                    expOldKey, expOldThreshold,
+                    expNewKey, expNewThreshold,
+                    expLowerWeight, expUpperWeight, expScale);
+
+            String layerAndCreativeWeightMapKey = getLayerAndCreativeWeightMapKey(peopleLayer, String.valueOf(item.getAdId()));
+            // Weight for this audience layer and creative.
+            double layerAndCreativeWeight = getLayerAndCreativeWeight(layerAndCreativeWeightMapKey);
+            double scoreCoefficient = creativeScoreCoefficient.getOrDefault(item.getAdId(), 1d);
+            double guaranteeScoreCoefficient = getGuaranteeScoreCoefficient(isGuaranteedFlow, item.getExt());
+            double score = expWeight * item.getLrScore() * bid * scoreCoefficient * guaranteeScoreCoefficient * layerAndCreativeWeight;
+            item.getScoreMap().put("guaranteeScoreCoefficient", guaranteeScoreCoefficient);
+            item.getScoreMap().put("cpa", item.getCpa());
+            item.getScoreMap().put("cpm", item.getCpm());
+            item.getScoreMap().put("bid", bid);
+            item.getScoreMap().put("cpmCoefficient", cpmCoefficient);
+            item.getScoreMap().put("scoreCoefficient", scoreCoefficient);
+            item.getFeatureMap().putAll(userFeatureMap);
+            item.getFeatureMap().putAll(sceneFeatureMap);
+
+            // Advertisers without conversion callbacks are scored from the backend-configured CPM instead.
+            if (noApiAdVerIds.contains(item.getAdVerId())) {
+                score = item.getCpm() * cpmCoefficient / 1000;
+            }
+            item.setScore(score);
+        }
+
+        result.sort(ComparatorUtil.equalsRandomComparator());
+
+        String logModelName = params.getOrDefault("logModelName", "dnnV3");
+        if (CollectionUtils.isNotEmpty(result)) {
+            // Only the top-ranked item carries the competition metadata.
+            AdRankItem top1Item = result.get(0);
+            List<String> participateCompetitionType = new ArrayList<>();
+            participateCompetitionType.add("engine");
+            top1Item.getExt().put("isGuaranteeType", isGuaranteeType);
+            if (isGuaranteeType) {
+                participateCompetitionType.add("guarantee");
+            }
+            top1Item.getExt().put("participateCompetitionType", StringUtils.join(participateCompetitionType, ","));
+            Double modelCtcvrScore = top1Item.getScoreMap().get("modelCtcvrScore");
+            Double ctcvrScore = top1Item.getScoreMap().get("ctcvrScore");
+            if (scoreParam.getExpCodeSet().contains(checkoutEcpmExp)) {
+                top1Item.getExt().put("ecpm", ctcvrScore * top1Item.getCpa() * 1000);
+                String filterEcpmValue = params.getOrDefault("filterEcpm", filterEcpm);
+                top1Item.getExt().put("filterEcpm", filterEcpmValue);
+            } else {
+                top1Item.getExt().put("ecpm", modelCtcvrScore * top1Item.getCpa() * 1000);
+            }
+            putMetaFeature(top1Item, feature, reqFeature, sceneFeatureMap, request);
+            top1Item.getExt().put("model", logModelName);
+            String coefficientRate = params.getOrDefault("coefficientRate", "1");
+            top1Item.getExt().put("coefficientRate", coefficientRate);
+        }
+        long time6 = System.currentTimeMillis();
+        log.info("cost={}, getFeature={}, handleFeature={}, similar={}, bucketFeature={}, getScorerPipeline={}, " +
+                        "other={}, adIdSize={}, adRankItemsSize={}",
+                time6 - start, time1 - start, time2 - time1, time3 - time2, time4 - time3,
+                time5 - time4, time6 - time5, request.getAdIdList().size(), adRankItems.size());
+
+        return result;
+    }
+
+    /**
+     * Waits up to {@code timeoutMs} for one parallel feature stage to finish.
+     *
+     * <p>{@link CountDownLatch#await(long, TimeUnit)} reports a timeout by returning {@code false},
+     * not by throwing, so the timeout is logged from the return value. An interrupt is logged and
+     * the thread's interrupt flag is restored. Neither case aborts the caller.
+     *
+     * @param latch     latch counted down once per task of the stage
+     * @param timeoutMs maximum time to wait, in milliseconds
+     * @param stage     stage name used in the log messages
+     */
+    private void awaitFeatureStage(CountDownLatch latch, long timeoutMs, String stage) {
+        try {
+            if (!latch.await(timeoutMs, TimeUnit.MILLISECONDS)) {
+                log.warn("{} wait timeout after {}ms, unfinished tasks={}", stage, timeoutMs, latch.getCount());
+            }
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            log.error("{} wait interrupted", stage, e);
+        }
+    }
|
|
|
|
|
+
|
|
|
|
|
+    /**
+     * Looks up the weight configured for an audience-layer/creative key.
+     *
+     * @param key lookup key into {@code layerAndCreativeWeightMap}; may be null or blank
+     * @return the configured weight, or 1.0 when the key is blank or has no entry
+     */
+    private Double getLayerAndCreativeWeight(String key) {
+        if (StringUtils.isNotBlank(key)) {
+            return layerAndCreativeWeightMap.getOrDefault(key, 1d);
+        }
+        // No usable key: neutral weight.
+        return 1d;
+    }
|
|
|
|
|
+
|
|
|
|
|
+    /**
+     * Builds the lookup key for the audience-layer/creative weight.
+     *
+     * @param layer      audience layer; may be null or blank
+     * @param creativeId creative id as a string; may be null or blank
+     * @return {@code layer + "_" + creativeId}, or null when either part is blank
+     */
+    private String getLayerAndCreativeWeightMapKey(String layer, String creativeId) {
+        boolean bothPresent = StringUtils.isNotBlank(layer) && StringUtils.isNotBlank(creativeId);
+        return bothPresent ? layer + "_" + creativeId : null;
+    }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+ private void handleB1Feature(Map<String, String> b1Feature, Map<String, String> cidFeatureMap, String cid) {
|
|
|
|
|
+ cidFeatureMap.put("cid_" + cid, "0.1");
|
|
|
|
|
+ // if (StringUtils.isNotBlank(b1Feature.get("adid"))) {
|
|
|
|
|
+ // String adId = b1Feature.get("adid");
|
|
|
|
|
+ // cidFeatureMap.put("adid_" + adId, idDefaultValue);
|
|
|
|
|
+ // }
|
|
|
|
|
+ if (StringUtils.isNotBlank(b1Feature.get("adverid"))) {
|
|
|
|
|
+ String adVerId = b1Feature.get("adverid");
|
|
|
|
|
+ cidFeatureMap.put("adverid_" + adVerId, "0.1");
|
|
|
|
|
+ }
|
|
|
|
|
+ // if (StringUtils.isNotBlank(b1Feature.get("targeting_conversion"))) {
|
|
|
|
|
+ // String targetingConversion = b1Feature.get("targeting_conversion");
|
|
|
|
|
+ // cidFeatureMap.put("targeting_conversion_" + targetingConversion, idDefaultValue);
|
|
|
|
|
+ // }
|
|
|
|
|
+ if (StringUtils.isNotBlank(b1Feature.get("cpa"))) {
|
|
|
|
|
+ String cpa = b1Feature.get("cpa");
|
|
|
|
|
+ cidFeatureMap.put("cpa", cpa);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+    /**
+     * Emits windowed action-statistics features for the b2, b3, b4, b5, b8 and b9 groups.
+     *
+     * <p>b2 is read from the advertiser-level groups; the others from the creative-level groups.
+     * Each group is expanded over the windows 1h, 2h, 3h, 6h, 12h, 1d, 3d, 7d, yesterday and today.
+     *
+     * @param c1Feature     creative-level feature groups keyed by group name
+     * @param adVerFeature  advertiser-level feature groups keyed by group name
+     * @param cidFeatureMap output feature map of the creative, modified in place
+     */
+    private void handleB2ToB5AndB8ToB9Feature(Map<String, Map<String, String>> c1Feature, Map<String, Map<String, String>> adVerFeature, Map<String, String> cidFeatureMap) {
+        // LinkedHashMap keeps the b2..b9 emission order.
+        Map<String, Map<String, String>> groups = new LinkedHashMap<>();
+        groups.put("b2", adVerFeature.getOrDefault("alg_cid_feature_adver_action", EMPTY_STRING_MAP));
+        groups.put("b3", c1Feature.getOrDefault("alg_cid_feature_cid_action", EMPTY_STRING_MAP));
+        groups.put("b4", c1Feature.getOrDefault("alg_cid_feature_region_action", EMPTY_STRING_MAP));
+        groups.put("b5", c1Feature.getOrDefault("alg_cid_feature_app_action", EMPTY_STRING_MAP));
+        groups.put("b8", c1Feature.getOrDefault("alg_cid_feature_brand_action", EMPTY_STRING_MAP));
+        groups.put("b9", c1Feature.getOrDefault("alg_cid_feature_weChatVersion_action", EMPTY_STRING_MAP));
+
+        List<String> timeList = Arrays.asList("1h", "2h", "3h", "6h", "12h", "1d", "3d", "7d", "yesterday", "today");
+        for (Map.Entry<String, Map<String, String>> group : groups.entrySet()) {
+            for (String time : timeList) {
+                putWindowActionStats(group.getValue(), group.getKey(), time, cidFeatureMap);
+            }
+        }
+    }
+
+    /**
+     * Emits windowed action-statistics features for the b6 and b7 groups over the windows 7d and 14d.
+     *
+     * @param c1Feature     creative-level feature groups keyed by group name
+     * @param cidFeatureMap output feature map of the creative, modified in place
+     */
+    private void handleB6ToB7Feature(Map<String, Map<String, String>> c1Feature, Map<String, String> cidFeatureMap) {
+        Map<String, Map<String, String>> groups = new LinkedHashMap<>();
+        groups.put("b6", c1Feature.getOrDefault("alg_cid_feature_week_action", EMPTY_STRING_MAP));
+        groups.put("b7", c1Feature.getOrDefault("alg_cid_feature_hour_action", EMPTY_STRING_MAP));
+
+        List<String> timeList = Arrays.asList("7d", "14d");
+        for (Map.Entry<String, Map<String, String>> group : groups.entrySet()) {
+            for (String time : timeList) {
+                putWindowActionStats(group.getValue(), group.getKey(), time, cidFeatureMap);
+            }
+        }
+    }
+
+    /**
+     * Computes the statistics of one feature group for one time window and stores them under
+     * {@code <prefix>_<time>_<stat>} keys.
+     *
+     * <p>Reads {@code ad_view_*}, {@code ad_click_*}, {@code ad_conversion_*} and {@code ad_income_*}
+     * for the window; a missing counter counts as 0. Shared by the two callers above, which
+     * previously each carried a copy of this computation.
+     *
+     * @param feature       raw counters of the group
+     * @param prefix        group prefix used in the output keys (e.g. {@code b3})
+     * @param time          window suffix (e.g. {@code 7d})
+     * @param cidFeatureMap output feature map, modified in place
+     * @throws NumberFormatException if a present counter is not a parseable number
+     */
+    private void putWindowActionStats(Map<String, String> feature, String prefix, String time, Map<String, String> cidFeatureMap) {
+        double view = Double.parseDouble(feature.getOrDefault("ad_view_" + time, "0"));
+        double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
+        double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
+        double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
+        double cpc = NumUtil.div(income, click);
+        double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
+        double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
+        double ecpm = ctr * cpc * 1000;
+
+        String keyBase = prefix + "_" + time;
+        cidFeatureMap.put(keyBase + "_ctr", String.valueOf(ctr));
+        cidFeatureMap.put(keyBase + "_ctcvr", String.valueOf(ctcvr));
+        cidFeatureMap.put(keyBase + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
+        cidFeatureMap.put(keyBase + "_conver", String.valueOf(conver));
+        cidFeatureMap.put(keyBase + "_ecpm", String.valueOf(ecpm));
+
+        cidFeatureMap.put(keyBase + "_click", String.valueOf(click));
+        cidFeatureMap.put(keyBase + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
+        cidFeatureMap.put(keyBase + "_conver*ctcvr", String.valueOf(conver * ctcvr));
+    }
|
|
|
|
|
+
|
|
|
|
|
+ private List<TupleMapEntry<Tuple5>> handleC1Feature(Map<String, String> c1Feature, Map<String, String> featureMap) {
|
|
|
|
|
+
|
|
|
|
|
+ //用户近1年内是否有转化
|
|
|
|
|
+ if (c1Feature.containsKey("user_has_conver_1y") && c1Feature.get("user_has_conver_1y") != null) {
|
|
|
|
|
+ featureMap.put("user_has_conver_1y", c1Feature.get("user_has_conver_1y"));
|
|
|
|
|
+ }
|
|
|
|
|
+ //用户历史转化过品类
|
|
|
|
|
+ if (c1Feature.containsKey("user_conver_ad_class") && c1Feature.get("user_conver_ad_class") != null) {
|
|
|
|
|
+ featureMap.put("user_conver_ad_class", c1Feature.get("user_conver_ad_class"));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 用户特征
|
|
|
|
|
+ List<TupleMapEntry<Tuple5>> midActionList = new ArrayList<>();
|
|
|
|
|
+ if (c1Feature.containsKey("action")) {
|
|
|
|
|
+ String action = c1Feature.get("action");
|
|
|
|
|
+ midActionList = Arrays.stream(action.split(","))
|
|
|
|
|
+ .map(r -> {
|
|
|
|
|
+ String[] rList = r.split(":");
|
|
|
|
|
+ Tuple5 tuple5 = new Tuple5(rList[1], rList[2], rList[3], rList[4], rList[5]);
|
|
|
|
|
+ return new TupleMapEntry<>(rList[0], tuple5);
|
|
|
|
|
+ })
|
|
|
|
|
+ // TODO 倒排
|
|
|
|
|
+ .sorted((a, b) -> Integer.compare(Integer.parseInt(b.value.f1), Integer.parseInt(a.value.f1)))
|
|
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ double viewAll = midActionList.size();
|
|
|
|
|
+ double clickAll = midActionList.stream().mapToInt(e -> Integer.parseInt(e.value.f2)).sum();
|
|
|
|
|
+ double converAll = midActionList.stream().mapToInt(e -> Integer.parseInt(e.value.f3)).sum();
|
|
|
|
|
+ double incomeAll = midActionList.stream().mapToInt(e -> Integer.parseInt(e.value.f4)).sum();
|
|
|
|
|
+ featureMap.put("viewAll", String.valueOf(viewAll));
|
|
|
|
|
+ featureMap.put("clickAll", String.valueOf(clickAll));
|
|
|
|
|
+ featureMap.put("converAll", String.valueOf(converAll));
|
|
|
|
|
+ featureMap.put("incomeAll", String.valueOf(incomeAll));
|
|
|
|
|
+ featureMap.put("ctr_all", String.valueOf(NumUtil.div(clickAll, viewAll)));
|
|
|
|
|
+ featureMap.put("ctcvr_all", String.valueOf(NumUtil.div(converAll, viewAll)));
|
|
|
|
|
+ featureMap.put("cvr_all", String.valueOf(NumUtil.div(clickAll, converAll)));
|
|
|
|
|
+ featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
|
|
|
|
|
+ if (CollectionUtils.isNotEmpty(midActionList)) {
|
|
|
|
|
+ List<String> cidClickList = new ArrayList<>();
|
|
|
|
|
+ List<String> cidConverList = new ArrayList<>();
|
|
|
|
|
+ for (TupleMapEntry<Tuple5> tupleMapEntry : midActionList) {
|
|
|
|
|
+ String cid = tupleMapEntry.key;
|
|
|
|
|
+ String click = tupleMapEntry.value.f2;
|
|
|
|
|
+ String conver = tupleMapEntry.value.f3;
|
|
|
|
|
+ if (Objects.equals(click, "1")) {
|
|
|
|
|
+ cidClickList.add(cid);
|
|
|
|
|
+ }
|
|
|
|
|
+ if (Objects.equals(conver, "1")) {
|
|
|
|
|
+ cidConverList.add(cid);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ featureMap.put("user_cid_click_list", String.join(",", cidClickList));
|
|
|
|
|
+ featureMap.put("user_cid_conver_list", String.join(",", cidConverList));
|
|
|
|
|
+ }
|
|
|
|
|
+ return midActionList;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void handleC1UIFeature(Map<String, Double> midTimeDiffMap, Map<String, Double> midActionStatic, Map<String, String> featureMap, String cid) {
|
|
|
|
|
+ if (midTimeDiffMap.containsKey("timediff_view_" + cid)) {
|
|
|
|
|
+ featureMap.put("timediff_view", String.valueOf(midTimeDiffMap.getOrDefault("timediff_view_" + cid, 0.0)));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midTimeDiffMap.containsKey("timediff_click_" + cid)) {
|
|
|
|
|
+ featureMap.put("timediff_click", String.valueOf(midTimeDiffMap.getOrDefault("timediff_click_" + cid, 0.0)));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midTimeDiffMap.containsKey("timediff_conver_" + cid)) {
|
|
|
|
|
+ featureMap.put("timediff_conver", String.valueOf(midTimeDiffMap.getOrDefault("timediff_conver_" + cid, 0.0)));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midActionStatic.containsKey("actionstatic_view_" + cid)) {
|
|
|
|
|
+ featureMap.put("actionstatic_view", String.valueOf(midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midActionStatic.containsKey("actionstatic_click_" + cid)) {
|
|
|
|
|
+ featureMap.put("actionstatic_click", String.valueOf(midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midActionStatic.containsKey("actionstatic_conver_" + cid)) {
|
|
|
|
|
+ featureMap.put("actionstatic_conver", String.valueOf(midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0)));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midActionStatic.containsKey("actionstatic_income_" + cid)) {
|
|
|
|
|
+ featureMap.put("actionstatic_income", String.valueOf(midActionStatic.getOrDefault("actionstatic_income_" + cid, 0.0)));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midActionStatic.containsKey("actionstatic_view_" + cid) && midActionStatic.containsKey("actionstatic_click_" + cid)) {
|
|
|
|
|
+ double ctr = NumUtil.div(
|
|
|
|
|
+ midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0),
|
|
|
|
|
+ midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)
|
|
|
|
|
+ );
|
|
|
|
|
+ featureMap.put("actionstatic_ctr", String.valueOf(ctr));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midActionStatic.containsKey("actionstatic_view_" + cid) && midActionStatic.containsKey("actionstatic_conver_" + cid)) {
|
|
|
|
|
+ double ctcvr = NumUtil.div(midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0), midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0));
|
|
|
|
|
+ featureMap.put("actionstatic_ctcvr", String.valueOf(ctcvr));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (midActionStatic.containsKey("actionstatic_conver_" + cid) && midActionStatic.containsKey("actionstatic_click_" + cid)) {
|
|
|
|
|
+ double cvr = NumUtil.div(midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0), midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0));
|
|
|
|
|
+ featureMap.put("actionstatic_cvr", String.valueOf(cvr));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void handleD1Feature(Map<String, String> d1Feature, Map<String, String> featureMap) {
|
|
|
|
|
+ for (String prefix : Arrays.asList("3h", "6h", "12h", "1d", "3d", "7d")) {
|
|
|
|
|
+ double view = Double.parseDouble(d1Feature.getOrDefault("ad_view_" + prefix, "0"));
|
|
|
|
|
+ double click = Double.parseDouble(d1Feature.getOrDefault("ad_click_" + prefix, "0"));
|
|
|
|
|
+ double conver = Double.parseDouble(d1Feature.getOrDefault("ad_conversion_" + prefix, "0"));
|
|
|
|
|
+ double income = Double.parseDouble(d1Feature.getOrDefault("ad_income_" + prefix, "0"));
|
|
|
|
|
+ double cpc = NumUtil.div(income, click);
|
|
|
|
|
+ double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
|
|
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(ctr));
|
|
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)));
|
|
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
|
|
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_conver", String.valueOf(conver));
|
|
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(ctr * cpc * 1000));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void handleD2Feature(Map<String, Map<String, Double>> vidRankMaps, Map<String, String> featureMap, String cid) {
|
|
|
|
|
+ if (MapUtils.isEmpty(vidRankMaps)) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
|
|
|
|
|
+ // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
|
|
|
|
|
+ List<String> prefixes2 = Arrays.asList("1d", "3d", "7d", "14d");
|
|
|
|
|
+
|
|
|
|
|
+ for (String prefix1 : prefixes1) {
|
|
|
|
|
+ for (String prefix2 : prefixes2) {
|
|
|
|
|
+ String combinedKey = prefix1 + "_" + prefix2;
|
|
|
|
|
+ if (vidRankMaps.containsKey(combinedKey)) {
|
|
|
|
|
+ Double rank = vidRankMaps.get(combinedKey).getOrDefault(cid, 0.0);
|
|
|
|
|
+ if (rank >= 1.0) {
|
|
|
|
|
+ featureMap.put("vid_rank_" + combinedKey, String.valueOf(NumUtil.div(1, rank)));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void handleH1AndH2Feature(Map<String, Map<String, String>> skuFeature,
|
|
|
|
|
+ Map<String, Map<String, String>> adVerFeature,
|
|
|
|
|
+ Map<String, String> cidFeatureMap) {
|
|
|
|
|
+ Map<String, String> h1Feature = adVerFeature.getOrDefault("alg_mid_feature_adver_action", EMPTY_STRING_MAP);
|
|
|
|
|
+ Map<String, String> h2Feature = skuFeature.getOrDefault("alg_mid_feature_sku_action", EMPTY_STRING_MAP);
|
|
|
|
|
+ List<String> timeList = Arrays.asList("3d", "7d", "30d");
|
|
|
|
|
+ List<Tuple2<Map<String, String>, String>> featureList = Arrays.asList(
|
|
|
|
|
+ new Tuple2<>(h1Feature, "adverid"),
|
|
|
|
|
+ new Tuple2<>(h2Feature, "skuid")
|
|
|
|
|
+ );
|
|
|
|
|
+ for (Tuple2<Map<String, String>, String> tuple2 : featureList) {
|
|
|
|
|
+ Map<String, String> feature = tuple2.f1;
|
|
|
|
|
+ String prefix = tuple2.f2;
|
|
|
|
|
+ for (String time : timeList) {
|
|
|
|
|
+ String timeValue = feature.getOrDefault(time, "");
|
|
|
|
|
+ if (StringUtils.isNotEmpty(timeValue)) {
|
|
|
|
|
+ String[] split = timeValue.split(",");
|
|
|
|
|
+ cidFeatureMap.put("user" + "_" + prefix + "_" + "view" + "_" + time, split[0]);
|
|
|
|
|
+ cidFeatureMap.put("user" + "_" + prefix + "_" + "click" + "_" + time, split[1]);
|
|
|
|
|
+ cidFeatureMap.put("user" + "_" + prefix + "_" + "conver" + "_" + time, split[2]);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void handleD3AndB1Feature(Map<String, String> d3Feature, String cTitle, Map<String, String> featureMap,
|
|
|
|
|
+ ScoreParam scoreParam) {
|
|
|
|
|
+ if (MapUtils.isEmpty(d3Feature) || !d3Feature.containsKey("title") || StringUtils.isEmpty(cTitle)) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ String vTitle = d3Feature.get("title");
|
|
|
|
|
+ double score;
|
|
|
|
|
+ if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
|
|
|
|
|
+ score = SimilarityUtils.word2VecSimilarity(cTitle, vTitle);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ score = Similarity.conceptSimilarity(cTitle, vTitle);
|
|
|
|
|
+ }
|
|
|
|
|
+ featureMap.put("ctitle_vtitle_similarity", String.valueOf(score));
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void handleE1AndE2Feature(Map<String, String> e1Feature, Map<String, String> e2Feature, String title,
|
|
|
|
|
+ Map<String, String> featureMap, ScoreParam scoreParam,
|
|
|
|
|
+ Map<String, List<String>> tagWordsCache) {
|
|
|
|
|
+ if (StringUtils.isEmpty(title)) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ // 预先分词 title,在整个方法中复用,避免重复分词
|
|
|
|
|
+ List<String> titleWords = null;
|
|
|
|
|
+ if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
|
|
|
|
|
+ titleWords = SimilarityUtils.segment(title);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ List<Tuple2<Map<String, String>, String>> tuple2List = Arrays.asList(new Tuple2<>(e1Feature, "e1"), new Tuple2<>(e2Feature, "e2"));
|
|
|
|
|
+
|
|
|
|
|
+ List<String> tagsFieldList = Arrays.asList("tags_3d", "tags_7d", "tags_14d");
|
|
|
|
|
+ for (Tuple2<Map<String, String>, String> tuple2 : tuple2List) {
|
|
|
|
|
+ Map<String, String> feature = tuple2.f1;
|
|
|
|
|
+ String prefix = tuple2.f2;
|
|
|
|
|
+ if (MapUtils.isEmpty(feature)) {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ for (String tagsField : tagsFieldList) {
|
|
|
|
|
+ if (StringUtils.isNotEmpty(feature.get(tagsField))) {
|
|
|
|
|
+ String tags = feature.get(tagsField);
|
|
|
|
|
+ Double[] doubles;
|
|
|
|
|
+ if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
|
|
|
|
|
+ // 使用缓存的 title 分词结果和请求级别的 tag 分词缓存
|
|
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTagsNewWithCache(tags, title, titleWords, tagWordsCache);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
|
|
+ }
|
|
|
|
|
+ featureMap.put(prefix + "_" + tagsField + "_matchnum", String.valueOf(doubles[0]));
|
|
|
|
|
+ featureMap.put(prefix + "_" + tagsField + "_maxscore", String.valueOf(doubles[1]));
|
|
|
|
|
+ featureMap.put(prefix + "_" + tagsField + "_avgscore", String.valueOf(doubles[2]));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private Map<String, Double> parseC1FeatureListToTimeDiffMap(List<TupleMapEntry<Tuple5>> midActionList, long ts) {
|
|
|
|
|
+ Map<String, Double> midTimeDiffMap = new HashMap<>();
|
|
|
|
|
+ for (TupleMapEntry<Tuple5> entry : midActionList) {
|
|
|
|
|
+ String cid = entry.key;
|
|
|
|
|
+ double tsHistory = Double.parseDouble(entry.value.f1);
|
|
|
|
|
+ double click = Double.parseDouble(entry.value.f2);
|
|
|
|
|
+ double conver = Double.parseDouble(entry.value.f3);
|
|
|
|
|
+ double d = (ts - tsHistory) / 3600 / 24;
|
|
|
|
|
+ if (!midTimeDiffMap.containsKey("timediff_view_" + cid)) {
|
|
|
|
|
+ midTimeDiffMap.put("timediff_view_" + cid, NumUtil.div(1, d));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (!midTimeDiffMap.containsKey("timediff_click_" + cid) && click > 0) {
|
|
|
|
|
+ midTimeDiffMap.put("timediff_click_" + cid, NumUtil.div(1, d));
|
|
|
|
|
+ }
|
|
|
|
|
+ if (!midTimeDiffMap.containsKey("timediff_conver_" + cid) && conver > 0) {
|
|
|
|
|
+ midTimeDiffMap.put("timediff_conver_" + cid, NumUtil.div(1, d));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return midTimeDiffMap;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private Map<String, Double> parseC1FeatureListToActionStaticMap(List<TupleMapEntry<Tuple5>> midActionList) {
|
|
|
|
|
+ Map<String, Double> midActionStaticsMap = new HashMap<>();
|
|
|
|
|
+ for (TupleMapEntry<Tuple5> entry : midActionList) {
|
|
|
|
|
+ String cid = entry.key;
|
|
|
|
|
+ double click = Double.parseDouble(entry.value.f2);
|
|
|
|
|
+ double conver = Double.parseDouble(entry.value.f3);
|
|
|
|
|
+ double income = Double.parseDouble(entry.value.f4);
|
|
|
|
|
+
|
|
|
|
|
+ Double viewSum = midActionStaticsMap.getOrDefault("actionstatic_view_" + cid, 0.0);
|
|
|
|
|
+ midActionStaticsMap.put("actionstatic_view_" + cid, 1 + viewSum);
|
|
|
|
|
+
|
|
|
|
|
+ Double clickSum = midActionStaticsMap.getOrDefault("actionstatic_click_" + cid, 0.0);
|
|
|
|
|
+ midActionStaticsMap.put("actionstatic_click_" + cid, clickSum + click);
|
|
|
|
|
+
|
|
|
|
|
+ Double converSum = midActionStaticsMap.getOrDefault("actionstatic_conver_" + cid, 0.0);
|
|
|
|
|
+ midActionStaticsMap.put("actionstatic_conver_" + cid, converSum + conver);
|
|
|
|
|
+
|
|
|
|
|
+ Double incomSum = midActionStaticsMap.getOrDefault("actionstatic_income_" + cid, 0.0);
|
|
|
|
|
+ midActionStaticsMap.put("actionstatic_income_" + cid, incomSum + income);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return midActionStaticsMap;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private Map<String, Map<String, Double>> parseD2FeatureMap(Map<String, String> d2Feature) {
|
|
|
|
|
+ Map<String, Map<String, Double>> vidRankMaps = new HashMap<>();
|
|
|
|
|
+ for (Map.Entry<String, String> entry : d2Feature.entrySet()) {
|
|
|
|
|
+ String key = entry.getKey();
|
|
|
|
|
+ String value = entry.getValue();
|
|
|
|
|
+ Map<String, Double> valueMap = Arrays.stream(value.split(",")).map(r -> r.split(":")).collect(Collectors.toMap(rList -> rList[0], rList -> Double.parseDouble(rList[2])));
|
|
|
|
|
+ vidRankMaps.put(key, valueMap);
|
|
|
|
|
+ }
|
|
|
|
|
+ return vidRankMaps;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void readBucketFile() {
|
|
|
|
|
+ if (MapUtils.isNotEmpty(bucketsMap)) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ synchronized (this) {
|
|
|
|
|
+ String bucketFile = "20250217_ad_bucket_688.txt";
|
|
|
|
|
+ InputStream resourceStream = this.getClass().getClassLoader().getResourceAsStream(bucketFile);
|
|
|
|
|
+ if (resourceStream != null) {
|
|
|
|
|
+ try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) {
|
|
|
|
|
+ Map<String, double[]> bucketsMap = new HashMap<>();
|
|
|
|
|
+ Map<String, Double> bucketsLen = new HashMap<>();
|
|
|
|
|
+ String line;
|
|
|
|
|
+ while ((line = reader.readLine()) != null) {
|
|
|
|
|
+ // 替换空格和换行符,过滤空行
|
|
|
|
|
+ line = line.replace(" ", "").replaceAll("\n", "");
|
|
|
|
|
+ if (!line.isEmpty()) {
|
|
|
|
|
+ String[] rList = line.split("\t");
|
|
|
|
|
+ if (rList.length == 3) {
|
|
|
|
|
+ String key = rList[0];
|
|
|
|
|
+ double value1 = Double.parseDouble(rList[1]);
|
|
|
|
|
+ bucketsLen.put(key, value1);
|
|
|
|
|
+ double[] value2 = Arrays.stream(rList[2].split(",")).mapToDouble(Double::valueOf).toArray();
|
|
|
|
|
+ bucketsMap.put(key, value2);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ this.bucketsMap = bucketsMap;
|
|
|
|
|
+ this.bucketsLen = bucketsLen;
|
|
|
|
|
+ } catch (IOException e) {
|
|
|
|
|
+ log.error("something is wrong in parse bucket file: ", e);
|
|
|
|
|
+ }
|
|
|
|
|
+ log.info("load bucket file success: {}", bucketFile);
|
|
|
|
|
+ } else {
|
|
|
|
|
+ log.error("no bucket file");
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void initSparseFeatureNames() {
|
|
|
|
|
+ this.sparseFeatureSet = new HashSet<String>() {{
|
|
|
|
|
+ add("brand");
|
|
|
|
|
+ add("region");
|
|
|
|
|
+ add("city");
|
|
|
|
|
+ add("vid");
|
|
|
|
|
+ add("cate1");
|
|
|
|
|
+ add("cate2");
|
|
|
|
|
+ add("cid");
|
|
|
|
|
+ add("adid");
|
|
|
|
|
+ add("adverid");
|
|
|
|
|
+ add("user_cid_click_list");
|
|
|
|
|
+ add("user_cid_conver_list");
|
|
|
|
|
+ add("user_vid_return_tags_2h");
|
|
|
|
|
+ add("user_vid_return_tags_1d");
|
|
|
|
|
+ add("user_vid_return_tags_3d");
|
|
|
|
|
+ add("user_vid_return_tags_7d");
|
|
|
|
|
+ add("user_vid_return_tags_14d");
|
|
|
|
|
+ add("apptype");
|
|
|
|
|
+ add("hour");
|
|
|
|
|
+ add("hour_quarter");
|
|
|
|
|
+ add("root_source_scene");
|
|
|
|
|
+ add("root_source_channel");
|
|
|
|
|
+ add("is_first_layer");
|
|
|
|
|
+ add("title_split");
|
|
|
|
|
+ add("profession");
|
|
|
|
|
+ add("user_vid_share_tags_1d");
|
|
|
|
|
+ add("user_vid_share_tags_14d");
|
|
|
|
|
+ add("user_vid_return_cate1_14d");
|
|
|
|
|
+ add("user_vid_return_cate2_14d");
|
|
|
|
|
+ add("user_vid_share_cate1_14d");
|
|
|
|
|
+ add("user_vid_share_cate2_14d");
|
|
|
|
|
+ add("user_has_conver_1y");
|
|
|
|
|
+ add("user_adverid_view_3d");
|
|
|
|
|
+ add("user_adverid_click_3d");
|
|
|
|
|
+ add("user_adverid_conver_3d");
|
|
|
|
|
+ add("user_adverid_view_7d");
|
|
|
|
|
+ add("user_adverid_click_7d");
|
|
|
|
|
+ add("user_adverid_conver_7d");
|
|
|
|
|
+ add("user_adverid_view_30d");
|
|
|
|
|
+ add("user_adverid_click_30d");
|
|
|
|
|
+ add("user_adverid_conver_30d");
|
|
|
|
|
+ add("user_skuid_view_3d");
|
|
|
|
|
+ add("user_skuid_click_3d");
|
|
|
|
|
+ add("user_skuid_conver_3d");
|
|
|
|
|
+ add("user_skuid_view_7d");
|
|
|
|
|
+ add("user_skuid_click_7d");
|
|
|
|
|
+ add("user_skuid_conver_7d");
|
|
|
|
|
+ add("user_skuid_view_30d");
|
|
|
|
|
+ add("user_skuid_click_30d");
|
|
|
|
|
+ add("user_skuid_conver_30d");
|
|
|
|
|
+ add("user_conver_ad_class");
|
|
|
|
|
+ add("category_name");
|
|
|
|
|
+ add("material_md5");
|
|
|
|
|
+ }};
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private Map<String, String> featureBucket(Map<String, String> featureMap) {
|
|
|
|
|
+ // 使用 HashMap 替代 ConcurrentHashMap,分桶操作是单线程的
|
|
|
|
|
+ Map<String, String> newFeatureMap = new HashMap<>(featureMap.size());
|
|
|
|
|
+ for (Map.Entry<String, String> entry : featureMap.entrySet()) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ String name = entry.getKey();
|
|
|
|
|
+ if (this.sparseFeatureSet.contains(name)) {
|
|
|
|
|
+ if (entry.getValue() != null) {
|
|
|
|
|
+ newFeatureMap.put(name, entry.getValue());
|
|
|
|
|
+ }
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+ double score = Double.parseDouble(entry.getValue());
|
|
|
|
|
+ // 注意:0值、不在分桶文件中的特征,会被过滤掉。
|
|
|
|
|
+ if (score > 1E-8) {
|
|
|
|
|
+ if (this.bucketsMap.containsKey(name) && this.bucketsLen.containsKey(name)) {
|
|
|
|
|
+ double[] buckets = this.bucketsMap.get(name);
|
|
|
|
|
+ double bucketNum = this.bucketsLen.get(name);
|
|
|
|
|
+ Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
|
|
|
|
|
+ newFeatureMap.put(name, String.valueOf(scoreNew));
|
|
|
|
|
+ } else {
|
|
|
|
|
+ newFeatureMap.put(name, String.valueOf(score));
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("featureBucket error: ", e);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ return newFeatureMap;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private double getExpWeight(Map<String, String> featureMap,
|
|
|
|
|
+ String expOldKey, double expOldThreshold,
|
|
|
|
|
+ String expNewKey, double expNewThreshold,
|
|
|
|
|
+ double expLowerWeight, double expUpperWeight, double expScale) {
|
|
|
|
|
+ try {
|
|
|
|
|
+ if (null != featureMap) {
|
|
|
|
|
+ double oldView = Double.parseDouble(featureMap.getOrDefault(expOldKey, "0"));
|
|
|
|
|
+ if (oldView < expOldThreshold) {
|
|
|
|
|
+ double newView = Double.parseDouble(featureMap.getOrDefault(expNewKey, "0"));
|
|
|
|
|
+ return getExpWeight(expLowerWeight, expUpperWeight, expScale, expNewThreshold, newView);
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
|
+ log.error("getExpWeight error: ", e);
|
|
|
|
|
+ }
|
|
|
|
|
+ return 1.0;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private double getExpWeight(double lowerWeight, double upperWeight, double scale, double upperExp, double exp) {
|
|
|
|
|
+ if (exp >= upperExp) {
|
|
|
|
|
+ return 1.0;
|
|
|
|
|
+ }
|
|
|
|
|
+ double weight = Math.log(exp + 1) / scale;
|
|
|
|
|
+ return Math.min(Math.max(lowerWeight, weight), upperWeight);
|
|
|
|
|
+ }
|
|
|
|
|
+}
|