|
@@ -1,5 +1,6 @@
|
|
|
package com.tzld.piaoquan.ad.engine.service.score.strategy;
|
|
|
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
|
|
|
import com.tzld.piaoquan.ad.engine.commons.score.ScorerUtils;
|
|
|
import com.tzld.piaoquan.ad.engine.commons.thread.ThreadPoolFactory;
|
|
@@ -9,12 +10,14 @@ import com.tzld.piaoquan.ad.engine.commons.dto.AdPlatformCreativeDTO;
|
|
|
import com.tzld.piaoquan.ad.engine.commons.param.RankRecommendRequestParam;
|
|
|
import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.commons.collections4.CollectionUtils;
|
|
|
import org.apache.commons.collections4.MapUtils;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Component;
|
|
|
import org.xm.Similarity;
|
|
|
|
|
|
+import javax.annotation.PostConstruct;
|
|
|
import java.io.BufferedReader;
|
|
|
import java.io.IOException;
|
|
|
import java.io.InputStream;
|
|
@@ -26,11 +29,12 @@ import java.util.concurrent.Future;
|
|
|
import java.util.concurrent.TimeUnit;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
+import static com.tzld.piaoquan.ad.engine.commons.math.Const.*;
|
|
|
+
|
|
|
@Slf4j
|
|
|
@Component
|
|
|
public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
|
|
|
-
|
|
|
private Map<String, double[]> bucketsMap = new HashMap<>();
|
|
|
|
|
|
private Map<String, Double> bucketsLen = new HashMap<>();
|
|
@@ -38,9 +42,28 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
@Value("${word2vec.exp:694}")
|
|
|
private String word2vecExp;
|
|
|
|
|
|
+ // FIXME(zhoutian): may need its own dedicated configuration (currently reuses the rank.score.weight.680 key)
|
|
|
+ @ApolloJsonValue("${rank.score.weight.680:{}}")
|
|
|
+ private Map<String, Double> weightMap;
|
|
|
+
|
|
|
+ @ApolloJsonValue("${rank.score.neg_sample_rate:0.01}")
|
|
|
+ Double negSampleRate;
|
|
|
+
|
|
|
+ Set<String> sparseFeatureSet;
|
|
|
+
|
|
|
+ @PostConstruct // lifecycle callback: invoked after dependency injection has completed
|
|
|
+ public void afterInit() { // one-time setup of the bucket tables and the sparse-feature name set
|
|
|
+ this.readBucketFile(); // load bucket boundaries here; this patch removes the readBucketFile() call that preceded featureBucket() in the ranking path
|
|
|
+ this.initSparseFeatureNames(); // populate sparseFeatureSet, which featureBucket() reads
|
|
|
+ }
|
|
|
+
|
|
|
@Override
|
|
|
public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
|
|
|
|
|
|
+ Map<String, Double> weightParam = ObjUtil.nullOrDefault(weightMap, new HashMap<>());
|
|
|
+
|
|
|
+
|
|
|
+ Map<Long, Double> creativeScoreCoefficient = getCreativeScoreCoefficient();
|
|
|
Set<String> noApiAdVerIds = getNoApiAdVerIds();
|
|
|
|
|
|
long ts = System.currentTimeMillis() / 1000;
|
|
@@ -75,6 +98,18 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
Map<String, String> e1Feature = userFeature.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
|
|
|
Map<String, String> e2Feature = userFeature.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
|
|
|
|
|
|
+ userFeatureMap.put("brand", request.getMachineInfo().getBrand().toUpperCase());
|
|
|
+ userFeatureMap.put("region", request.getRegion());
|
|
|
+ userFeatureMap.put("city", request.getCity());
|
|
|
+ userFeatureMap.put("vid", String.valueOf(request.getVideoId()));
|
|
|
+ userFeatureMap.put("cate1", d3Feature.get("merge_first_level_cate"));
|
|
|
+ userFeatureMap.put("cate2", d3Feature.get("merge_second_level_cate"));
|
|
|
+ userFeatureMap.put("user_vid_return_tags_2h", e1Feature.getOrDefault("tags_2h", null));
|
|
|
+ userFeatureMap.put("user_vid_return_tags_1d", e1Feature.getOrDefault("tags_1d", null));
|
|
|
+ userFeatureMap.put("user_vid_return_tags_3d", e1Feature.getOrDefault("tags_3d", null));
|
|
|
+ userFeatureMap.put("user_vid_return_tags_7d", e1Feature.getOrDefault("tags_7d", null));
|
|
|
+ userFeatureMap.put("user_vid_return_tags_14d", e1Feature.getOrDefault("tags_14d", null));
|
|
|
+
|
|
|
Map<String, String> sceneFeatureMap = this.handleSceneFeature(ts);
|
|
|
long time1 = System.currentTimeMillis();
|
|
|
|
|
@@ -100,6 +135,7 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
} else {
|
|
|
adRankItem.getExt().put("isApi", "1");
|
|
|
}
|
|
|
+
|
|
|
adRankItem.getExt().put("recallsources", dto.getRecallSources());
|
|
|
|
|
|
String cidStr = dto.getCreativeId().toString();
|
|
@@ -117,6 +153,10 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
this.handleC1UIFeature(midTimeDiffMap, actionStaticMap, cidFeatureMap, cidStr);
|
|
|
this.handleD1Feature(d1Feature, cidFeatureMap);
|
|
|
this.handleD2Feature(vidRankMaps, cidFeatureMap, cidStr);
|
|
|
+
|
|
|
+ cidFeatureMap.put("cid", dto.getCreativeId() != null ? String.valueOf(dto.getCreativeId()) : null);
|
|
|
+ cidFeatureMap.put("adid", dto.getAdId() != null ? String.valueOf(dto.getAdId()) : null);
|
|
|
+ cidFeatureMap.put("adverid", dto.getAdVerId());
|
|
|
return adRankItem;
|
|
|
} finally {
|
|
|
cdl1.countDown();
|
|
@@ -162,17 +202,14 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
}
|
|
|
});
|
|
|
}
|
|
|
- long time31 = System.currentTimeMillis();
|
|
|
try {
|
|
|
cdl2.await(150, TimeUnit.MILLISECONDS);
|
|
|
} catch (Exception e) {
|
|
|
log.error("handleE1AndE2Feature and handleD3AndB1Feature wait timeout", e);
|
|
|
}
|
|
|
|
|
|
- // feature4
|
|
|
long time3 = System.currentTimeMillis();
|
|
|
// 分桶
|
|
|
- this.readBucketFile();
|
|
|
userFeatureMap = this.featureBucket(userFeatureMap);
|
|
|
CountDownLatch cdl4 = new CountDownLatch(adRankItems.size());
|
|
|
for (AdRankItem adRankItem : adRankItems) {
|
|
@@ -193,54 +230,75 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
long time4 = System.currentTimeMillis();
|
|
|
// 打分排序
|
|
|
// getScorerPipeline
|
|
|
- List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.XGBOOST_SCORE_CONF_20240909).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
|
|
|
+ List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.PAI_SCORE_CONF_20250214).scoring(sceneFeatureMap, userFeatureMap, adRankItems);
|
|
|
long time5 = System.currentTimeMillis();
|
|
|
+
|
|
|
+ // calibrate score for negative sampling
|
|
|
+ for (AdRankItem item : result) {
|
|
|
+ double originalScore = item.getLrScore();
|
|
|
+ double calibratedScore = originalScore / (originalScore + (1 - originalScore) / negSampleRate);
|
|
|
+ item.setLrScore(calibratedScore);
|
|
|
+ item.getScoreMap().put("originCtcvrScore", originalScore);
|
|
|
+ item.getScoreMap().put("ctcvrScore", calibratedScore);
|
|
|
+ }
|
|
|
+
|
|
|
// loop
|
|
|
+ double cpmCoefficient = weightParam.getOrDefault("cpmCoefficient", 0.9);
|
|
|
+
|
|
|
for (AdRankItem item : result) {
|
|
|
- item.setScore(item.getLrScore() * item.getCpa());
|
|
|
+
|
|
|
+ double scoreCoefficient = creativeScoreCoefficient.getOrDefault(item.getAdId(), 1d);
|
|
|
+ item.setScore(item.getLrScore() * scoreCoefficient * item.getCpa());
|
|
|
+
|
|
|
item.getScoreMap().put("cpa", item.getCpa());
|
|
|
item.getScoreMap().put("cpm", item.getCpm());
|
|
|
+ item.getScoreMap().put("cpmCoefficient", cpmCoefficient);
|
|
|
+ item.getScoreMap().put("scoreCoefficient", scoreCoefficient);
|
|
|
item.getFeatureMap().putAll(userFeatureMap);
|
|
|
item.getFeatureMap().putAll(sceneFeatureMap);
|
|
|
|
|
|
// 没有转化回传的广告主,使用后台配置的CPM
|
|
|
if (noApiAdVerIds.contains(item.getAdVerId())) {
|
|
|
- item.setScore(item.getCpm() / 1000);
|
|
|
+ item.setScore(item.getCpm() * cpmCoefficient / 1000);
|
|
|
}
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ result.sort(ComparatorUtil.equalsRandomComparator());
|
|
|
|
|
|
+ if (CollectionUtils.isNotEmpty(result)) {
|
|
|
+ AdRankItem top1Item = result.get(0);
|
|
|
for (Map.Entry<String, Map<String, String>> entry : videoFeature.entrySet()) {
|
|
|
if (MapUtils.isNotEmpty(entry.getValue())) {
|
|
|
- item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
|
|
|
+ top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
for (Map.Entry<String, Map<String, String>> entry : userFeature.entrySet()) {
|
|
|
if (MapUtils.isNotEmpty(entry.getValue())) {
|
|
|
- item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
|
|
|
+ top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(item.getAdVerId(), new HashMap<>());
|
|
|
+ Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(top1Item.getAdVerId(), new HashMap<>());
|
|
|
for (Map.Entry<String, Map<String, String>> entry : adVerFeature.entrySet()) {
|
|
|
if (MapUtils.isNotEmpty(entry.getValue())) {
|
|
|
- item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
|
|
|
+ top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(item.getAdId()), new HashMap<>());
|
|
|
+ Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(top1Item.getAdId()), new HashMap<>());
|
|
|
for (Map.Entry<String, Map<String, String>> entry : cidFeature.entrySet()) {
|
|
|
if (MapUtils.isNotEmpty(entry.getValue())) {
|
|
|
- item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
|
|
|
+ top1Item.getMetaFeatureMap().put(entry.getKey(), entry.getValue());
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
- log.info("cost={}, feature1={}, feature2={}, feature31={}, feature32={}, feature4={}, getScorerPipeline={}, " +
|
|
|
- "adIdSize={}, adRankItemsSize={}",
|
|
|
- time5 - start, time1 - start, time2 - time1, time31 - time2, time3 - time31, time4 - time3,
|
|
|
- time5 - time4, request.getAdIdList().size(), adRankItems.size());
|
|
|
-
|
|
|
- result.sort(ComparatorUtil.equalsRandomComparator());
|
|
|
+ long time6 = System.currentTimeMillis();
|
|
|
+ log.info("cost={}, getFeature={}, handleFeature={}, similar={}, bucketFeature={}, getScorerPipeline={}, " +
|
|
|
+ "other={}, adIdSize={}, adRankItemsSize={}",
|
|
|
+ time6 - start, time1 - start, time2 - time1, time3 - time2, time4 - time3,
|
|
|
+ time5 - time4, time6 - time5, request.getAdIdList().size(), adRankItems.size());
|
|
|
|
|
|
return result;
|
|
|
}
|
|
@@ -289,15 +347,20 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
double view = Double.parseDouble(feature.getOrDefault("ad_view_" + time, "0"));
|
|
|
double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
|
|
|
double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
|
|
|
- double f2 = NumUtil.div(conver, view);
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
|
|
|
+ double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
|
|
|
+ double cpc = NumUtil.div(income, click);
|
|
|
+ double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
|
|
|
+ double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
|
|
|
+ double ecpm = ctr * cpc * 1000;
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(ctr));
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(ctcvr));
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
|
|
|
cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
|
|
|
|
|
|
cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
|
|
|
cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * ctcvr));
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -320,16 +383,19 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
|
|
|
double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
|
|
|
double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
|
|
|
- double f2 = NumUtil.div(conver, view);
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
|
|
|
+ double cpc = NumUtil.div(income, click);
|
|
|
+ double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
|
|
|
+ double ctcvr = NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR);
|
|
|
+ double ecpm = ctr * cpc * 1000;
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(ctr));
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(ctcvr));
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
|
|
|
cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
|
|
|
- // cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(ecpm));
|
|
|
|
|
|
cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
|
|
|
cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
|
|
|
+ cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * ctcvr));
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -363,8 +429,19 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
featureMap.put("ctr_all", String.valueOf(NumUtil.div(clickAll, viewAll)));
|
|
|
featureMap.put("ctcvr_all", String.valueOf(NumUtil.div(converAll, viewAll)));
|
|
|
featureMap.put("cvr_all", String.valueOf(NumUtil.div(clickAll, converAll)));
|
|
|
- // featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
|
|
|
-
|
|
|
+ featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
|
|
|
+ if (CollectionUtils.isNotEmpty(midActionList)) {
|
|
|
+ List<String> cidList = new ArrayList<>();
|
|
|
+ List<String> converList = new ArrayList<>();
|
|
|
+ for (TupleMapEntry<Tuple5> tupleMapEntry : midActionList) {
|
|
|
+ String cid = tupleMapEntry.key;
|
|
|
+ String conver = tupleMapEntry.value.f3;
|
|
|
+ cidList.add(cid);
|
|
|
+ converList.add(conver);
|
|
|
+ }
|
|
|
+ featureMap.put("user_cid_click_list", String.join(",", cidList));
|
|
|
+ featureMap.put("user_cid_conver_list", String.join(",", converList));
|
|
|
+ }
|
|
|
return midActionList;
|
|
|
}
|
|
|
|
|
@@ -413,11 +490,13 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
double click = Double.parseDouble(d1Feature.getOrDefault("ad_click_" + prefix, "0"));
|
|
|
double conver = Double.parseDouble(d1Feature.getOrDefault("ad_conversion_" + prefix, "0"));
|
|
|
double income = Double.parseDouble(d1Feature.getOrDefault("ad_income_" + prefix, "0"));
|
|
|
- featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(NumUtil.div(click, view)));
|
|
|
- featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.div(conver, view)));
|
|
|
- featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.div(conver, click)));
|
|
|
+ double cpc = NumUtil.div(income, click);
|
|
|
+ double ctr = NumUtil.divSmoothV2(click, view, CTR_SMOOTH_BETA_FACTOR);
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(ctr));
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.divSmoothV2(conver, view, CTCVR_SMOOTH_BETA_FACTOR)));
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.divSmoothV2(conver, click, CVR_SMOOTH_BETA_FACTOR)));
|
|
|
featureMap.put("d1_feature_" + prefix + "_conver", String.valueOf(conver));
|
|
|
- // featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
|
|
|
+ featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(ctr * cpc * 1000));
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -426,8 +505,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
|
|
|
- List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
|
|
|
+ List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
|
|
|
+ // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
|
|
|
List<String> prefixes2 = Arrays.asList("1d", "3d", "7d", "14d");
|
|
|
|
|
|
for (String prefix1 : prefixes1) {
|
|
@@ -477,7 +556,7 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
for (String tagsField : tagsFieldList) {
|
|
|
if (StringUtils.isNotEmpty(feature.get(tagsField))) {
|
|
|
String tags = feature.get(tagsField);
|
|
|
- //Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
+ // Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
Double[] doubles;
|
|
|
if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
|
|
|
doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
|
|
@@ -560,7 +639,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
return;
|
|
|
}
|
|
|
synchronized (this) {
|
|
|
- InputStream resourceStream = RankStrategyBy688.class.getClassLoader().getResourceAsStream("20240718_ad_bucket_688.txt");
|
|
|
+ String bucketFile = "20250217_ad_bucket_688.txt";
|
|
|
+ InputStream resourceStream = this.getClass().getClassLoader().getResourceAsStream(bucketFile);
|
|
|
if (resourceStream != null) {
|
|
|
try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) {
|
|
|
Map<String, double[]> bucketsMap = new HashMap<>();
|
|
@@ -583,18 +663,46 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
this.bucketsMap = bucketsMap;
|
|
|
this.bucketsLen = bucketsLen;
|
|
|
} catch (IOException e) {
|
|
|
- log.error("something is wrong in parse bucket file:", e);
|
|
|
+ log.error("something is wrong in parse bucket file: ", e);
|
|
|
}
|
|
|
+ log.info("load bucket file success: {}", bucketFile);
|
|
|
} else {
|
|
|
log.error("no bucket file");
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ private void initSparseFeatureNames() { // builds sparseFeatureSet: names that featureBucket() copies through as raw strings instead of parsing as doubles
|
|
|
+ this.sparseFeatureSet = new HashSet<String>() {{ // NOTE(review): double-brace initialization creates an anonymous HashSet subclass; new HashSet<>(Arrays.asList(...)) would avoid that - confirm before changing
|
|
|
+ add("brand"); // brand, region, city, vid, cate1, cate2: keys put into userFeatureMap in the ranking method
|
|
|
+ add("region");
|
|
|
+ add("city");
|
|
|
+ add("vid");
|
|
|
+ add("cate1");
|
|
|
+ add("cate2");
|
|
|
+ add("cid"); // cid, adid, adverid: per-creative keys put into cidFeatureMap
|
|
|
+ add("adid");
|
|
|
+ add("adverid");
|
|
|
+ add("user_cid_click_list"); // comma-joined lists built from midActionList
|
|
|
+ add("user_cid_conver_list");
|
|
|
+ add("user_vid_return_tags_2h"); // tags_* values taken from the alg_mid_feature_return_tags feature (2h/1d/3d/7d/14d windows)
|
|
|
+ add("user_vid_return_tags_1d");
|
|
|
+ add("user_vid_return_tags_3d");
|
|
|
+ add("user_vid_return_tags_7d");
|
|
|
+ add("user_vid_return_tags_14d");
|
|
|
+ }};
|
|
|
+ }
|
|
|
+
|
|
|
private Map<String, String> featureBucket(Map<String, String> featureMap) {
|
|
|
Map<String, String> newFeatureMap = new ConcurrentHashMap<>(featureMap.size());
|
|
|
for (Map.Entry<String, String> entry : featureMap.entrySet()) {
|
|
|
String name = entry.getKey();
|
|
|
+ if (this.sparseFeatureSet.contains(name)) {
|
|
|
+ if (entry.getValue() != null) {
|
|
|
+ newFeatureMap.put(name, entry.getValue());
|
|
|
+ }
|
|
|
+ continue;
|
|
|
+ }
|
|
|
double score = Double.parseDouble(entry.getValue());
|
|
|
// 注意:0值、不在分桶文件中的特征,会被过滤掉。
|
|
|
if (score > 1E-8) {
|
|
@@ -608,8 +716,6 @@ public class RankStrategyBy688 extends RankStrategyBasic {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
return newFeatureMap;
|
|
|
}
|
|
|
-
|
|
|
}
|