|
@@ -1,613 +1,213 @@
|
|
|
package com.tzld.piaoquan.ad.engine.service.score.strategy;
|
|
|
|
|
|
-import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
|
|
|
-import com.tzld.piaoquan.ad.engine.commons.score.ScorerUtils;
|
|
|
-import com.tzld.piaoquan.ad.engine.commons.thread.ThreadPoolFactory;
|
|
|
-import com.tzld.piaoquan.ad.engine.commons.util.*;
|
|
|
-import com.tzld.piaoquan.ad.engine.service.entity.GuaranteeView;
|
|
|
-import com.tzld.piaoquan.ad.engine.service.feature.Feature;
|
|
|
+import com.alibaba.fastjson.JSON;
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
import com.tzld.piaoquan.ad.engine.commons.dto.AdPlatformCreativeDTO;
|
|
|
import com.tzld.piaoquan.ad.engine.commons.param.RankRecommendRequestParam;
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
|
|
|
+import com.tzld.piaoquan.ad.engine.commons.util.WeightRandom;
|
|
|
+import com.tzld.piaoquan.ad.engine.service.feature.Feature;
|
|
|
+import com.tzld.piaoquan.ad.engine.service.feature.FeatureService;
|
|
|
import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
|
|
|
+import lombok.AllArgsConstructor;
|
|
|
+import lombok.Data;
|
|
|
+import lombok.NoArgsConstructor;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.commons.collections4.MapUtils;
|
|
|
+import org.apache.commons.collections4.CollectionUtils;
|
|
|
import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
-import org.xm.Similarity;
|
|
|
|
|
|
-import java.io.BufferedReader;
|
|
|
-import java.io.IOException;
|
|
|
-import java.io.InputStream;
|
|
|
-import java.io.InputStreamReader;
|
|
|
import java.util.*;
|
|
|
-import java.util.concurrent.ConcurrentHashMap;
|
|
|
-import java.util.concurrent.CountDownLatch;
|
|
|
-import java.util.concurrent.TimeUnit;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
|
@Slf4j
|
|
|
@Service
|
|
|
public class RankStrategyBy687 extends RankStrategyBasic {
|
|
|
|
|
|
- private Map<String, double[]> bucketsMap = new HashMap<>();
|
|
|
-
|
|
|
- private Map<String, Double> bucketsLen = new HashMap<>();
|
|
|
-
|
|
|
- @Value("${similarity.concurrent: false}")
|
|
|
- private boolean similarityConcurrent;
|
|
|
-
|
|
|
- @Override
|
|
|
- public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
|
|
|
- Set<String> noApiAdVerIds = getNoApiAdVerIds();
|
|
|
-
|
|
|
- long ts = System.currentTimeMillis() / 1000;
|
|
|
-
|
|
|
- String brand = scoreParam.getRequestContext().getMachineinfoBrand();
|
|
|
- if (StringUtils.isNotEmpty(brand)) {
|
|
|
- scoreParam.getRequestContext().setMachineinfoBrand(brand + "-n");
|
|
|
- }
|
|
|
-
|
|
|
- long start = System.currentTimeMillis();
|
|
|
- // 特征处理
|
|
|
- Feature feature = this.getFeature(scoreParam, request);
|
|
|
-
|
|
|
- Map<String, Map<String, String>> userFeature = feature.getUserFeature();
|
|
|
- Map<String, Map<String, String>> videoFeature = feature.getVideoFeature();
|
|
|
- Map<String, Map<String, Map<String, String>>> allAdVerFeature = feature.getAdVerFeature();
|
|
|
- Map<String, Map<String, Map<String, String>>> allCidFeature = feature.getCidFeature();
|
|
|
- Map<String, Map<String, Map<String, String>>> allSkuFeature = feature.getSkuFeature();
|
|
|
- Map<String, String> reqFeature = this.getReqFeature(scoreParam, request);
|
|
|
-
|
|
|
-
|
|
|
- Map<String, String> userFeatureMap = new HashMap<>();
|
|
|
- Map<String, String> c1Feature = userFeature.getOrDefault("alg_mid_feature_ad_action", new HashMap<>());
|
|
|
- List<TupleMapEntry<Tuple5>> midActionList = this.handleC1Feature(c1Feature, userFeatureMap);
|
|
|
-
|
|
|
- Map<String, Double> midTimeDiffMap = this.parseC1FeatureListToTimeDiffMap(midActionList, ts);
|
|
|
- Map<String, Double> actionStaticMap = this.parseC1FeatureListToActionStaticMap(midActionList);
|
|
|
-
|
|
|
- Map<String, String> d2Feature = videoFeature.getOrDefault("alg_cid_feature_vid_cf_rank", new HashMap<>());
|
|
|
- Map<String, String> d3Feature = videoFeature.getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
|
|
|
-
|
|
|
- Map<String, Map<String, Double>> vidRankMaps = this.parseD2FeatureMap(d2Feature);
|
|
|
-
|
|
|
- Map<String, String> e1Feature = userFeature.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
|
|
|
- Map<String, String> e2Feature = userFeature.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
|
|
|
-
|
|
|
- Map<String, String> sceneFeatureMap = this.handleSceneFeature(ts);
|
|
|
- long time1 = System.currentTimeMillis();
|
|
|
-
|
|
|
- Map<String, GuaranteeView> map = getGuaranteeViewMap(request, scoreParam);
|
|
|
- List<AdRankItem> adRankItems = new ArrayList<>(request.getAdIdList().size());
|
|
|
- Random random = new Random();
|
|
|
- if (similarityConcurrent) {
|
|
|
- for (AdPlatformCreativeDTO dto : request.getAdIdList()) {
|
|
|
- AdRankItem adRankItem = new AdRankItem();
|
|
|
- adRankItem.setAdId(dto.getCreativeId());
|
|
|
- adRankItem.setCreativeCode(dto.getCreativeCode());
|
|
|
- adRankItem.setAdVerId(dto.getAdVerId());
|
|
|
- adRankItem.setVideoId(request.getVideoId());
|
|
|
- adRankItem.setCpa(dto.getCpa());
|
|
|
- adRankItem.setId(dto.getAdId());
|
|
|
- adRankItem.setCampaignId(dto.getCampaignId());
|
|
|
- adRankItem.setCpm(ObjUtil.nullOrDefault(dto.getCpm(), 90).doubleValue());
|
|
|
- adRankItem.setRandom(random.nextInt(1000));
|
|
|
- if (noApiAdVerIds.contains(dto.getAdVerId())) {
|
|
|
- adRankItem.getExt().put("isApi", "0");
|
|
|
- } else {
|
|
|
- adRankItem.getExt().put("isApi", "1");
|
|
|
- }
|
|
|
- adRankItem.getExt().put("recallsources", dto.getRecallSources());
|
|
|
- setGuaranteeWeight(map, dto.getAdVerId(), adRankItem.getExt());
|
|
|
-
|
|
|
- String cidStr = dto.getCreativeId().toString();
|
|
|
- Map<String, String> cidFeatureMap = adRankItem.getFeatureMap();
|
|
|
- Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, new HashMap<>());
|
|
|
- Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", new HashMap<>());
|
|
|
-
|
|
|
- Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(dto.getAdVerId(), new HashMap<>());
|
|
|
-
|
|
|
- Map<String, String> d1Feature = cidFeature.getOrDefault("alg_cid_feature_vid_cf", new HashMap<>());
|
|
|
-
|
|
|
- this.handleB1Feature(b1Feature, cidFeatureMap, cidStr);
|
|
|
-
|
|
|
- this.handleB2ToB5AndB8ToB9Feature(cidFeature, adVerFeature, cidFeatureMap);
|
|
|
-
|
|
|
- this.handleB6ToB7Feature(cidFeature, cidFeatureMap);
|
|
|
+ @Autowired
|
|
|
+ private RankStrategyBy688 rankStrategyBy688;
|
|
|
|
|
|
- this.handleC1UIFeature(midTimeDiffMap, actionStaticMap, cidFeatureMap, cidStr);
|
|
|
+ @ApolloJsonValue("${cid.open.hot.rank.user.layer :[]}")
|
|
|
+ private Set<String> openHotRankUserLayers;
|
|
|
|
|
|
- this.handleD1Feature(d1Feature, cidFeatureMap);
|
|
|
+ @Value("${cid.hot.rank.view.threshold:5000}")
|
|
|
+ private Integer hotRankViewThreshold;
|
|
|
|
|
|
- this.handleD2Feature(vidRankMaps, cidFeatureMap, cidStr);
|
|
|
+ @Value("${cid.hot.rank.calc.weight.coefficient:2.5}")
|
|
|
+ private Double hotRankCalcWeightCoefficient;
|
|
|
|
|
|
+ @Value("${cid.hot.rank.max.weight:1000}")
|
|
|
+ private Double hotRankMaxWeight;
|
|
|
+ @Autowired
|
|
|
+ private FeatureService featureService;
|
|
|
|
|
|
- // adRankItem.setFeatureMap(cidFeatureMap);
|
|
|
-
|
|
|
- adRankItems.add(adRankItem);
|
|
|
-
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- CountDownLatch cdl = new CountDownLatch(adRankItems.size());
|
|
|
- for (AdRankItem item : adRankItems) {
|
|
|
- ThreadPoolFactory.defaultPool().submit(() -> {
|
|
|
- try {
|
|
|
- String cidStr = String.valueOf(item.getAdId());
|
|
|
- Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, new HashMap<>());
|
|
|
- Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", new HashMap<>());
|
|
|
- String title = b1Feature.getOrDefault("cidtitle", "");
|
|
|
- long time21 = System.currentTimeMillis();
|
|
|
- this.handleE1AndE2Feature(e1Feature, e2Feature, title, item.getFeatureMap());
|
|
|
- long time22 = System.currentTimeMillis();
|
|
|
- this.handleD3AndB1Feature(d3Feature, title, item.getFeatureMap());
|
|
|
- long time23 = System.currentTimeMillis();
|
|
|
- log.info("cost={} handleE1AndE2Feature={} handleD3AndB1Feature={}", time23 - time21, time22 - time21, time23 - time22);
|
|
|
- } finally {
|
|
|
- cdl.countDown();
|
|
|
- }
|
|
|
- });
|
|
|
- }
|
|
|
- try {
|
|
|
- cdl.await(200, TimeUnit.MILLISECONDS);
|
|
|
- } catch (Exception e) {
|
|
|
- log.error("handleE1AndE2Feature and handleD3AndB1Feature wait timeout", e);
|
|
|
- }
|
|
|
- } else {
|
|
|
- for (AdPlatformCreativeDTO dto : request.getAdIdList()) {
|
|
|
- long time20 = System.currentTimeMillis();
|
|
|
- AdRankItem adRankItem = new AdRankItem();
|
|
|
- adRankItem.setAdId(dto.getCreativeId());
|
|
|
- adRankItem.setCreativeCode(dto.getCreativeCode());
|
|
|
- adRankItem.setAdVerId(dto.getAdVerId());
|
|
|
- adRankItem.setVideoId(request.getVideoId());
|
|
|
- adRankItem.setCpa(dto.getCpa());
|
|
|
- adRankItem.setId(dto.getAdId());
|
|
|
- adRankItem.setCampaignId(dto.getCampaignId());
|
|
|
- adRankItem.setCpm(ObjUtil.nullOrDefault(dto.getCpm(), 90).doubleValue());
|
|
|
- adRankItem.setSkuId(dto.getSkuId());
|
|
|
- adRankItem.setRandom(random.nextInt(1000));
|
|
|
-
|
|
|
- String cidStr = dto.getCreativeId().toString();
|
|
|
- Map<String, String> cidFeatureMap = adRankItem.getFeatureMap();
|
|
|
- Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, new HashMap<>());
|
|
|
- Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", new HashMap<>());
|
|
|
-
|
|
|
- Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(dto.getAdVerId(), new HashMap<>());
|
|
|
-
|
|
|
- Map<String, String> d1Feature = cidFeature.getOrDefault("alg_cid_feature_vid_cf", new HashMap<>());
|
|
|
-
|
|
|
- this.handleB1Feature(b1Feature, cidFeatureMap, cidStr);
|
|
|
-
|
|
|
- this.handleB2ToB5AndB8ToB9Feature(cidFeature, adVerFeature, cidFeatureMap);
|
|
|
-
|
|
|
- this.handleB6ToB7Feature(cidFeature, cidFeatureMap);
|
|
|
-
|
|
|
- this.handleC1UIFeature(midTimeDiffMap, actionStaticMap, cidFeatureMap, cidStr);
|
|
|
-
|
|
|
- this.handleD1Feature(d1Feature, cidFeatureMap);
|
|
|
-
|
|
|
- this.handleD2Feature(vidRankMaps, cidFeatureMap, cidStr);
|
|
|
-
|
|
|
- String title = b1Feature.getOrDefault("cidtitle", "");
|
|
|
- long time21 = System.currentTimeMillis();
|
|
|
- this.handleE1AndE2Feature(e1Feature, e2Feature, title, cidFeatureMap);
|
|
|
- long time22 = System.currentTimeMillis();
|
|
|
- this.handleD3AndB1Feature(d3Feature, title, cidFeatureMap);
|
|
|
- long time23 = System.currentTimeMillis();
|
|
|
- log.info("cost={} other={} handleE1AndE2Feature={} handleD3AndB1Feature={}", time23 - time20,
|
|
|
- time21 - time20, time22 - time21, time23 - time22);
|
|
|
-
|
|
|
- // adRankItem.setFeatureMap(cidFeatureMap);
|
|
|
-
|
|
|
- adRankItems.add(adRankItem);
|
|
|
-
|
|
|
- }
|
|
|
+ @Override
|
|
|
+ public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
|
|
|
+ // 没有mid或者未配置分层,走基线排序
|
|
|
+ if (StringUtils.isEmpty(request.getMid()) && CollectionUtils.isEmpty(openHotRankUserLayers)) {
|
|
|
+ return rankStrategyBy688.adItemRank(request, scoreParam);
|
|
|
}
|
|
|
|
|
|
- //
|
|
|
-
|
|
|
- long time2 = System.currentTimeMillis();
|
|
|
- // 分桶
|
|
|
- this.readBucketFile();
|
|
|
- userFeatureMap = this.featureBucket(userFeatureMap);
|
|
|
- for (AdRankItem adRankItem : adRankItems) {
|
|
|
- Map<String, String> featureMap = adRankItem.getFeatureMap();
|
|
|
- adRankItem.setFeatureMap(this.featureBucket(featureMap));
|
|
|
+ // 当前分层未开启此策略
|
|
|
+ String userLayer = this.getUserLayer(request.getMid());
|
|
|
+ if (!openHotRankUserLayers.contains(userLayer)) {
|
|
|
+ return rankStrategyBy688.adItemRank(request, scoreParam);
|
|
|
}
|
|
|
|
|
|
+ List<AdPlatformCreativeDTO> recallCreativeList = request.getAdIdList();
|
|
|
|
|
|
- // 打分排序
|
|
|
- List<AdRankItem> result = ScorerUtils.getScorerPipeline(ScorerUtils.XGBOOST_SCORE_CONF_683)
|
|
|
- .scoring(sceneFeatureMap, userFeatureMap, adRankItems);
|
|
|
+ // 获取所有创意对应的客户列表
|
|
|
+ List<Long> creativeIds = recallCreativeList.stream().map(AdPlatformCreativeDTO::getCreativeId).collect(Collectors.toList());
|
|
|
+ Feature feature = featureService.getCreativeBasicInfo(creativeIds);
|
|
|
+ this.fullCustomerInfo(feature.getCidFeature(), recallCreativeList);
|
|
|
|
|
|
- long time3 = System.currentTimeMillis();
|
|
|
- for (AdRankItem item : result) {
|
|
|
- double guaranteeScoreCoefficient = getGuaranteeScoreCoefficient(scoreParam, item.getExt());
|
|
|
- item.setScore(item.getLrScore() * item.getCpa() * guaranteeScoreCoefficient);
|
|
|
- item.getScoreMap().put("guaranteeScoreCoefficient", guaranteeScoreCoefficient);
|
|
|
- item.getScoreMap().put("cpa", item.getCpa());
|
|
|
- item.getScoreMap().put("cpm", item.getCpm());
|
|
|
- item.getFeatureMap().putAll(userFeatureMap);
|
|
|
- item.getFeatureMap().putAll(sceneFeatureMap);
|
|
|
+ List<String> customers = recallCreativeList.stream()
|
|
|
+ .map(AdPlatformCreativeDTO::getCustomer)
|
|
|
+ .distinct()
|
|
|
+ .collect(Collectors.toList());
|
|
|
|
|
|
- // 没有转化回传的广告主,使用后台配置的CPM
|
|
|
- if (noApiAdVerIds.contains(item.getAdVerId())) {
|
|
|
- item.setScore(item.getCpm() / 1000);
|
|
|
- }
|
|
|
- putMetaFeature(item, feature, reqFeature, sceneFeatureMap, request);
|
|
|
- }
|
|
|
-
|
|
|
- long time4 = System.currentTimeMillis();
|
|
|
- log.info("cost={}, feature1={}, feature2={}, getScorerPipeline={}, loop={}",
|
|
|
- time4 - start, time1 - start, time2 - time1, time3 - time2, time4 - time3);
|
|
|
+ // 获取客户列表,并选择一个客户
|
|
|
+ String keyFormat = "ad:engine:customer:layer:info:%s:" + userLayer;
|
|
|
+ List<HotRankFeatureInfo> customerFeature = this.multiGetFeature(customers, keyFormat);
|
|
|
+ String customer = this.choose(customerFeature);
|
|
|
|
|
|
- result.sort(ComparatorUtil.equalsRandomComparator());
|
|
|
+ // 从当前客户的所有创意中,选择一个创意
|
|
|
+ List<String> customerCreativeIds = recallCreativeList.stream()
|
|
|
+ .filter(c -> StringUtils.equals(c.getCustomer(), customer))
|
|
|
+ .map(AdPlatformCreativeDTO::getCreativeId)
|
|
|
+ .map(Objects::toString)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ String cidKeyFormat = "ad:engine:cid:layer:info:%s:" + userLayer;
|
|
|
+ List<HotRankFeatureInfo> creativeFeature = this.multiGetFeature(customerCreativeIds, cidKeyFormat);
|
|
|
+ String chooseCreativeId = this.choose(creativeFeature);
|
|
|
|
|
|
- return result;
|
|
|
+ return null;
|
|
|
}
|
|
|
|
|
|
- private void handleB1Feature(Map<String, String> b1Feature, Map<String, String> cidFeatureMap, String cid) {
|
|
|
- cidFeatureMap.put("cid_" + cid, "0.1");
|
|
|
- // if (StringUtils.isNotBlank(b1Feature.get("adid"))) {
|
|
|
- // String adId = b1Feature.get("adid");
|
|
|
- // cidFeatureMap.put("adid_" + adId, idDefaultValue);
|
|
|
- // }
|
|
|
- if (StringUtils.isNotBlank(b1Feature.get("adverid"))) {
|
|
|
- String adVerId = b1Feature.get("adverid");
|
|
|
- cidFeatureMap.put("adverid_" + adVerId, "0.1");
|
|
|
- }
|
|
|
- // if (StringUtils.isNotBlank(b1Feature.get("targeting_conversion"))) {
|
|
|
- // String targetingConversion = b1Feature.get("targeting_conversion");
|
|
|
- // cidFeatureMap.put("targeting_conversion_" + targetingConversion, idDefaultValue);
|
|
|
- // }
|
|
|
- if (StringUtils.isNotBlank(b1Feature.get("cpa"))) {
|
|
|
- String cpa = b1Feature.get("cpa");
|
|
|
- cidFeatureMap.put("cpa", cpa);
|
|
|
- }
|
|
|
- }
|
|
|
+ private String getUserLayer(String mid) {
|
|
|
|
|
|
- private void handleB2ToB5AndB8ToB9Feature(Map<String, Map<String, String>> c1Feature, Map<String, Map<String, String>> adVerFeature, Map<String, String> cidFeatureMap) {
|
|
|
- Map<String, String> b2Feature = adVerFeature.getOrDefault("alg_cid_feature_adver_action", new HashMap<>());
|
|
|
- Map<String, String> b3Feature = c1Feature.getOrDefault("alg_cid_feature_cid_action", new HashMap<>());
|
|
|
- Map<String, String> b4Feature = c1Feature.getOrDefault("alg_cid_feature_region_action", new HashMap<>());
|
|
|
- Map<String, String> b5Feature = c1Feature.getOrDefault("alg_cid_feature_app_action", new HashMap<>());
|
|
|
- Map<String, String> b8Feature = c1Feature.getOrDefault("alg_cid_feature_brand_action", new HashMap<>());
|
|
|
- Map<String, String> b9Feature = c1Feature.getOrDefault("alg_cid_feature_weChatVersion_action", new HashMap<>());
|
|
|
-
|
|
|
- List<String> timeList = Arrays.asList("1h", "2h", "3h", "6h", "12h", "1d", "3d", "7d", "yesterday", "today");
|
|
|
- List<Tuple2<Map<String, String>, String>> featureList = Arrays.asList(
|
|
|
- new Tuple2<>(b2Feature, "b2"),
|
|
|
- new Tuple2<>(b3Feature, "b3"),
|
|
|
- new Tuple2<>(b4Feature, "b4"),
|
|
|
- new Tuple2<>(b5Feature, "b5"),
|
|
|
- new Tuple2<>(b8Feature, "b8"),
|
|
|
- new Tuple2<>(b9Feature, "b9")
|
|
|
- );
|
|
|
- for (Tuple2<Map<String, String>, String> tuple2 : featureList) {
|
|
|
- Map<String, String> feature = tuple2.f1;
|
|
|
- String prefix = tuple2.f2;
|
|
|
- for (String time : timeList) {
|
|
|
- double view = Double.parseDouble(feature.getOrDefault("ad_view_" + time, "0"));
|
|
|
- double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
|
|
|
- double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
|
|
|
- double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
|
|
|
- double f2 = NumUtil.div(conver, view);
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
|
|
|
- // cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
|
|
|
-
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
|
|
|
- }
|
|
|
+ String key = String.format("ad:engine:mid:layer:%s", mid);
|
|
|
+ String value = algRedisHelper.get(key);
|
|
|
+ if (StringUtils.isEmpty(value)) {
|
|
|
+ return "无曝光";
|
|
|
}
|
|
|
-
|
|
|
+ return value;
|
|
|
}
|
|
|
|
|
|
- private void handleB6ToB7Feature(Map<String, Map<String, String>> c1Feature, Map<String, String> cidFeatureMap) {
|
|
|
- Map<String, String> b6Feature = c1Feature.getOrDefault("alg_cid_feature_week_action", new HashMap<>());
|
|
|
- Map<String, String> b7Feature = c1Feature.getOrDefault("alg_cid_feature_hour_action", new HashMap<>());
|
|
|
-
|
|
|
- List<String> timeList = Arrays.asList("7d", "14d");
|
|
|
- List<Tuple2<Map<String, String>, String>> featureList = Arrays.asList(
|
|
|
- new Tuple2<>(b6Feature, "b6"),
|
|
|
- new Tuple2<>(b7Feature, "b7")
|
|
|
- );
|
|
|
- for (Tuple2<Map<String, String>, String> tuple2 : featureList) {
|
|
|
- Map<String, String> feature = tuple2.f1;
|
|
|
- String prefix = tuple2.f2;
|
|
|
- for (String time : timeList) {
|
|
|
- double view = Double.parseDouble(feature.getOrDefault("ad_view_" + time, "0"));
|
|
|
- double click = Double.parseDouble(feature.getOrDefault("ad_click_" + time, "0"));
|
|
|
- double conver = Double.parseDouble(feature.getOrDefault("ad_conversion_" + time, "0"));
|
|
|
- double income = Double.parseDouble(feature.getOrDefault("ad_income_" + time, "0"));
|
|
|
- double f2 = NumUtil.div(conver, view);
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_ctr", String.valueOf(NumUtil.div(click, view)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_ctcvr", String.valueOf(f2));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_cvr", String.valueOf(NumUtil.div(conver, click)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_conver", String.valueOf(conver));
|
|
|
- // cidFeatureMap.put(prefix + "_" + time + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
|
|
|
-
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_click", String.valueOf(click));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_conver*log(view)", String.valueOf(conver * NumUtil.log(view)));
|
|
|
- cidFeatureMap.put(prefix + "_" + time + "_conver*ctcvr", String.valueOf(conver * f2));
|
|
|
- }
|
|
|
- }
|
|
|
+ private List<HotRankFeatureInfo> multiGetFeature(List<String> customers, String keyFormat) {
|
|
|
+ List<HotRankFeatureInfo> hotRankFeatureInfos = new ArrayList<>(customers.size());
|
|
|
|
|
|
- }
|
|
|
-
|
|
|
- private List<TupleMapEntry<Tuple5>> handleC1Feature(Map<String, String> c1Feature, Map<String, String> featureMap) {
|
|
|
-
|
|
|
- // 用户特征
|
|
|
- List<TupleMapEntry<Tuple5>> midActionList = new ArrayList<>();
|
|
|
- if (c1Feature.containsKey("action")) {
|
|
|
- String action = c1Feature.get("action");
|
|
|
- midActionList = Arrays.stream(action.split(","))
|
|
|
- .map(r -> {
|
|
|
- String[] rList = r.split(":");
|
|
|
- Tuple5 tuple5 = new Tuple5(rList[1], rList[2], rList[3], rList[4], rList[5]);
|
|
|
- return new TupleMapEntry<>(rList[0], tuple5);
|
|
|
- })
|
|
|
- // TODO 倒排
|
|
|
- .sorted((a, b) -> Integer.compare(Integer.parseInt(b.value.f1), Integer.parseInt(a.value.f1)))
|
|
|
- .collect(Collectors.toList());
|
|
|
+ List<String> keys = new ArrayList<>(customers.size());
|
|
|
+ for (String customer : customers) {
|
|
|
+ String key = String.format(keyFormat, customer);
|
|
|
+ keys.add(key);
|
|
|
}
|
|
|
|
|
|
- double viewAll = midActionList.size();
|
|
|
- double clickAll = midActionList.stream().mapToInt(e -> Integer.parseInt(e.value.f2)).sum();
|
|
|
- double converAll = midActionList.stream().mapToInt(e -> Integer.parseInt(e.value.f3)).sum();
|
|
|
- double incomeAll = midActionList.stream().mapToInt(e -> Integer.parseInt(e.value.f4)).sum();
|
|
|
- featureMap.put("viewAll", String.valueOf(viewAll));
|
|
|
- featureMap.put("clickAll", String.valueOf(clickAll));
|
|
|
- featureMap.put("converAll", String.valueOf(converAll));
|
|
|
- featureMap.put("incomeAll", String.valueOf(incomeAll));
|
|
|
- featureMap.put("ctr_all", String.valueOf(NumUtil.div(clickAll, viewAll)));
|
|
|
- featureMap.put("ctcvr_all", String.valueOf(NumUtil.div(converAll, viewAll)));
|
|
|
- featureMap.put("cvr_all", String.valueOf(NumUtil.div(clickAll, converAll)));
|
|
|
- // featureMap.put("ecpm_all", String.valueOf(NumUtil.div(incomeAll * 1000, viewAll)));
|
|
|
-
|
|
|
- return midActionList;
|
|
|
- }
|
|
|
+ List<String> values = algRedisHelper.mget(keys);
|
|
|
|
|
|
- private void handleC1UIFeature(Map<String, Double> midTimeDiffMap, Map<String, Double> midActionStatic, Map<String, String> featureMap, String cid) {
|
|
|
- if (midTimeDiffMap.containsKey("timediff_view_" + cid)) {
|
|
|
- featureMap.put("timediff_view", String.valueOf(midTimeDiffMap.getOrDefault("timediff_view_" + cid, 0.0)));
|
|
|
- }
|
|
|
- if (midTimeDiffMap.containsKey("timediff_click_" + cid)) {
|
|
|
- featureMap.put("timediff_click", String.valueOf(midTimeDiffMap.getOrDefault("timediff_click_" + cid, 0.0)));
|
|
|
- }
|
|
|
- if (midTimeDiffMap.containsKey("timediff_conver_" + cid)) {
|
|
|
- featureMap.put("timediff_conver", String.valueOf(midTimeDiffMap.getOrDefault("timediff_conver_" + cid, 0.0)));
|
|
|
- }
|
|
|
- if (midActionStatic.containsKey("actionstatic_view_" + cid)) {
|
|
|
- featureMap.put("actionstatic_view", String.valueOf(midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)));
|
|
|
- }
|
|
|
- if (midActionStatic.containsKey("actionstatic_click_" + cid)) {
|
|
|
- featureMap.put("actionstatic_click", String.valueOf(midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)));
|
|
|
- }
|
|
|
- if (midActionStatic.containsKey("actionstatic_conver_" + cid)) {
|
|
|
- featureMap.put("actionstatic_conver", String.valueOf(midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0)));
|
|
|
- }
|
|
|
- if (midActionStatic.containsKey("actionstatic_income_" + cid)) {
|
|
|
- featureMap.put("actionstatic_income", String.valueOf(midActionStatic.getOrDefault("actionstatic_income_" + cid, 0.0)));
|
|
|
- }
|
|
|
- if (midActionStatic.containsKey("actionstatic_view_" + cid) && midActionStatic.containsKey("actionstatic_click_" + cid)) {
|
|
|
- double ctr = NumUtil.div(
|
|
|
- midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0),
|
|
|
- midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)
|
|
|
- );
|
|
|
- featureMap.put("actionstatic_ctr", String.valueOf(ctr));
|
|
|
- }
|
|
|
- if (midActionStatic.containsKey("actionstatic_view_" + cid) && midActionStatic.containsKey("actionstatic_conver_" + cid)) {
|
|
|
- double ctcvr = NumUtil.div(
|
|
|
- midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0),
|
|
|
- midActionStatic.getOrDefault("actionstatic_view_" + cid, 0.0)
|
|
|
- );
|
|
|
- featureMap.put("actionstatic_ctcvr", String.valueOf(ctcvr));
|
|
|
- }
|
|
|
- if (midActionStatic.containsKey("actionstatic_conver_" + cid) && midActionStatic.containsKey("actionstatic_click_" + cid)) {
|
|
|
- double cvr = NumUtil.div(
|
|
|
- midActionStatic.getOrDefault("actionstatic_conver_" + cid, 0.0),
|
|
|
- midActionStatic.getOrDefault("actionstatic_click_" + cid, 0.0)
|
|
|
- );
|
|
|
- featureMap.put("actionstatic_cvr", String.valueOf(cvr));
|
|
|
+ for (int i = 0; i < customers.size(); i++) {
|
|
|
+ String value = values.get(i);
|
|
|
+ HotRankFeatureInfo hotRankFeatureInfo = JSON.parseObject(value, HotRankFeatureInfo.class);
|
|
|
+ hotRankFeatureInfo.setLabel(customers.get(i));
|
|
|
+ hotRankFeatureInfos.add(hotRankFeatureInfo);
|
|
|
}
|
|
|
+
|
|
|
+ return hotRankFeatureInfos;
|
|
|
}
|
|
|
|
|
|
- private void handleD1Feature(Map<String, String> d1Feature, Map<String, String> featureMap) {
|
|
|
- for (String prefix : Arrays.asList("3h", "6h", "12h", "1d", "3d", "7d")) {
|
|
|
- double view = Double.parseDouble(d1Feature.getOrDefault("ad_view_" + prefix, "0"));
|
|
|
- double click = Double.parseDouble(d1Feature.getOrDefault("ad_click_" + prefix, "0"));
|
|
|
- double conver = Double.parseDouble(d1Feature.getOrDefault("ad_conversion_" + prefix, "0"));
|
|
|
- double income = Double.parseDouble(d1Feature.getOrDefault("ad_income_" + prefix, "0"));
|
|
|
- featureMap.put("d1_feature_" + prefix + "_ctr", String.valueOf(NumUtil.div(click, view)));
|
|
|
- featureMap.put("d1_feature_" + prefix + "_ctcvr", String.valueOf(NumUtil.div(conver, view)));
|
|
|
- featureMap.put("d1_feature_" + prefix + "_cvr", String.valueOf(NumUtil.div(conver, click)));
|
|
|
- featureMap.put("d1_feature_" + prefix + "_conver", String.valueOf(conver));
|
|
|
- // featureMap.put("d1_feature_" + prefix + "_ecpm", String.valueOf(NumUtil.div(income * 1000, view)));
|
|
|
+ private void fullCustomerInfo(Map<String, Map<String, Map<String, String>>> cidFeature, List<AdPlatformCreativeDTO> dtoList) {
|
|
|
+ for (AdPlatformCreativeDTO dto : dtoList) {
|
|
|
+ Map<String, Map<String, String>> feature = cidFeature.getOrDefault(String.valueOf(dto.getCreativeId()), new HashMap<>());
|
|
|
+ Map<String, String> basicInfo = feature.getOrDefault("alg_cid_feature_basic_info", new HashMap<>());
|
|
|
+ String customer = basicInfo.getOrDefault("customer", "");
|
|
|
+ dto.setCustomer(customer);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private void handleD2Feature(Map<String, Map<String, Double>> vidRankMaps, Map<String, String> featureMap, String cid) {
|
|
|
- if (MapUtils.isEmpty(vidRankMaps)) {
|
|
|
- return;
|
|
|
- }
|
|
|
+ private String choose(List<HotRankFeatureInfo> hotRankFeatureInfos) {
|
|
|
+ // 按CPM排序
|
|
|
+ hotRankFeatureInfos.sort(Comparator.comparing(HotRankFeatureInfo::getCpm));
|
|
|
|
|
|
- // List<String> prefixes1 = Arrays.asList("ctr", "ctcvr", "ecpm");
|
|
|
- List<String> prefixes1 = Arrays.asList("ctr", "ctcvr");
|
|
|
- List<String> prefixes2 = Arrays.asList("1d", "3d", "7d", "14d");
|
|
|
-
|
|
|
- for (String prefix1 : prefixes1) {
|
|
|
- for (String prefix2 : prefixes2) {
|
|
|
- String combinedKey = prefix1 + "_" + prefix2;
|
|
|
- if (vidRankMaps.containsKey(combinedKey)) {
|
|
|
- Double rank = vidRankMaps.get(combinedKey).getOrDefault(cid, 0.0);
|
|
|
- if (rank >= 1.0) {
|
|
|
- featureMap.put("vid_rank_" + combinedKey, String.valueOf(NumUtil.div(1, rank)));
|
|
|
- }
|
|
|
- }
|
|
|
+ // 按照曝光拆分
|
|
|
+ List<HotRankFeatureInfo> highView = new ArrayList<>();
|
|
|
+ List<HotRankFeatureInfo> tailView = new ArrayList<>();
|
|
|
+ for (HotRankFeatureInfo hotRankFeatureInfo : hotRankFeatureInfos) {
|
|
|
+ if (hotRankFeatureInfo.getView() >= hotRankViewThreshold) {
|
|
|
+ highView.add(hotRankFeatureInfo);
|
|
|
+ } else {
|
|
|
+ tailView.add(hotRankFeatureInfo);
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
|
|
|
- private void handleD3AndB1Feature(Map<String, String> d3Feature, String cTitle, Map<String, String> featureMap) {
|
|
|
- if (MapUtils.isEmpty(d3Feature) || !d3Feature.containsKey("title") || StringUtils.isEmpty(cTitle)) {
|
|
|
- return;
|
|
|
+ if (CollectionUtils.isNotEmpty(highView)) {
|
|
|
+ // 高曝光中CPM最大的设置初始权重
|
|
|
+ this.calcFirstItemWeight(highView, tailView, true);
|
|
|
+ // 计算高曝光列表中其他的权重
|
|
|
+ this.calcListOtherWeight(highView);
|
|
|
}
|
|
|
- String vTitle = d3Feature.get("title");
|
|
|
- double score = Similarity.conceptSimilarity(cTitle, vTitle);
|
|
|
- featureMap.put("ctitle_vtitle_similarity", String.valueOf(score));
|
|
|
- }
|
|
|
|
|
|
- private void handleE1AndE2Feature(Map<String, String> e1Feature, Map<String, String> e2Feature, String title, Map<String, String> featureMap) {
|
|
|
- if (StringUtils.isEmpty(title)) {
|
|
|
- return;
|
|
|
+ if (CollectionUtils.isNotEmpty(tailView)) {
|
|
|
+ this.calcFirstItemWeight(highView, tailView, false);
|
|
|
+ this.calcListOtherWeight(tailView);
|
|
|
}
|
|
|
|
|
|
- List<Tuple2<Map<String, String>, String>> tuple2List = Arrays.asList(
|
|
|
- new Tuple2<>(e1Feature, "e1"),
|
|
|
- new Tuple2<>(e2Feature, "e2")
|
|
|
- );
|
|
|
+ List<WeightRandom.ItemWithWeight<String>> itemWithWeights = new ArrayList<>(hotRankFeatureInfos.size());
|
|
|
+ for (HotRankFeatureInfo hotRankFeatureInfo : hotRankFeatureInfos) {
|
|
|
+ WeightRandom.ItemWithWeight<String> itemWeight = new WeightRandom.ItemWithWeight<>(hotRankFeatureInfo.getLabel(), hotRankFeatureInfo.getWeight());
|
|
|
+ itemWithWeights.add(itemWeight);
|
|
|
+ }
|
|
|
|
|
|
- List<String> tagsFieldList = Arrays.asList("tags_3d", "tags_7d", "tags_14d");
|
|
|
- for (Tuple2<Map<String, String>, String> tuple2 : tuple2List) {
|
|
|
- Map<String, String> feature = tuple2.f1;
|
|
|
- String prefix = tuple2.f2;
|
|
|
- if (MapUtils.isEmpty(feature)) {
|
|
|
- continue;
|
|
|
- }
|
|
|
+ return new WeightRandom<>(itemWithWeights).choose();
|
|
|
+ }
|
|
|
|
|
|
- for (String tagsField : tagsFieldList) {
|
|
|
- if (StringUtils.isNotEmpty(feature.get(tagsField))) {
|
|
|
- String tags = feature.get(tagsField);
|
|
|
- Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
|
|
|
- featureMap.put(prefix + "_" + tagsField + "_matchnum", String.valueOf(doubles[0]));
|
|
|
- featureMap.put(prefix + "_" + tagsField + "_maxscore", String.valueOf(doubles[1]));
|
|
|
- featureMap.put(prefix + "_" + tagsField + "_avgscore", String.valueOf(doubles[2]));
|
|
|
+ private void calcFirstItemWeight(List<HotRankFeatureInfo> high, List<HotRankFeatureInfo> tail, boolean isHigh) {
|
|
|
+ if (isHigh) {
|
|
|
+ high.get(0).setWeight(Math.max(1, hotRankMaxWeight));
|
|
|
+ } else {
|
|
|
+ HotRankFeatureInfo firstTail = tail.get(0);
|
|
|
+ double firstCpm = firstTail.getCpm();
|
|
|
+ for (HotRankFeatureInfo info : high) {
|
|
|
+ if (firstCpm >= info.getCpm()) {
|
|
|
+ // 权重最小为1
|
|
|
+ double weight = Math.max(1, info.getWeight() / hotRankCalcWeightCoefficient);
|
|
|
+ firstTail.setWeight(weight);
|
|
|
+ return;
|
|
|
}
|
|
|
}
|
|
|
+ firstTail.setWeight(1);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- private Map<String, Double> parseC1FeatureListToTimeDiffMap(List<TupleMapEntry<Tuple5>> midActionList, long ts) {
|
|
|
- Map<String, Double> midTimeDiffMap = new HashMap<>();
|
|
|
- for (TupleMapEntry<Tuple5> entry : midActionList) {
|
|
|
- String cid = entry.key;
|
|
|
- double tsHistory = Double.parseDouble(entry.value.f1);
|
|
|
- double click = Double.parseDouble(entry.value.f2);
|
|
|
- double conver = Double.parseDouble(entry.value.f3);
|
|
|
- double d = (ts - tsHistory) / 3600 / 24;
|
|
|
- if (!midTimeDiffMap.containsKey("timediff_view_" + cid)) {
|
|
|
- midTimeDiffMap.put("timediff_view_" + cid, NumUtil.div(1, d));
|
|
|
- }
|
|
|
- if (!midTimeDiffMap.containsKey("timediff_click_" + cid) && click > 0) {
|
|
|
- midTimeDiffMap.put("timediff_click_" + cid, NumUtil.div(1, d));
|
|
|
- }
|
|
|
- if (!midTimeDiffMap.containsKey("timediff_conver_" + cid) && conver > 0) {
|
|
|
- midTimeDiffMap.put("timediff_conver_" + cid, NumUtil.div(1, d));
|
|
|
+ /**
|
|
|
+ * 计算列表中第二个元素往后的权重
|
|
|
+ */
|
|
|
+ private void calcListOtherWeight(List<HotRankFeatureInfo> rankItem) {
|
|
|
+ for (int i = 1; i < rankItem.size(); i++) {
|
|
|
+ HotRankFeatureInfo curr = rankItem.get(i);
|
|
|
+ if (curr.getCpm() == 0) {
|
|
|
+ curr.setWeight(1);
|
|
|
+ continue;
|
|
|
}
|
|
|
+ HotRankFeatureInfo prev = rankItem.get(i - 1);
|
|
|
+ double currWeight1 = prev.getWeight() * (1 - (1 - curr.getCpm() / prev.getCpm())) * hotRankCalcWeightCoefficient;
|
|
|
+ double currWeight2 = prev.getWeight() / hotRankCalcWeightCoefficient;
|
|
|
+ double currWeight = Math.max(currWeight1, currWeight2);
|
|
|
+ curr.setWeight(Math.max(1, currWeight));
|
|
|
}
|
|
|
- return midTimeDiffMap;
|
|
|
}
|
|
|
|
|
|
- private Map<String, Double> parseC1FeatureListToActionStaticMap(List<TupleMapEntry<Tuple5>> midActionList) {
|
|
|
- Map<String, Double> midActionStaticsMap = new HashMap<>();
|
|
|
- for (TupleMapEntry<Tuple5> entry : midActionList) {
|
|
|
- String cid = entry.key;
|
|
|
- double click = Double.parseDouble(entry.value.f2);
|
|
|
- double conver = Double.parseDouble(entry.value.f3);
|
|
|
- double income = Double.parseDouble(entry.value.f4);
|
|
|
-
|
|
|
- Double viewSum = midActionStaticsMap.getOrDefault("actionstatic_view_" + cid, 0.0);
|
|
|
- midActionStaticsMap.put("actionstatic_view_" + cid, 1 + viewSum);
|
|
|
-
|
|
|
- Double clickSum = midActionStaticsMap.getOrDefault("actionstatic_click_" + cid, 0.0);
|
|
|
- midActionStaticsMap.put("actionstatic_click_" + cid, clickSum + click);
|
|
|
+ @Data
|
|
|
+ @NoArgsConstructor
|
|
|
+ @AllArgsConstructor
|
|
|
+ private static class HotRankFeatureInfo {
|
|
|
+ private String label;
|
|
|
|
|
|
- Double converSum = midActionStaticsMap.getOrDefault("actionstatic_conver_" + cid, 0.0);
|
|
|
- midActionStaticsMap.put("actionstatic_conver_" + cid, converSum + conver);
|
|
|
+ private double cpm;
|
|
|
|
|
|
- Double incomSum = midActionStaticsMap.getOrDefault("actionstatic_income_" + cid, 0.0);
|
|
|
- midActionStaticsMap.put("actionstatic_income_" + cid, incomSum + income);
|
|
|
- }
|
|
|
-
|
|
|
- return midActionStaticsMap;
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, Map<String, Double>> parseD2FeatureMap(Map<String, String> d2Feature) {
|
|
|
- Map<String, Map<String, Double>> vidRankMaps = new HashMap<>();
|
|
|
- for (Map.Entry<String, String> entry : d2Feature.entrySet()) {
|
|
|
- String key = entry.getKey();
|
|
|
- String value = entry.getValue();
|
|
|
- Map<String, Double> valueMap = Arrays.stream(value.split(","))
|
|
|
- .map(r -> r.split(":"))
|
|
|
- .collect(Collectors.toMap(rList -> rList[0], rList -> Double.parseDouble(rList[2])));
|
|
|
- vidRankMaps.put(key, valueMap);
|
|
|
- }
|
|
|
- return vidRankMaps;
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- private void readBucketFile() {
|
|
|
- if (MapUtils.isNotEmpty(bucketsMap)) {
|
|
|
- return;
|
|
|
- }
|
|
|
- synchronized (this) {
|
|
|
- InputStream resourceStream = RankStrategyBy687.class.getClassLoader().getResourceAsStream("20240718_ad_bucket_688.txt");
|
|
|
- if (resourceStream != null) {
|
|
|
- try (BufferedReader reader = new BufferedReader(new InputStreamReader(resourceStream))) {
|
|
|
- Map<String, double[]> bucketsMap = new HashMap<>();
|
|
|
- Map<String, Double> bucketsLen = new HashMap<>();
|
|
|
- String line;
|
|
|
- while ((line = reader.readLine()) != null) {
|
|
|
- // 替换空格和换行符,过滤空行
|
|
|
- line = line.replace(" ", "").replaceAll("\n", "");
|
|
|
- if (!line.isEmpty()) {
|
|
|
- String[] rList = line.split("\t");
|
|
|
- if (rList.length == 3) {
|
|
|
- String key = rList[0];
|
|
|
- double value1 = Double.parseDouble(rList[1]);
|
|
|
- bucketsLen.put(key, value1);
|
|
|
- double[] value2 = Arrays.stream(rList[2].split(","))
|
|
|
- .mapToDouble(Double::valueOf)
|
|
|
- .toArray();
|
|
|
- bucketsMap.put(key, value2);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- this.bucketsMap = bucketsMap;
|
|
|
- this.bucketsLen = bucketsLen;
|
|
|
- } catch (IOException e) {
|
|
|
- log.error("something is wrong in parse bucket file:", e);
|
|
|
- }
|
|
|
- } else {
|
|
|
- log.error("no bucket file");
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, String> featureBucket(Map<String, String> featureMap) {
|
|
|
- Map<String, String> newFeatureMap = new ConcurrentHashMap<>(featureMap.size());
|
|
|
- for (Map.Entry<String, String> entry : featureMap.entrySet()) {
|
|
|
- String name = entry.getKey();
|
|
|
- double score = Double.parseDouble(entry.getValue());
|
|
|
- // 注意:0值、不在分桶文件中的特征,会被过滤掉。
|
|
|
- if (score > 1E-8) {
|
|
|
- if (this.bucketsMap.containsKey(name) && this.bucketsLen.containsKey(name)) {
|
|
|
- double[] buckets = this.bucketsMap.get(name);
|
|
|
- double bucketNum = this.bucketsLen.get(name);
|
|
|
- Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
|
|
|
- newFeatureMap.put(name, String.valueOf(scoreNew));
|
|
|
- } else {
|
|
|
- newFeatureMap.put(name, String.valueOf(score));
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ private int view;
|
|
|
|
|
|
- return newFeatureMap;
|
|
|
+ private double weight = 1;
|
|
|
}
|
|
|
}
|