|
@@ -0,0 +1,257 @@
|
|
|
+package com.tzld.piaoquan.recommend.server.service.rank.strategy;
|
|
|
+
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
+import com.google.common.reflect.TypeToken;
|
|
|
+import com.tzld.piaoquan.recommend.server.common.base.RankItem;
|
|
|
+import com.tzld.piaoquan.recommend.server.model.Video;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.RankParam;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.RankResult;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.RankService;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorItemTags;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorBoost;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorDensity;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorInsert;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.rank.processor.RankProcessorTagFilter;
|
|
|
+import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
|
|
|
+import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
|
|
|
+import com.tzld.piaoquan.recommend.server.util.JSONUtils;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.commons.collections4.CollectionUtils;
|
|
|
+import org.apache.commons.lang3.RandomUtils;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+
|
|
|
+import java.text.SimpleDateFormat;
|
|
|
+import java.util.*;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+/**
|
|
|
+ * @author zhangbo
|
|
|
+ * @desc 地域召回融合 流量池汤姆森
|
|
|
+ */
|
|
|
+@Service
|
|
|
+@Slf4j
|
|
|
+public class RankStrategy4RegionMergeModelBasic extends RankService {
|
|
|
+ @ApolloJsonValue("${rank.score.merge.weight:}")
|
|
|
+ private Map<String, Double> mergeWeight;
|
|
|
+ @ApolloJsonValue("${RankStrategy4DensityFilterV2:}")
|
|
|
+ private Map<String, Map<String, Map<String, String>>> filterRules = new HashMap<>();
|
|
|
+ final private String CLASS_NAME = this.getClass().getSimpleName();
|
|
|
+
|
|
|
+ public void duplicate(Set<Long> setVideo, List<Video> videos) {
|
|
|
+ Iterator<Video> iterator = videos.iterator();
|
|
|
+ while (iterator.hasNext()) {
|
|
|
+ Video v = iterator.next();
|
|
|
+ if (setVideo.contains(v.getVideoId())) {
|
|
|
+ iterator.remove();
|
|
|
+ } else {
|
|
|
+ setVideo.add(v.getVideoId());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public List<Video> mergeAndRankRovRecall(RankParam param) {
|
|
|
+ List<Video> result = new ArrayList<>();
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public Double calScoreWeightNoTimeDecay(List<Double> data) {
|
|
|
+ Double up = 0.0;
|
|
|
+ Double down = 0.0;
|
|
|
+ for (int i = 0; i < data.size(); ++i) {
|
|
|
+ up += 1.0 * data.get(i);
|
|
|
+ down += 1.0;
|
|
|
+ }
|
|
|
+ return down > 1E-8 ? up / down : 0.0;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<Double> getRateData(List<Double> ups, List<Double> downs, Double up, Double down) {
|
|
|
+ List<Double> data = new LinkedList<>();
|
|
|
+ for (int i = 0; i < ups.size(); ++i) {
|
|
|
+ if (ExtractorUtils.isDoubleEqualToZero(downs.get(i) + down)) {
|
|
|
+ data.add(0.0);
|
|
|
+ } else {
|
|
|
+ data.add(
|
|
|
+ (ups.get(i) + up) / (downs.get(i) + down)
|
|
|
+ );
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return data;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<Double> getStaticData(Map<String, Map<String, Double>> itemRealMap,
|
|
|
+ List<String> datehours, String key) {
|
|
|
+ List<Double> views = new LinkedList<>();
|
|
|
+ Map<String, Double> tmp = itemRealMap.getOrDefault(key, new HashMap<>());
|
|
|
+ for (String dh : datehours) {
|
|
|
+ views.add(tmp.getOrDefault(dh, 0.0D) +
|
|
|
+ (views.isEmpty() ? 0.0 : views.get(views.size() - 1))
|
|
|
+ );
|
|
|
+ }
|
|
|
+ return views;
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<RankItem> model(List<Video> videos, RankParam param,
|
|
|
+ List<String> rtFeaPart) {
|
|
|
+ List<RankItem> result = new ArrayList<>();
|
|
|
+ if (videos.isEmpty()) {
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ List<RankItem> rankItems = CommonCollectionUtils.toList(videos, RankItem::new);
|
|
|
+ List<Long> videoIds = CommonCollectionUtils.toListDistinct(videos, Video::getVideoId);
|
|
|
+
|
|
|
+ // 2-2: item 实时特征处理
|
|
|
+ List<String> videoRtKeys2 = videoIds.stream().map(r -> "item_rt_fea_1h_" + r)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ List<String> videoRtFeatures = this.redisTemplate.opsForValue().multiGet(videoRtKeys2);
|
|
|
+
|
|
|
+
|
|
|
+ if (videoRtFeatures != null) {
|
|
|
+ int j = 0;
|
|
|
+ for (RankItem item : rankItems) {
|
|
|
+ String vF = videoRtFeatures.get(j);
|
|
|
+ ++j;
|
|
|
+ if (vF == null) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ Map<String, String> vfMap = new HashMap<>();
|
|
|
+ Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
|
|
|
+ try {
|
|
|
+ vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
|
|
|
+ }, vfMap);
|
|
|
+
|
|
|
+ for (Map.Entry<String, String> entry : vfMap.entrySet()) {
|
|
|
+ String value = entry.getValue();
|
|
|
+ if (value == null) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ String[] var1 = value.split(",");
|
|
|
+ Map<String, Double> tmp = new HashMap<>();
|
|
|
+ for (String var2 : var1) {
|
|
|
+ String[] var3 = var2.split(":");
|
|
|
+ tmp.put(var3[0], Double.valueOf(var3[1]));
|
|
|
+ }
|
|
|
+ vfMapNew.put(entry.getKey(), tmp);
|
|
|
+ }
|
|
|
+ item.setItemRealTimeFeature(vfMapNew);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error(String.format("parse video item_rt_fea_1h_ json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return rankItems;
|
|
|
+ }
|
|
|
+
|
|
|
+ @Override
|
|
|
+ public RankResult mergeAndSort(RankParam param, List<Video> rovVideos, List<Video> flowVideos) {
|
|
|
+
|
|
|
+ //1 兜底策略,rov池子不足时,用冷启池填补。直接返回。
|
|
|
+ if (CollectionUtils.isEmpty(rovVideos)) {
|
|
|
+ if (param.getSize() < flowVideos.size()) {
|
|
|
+ return new RankResult(flowVideos.subList(0, param.getSize()));
|
|
|
+ } else {
|
|
|
+ return new RankResult(flowVideos);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //2 根据实验号解析阿波罗参数。
|
|
|
+ String abCode = param.getAbCode();
|
|
|
+ Map<String, Map<String, String>> rulesMap = this.filterRules.getOrDefault(abCode, new HashMap<>(0));
|
|
|
+
|
|
|
+ //3 标签读取
|
|
|
+ if (rulesMap != null && !rulesMap.isEmpty()) {
|
|
|
+ RankExtractorItemTags extractorItemTags = new RankExtractorItemTags(this.redisTemplate);
|
|
|
+ extractorItemTags.processor(rovVideos, flowVideos);
|
|
|
+ }
|
|
|
+ //6 合并结果时间卡控
|
|
|
+ if (rulesMap != null && !rulesMap.isEmpty()) {
|
|
|
+ RankProcessorTagFilter.processor(rovVideos, flowVideos, rulesMap);
|
|
|
+ }
|
|
|
+
|
|
|
+ //4 rov池提权功能
|
|
|
+ RankProcessorBoost.boostByTag(rovVideos, rulesMap);
|
|
|
+
|
|
|
+ //5 rov池强插功能
|
|
|
+ RankProcessorInsert.insertByTag(param, rovVideos, rulesMap);
|
|
|
+
|
|
|
+ //7 流量池按比例强插
|
|
|
+ List<Video> result = new ArrayList<>();
|
|
|
+ for (int i = 0; i < param.getTopK() && i < rovVideos.size(); i++) {
|
|
|
+ result.add(rovVideos.get(i));
|
|
|
+ }
|
|
|
+ double flowPoolP = getFlowPoolP(param);
|
|
|
+ int flowPoolIndex = 0;
|
|
|
+ int rovPoolIndex = param.getTopK();
|
|
|
+ for (int i = 0; i < param.getSize() - param.getTopK(); i++) {
|
|
|
+ double rand = RandomUtils.nextDouble(0, 1);
|
|
|
+ if (rand < flowPoolP) {
|
|
|
+ if (flowPoolIndex < flowVideos.size()) {
|
|
|
+ result.add(flowVideos.get(flowPoolIndex++));
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (rovPoolIndex < rovVideos.size()) {
|
|
|
+ result.add(rovVideos.get(rovPoolIndex++));
|
|
|
+ } else {
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (rovPoolIndex >= rovVideos.size()) {
|
|
|
+ for (int i = flowPoolIndex; i < flowVideos.size() && result.size() < param.getSize(); i++) {
|
|
|
+ result.add(flowVideos.get(i));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (flowPoolIndex >= flowVideos.size()) {
|
|
|
+ for (int i = rovPoolIndex; i < rovVideos.size() && result.size() < param.getSize(); i++) {
|
|
|
+ result.add(rovVideos.get(i));
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ //8 合并结果密度控制
|
|
|
+ Map<String, Integer> densityRules = new HashMap<>();
|
|
|
+ if (rulesMap != null && !rulesMap.isEmpty()) {
|
|
|
+ for (Map.Entry<String, Map<String, String>> entry : rulesMap.entrySet()) {
|
|
|
+ String key = entry.getKey();
|
|
|
+ Map<String, String> value = entry.getValue();
|
|
|
+ if (value.containsKey("density")) {
|
|
|
+ densityRules.put(key, Integer.valueOf(value.get("density")));
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ Set<Long> videosSet = result.stream().map(Video::getVideoId).collect(Collectors.toSet());
|
|
|
+ List<Video> rovRecallRankNew = rovVideos.stream().filter(r -> !videosSet.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ List<Video> flowPoolRankNew = flowVideos.stream().filter(r -> !videosSet.contains(r.getVideoId())).collect(Collectors.toList());
|
|
|
+ List<Video> resultWithDensity = RankProcessorDensity.mergeDensityControl(result,
|
|
|
+ rovRecallRankNew, flowPoolRankNew, densityRules);
|
|
|
+
|
|
|
+ return new RankResult(resultWithDensity);
|
|
|
+ }
|
|
|
+
|
|
|
+ public static void main(String[] args) {
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ List<Double> help(String s, String date, Integer h) {
|
|
|
+ Map<String, Double> maps = Arrays.stream(s.split(",")).map(pair -> pair.split(":"))
|
|
|
+ .collect(Collectors.toMap(
|
|
|
+ arr -> arr[0],
|
|
|
+ arr -> Double.valueOf(arr[1])
|
|
|
+ ));
|
|
|
+ List<String> datehours = new LinkedList<>(); // 时间是倒叙的
|
|
|
+ List<Double> result = new ArrayList<>();
|
|
|
+ for (int i = 0; i < h; ++i) {
|
|
|
+ Double d = (result.isEmpty() ? 0.0 : result.get(result.size() - 1));
|
|
|
+ result.add(d + maps.getOrDefault(date, 0D));
|
|
|
+ datehours.add(date);
|
|
|
+ date = ExtractorUtils.subtractHours(date, 1);
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|